From 15babc2beac0306f790d9008ae9348aac4eed170 Mon Sep 17 00:00:00 2001 From: MarkLo Date: Tue, 25 Nov 2025 05:35:54 +0800 Subject: [PATCH] --- backend/app/services/pdf_generator.py | 37 ++++++++++++------- .../agents/researchers/bear_researcher.py | 10 ++++- .../agents/researchers/bull_researcher.py | 15 +++++++- tradingagents/agents/trader/trader.py | 15 +++++++- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/backend/app/services/pdf_generator.py b/backend/app/services/pdf_generator.py index b3a11f24..5c065f87 100644 --- a/backend/app/services/pdf_generator.py +++ b/backend/app/services/pdf_generator.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ PDF Generation Service for Analyst Reports Converts markdown reports to PDF format with Chinese character support @@ -101,7 +102,7 @@ class PDFGenerator: # Define styles styles = getSampleStyleSheet() - # Custom styles with Cactus Classical Serif font + # Custom styles with proper spacing and wrapping title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], @@ -110,6 +111,7 @@ class PDFGenerator: textColor=HexColor('#1a1a1a'), spaceAfter=30, alignment=TA_CENTER, + wordWrap='CJK', ) subtitle_style = ParagraphStyle( @@ -118,8 +120,9 @@ class PDFGenerator: fontName=self.primary_font, fontSize=12, textColor=HexColor('#666666'), - spaceAfter=20, + spaceAfter=12, alignment=TA_CENTER, + wordWrap='CJK', ) heading_style = ParagraphStyle( @@ -129,7 +132,8 @@ class PDFGenerator: fontSize=16, textColor=HexColor('#2c3e50'), spaceAfter=12, - spaceBefore=12, + spaceBefore=16, + wordWrap='CJK', ) body_style = ParagraphStyle( @@ -137,9 +141,11 @@ class PDFGenerator: parent=styles['Normal'], fontName=self.primary_font, fontSize=10, - leading=14, + leading=16, # Increased from 14 for better readability textColor=HexColor('#333333'), - spaceAfter=8, + spaceAfter=10, + wordWrap='CJK', + splitLongWords=True, ) # Add title @@ -178,11 +184,8 @@ class PDFGenerator: else: # Regular paragraph - escape HTML chars and handle special characters text = self._escape_html(para) - try: - elements.append(Paragraph(text, body_style)) - except Exception as e: - # If paragraph fails, add as plain text - elements.append(Paragraph(text.encode('ascii', 'xmlcharrefreplace').decode(), body_style)) + # Ensure proper UTF-8 handling + elements.append(Paragraph(text, body_style)) # Build PDF doc.build(elements) @@ -206,14 +209,14 @@ class PDFGenerator: # Remove markdown links but keep text text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) - # Remove bold/italic markers + # Remove bold/italic markers carefully to avoid orphan characters text = re.sub(r'\*\*([^\*]+)\*\*', r'\1', text) - text = re.sub(r'\*([^\*]+)\*', r'\1', text) + text = re.sub(r'(? str: diff --git a/tradingagents/agents/researchers/bear_researcher.py b/tradingagents/agents/researchers/bear_researcher.py index 77f69ca2..6a0164a5 100644 --- a/tradingagents/agents/researchers/bear_researcher.py +++ b/tradingagents/agents/researchers/bear_researcher.py @@ -47,10 +47,16 @@ def create_bear_researcher(llm, memory): # 目標:將每個報告限制在合理的字符數內,總共不超過約 15000 字符(約 20000-30000 tokens) def truncate_text(text, max_chars): - """截斷文本到指定字符數""" + """智能截斷文本到指定字符數,在句子邊界處截斷""" if len(text) <= max_chars: return text - return text[:max_chars] + "\n...(內容已截斷)" + + truncated = text[:max_chars] + for delimiter in ['。', '\n', ',', '、', ' ']: + last_pos = truncated.rfind(delimiter) + if last_pos > max_chars * 0.8: + return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)" + return truncated + "...(為控制長度已精簡)" # 為每個報告設置合理的字符限制 # 模型 gpt-4.1-mini 的限制是 8192 tokens diff --git a/tradingagents/agents/researchers/bull_researcher.py b/tradingagents/agents/researchers/bull_researcher.py index 8b364feb..e4260706 100644 --- a/tradingagents/agents/researchers/bull_researcher.py +++ b/tradingagents/agents/researchers/bull_researcher.py @@ -47,10 +47,21 @@ def create_bull_researcher(llm, memory): # 目標:將每個報告限制在合理的字符數內,總共不超過約 15000 字符(約 20000-30000 tokens) def truncate_text(text, max_chars): - """截斷文本到指定字符數""" + """智能截斷文本到指定字符數,在句子邊界處截斷""" if len(text) <= max_chars: return text - return text[:max_chars] + "\n...(內容已截斷)" + + # 在max_chars附近尋找句子結束標記 + truncated = text[:max_chars] + + # 尋找最後一個句號、換行或逗號 + for delimiter in ['。', '\n', ',', '、', ' ']: + last_pos = truncated.rfind(delimiter) + if last_pos > max_chars * 0.8: # 至少保留80%的內容 + return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)" + + # 如果找不到合適的分隔符,直接在字符處截斷 + return truncated + "...(為控制長度已精簡)" # 為每個報告設置合理的字符限制 # 模型 gpt-4.1-mini 的限制是 8192 tokens diff --git a/tradingagents/agents/trader/trader.py b/tradingagents/agents/trader/trader.py index 7f0a9f66..285ab510 100644 --- a/tradingagents/agents/trader/trader.py +++ b/tradingagents/agents/trader/trader.py @@ -41,10 +41,21 @@ def create_trader(llm, memory): # 定義文本截斷函數以避免超過 token 限制 def truncate_text(text, max_chars): - """截斷文本到指定字符數""" + """智能截斷文本到指定字符數,在句子邊界處截斷""" if len(text) <= max_chars: return text - return text[:max_chars] + "\n...(內容已截斷)" + + # 在max_chars附近尋找句子結束標記 + truncated = text[:max_chars] + + # 尋找最後一個句號、換行或逗號 + for delimiter in ['。', '\n', ',', '、', ' ']: + last_pos = truncated.rfind(delimiter) + if last_pos > max_chars * 0.8: # 至少保留80%的內容 + return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)" + + # 如果找不到合適的分隔符,直接在字符處截斷 + return truncated + "...(為控制長度已精簡)" # 截斷各類報告以控制 token 使用量 # 這些報告將用於記憶檢索(embedding)和 LLM prompt