2025-11-25 05:35:54 +08:00 · 2025-11-25 05:35:54 +08:00 · 15babc2bea
parent 4cf7e808d7
commit 15babc2bea
4 changed files with 57 additions and 20 deletions
--- a/backend/app/services/pdf_generator.py
+++ b/backend/app/services/pdf_generator.py
@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 PDF Generation Service for Analyst Reports
 Converts markdown reports to PDF format with Chinese character support
@ -101,7 +102,7 @@ class PDFGenerator:
        # Define styles
        styles = getSampleStyleSheet()
        
-        # Custom styles with Cactus Classical Serif font
+        # Custom styles with proper spacing and wrapping
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
@ -110,6 +111,7 @@ class PDFGenerator:
            textColor=HexColor('#1a1a1a'),
            spaceAfter=30,
            alignment=TA_CENTER,
+            wordWrap='CJK',
        )
        
        subtitle_style = ParagraphStyle(
@ -118,8 +120,9 @@ class PDFGenerator:
            fontName=self.primary_font,
            fontSize=12,
            textColor=HexColor('#666666'),
-            spaceAfter=20,
+            spaceAfter=12,
            alignment=TA_CENTER,
+            wordWrap='CJK',
        )
        
        heading_style = ParagraphStyle(
@ -129,7 +132,8 @@ class PDFGenerator:
            fontSize=16,
            textColor=HexColor('#2c3e50'),
            spaceAfter=12,
-            spaceBefore=12,
+            spaceBefore=16,
+            wordWrap='CJK',
        )
        
        body_style = ParagraphStyle(
@ -137,9 +141,11 @@ class PDFGenerator:
            parent=styles['Normal'],
            fontName=self.primary_font,
            fontSize=10,
-            leading=14,
+            leading=16,  # Increased from 14 for better readability
            textColor=HexColor('#333333'),
-            spaceAfter=8,
+            spaceAfter=10,
+            wordWrap='CJK',
+            splitLongWords=True,
        )
        
        # Add title
@ -178,11 +184,8 @@ class PDFGenerator:
            else:
                # Regular paragraph - escape HTML chars and handle special characters
                text = self._escape_html(para)
-                try:
-                    elements.append(Paragraph(text, body_style))
-                except Exception as e:
-                    # If paragraph fails, add as plain text
-                    elements.append(Paragraph(text.encode('ascii', 'xmlcharrefreplace').decode(), body_style))
+                # Text is already escaped above; pass it to Paragraph directly (ASCII fallback removed)
+                elements.append(Paragraph(text, body_style))
        
        # Build PDF
        doc.build(elements)
@ -206,14 +209,14 @@ class PDFGenerator:
        # Remove markdown links but keep text
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        
-        # Remove bold/italic markers
+        # Remove bold/italic markers carefully to avoid orphan characters
        text = re.sub(r'\*\*([^\*]+)\*\*', r'\1', text)
-        text = re.sub(r'\*([^\*]+)\*', r'\1', text)
+        text = re.sub(r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', text)  # Avoid double asterisks
        text = re.sub(r'__([^_]+)__', r'\1', text)
-        text = re.sub(r'_([^_]+)_', r'\1', text)
+        text = re.sub(r'(?<!_)_([^_]+)_(?!_)', r'\1', text)  # Avoid double underscores
        
        # Remove code blocks
-        text = re.sub(r'```[^`]*```', '', text, flags=re.DOTALL)
+        text = re.sub(r'```[^`]*```', ' ', text, flags=re.DOTALL)  # Replace with space not empty
        text = re.sub(r'`([^`]+)`', r'\1', text)
        
        # Clean up bullet points
@ -222,6 +225,12 @@ class PDFGenerator:
        # Remove horizontal rules
        text = re.sub(r'^[\-\*\_]{3,}\s*$', '', text, flags=re.MULTILINE)
        
+        # Remove multiple consecutive spaces
+        text = re.sub(r' {2,}', ' ', text)
+        
+        # Remove orphaned single characters that might be markdown artifacts
+        text = re.sub(r'(?<=[^\w])([*_`~#])(?=[^\w])', '', text)
+        
        return text
    
    def _escape_html(self, text: str) -> str:
--- a/tradingagents/agents/researchers/bear_researcher.py
+++ b/tradingagents/agents/researchers/bear_researcher.py
@ -47,10 +47,16 @@ def create_bear_researcher(llm, memory):
        # 目標：將每個報告限制在合理的字符數內，總共不超過約 15000 字符（約 20000-30000 tokens）
        
        def truncate_text(text, max_chars):
-            """截斷文本到指定字符數"""
+            """智能截斷文本到指定字符數，在句子邊界處截斷"""
            if len(text) <= max_chars:
                return text
-            return text[:max_chars] + "\n...(內容已截斷)"
+            
+            truncated = text[:max_chars]
+            for delimiter in ['。', '\n', '，', '、', ' ']:
+                last_pos = truncated.rfind(delimiter)
+                if last_pos > max_chars * 0.8:
+                    return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)"
+            return truncated + "...(為控制長度已精簡)"
        
        # 為每個報告設置合理的字符限制
        # 模型 gpt-4.1-mini 的限制是 8192 tokens
--- a/tradingagents/agents/researchers/bull_researcher.py
+++ b/tradingagents/agents/researchers/bull_researcher.py
@ -47,10 +47,21 @@ def create_bull_researcher(llm, memory):
        # 目標：將每個報告限制在合理的字符數內，總共不超過約 15000 字符（約 20000-30000 tokens）
        
        def truncate_text(text, max_chars):
-            """截斷文本到指定字符數"""
+            """智能截斷文本到指定字符數，在句子邊界處截斷"""
            if len(text) <= max_chars:
                return text
-            return text[:max_chars] + "\n...(內容已截斷)"
+            
+            # 在max_chars附近尋找句子結束標記
+            truncated = text[:max_chars]
+            
+            # Try delimiters in priority order: full stop, newline, comma, enumeration comma, space
+            for delimiter in ['。', '\n', '，', '、', ' ']:
+                last_pos = truncated.rfind(delimiter)
+                if last_pos > max_chars * 0.8:  # 至少保留80%的內容
+                    return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)"
+            
+            # 如果找不到合適的分隔符，直接在字符處截斷
+            return truncated + "...(為控制長度已精簡)"
        
        # 為每個報告設置合理的字符限制
        # 模型 gpt-4.1-mini 的限制是 8192 tokens
--- a/tradingagents/agents/trader/trader.py
+++ b/tradingagents/agents/trader/trader.py
@ -41,10 +41,21 @@ def create_trader(llm, memory):

        # 定義文本截斷函數以避免超過 token 限制
        def truncate_text(text, max_chars):
-            """截斷文本到指定字符數"""
+            """智能截斷文本到指定字符數，在句子邊界處截斷"""
            if len(text) <= max_chars:
                return text
-            return text[:max_chars] + "\n...(內容已截斷)"
+            
+            # 在max_chars附近尋找句子結束標記
+            truncated = text[:max_chars]
+            
+            # Try delimiters in priority order: full stop, newline, comma, enumeration comma, space
+            for delimiter in ['。', '\n', '，', '、', ' ']:
+                last_pos = truncated.rfind(delimiter)
+                if last_pos > max_chars * 0.8:  # 至少保留80%的內容
+                    return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)"
+            
+            # 如果找不到合適的分隔符，直接在字符處截斷
+            return truncated + "...(為控制長度已精簡)"
        
        # 截斷各類報告以控制 token 使用量
        # 這些報告將用於記憶檢索（embedding）和 LLM prompt