2025-11-26 13:39:14 +08:00 · 2025-11-26 13:39:14 +08:00 · 3dcdbb5659
parent ace244e7be
commit 3dcdbb5659
13 changed files with 78 additions and 144 deletions
--- a/backend/app/services/pdf_generator.py
+++ b/backend/app/services/pdf_generator.py
@ -224,7 +224,7 @@ class PDFGenerator:
    def _clean_markdown(self, text: str) -> str:
        """
        Clean markdown formatting for PDF - IMPROVED VERSION
-        Fixes spurious character issues and improves cleaning logic
+        Simplified regex patterns to prevent encoding artifacts
        
        Args:
            text: Markdown text
@ -232,47 +232,54 @@ class PDFGenerator:
        Returns:
            Cleaned text
        """
+        import unicodedata
+        
+        # 0. Normalize Unicode to prevent encoding issues
+        text = unicodedata.normalize('NFKC', text)
+        
        # 1. Remove markdown links but keep text
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        
-        # 2. Remove bold markers (improved version)
+        # 2. Remove bold markers (simplified version)
        text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
-        
-        # 3. Remove italic markers (more precise to avoid side effects)
-        text = re.sub(r'(?<![\*_])\*([^\*\n]+?)\*(?![\*_])', r'\1', text)
-        text = re.sub(r'(?<![\*_])_([^_\n]+?)_(?![\*_])', r'\1', text)
-        
-        # 4. Remove underscore bold
        text = re.sub(r'__(.+?)__', r'\1', text)
        
-        # 5. Remove code blocks
+        # 3. Remove italic markers (simplified: single-character lookbehind/lookahead guards only)
+        # Only match a single * or _ that is not directly adjacent to another * or _ (i.e. not part of ** or __)
+        text = re.sub(r'(?<![\*])\*([^\*]+?)\*(?![\*])', r'\1', text)
+        text = re.sub(r'(?<![_])_([^_]+?)_(?![_])', r'\1', text)
+        
+        # 4. Remove code blocks
        text = re.sub(r'```[^`]*?```', '', text, flags=re.DOTALL)
        text = re.sub(r'`([^`]+?)`', r'\1', text)
        
-        # 6. Clean up bullet points
+        # 5. Clean up bullet points
        text = re.sub(r'^\s*[\*\-\+]\s+', '• ', text, flags=re.MULTILINE)
        
-        # 7. Remove horizontal rules
+        # 6. Remove horizontal rules
        text = re.sub(r'^[\-\*_]{3,}\s*$', '', text, flags=re.MULTILINE)
        
-        # 8. Clean table separators
+        # 7. Clean table separators (simplified)
        text = re.sub(r'^\s*\|?\s*:?-+:?\s*\|?\s*$', '', text, flags=re.MULTILINE)
        
-        # 9. Remove table | symbols (keep content)
+        # 8. Remove table | symbols (keep content)
        text = re.sub(r'^\s*\|', '', text, flags=re.MULTILINE)
        text = re.sub(r'\|\s*$', '', text, flags=re.MULTILINE)
        text = re.sub(r'\|', ' | ', text)
        
-        # 10. Clean excess spaces
+        # 9. Clean excess spaces
        text = re.sub(r' {2,}', ' ', text)
        
-        # 11. Clean excess blank lines
+        # 10. Clean excess blank lines
        text = re.sub(r'\n{3,}', '\n\n', text)
        
-        # 12. Remove isolated markdown symbols (more cautious to avoid spurious chars)
-        text = re.sub(r'(?<=\s)[\*_`~#]+(?=\s)', '', text)
-        text = re.sub(r'^[\*_`~#]+(?=\s)', '', text, flags=re.MULTILINE)
-        text = re.sub(r'(?<=\s)[\*_`~#]+$', '', text, flags=re.MULTILINE)
+        # 11. Remove isolated markdown symbols (SIMPLIFIED - no complex patterns)
+        # Remove lines that only contain markdown symbols
+        text = re.sub(r'^[\*_`~#\-\+]+\s*$', '', text, flags=re.MULTILINE)
+        
+        # 12. Final Unicode check - drop non-printable characters that might cause PDF encoding issues
+        # Keep printable characters, newline/carriage return/tab, and CJK Unified Ideographs (U+4E00-U+9FFF)
+        text = ''.join(char for char in text if char.isprintable() or char in '\n\r\t' or '\u4e00' <= char <= '\u9fff')
        
        return text.strip()
    
--- a/tradingagents/agents/analysts/fundamentals_analyst.py
+++ b/tradingagents/agents/analysts/fundamentals_analyst.py
@ -54,7 +54,9 @@ def create_fundamentals_analyst(llm):
 • 整合數據進行綜合評估

 【報告架構】
-**字數要求**：**至少800字以上（不含表格）**
+**字數要求**：**800-1500字（不含表格）**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
+
 **內容結構**：
 1. 公司概述（150字以上）：業務特性與競爭地位
 2. 財務分析（400-450字）：獲利能力、財務結構、現金流分析
--- a/tradingagents/agents/analysts/market_analyst.py
+++ b/tradingagents/agents/analysts/market_analyst.py
@ -53,18 +53,21 @@ def create_market_analyst(llm):
 • 整合數據後提出專業見解

 【報告架構】
-**字數要求**：**至少800字以上（不含表格）**
+**字數要求**：**800-1500字（不含表格）**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
+
 **內容結構**：
-1. 市場概況（150字以上）：趨勢方向與動能強弱
-2. 技術分析（400-450字）：指標解讀與相互驗證
-3. 關鍵價位（100字以上）：支撐/壓力位及其技術意義
-4. 操作策略（150字以上）：進場點位、停損設定、目標價位
-5. 數據摘要表格（必須）
+1. 市場概況（120-150字）：趨勢方向與動能強弱
+2. 技術分析（400-600字）：指標解讀與相互驗證
+3. 關鍵價位（80-120字）：支撐/壓力位及其技術意義
+4. 操作策略（150-200字）：進場點位、停損設定、目標價位
+5. 數據摘要表格（必須，不計入字數）

 **撰寫原則**：
 - 專業但清晰，避免過度技術化的表述
 - 結論明確，提供可執行的交易建議
 - 必須包含核心數據整理表格
+- 控制篇幅，確保在1500字以內完成分析

 **結尾提示**：
 請在報告最後加上以下結尾：
--- a/tradingagents/agents/analysts/news_analyst.py
+++ b/tradingagents/agents/analysts/news_analyst.py
@ -50,18 +50,21 @@ def create_news_analyst(llm):
 • 篩選高價值資訊並進行深度解讀

 【報告架構】
-**字數要求**：**至少800字以上（不含表格）**
+**字數要求**：**800-1500字（不含表格）**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
+
 **內容結構**：
-1. 新聞摘要（150字以上）：重點事件概述
-2. 影響分析（400-450字）：事件對股價的多維度影響評估
-3. 風險提示（100字以上）：潛在風險或市場未注意的因素
-4. 操作建議（150字以上）：基於新聞面的投資策略
-5. 新聞事件表格（必須）
+1. 新聞摘要（120-150字）：重點事件概述
+2. 影響分析（400-600字）：事件對股價的多維度影響評估
+3. 風險提示（80-120字）：潛在風險或市場未注意的因素
+4. 操作建議（150-200字）：基於新聞面的投資策略
+5. 新聞事件表格（必須，不計入字數）

 **撰寫原則**：
 - 聚焦實質影響，過濾非重要資訊
 - 提供獨立觀點與專業解讀
 - 必須包含關鍵新聞整理表格
+- 控制篇幅，確保在1500字以內完成分析

 **結尾提示**：
 請在報告最後加上以下結尾：
--- a/tradingagents/agents/analysts/social_media_analyst.py
+++ b/tradingagents/agents/analysts/social_media_analyst.py
@ -50,7 +50,9 @@ def create_social_media_analyst(llm):
 • 分析輿情傾向與討論熱度

 【報告架構】
-**字數要求**：**至少800字以上（不含表格）**
+**字數要求**：**800-1500字（不含表格）**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
+
 **內容結構**：
 1. 情緒概要（150字以上）：市場氛圍與情緒指標
 2. 輿情分析（400-450字）：主要討論議題與觀點分布
--- a/tradingagents/agents/managers/research_manager.py
+++ b/tradingagents/agents/managers/research_manager.py
@ -39,20 +39,8 @@ def create_research_manager(llm, memory):
        news_report = state["news_report"]
        fundamentals_report = state["fundamentals_report"]

-        # 定義文本截斷函數以避免超過 token 限制
-        def truncate_text(text, max_chars):
-            """截斷文本到指定字符數"""
-            if len(text) <= max_chars:
-                return text
-            return text[:max_chars] + "\n...(內容已截斷)"
-        
-        
-        # 為每個報告設置合理的字符限制
-        # 增加限制以確保 800+ 字的報告不被截斷
-        market_research_report = truncate_text(market_research_report, 2000)
-        sentiment_report = truncate_text(sentiment_report, 2000)
-        news_report = truncate_text(news_report, 2500)
-        fundamentals_report = truncate_text(fundamentals_report, 2000)
+        # Truncation helper removed: reports are no longer shortened to stay under a token limit.
+        # Per-report character limits removed: each report is used in full.
        
        # 整合當前情況
        curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}"
@ -60,18 +48,13 @@ def create_research_manager(llm, memory):
        # 從記憶體中獲取過去相似情況的經驗
        past_memories = memory.get_memories(curr_situation, n_matches=2)

-        # 將過去的經驗格式化為字串（限制長度）
+        # 將過去的經驗格式化為字串
        past_memory_str = ""
        for i, rec in enumerate(past_memories, 1):
            recommendation = rec["recommendation"]
-            # 限制每條記憶的長度
-            if len(recommendation) > 200:
-                recommendation = recommendation[:200] + "...(已截斷)"
            past_memory_str += recommendation + "\n\n"
        
-        # 截斷辯論歷史 - 這是最容易超過限制的部分
-        # 增加限制以容納更長的辯論內容
-        history = truncate_text(history, 3000)
+        # Debate history (the part most likely to exceed the limit) is no longer truncated; the full content is kept.

        # 建立提示 (prompt)
        prompt = f"""**重要：您必須使用繁體中文（Traditional Chinese）回覆所有內容。**
@ -90,7 +73,8 @@ def create_research_manager(llm, memory):
 - 辯論歷史：{history}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 決策摘要（150字以上）：明確的買入/賣出/持有決策與核心理由
 2. 論證評估（200字以上）：公正評估雙方最強論點與分歧點，不偏袒任何一方
--- a/tradingagents/agents/managers/risk_manager.py
+++ b/tradingagents/agents/managers/risk_manager.py
@ -41,21 +41,7 @@ def create_risk_manager(llm, memory):
        sentiment_report = state["sentiment_report"]
        trader_plan = state["investment_plan"]

-        # 定義文本截斷函數以避免超過 token 限制
-        def truncate_text(text, max_chars):
-            """截斷文本到指定字符數"""
-            if len(text) <= max_chars:
-                return text
-            return text[:max_chars] + "\n...(內容已截斷)"
-        
-        
-        # 為每個報告設置合理的字符限制
-        # 增加限制以確保 800+ 字的報告不被截斷
-        market_research_report = truncate_text(market_research_report, 2000)
-        sentiment_report = truncate_text(sentiment_report, 2000)
-        news_report = truncate_text(news_report, 2500)
-        fundamentals_report = truncate_text(fundamentals_report, 2000)
-        trader_plan = truncate_text(trader_plan, 2000)
+        # 移除截斷邏輯以保留完整報告內容
        
        # 整合當前情況
        curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}"
@ -74,7 +60,7 @@ def create_risk_manager(llm, memory):
        
        # 截斷辯論歷史 - 這是最容易超過限制的部分
        # 增加限制以容納更長的辯論內容（風險辯論通常有3方，比投資辯論更長）
-        history = truncate_text(history, 3000)
+        history = history # 移除截斷，保留完整歷史

        
        # 建立提示 (prompt)
@ -96,7 +82,8 @@ def create_risk_manager(llm, memory):
 - 辯論歷史：{history}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 風控結論（150字以上）：風險評級與最終決策的明確陳述
 2. 論證評估（200字以上）：三方風險觀點的綜合評估，公正分析
--- a/tradingagents/agents/researchers/bear_researcher.py
+++ b/tradingagents/agents/researchers/bear_researcher.py
@ -42,47 +42,19 @@ def create_bear_researcher(llm, memory):
        news_report = state["news_report"]
        fundamentals_report = state["fundamentals_report"]

-        # 整合當前情況並智能截斷以避免超過 token 限制
-        # 估算：1 個中文字符 ≈ 2.5 tokens，1 個英文字符 ≈ 0.25 tokens
-        # 目標：將每個報告限制在合理的字符數內，總共不超過約 15000 字符（約 20000-30000 tokens）
-        
-        def truncate_text(text, max_chars):
-            """智能截斷文本到指定字符數，在句子邊界處截斷"""
-            if len(text) <= max_chars:
-                return text
-            
-            truncated = text[:max_chars]
-            for delimiter in ['。', '\n', '，', '、', ' ']:
-                last_pos = truncated.rfind(delimiter)
-                if last_pos > max_chars * 0.8:
-                    return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)"
-            return truncated + "...(為控制長度已精簡)"
-        
-        
-        # 為每個報告設置合理的字符限制
-        # 增加限制以確保 800+ 字的報告不被截斷
-        market_research_report = truncate_text(market_research_report, 2000)
-        sentiment_report = truncate_text(sentiment_report, 2000)
-        news_report = truncate_text(news_report, 2500)
-        fundamentals_report = truncate_text(fundamentals_report, 2000)
-        
+        # 整合當前情況 - 移除截斷邏輯以保留完整報告內容
        curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}"
        
        # 從記憶體中獲取過去相似情況的經驗
        past_memories = memory.get_memories(curr_situation, n_matches=2)

-        # 將過去的經驗格式化為字串（限制長度）
+        # 將過去的經驗格式化為字串
        past_memory_str = ""
        for i, rec in enumerate(past_memories, 1):
            recommendation = rec["recommendation"]
-            # 限制每條記憶的長度
-            if len(recommendation) > 200:
-                recommendation = recommendation[:200] + "...(已截斷)"
            past_memory_str += recommendation + "\n\n"

-        # 建立提示 (prompt) - 限制歷史長度以控制總 token 數
-        history = truncate_text(history, 300)
-        current_response = truncate_text(current_response, 200)
+        # 建立提示 (prompt) - 保留完整歷史以確保context完整性
        
        prompt = f"""**重要：您必須使用繁體中文（Traditional Chinese）回覆所有內容。**

@ -106,7 +78,8 @@ def create_bear_researcher(llm, memory):
 - 過往經驗：{past_memory_str}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 核心警示（150字以上）：清晰且強勢地陳述看跌理由，展現堅定立場
 2. 風險論證（450-500字）：用詳實數據支撐風險分析，層層揭露隱患
--- a/tradingagents/agents/researchers/bull_researcher.py
+++ b/tradingagents/agents/researchers/bull_researcher.py
@ -42,51 +42,19 @@ def create_bull_researcher(llm, memory):
        news_report = state["news_report"]
        fundamentals_report = state["fundamentals_report"]

-        # 整合當前情況並智能截斷以避免超過 token 限制
-        # 估算：1 個中文字符 ≈ 2.5 tokens，1 個英文字符 ≈ 0.25 tokens
-        # 目標：將每個報告限制在合理的字符數內，總共不超過約 15000 字符（約 20000-30000 tokens）
-        
-        def truncate_text(text, max_chars):
-            """智能截斷文本到指定字符數，在句子邊界處截斷"""
-            if len(text) <= max_chars:
-                return text
-            
-            # 在max_chars附近尋找句子結束標記
-            truncated = text[:max_chars]
-            
-            # 尋找最後一個句號、換行或逗號
-            for delimiter in ['。', '\n', '，', '、', ' ']:
-                last_pos = truncated.rfind(delimiter)
-                if last_pos > max_chars * 0.8:  # 至少保留80%的內容
-                    return text[:last_pos + 1] + "\n\n...(為控制長度已精簡)"
-            
-            # 如果找不到合適的分隔符，直接在字符處截斷
-            return truncated + "...(為控制長度已精簡)"
-        
-        # 為每個報告設置合理的字符限制
-        # 增加限制以確保 800+ 字的報告不被截斷
-        market_research_report = truncate_text(market_research_report, 2000)
-        sentiment_report = truncate_text(sentiment_report, 2000)
-        news_report = truncate_text(news_report, 2500)
-        fundamentals_report = truncate_text(fundamentals_report, 2000)
-        
+        # 整合當前情況 - 移除截斷邏輯以保留完整報告內容
        curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}"
        
        # 從記憶體中獲取過去相似情況的經驗
        past_memories = memory.get_memories(curr_situation, n_matches=2)

-        # 將過去的經驗格式化為字串（限制長度）
+        # 將過去的經驗格式化為字串
        past_memory_str = ""
        for i, rec in enumerate(past_memories, 1):
            recommendation = rec["recommendation"]
-            # 限制每條記憶的長度
-            if len(recommendation) > 200:
-                recommendation = recommendation[:200] + "...(已截斷)"
            past_memory_str += recommendation + "\n\n"

-        # 建立提示 (prompt) - 限制歷史長度以控制總 token 數
-        history = truncate_text(history, 300)
-        current_response = truncate_text(current_response, 200)
+        # 建立提示 (prompt) - 保留完整歷史以確保context完整性
        
        prompt = f"""**重要：您必須使用繁體中文（Traditional Chinese）回覆所有內容。**

@ -110,7 +78,8 @@ def create_bull_researcher(llm, memory):
 - 過往經驗：{past_memory_str}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 核心論點（150字以上）：清晰且強勢地陳述看漲理由，展現必勝信心
 2. 成長論證（450-500字）：用詳實數據支撐成長邏輯，層層推進論述
--- a/tradingagents/agents/risk_mgmt/aggresive_debator.py
+++ b/tradingagents/agents/risk_mgmt/aggresive_debator.py
@ -85,7 +85,8 @@ def create_risky_debator(llm):
 - 對手觀點：{current_safe_response}, {current_neutral_response}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 核心主張（150字以上）：清晰且強勢地陳述積極策略的理由，展現必勝信心
 2. 機會分析（450-500字）：詳細論證上檔潛力，層層推進論述
--- a/tradingagents/agents/risk_mgmt/conservative_debator.py
+++ b/tradingagents/agents/risk_mgmt/conservative_debator.py
@ -86,7 +86,8 @@ def create_safe_debator(llm):
 - 對手觀點：{current_risky_response}, {current_neutral_response}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 核心警示（150字以上）：清晰且強勢地陳述保守建議的理由，展現堅定立場
 2. 風險盤點（450-500字）：詳細分析下檔風險，層層揭露隱患
--- a/tradingagents/agents/risk_mgmt/neutral_debator.py
+++ b/tradingagents/agents/risk_mgmt/neutral_debator.py
@ -85,7 +85,8 @@ def create_neutral_debator(llm):
 - 對手觀點：{current_risky_response}, {current_safe_response}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 核心觀點（150字以上）：清晰陳述平衡策略的理由與價值
 2. 風險報酬評估（450-500字）：客觀分析損益比，綜合評估雙方論點
--- a/tradingagents/agents/trader/trader.py
+++ b/tradingagents/agents/trader/trader.py
@ -100,7 +100,8 @@ def create_trader(llm, memory):
 - 過去反思：{past_memory_str}

 【輸出要求】
-**字數要求**：**至少800字以上**
+**字數要求**：**800-1500字**
+**嚴格遵守字數限制，少於800字或超過1500字的報告將被退回**
 **內容結構**：
 1. 執行摘要（150字以上）：最終決策與核心理由的清晰陳述
 2. 決策整合（150字以上）：研究與風控觀點的平衡整合過程