From 40f5b529d28e373293e97f65770a0a1280e8196f Mon Sep 17 00:00:00 2001
From: Shashwat17-vit <Shash14negi@gmail.com>
Date: Sat, 6 Dec 2025 15:06:16 -0600
Subject: [PATCH] Add LoRA sentiment scoring and echo filtering to news analyst

---
 tradingagents/agents/analysts/news_analyst.py | 172 ++++++++++++++++--
 1 file changed, 160 insertions(+), 12 deletions(-)

diff --git a/tradingagents/agents/analysts/news_analyst.py b/tradingagents/agents/analysts/news_analyst.py
index f263a214..9dd75e56 100644
--- a/tradingagents/agents/analysts/news_analyst.py
+++ b/tradingagents/agents/analysts/news_analyst.py
@@ -2,9 +2,103 @@ from langchain_core.messages import SystemMessage, HumanMessage
 from datetime import datetime, timedelta
 from tradingagents.agents.utils.agent_utils import get_news, get_global_news
 from tradingagents.dataflows.config import get_config
+from tradingagents.dataflows.news_parsers import parse_stock_news, parse_global_news
+import sys
+from typing import List, Dict, Any, Tuple, Optional
+
+# Make the out-of-tree confidence/relevance and LoRA scoring utilities importable.
+CONF_UTILS_PATH = "/u/v/d/vdhanuka/CS769-TradingAgents"
+if CONF_UTILS_PATH not in sys.path:
+    sys.path.append(CONF_UTILS_PATH)
+
+# Optional scoring dependencies: if either import fails, both names are set to None.
+try:
+    import confidence as conf  # type: ignore
+    from sentence_transformers import SentenceTransformer  # type: ignore
+except Exception:  # any import-time failure leaves both names as None
+    conf = None  # type: ignore
+    SentenceTransformer = None  # type: ignore
 
 
 def create_news_analyst(llm):
+    # Lazy singletons for model and embedder to avoid reloading every call
+    lora_loaded: Dict[str, Any] = {"tokenizer": None, "model": None, "embedder": None}
+
+    def _ensure_models():
+        """Fill the ``lora_loaded`` cache on first use; raise RuntimeError if a dependency is missing."""
+        if conf is None:
+            raise RuntimeError("confidence.py utilities not available on sys.path.")
+        if any(lora_loaded[slot] is None for slot in ("tokenizer", "model")):
+            checkpoint = "meta-llama/Llama-3.1-8B"
+            adapter_dir = "/u/v/d/vdhanuka/defeatbeta-api-main/dapt_sft_adapters_e4_60_20_20"
+            lora_loaded["tokenizer"], lora_loaded["model"] = conf.load_lora_causal_model(checkpoint, adapter_dir)
+        if lora_loaded["embedder"] is not None:
+            return
+        if SentenceTransformer is None:
+            raise RuntimeError("sentence-transformers not available for relevance computation.")
+        lora_loaded["embedder"] = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+    def _score_items(
+        items: List[Dict[str, Any]],
+        company: str,
+        ticker: str,
+        alpha: float,
+        beta_relevance: float,
+    ) -> Tuple[List[Dict[str, Any]], float, str]:
+        """
+        Label every news item via the LoRA scorer and aggregate a net sentiment.
+
+        Per-item weight is w_i = alpha*confidence + (1-alpha)*relevance; the net score is
+        sum(w_i * S_i) / sum(w_i), or 0.0 when the weights sum to zero. Returns
+        (annotated item copies, net score, net label), where the label is "Positive"
+        above 0.2, "Negative" below -0.2 and "Neutral" otherwise. Empty ``items``
+        returns ([], 0.0, "Neutral") before any model is loaded.
+        """
+        if not items:
+            return [], 0.0, "Neutral"
+
+        _ensure_models()
+        tokenizer, model, embedder = (lora_loaded[k] for k in ("tokenizer", "model", "embedder"))
+
+        # First non-empty field wins: raw -> headline -> title -> summary.
+        texts: List[str] = [
+            entry.get("raw") or entry.get("headline") or entry.get("title") or entry.get("summary") or ""
+            for entry in items
+        ]
+        prompts = list(map(conf.build_instruction_prompt, texts))
+
+        # (label, confidence) pairs from the LoRA scorer.
+        candidate_labels = ["Positive", "Neutral", "Negative"]
+        predictions: List[Tuple[str, float]] = conf.score_labels_with_lora(tokenizer, model, prompts, candidate_labels)
+
+        subject = company or ticker
+        annotated: List[Dict[str, Any]] = []
+        numerator = 0.0
+        denominator = 0.0
+        for entry, (label, certainty), body in zip(items, predictions, texts):
+            polarity = conf.label_to_numeric(label)
+            # Texts over 160 characters are swapped for the item's title, else their first 160 characters.
+            probe = body if len(body) <= 160 else (entry.get("title") or body[:160])
+            relevance = conf.compute_relevance(embedder, probe, subject, ticker, beta=beta_relevance)
+            weight = float(alpha) * float(certainty) + float(1.0 - alpha) * float(relevance)
+            annotated.append(
+                {
+                    **entry,
+                    "sentiment_label": label,
+                    "sentiment_score": int(polarity),  # -1/0/1
+                    "confidence": float(round(certainty, 3)),
+                    "relevance": float(round(relevance, 3)),
+                    "weight": float(round(weight, 3)),
+                }
+            )
+            numerator += weight * polarity
+            denominator += weight
+
+        # A zero total weight would make the ratio undefined, so report 0.0 instead.
+        net_score = float(numerator / denominator) if denominator != 0.0 else 0.0
+        net_label = "Positive" if net_score > 0.2 else "Negative" if net_score < -0.2 else "Neutral"
+        return annotated, net_score, net_label
+
     def news_analyst_node(state):
         current_date = state["trade_date"]
         ticker = state["company_of_interest"]
@@ -31,33 +125,87 @@ def create_news_analyst(llm):
             global_news = ""
 
         # Build a data-grounded instruction and feed fetched data to the LLM
+        # Use completion-style prompt that works better with causal LMs (DAPT model)
         system_instruction = (
-            "You are a news researcher tasked with analyzing recent news and trends over the past week. "
-            "Write a comprehensive, data-grounded report relevant for trading and macroeconomics. "
-            "Use the provided fetched news data as primary evidence. "
-            "Do not simply state that trends are mixed. Provide detailed and nuanced insights with implications. "
-            "Append a concise Markdown table at the end summarizing key points.\n\n"
-            f"Context:\n"
-            f"- Current date: {current_date}\n"
-            f"- Company: {ticker}\n\n"
-            f"Fetched company news ({ticker}, {start_date} to {current_date}):\n{company_news}\n\n"
-            f"Fetched global/macro news (last 7 days):\n{global_news}\n"
+            "You are a financial news analyst. Your task is to write a trading-relevant report "
+            "based on the news data provided below.\n\n"
+            "IMPORTANT: Do NOT repeat or echo any part of this prompt. Do NOT ask questions. "
+            "Do NOT output task lists or checklists. Start writing the report directly.\n\n"
+            f"Date: {current_date}\n"
+            f"Company: {ticker}\n\n"
+            f"=== Company News ({ticker}, {start_date} to {current_date}) ===\n{company_news}\n\n"
+            f"=== Global/Macro News (last 7 days) ===\n{global_news}\n\n"
+            "=== END OF NEWS DATA ===\n\n"
+            "Now write a comprehensive analysis report with trading implications. "
+            "End with a Markdown table summarizing key points."
         )
 
+        # Send the instructions as a SystemMessage plus a short HumanMessage starter phrase.
+        # The starter phrase is meant to help causal LMs continue naturally instead of echoing.
         messages = [
             SystemMessage(content=system_instruction),
-            HumanMessage(content=f"Produce the final report for {ticker} using the fetched data above."),
+            HumanMessage(content=f"Write the {ticker} news analysis report now:"),
         ]
         result = llm.invoke(messages)
 
         report = ""
 
         # Use the generated content as the report
-        report = getattr(result, "content", "") or ""
+        raw_report = getattr(result, "content", "") or ""
+        
+        # Post-process: remove any echoed prompt fragments
+        # Common echo patterns to filter out
+        echo_patterns = [
+            "Write the",
+            "Produce the final report",
+            "news analysis report now",
+            "using the fetched data above",
+        ]
+        report = raw_report
+        for pattern in echo_patterns:
+            if report.strip().startswith(pattern):
+                # Remove the echoed line
+                lines = report.split('\n', 1)
+                report = lines[1] if len(lines) > 1 else ""
+        report = report.strip()
+
+        # Now (after report generation), parse and compute net sentiment (keep logic intact)
+        company_items = parse_stock_news(company_news) if company_news else []
+        global_items = parse_global_news(global_news) if global_news else []
+
+        cfg = get_config()
+        alpha = float(cfg.get("sentiment_conf_alpha", 0.7))
+        beta_relevance = float(cfg.get("relevance_beta", 0.8))
+
+        all_items = []
+        all_items.extend([dict(x, source="company") for x in company_items])
+        all_items.extend([dict(x, source="global") for x in global_items])
+
+        news_items_scored: List[Dict[str, Any]] = []
+        news_net_sentiment_score: float = 0.0
+        news_net_sentiment_label: str = "Neutral"
+
+        if (company_items or global_items) and conf is not None:
+            try:
+                news_items_scored, news_net_sentiment_score, news_net_sentiment_label = _score_items(
+                    all_items,
+                    company=ticker,
+                    ticker=ticker,
+                    alpha=alpha,
+                    beta_relevance=beta_relevance,
+                )
+            except Exception:
+                news_items_scored = []
+                news_net_sentiment_score = 0.0
+                news_net_sentiment_label = "Neutral"
 
         return {
             "messages": [result],
             "news_report": report,
+            # New outputs for FinLLama
+            "news_items_scored": news_items_scored,
+            "news_net_sentiment_score": news_net_sentiment_score,
+            "news_net_sentiment_label": news_net_sentiment_label,
         }
 
     return news_analyst_node