From 6b644c60588db8669e99594e0453061b759a0386 Mon Sep 17 00:00:00 2001 From: ahmet guzererler Date: Thu, 26 Mar 2026 09:33:06 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20string=20buildin?= =?UTF-8?q?g=20in=20yfinance=5Fscanner.py=20(#114)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com> --- .jules/bolt.md | 3 + tradingagents/dataflows/yfinance_scanner.py | 79 ++++++++++++--------- 2 files changed, 49 insertions(+), 33 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000..30ee46dd --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-24 - [Avoid Pandas Vectorized String Operations on Tiny Arrays] +**Learning:** While `df.columns.astype(str).str.lower()` is faster for large datasets (e.g., 1000+ columns), it is actually a micro-deoptimization for typical DataFrames with few columns. The overhead of pandas' `.str` accessor dispatch and Index object creation outweighs the raw iteration speed of a simple Python list comprehension `[str(c).lower() for c in df.columns]`. +**Action:** Do not replace list comprehensions with pandas vectorized string accessors when the array size is known to be very small (like DataFrame columns), unless the number of columns is explicitly known to be massive. diff --git a/tradingagents/dataflows/yfinance_scanner.py b/tradingagents/dataflows/yfinance_scanner.py index 21b5b3e5..0cd6d8fd 100644 --- a/tradingagents/dataflows/yfinance_scanner.py +++ b/tradingagents/dataflows/yfinance_scanner.py @@ -43,9 +43,12 @@ def get_market_movers_yfinance( header = f"# Market Movers: {category.replace('_', ' ').title()}\n" header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" - result_str = header - result_str += "| Symbol | Name | Price | Change % | Volume | Market Cap |\n" - result_str += "|--------|------|-------|----------|--------|------------|\n" + # Optimized: Used list collection and string join to avoid memory reallocation overhead + lines = [ + header, + "| Symbol | Name | Price | Change % | Volume | Market Cap |", + "|--------|------|-------|----------|--------|------------|" + ] for quote in quotes[:15]: # Top 15 symbol = quote.get('symbol', 'N/A') @@ -65,9 +68,9 @@ def get_market_movers_yfinance( if isinstance(market_cap, (int, float)): market_cap = f"${market_cap:,.0f}" - result_str += f"| {symbol} | {name[:30]} | {price} | {change_pct} | {volume} | {market_cap} |\n" + lines.append(f"| {symbol} | {name[:30]} | {price} | {change_pct} | {volume} | {market_cap} |") - return result_str + return "\n".join(lines) + "\n" except Exception as e: return f"Error fetching market movers for {category}: {str(e)}" @@ -90,12 +93,15 @@ def get_market_indices_yfinance() -> str: "^RUT": "Russell 2000" } - header = f"# Major Market Indices\n" + header = "# Major Market Indices\n" header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" - result_str = header - result_str += "| Index | Current Price | Change | Change % | 52W High | 52W Low |\n" - result_str += "|-------|---------------|--------|----------|----------|----------|\n" + # Optimized: Used list collection and string join to avoid memory reallocation overhead + lines = [ + header, + "| Index | Current Price | Change | Change % | 52W High | 52W Low |", + "|-------|---------------|--------|----------|----------|----------|" + ] # Batch-download 1-day history for all symbols in a single request symbols = list(indices.keys()) @@ -117,7 +123,7 @@ def get_market_indices_yfinance() -> str: closes = None if closes is None or len(closes) == 0: - result_str += f"| {name} | N/A | - | - | - | - |\n" + lines.append(f"| {name} | N/A | - | - | - | - |") continue current_price = closes.iloc[-1] @@ -138,12 +144,12 @@ def get_market_indices_yfinance() -> str: high_str = f"{high_52w:.2f}" if isinstance(high_52w, (int, float)) else str(high_52w) low_str = f"{low_52w:.2f}" if isinstance(low_52w, (int, float)) else str(low_52w) - result_str += f"| {name} | {current_str} | {change_str} | {change_pct_str} | {high_str} | {low_str} |\n" + lines.append(f"| {name} | {current_str} | {change_str} | {change_pct_str} | {high_str} | {low_str} |") except Exception as e: - result_str += f"| {name} | Error: {str(e)} | - | - | - | - |\n" + lines.append(f"| {name} | Error: {str(e)} | - | - | - | - |") - return result_str + return "\n".join(lines) + "\n" except Exception as e: return f"Error fetching market indices: {str(e)}" @@ -180,12 +186,15 @@ def get_sector_performance_yfinance() -> str: # Download ~6 months of data to cover YTD, 1-month, 1-week hist = yf.download(symbols, period="6mo", auto_adjust=True, progress=False, threads=True) - header = f"# Sector Performance Overview\n" + header = "# Sector Performance Overview\n" header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" - result_str = header - result_str += "| Sector | 1-Day % | 1-Week % | 1-Month % | YTD % |\n" - result_str += "|--------|---------|----------|-----------|-------|\n" + # Optimized: Used list collection and string join to avoid memory reallocation overhead + lines = [ + header, + "| Sector | 1-Day % | 1-Week % | 1-Month % | YTD % |", + "|--------|---------|----------|-----------|-------|" + ] for sector_name, etf in sector_etfs.items(): try: @@ -196,7 +205,7 @@ def get_sector_performance_yfinance() -> str: closes = hist["Close"].dropna() if closes.empty or len(closes) < 2: - result_str += f"| {sector_name} | N/A | N/A | N/A | N/A |\n" + lines.append(f"| {sector_name} | N/A | N/A | N/A | N/A |") continue current = closes.iloc[-1] @@ -222,12 +231,12 @@ def get_sector_performance_yfinance() -> str: month_str = f"{month_pct:+.2f}%" if month_pct is not None else "N/A" ytd_str = f"{ytd_pct:+.2f}%" if ytd_pct is not None else "N/A" - result_str += f"| {sector_name} | {day_str} | {week_str} | {month_str} | {ytd_str} |\n" + lines.append(f"| {sector_name} | {day_str} | {week_str} | {month_str} | {ytd_str} |") except Exception as e: - result_str += f"| {sector_name} | Error: {str(e)[:30]} | - | - | - |\n" + lines.append(f"| {sector_name} | Error: {str(e)[:30]} | - | - | - |") - return result_str + return "\n".join(lines) + "\n" except Exception as e: return f"Error fetching sector performance: {str(e)}" @@ -299,9 +308,12 @@ def get_industry_performance_yfinance( header = f"# Industry Performance: {sector_key.replace('-', ' ').title()}\n" header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" - result_str = header - result_str += "| Company | Symbol | Rating | Market Weight | 1-Day % | 1-Week % | 1-Month % |\n" - result_str += "|---------|--------|--------|---------------|---------|----------|-----------|\n" + # Optimized: Used list collection and string join to avoid memory reallocation overhead + lines = [ + header, + "| Company | Symbol | Rating | Market Weight | 1-Day % | 1-Week % | 1-Month % |", + "|---------|--------|--------|---------------|---------|----------|-----------|" + ] # top_companies has ticker as the DataFrame index (index.name == 'symbol') # Columns: name, rating, market weight @@ -319,12 +331,12 @@ def get_industry_performance_yfinance( week_str = f"{ret['1w']:+.2f}%" if ret.get('1w') is not None else "N/A" month_str = f"{ret['1m']:+.2f}%" if ret.get('1m') is not None else "N/A" - result_str += ( + lines.append( f"| {name_short} | {symbol} | {rating} | {weight_str}" - f" | {day_str} | {week_str} | {month_str} |\n" + f" | {day_str} | {week_str} | {month_str} |" ) - return result_str + return "\n".join(lines) + "\n" except Exception as e: return f"Error fetching industry performance for sector '{sector_key}': {str(e)}" @@ -357,7 +369,8 @@ def get_topic_news_yfinance( header = f"# News for Topic: {topic}\n" header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" - result_str = header + # Optimized: Used list collection and string join to avoid memory reallocation overhead + lines = [header.strip(), ""] for article in search.news[:limit]: # Handle nested content structure @@ -377,14 +390,14 @@ def get_topic_news_yfinance( publisher = article.get("publisher", "Unknown") link = article.get("link", "") - result_str += f"### {title} (source: {publisher})\n" + lines.append(f"### {title} (source: {publisher})") if summary: - result_str += f"{summary}\n" + lines.append(f"{summary}") if link: - result_str += f"Link: {link}\n" - result_str += "\n" + lines.append(f"Link: {link}") + lines.append("") - return result_str + return "\n".join(lines) + "\n" except Exception as e: return f"Error fetching news for topic '{topic}': {str(e)}"