Bolt: Optimize string building in yfinance_scanner.py (#114)

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>
This commit is contained in:
ahmet guzererler 2026-03-26 09:33:06 +01:00 committed by GitHub
parent 0efbbd9400
commit 6b644c6058
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 49 additions and 33 deletions

3
.jules/bolt.md Normal file
View File

@ -0,0 +1,3 @@
## 2024-05-24 - [Avoid Pandas Vectorized String Operations on Tiny Arrays]
**Learning:** While `df.columns.astype(str).str.lower()` is faster for large datasets (e.g., 1000+ columns), it is actually a micro-deoptimization for typical DataFrames with few columns. The overhead of pandas' `.str` accessor dispatch and Index object creation outweighs the raw iteration speed of a simple Python list comprehension `[str(c).lower() for c in df.columns]`.
**Action:** Do not replace list comprehensions with pandas vectorized string accessors when the array is small (DataFrame column labels are the typical case). Switch to the vectorized form only when the number of columns is known to be very large (on the order of 1000+).

View File

@ -43,9 +43,12 @@ def get_market_movers_yfinance(
header = f"# Market Movers: {category.replace('_', ' ').title()}\n"
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
result_str = header
result_str += "| Symbol | Name | Price | Change % | Volume | Market Cap |\n"
result_str += "|--------|------|-------|----------|--------|------------|\n"
# Optimized: Used list collection and string join to avoid memory reallocation overhead
lines = [
header,
"| Symbol | Name | Price | Change % | Volume | Market Cap |",
"|--------|------|-------|----------|--------|------------|"
]
for quote in quotes[:15]: # Top 15
symbol = quote.get('symbol', 'N/A')
@ -65,9 +68,9 @@ def get_market_movers_yfinance(
if isinstance(market_cap, (int, float)):
market_cap = f"${market_cap:,.0f}"
result_str += f"| {symbol} | {name[:30]} | {price} | {change_pct} | {volume} | {market_cap} |\n"
lines.append(f"| {symbol} | {name[:30]} | {price} | {change_pct} | {volume} | {market_cap} |")
return result_str
return "\n".join(lines) + "\n"
except Exception as e:
return f"Error fetching market movers for {category}: {str(e)}"
@ -90,12 +93,15 @@ def get_market_indices_yfinance() -> str:
"^RUT": "Russell 2000"
}
header = f"# Major Market Indices\n"
header = "# Major Market Indices\n"
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
result_str = header
result_str += "| Index | Current Price | Change | Change % | 52W High | 52W Low |\n"
result_str += "|-------|---------------|--------|----------|----------|----------|\n"
# Optimized: Used list collection and string join to avoid memory reallocation overhead
lines = [
header,
"| Index | Current Price | Change | Change % | 52W High | 52W Low |",
"|-------|---------------|--------|----------|----------|----------|"
]
# Batch-download 1-day history for all symbols in a single request
symbols = list(indices.keys())
@ -117,7 +123,7 @@ def get_market_indices_yfinance() -> str:
closes = None
if closes is None or len(closes) == 0:
result_str += f"| {name} | N/A | - | - | - | - |\n"
lines.append(f"| {name} | N/A | - | - | - | - |")
continue
current_price = closes.iloc[-1]
@ -138,12 +144,12 @@ def get_market_indices_yfinance() -> str:
high_str = f"{high_52w:.2f}" if isinstance(high_52w, (int, float)) else str(high_52w)
low_str = f"{low_52w:.2f}" if isinstance(low_52w, (int, float)) else str(low_52w)
result_str += f"| {name} | {current_str} | {change_str} | {change_pct_str} | {high_str} | {low_str} |\n"
lines.append(f"| {name} | {current_str} | {change_str} | {change_pct_str} | {high_str} | {low_str} |")
except Exception as e:
result_str += f"| {name} | Error: {str(e)} | - | - | - | - |\n"
lines.append(f"| {name} | Error: {str(e)} | - | - | - | - |")
return result_str
return "\n".join(lines) + "\n"
except Exception as e:
return f"Error fetching market indices: {str(e)}"
@ -180,12 +186,15 @@ def get_sector_performance_yfinance() -> str:
# Download ~6 months of data to cover YTD, 1-month, 1-week
hist = yf.download(symbols, period="6mo", auto_adjust=True, progress=False, threads=True)
header = f"# Sector Performance Overview\n"
header = "# Sector Performance Overview\n"
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
result_str = header
result_str += "| Sector | 1-Day % | 1-Week % | 1-Month % | YTD % |\n"
result_str += "|--------|---------|----------|-----------|-------|\n"
# Optimized: Used list collection and string join to avoid memory reallocation overhead
lines = [
header,
"| Sector | 1-Day % | 1-Week % | 1-Month % | YTD % |",
"|--------|---------|----------|-----------|-------|"
]
for sector_name, etf in sector_etfs.items():
try:
@ -196,7 +205,7 @@ def get_sector_performance_yfinance() -> str:
closes = hist["Close"].dropna()
if closes.empty or len(closes) < 2:
result_str += f"| {sector_name} | N/A | N/A | N/A | N/A |\n"
lines.append(f"| {sector_name} | N/A | N/A | N/A | N/A |")
continue
current = closes.iloc[-1]
@ -222,12 +231,12 @@ def get_sector_performance_yfinance() -> str:
month_str = f"{month_pct:+.2f}%" if month_pct is not None else "N/A"
ytd_str = f"{ytd_pct:+.2f}%" if ytd_pct is not None else "N/A"
result_str += f"| {sector_name} | {day_str} | {week_str} | {month_str} | {ytd_str} |\n"
lines.append(f"| {sector_name} | {day_str} | {week_str} | {month_str} | {ytd_str} |")
except Exception as e:
result_str += f"| {sector_name} | Error: {str(e)[:30]} | - | - | - |\n"
lines.append(f"| {sector_name} | Error: {str(e)[:30]} | - | - | - |")
return result_str
return "\n".join(lines) + "\n"
except Exception as e:
return f"Error fetching sector performance: {str(e)}"
@ -299,9 +308,12 @@ def get_industry_performance_yfinance(
header = f"# Industry Performance: {sector_key.replace('-', ' ').title()}\n"
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
result_str = header
result_str += "| Company | Symbol | Rating | Market Weight | 1-Day % | 1-Week % | 1-Month % |\n"
result_str += "|---------|--------|--------|---------------|---------|----------|-----------|\n"
# Optimized: Used list collection and string join to avoid memory reallocation overhead
lines = [
header,
"| Company | Symbol | Rating | Market Weight | 1-Day % | 1-Week % | 1-Month % |",
"|---------|--------|--------|---------------|---------|----------|-----------|"
]
# top_companies has ticker as the DataFrame index (index.name == 'symbol')
# Columns: name, rating, market weight
@ -319,12 +331,12 @@ def get_industry_performance_yfinance(
week_str = f"{ret['1w']:+.2f}%" if ret.get('1w') is not None else "N/A"
month_str = f"{ret['1m']:+.2f}%" if ret.get('1m') is not None else "N/A"
result_str += (
lines.append(
f"| {name_short} | {symbol} | {rating} | {weight_str}"
f" | {day_str} | {week_str} | {month_str} |\n"
f" | {day_str} | {week_str} | {month_str} |"
)
return result_str
return "\n".join(lines) + "\n"
except Exception as e:
return f"Error fetching industry performance for sector '{sector_key}': {str(e)}"
@ -357,7 +369,8 @@ def get_topic_news_yfinance(
header = f"# News for Topic: {topic}\n"
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
result_str = header
# Optimized: Used list collection and string join to avoid memory reallocation overhead
lines = [header.strip(), ""]
for article in search.news[:limit]:
# Handle nested content structure
@ -377,14 +390,14 @@ def get_topic_news_yfinance(
publisher = article.get("publisher", "Unknown")
link = article.get("link", "")
result_str += f"### {title} (source: {publisher})\n"
lines.append(f"### {title} (source: {publisher})")
if summary:
result_str += f"{summary}\n"
lines.append(f"{summary}")
if link:
result_str += f"Link: {link}\n"
result_str += "\n"
lines.append(f"Link: {link}")
lines.append("")
return result_str
return "\n".join(lines) + "\n"
except Exception as e:
return f"Error fetching news for topic '{topic}': {str(e)}"