[add] parse_date_range

This commit is contained in:
kimheesu 2025-07-08 11:34:43 +09:00
parent 938c27a6b9
commit d35b62e999
5 changed files with 55 additions and 47 deletions

View File

@ -13,7 +13,7 @@ class AnalystType(str, Enum):
class TradingAnalysisRequest(BaseModel): class TradingAnalysisRequest(BaseModel):
ticker: str = "NVDA" ticker: str = "NVDA"
analysis_date: str = "2025-07-07" analysis_date: str = "2025-07-07"
analysts: List[AnalystType] = [AnalystType.MARKET, AnalystType.SOCIAL, AnalystType.NEWS, AnalystType.FUNDAMENTALS] analysts: List[AnalystType] = [AnalystType.MARKET, AnalystType.NEWS, AnalystType.FUNDAMENTALS]
research_depth: int = 1 research_depth: int = 1
llm_provider: str = "google" llm_provider: str = "google"
backend_url: str = "https://generativelanguage.googleapis.com/v1" backend_url: str = "https://generativelanguage.googleapis.com/v1"

View File

@ -132,6 +132,8 @@ def select_shallow_thinking_agent(provider) -> str:
("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"), ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"), ("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
("GPT-4o - Standard model with solid capabilities", "gpt-4o"), ("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
("o4-mini - Specialized reasoning model (compact)", "o4-mini"),
("o3 - Full advanced reasoning model", "o3"),
], ],
"anthropic": [ "anthropic": [
("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"), ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),

View File

@ -3,7 +3,7 @@ from typing import Dict, Optional
# Use default config but allow it to be overridden # Use default config but allow it to be overridden
_config: Optional[Dict] = None _config: Optional[Dict] = None
DATA_DIR: Optional[str] = None DATA_DIR: str = ""
def initialize_config(): def initialize_config():

View File

@ -1,4 +1,4 @@
from typing import Annotated, Dict from typing import Annotated, Dict, Tuple
from .reddit_utils import fetch_top_from_category from .reddit_utils import fetch_top_from_category
from .yfin_utils import * from .yfin_utils import *
from .stockstats_utils import * from .stockstats_utils import *
@ -17,6 +17,23 @@ from .config import get_config, set_config, DATA_DIR
from .search_provider_factory import SearchProviderFactory from .search_provider_factory import SearchProviderFactory
def parse_date_range(curr_date: str, look_back_days: int) -> Tuple[str, str]:
    """Return the ``(start_date, end_date)`` strings of a look-back window.

    Args:
        curr_date: Window end date in ``yyyy-mm-dd`` format.
        look_back_days: Number of calendar days to step back from
            ``curr_date`` to obtain the window start.

    Returns:
        Tuple ``(start_date, end_date)``; both are formatted as
        ``yyyy-mm-dd`` from the parsed date.

    Raises:
        ValueError: If ``curr_date`` does not match ``%Y-%m-%d``.
    """
    # Function-scope import keeps this helper self-contained; a fixed number
    # of days is exactly what the standard-library timedelta models.
    from datetime import timedelta

    end_dt = datetime.strptime(curr_date, "%Y-%m-%d")
    start_dt = end_dt - timedelta(days=look_back_days)

    # Format BOTH ends from the parsed value. strptime accepts non-padded
    # input such as "2025-7-8"; echoing the raw string back would return an
    # end date in a different format than the (always padded) start date.
    return start_dt.strftime("%Y-%m-%d"), end_dt.strftime("%Y-%m-%d")
def get_finnhub_news( def get_finnhub_news(
ticker: Annotated[ ticker: Annotated[
str, str,
@ -37,9 +54,7 @@ def get_finnhub_news(
""" """
start_date = datetime.strptime(curr_date, "%Y-%m-%d") before, _ = parse_date_range(curr_date, look_back_days)
before = start_date - relativedelta(days=look_back_days)
before = before.strftime("%Y-%m-%d")
result = get_data_in_range(ticker, before, curr_date, "news_data", DATA_DIR) result = get_data_in_range(ticker, before, curr_date, "news_data", DATA_DIR)
@ -76,9 +91,7 @@ def get_finnhub_company_insider_sentiment(
str: a report of the sentiment in the past 15 days starting at curr_date str: a report of the sentiment in the past 15 days starting at curr_date
""" """
date_obj = datetime.strptime(curr_date, "%Y-%m-%d") before, _ = parse_date_range(curr_date, look_back_days)
before = date_obj - relativedelta(days=look_back_days)
before = before.strftime("%Y-%m-%d")
data = get_data_in_range(ticker, before, curr_date, "insider_senti", DATA_DIR) data = get_data_in_range(ticker, before, curr_date, "insider_senti", DATA_DIR)
@ -117,9 +130,7 @@ def get_finnhub_company_insider_transactions(
str: a report of the company's insider transaction/trading information in the past 15 days str: a report of the company's insider transaction/trading information in the past 15 days
""" """
date_obj = datetime.strptime(curr_date, "%Y-%m-%d") before, _ = parse_date_range(curr_date, look_back_days)
before = date_obj - relativedelta(days=look_back_days)
before = before.strftime("%Y-%m-%d")
data = get_data_in_range(ticker, before, curr_date, "insider_trans", DATA_DIR) data = get_data_in_range(ticker, before, curr_date, "insider_trans", DATA_DIR)
@ -290,9 +301,7 @@ def get_google_news(
) -> str: ) -> str:
query = query.replace(" ", "+") query = query.replace(" ", "+")
start_date = datetime.strptime(curr_date, "%Y-%m-%d") before, _ = parse_date_range(curr_date, look_back_days)
before = start_date - relativedelta(days=look_back_days)
before = before.strftime("%Y-%m-%d")
news_results = getNewsData(query, before, curr_date) news_results = getNewsData(query, before, curr_date)
@ -323,18 +332,17 @@ def get_reddit_global_news(
str: A formatted dataframe containing the latest news articles posts on reddit and meta information in these columns: "created_utc", "id", "title", "selftext", "score", "num_comments", "url" str: A formatted dataframe containing the latest news articles posts on reddit and meta information in these columns: "created_utc", "id", "title", "selftext", "score", "num_comments", "url"
""" """
start_date = datetime.strptime(start_date, "%Y-%m-%d") before, start_date_str = parse_date_range(start_date, look_back_days)
before = start_date - relativedelta(days=look_back_days) start_date_dt = datetime.strptime(start_date_str, "%Y-%m-%d")
before = before.strftime("%Y-%m-%d")
posts = [] posts = []
# iterate from start_date to end_date # iterate from start_date to end_date
curr_date = datetime.strptime(before, "%Y-%m-%d") curr_date = datetime.strptime(before, "%Y-%m-%d")
total_iterations = (start_date - curr_date).days + 1 total_iterations = (start_date_dt - curr_date).days + 1
pbar = tqdm(desc=f"Getting Global News on {start_date}", total=total_iterations) pbar = tqdm(desc=f"Getting Global News on {start_date_dt}", total=total_iterations)
while curr_date <= start_date: while curr_date <= start_date_dt:
curr_date_str = curr_date.strftime("%Y-%m-%d") curr_date_str = curr_date.strftime("%Y-%m-%d")
fetch_result = fetch_top_from_category( fetch_result = fetch_top_from_category(
"global_news", "global_news",
@ -377,21 +385,20 @@ def get_reddit_company_news(
str: A formatted dataframe containing the latest news articles posts on reddit and meta information in these columns: "created_utc", "id", "title", "selftext", "score", "num_comments", "url" str: A formatted dataframe containing the latest news articles posts on reddit and meta information in these columns: "created_utc", "id", "title", "selftext", "score", "num_comments", "url"
""" """
start_date = datetime.strptime(start_date, "%Y-%m-%d") before, start_date_str = parse_date_range(start_date, look_back_days)
before = start_date - relativedelta(days=look_back_days) start_date_dt = datetime.strptime(start_date_str, "%Y-%m-%d")
before = before.strftime("%Y-%m-%d")
posts = [] posts = []
# iterate from start_date to end_date # iterate from start_date to end_date
curr_date = datetime.strptime(before, "%Y-%m-%d") curr_date = datetime.strptime(before, "%Y-%m-%d")
total_iterations = (start_date - curr_date).days + 1 total_iterations = (start_date_dt - curr_date).days + 1
pbar = tqdm( pbar = tqdm(
desc=f"Getting Company News for {ticker} on {start_date}", desc=f"Getting Company News for {ticker} on {start_date_dt}",
total=total_iterations, total=total_iterations,
) )
while curr_date <= start_date: while curr_date <= start_date_dt:
curr_date_str = curr_date.strftime("%Y-%m-%d") curr_date_str = curr_date.strftime("%Y-%m-%d")
fetch_result = fetch_top_from_category( fetch_result = fetch_top_from_category(
"company_news", "company_news",
@ -509,8 +516,9 @@ def get_stock_stats_indicators_window(
) )
end_date = curr_date end_date = curr_date
curr_date = datetime.strptime(curr_date, "%Y-%m-%d") before_str, _ = parse_date_range(curr_date, look_back_days)
before = curr_date - relativedelta(days=look_back_days) before_dt = datetime.strptime(before_str, "%Y-%m-%d")
curr_date_dt = datetime.strptime(curr_date, "%Y-%m-%d")
if not online: if not online:
# read from YFin data # read from YFin data
@ -524,30 +532,30 @@ def get_stock_stats_indicators_window(
dates_in_df = data["Date"].astype(str).str[:10] dates_in_df = data["Date"].astype(str).str[:10]
ind_string = "" ind_string = ""
while curr_date >= before: while curr_date_dt >= before_dt:
# only do the trading dates # only do the trading dates
if curr_date.strftime("%Y-%m-%d") in dates_in_df.values: if curr_date_dt.strftime("%Y-%m-%d") in dates_in_df.values:
indicator_value = get_stockstats_indicator( indicator_value = get_stockstats_indicator(
symbol, indicator, curr_date.strftime("%Y-%m-%d"), online symbol, indicator, curr_date_dt.strftime("%Y-%m-%d"), online
) )
ind_string += f"{curr_date.strftime('%Y-%m-%d')}: {indicator_value}\n" ind_string += f"{curr_date_dt.strftime('%Y-%m-%d')}: {indicator_value}\n"
curr_date = curr_date - relativedelta(days=1) curr_date_dt = curr_date_dt - relativedelta(days=1)
else: else:
# online gathering # online gathering
ind_string = "" ind_string = ""
while curr_date >= before: while curr_date_dt >= before_dt:
indicator_value = get_stockstats_indicator( indicator_value = get_stockstats_indicator(
symbol, indicator, curr_date.strftime("%Y-%m-%d"), online symbol, indicator, curr_date_dt.strftime("%Y-%m-%d"), online
) )
ind_string += f"{curr_date.strftime('%Y-%m-%d')}: {indicator_value}\n" ind_string += f"{curr_date_dt.strftime('%Y-%m-%d')}: {indicator_value}\n"
curr_date = curr_date - relativedelta(days=1) curr_date_dt = curr_date_dt - relativedelta(days=1)
result_str = ( result_str = (
f"## {indicator} values from {before.strftime('%Y-%m-%d')} to {end_date}:\n\n" f"## {indicator} values from {before_dt.strftime('%Y-%m-%d')} to {end_date}:\n\n"
+ ind_string + ind_string
+ "\n\n" + "\n\n"
+ best_ind_params.get(indicator, "No description available.") + best_ind_params.get(indicator, "No description available.")
@ -565,20 +573,20 @@ def get_stockstats_indicator(
online: Annotated[bool, "to fetch data online or offline"], online: Annotated[bool, "to fetch data online or offline"],
) -> str: ) -> str:
curr_date = datetime.strptime(curr_date, "%Y-%m-%d") curr_date_dt = datetime.strptime(curr_date, "%Y-%m-%d")
curr_date = curr_date.strftime("%Y-%m-%d") curr_date_str = curr_date_dt.strftime("%Y-%m-%d")
try: try:
indicator_value = StockstatsUtils.get_stock_stats( indicator_value = StockstatsUtils.get_stock_stats(
symbol, symbol,
indicator, indicator,
curr_date, curr_date_str,
os.path.join(DATA_DIR, "market_data", "price_data"), os.path.join(DATA_DIR, "market_data", "price_data"),
online=online, online=online,
) )
except Exception as e: except Exception as e:
print( print(
f"Error getting stockstats indicator data for indicator {indicator} on {curr_date}: {e}" f"Error getting stockstats indicator data for indicator {indicator} on {curr_date_str}: {e}"
) )
return "" return ""
@ -591,9 +599,7 @@ def get_YFin_data_window(
look_back_days: Annotated[int, "how many days to look back"], look_back_days: Annotated[int, "how many days to look back"],
) -> str: ) -> str:
# calculate past days # calculate past days
date_obj = datetime.strptime(curr_date, "%Y-%m-%d") start_date, _ = parse_date_range(curr_date, look_back_days)
before = date_obj - relativedelta(days=look_back_days)
start_date = before.strftime("%Y-%m-%d")
# read in data # read in data
data = pd.read_csv( data = pd.read_csv(

View File

@ -7,7 +7,7 @@ from abc import ABC, abstractmethod
class SearchProvider(ABC): class SearchProvider(ABC):
@abstractmethod @abstractmethod
def search(self, query: str, ticker: str, curr_date: str) -> str: def search(self, query: str) -> str:
pass pass