typecheck with pyrefly
This commit is contained in:
parent
1cfe018f77
commit
fd45639ad6
14
.mise.toml
14
.mise.toml
|
|
@ -2,12 +2,10 @@
|
||||||
python = "3.13"
|
python = "3.13"
|
||||||
uv = "latest"
|
uv = "latest"
|
||||||
ruff = "latest"
|
ruff = "latest"
|
||||||
"npm:pyright" = "latest"
|
|
||||||
|
|
||||||
[env]
|
[env]
|
||||||
_.file = ".env"
|
_.file = ".env"
|
||||||
# Python environment settings
|
# Python environment settings
|
||||||
PYTHONPATH = "."
|
|
||||||
PYTHONDONTWRITEBYTECODE = "1"
|
PYTHONDONTWRITEBYTECODE = "1"
|
||||||
PYTHONUNBUFFERED = "1"
|
PYTHONUNBUFFERED = "1"
|
||||||
|
|
||||||
|
|
@ -40,8 +38,8 @@ description = "Format code with ruff"
|
||||||
run = "ruff format ."
|
run = "ruff format ."
|
||||||
|
|
||||||
[tasks.typecheck]
|
[tasks.typecheck]
|
||||||
description = "Run pyright type checking"
|
description = "Run pyrefly type checking"
|
||||||
run = "pyright"
|
run = "uv run pyrefly check ."
|
||||||
|
|
||||||
[tasks.fix]
|
[tasks.fix]
|
||||||
description = "Auto-fix linting issues"
|
description = "Auto-fix linting issues"
|
||||||
|
|
@ -49,7 +47,7 @@ run = "ruff check --fix ."
|
||||||
|
|
||||||
[tasks.all]
|
[tasks.all]
|
||||||
description = "Run format, lint, and typecheck"
|
description = "Run format, lint, and typecheck"
|
||||||
run = ["ruff format .", "ruff check .", "pyright"]
|
run = ["ruff format .", "ruff check .", "uv run pyrefly check ."]
|
||||||
|
|
||||||
[tasks.clean]
|
[tasks.clean]
|
||||||
description = "Clean up cache and build artifacts"
|
description = "Clean up cache and build artifacts"
|
||||||
|
|
@ -60,9 +58,5 @@ run = [
|
||||||
"rm -rf .ruff_cache",
|
"rm -rf .ruff_cache",
|
||||||
"rm -rf dist",
|
"rm -rf dist",
|
||||||
"rm -rf build",
|
"rm -rf build",
|
||||||
"rm -rf *.egg-info"
|
"rm -rf *.egg-info",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tasks.litellm]
|
|
||||||
run = "uvx --from litellm[proxy] litellm --config litellm.yml --port 4000"
|
|
||||||
description = "Start LiteLLM proxy for Claude Code → OpenRouter"
|
|
||||||
|
|
|
||||||
92
cli/main.py
92
cli/main.py
|
|
@ -2,6 +2,7 @@ import datetime
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Literal, cast
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from rich import box
|
from rich import box
|
||||||
|
|
@ -16,6 +17,7 @@ from rich.spinner import Spinner
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
|
|
||||||
|
from cli.models import AnalystType
|
||||||
from cli.utils import (
|
from cli.utils import (
|
||||||
get_analysis_date,
|
get_analysis_date,
|
||||||
get_ticker,
|
get_ticker,
|
||||||
|
|
@ -39,6 +41,9 @@ app = typer.Typer(
|
||||||
|
|
||||||
# Create a deque to store recent messages with a maximum length
|
# Create a deque to store recent messages with a maximum length
|
||||||
class MessageBuffer:
|
class MessageBuffer:
|
||||||
|
current_report: str | None
|
||||||
|
final_report: str | None
|
||||||
|
|
||||||
def __init__(self, max_length=100):
|
def __init__(self, max_length=100):
|
||||||
self.messages = deque(maxlen=max_length)
|
self.messages = deque(maxlen=max_length)
|
||||||
self.tool_calls = deque(maxlen=max_length)
|
self.tool_calls = deque(maxlen=max_length)
|
||||||
|
|
@ -304,7 +309,7 @@ def update_display(layout, spinner_text=None):
|
||||||
content_str = content
|
content_str = content
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
# Handle list of content blocks (Anthropic format)
|
# Handle list of content blocks (Anthropic format)
|
||||||
text_parts = []
|
text_parts: list[str] = []
|
||||||
for item in content:
|
for item in content:
|
||||||
if isinstance(item, dict):
|
if isinstance(item, dict):
|
||||||
if item.get("type") == "text":
|
if item.get("type") == "text":
|
||||||
|
|
@ -396,7 +401,7 @@ def update_display(layout, spinner_text=None):
|
||||||
layout["footer"].update(Panel(stats_table, border_style="grey50"))
|
layout["footer"].update(Panel(stats_table, border_style="grey50"))
|
||||||
|
|
||||||
|
|
||||||
def get_user_selections():
|
def get_user_selections() -> dict[str, str | int | list[AnalystType]]:
|
||||||
"""Get all user selections before starting the analysis display."""
|
"""Get all user selections before starting the analysis display."""
|
||||||
# Display ASCII art welcome message
|
# Display ASCII art welcome message
|
||||||
with open("./cli/static/welcome.txt") as f:
|
with open("./cli/static/welcome.txt") as f:
|
||||||
|
|
@ -698,7 +703,7 @@ def extract_content_string(content):
|
||||||
return content
|
return content
|
||||||
elif isinstance(content, list):
|
elif isinstance(content, list):
|
||||||
# Handle Anthropic's list format
|
# Handle Anthropic's list format
|
||||||
text_parts = []
|
text_parts: list[str] = []
|
||||||
for item in content:
|
for item in content:
|
||||||
if isinstance(item, dict):
|
if isinstance(item, dict):
|
||||||
if item.get("type") == "text":
|
if item.get("type") == "text":
|
||||||
|
|
@ -717,23 +722,51 @@ def run_analysis():
|
||||||
selections = get_user_selections()
|
selections = get_user_selections()
|
||||||
|
|
||||||
# Create config with selected research depth
|
# Create config with selected research depth
|
||||||
|
research_depth = selections["research_depth"]
|
||||||
|
shallow_thinker = selections["shallow_thinker"]
|
||||||
|
deep_thinker = selections["deep_thinker"]
|
||||||
|
backend_url = selections["backend_url"]
|
||||||
|
llm_provider = selections["llm_provider"]
|
||||||
|
|
||||||
config = TradingAgentsConfig(
|
config = TradingAgentsConfig(
|
||||||
max_debate_rounds=selections["research_depth"],
|
max_debate_rounds=research_depth if isinstance(research_depth, int) else 1,
|
||||||
max_risk_discuss_rounds=selections["research_depth"],
|
max_risk_discuss_rounds=research_depth
|
||||||
quick_think_llm=selections["shallow_thinker"],
|
if isinstance(research_depth, int)
|
||||||
deep_think_llm=selections["deep_thinker"],
|
else 1,
|
||||||
backend_url=selections["backend_url"],
|
quick_think_llm=shallow_thinker
|
||||||
llm_provider=selections["llm_provider"].lower(),
|
if isinstance(shallow_thinker, str)
|
||||||
|
else "gpt-4o-mini",
|
||||||
|
deep_think_llm=deep_thinker if isinstance(deep_thinker, str) else "o4-mini",
|
||||||
|
backend_url=backend_url
|
||||||
|
if isinstance(backend_url, str)
|
||||||
|
else "https://api.openai.com/v1",
|
||||||
|
llm_provider=cast(
|
||||||
|
"Literal['openai', 'anthropic', 'google', 'ollama', 'openrouter']",
|
||||||
|
"openai"
|
||||||
|
if not isinstance(llm_provider, str)
|
||||||
|
else (
|
||||||
|
llm_provider.lower()
|
||||||
|
if llm_provider.lower()
|
||||||
|
in ["openai", "anthropic", "google", "ollama", "openrouter"]
|
||||||
|
else "openai"
|
||||||
|
),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize the graph
|
# Initialize the graph
|
||||||
graph = TradingAgentsGraph(
|
analysts_list = selections["analysts"]
|
||||||
[analyst.value for analyst in selections["analysts"]], config=config, debug=True
|
if isinstance(analysts_list, list):
|
||||||
)
|
analyst_values = [analyst.value for analyst in analysts_list]
|
||||||
|
else:
|
||||||
|
analyst_values = []
|
||||||
|
|
||||||
|
graph = TradingAgentsGraph(analyst_values, config=config, debug=True)
|
||||||
|
|
||||||
# Create result directory
|
# Create result directory
|
||||||
results_dir = (
|
results_dir = (
|
||||||
Path(config.results_dir) / selections["ticker"] / selections["analysis_date"]
|
Path(config.results_dir)
|
||||||
|
/ str(selections["ticker"])
|
||||||
|
/ str(selections["analysis_date"])
|
||||||
)
|
)
|
||||||
results_dir.mkdir(parents=True, exist_ok=True)
|
results_dir.mkdir(parents=True, exist_ok=True)
|
||||||
report_dir = results_dir / "reports"
|
report_dir = results_dir / "reports"
|
||||||
|
|
@ -805,9 +838,14 @@ def run_analysis():
|
||||||
message_buffer.add_message(
|
message_buffer.add_message(
|
||||||
"System", f"Analysis date: {selections['analysis_date']}"
|
"System", f"Analysis date: {selections['analysis_date']}"
|
||||||
)
|
)
|
||||||
|
analysts_list = selections["analysts"]
|
||||||
|
if isinstance(analysts_list, list):
|
||||||
|
analysts_str = ", ".join(analyst.value for analyst in analysts_list)
|
||||||
|
else:
|
||||||
|
analysts_str = "None"
|
||||||
message_buffer.add_message(
|
message_buffer.add_message(
|
||||||
"System",
|
"System",
|
||||||
f"Selected analysts: {', '.join(analyst.value for analyst in selections['analysts'])}",
|
f"Selected analysts: {analysts_str}",
|
||||||
)
|
)
|
||||||
update_display(layout)
|
update_display(layout)
|
||||||
|
|
||||||
|
|
@ -822,7 +860,11 @@ def run_analysis():
|
||||||
message_buffer.final_report = None
|
message_buffer.final_report = None
|
||||||
|
|
||||||
# Update agent status to in_progress for the first analyst
|
# Update agent status to in_progress for the first analyst
|
||||||
first_analyst = f"{selections['analysts'][0].value.capitalize()} Analyst"
|
analysts_list = selections["analysts"]
|
||||||
|
if isinstance(analysts_list, list) and len(analysts_list) > 0:
|
||||||
|
first_analyst = f"{analysts_list[0].value.capitalize()} Analyst"
|
||||||
|
else:
|
||||||
|
first_analyst = "Market Analyst"
|
||||||
message_buffer.update_agent_status(first_analyst, "in_progress")
|
message_buffer.update_agent_status(first_analyst, "in_progress")
|
||||||
update_display(layout)
|
update_display(layout)
|
||||||
|
|
||||||
|
|
@ -834,7 +876,7 @@ def run_analysis():
|
||||||
|
|
||||||
# Initialize state and get graph args
|
# Initialize state and get graph args
|
||||||
init_agent_state = graph.propagator.create_initial_state(
|
init_agent_state = graph.propagator.create_initial_state(
|
||||||
selections["ticker"], selections["analysis_date"]
|
str(selections["ticker"]), str(selections["analysis_date"])
|
||||||
)
|
)
|
||||||
args = graph.propagator.get_graph_args()
|
args = graph.propagator.get_graph_args()
|
||||||
|
|
||||||
|
|
@ -877,7 +919,11 @@ def run_analysis():
|
||||||
)
|
)
|
||||||
message_buffer.update_agent_status("Market Analyst", "completed")
|
message_buffer.update_agent_status("Market Analyst", "completed")
|
||||||
# Set next analyst to in_progress
|
# Set next analyst to in_progress
|
||||||
if "social" in selections["analysts"]:
|
analysts_list = selections["analysts"]
|
||||||
|
if (
|
||||||
|
isinstance(analysts_list, list)
|
||||||
|
and AnalystType.SOCIAL in analysts_list
|
||||||
|
):
|
||||||
message_buffer.update_agent_status(
|
message_buffer.update_agent_status(
|
||||||
"Social Analyst", "in_progress"
|
"Social Analyst", "in_progress"
|
||||||
)
|
)
|
||||||
|
|
@ -888,7 +934,11 @@ def run_analysis():
|
||||||
)
|
)
|
||||||
message_buffer.update_agent_status("Social Analyst", "completed")
|
message_buffer.update_agent_status("Social Analyst", "completed")
|
||||||
# Set next analyst to in_progress
|
# Set next analyst to in_progress
|
||||||
if "news" in selections["analysts"]:
|
analysts_list = selections["analysts"]
|
||||||
|
if (
|
||||||
|
isinstance(analysts_list, list)
|
||||||
|
and AnalystType.NEWS in analysts_list
|
||||||
|
):
|
||||||
message_buffer.update_agent_status(
|
message_buffer.update_agent_status(
|
||||||
"News Analyst", "in_progress"
|
"News Analyst", "in_progress"
|
||||||
)
|
)
|
||||||
|
|
@ -899,7 +949,11 @@ def run_analysis():
|
||||||
)
|
)
|
||||||
message_buffer.update_agent_status("News Analyst", "completed")
|
message_buffer.update_agent_status("News Analyst", "completed")
|
||||||
# Set next analyst to in_progress
|
# Set next analyst to in_progress
|
||||||
if "fundamentals" in selections["analysts"]:
|
analysts_list = selections["analysts"]
|
||||||
|
if (
|
||||||
|
isinstance(analysts_list, list)
|
||||||
|
and AnalystType.FUNDAMENTALS in analysts_list
|
||||||
|
):
|
||||||
message_buffer.update_agent_status(
|
message_buffer.update_agent_status(
|
||||||
"Fundamentals Analyst", "in_progress"
|
"Fundamentals Analyst", "in_progress"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,7 @@ markers = [
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
|
"pyrefly>=0.28.1",
|
||||||
"pytest>=8.4.1",
|
"pytest>=8.4.1",
|
||||||
"pytest-asyncio>=1.1.0",
|
"pytest-asyncio>=1.1.0",
|
||||||
"pytest-cov>=6.2.1",
|
"pytest-cov>=6.2.1",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
python-version = "3.13.0"
|
||||||
|
search_path = ["."] # Changed from src-roots
|
||||||
|
|
||||||
|
[errors]
|
||||||
|
import-error = "warn"
|
||||||
|
|
@ -7,6 +7,5 @@
|
||||||
"reportMissingTypeStubs": false,
|
"reportMissingTypeStubs": false,
|
||||||
"useLibraryCodeForTypes": true,
|
"useLibraryCodeForTypes": true,
|
||||||
"autoSearchPaths": true,
|
"autoSearchPaths": true,
|
||||||
"extraPaths": [],
|
"extraPaths": []
|
||||||
"stubPath": "typings"
|
|
||||||
}
|
}
|
||||||
|
|
@ -49,7 +49,10 @@ def temp_data_dir():
|
||||||
"""Temporary directory for testing real repository persistence."""
|
"""Temporary directory for testing real repository persistence."""
|
||||||
temp_dir = tempfile.mkdtemp()
|
temp_dir = tempfile.mkdtemp()
|
||||||
yield temp_dir
|
yield temp_dir
|
||||||
shutil.rmtree(temp_dir)
|
try:
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
|
except OSError:
|
||||||
|
pass # Directory might already be deleted
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
Tests for Google News RSS feed client using pytest-vcr.
|
Tests for Google News RSS feed client using pytest-vcr.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from unittest.mock import Mock, patch
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
|
|
@ -139,9 +139,9 @@ class TestGoogleNewsClient:
|
||||||
mock_entry.id = "guid-456"
|
mock_entry.id = "guid-456"
|
||||||
|
|
||||||
# Should use current time as fallback
|
# Should use current time as fallback
|
||||||
before = datetime.utcnow()
|
before = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
article = client._parse_feed_entry(mock_entry)
|
article = client._parse_feed_entry(mock_entry)
|
||||||
after = datetime.utcnow()
|
after = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
|
|
||||||
assert before <= article.published <= after
|
assert before <= article.published <= after
|
||||||
assert article.title == "Breaking News"
|
assert article.title == "Breaking News"
|
||||||
|
|
@ -259,7 +259,7 @@ class TestGoogleNewsClient:
|
||||||
GoogleNewsArticle(
|
GoogleNewsArticle(
|
||||||
title="Tech News",
|
title="Tech News",
|
||||||
link="https://tech.com",
|
link="https://tech.com",
|
||||||
published=datetime.utcnow(),
|
published=datetime.now(timezone.utc).replace(tzinfo=None),
|
||||||
summary="Tech summary",
|
summary="Tech summary",
|
||||||
source="TechSite",
|
source="TechSite",
|
||||||
guid="tech-1",
|
guid="tech-1",
|
||||||
|
|
@ -351,7 +351,7 @@ class TestIntegrationScenarios:
|
||||||
successful_article = GoogleNewsArticle(
|
successful_article = GoogleNewsArticle(
|
||||||
title="Success",
|
title="Success",
|
||||||
link="https://success.com",
|
link="https://success.com",
|
||||||
published=datetime.utcnow(),
|
published=datetime.now(timezone.utc).replace(tzinfo=None),
|
||||||
summary="Successful fetch",
|
summary="Successful fetch",
|
||||||
source="GoodSource",
|
source="GoodSource",
|
||||||
guid="success-1",
|
guid="success-1",
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,6 @@ from unittest.mock import Mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
# Import mock ScrapeResult from conftest to avoid newspaper3k import issues
|
|
||||||
from conftest import ScrapeResult
|
|
||||||
|
|
||||||
from tradingagents.domains.news.news_repository import (
|
from tradingagents.domains.news.news_repository import (
|
||||||
NewsData,
|
NewsData,
|
||||||
)
|
)
|
||||||
|
|
@ -26,6 +23,9 @@ from tradingagents.domains.news.news_service import (
|
||||||
SentimentScore,
|
SentimentScore,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Import mock ScrapeResult from conftest to avoid newspaper3k import issues
|
||||||
|
from ...conftest import ScrapeResult
|
||||||
|
|
||||||
|
|
||||||
class TestNewsServiceCollaboratorInteractions:
|
class TestNewsServiceCollaboratorInteractions:
|
||||||
"""Test NewsService interactions with its collaborators (I/O boundaries)."""
|
"""Test NewsService interactions with its collaborators (I/O boundaries)."""
|
||||||
|
|
|
||||||
14992
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_barchart.yaml
vendored
Normal file
14992
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_barchart.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
8281
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_bloomberg.yaml
vendored
Normal file
8281
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_bloomberg.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
4142
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_business_insider.yaml
vendored
Normal file
4142
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_business_insider.yaml
vendored
Normal file
File diff suppressed because it is too large
Load Diff
11491
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_cnbc.yaml
vendored
Normal file
11491
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_cnbc.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
4715
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_forbes.yaml
vendored
Normal file
4715
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_forbes.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
293
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_marketwatch.yaml
vendored
Normal file
293
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_marketwatch.yaml
vendored
Normal file
|
|
@ -0,0 +1,293 @@
|
||||||
|
interactions:
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.marketwatch.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Age:
|
||||||
|
- '9'
|
||||||
|
Cache-Control:
|
||||||
|
- max-age=15,s-maxage=15,stale-while-revalidate=3600
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- text/html; charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:48 GMT
|
||||||
|
ETag:
|
||||||
|
- '"16541uq28i8d2eu"'
|
||||||
|
Referrer-Policy:
|
||||||
|
- strict-origin-when-cross-origin
|
||||||
|
Strict-Transport-Security:
|
||||||
|
- max-age=63072000; includeSubDomains; preload
|
||||||
|
Vary:
|
||||||
|
- Accept-Encoding
|
||||||
|
Via:
|
||||||
|
- 1.1 9158fa1ac72d0c0684fe558c8655aeda.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- WnxOaO_X9Ha6Bl1kttceeANQOWPglhx6yji3g-trhzE8DzqOV5ShLg==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P6
|
||||||
|
X-Cache:
|
||||||
|
- Hit from cloudfront
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
X-XSS-Protection:
|
||||||
|
- 1; mode=block
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.marketwatch.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Age:
|
||||||
|
- '9'
|
||||||
|
Cache-Control:
|
||||||
|
- max-age=15,s-maxage=15,stale-while-revalidate=3600
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- text/html; charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:48 GMT
|
||||||
|
ETag:
|
||||||
|
- '"16541uq28i8d2eu"'
|
||||||
|
Referrer-Policy:
|
||||||
|
- strict-origin-when-cross-origin
|
||||||
|
Strict-Transport-Security:
|
||||||
|
- max-age=63072000; includeSubDomains; preload
|
||||||
|
Vary:
|
||||||
|
- Accept-Encoding
|
||||||
|
Via:
|
||||||
|
- 1.1 b7258653b42aa6de9758e92b2878c108.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- KY6PMh_UdRBvwvw2_X8DiocGroAMWN-H5Yvm3XQ9lsgR-x9QzNq_fg==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P6
|
||||||
|
X-Cache:
|
||||||
|
- Hit from cloudfront
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
X-XSS-Protection:
|
||||||
|
- 1; mode=block
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Range:
|
||||||
|
- bytes=0-100
|
||||||
|
method: GET
|
||||||
|
uri: https://www.marketwatch.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>marketwatch.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47891,''e'':''790151f692f4dd9eb3ed40b035ea9a8d35d12abd8085f8d73942e7cd7d933885'',''host'':''geo.captcha-delivery.com'',''cookie'':''iOk_wpxMHnH24N9G8907~eT~5C2Erb1E3lwtqPUrELHb7b329G6JXl8C5OMaY_Ocly5R3Ho1bjdZK266cwBD~IokhajgO5aZ0EVCkAvVh1gDdEo84pnYC2rrU2IYI6sI''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '743'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:57 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 b7258653b42aa6de9758e92b2878c108.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- eaigBLeIFd3UqzVz9ugedb3xWFMCpbwKkCXQentj0NT88W-bG-mwBQ==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P6
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=iOk_wpxMHnH24N9G8907~eT~5C2Erb1E3lwtqPUrELHb7b329G6JXl8C5OMaY_Ocly5R3Ho1bjdZK266cwBD~IokhajgO5aZ0EVCkAvVh1gDdEo84pnYC2rrU2IYI6sI;
|
||||||
|
Max-Age=31536000; Domain=.marketwatch.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.marketwatch.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>marketwatch.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47891,''e'':''790151f692f4dd9eb3ed40b035ea9a8d7390bea2e91fcde9893303bba5852699'',''host'':''geo.captcha-delivery.com'',''cookie'':''h0EmeS4jcwCiFsI~oVw3c2mMI7n02GLQFd0ohSnM6f_YL58eqz3UNqF~~FpnXKpjPSL0JLWPXzCj9awouSFxvoaV6WTc9NhWhl3yRrRWlj0M3zxqFklIXCrYX76b12It''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '743'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:57 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 65c7ccdbbbb8463f3d45d2d76098350e.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- CUcbO9OnOzfBgRtFzyBkqgQ59cql-jWQoYl7wy9mHDHkVHxWroAhBQ==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P6
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=h0EmeS4jcwCiFsI~oVw3c2mMI7n02GLQFd0ohSnM6f_YL58eqz3UNqF~~FpnXKpjPSL0JLWPXzCj9awouSFxvoaV6WTc9NhWhl3yRrRWlj0M3zxqFklIXCrYX76b12It;
|
||||||
|
Max-Age=31536000; Domain=.marketwatch.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.marketwatch.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>marketwatch.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47891,''e'':''790151f692f4dd9eb3ed40b035ea9a8d2e67c62e5c3d0cb9374648bf7b1df7fc'',''host'':''geo.captcha-delivery.com'',''cookie'':''2yiKpqoh3ChnNVUjdsQLQa2Bf5zaYI_Ey7DiwJtN6AS09YpimYFAF6jRXfj1rB6M4h7ivw3lIhHdWcVNDBObUnTTOP26otua83leFBUM3rlDzEgiFwvNKnfwWptcP_od''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '743'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:57 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 32301bfd0e3b06c528ccd8abdb13411e.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- xCjoAKCautAdoyEWF6gWh395azLOJk4U8cqvVkcrKFkEUrtmgUcCxQ==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P6
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=2yiKpqoh3ChnNVUjdsQLQa2Bf5zaYI_Ey7DiwJtN6AS09YpimYFAF6jRXfj1rB6M4h7ivw3lIhHdWcVNDBObUnTTOP26otua83leFBUM3rlDzEgiFwvNKnfwWptcP_od;
|
||||||
|
Max-Age=31536000; Domain=.marketwatch.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAjSPVV_1oLcoA1V1s-g==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
version: 1
|
||||||
344
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_reuters.yaml
vendored
Normal file
344
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_reuters.yaml
vendored
Normal file
|
|
@ -0,0 +1,344 @@
|
||||||
|
interactions:
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.reuters.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Cache-Control:
|
||||||
|
- private, max-age=60
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Length:
|
||||||
|
- '141231'
|
||||||
|
Content-Security-Policy:
|
||||||
|
- frame-ancestors 'self'; report-uri https://reuters.report-uri.com/r/t/csp/enforce;
|
||||||
|
report-to report-uri
|
||||||
|
Content-Type:
|
||||||
|
- text/html; charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:57 GMT
|
||||||
|
ETag:
|
||||||
|
- W/"bee6e-P0/5FwvNlv4e0VsbLNb+vjlAAEY"
|
||||||
|
Expires:
|
||||||
|
- Sat, 16 Aug 2025 09:26:57 GMT
|
||||||
|
Last-Modified:
|
||||||
|
- Sat, 16 Aug 2025 09:25:27 GMT
|
||||||
|
MPULSE_CDN_CACHE:
|
||||||
|
- HIT
|
||||||
|
MPULSE_ORIGIN_TIME:
|
||||||
|
- '0'
|
||||||
|
Report-To:
|
||||||
|
- '{"endpoints":[{"url":"https://reuters.report-uri.com/a/t/g"}],"group":"report-uri","include_subdomains":true,"max_age":31536000}'
|
||||||
|
Server:
|
||||||
|
- openresty
|
||||||
|
Server-Timing:
|
||||||
|
- ak_p; desc="1755336357935_34831709_40131180_29_7877_1_0_-";dur=1
|
||||||
|
Set-Cookie:
|
||||||
|
- reuters-geo={"country":"-", "region":"-"}; path=/; secure
|
||||||
|
Strict-Transport-Security:
|
||||||
|
- max-age=31536000
|
||||||
|
Vary:
|
||||||
|
- Accept-Encoding
|
||||||
|
Via:
|
||||||
|
- 1.1 23776effa8a63b2e2dccd702e73b0c86.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- rMcLvpwjqiBqwcqb1GFwi1tfSST9qPKTcbN11SQTTwvoBtnMe7Craw==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS54-C1
|
||||||
|
X-Cache:
|
||||||
|
- Miss from cloudfront
|
||||||
|
x-arc-pb-mx-id:
|
||||||
|
- '00000000'
|
||||||
|
x-arc-pb-request-id:
|
||||||
|
- 19242937-e9db-4fde-8f07-1791a9202aed
|
||||||
|
- 15b77945-30be-4faa-9440-5a387918a1dd
|
||||||
|
x-arc-request-id:
|
||||||
|
- 0.5d7d1302.1755336357.2645a6c
|
||||||
|
x-arc-ttl:
|
||||||
|
- '120'
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.reuters.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '739'
|
||||||
|
Content-Security-Policy:
|
||||||
|
- frame-ancestors 'self'; report-uri https://reuters.report-uri.com/r/t/csp/enforce;
|
||||||
|
report-to report-uri
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:58 GMT
|
||||||
|
Report-To:
|
||||||
|
- '{"endpoints":[{"url":"https://reuters.report-uri.com/a/t/g"}],"group":"report-uri","include_subdomains":true,"max_age":31536000}'
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 51d16867ea09d1b4c52eca0e090ad4a2.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- pK4jzbeRY6WcQ74bMVmG2g8TMoDL805RO8lqKkBIJZn2yqTPd9QFWQ==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS54-C1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=8JtQQOWicM1pWAuUGGsn3Z3aDHbLkSrazvz192M~cXEcXFnJ_k_YVv66DKGPh3yLVP26EgrnlmAWPc5Q4H9hYy8Ed12jn~nw5l4~8XBCveoNmfC96zfgHaXz0eypvbqm;
|
||||||
|
Max-Age=31536000; Domain=.reuters.com; Path=/; Secure; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '2'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Range:
|
||||||
|
- bytes=0-100
|
||||||
|
method: GET
|
||||||
|
uri: https://www.reuters.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>reuters.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg=='',''hsh'':''2013457ADA70C67D6A4123E0A76873'',''t'':''fe'',''qp'':'''',''s'':43909,''e'':''b8ed04ef439919d24cea1cdb4fb467bd8211a028a180bfec65a2baf8818a5653'',''host'':''geo.captcha-delivery.com'',''cookie'':''3K2lJwZebAA2ppdi3lBPVWPZ32bt6VWN3Nh5huAhYlmfUsYCTMtjT_SOm79DJn5k18LyFoHxZRO9bYk1JDAV5KBidmDlcBCz7dw9ZJCo8xsCLHGQxPgBlwjCVPiiuWo4''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '739'
|
||||||
|
Content-Security-Policy:
|
||||||
|
- frame-ancestors 'self'; report-uri https://reuters.report-uri.com/r/t/csp/enforce;
|
||||||
|
report-to report-uri
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:58 GMT
|
||||||
|
Report-To:
|
||||||
|
- '{"endpoints":[{"url":"https://reuters.report-uri.com/a/t/g"}],"group":"report-uri","include_subdomains":true,"max_age":31536000}'
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 630336d6cdf08cf266841fd503dc03d0.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- Sc-7yH3FKvJORjvEQgeWQ9Lo1ipQ3WNd6O5R9q69BJzoL5CrcmCypA==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS54-C1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=3K2lJwZebAA2ppdi3lBPVWPZ32bt6VWN3Nh5huAhYlmfUsYCTMtjT_SOm79DJn5k18LyFoHxZRO9bYk1JDAV5KBidmDlcBCz7dw9ZJCo8xsCLHGQxPgBlwjCVPiiuWo4;
|
||||||
|
Max-Age=31536000; Domain=.reuters.com; Path=/; Secure; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.reuters.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>reuters.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg=='',''hsh'':''2013457ADA70C67D6A4123E0A76873'',''t'':''bv'',''qp'':'''',''s'':43909,''e'':''b8ed04ef439919d24cea1cdb4fb467bde05105d2c0de62249e9b88f593a72010'',''host'':''geo.captcha-delivery.com'',''cookie'':''9YZ3B5U3HGUH5O8yQPRxDY_I84AQdEPfbW1RTNsrCYwL661Y5WVC_c5ppLf0Q~GMQ_jOwBPVKPWFXIXM1BIJ_mSYJxktC3Js7Jm5UlC5AT4IlqviZX_Aj_ICqmCVPlLG''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '739'
|
||||||
|
Content-Security-Policy:
|
||||||
|
- frame-ancestors 'self'; report-uri https://reuters.report-uri.com/r/t/csp/enforce;
|
||||||
|
report-to report-uri
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:58 GMT
|
||||||
|
Report-To:
|
||||||
|
- '{"endpoints":[{"url":"https://reuters.report-uri.com/a/t/g"}],"group":"report-uri","include_subdomains":true,"max_age":31536000}'
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 4445c4223f8c2460ef5d29a08d1cc6ac.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- rhlp66QzFyLoY7XY79SzyPG2gFlMU8_SchOyI9iPokXuZ62I06AsHw==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS54-C1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=9YZ3B5U3HGUH5O8yQPRxDY_I84AQdEPfbW1RTNsrCYwL661Y5WVC_c5ppLf0Q~GMQ_jOwBPVKPWFXIXM1BIJ_mSYJxktC3Js7Jm5UlC5AT4IlqviZX_Aj_ICqmCVPlLG;
|
||||||
|
Max-Age=31536000; Domain=.reuters.com; Path=/; Secure; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '2'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.reuters.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>reuters.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg=='',''hsh'':''2013457ADA70C67D6A4123E0A76873'',''t'':''bv'',''qp'':'''',''s'':43909,''e'':''b8ed04ef439919d24cea1cdb4fb467bdb6cc88d6664522bbc3c933b568f504a7'',''host'':''geo.captcha-delivery.com'',''cookie'':''XmvCV5uyqrYoNpGAsMA0e_UcvVMDG8liKq3eEu2VT1ZMRQ9SIcMrtslwcZsWMja68FYKidccHstjCgAEFqEjrCVeAwaDIyvcYAgeRRTQkDIb3mkrMK028Ov5jLBbnQe5''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '739'
|
||||||
|
Content-Security-Policy:
|
||||||
|
- frame-ancestors 'self'; report-uri https://reuters.report-uri.com/r/t/csp/enforce;
|
||||||
|
report-to report-uri
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:25:58 GMT
|
||||||
|
Report-To:
|
||||||
|
- '{"endpoints":[{"url":"https://reuters.report-uri.com/a/t/g"}],"group":"report-uri","include_subdomains":true,"max_age":31536000}'
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 b911c551065b8f78ad33b4c4564141be.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- mFvfnCJ2-GyPL5e2wMascrxVhUnX3uzIFJUrShkzGI9GQvR-JnIgkA==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS54-C1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=XmvCV5uyqrYoNpGAsMA0e_UcvVMDG8liKq3eEu2VT1ZMRQ9SIcMrtslwcZsWMja68FYKidccHstjCgAEFqEjrCVeAwaDIyvcYAgeRRTQkDIb3mkrMK028Ov5jLBbnQe5;
|
||||||
|
Max-Age=31536000; Domain=.reuters.com; Path=/; Secure; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMAzt9oZr8-sFUA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '2'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
version: 1
|
||||||
12511
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_seeking_alpha.yaml
vendored
Normal file
12511
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_seeking_alpha.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
5047
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_tip_ranks.yaml
vendored
Normal file
5047
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_tip_ranks.yaml
vendored
Normal file
File diff suppressed because it is too large
Load Diff
19068
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_with_404.yaml
vendored
Normal file
19068
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_with_404.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
311
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_wsj.yaml
vendored
Normal file
311
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_wsj.yaml
vendored
Normal file
|
|
@ -0,0 +1,311 @@
|
||||||
|
interactions:
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.wsj.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '738'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:26:01 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 b10069b378f22e10f0382c21d0a9578e.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- vNjsXH59e05LYupXAQP8wjsmyHxzz-9AgDvoQjXSJPCIPuCZ0dBjmw==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=Jokk81LE2F1PFmL49~2Hz1XFEPuU3trSD8yy3oY9HhQrvX49vlvozDmpO1xm5rzmH7KysWhEzsm3K64J3Xym3x_9DTRW_Qlx~tZZ13Ar5t52hyQ~YTnIZbE2WTfGd50E;
|
||||||
|
Max-Age=31536000; Domain=.wsj.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '3'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: HEAD
|
||||||
|
uri: https://www.wsj.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: ''
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '738'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:26:01 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 e3d9ae12f22103dbc65c451ae520a012.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- TCVqPkT0SuaKHJDgEuXTbHBAVs9cPgYi5yq9rFnM9qCTYBsdwFqJhg==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=5dabMne~GgdgdC4hjN~Pf8UK5u4hPTPHIDrFbWUwlbZGpiqoI9pQ4NTSbFnqX2qU3tv6miL7f1M6prrza0Kjy70kLYml_UTVjQ4nuupttmO6bitno123X4Xamfe1wfYc;
|
||||||
|
Max-Age=31536000; Domain=.wsj.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '3'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Range:
|
||||||
|
- bytes=0-100
|
||||||
|
method: GET
|
||||||
|
uri: https://www.wsj.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>wsj.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47129,''e'':''b8ed04ef439919d24cea1cdb4fb467bdbb2efee457f880544ee0925aa5e0d1ad'',''host'':''geo.captcha-delivery.com'',''cookie'':''SGkTf3XHVKfP1gixZQyqVkS8UPZZnrbyAziyp9qpp86H1QADEXGHFgDFnZe3c8DWeUvPxyz5Tr2OUeFUq9yz3nk1i_j6in~56ebQURowQOvKkakNszCb59lxhSa~Dvvf''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '735'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:26:01 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 9bc84c94880403a2bdfe0bc8f1800e4e.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- AOY5ZCyfptVbxbE7LUG_P-1GJtrX6edhh1L0-NHcOlQqdC4oAE7_Ug==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=SGkTf3XHVKfP1gixZQyqVkS8UPZZnrbyAziyp9qpp86H1QADEXGHFgDFnZe3c8DWeUvPxyz5Tr2OUeFUq9yz3nk1i_j6in~56ebQURowQOvKkakNszCb59lxhSa~Dvvf;
|
||||||
|
Max-Age=31536000; Domain=.wsj.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.wsj.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>wsj.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47129,''e'':''b8ed04ef439919d24cea1cdb4fb467bd911c4f88e9533e789473c50f99ea32e5'',''host'':''geo.captcha-delivery.com'',''cookie'':''qRo9XDdsx5fpzzyXlGiW43b1IneHZVAau~NHsU5eCDnTllnDaxmJPjJkM0JB_NF2VK4qtZvoi2j62WbXLLOmR1z9O~8IruEALYMMy6Ar8Mxpv6kvtNiHS4mtDAQxC14E''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '735'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:26:01 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 6592b72953c66e8c26c29c332cf2edf0.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- _1yHeaiqkvfST2UXw4JBoQdokr2e1AbNyFCdD9Q85fUMfAS3WC8V3g==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=qRo9XDdsx5fpzzyXlGiW43b1IneHZVAau~NHsU5eCDnTllnDaxmJPjJkM0JB_NF2VK4qtZvoi2j62WbXLLOmR1z9O~8IruEALYMMy6Ar8Mxpv6kvtNiHS4mtDAQxC14E;
|
||||||
|
Max-Age=31536000; Domain=.wsj.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
- request:
|
||||||
|
body: null
|
||||||
|
headers:
|
||||||
|
Accept:
|
||||||
|
- '*/*'
|
||||||
|
Accept-Encoding:
|
||||||
|
- gzip, deflate
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
method: GET
|
||||||
|
uri: https://www.wsj.com/
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: '<html lang="en"><head><title>wsj.com</title><style>#cmsg{animation:
|
||||||
|
A 1.5s;}@keyframes A{0%{opacity:0;}99%{opacity:0;}100%{opacity:1;}}</style></head><body
|
||||||
|
style="margin:0"><p id="cmsg">Please enable JS and disable any ad blocker</p><script
|
||||||
|
data-cfasync="false">var dd={''rt'':''c'',''cid'':''AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg=='',''hsh'':''D428D51E28968797BC27FB9153435D'',''t'':''fe'',''qp'':'''',''s'':47129,''e'':''b8ed04ef439919d24cea1cdb4fb467bd6ec4494f79fb008f6a406ee7318a4bfc'',''host'':''geo.captcha-delivery.com'',''cookie'':''P5IiQnsntc60sKxwU64s~u5k8q__8mi46dxCuKB6qjevDG~xsYakp07Tfn2m1Ofwwe3p5sYYa8kW7Z~gZ2xN09B0_B8gule7LijzHHfeg3CJDVuqpFNPJk_~o44XvPOC''}</script><script
|
||||||
|
data-cfasync="false" src="https://ct.captcha-delivery.com/c.js"></script></body></html>'
|
||||||
|
headers:
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Length:
|
||||||
|
- '735'
|
||||||
|
Content-Type:
|
||||||
|
- text/html;charset=utf-8
|
||||||
|
Date:
|
||||||
|
- Sat, 16 Aug 2025 09:26:01 GMT
|
||||||
|
Server:
|
||||||
|
- CloudFront
|
||||||
|
Via:
|
||||||
|
- 1.1 b10069b378f22e10f0382c21d0a9578e.cloudfront.net (CloudFront)
|
||||||
|
X-Amz-Cf-Id:
|
||||||
|
- 1uZRunhOeCQSxhvodKml5TxhuZAJuIbcRAJN-NRsjZOFg_KI40UEPA==
|
||||||
|
X-Amz-Cf-Pop:
|
||||||
|
- AMS58-P1
|
||||||
|
X-Cache:
|
||||||
|
- LambdaGeneratedResponse from cloudfront
|
||||||
|
accept-ch:
|
||||||
|
- Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
||||||
|
access-control-allow-credentials:
|
||||||
|
- 'true'
|
||||||
|
access-control-allow-origin:
|
||||||
|
- '*'
|
||||||
|
access-control-expose-headers:
|
||||||
|
- x-dd-b, x-set-cookie
|
||||||
|
cache-control:
|
||||||
|
- max-age=0, private, no-cache, no-store, must-revalidate
|
||||||
|
charset:
|
||||||
|
- utf-8
|
||||||
|
pragma:
|
||||||
|
- no-cache
|
||||||
|
set-cookie:
|
||||||
|
- datadome=P5IiQnsntc60sKxwU64s~u5k8q__8mi46dxCuKB6qjevDG~xsYakp07Tfn2m1Ofwwe3p5sYYa8kW7Z~gZ2xN09B0_B8gule7LijzHHfeg3CJDVuqpFNPJk_~o44XvPOC;
|
||||||
|
Max-Age=31536000; Domain=.wsj.com; Path=/; SameSite=Lax
|
||||||
|
x-datadome:
|
||||||
|
- protected
|
||||||
|
x-datadome-cid:
|
||||||
|
- AHrlqAAAAAMA0s2HZEKMSUAA-2tpSg==
|
||||||
|
x-dd-b:
|
||||||
|
- '1'
|
||||||
|
status:
|
||||||
|
code: 401
|
||||||
|
message: HTTP Forbidden
|
||||||
|
version: 1
|
||||||
65678
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_yahoo_finance.yaml
vendored
Normal file
65678
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_article_yahoo_finance.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
73225
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_multiple_financial_sites.yaml
vendored
Normal file
73225
tests/fixtures/vcr_cassettes/news/TestArticleScraperClient.test_scrape_multiple_financial_sites.yaml
vendored
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -13,19 +13,19 @@ class FinancialSituationMemory:
|
||||||
self.chroma_client = chromadb.Client(Settings(allow_reset=True))
|
self.chroma_client = chromadb.Client(Settings(allow_reset=True))
|
||||||
self.situation_collection = self.chroma_client.create_collection(name=name)
|
self.situation_collection = self.chroma_client.create_collection(name=name)
|
||||||
|
|
||||||
def get_embedding(self, text):
|
def get_embedding(self, text) -> list[float]:
|
||||||
"""Get OpenAI embedding for a text"""
|
"""Get OpenAI embedding for a text"""
|
||||||
|
|
||||||
response = self.client.embeddings.create(model=self.embedding, input=text)
|
response = self.client.embeddings.create(model=self.embedding, input=text)
|
||||||
return response.data[0].embedding
|
return response.data[0].embedding
|
||||||
|
|
||||||
def add_situations(self, situations_and_advice):
|
def add_situations(self, situations_and_advice):
|
||||||
"""Add financial situations and their corresponding advice. Parameter is a list of tuples (situation, rec)"""
|
"""Add financial situations and their corresponding advice. Parameter is a list of tuples (situation, rec)"""
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
situations = []
|
situations: list[str] = []
|
||||||
advice = []
|
advice: list[str] = []
|
||||||
ids = []
|
ids: list[str] = []
|
||||||
embeddings = []
|
embeddings: list[Any] = [] # ChromaDB expects flexible embedding types
|
||||||
|
|
||||||
offset = self.situation_collection.count()
|
offset = self.situation_collection.count()
|
||||||
|
|
||||||
|
|
@ -44,10 +44,11 @@ class FinancialSituationMemory:
|
||||||
|
|
||||||
def get_memories(self, current_situation, n_matches=1):
|
def get_memories(self, current_situation, n_matches=1):
|
||||||
"""Find matching recommendations using OpenAI embeddings"""
|
"""Find matching recommendations using OpenAI embeddings"""
|
||||||
|
|
||||||
query_embedding = self.get_embedding(current_situation)
|
query_embedding = self.get_embedding(current_situation)
|
||||||
|
|
||||||
results = self.situation_collection.query(
|
results = self.situation_collection.query(
|
||||||
query_embeddings=[query_embedding],
|
query_embeddings=[query_embedding], # type: ignore
|
||||||
n_results=n_matches,
|
n_results=n_matches,
|
||||||
include=["metadatas", "documents", "distances"],
|
include=["metadatas", "documents", "distances"],
|
||||||
)
|
)
|
||||||
|
|
@ -58,6 +59,8 @@ class FinancialSituationMemory:
|
||||||
and "documents" in results
|
and "documents" in results
|
||||||
and results["documents"]
|
and results["documents"]
|
||||||
and len(results["documents"]) > 0
|
and len(results["documents"]) > 0
|
||||||
|
and results["documents"][0] is not None
|
||||||
|
and len(results["documents"][0]) > 0
|
||||||
):
|
):
|
||||||
for i in range(len(results["documents"][0])):
|
for i in range(len(results["documents"][0])):
|
||||||
if (
|
if (
|
||||||
|
|
@ -70,6 +73,9 @@ class FinancialSituationMemory:
|
||||||
and len(results["distances"]) > 0
|
and len(results["distances"]) > 0
|
||||||
and i < len(results["distances"][0])
|
and i < len(results["distances"][0])
|
||||||
):
|
):
|
||||||
|
# Type checker satisfaction - we've already checked these exist
|
||||||
|
assert results["documents"] is not None
|
||||||
|
assert results["documents"][0] is not None
|
||||||
matched_results.append(
|
matched_results.append(
|
||||||
{
|
{
|
||||||
"matched_situation": results["documents"][0][i],
|
"matched_situation": results["documents"][0][i],
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ Yahoo Finance client for live market data.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta, timezone
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
@ -99,7 +99,9 @@ class YFinanceClient:
|
||||||
"source": "yahoo_finance",
|
"source": "yahoo_finance",
|
||||||
"record_count": len(records),
|
"record_count": len(records),
|
||||||
"columns": list(data.columns),
|
"columns": list(data.columns),
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -126,7 +128,9 @@ class YFinanceClient:
|
||||||
"info": info,
|
"info": info,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"source": "yahoo_finance",
|
"source": "yahoo_finance",
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -138,7 +142,9 @@ class YFinanceClient:
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"source": "yahoo_finance",
|
"source": "yahoo_finance",
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -177,7 +183,9 @@ class YFinanceClient:
|
||||||
"quarterly": {},
|
"quarterly": {},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"source": "yahoo_finance",
|
"source": "yahoo_finance",
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -209,7 +217,9 @@ class YFinanceClient:
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"source": "yahoo_finance",
|
"source": "yahoo_finance",
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@ Market data service that provides structured market context.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from typing import Any
|
from typing import Any, cast
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import talib
|
import talib
|
||||||
|
|
@ -18,6 +18,8 @@ from tradingagents.domains.marketdata.models import (
|
||||||
IndicatorParamValue,
|
IndicatorParamValue,
|
||||||
IndicatorPresets,
|
IndicatorPresets,
|
||||||
InputSpec,
|
InputSpec,
|
||||||
|
OutputSpec,
|
||||||
|
ParamRanges,
|
||||||
PriceDataContext,
|
PriceDataContext,
|
||||||
TAReportContext,
|
TAReportContext,
|
||||||
TechnicalAnalysisError,
|
TechnicalAnalysisError,
|
||||||
|
|
@ -125,7 +127,9 @@ class MarketDataService:
|
||||||
"service": "market_data",
|
"service": "market_data",
|
||||||
"record_count": len(price_data),
|
"record_count": len(price_data),
|
||||||
"source": "repository" if not df.empty else "client",
|
"source": "repository" if not df.empty else "client",
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
}
|
}
|
||||||
|
|
||||||
return PriceDataContext(
|
return PriceDataContext(
|
||||||
|
|
@ -153,7 +157,9 @@ class MarketDataService:
|
||||||
"data_quality": DataQuality.LOW.value,
|
"data_quality": DataQuality.LOW.value,
|
||||||
"service": "market_data",
|
"service": "market_data",
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -224,27 +230,23 @@ class MarketDataService:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create indicator config from the calculation
|
# Create indicator config from the calculation
|
||||||
|
definition = INDICATOR_DEFINITIONS.get(indicator.upper(), {})
|
||||||
indicator_config = IndicatorConfig(
|
indicator_config = IndicatorConfig(
|
||||||
name=indicator.upper(),
|
name=indicator.upper(),
|
||||||
parameters=indicator_data[0].parameters if indicator_data else {},
|
parameters=indicator_data[0].parameters if indicator_data else {},
|
||||||
input_types=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
input_types=cast(
|
||||||
"input_types", ["close"]
|
"list[InputSpec]", definition.get("input_types", ["close"])
|
||||||
),
|
),
|
||||||
output_format=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
output_format=cast(
|
||||||
"output_format", "single"
|
"OutputSpec", definition.get("output_format", "single")
|
||||||
),
|
),
|
||||||
param_ranges=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
param_ranges=cast("ParamRanges", definition.get("param_ranges", {})),
|
||||||
"param_ranges", {}
|
default_params=cast(
|
||||||
),
|
"dict[str, IndicatorParamValue]",
|
||||||
default_params=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
definition.get("default_params", {}),
|
||||||
"default_params", {}
|
|
||||||
),
|
|
||||||
talib_function=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
|
||||||
"talib_function", ""
|
|
||||||
),
|
|
||||||
description=INDICATOR_DEFINITIONS.get(indicator.upper(), {}).get(
|
|
||||||
"description", ""
|
|
||||||
),
|
),
|
||||||
|
talib_function=str(definition.get("talib_function", "")),
|
||||||
|
description=str(definition.get("description", "")),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate parameter summary
|
# Generate parameter summary
|
||||||
|
|
@ -265,7 +267,9 @@ class MarketDataService:
|
||||||
"data_quality": DataQuality.HIGH.value,
|
"data_quality": DataQuality.HIGH.value,
|
||||||
"service": "technical_analysis",
|
"service": "technical_analysis",
|
||||||
"indicator_count": len(indicator_data),
|
"indicator_count": len(indicator_data),
|
||||||
"retrieved_at": datetime.utcnow().isoformat(),
|
"retrieved_at": datetime.now(timezone.utc)
|
||||||
|
.replace(tzinfo=None)
|
||||||
|
.isoformat(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -308,19 +312,21 @@ class MarketDataService:
|
||||||
raise TechnicalAnalysisError(f"Unknown indicator: {indicator}")
|
raise TechnicalAnalysisError(f"Unknown indicator: {indicator}")
|
||||||
|
|
||||||
definition = INDICATOR_DEFINITIONS[indicator.upper()]
|
definition = INDICATOR_DEFINITIONS[indicator.upper()]
|
||||||
param_ranges = definition.get("param_ranges", {})
|
param_ranges = cast("ParamRanges", definition.get("param_ranges", {}))
|
||||||
|
|
||||||
for param_name, value in params.items():
|
for param_name, value in params.items():
|
||||||
if param_name in param_ranges:
|
if param_name in param_ranges:
|
||||||
min_val, max_val = param_ranges[param_name]
|
range_tuple = param_ranges[param_name]
|
||||||
if not isinstance(value, int | float):
|
if isinstance(range_tuple, tuple) and len(range_tuple) == 2:
|
||||||
raise TechnicalAnalysisError(
|
min_val, max_val = range_tuple
|
||||||
f"Parameter {param_name} must be numeric"
|
if not isinstance(value, int | float):
|
||||||
)
|
raise TechnicalAnalysisError(
|
||||||
if not (min_val <= value <= max_val):
|
f"Parameter {param_name} must be numeric"
|
||||||
raise TechnicalAnalysisError(
|
)
|
||||||
f"Parameter {param_name}={value} out of range [{min_val}, {max_val}]"
|
if not (min_val <= value <= max_val):
|
||||||
)
|
raise TechnicalAnalysisError(
|
||||||
|
f"Parameter {param_name}={value} out of range [{min_val}, {max_val}]"
|
||||||
|
)
|
||||||
|
|
||||||
def _prepare_price_arrays(
|
def _prepare_price_arrays(
|
||||||
self, price_data: list[dict[str, Any]], input_types: list[InputSpec]
|
self, price_data: list[dict[str, Any]], input_types: list[InputSpec]
|
||||||
|
|
@ -383,20 +389,26 @@ class MarketDataService:
|
||||||
# Use provided params or defaults
|
# Use provided params or defaults
|
||||||
final_params: dict[str, IndicatorParamValue]
|
final_params: dict[str, IndicatorParamValue]
|
||||||
if params is None:
|
if params is None:
|
||||||
final_params = definition["default_params"].copy()
|
final_params = cast(
|
||||||
|
"dict[str, IndicatorParamValue]", definition["default_params"]
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Merge with defaults for missing parameters
|
# Merge with defaults for missing parameters
|
||||||
final_params = definition["default_params"].copy()
|
final_params = cast(
|
||||||
|
"dict[str, IndicatorParamValue]", definition["default_params"]
|
||||||
|
).copy()
|
||||||
final_params.update(params)
|
final_params.update(params)
|
||||||
|
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
self._validate_parameters(indicator, final_params)
|
self._validate_parameters(indicator, final_params)
|
||||||
|
|
||||||
# Prepare price arrays
|
# Prepare price arrays
|
||||||
arrays = self._prepare_price_arrays(price_data, definition["input_types"])
|
arrays = self._prepare_price_arrays(
|
||||||
|
price_data, cast("list[InputSpec]", definition["input_types"])
|
||||||
|
)
|
||||||
|
|
||||||
# Get TA-Lib function
|
# Get TA-Lib function
|
||||||
talib_func_name = definition["talib_function"].split(".")[
|
talib_func_name = str(definition["talib_function"]).split(".")[
|
||||||
-1
|
-1
|
||||||
] # Extract function name
|
] # Extract function name
|
||||||
talib_func = getattr(talib, talib_func_name)
|
talib_func = getattr(talib, talib_func_name)
|
||||||
|
|
@ -406,7 +418,7 @@ class MarketDataService:
|
||||||
func_kwargs = {}
|
func_kwargs = {}
|
||||||
|
|
||||||
# Add required price arrays based on input types
|
# Add required price arrays based on input types
|
||||||
for input_type in definition["input_types"]:
|
for input_type in list(definition["input_types"]):
|
||||||
if input_type == "close":
|
if input_type == "close":
|
||||||
func_args.append(arrays["close"])
|
func_args.append(arrays["close"])
|
||||||
elif input_type == "ohlc":
|
elif input_type == "ohlc":
|
||||||
|
|
@ -433,7 +445,7 @@ class MarketDataService:
|
||||||
# Process results based on output format
|
# Process results based on output format
|
||||||
result = []
|
result = []
|
||||||
dates = arrays["dates"]
|
dates = arrays["dates"]
|
||||||
output_format = definition["output_format"]
|
output_format = str(definition["output_format"])
|
||||||
|
|
||||||
if output_format == "single":
|
if output_format == "single":
|
||||||
# Single output array
|
# Single output array
|
||||||
|
|
@ -576,7 +588,8 @@ class MarketDataService:
|
||||||
def get_available_indicators(self) -> dict[str, str]:
|
def get_available_indicators(self) -> dict[str, str]:
|
||||||
"""Get list of all available indicators with descriptions."""
|
"""Get list of all available indicators with descriptions."""
|
||||||
return {
|
return {
|
||||||
name: info["description"] for name, info in INDICATOR_DEFINITIONS.items()
|
name: str(info["description"])
|
||||||
|
for name, info in INDICATOR_DEFINITIONS.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_available_presets(
|
def get_available_presets(
|
||||||
|
|
@ -612,13 +625,17 @@ class MarketDataService:
|
||||||
definition = INDICATOR_DEFINITIONS[indicator_upper]
|
definition = INDICATOR_DEFINITIONS[indicator_upper]
|
||||||
return IndicatorConfig(
|
return IndicatorConfig(
|
||||||
name=indicator_upper,
|
name=indicator_upper,
|
||||||
parameters=definition["default_params"],
|
parameters=cast(
|
||||||
input_types=definition["input_types"],
|
"dict[str, IndicatorParamValue]", definition["default_params"]
|
||||||
output_format=definition["output_format"],
|
),
|
||||||
param_ranges=definition["param_ranges"],
|
input_types=cast("list[InputSpec]", definition["input_types"]),
|
||||||
default_params=definition["default_params"],
|
output_format=cast("OutputSpec", definition["output_format"]),
|
||||||
talib_function=definition["talib_function"],
|
param_ranges=cast("ParamRanges", definition["param_ranges"]),
|
||||||
description=definition["description"],
|
default_params=cast(
|
||||||
|
"dict[str, IndicatorParamValue]", definition["default_params"]
|
||||||
|
),
|
||||||
|
talib_function=str(definition["talib_function"]),
|
||||||
|
description=str(definition["description"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _calculate_signal_strength(
|
def _calculate_signal_strength(
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class ScrapeResult:
|
||||||
title: str = ""
|
title: str = ""
|
||||||
publish_date: str = ""
|
publish_date: str = ""
|
||||||
is_paywall: bool = False
|
is_paywall: bool = False
|
||||||
keywords: list[str] = None # Extracted keywords from newspaper4k
|
keywords: list[str] | None = None # Extracted keywords from newspaper4k
|
||||||
summary: str = "" # Article summary from newspaper4k
|
summary: str = "" # Article summary from newspaper4k
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ Google News client for live news data via RSS feeds.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
|
|
@ -168,11 +168,19 @@ class GoogleNewsClient:
|
||||||
|
|
||||||
# Parse published date with fallback to current time
|
# Parse published date with fallback to current time
|
||||||
try:
|
try:
|
||||||
published = (
|
if published_str:
|
||||||
date_parser.parse(published_str) if published_str else datetime.utcnow()
|
parsed_result = date_parser.parse(published_str)
|
||||||
)
|
# dateutil.parser.parse() always returns datetime unless fuzzy_with_tokens=True
|
||||||
|
# Since we're not using fuzzy_with_tokens, we know it's a datetime
|
||||||
|
if isinstance(parsed_result, datetime):
|
||||||
|
published = parsed_result
|
||||||
|
else:
|
||||||
|
# Fallback for any unexpected types (shouldn't happen)
|
||||||
|
published = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
|
else:
|
||||||
|
published = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
except (ValueError, OverflowError, TypeError):
|
except (ValueError, OverflowError, TypeError):
|
||||||
published = datetime.utcnow()
|
published = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
|
|
||||||
# Extract source from title (Google News format: "Title - Source")
|
# Extract source from title (Google News format: "Title - Source")
|
||||||
title_parts = raw_title.split(" - ")
|
title_parts = raw_title.split(" - ")
|
||||||
|
|
@ -181,7 +189,7 @@ class GoogleNewsClient:
|
||||||
|
|
||||||
return GoogleNewsArticle(
|
return GoogleNewsArticle(
|
||||||
title=title,
|
title=title,
|
||||||
link=link,
|
link=str(link) if link else "",
|
||||||
published=published,
|
published=published,
|
||||||
summary=summary,
|
summary=summary,
|
||||||
source=source,
|
source=source,
|
||||||
|
|
|
||||||
|
|
@ -639,7 +639,7 @@ class NewsService:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Simple keyword extraction from titles
|
# Simple keyword extraction from titles
|
||||||
word_counts = {}
|
word_counts: dict[str, int] = {}
|
||||||
stop_words = {
|
stop_words = {
|
||||||
"the",
|
"the",
|
||||||
"a",
|
"a",
|
||||||
|
|
@ -680,7 +680,10 @@ class NewsService:
|
||||||
# Clean word
|
# Clean word
|
||||||
word = "".join(c for c in word if c.isalnum())
|
word = "".join(c for c in word if c.isalnum())
|
||||||
if len(word) > 3 and word not in stop_words:
|
if len(word) > 3 and word not in stop_words:
|
||||||
word_counts[word] = word_counts.get(word, 0) + 1
|
if word in word_counts:
|
||||||
|
word_counts[word] += 1
|
||||||
|
else:
|
||||||
|
word_counts[word] = 1
|
||||||
|
|
||||||
# Get top trending words
|
# Get top trending words
|
||||||
trending = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
trending = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
# TradingAgents/graph/__init__.py
|
# TradingAgents/graph/__init__.py
|
||||||
|
|
||||||
from .conditional_logic import ConditionalLogic
|
from .conditional_logic import ConditionalLogic
|
||||||
|
from .graph_setup import GraphSetup
|
||||||
from .propagation import Propagator
|
from .propagation import Propagator
|
||||||
from .reflection import Reflector
|
from .reflection import Reflector
|
||||||
from .setup import GraphSetup
|
|
||||||
from .signal_processing import SignalProcessor
|
from .signal_processing import SignalProcessor
|
||||||
from .trading_graph import TradingAgentsGraph
|
from .trading_graph import TradingAgentsGraph
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,28 +24,24 @@ class Propagator:
|
||||||
"company_of_interest": company_name,
|
"company_of_interest": company_name,
|
||||||
"trade_date": str(trade_date),
|
"trade_date": str(trade_date),
|
||||||
"investment_debate_state": InvestDebateState(
|
"investment_debate_state": InvestDebateState(
|
||||||
{
|
bull_history="",
|
||||||
"bull_history": "",
|
bear_history="",
|
||||||
"bear_history": "",
|
history="",
|
||||||
"history": "",
|
current_response="",
|
||||||
"current_response": "",
|
judge_decision="",
|
||||||
"judge_decision": "",
|
count=0,
|
||||||
"count": 0,
|
|
||||||
}
|
|
||||||
),
|
),
|
||||||
"risk_debate_state": RiskDebateState(
|
"risk_debate_state": RiskDebateState(
|
||||||
{
|
risky_history="",
|
||||||
"risky_history": "",
|
safe_history="",
|
||||||
"safe_history": "",
|
neutral_history="",
|
||||||
"neutral_history": "",
|
history="",
|
||||||
"history": "",
|
latest_speaker="",
|
||||||
"latest_speaker": "",
|
current_risky_response="",
|
||||||
"current_risky_response": "",
|
current_safe_response="",
|
||||||
"current_safe_response": "",
|
current_neutral_response="",
|
||||||
"current_neutral_response": "",
|
judge_decision="",
|
||||||
"judge_decision": "",
|
count=0,
|
||||||
"count": 0,
|
|
||||||
}
|
|
||||||
),
|
),
|
||||||
"market_report": "",
|
"market_report": "",
|
||||||
"fundamentals_report": "",
|
"fundamentals_report": "",
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from langchain_anthropic import ChatAnthropic
|
from langchain_anthropic import ChatAnthropic
|
||||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||||
|
|
@ -21,15 +22,20 @@ from tradingagents.domains.news.news_service import NewsService
|
||||||
from tradingagents.domains.socialmedia.social_media_service import SocialMediaService
|
from tradingagents.domains.socialmedia.social_media_service import SocialMediaService
|
||||||
|
|
||||||
from .conditional_logic import ConditionalLogic
|
from .conditional_logic import ConditionalLogic
|
||||||
|
from .graph_setup import GraphSetup
|
||||||
from .propagation import Propagator
|
from .propagation import Propagator
|
||||||
from .reflection import Reflector
|
from .reflection import Reflector
|
||||||
from .setup import GraphSetup
|
|
||||||
from .signal_processing import SignalProcessor
|
from .signal_processing import SignalProcessor
|
||||||
|
|
||||||
|
|
||||||
class TradingAgentsGraph:
|
class TradingAgentsGraph:
|
||||||
"""Main class that orchestrates the trading agents framework."""
|
"""Main class that orchestrates the trading agents framework."""
|
||||||
|
|
||||||
|
# Type annotations for LLM attributes
|
||||||
|
deep_thinking_llm: ChatOpenAI | ChatAnthropic | ChatGoogleGenerativeAI
|
||||||
|
quick_thinking_llm: ChatOpenAI | ChatAnthropic | ChatGoogleGenerativeAI
|
||||||
|
curr_state: dict[str, Any] | None
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
selected_analysts=None,
|
selected_analysts=None,
|
||||||
|
|
@ -48,6 +54,7 @@ class TradingAgentsGraph:
|
||||||
|
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
self.config = config or TradingAgentsConfig()
|
self.config = config or TradingAgentsConfig()
|
||||||
|
self.curr_state = None
|
||||||
|
|
||||||
# Create necessary directories
|
# Create necessary directories
|
||||||
os.makedirs(
|
os.makedirs(
|
||||||
|
|
|
||||||
18
uv.lock
18
uv.lock
|
|
@ -2908,6 +2908,22 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" },
|
{ url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyrefly"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/b9/36/55f1503e8722ae60e7db9040c2562a69cd1ac1b698188b34b56ece9461e9/pyrefly-0.28.1.tar.gz", hash = "sha256:9ebc67e4a2e3d33c78f1962e7b2a16cd9b4415ce22fcf7a290b741ed9f3b7535", size = 1220266, upload-time = "2025-08-12T05:31:01.835Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ae/b6/e5857aa7225f6ca65f9c3f21a710b0942b62d7f37ee065321bfc03be8d87/pyrefly-0.28.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a4abd5218f43c25c00571fc498f85892b434d2361882a9e38ca7bb0ccb949bff", size = 6434495, upload-time = "2025-08-12T05:30:43.81Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a7/53/4a8810ede4c049856ea890f6d4093a7204e63fa4fdea750697b458ef49ee/pyrefly-0.28.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7b0ef6859ceca146f41be152e3bc783248cac7425f904a67bb9d3b130210e03c", size = 6006320, upload-time = "2025-08-12T05:30:46.259Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ed/66/9a0a90de127d4c1a186c49e8ca6ee94498c0c53e289e42d8a1baac5278b0/pyrefly-0.28.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ac5fa3dcf83d51f9a7524a391f3614e8227fa4d22f4c053437f91e763d1fa6", size = 6222713, upload-time = "2025-08-12T05:30:49.416Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b0/52/d214d8df6fa24b256835b7a9e7d31b8a2fc161fd72fbc7b38d4f0f2302e6/pyrefly-0.28.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:428e8a174891a14d9e7364e20073162d66c7a0e2575dc5433e2f8228a0fe94ca", size = 7003024, upload-time = "2025-08-12T05:30:51.304Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/de/db/9fdbc4b73348bc1c3c4899051c1fbd614824ceb63e838b399f81a5786f23/pyrefly-0.28.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a5f66c43fc2e4676307b539d1ed085744a5625c579365cfc889cb9faf9ef8d0", size = 6695834, upload-time = "2025-08-12T05:30:53.501Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0f/fb/a3bf2c416735068f86342bf155bb30bf791a2ace87cc99eec1889f8a1845/pyrefly-0.28.1-py3-none-win32.whl", hash = "sha256:ccf2e7d1253de03940953aeb8746c189435899620d113cb05114e8d2175892e4", size = 6209107, upload-time = "2025-08-12T05:30:55.659Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/dd/bc/d75894cf78b9c961e76c9a13e79216ff9081d7ecd9190d2148ffa72a7e99/pyrefly-0.28.1-py3-none-win_amd64.whl", hash = "sha256:cb973dc1fc3c128f9d674f943eca9eea6d4ed272a329836efc9d6e5c16ebe12a", size = 6630549, upload-time = "2025-08-12T05:30:58.289Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7d/8d/63a69a61aea24e4580e25f2a6bf5bb27bea30f06d34c4de0f3d414bf11ec/pyrefly-0.28.1-py3-none-win_arm64.whl", hash = "sha256:b3aa87f12555dda76b60aa101466ad5fde54a53f20c5112b02ea2eaaf0d6bfe9", size = 6258641, upload-time = "2025-08-12T05:31:00.195Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyright"
|
name = "pyright"
|
||||||
version = "1.1.403"
|
version = "1.1.403"
|
||||||
|
|
@ -3649,6 +3665,7 @@ dev = [
|
||||||
|
|
||||||
[package.dev-dependencies]
|
[package.dev-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
|
{ name = "pyrefly" },
|
||||||
{ name = "pytest" },
|
{ name = "pytest" },
|
||||||
{ name = "pytest-asyncio" },
|
{ name = "pytest-asyncio" },
|
||||||
{ name = "pytest-cov" },
|
{ name = "pytest-cov" },
|
||||||
|
|
@ -3700,6 +3717,7 @@ provides-extras = ["dev"]
|
||||||
|
|
||||||
[package.metadata.requires-dev]
|
[package.metadata.requires-dev]
|
||||||
dev = [
|
dev = [
|
||||||
|
{ name = "pyrefly", specifier = ">=0.28.1" },
|
||||||
{ name = "pytest", specifier = ">=8.4.1" },
|
{ name = "pytest", specifier = ">=8.4.1" },
|
||||||
{ name = "pytest-asyncio", specifier = ">=1.1.0" },
|
{ name = "pytest-asyncio", specifier = ">=1.1.0" },
|
||||||
{ name = "pytest-cov", specifier = ">=6.2.1" },
|
{ name = "pytest-cov", specifier = ">=6.2.1" },
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue