"""
COT (Commitment of Traders) Data Parser

CFTC publishes weekly positioning data for futures markets including gold.
Extreme positioning can signal potential reversals (contrarian indicator).
"""

import requests
import pandas as pd
from datetime import datetime, timedelta
from typing import Optional, Dict
import io
import time


|
class COTDataProvider:
    """Commitment of Traders report parser for futures positioning analysis.

    Downloads the CFTC's annual COT report archives (zipped text files),
    filters them down to COMEX gold futures, and formats the weekly
    positioning of Large Speculators, Commercials, and Small Traders as a
    CSV string with an interpretation section.  Falls back to clearly
    labelled simulated data when the CFTC site is unreachable.
    """

    # CFTC report URL templates (reference; _download_cot_report builds the
    # annual zip-archive URLs inline).
    LEGACY_URL = "https://www.cftc.gov/dea/newcot/deacot{year}.htm"
    DISAGGREGATED_URL = "https://www.cftc.gov/dea/newcot/deahistfo_{year}.txt"

    # Gold futures CFTC contract market codes
    GOLD_CODES = {
        "GC": "088691",  # Gold - Commodity Exchange Inc. (COMEX)
    }

    # Trader categories in legacy report
    LEGACY_CATEGORIES = {
        "commercial": "Commercial",
        "noncommercial": "Non-Commercial",  # Large Speculators
        "nonreportable": "Nonreportable",  # Small Traders
    }

    def __init__(self):
        """Initialize COT data provider."""
        self.session = requests.Session()
        # Simple in-memory cache of parsed DataFrames,
        # keyed "<report_type>_<year>".
        self.cache = {}

    def _download_cot_report(self, year: int, report_type: str = "legacy") -> pd.DataFrame:
        """Download and parse the annual COT report archive for a year.

        Args:
            year: Calendar year of the report archive.
            report_type: "legacy" (default) or any other value for the
                disaggregated futures format.

        Returns:
            Parsed report as a DataFrame (also stored in ``self.cache``).

        Raises:
            RuntimeError: If the HTTP download fails.
            ValueError: If the downloaded zip contains no ``.txt`` member.
        """
        cache_key = f"{report_type}_{year}"
        if cache_key in self.cache:
            return self.cache[cache_key]

        # Construct URL based on report type; CFTC publishes annual history
        # as zipped text files.
        if report_type == "legacy":
            # Legacy format is easier to parse
            url = f"https://www.cftc.gov/files/dea/history/deacot{year}.zip"
        else:
            url = f"https://www.cftc.gov/files/dea/history/fut_disagg_txt_{year}.zip"

        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            import zipfile
            from io import BytesIO

            with zipfile.ZipFile(BytesIO(response.content)) as z:
                # Find the text file(s) inside the zip archive.
                txt_files = [f for f in z.namelist() if f.endswith('.txt')]
                if not txt_files:
                    raise ValueError(f"No text file found in COT zip for {year}")

                # Read the first text file
                with z.open(txt_files[0]) as f:
                    df = pd.read_csv(f, low_memory=False)

            self.cache[cache_key] = df
            return df

        except requests.exceptions.RequestException as e:
            # Chain the original network error for easier debugging.
            raise RuntimeError(f"Failed to download COT report for {year}: {e}") from e

    def get_gold_positioning(
        self,
        start_date: str,
        end_date: str,
        lookback_weeks: int = 52
    ) -> str:
        """
        Get gold futures positioning data from COT reports.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            lookback_weeks: Accepted for interface compatibility; the
                start/end date range alone determines which weekly reports
                are returned.

        Returns:
            CSV string with positioning data and analysis.  Returns
            simulated data (clearly labelled) when no real report rows
            could be fetched for the requested range.
        """
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        # COT reports are weekly (published Fridays for Tuesday data).
        # Include the year before the range start so early-January
        # requests are still covered by an available annual archive.
        years = list(range(start_dt.year - 1, end_dt.year + 1))

        all_data = []
        for year in years:
            try:
                df = self._download_cot_report(year, "legacy")

                # Filter for COMEX gold futures by CFTC contract code.
                gold_code = self.GOLD_CODES["GC"]
                gold_df = df[df['CFTC_Contract_Market_Code'] == gold_code].copy()

                if not gold_df.empty:
                    all_data.append(gold_df)
            except Exception as e:
                # If download fails for a year, continue with available data
                print(f"Warning: Could not fetch COT data for {year}: {e}")
                continue

        if not all_data:
            return self._generate_mock_cot_data(start_date, end_date)

        # Combine all years
        combined_df = pd.concat(all_data, ignore_index=True)

        # Convert report date to datetime so we can range-filter below.
        combined_df['Report_Date_as_YYYY-MM-DD'] = pd.to_datetime(
            combined_df['Report_Date_as_YYYY-MM-DD']
        )

        # Filter by date range (inclusive on both ends).
        mask = (combined_df['Report_Date_as_YYYY-MM-DD'] >= start_dt) & \
               (combined_df['Report_Date_as_YYYY-MM-DD'] <= end_dt)
        filtered_df = combined_df[mask].copy()

        if filtered_df.empty:
            return self._generate_mock_cot_data(start_date, end_date)

        # Sort chronologically before formatting.
        filtered_df = filtered_df.sort_values('Report_Date_as_YYYY-MM-DD')

        # Extract key positioning metrics
        return self._format_cot_data(filtered_df)

    def _format_cot_data(self, df: pd.DataFrame) -> str:
        """Format COT rows into a commented CSV string with an analysis tail.

        Args:
            df: Gold COT rows with legacy-report column names and a
                datetime 'Report_Date_as_YYYY-MM-DD' column.

        Returns:
            CSV text: comment header, column header, one row per report
            week, then a commented interpretation section.
        """
        csv_lines = ["# Gold Futures Commitment of Traders (COT) Report"]
        csv_lines.append("# Source: CFTC (Commodity Futures Trading Commission)")
        csv_lines.append("# Large Specs = Non-Commercial traders (hedge funds, CTAs)")
        csv_lines.append("# Commercials = Producers, refiners, hedgers")
        csv_lines.append("# Small Traders = Retail/individual traders")
        csv_lines.append("")
        csv_lines.append(
            "date,large_spec_long,large_spec_short,large_spec_net,"
            "commercial_long,commercial_short,commercial_net,"
            "small_long,small_short,small_net,total_oi"
        )

        for _, row in df.iterrows():
            date = row['Report_Date_as_YYYY-MM-DD'].strftime('%Y-%m-%d')

            # Non-Commercial (Large Speculators)
            spec_long = row.get('NonComm_Positions_Long_All', 0)
            spec_short = row.get('NonComm_Positions_Short_All', 0)
            spec_net = spec_long - spec_short

            # Commercial (Hedgers)
            comm_long = row.get('Comm_Positions_Long_All', 0)
            comm_short = row.get('Comm_Positions_Short_All', 0)
            comm_net = comm_long - comm_short

            # Nonreportable (Small Traders)
            small_long = row.get('NonRept_Positions_Long_All', 0)
            small_short = row.get('NonRept_Positions_Short_All', 0)
            small_net = small_long - small_short

            # Total Open Interest
            total_oi = row.get('Open_Interest_All', 0)

            csv_lines.append(
                f"{date},{spec_long},{spec_short},{spec_net},"
                f"{comm_long},{comm_short},{comm_net},"
                f"{small_long},{small_short},{small_net},{total_oi}"
            )

        # Add analysis section
        csv_lines.append("\n# ANALYSIS:")
        csv_lines.append("# Net Positioning Interpretation:")
        csv_lines.append("# - Large Spec Net > 200k contracts = Extremely bullish positioning (potential reversal)")
        csv_lines.append("# - Large Spec Net < -100k contracts = Extremely bearish positioning (potential reversal)")
        csv_lines.append("# - Commercial Net is typically opposite to Large Specs (they hedge producer risk)")
        csv_lines.append("# - Watch for extremes in positioning as contrarian signals")

        return "\n".join(csv_lines)

    def _generate_mock_cot_data(self, start_date: str, end_date: str) -> str:
        """Generate clearly-labelled mock COT data when actual data unavailable.

        Produces one simulated weekly row per 7 days between start_date and
        end_date (inclusive), with positioning magnitudes in plausible
        contract ranges.  Values are random and non-deterministic.
        """
        import random  # hoisted out of the loop: loop-invariant

        csv_lines = ["# Gold Futures COT Report (SIMULATED DATA - CFTC API unavailable)"]
        csv_lines.append("# WARNING: This is mock data for demonstration purposes")
        csv_lines.append("")
        csv_lines.append(
            "date,large_spec_long,large_spec_short,large_spec_net,"
            "commercial_long,commercial_short,commercial_net,"
            "small_long,small_short,small_net,total_oi"
        )

        # Generate weekly data points
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        current_date = start_dt
        while current_date <= end_dt:
            # Simulate realistic positioning (in thousands of contracts):
            # specs net-long, commercials net-short, small traders mixed.
            spec_long = random.randint(180, 250) * 1000
            spec_short = random.randint(50, 100) * 1000
            spec_net = spec_long - spec_short

            comm_long = random.randint(80, 120) * 1000
            comm_short = random.randint(200, 280) * 1000
            comm_net = comm_long - comm_short

            small_long = random.randint(40, 70) * 1000
            small_short = random.randint(40, 70) * 1000
            small_net = small_long - small_short

            total_oi = spec_long + spec_short + comm_long + comm_short + small_long + small_short

            csv_lines.append(
                f"{current_date.strftime('%Y-%m-%d')},{spec_long},{spec_short},{spec_net},"
                f"{comm_long},{comm_short},{comm_net},"
                f"{small_long},{small_short},{small_net},{total_oi}"
            )

            # Move to next week (Tuesday report date)
            current_date += timedelta(days=7)

        return "\n".join(csv_lines)

    def get_positioning_percentile(
        self,
        current_date: str,
        lookback_years: int = 3
    ) -> Dict[str, float]:
        """
        Calculate percentile ranking of current positioning vs historical.

        Args:
            current_date: Date to analyze (YYYY-MM-DD)
            lookback_years: Years of history to compare (default 3)

        Returns:
            Dictionary with percentile rankings (0.0-1.0, fraction of
            historical weeks at or below the latest net position) for
            large specs and commercials, plus a text "interpretation".
            Empty dict when no usable data rows are available.
        """
        end_dt = datetime.strptime(current_date, "%Y-%m-%d")
        start_dt = end_dt - timedelta(days=365 * lookback_years)

        # Get historical data
        csv_data = self.get_gold_positioning(
            start_dt.strftime("%Y-%m-%d"),
            current_date,
            lookback_weeks=52 * lookback_years
        )

        # Parse CSV: first non-comment line is the header, rest are rows.
        lines = [l for l in csv_data.split('\n') if l and not l.startswith('#')]
        if len(lines) < 2:
            return {}

        # Column 3 = large_spec_net, column 6 = commercial_net.
        spec_nets = []
        comm_nets = []
        for line in lines[1:]:
            fields = line.split(',')
            try:
                spec_nets.append(float(fields[3]))
                comm_nets.append(float(fields[6]))
            except (IndexError, ValueError):
                # Skip the trailing analysis block / malformed rows.
                continue

        if not spec_nets:
            return {}

        def _pct(history, value):
            # Fraction of historical observations at or below `value`.
            return sum(1 for v in history if v <= value) / len(history)

        spec_pct = _pct(spec_nets, spec_nets[-1])
        comm_pct = _pct(comm_nets, comm_nets[-1])

        # Contrarian read: crowded spec longs are a bearish warning sign.
        if spec_pct >= 0.9:
            interpretation = "Large specs are heavily long (contrarian bearish signal)"
        elif spec_pct <= 0.1:
            interpretation = "Large specs are heavily short (contrarian bullish signal)"
        else:
            interpretation = "Positioning is not at a historical extreme"

        return {
            "large_spec_net_percentile": round(spec_pct, 2),
            "commercial_net_percentile": round(comm_pct, 2),
            "interpretation": interpretation,
        }
|
|
|
|
|
|
# Standalone functions for tool integration

# Lazily-created module-level provider shared by the tool functions below.
_cot_provider = None


def _get_cot_provider():
    """Return the shared COTDataProvider, constructing it on first call."""
    global _cot_provider
    if not _cot_provider:
        _cot_provider = COTDataProvider()
    return _cot_provider
|
|
|
|
|
|
def get_cot_positioning(
    asset: str,
    start_date: str,
    end_date: str,
    lookback_weeks: int = 52
) -> str:
    """
    Get Commitment of Traders positioning data for gold futures.

    COT reports break open interest down by trader category:
    Large Speculators (hedge funds, CTAs — trend followers, sentiment
    leaders), Commercials (producers, refiners — smart money, hedgers),
    and Small Traders (retail — often a contrarian indicator).  Extreme
    positioning signals potential reversals.

    Args:
        asset: Asset symbol (e.g., "GOLD", "GC")
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        lookback_weeks: Historical weeks to include (default 52)

    Returns:
        CSV with weekly positioning data and net positions, or a comment
        line when the asset is unsupported.
    """
    provider = _get_cot_provider()

    supported = {"GOLD", "XAU", "GC"}
    if asset.upper() not in supported:
        return f"# COT data not available for {asset}. Supported: GOLD, XAU, GC"
    return provider.get_gold_positioning(start_date, end_date, lookback_weeks)
|
|
|
|
|
|
def analyze_cot_extremes(current_date: str, lookback_years: int = 3) -> str:
    """
    Analyze whether current COT positioning is at historical extremes.

    Extreme long positioning by large specs = crowded trade, potential
    reversal; extreme short positioning = potential bottom.

    Args:
        current_date: Date to analyze (YYYY-MM-DD)
        lookback_years: Years of history for percentile comparison

    Returns:
        Multi-line analysis summary with percentile rankings.
    """
    provider = _get_cot_provider()
    stats = provider.get_positioning_percentile(current_date, lookback_years)

    spec_pct = stats.get('large_spec_net_percentile', 'N/A')
    meaning = stats.get('interpretation', 'Insufficient data')

    report = [f"# COT Positioning Analysis for {current_date}"]
    report.append(f"# Compared to {lookback_years}-year history")
    report.append("")
    report.append(f"Large Spec Net Position Percentile: {spec_pct}")
    report.append(f"Interpretation: {meaning}")
    report.append("")
    report.append("# Guidelines:")
    report.append("# - >90th percentile = Extremely bullish positioning (contrarian bearish)")
    report.append("# - <10th percentile = Extremely bearish positioning (contrarian bullish)")
    report.append("# - 40-60th percentile = Neutral positioning")

    return "\n".join(report)
|