398 lines
15 KiB
Python
398 lines
15 KiB
Python
"""Korean market data source using FinanceDataReader and web scraping.
|
||
|
||
Provides KRX stock data (OHLCV), technical indicators, exchange rates,
|
||
KOSPI/KOSDAQ index data, and foreign/institutional investor flow data.
|
||
"""
|
||
|
||
from typing import Annotated
|
||
from datetime import datetime, timedelta
|
||
from dateutil.relativedelta import relativedelta
|
||
import os
|
||
import pandas as pd
|
||
|
||
try:
|
||
import FinanceDataReader as fdr
|
||
except ImportError:
|
||
fdr = None
|
||
|
||
|
||
def _ensure_fdr():
|
||
if fdr is None:
|
||
raise ImportError(
|
||
"FinanceDataReader is required for Korean market data. "
|
||
"Install it with: pip install finance-datareader"
|
||
)
|
||
|
||
|
||
def _normalize_krx_symbol(symbol: str) -> str:
|
||
"""Normalize KRX stock symbol (e.g., '005930' for Samsung Electronics).
|
||
|
||
Handles both pure numeric codes and codes with market suffix like '005930.KS'.
|
||
"""
|
||
symbol = symbol.strip().upper()
|
||
# Remove market suffixes
|
||
for suffix in [".KS", ".KQ", ".KRX"]:
|
||
if symbol.endswith(suffix):
|
||
symbol = symbol[: -len(suffix)]
|
||
return symbol
|
||
|
||
|
||
def get_krx_stock_data(
|
||
symbol: Annotated[str, "KRX ticker symbol (e.g., '005930' for Samsung Electronics)"],
|
||
start_date: Annotated[str, "Start date in yyyy-mm-dd format"],
|
||
end_date: Annotated[str, "End date in yyyy-mm-dd format"],
|
||
) -> str:
|
||
"""Retrieve KRX stock OHLCV data using FinanceDataReader."""
|
||
_ensure_fdr()
|
||
|
||
symbol = _normalize_krx_symbol(symbol)
|
||
|
||
try:
|
||
data = fdr.DataReader(symbol, start_date, end_date)
|
||
|
||
if data is None or data.empty:
|
||
return f"No data found for KRX symbol '{symbol}' between {start_date} and {end_date}"
|
||
|
||
# Standardize column names
|
||
col_map = {
|
||
"Open": "Open",
|
||
"High": "High",
|
||
"Low": "Low",
|
||
"Close": "Close",
|
||
"Volume": "Volume",
|
||
"Change": "Change",
|
||
}
|
||
data = data.rename(columns={k: v for k, v in col_map.items() if k in data.columns})
|
||
|
||
# Round numeric columns
|
||
for col in ["Open", "High", "Low", "Close"]:
|
||
if col in data.columns:
|
||
data[col] = data[col].round(0).astype(int)
|
||
|
||
csv_string = data.to_csv()
|
||
|
||
header = f"# KRX Stock data for {symbol} from {start_date} to {end_date}\n"
|
||
header += f"# Total records: {len(data)}\n"
|
||
header += f"# Currency: KRW (Korean Won)\n"
|
||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||
|
||
return header + csv_string
|
||
|
||
except Exception as e:
|
||
return f"Error retrieving KRX stock data for {symbol}: {str(e)}"
|
||
|
||
|
||
def get_krx_indicators(
|
||
symbol: Annotated[str, "KRX ticker symbol"],
|
||
indicator: Annotated[str, "technical indicator name"],
|
||
curr_date: Annotated[str, "Current trading date, YYYY-mm-dd"],
|
||
look_back_days: Annotated[int, "how many days to look back"] = 30,
|
||
) -> str:
|
||
"""Calculate technical indicators for KRX stocks using FinanceDataReader + stockstats."""
|
||
_ensure_fdr()
|
||
from stockstats import wrap
|
||
|
||
symbol = _normalize_krx_symbol(symbol)
|
||
|
||
best_ind_params = {
|
||
"close_50_sma": "50 SMA: 중기 추세 지표. 추세 방향 및 동적 지지/저항 확인.",
|
||
"close_200_sma": "200 SMA: 장기 추세 기준선. 골든크로스/데드크로스 확인.",
|
||
"close_10_ema": "10 EMA: 단기 반응형 이동평균. 빠른 모멘텀 변화 포착.",
|
||
"macd": "MACD: EMA 차이 기반 모멘텀 지표. 크로스오버/다이버전스 확인.",
|
||
"macds": "MACD Signal: MACD 스무딩 라인. 매매 시그널 트리거.",
|
||
"macdh": "MACD Histogram: MACD와 시그널 차이. 모멘텀 강도/다이버전스.",
|
||
"rsi": "RSI: 과매수/과매도 판단. 70/30 기준선, 다이버전스 확인.",
|
||
"boll": "Bollinger Middle: 20 SMA 기반. 가격 움직임 기준선.",
|
||
"boll_ub": "Bollinger Upper: +2σ. 과매수/돌파 구간.",
|
||
"boll_lb": "Bollinger Lower: -2σ. 과매도/반등 구간.",
|
||
"atr": "ATR: 변동성 측정. 손절가/포지션 사이즈 결정 기준.",
|
||
"vwma": "VWMA: 거래량 가중 이동평균. 거래량과 가격 통합 추세 확인.",
|
||
"mfi": "MFI: 자금흐름지수. 가격+거래량 기반 과매수(>80)/과매도(<20) 판단.",
|
||
}
|
||
|
||
if indicator not in best_ind_params:
|
||
return (
|
||
f"Indicator '{indicator}' not supported. "
|
||
f"Available: {list(best_ind_params.keys())}"
|
||
)
|
||
|
||
try:
|
||
curr_date_dt = datetime.strptime(curr_date, "%Y-%m-%d")
|
||
# Fetch extra data for indicator calculation warmup
|
||
fetch_start = (curr_date_dt - relativedelta(years=1)).strftime("%Y-%m-%d")
|
||
fetch_end = curr_date
|
||
|
||
data = fdr.DataReader(symbol, fetch_start, fetch_end)
|
||
if data is None or data.empty:
|
||
return f"No data for KRX symbol '{symbol}'"
|
||
|
||
data = data.reset_index()
|
||
# Ensure Date column exists
|
||
if "Date" not in data.columns:
|
||
data = data.rename(columns={data.columns[0]: "Date"})
|
||
|
||
df = wrap(data)
|
||
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime("%Y-%m-%d")
|
||
|
||
# Calculate indicator
|
||
df[indicator]
|
||
|
||
# Build result for look_back period
|
||
before = curr_date_dt - relativedelta(days=look_back_days)
|
||
result_dict = {}
|
||
for _, row in df.iterrows():
|
||
date_str = row["Date"]
|
||
val = row[indicator]
|
||
result_dict[date_str] = "N/A" if pd.isna(val) else str(round(float(val), 4))
|
||
|
||
ind_string = ""
|
||
current_dt = curr_date_dt
|
||
while current_dt >= before:
|
||
date_str = current_dt.strftime("%Y-%m-%d")
|
||
value = result_dict.get(date_str, "N/A: 비거래일 (주말/공휴일)")
|
||
ind_string += f"{date_str}: {value}\n"
|
||
current_dt -= timedelta(days=1)
|
||
|
||
return (
|
||
f"## {indicator} values for KRX:{symbol} "
|
||
f"from {before.strftime('%Y-%m-%d')} to {curr_date}:\n\n"
|
||
+ ind_string
|
||
+ f"\n\n{best_ind_params[indicator]}"
|
||
)
|
||
|
||
except Exception as e:
|
||
return f"Error calculating indicator for KRX:{symbol}: {str(e)}"
|
||
|
||
|
||
def get_exchange_rate(
|
||
currency_pair: Annotated[str, "Currency pair (e.g., 'USD/KRW', 'JPY/KRW', 'EUR/KRW')"],
|
||
start_date: Annotated[str, "Start date in yyyy-mm-dd format"],
|
||
end_date: Annotated[str, "End date in yyyy-mm-dd format"],
|
||
) -> str:
|
||
"""Retrieve exchange rate data using FinanceDataReader."""
|
||
_ensure_fdr()
|
||
|
||
try:
|
||
data = fdr.DataReader(currency_pair, start_date, end_date)
|
||
|
||
if data is None or data.empty:
|
||
return f"No exchange rate data for '{currency_pair}' between {start_date} and {end_date}"
|
||
|
||
csv_string = data.to_csv()
|
||
|
||
header = f"# Exchange Rate: {currency_pair} from {start_date} to {end_date}\n"
|
||
header += f"# Total records: {len(data)}\n"
|
||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||
|
||
return header + csv_string
|
||
|
||
except Exception as e:
|
||
return f"Error retrieving exchange rate for {currency_pair}: {str(e)}"
|
||
|
||
|
||
def get_korea_index_data(
|
||
index_code: Annotated[str, "Index code: 'KS11' (KOSPI), 'KQ11' (KOSDAQ), 'KS200' (KOSPI200)"],
|
||
start_date: Annotated[str, "Start date in yyyy-mm-dd format"],
|
||
end_date: Annotated[str, "End date in yyyy-mm-dd format"],
|
||
) -> str:
|
||
"""Retrieve Korean market index data (KOSPI, KOSDAQ, KOSPI200)."""
|
||
_ensure_fdr()
|
||
|
||
index_names = {
|
||
"KS11": "KOSPI",
|
||
"KQ11": "KOSDAQ",
|
||
"KS200": "KOSPI 200",
|
||
"KS50": "KOSPI 50",
|
||
}
|
||
|
||
index_name = index_names.get(index_code, index_code)
|
||
|
||
try:
|
||
data = fdr.DataReader(index_code, start_date, end_date)
|
||
|
||
if data is None or data.empty:
|
||
return f"No index data found for '{index_name}' between {start_date} and {end_date}"
|
||
|
||
csv_string = data.to_csv()
|
||
|
||
header = f"# {index_name} Index Data from {start_date} to {end_date}\n"
|
||
header += f"# Total records: {len(data)}\n"
|
||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||
|
||
return header + csv_string
|
||
|
||
except Exception as e:
|
||
return f"Error retrieving {index_name} index data: {str(e)}"
|
||
|
||
|
||
def get_investor_trading_data(
|
||
symbol: Annotated[str, "KRX ticker symbol (e.g., '005930')"],
|
||
start_date: Annotated[str, "Start date in yyyy-mm-dd format"],
|
||
end_date: Annotated[str, "End date in yyyy-mm-dd format"],
|
||
) -> str:
|
||
"""Retrieve foreign and institutional investor trading (buy/sell) data for a KRX stock.
|
||
|
||
Uses pykrx for detailed investor flow data.
|
||
"""
|
||
symbol = _normalize_krx_symbol(symbol)
|
||
|
||
try:
|
||
from pykrx import stock as krx_stock
|
||
|
||
# Get investor trading data by investor type
|
||
df = krx_stock.get_market_trading_value_by_investor(
|
||
start_date.replace("-", ""),
|
||
end_date.replace("-", ""),
|
||
symbol,
|
||
)
|
||
|
||
if df is None or df.empty:
|
||
return f"No investor trading data found for '{symbol}' between {start_date} and {end_date}"
|
||
|
||
csv_string = df.to_csv()
|
||
|
||
header = f"# 투자자별 매매동향: {symbol} ({start_date} ~ {end_date})\n"
|
||
header += f"# 단위: KRW (원)\n"
|
||
header += f"# 양수 = 순매수 (Net Buy), 음수 = 순매도 (Net Sell)\n"
|
||
header += f"# 컬럼: 금융투자, 보험, 투신, 사모, 은행, 기타금융, 연기금, 기관합계, 기타법인, 개인, 외국인, 기타외국인, 전체\n"
|
||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||
|
||
return header + csv_string
|
||
|
||
except ImportError:
|
||
# Fallback: try to get basic data from FinanceDataReader
|
||
return _get_investor_data_fallback(symbol, start_date, end_date)
|
||
except Exception as e:
|
||
return f"Error retrieving investor trading data for {symbol}: {str(e)}"
|
||
|
||
|
||
def _get_investor_data_fallback(symbol: str, start_date: str, end_date: str) -> str:
|
||
"""Fallback for investor data when pykrx is not available."""
|
||
return (
|
||
f"투자자별 매매동향 데이터를 가져올 수 없습니다 (symbol: {symbol}).\n"
|
||
f"pykrx 패키지가 필요합니다: pip install pykrx\n"
|
||
f"pykrx 설치 후 외국인/기관 수급 데이터를 확인할 수 있습니다."
|
||
)
|
||
|
||
|
||
def get_krx_market_cap(
|
||
symbol: Annotated[str, "KRX ticker symbol (e.g., '005930')"],
|
||
curr_date: Annotated[str, "Current date in yyyy-mm-dd format"],
|
||
) -> str:
|
||
"""Retrieve market capitalization and trading info for a KRX stock."""
|
||
_ensure_fdr()
|
||
symbol = _normalize_krx_symbol(symbol)
|
||
|
||
try:
|
||
from pykrx import stock as krx_stock
|
||
|
||
date_str = curr_date.replace("-", "")
|
||
# Get market cap for specific date
|
||
df = krx_stock.get_market_cap_by_date(date_str, date_str, symbol)
|
||
|
||
if df is None or df.empty:
|
||
return f"No market cap data for '{symbol}' on {curr_date}"
|
||
|
||
result = f"# KRX 시가총액 정보: {symbol} ({curr_date})\n\n"
|
||
for _, row in df.iterrows():
|
||
result += f"시가총액: {row.get('시가총액', 'N/A'):,} 원\n"
|
||
result += f"거래량: {row.get('거래량', 'N/A'):,} 주\n"
|
||
result += f"거래대금: {row.get('거래대금', 'N/A'):,} 원\n"
|
||
result += f"상장주식수: {row.get('상장주식수', 'N/A'):,} 주\n"
|
||
|
||
return result
|
||
|
||
except ImportError:
|
||
return f"시가총액 데이터를 가져오려면 pykrx 패키지가 필요합니다: pip install pykrx"
|
||
except Exception as e:
|
||
return f"Error retrieving market cap for {symbol}: {str(e)}"
|
||
|
||
|
||
def get_krx_fundamentals(
|
||
ticker: Annotated[str, "KRX ticker symbol (e.g., '005930')"],
|
||
curr_date: Annotated[str, "current date in yyyy-mm-dd format"] = None,
|
||
) -> str:
|
||
"""Get fundamental data for a KRX-listed company.
|
||
|
||
Combines FinanceDataReader stock info with pykrx fundamental ratios.
|
||
"""
|
||
_ensure_fdr()
|
||
ticker = _normalize_krx_symbol(ticker)
|
||
|
||
result_lines = []
|
||
result_lines.append(f"# KRX 기업 기본정보: {ticker}\n")
|
||
|
||
# Try to get basic info from FinanceDataReader
|
||
try:
|
||
listing = fdr.StockListing("KRX")
|
||
if listing is not None and not listing.empty:
|
||
# Search for the ticker
|
||
match = listing[listing["Code"] == ticker]
|
||
if match.empty:
|
||
match = listing[listing["Symbol"] == ticker]
|
||
|
||
if not match.empty:
|
||
row = match.iloc[0]
|
||
name = row.get("Name", row.get("ISU_ABBRV", "N/A"))
|
||
market = row.get("Market", "N/A")
|
||
sector = row.get("Sector", row.get("업종명", "N/A"))
|
||
industry = row.get("Industry", "N/A")
|
||
|
||
result_lines.append(f"종목명: {name}")
|
||
result_lines.append(f"시장: {market}")
|
||
result_lines.append(f"업종: {sector}")
|
||
if industry != "N/A":
|
||
result_lines.append(f"산업: {industry}")
|
||
except Exception:
|
||
pass
|
||
|
||
# Try to get fundamental ratios from pykrx
|
||
try:
|
||
from pykrx import stock as krx_stock
|
||
|
||
if curr_date:
|
||
date_str = curr_date.replace("-", "")
|
||
else:
|
||
date_str = datetime.now().strftime("%Y%m%d")
|
||
|
||
# Get PER, PBR, DIV from pykrx
|
||
fund_df = krx_stock.get_market_fundamental_by_date(date_str, date_str, ticker)
|
||
if fund_df is not None and not fund_df.empty:
|
||
row = fund_df.iloc[0]
|
||
result_lines.append(f"\n## 투자지표 ({curr_date or 'latest'})")
|
||
if "BPS" in fund_df.columns:
|
||
result_lines.append(f"BPS (주당순자산): {row['BPS']:,.0f} 원")
|
||
if "PER" in fund_df.columns:
|
||
result_lines.append(f"PER (주가수익비율): {row['PER']:.2f}")
|
||
if "PBR" in fund_df.columns:
|
||
result_lines.append(f"PBR (주가순자산비율): {row['PBR']:.2f}")
|
||
if "EPS" in fund_df.columns:
|
||
result_lines.append(f"EPS (주당순이익): {row['EPS']:,.0f} 원")
|
||
if "DIV" in fund_df.columns:
|
||
result_lines.append(f"배당수익률: {row['DIV']:.2f}%")
|
||
if "DPS" in fund_df.columns:
|
||
result_lines.append(f"DPS (주당배당금): {row['DPS']:,.0f} 원")
|
||
|
||
# Get market cap
|
||
cap_df = krx_stock.get_market_cap_by_date(date_str, date_str, ticker)
|
||
if cap_df is not None and not cap_df.empty:
|
||
cap_row = cap_df.iloc[0]
|
||
result_lines.append(f"\n## 시가총액 정보")
|
||
if "시가총액" in cap_df.columns:
|
||
market_cap = cap_row["시가총액"]
|
||
# Format in 억 원
|
||
result_lines.append(f"시가총액: {market_cap:,.0f} 원 ({market_cap / 100_000_000:,.0f} 억원)")
|
||
if "상장주식수" in cap_df.columns:
|
||
result_lines.append(f"상장주식수: {cap_row['상장주식수']:,.0f} 주")
|
||
|
||
except ImportError:
|
||
result_lines.append("\n(pykrx 패키지 미설치 - 투자지표 데이터 제한)")
|
||
except Exception as e:
|
||
result_lines.append(f"\n투자지표 조회 오류: {str(e)}")
|
||
|
||
if len(result_lines) <= 1:
|
||
return f"No fundamental data found for KRX symbol '{ticker}'"
|
||
|
||
result_lines.append(f"\n# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||
return "\n".join(result_lines)
|