From e14d07ea81b19a923f8c06349f23ce65d3df8b6a Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:05:29 +0000 Subject: [PATCH] perf(risk_metrics): optimize _percentile using heapq Optimize the _percentile calculation to use heapq.nsmallest or heapq.nlargest when requesting small extreme percentiles (like 5% VaR) from large lists, falling back to sorted() only when necessary. This avoids fully sorting the entire array. Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com> --- tradingagents/portfolio/risk_metrics.py | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/tradingagents/portfolio/risk_metrics.py b/tradingagents/portfolio/risk_metrics.py index 46896dd7..57b9cade 100644 --- a/tradingagents/portfolio/risk_metrics.py +++ b/tradingagents/portfolio/risk_metrics.py @@ -35,6 +35,7 @@ See ``docs/portfolio/00_overview.md`` — Phase 3 for the full specification. 
from __future__ import annotations +import heapq import math from typing import Any @@ -111,16 +112,40 @@ def _percentile(values: list[float], pct: float) -> float: """ if not values: raise ValueError("Cannot compute percentile of empty list") - sorted_vals = sorted(values) - n = len(sorted_vals) + + n = len(values) # Linear interpolation index index = (pct / 100.0) * (n - 1) lower = int(index) upper = lower + 1 frac = index - lower - if upper >= n: - return sorted_vals[-1] - return sorted_vals[lower] * (1.0 - frac) + sorted_vals[upper] * frac + + k_lower = lower + 1 + k_upper = n - lower + + # Full sort only when the needed rank exceeds both 50 and 5% of n; lists of <= 100 items always use heapq + if min(k_lower, k_upper) > 50 and min(k_lower, k_upper) > n * 0.05: + sorted_vals = sorted(values) + if upper >= n: + return sorted_vals[-1] + return sorted_vals[lower] * (1.0 - frac) + sorted_vals[upper] * frac + + if k_lower <= k_upper: + # Use nsmallest + k = k_lower + 1 + k_smallest = heapq.nsmallest(k, values) + if upper >= n: + return k_smallest[-1] + return k_smallest[-2] * (1.0 - frac) + k_smallest[-1] * frac + else: + # Use nlargest + k = k_upper + if k == 0: + return max(values) + k_largest = heapq.nlargest(k, values) + if upper >= n: + return k_largest[-1] + return k_largest[-1] * (1.0 - frac) + k_largest[-2] * frac # ---------------------------------------------------------------------------