From 3b9eef380de91b9abf86deaa8e522d43d055be32 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 21 Mar 2026 01:54:23 +0000
Subject: [PATCH] test: add unit tests for _tokenize in
 FinancialSituationMemory

Adds comprehensive tests for `_tokenize` method inside
`tradingagents/agents/utils/memory.py` focusing on regex behavior
`\b\w+\b` with case mixing, punctuation, edges (empty string),
numbers, underscores, quoting, and hyphenation strings.

Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>
---
 tests/unit/agents/utils/test_memory.py | 61 ++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 tests/unit/agents/utils/test_memory.py

diff --git a/tests/unit/agents/utils/test_memory.py b/tests/unit/agents/utils/test_memory.py
new file mode 100644
index 00000000..f3fdbe59
--- /dev/null
+++ b/tests/unit/agents/utils/test_memory.py
@@ -0,0 +1,61 @@
+import pytest
+from tradingagents.agents.utils.memory import FinancialSituationMemory
+
+@pytest.fixture
+def memory_instance():
+    """Fixture to provide a FinancialSituationMemory instance."""
+    return FinancialSituationMemory(name="test_memory")
+
+@pytest.mark.parametrize(
+    "input_text, expected_tokens",
+    [
+        # Simple cases
+        ("hello world", ["hello", "world"]),
+        ("SINGLE", ["single"]),
+        ("Mixed Case String", ["mixed", "case", "string"]),
+
+        # Numbers
+        ("123 456", ["123", "456"]),
+        ("year 2024", ["year", "2024"]),
+
+        # Punctuation
+        ("hello, world!", ["hello", "world"]),
+        ("end. start", ["end", "start"]),
+        ("questions?", ["questions"]),
+        ("multiple... dots", ["multiple", "dots"]),
+
+        # Edge cases with quotes, apostrophes, and hyphens (based on current implementation)
+        ("don't", ["don", "t"]),
+        ("it's", ["it", "s"]),
+        ("a-b", ["a", "b"]),
+        ("long-term", ["long", "term"]),
+        ('"quote"', ["quote"]),
+
+        # Underscores (word boundary \b and \w behavior)
+        ("_leading", ["_leading"]),
+        ("trailing_", ["trailing_"]),
+        ("in_between", ["in_between"]),
+
+        # Symbols
+        ("100% growth", ["100", "growth"]),
+        ("price $50", ["price", "50"]),
+        ("a & b", ["a", "b"]),
+        ("tech @ sector", ["tech", "sector"]),
+
+        # Empty and whitespace
+        ("", []),
+        ("   ", []),
+        ("\t\n", []),
+        ("  spaces  around  ", ["spaces", "around"]),
+
+        # Complex sentence
+        (
+            "High inflation (CPI at 8.5%) affects the $SPY heavily!",
+            ["high", "inflation", "cpi", "at", "8", "5", "affects", "the", "spy", "heavily"]
+        ),
+    ]
+)
+def test_tokenize(memory_instance, input_text, expected_tokens):
+    """Test the _tokenize method handles various strings correctly."""
+    tokens = memory_instance._tokenize(input_text)
+    assert tokens == expected_tokens