test: add unit tests for _tokenize in FinancialSituationMemory

Adds comprehensive tests for the `_tokenize` method in
`tradingagents/agents/utils/memory.py`, focusing on the behavior of the
regex `\b\w+\b` with mixed case, punctuation, edge cases (empty string),
numbers, underscores, quoting, and hyphenated strings.

Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>
This commit is contained in:
google-labs-jules[bot] 2026-03-21 01:54:23 +00:00
parent 25457930ae
commit 3b9eef380d
1 changed files with 61 additions and 0 deletions

View File

@ -0,0 +1,61 @@
import pytest
from tradingagents.agents.utils.memory import FinancialSituationMemory
@pytest.fixture
def memory_instance():
    """Create a fresh FinancialSituationMemory for each test."""
    memory = FinancialSituationMemory(name="test_memory")
    return memory
# Table of (input, expected) pairs exercising the `\b\w+\b` + lowercase
# tokenization behavior across simple words, digits, punctuation,
# apostrophes/hyphens, underscores, symbols, and whitespace-only input.
_TOKENIZE_CASES = [
    # Plain words and case folding
    ("hello world", ["hello", "world"]),
    ("SINGLE", ["single"]),
    ("Mixed Case String", ["mixed", "case", "string"]),
    # Digit-only and mixed tokens
    ("123 456", ["123", "456"]),
    ("year 2024", ["year", "2024"]),
    # Punctuation is stripped, words survive
    ("hello, world!", ["hello", "world"]),
    ("end. start", ["end", "start"]),
    ("questions?", ["questions"]),
    ("multiple... dots", ["multiple", "dots"]),
    # Apostrophes and hyphens split tokens (current implementation behavior)
    ("don't", ["don", "t"]),
    ("it's", ["it", "s"]),
    ("a-b", ["a", "b"]),
    ("long-term", ["long", "term"]),
    ('"quote"', ["quote"]),
    # Underscores are word characters, so they stay attached
    ("_leading", ["_leading"]),
    ("trailing_", ["trailing_"]),
    ("in_between", ["in_between"]),
    # Non-word symbols act as separators
    ("100% growth", ["100", "growth"]),
    ("price $50", ["price", "50"]),
    ("a & b", ["a", "b"]),
    ("tech @ sector", ["tech", "sector"]),
    # Empty / whitespace-only input yields no tokens
    ("", []),
    (" ", []),
    ("\t\n", []),
    (" spaces around ", ["spaces", "around"]),
    # A realistic sentence mixing all of the above
    (
        "High inflation (CPI at 8.5%) affects the $SPY heavily!",
        ["high", "inflation", "cpi", "at", "8", "5", "affects", "the", "spy", "heavily"]
    ),
]


@pytest.mark.parametrize("input_text, expected_tokens", _TOKENIZE_CASES)
def test_tokenize(memory_instance, input_text, expected_tokens):
    """_tokenize splits on word boundaries and lowercases every token."""
    assert memory_instance._tokenize(input_text) == expected_tokens