feat: NotebookLM sync with date-specific sources and consolidation (#28)

This commit is contained in:
ahmet guzererler 2026-03-19 15:39:25 +01:00 committed by GitHub
parent d2e967c4fc
commit d92fd9cab1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 272 additions and 74 deletions

View File

@ -6,8 +6,6 @@ ANTHROPIC_API_KEY=
XAI_API_KEY= XAI_API_KEY=
OPENROUTER_API_KEY= OPENROUTER_API_KEY=
NOTEBOOKLM_ID=e8fd4391-9cb2-43ff-b893-1316a52857b6
# ── Data Provider API Keys ─────────────────────────────────────────── # ── Data Provider API Keys ───────────────────────────────────────────
ALPHA_VANTAGE_API_KEY= ALPHA_VANTAGE_API_KEY=
# Free at https://finnhub.io — required for earnings/economic calendars and insider transactions # Free at https://finnhub.io — required for earnings/economic calendars and insider transactions
@ -16,8 +14,6 @@ FINNHUB_API_KEY=
TRADINGAGENTS_RESULTS_DIR=./my_results TRADINGAGENTS_RESULTS_DIR=./my_results
TRADINGAGENTS_MAX_DEBATE_ROUNDS=2 TRADINGAGENTS_MAX_DEBATE_ROUNDS=2
NOTEBOOKLM_ID=
#TRADINGAGENTS_VENDOR_SCANNER_DATA=alpha_vantage #TRADINGAGENTS_VENDOR_SCANNER_DATA=alpha_vantage
# ── Configuration overrides ────────────────────────────────────────── # ── Configuration overrides ──────────────────────────────────────────
@ -69,7 +65,7 @@ NOTEBOOKLM_ID=
# ── Google NotebookLM sync (optional) ──────────────────────────────── # ── Google NotebookLM sync (optional) ────────────────────────────────
# Notebook ID for daily digest upload via the nlm CLI tool # Notebook ID for daily digest upload via the nlm CLI tool
# NOTEBOOK_ID= # NOTEBOOKLM_ID=
# ── Data vendor routing ────────────────────────────────────────────── # ── Data vendor routing ──────────────────────────────────────────────
# Category-level vendor selection (yfinance | alpha_vantage | finnhub) # Category-level vendor selection (yfinance | alpha_vantage | finnhub)

View File

@ -1181,7 +1181,7 @@ def run_analysis():
digest_path = append_to_digest( digest_path = append_to_digest(
selections["analysis_date"], "analyze", selections["ticker"], digest_content selections["analysis_date"], "analyze", selections["ticker"], digest_content
) )
sync_to_notebooklm(digest_path) sync_to_notebooklm(digest_path, selections["analysis_date"])
# Write observability log # Write observability log
log_dir = get_ticker_dir(selections["analysis_date"], selections["ticker"]) log_dir = get_ticker_dir(selections["analysis_date"], selections["ticker"])
@ -1280,10 +1280,23 @@ def run_scan(date: Optional[str] = None):
set_run_logger(None) set_run_logger(None)
# Append to daily digest and sync to NotebookLM # Append to daily digest and sync to NotebookLM
macro_content = result.get("macro_scan_summary", "") scan_parts = []
if result.get("geopolitical_report"):
scan_parts.append(f"### Geopolitical & Macro\n{result['geopolitical_report']}")
if result.get("market_movers_report"):
scan_parts.append(f"### Market Movers\n{result['market_movers_report']}")
if result.get("sector_performance_report"):
scan_parts.append(f"### Sector Performance\n{result['sector_performance_report']}")
if result.get("industry_deep_dive_report"):
scan_parts.append(f"### Industry Deep Dive\n{result['industry_deep_dive_report']}")
if result.get("macro_scan_summary"):
scan_parts.append(f"### Macro Scan Summary\n{result['macro_scan_summary']}")
macro_content = "\n\n".join(scan_parts)
if macro_content: if macro_content:
digest_path = append_to_digest(scan_date, "scan", "Market Scan", macro_content) digest_path = append_to_digest(scan_date, "scan", "Market Scan", macro_content)
sync_to_notebooklm(digest_path) sync_to_notebooklm(digest_path, scan_date)
console.print(f"\n[green]Results saved to {save_dir}[/green]") console.print(f"\n[green]Results saved to {save_dir}[/green]")
@ -1365,6 +1378,12 @@ def run_pipeline():
) )
set_run_logger(None) set_run_logger(None)
# Append to daily digest and sync to NotebookLM
from tradingagents.pipeline.macro_bridge import render_combined_summary
pipeline_summary = render_combined_summary(results, macro_context)
digest_path = append_to_digest(analysis_date, "pipeline", "Pipeline Summary", pipeline_summary)
sync_to_notebooklm(digest_path, analysis_date)
successes = [r for r in results if not r.error] successes = [r for r in results if not r.error]
failures = [r for r in results if r.error] failures = [r for r in results if r.error]
console.print(f"\n[green]Done: {len(successes)} succeeded, {len(failures)} failed[/green]") console.print(f"\n[green]Done: {len(successes)} succeeded, {len(failures)} failed[/green]")

View File

@ -5,19 +5,17 @@ Daily digest consolidation and Google NotebookLM sync shipped (PR open: `feat/da
# Recent Progress # Recent Progress
- **PR #22 merged**: Unified report paths, structured observability logging, memory system update - **PR #22 merged**: Unified report paths, structured observability logging, memory system update
- **feat/daily-digest-notebooklm** (open PR): Daily digest consolidation + NotebookLM sync - **feat/daily-digest-notebooklm** (shipped): Daily digest consolidation + NotebookLM source sync
- `tradingagents/daily_digest.py``append_to_digest()` appends timestamped entries to `reports/daily/{date}/daily_digest.md` - `tradingagents/daily_digest.py``append_to_digest()` appends timestamped entries to `reports/daily/{date}/daily_digest.md`
- `tradingagents/notebook_sync.py``sync_to_notebooklm()` deletes old source then uploads new digest via `nlm` CLI (opt-in via `NOTEBOOK_ID` env var) - `tradingagents/notebook_sync.py``sync_to_notebooklm()` deletes existing "Daily Trading Digest" source then uploads new content via `nlm source add --text --wait`.
- `tradingagents/report_paths.py` — added `get_digest_path(date)` - `tradingagents/report_paths.py` — added `get_digest_path(date)`
- `cli/main.py``analyze` and `scan` commands both call digest + sync after each run - `cli/main.py``analyze` and `scan` commands both call digest + sync after each run
- `.env.example``NOTEBOOK_ID` added - `.env.example` — fixed consistency, removed duplicates, aligned with `NOTEBOOKLM_ID`
- **PR #21 merged**: Memory system v2 — builder/reader skills, 5 context files, post-commit hook - **Verification**: 220+ offline tests passing + 5 new unit tests for `notebook_sync.py` + live integration test passed.
- **PR #18 merged**: Opt-in vendor fallback — fail-fast by default (ADR 011)
- 220+ offline tests passing
# In Progress # In Progress
- Awaiting `NOTEBOOK_ID` from user to enable end-to-end NotebookLM test - Refinement of macro scan synthesis prompts (ongoing)
# Active Blockers # Active Blockers

View File

@ -102,12 +102,13 @@ Source: `tradingagents/report_paths.py`
## Daily Digest & NotebookLM Sync ## Daily Digest & NotebookLM Sync
After every `analyze` or `scan` run, the CLI: After every `analyze`, `scan`, or `pipeline` run, the CLI:
1. Calls `append_to_digest(date, entry_type, label, content)` → appends a timestamped section to `reports/daily/{date}/daily_digest.md` (creates the file on first run) 1. Calls `append_to_digest(date, entry_type, label, content)` → appends a timestamped section to `reports/daily/{date}/daily_digest.md` (creates the file on first run)
2. Calls `sync_to_notebooklm(digest_path)` → deletes the previous `daily_digest.md` source from the configured NotebookLM notebook, then uploads the updated file via the `nlm` CLI tool 2. Calls `sync_to_notebooklm(digest_path, date)` → finds the existing source titled `Daily Trading Digest ({date})` inside the configured NotebookLM notebook, deletes it if it exists, and then uploads the updated file content via `nlm source add --text --wait`.
`NOTEBOOK_ID` env var controls the target notebook. If unset, the sync step is silently skipped (opt-in). This ensures there is a single, up-to-date source per day in the user's NotebookLM workspace. `scan` consolidates all 5 macro reports into this digest.
`NOTEBOOKLM_ID` env var controls the target notebook. If unset, the sync step is silently skipped (opt-in).
Source: `tradingagents/daily_digest.py`, `tradingagents/notebook_sync.py` Source: `tradingagents/daily_digest.py`, `tradingagents/notebook_sync.py`
## Observability ## Observability

View File

@ -178,6 +178,8 @@ cli/
| `test_json_utils.py` | Unit | `extract_json()` — markdown, think blocks, edge cases | — | | `test_json_utils.py` | Unit | `extract_json()` — markdown, think blocks, edge cases | — |
| `test_macro_bridge.py` | Unit | Pipeline: parse, filter, render, save | — | | `test_macro_bridge.py` | Unit | Pipeline: parse, filter, render, save | — |
| `test_macro_regime.py` | Mixed | Macro signals, regime classification, report format | `integration` on live test | | `test_macro_regime.py` | Mixed | Macro signals, regime classification, report format | `integration` on live test |
| `test_nlm_live.py` | Integration | Live NLM CLI tests for NotebookLM sync | — |
| `test_notebook_sync.py` | Unit | `notebook_sync.py` logic, `nlm` subprocess mocking | — |
| `test_peer_comparison.py` | Mixed | Peer comparison functions | `integration` on live test | | `test_peer_comparison.py` | Mixed | Peer comparison functions | `integration` on live test |
| `test_scanner_comprehensive.py` | Integration | All 5 scanner tools + CLI output naming | — | | `test_scanner_comprehensive.py` | Integration | All 5 scanner tools + CLI output naming | — |
| `test_scanner_fallback.py` | Mixed | yfinance perf, AV failure mode, fallback routing | `integration` on some | | `test_scanner_fallback.py` | Mixed | yfinance perf, AV failure mode, fallback routing | `integration` on some |

View File

@ -0,0 +1,72 @@
import os
import json
import subprocess
import pytest
from pathlib import Path
# This test requires a real NOTEBOOKLM_ID in .env and nlm CLI logged in.
# It is excluded from regular unit tests by its location/filename.
# Target notebook ID; when unset, the skipif marker below disables the live test.
NOTEBOOK_ID = os.environ.get("NOTEBOOKLM_ID")
# Default pip/user-local install location of the nlm CLI binary — TODO confirm
# this matches how nlm is installed on CI hosts.
NLM_PATH = os.path.expanduser("~/.local/bin/nlm")
@pytest.mark.skipif(not NOTEBOOK_ID, reason="NOTEBOOKLM_ID not set")
@pytest.mark.skipif(not os.path.exists(NLM_PATH), reason="nlm CLI not found")
def test_nlm_source_crud_live():
    """Live integration test for nlm source commands.

    Exercises the real ``nlm`` CLI against the configured notebook:
    list -> delete any stale test source -> add via ``--text --wait`` ->
    list again and verify the source can be found by its title.
    Cleanup of the remote source runs in ``finally`` regardless of outcome.

    Fixes over the previous version:
    - ``source_id`` is initialised up front instead of probing ``locals()``
      in the ``finally`` block.
    - removed the unused ``test_integration_source.md`` scratch file that
      was written but never uploaded (the source is added via ``--text``),
      which leaked into the working directory on every run.
    - ``import re`` hoisted to the top of the function.
    """
    import re

    date = "2026-03-19"
    test_title = f"Integration Test Source ({date})"
    source_id = None  # set once the add step succeeds; drives cleanup
    try:
        # 1. Check if it already exists (from a failed run, maybe) and delete it
        print(f"\nChecking for existing '{test_title}' source...")
        result = subprocess.run(
            [NLM_PATH, "source", "list", NOTEBOOK_ID, "--json"],
            capture_output=True, text=True, check=True,
        )
        sources = json.loads(result.stdout)
        for s in sources:
            if s.get("title") == test_title:
                print(f"Deleting existing source {s['id']}")
                # check=False: stale-source deletion is best-effort
                subprocess.run(
                    [NLM_PATH, "source", "delete", NOTEBOOK_ID, s["id"], "-y"],
                    check=False,
                )

        # 2. Add source via text to ensure title is respected
        print(f"Adding source: {test_title}")
        result = subprocess.run(
            [NLM_PATH, "source", "add", NOTEBOOK_ID, "--text",
             "Integration Test Content", "--title", test_title, "--wait"],
            capture_output=True, text=True, check=True,
        )
        assert "Added source" in result.stdout
        # Parse ID from stdout if possible (it's not JSON)
        match = re.search(r"Source ID: ([a-f0-9\-]+)", result.stdout)
        source_id = match.group(1) if match else None
        assert source_id is not None
        print(f"Source created with ID: {source_id}")

        # 3. List and verify finding by name
        print(f"Verifying we can find source by its name title: '{test_title}'")
        result = subprocess.run(
            [NLM_PATH, "source", "list", NOTEBOOK_ID, "--json"],
            capture_output=True, text=True, check=True,
        )
        sources = json.loads(result.stdout)
        found_id_by_name = None
        for s in sources:
            if s.get("title") == test_title:
                found_id_by_name = s.get("id")
                break
        assert found_id_by_name == source_id, (
            f"Failed to find source ID {source_id} by title '{test_title}'\n"
            f"Found sources: {[s.get('title') for s in sources]}"
        )
        print(f"Successfully found source {found_id_by_name} by title.")
    finally:
        # 4. Clean up the remote source (best-effort, so a flaky delete
        # doesn't mask a real assertion failure above).
        if source_id:
            print(f"Cleaning up source {source_id}")
            subprocess.run(
                [NLM_PATH, "source", "delete", NOTEBOOK_ID, source_id, "-y"],
                check=False,
            )
        print("Integration test complete")

View File

@ -0,0 +1,123 @@
import json
import os
import subprocess
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from tradingagents.notebook_sync import sync_to_notebooklm
@pytest.fixture
def mock_nlm_path(tmp_path):
    """Provide the path of an empty, executable stand-in for the nlm binary."""
    fake_binary = tmp_path / "nlm"
    fake_binary.touch(mode=0o755)
    return str(fake_binary)
def test_sync_skips_when_no_notebook_id():
    """Should return silently if NOTEBOOKLM_ID is not set."""
    with patch.dict(os.environ, {}, clear=True):
        # Sync is opt-in: with no notebook configured this must be a
        # no-op — no exception, no subprocess call.
        # Should not raise or call anything
        sync_to_notebooklm(Path("test.md"), "2026-03-19")
def test_sync_skips_when_nlm_not_found():
    """Should warn and skip if nlm binary is not in PATH."""
    # Both discovery mechanisms report "missing": PATH lookup and any
    # filesystem existence check inside the module.
    with patch.dict(os.environ, {"NOTEBOOKLM_ID": "test-id"}), \
            patch("shutil.which", return_value=None), \
            patch("tradingagents.notebook_sync.Path.exists", return_value=False):
        sync_to_notebooklm(Path("test.md"), "2026-03-19")
def test_sync_performs_delete_then_add(mock_nlm_path):
    """Should find existing source, delete it, then add new one."""
    notebook_id = "test-notebook-id"
    stale_id = "existing-source-id"
    digest_file = Path("digest.md")
    digest_text = "# Daily Digest"

    # Simulated nlm invocations, in expected call order: list -> delete -> add.
    list_proc = MagicMock(returncode=0, stdout=json.dumps(
        [{"id": stale_id, "title": "Daily Trading Digest (2026-03-19)"}]
    ))
    delete_proc = MagicMock(returncode=0)
    add_proc = MagicMock(returncode=0)

    with patch.object(Path, "read_text", return_value=digest_text), \
            patch.dict(os.environ, {"NOTEBOOKLM_ID": notebook_id}), \
            patch("shutil.which", return_value=mock_nlm_path), \
            patch("subprocess.run",
                  side_effect=[list_proc, delete_proc, add_proc]) as run_mock:
        sync_to_notebooklm(digest_file, "2026-03-19")

    # Exactly three subprocess calls were made.
    assert run_mock.call_count == 3
    argv = [call[0][0] for call in run_mock.call_args_list]
    # 1) list queried the right notebook
    assert "list" in argv[0]
    assert notebook_id in argv[0]
    # 2) delete targeted the stale source
    assert "delete" in argv[1]
    assert stale_id in argv[1]
    # 3) add pushed the digest text inline
    assert "add" in argv[2]
    assert "--text" in argv[2]
    assert digest_text in argv[2]
def test_sync_adds_directly_when_none_exists(mock_nlm_path):
    """Should add new source directly if no existing one is found."""
    notebook_id = "test-notebook-id"
    digest_file = Path("digest.md")
    digest_text = "# New Digest"

    # list returns no sources, so sync should go straight to add.
    empty_list_proc = MagicMock(returncode=0, stdout="[]")
    add_proc = MagicMock(returncode=0)

    with patch.object(Path, "read_text", return_value=digest_text), \
            patch.dict(os.environ, {"NOTEBOOKLM_ID": notebook_id}), \
            patch("shutil.which", return_value=mock_nlm_path), \
            patch("subprocess.run",
                  side_effect=[empty_list_proc, add_proc]) as run_mock:
        sync_to_notebooklm(digest_file, "2026-03-19")

    # Only two calls — no delete step in between.
    assert run_mock.call_count == 2
    assert "list" in run_mock.call_args_list[0][0][0]
    assert "add" in run_mock.call_args_list[1][0][0]
def test_handles_json_error_gracefully(mock_nlm_path):
    """Should skip delete and attempt add if JSON list parsing fails."""
    # list output is not valid JSON; note returncode is deliberately left
    # as an auto-mock, mirroring a malformed CLI response.
    bad_list_proc = MagicMock(stdout="invalid json")
    add_proc = MagicMock(returncode=0)

    with patch.object(Path, "read_text", return_value="content"), \
            patch.dict(os.environ, {"NOTEBOOKLM_ID": "id"}), \
            patch("shutil.which", return_value=mock_nlm_path), \
            patch("subprocess.run",
                  side_effect=[bad_list_proc, add_proc]) as run_mock:
        sync_to_notebooklm(Path("test.md"), "2026-03-19")

    # Unparseable list output must not abort the sync: add still happens.
    assert run_mock.call_count == 2
    assert "add" in run_mock.call_args_list[1][0][0]

View File

@ -17,38 +17,28 @@ from rich.console import Console
console = Console() console = Console()
_NOTE_TITLE = "Daily Trading Digest"
# Common install locations outside of PATH (e.g. pip install --user)
_FALLBACK_PATHS = [
Path.home() / ".local" / "bin" / "nlm",
Path("/usr/local/bin/nlm"),
]
def _find_nlm() -> str | None: def _find_nlm() -> str | None:
"""Return the path to the nlm binary, or None if not found.""" """Resolve the path to the nlm CLI."""
found = shutil.which("nlm") if nlm_path := shutil.which("nlm"):
if found: return nlm_path
return found
for p in _FALLBACK_PATHS:
if p.exists():
return str(p)
return None return None
def sync_to_notebooklm(digest_path: Path, notebook_id: str | None = None) -> None: def sync_to_notebooklm(digest_path: Path, date: str, notebook_id: str | None = None) -> None:
"""Upload *digest_path* content to Google NotebookLM as a note. """Upload *digest_path* content to Google NotebookLM as a source.
If a note titled ``Daily Trading Digest`` already exists it is updated If a source with the title for the given day already exists, it is deleted
in-place; otherwise a new note is created. and re-uploaded to ensure the latest content is indexed.
Parameters Parameters
---------- ----------
digest_path: digest_path:
Path to the digest markdown file to upload. Path to the digest markdown file to upload.
date:
The date string (e.g., "YYYY-MM-DD") used for the source title.
notebook_id: notebook_id:
NotebookLM notebook ID. Falls back to the ``NOTEBOOK_ID`` NotebookLM notebook ID. Falls back to the ``NOTEBOOKLM_ID``
environment variable when *None*. environment variable when *None*.
""" """
if notebook_id is None: if notebook_id is None:
@ -62,63 +52,60 @@ def sync_to_notebooklm(digest_path: Path, notebook_id: str | None = None) -> Non
return return
content = digest_path.read_text() content = digest_path.read_text()
title = f"Daily Trading Digest ({date})"
# Check for an existing note with the same title # Find and delete existing source with the same title
existing_note_id = _find_note(nlm, notebook_id) existing_source_id = _find_source(nlm, notebook_id, title)
if existing_source_id:
_delete_source(nlm, notebook_id, existing_source_id)
if existing_note_id: # Add as a new source
_update_note(nlm, notebook_id, existing_note_id, content) _add_source(nlm, notebook_id, content, title)
else:
_create_note(nlm, notebook_id, content)
def _find_note(nlm: str, notebook_id: str) -> str | None: def _find_source(nlm: str, notebook_id: str, title: str) -> str | None:
"""Return the note ID for the daily digest note, or None if not found.""" """Return the source ID for the daily digest, or None if not found."""
try: try:
result = subprocess.run( result = subprocess.run(
[nlm, "note", "list", notebook_id, "--json"], [nlm, "source", "list", notebook_id, "--json"],
capture_output=True, capture_output=True,
text=True, text=True,
) )
if result.returncode != 0: if result.returncode != 0:
return None return None
data = json.loads(result.stdout) sources = json.loads(result.stdout)
notes = data.get("notes", data) if isinstance(data, dict) else data for source in sources:
for note in notes: if isinstance(source, dict) and source.get("title") == title:
if isinstance(note, dict) and note.get("title") == _NOTE_TITLE: return source.get("id")
return note.get("id") or note.get("noteId")
except (ValueError, KeyError, OSError): except (ValueError, KeyError, OSError):
pass pass
return None return None
def _create_note(nlm: str, notebook_id: str, content: str) -> None: def _delete_source(nlm: str, notebook_id: str, source_id: str) -> None:
"""Create a new note in the notebook.""" """Delete an existing source."""
try:
subprocess.run(
[nlm, "source", "delete", notebook_id, source_id, "-y"],
capture_output=True,
text=True,
check=False, # Ignore non-zero exit since nlm sometimes fails even on success
)
except OSError:
pass
def _add_source(nlm: str, notebook_id: str, content: str, title: str) -> None:
"""Add content as a new source."""
try: try:
result = subprocess.run( result = subprocess.run(
[nlm, "note", "create", notebook_id, "--title", _NOTE_TITLE, "--content", content], [nlm, "source", "add", notebook_id, "--title", title, "--text", content, "--wait"],
capture_output=True, capture_output=True,
text=True, text=True,
) )
if result.returncode == 0: if result.returncode == 0:
console.print(f"[green]✓ Created NotebookLM note: {_NOTE_TITLE}[/green]") console.print(f"[green]✓ Synced NotebookLM source: {title}[/green]")
else: else:
console.print(f"[yellow]Warning: nlm note create failed: {result.stderr.strip()}[/yellow]") console.print(f"[yellow]Warning: nlm source add failed: {result.stderr.strip()}[/yellow]")
except OSError as exc: except OSError as exc:
console.print(f"[yellow]Warning: could not create NotebookLM note: {exc}[/yellow]") console.print(f"[yellow]Warning: could not add NotebookLM source: {exc}[/yellow]")
def _update_note(nlm: str, notebook_id: str, note_id: str, content: str) -> None:
"""Update an existing note's content."""
try:
result = subprocess.run(
[nlm, "note", "update", notebook_id, note_id, "--content", content],
capture_output=True,
text=True,
)
if result.returncode == 0:
console.print(f"[green]✓ Updated NotebookLM note: {_NOTE_TITLE}[/green]")
else:
console.print(f"[yellow]Warning: nlm note update failed: {result.stderr.strip()}[/yellow]")
except OSError as exc:
console.print(f"[yellow]Warning: could not update NotebookLM note: {exc}[/yellow]")