TradingAgents/verify_local_embeddings.py

139 lines
5.6 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Verify that local sentence-transformers embeddings are working correctly.
This script tests the local embedding model without requiring external services.
"""
import os
import sys
# Sample sentences embedded by both verification modes.
_TEST_TEXTS = (
    "This is a test sentence for embedding generation.",
    "Financial markets are showing increased volatility.",
    "The company reported strong quarterly earnings.",
)


def _preview(text, limit=50):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def _print_test_texts(header, texts):
    """Print *header*, then a numbered preview of each text, then a blank line."""
    print(header)
    for i, text in enumerate(texts, 1):
        print(f" {i}. '{_preview(text)}'")
    print()


def _verify_with_library(sentence_transformer_cls, embedding_model):
    """Library mode: load the model in-process, embed the samples, print a report.

    Args:
        sentence_transformer_cls: The imported ``SentenceTransformer`` class.
        embedding_model: Model name passed to the class constructor.

    Returns:
        True once the embeddings and similarity scores have been printed.
        Any exception raised along the way propagates to the caller.
    """
    print("✅ Found local sentence-transformers library.")
    print(f"📦 Loading embedding model: {embedding_model}")
    print(" (First run will download the model, ~90MB)")
    print()
    model = sentence_transformer_cls(embedding_model)

    texts = list(_TEST_TEXTS)
    _print_test_texts(
        f"Testing embedding generation with {len(texts)} sentences:", texts
    )
    embeddings = model.encode(texts, convert_to_numpy=True)
    print("✅ SUCCESS!")
    print(f"Generated {len(embeddings)} embedding vectors")
    print(f"Embedding dimensions: {embeddings.shape[1]}")
    print(f"First embedding (first 5 values): {embeddings[0][:5].tolist()}")
    print()

    # numpy is imported here rather than at module level so the script can
    # still run its service-mode check on machines without numpy.
    from numpy import dot
    from numpy.linalg import norm

    def cosine_similarity(a, b):
        return dot(a, b) / (norm(a) * norm(b))

    sim_0_1 = cosine_similarity(embeddings[0], embeddings[1])
    sim_1_2 = cosine_similarity(embeddings[1], embeddings[2])
    print("Similarity scores:")
    print(f" Sentence 1 ↔ Sentence 2: {sim_0_1:.4f}")
    print(f" Sentence 2 ↔ Sentence 3: {sim_1_2:.4f}")
    print()
    print("=" * 60)
    print("Local embeddings (Library) are working correctly! 🎉")
    print("=" * 60)
    return True


def _print_install_help(service_error):
    """Print the failure report and installation options for both modes."""
    print("❌ FAILED!")
    print("Neither sentence-transformers library nor local embedding service found.")
    print("Library Error: sentence-transformers not installed")
    print(f"Service Error: {service_error}")
    print()
    print("=" * 60)
    print("Installation Options:")
    print("=" * 60)
    print("OPTION 1: Run Service (Recommended - Docker)")
    print(
        " docker run -d -p 8000:8000 "
        "ghcr.io/huggingface/text-embeddings-inference:cpu-latest "
        "--model-id sentence-transformers/all-MiniLM-L6-v2"
    )
    print(" export EMBEDDING_API_URL=http://localhost:8000/v1")
    print()
    print("OPTION 2: Install Library (Runs locally, adds dependencies)")
    print(" pip install sentence-transformers")
    print("=" * 60)


def _verify_with_service(embedding_model):
    """Service mode: request embeddings from an OpenAI-compatible local endpoint.

    The endpoint is read from ``EMBEDDING_API_URL`` (default
    ``http://localhost:8000/v1``).

    Returns:
        True if the service returned embeddings; False (after printing
        installation help) if the ``openai`` client is missing or the request
        failed for any reason.
    """
    print(" sentence-transformers library not installed.")
    print("Checking for local embedding service...")
    try:
        from openai import OpenAI

        embedding_url = os.getenv("EMBEDDING_API_URL", "http://localhost:8000/v1")
        print(f"Connecting to: {embedding_url}")
        client = OpenAI(base_url=embedding_url, api_key="local")

        texts = list(_TEST_TEXTS)
        _print_test_texts(
            f"Testing embedding generation via API with {len(texts)} sentences:",
            texts,
        )
        response = client.embeddings.create(model=embedding_model, input=texts)
        embeddings = [data.embedding for data in response.data]
        print("✅ SUCCESS!")
        print(f"Generated {len(embeddings)} embedding vectors via API")
        print(f"Embedding dimensions: {len(embeddings[0])}")
        print(f"First embedding (first 5 values): {embeddings[0][:5]}")
        print()
        print("=" * 60)
        print("Local embedding service (Docker) is working correctly! 🎉")
        print("=" * 60)
        return True
    except Exception as service_error:
        # Deliberately broad: this is the last fallback of a diagnostic
        # script, so every failure (missing client, refused connection, bad
        # response) is turned into a readable report instead of a traceback.
        _print_install_help(service_error)
        return False


def test_local_embeddings():
    """Check that a local embedding backend works and print a report.

    The model name is read from ``EMBEDDING_MODEL`` (default
    ``all-MiniLM-L6-v2``). Two backends are tried in order:

    1. The ``sentence_transformers`` library, if it can be imported.
    2. Otherwise, an OpenAI-compatible embedding service reached through the
       ``openai`` client.

    Returns:
        True if one backend produced embeddings for the sample sentences,
        False otherwise. Failures are printed, never raised.
    """
    embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
    print("=" * 60)
    print("Local Embeddings Verification (sentence-transformers)")
    print("=" * 60)
    print(f"Embedding Model: {embedding_model}")
    print()
    try:
        # Only the import itself decides which mode runs. An ImportError
        # raised later (e.g. while loading the model) is a real failure of
        # library mode and must not be reported as "library not installed".
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            SentenceTransformer = None

        if SentenceTransformer is not None:
            return _verify_with_library(SentenceTransformer, embedding_model)
        return _verify_with_service(embedding_model)
    except Exception as e:
        print("❌ FAILED!")
        print(f"Error: {e}")
        return False


# Despite its name this is a script entry point that returns a bool, not a
# pytest test; keep pytest from collecting it if the module is imported.
test_local_embeddings.__test__ = False
if __name__ == "__main__":
    # Exit status 0 means the verification succeeded; 1 means it failed.
    exit_code = 0 if test_local_embeddings() else 1
    sys.exit(exit_code)