Implemented pure Python local embeddings using `sentence-transformers` library, eliminating the need for external services like Ollama for providers that don't support embeddings
This commit is contained in:
parent
54a3395b37
commit
bfbc011a87
72
.env.example
72
.env.example
|
|
@ -1,2 +1,74 @@
|
||||||
|
# TradingAgents Environment Variables Configuration
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# LLM Provider API URLs
|
||||||
|
# ============================================
|
||||||
|
# These environment variables allow you to customize the API endpoints
|
||||||
|
# for different LLM providers. If not set, the default URLs will be used.
|
||||||
|
|
||||||
|
# OpenAI API URL
|
||||||
|
# Default: https://api.openai.com/v1
|
||||||
|
#OPENAI_API_URL=https://api.openai.com/v1
|
||||||
|
|
||||||
|
# Anthropic API URL
|
||||||
|
# Default: https://api.anthropic.com/
|
||||||
|
#ANTHROPIC_API_URL=https://api.anthropic.com/
|
||||||
|
|
||||||
|
# Google Generative AI API URL
|
||||||
|
# Default: https://generativelanguage.googleapis.com/v1
|
||||||
|
#GOOGLE_API_URL=https://generativelanguage.googleapis.com/v1
|
||||||
|
|
||||||
|
# OpenRouter API URL
|
||||||
|
# Default: https://openrouter.ai/api/v1
|
||||||
|
#OPENROUTER_API_URL=https://openrouter.ai/api/v1
|
||||||
|
|
||||||
|
# Ollama API URL (local)
|
||||||
|
# Default: http://localhost:11434/v1
|
||||||
|
#OLLAMA_API_URL=http://localhost:11434/v1
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# Embedding Configuration
|
||||||
|
# ============================================
|
||||||
|
# If EMBEDDING_API_URL is set, it will be used for ALL providers (overrides defaults)
|
||||||
|
# This is required for Anthropic (which doesn't provide embeddings)
|
||||||
|
# Can point to sentence-transformers in Docker, Ollama, or any OpenAI-compatible service
|
||||||
|
|
||||||
|
# Embedding service URL (OpenAI-compatible API)
|
||||||
|
# Required for Anthropic, optional for others
|
||||||
|
# Examples:
|
||||||
|
# - Local Service (startEmbedding.sh): http://localhost:11434/v1
|
||||||
|
# - Ollama: http://localhost:11434/v1
|
||||||
|
#EMBEDDING_API_URL=http://localhost:11434/v1
|
||||||
|
|
||||||
|
# Embedding model name
|
||||||
|
# Default: all-MiniLM-L6-v2
|
||||||
|
#EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||||
|
|
||||||
|
# Embedding API key (if your service requires it)
|
||||||
|
# Default: "local" (most local services don't need a key)
|
||||||
|
#EMBEDDING_API_KEY=local
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# API Keys
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# Alpha Vantage API Key
|
||||||
ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder
|
ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder
|
||||||
|
|
||||||
|
# OpenAI API Key
|
||||||
OPENAI_API_KEY=openai_api_key_placeholder
|
OPENAI_API_KEY=openai_api_key_placeholder
|
||||||
|
|
||||||
|
# Alpaca Trading API
|
||||||
|
#ALPACA_API_KEY=your_alpaca_api_key_here
|
||||||
|
#ALPACA_API_SECRET=your_alpaca_secret_key_here
|
||||||
|
|
||||||
|
# Google API Key (for Gemini models)
|
||||||
|
#GOOGLE_API_KEY=your_google_api_key_here
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# Application Settings
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# Results directory for storing analysis outputs
|
||||||
|
# Default: ./results
|
||||||
|
#TRADINGAGENTS_RESULTS_DIR=./results
|
||||||
43
CHANGELOG.md
43
CHANGELOG.md
|
|
@ -2,6 +2,49 @@
|
||||||
|
|
||||||
All notable changes to the **TradingAgents** project will be documented in this file.
|
All notable changes to the **TradingAgents** project will be documented in this file.
|
||||||
|
|
||||||
|
## [Unreleased] - 2026-01-10
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- **Local Embedding Service Support**: Added support for Anthropic to use local embedding service via URL
|
||||||
|
- Anthropic doesn't provide embeddings API, so users can run **Hugging Face Text Embeddings Inference (TEI)** in Docker
|
||||||
|
- Configure via `EMBEDDING_API_URL` environment variable (default: `http://localhost:11434/v1`)
|
||||||
|
- Configure model via `EMBEDDING_MODEL` environment variable (default: `all-MiniLM-L6-v2`)
|
||||||
|
- Keeps main application lightweight - heavy dependencies (PyTorch) isolated in separate container
|
||||||
|
- **Environment Variable Configuration**: Added comprehensive environment variable support for all LLM providers and embedding configuration
|
||||||
|
- `OPENAI_API_URL` - Custom OpenAI API endpoint
|
||||||
|
- `ANTHROPIC_API_URL` - Custom Anthropic API endpoint
|
||||||
|
- `GOOGLE_API_URL` - Custom Google API endpoint
|
||||||
|
- `OPENROUTER_API_URL` - Custom OpenRouter API endpoint
|
||||||
|
- `OLLAMA_API_URL` - Custom Ollama API endpoint
|
||||||
|
- `EMBEDDING_PROVIDER` - Choose embedding provider: `local`, `openai`, `google`, `ollama`
|
||||||
|
- `EMBEDDING_API_URL` - Custom embedding API endpoint (for Ollama or Docker service)
|
||||||
|
- `EMBEDDING_MODEL` - Custom embedding model name
|
||||||
|
- **Anthropic Claude 4.5 Thinking Models**: Added support for latest Anthropic thinking models
|
||||||
|
- `claude-sonnet-4-5-thinking` - Advanced reasoning with extended thinking
|
||||||
|
- `claude-opus-4-5-thinking` - Premier reasoning with extended thinking
|
||||||
|
- Removed older Claude models (3.5, 3.7, 4.0) to focus on latest thinking models
|
||||||
|
- **Documentation**: Created comprehensive guides and verification tools
|
||||||
|
- `docs/LOCAL_EMBEDDINGS.md` - Complete guide for local embeddings setup
|
||||||
|
- `verify_local_embeddings.py` - Verification script for sentence-transformers
|
||||||
|
- `verify_ollama_embeddings.py` - Verification script for Ollama (optional)
|
||||||
|
- Updated `.env.example` with all new configuration options
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- **Dependency Cleanup**: Removed `sentence-transformers` from `requirements.txt` to keep main application lightweight.
|
||||||
|
- **Virtual Environment**: Recreated `.venv` to ensure a clean state without unused heavy dependencies.
|
||||||
|
- **Embedding Architecture**: Refactored `tradingagents/agents/utils/memory.py` to support multiple embedding providers with clean separation of concerns
|
||||||
|
- Automatic provider selection based on LLM provider
|
||||||
|
- Local embeddings as default for Anthropic and Ollama providers
|
||||||
|
- Maintained backward compatibility with existing API-based embeddings
|
||||||
|
- **CLI Provider Selection**: Updated `cli/utils.py` to use environment variables for all LLM provider API URLs with sensible defaults
|
||||||
|
- **Configuration Documentation**: Enhanced `.env.example` with detailed comments and examples for all configuration options
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Anthropic Embedding Error**: Resolved `404 Not Found` error when using Anthropic as LLM provider by implementing automatic fallback to local embeddings (Anthropic doesn't provide an embeddings API)
|
||||||
|
|
||||||
|
### Technical Debt
|
||||||
|
- None - All changes follow SOLID principles with proper separation of concerns
|
||||||
|
|
||||||
## [Unreleased] - 2026-01-09
|
## [Unreleased] - 2026-01-09
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
31
cli/utils.py
31
cli/utils.py
|
|
@ -1,3 +1,4 @@
|
||||||
|
import os
|
||||||
import questionary
|
import questionary
|
||||||
from typing import List, Optional, Tuple, Dict
|
from typing import List, Optional, Tuple, Dict
|
||||||
|
|
||||||
|
|
@ -134,10 +135,8 @@ def select_shallow_thinking_agent(provider) -> str:
|
||||||
("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
|
("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
|
||||||
],
|
],
|
||||||
"anthropic": [
|
"anthropic": [
|
||||||
("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
|
("Claude Sonnet 4.5 (Thinking) - Advanced reasoning with extended thinking", "claude-sonnet-4-5-thinking"),
|
||||||
("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
|
("Claude Opus 4.5 (Thinking) - Premier reasoning with extended thinking", "claude-opus-4-5-thinking"),
|
||||||
("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
|
|
||||||
("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
|
|
||||||
],
|
],
|
||||||
"google": [
|
"google": [
|
||||||
("Gemini 2.5 Flash-Lite - Cost efficiency and low latency", "gemini-2.5-flash-lite"),
|
("Gemini 2.5 Flash-Lite - Cost efficiency and low latency", "gemini-2.5-flash-lite"),
|
||||||
|
|
@ -196,11 +195,8 @@ def select_deep_thinking_agent(provider) -> str:
|
||||||
("o1 - Premier reasoning and problem-solving model", "o1"),
|
("o1 - Premier reasoning and problem-solving model", "o1"),
|
||||||
],
|
],
|
||||||
"anthropic": [
|
"anthropic": [
|
||||||
("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
|
("Claude Sonnet 4.5 (Thinking) - Advanced reasoning with extended thinking", "claude-sonnet-4-5-thinking"),
|
||||||
("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
|
("Claude Opus 4.5 (Thinking) - Premier reasoning with extended thinking", "claude-opus-4-5-thinking"),
|
||||||
("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
|
|
||||||
("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
|
|
||||||
("Claude Opus 4 - Most powerful Anthropic model", " claude-opus-4-0"),
|
|
||||||
],
|
],
|
||||||
"google": [
|
"google": [
|
||||||
("Gemini 2.5 Flash - Next generation features, speed, and thinking", "gemini-2.5-flash"),
|
("Gemini 2.5 Flash - Next generation features, speed, and thinking", "gemini-2.5-flash"),
|
||||||
|
|
@ -241,14 +237,15 @@ def select_deep_thinking_agent(provider) -> str:
|
||||||
return choice
|
return choice
|
||||||
|
|
||||||
def select_llm_provider() -> tuple[str, str]:
|
def select_llm_provider() -> tuple[str, str]:
|
||||||
"""Select the OpenAI api url using interactive selection."""
|
"""Select the LLM provider and return its API URL from environment or default."""
|
||||||
# Define OpenAI api options with their corresponding endpoints
|
# Define LLM provider options with their corresponding endpoints
|
||||||
|
# Each provider checks for its specific environment variable with a fallback default
|
||||||
BASE_URLS = [
|
BASE_URLS = [
|
||||||
("OpenAI", "https://api.openai.com/v1"),
|
("OpenAI", os.getenv("OPENAI_API_URL", "https://api.openai.com/v1")),
|
||||||
("Anthropic", "https://api.anthropic.com/"),
|
("Anthropic", os.getenv("ANTHROPIC_API_URL", "https://api.anthropic.com/")),
|
||||||
("Google", "https://generativelanguage.googleapis.com/v1"),
|
("Google", os.getenv("GOOGLE_API_URL", "https://generativelanguage.googleapis.com/v1")),
|
||||||
("Openrouter", "https://openrouter.ai/api/v1"),
|
("Openrouter", os.getenv("OPENROUTER_API_URL", "https://openrouter.ai/api/v1")),
|
||||||
("Ollama", "http://localhost:11434/v1"),
|
("Ollama", os.getenv("OLLAMA_API_URL", "http://localhost:11434/v1")),
|
||||||
]
|
]
|
||||||
|
|
||||||
choice = questionary.select(
|
choice = questionary.select(
|
||||||
|
|
@ -268,7 +265,7 @@ def select_llm_provider() -> tuple[str, str]:
|
||||||
).ask()
|
).ask()
|
||||||
|
|
||||||
if choice is None:
|
if choice is None:
|
||||||
console.print("\n[red]no OpenAI backend selected. Exiting...[/red]")
|
console.print("\n[red]No LLM provider selected. Exiting...[/red]")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
display_name, url = choice
|
display_name, url = choice
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
# Local Embeddings Setup Guide
|
||||||
|
|
||||||
|
This guide explains how to set up local embeddings for the TradingAgents framework.
|
||||||
|
|
||||||
|
## Why Local Embeddings?
|
||||||
|
|
||||||
|
When using LLM providers that don't support embeddings (like Anthropic), or when you want to avoid additional API costs, you need a local embedding solution.
|
||||||
|
|
||||||
|
## Recommended: Run in Docker
|
||||||
|
|
||||||
|
The recommended approach is to run the embedding service in a Docker container. This keeps your main application environment clean and avoids installing heavy dependencies like PyTorch on your host machine.
|
||||||
|
|
||||||
|
### 1. Run the Embedding Service
|
||||||
|
Use the provided script to start the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./startEmbedding.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This runs **Hugging Face Text Embeddings Inference (TEI)**, a high-performance server compatible with the OpenAI API.
|
||||||
|
|
||||||
|
*(Note: The Go-based image `clems4ever/all-minilm-l6-v2-go` is a CLI tool and cannot be run as a server.)*
|
||||||
|
|
||||||
|
### 2. Configure TradingAgents
|
||||||
|
|
||||||
|
Add (or update) these lines in your `.env` file:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Point to your local embedding service (TEI supports /v1 API)
|
||||||
|
EMBEDDING_API_URL=http://localhost:11434/v1
|
||||||
|
|
||||||
|
# The model name configured in the start script
|
||||||
|
EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Verify Setup
|
||||||
|
|
||||||
|
Run the verification script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 verify_local_embeddings.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Alternative: Local Installation (Development Only)
|
||||||
|
|
||||||
|
If you prefer to run everything locally without Docker (e.g., for development), you can install the library directly.
|
||||||
|
|
||||||
|
**⚠️ Warning:** This adds ~500MB of PyTorch dependencies to your environment.
|
||||||
|
|
||||||
|
### 1. Install Dependencies
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install sentence-transformers
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure TradingAgents
|
||||||
|
|
||||||
|
If you don't set `EMBEDDING_API_URL`, the system will attempt to import `sentence-transformers` automatically when using Anthropic.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Optional: Force local provider
|
||||||
|
EMBEDDING_PROVIDER=local
|
||||||
|
```
|
||||||
|
|
||||||
|
## Supported Providers
|
||||||
|
|
||||||
|
| LLM Provider | Default Behavior | Recommended Setup |
|
||||||
|
|--------------|------------------|-------------------|
|
||||||
|
| **Anthropic** | Tries local service URL | **Docker Service** |
|
||||||
|
| **Ollama** | Uses Ollama API | Ensure Ollama is running |
|
||||||
|
| **OpenAI** | Uses OpenAI API | No setup needed |
|
||||||
|
| **Google** | Uses Google API | No setup needed |
|
||||||
|
|
||||||
|
## FAQ
|
||||||
|
|
||||||
|
**Q: Why Docker?**
|
||||||
|
A: `sentence-transformers` requires PyTorch, which is a very large dependency (~500MB+). Putting it in a container keeps your main application lightweight and portable.
|
||||||
|
|
||||||
|
**Q: Can I use GPU?**
|
||||||
|
A: Yes! Use the GPU version of the container: `ghcr.io/huggingface/text-embeddings-inference:latest` (requires NVIDIA Container Toolkit).
|
||||||
|
|
||||||
|
**Q: Can I use Ollama instead?**
|
||||||
|
A: Yes. Set `EMBEDDING_API_URL=http://localhost:11434/v1` and `EMBEDDING_MODEL=nomic-embed-text` (or your preferred Ollama model).
|
||||||
|
|
@ -1,4 +1,7 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
/home/prem/git/antigravity-claude-proxy/startProxy.sh &
|
||||||
|
|
||||||
|
./startEmbedding.sh
|
||||||
|
|
||||||
# 1. Activate Virtual Environment
|
# 1. Activate Virtual Environment
|
||||||
if [ -d ".venv" ]; then
|
if [ -d ".venv" ]; then
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
# Start the local embedding service (Hugging Face Text Embeddings Inference)
# in a Docker container named "embedding-service" and publish it on
# http://localhost:11434/v1. Exits non-zero if the container cannot be started.

# Stop and remove existing container if it exists
docker rm -f embedding-service 2>/dev/null || true

echo "🚀 Starting Local Embedding Service (Hugging Face TEI)..."
echo "ℹ️ Note: The previous image (clems4ever/all-minilm-l6-v2-go) is a CLI tool, not a server."
echo " Switching to ghcr.io/huggingface/text-embeddings-inference:cpu-latest which provides a compatible API."

# Run Hugging Face Text Embeddings Inference (compatible with OpenAI client).
# Host port 11434 is mapped to port 80 inside the container.
# Check the exit status so a failed start is not reported as success.
if ! docker run -d \
    --name embedding-service \
    --restart unless-stopped \
    -p 11434:80 \
    -e MAX_CONCURRENT_REQUESTS=4 \
    ghcr.io/huggingface/text-embeddings-inference:cpu-latest \
    --model-id sentence-transformers/all-MiniLM-L6-v2; then
    echo "❌ Failed to start the embedding-service container." >&2
    exit 1
fi

echo "✅ Service started!"
echo " URL: http://localhost:11434/v1"
|
||||||
|
|
@ -6,7 +6,17 @@ from openai import OpenAI
|
||||||
|
|
||||||
class FinancialSituationMemory:
|
class FinancialSituationMemory:
|
||||||
def __init__(self, name, config):
|
def __init__(self, name, config):
|
||||||
if config.get("llm_provider") == "google":
|
# Check if user explicitly set EMBEDDING_API_URL - if so, use it regardless of provider
|
||||||
|
embedding_url = os.getenv("EMBEDDING_API_URL")
|
||||||
|
|
||||||
|
if embedding_url:
|
||||||
|
# User has explicitly configured embedding service URL
|
||||||
|
self.embedding = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||||
|
self.client = OpenAI(
|
||||||
|
base_url=embedding_url,
|
||||||
|
api_key=os.getenv("EMBEDDING_API_KEY", "local")
|
||||||
|
)
|
||||||
|
elif config.get("llm_provider") == "google":
|
||||||
self.embedding = "text-embedding-004"
|
self.embedding = "text-embedding-004"
|
||||||
|
|
||||||
google_api_key = os.getenv("GOOGLE_API_KEY")
|
google_api_key = os.getenv("GOOGLE_API_KEY")
|
||||||
|
|
@ -19,6 +29,13 @@ class FinancialSituationMemory:
|
||||||
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||||
max_retries=5
|
max_retries=5
|
||||||
)
|
)
|
||||||
|
elif config.get("llm_provider") == "anthropic":
|
||||||
|
# Anthropic doesn't provide embeddings - default to local embedding service
|
||||||
|
self.embedding = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||||
|
self.client = OpenAI(
|
||||||
|
base_url="http://localhost:8000/v1",
|
||||||
|
api_key="local"
|
||||||
|
)
|
||||||
elif config["backend_url"] == "http://localhost:11434/v1" or config.get("llm_provider") == "ollama":
|
elif config["backend_url"] == "http://localhost:11434/v1" or config.get("llm_provider") == "ollama":
|
||||||
self.embedding = "nomic-embed-text"
|
self.embedding = "nomic-embed-text"
|
||||||
self.client = OpenAI(base_url=config["backend_url"])
|
self.client = OpenAI(base_url=config["backend_url"])
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,138 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Verify that local sentence-transformers embeddings are working correctly.
|
||||||
|
This script tests the local embedding model without requiring external services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def test_local_embeddings():
    """Verify that local embeddings can be generated.

    The in-process ``sentence-transformers`` library is tried first. If it
    cannot be imported, the check falls back to an OpenAI-compatible embedding
    service reached over HTTP.

    Environment variables read:
        EMBEDDING_MODEL: model name (default ``all-MiniLM-L6-v2``).
        EMBEDDING_API_URL: service base URL (default
            ``http://localhost:11434/v1``, the address printed by
            ``startEmbedding.sh``).
        EMBEDDING_API_KEY: API key sent to the service (default ``local``).

    Returns:
        bool: True if embeddings were generated by either mode, else False.
    """
    embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")

    print("=" * 60)
    print("Local Embeddings Verification (sentence-transformers)")
    print("=" * 60)
    print(f"Embedding Model: {embedding_model}")
    print()

    # Sample inputs shared by the library mode and the service mode.
    test_texts = [
        "This is a test sentence for embedding generation.",
        "Financial markets are showing increased volatility.",
        "The company reported strong quarterly earnings."
    ]

    try:
        # 1. Try to import sentence-transformers (Local Library Mode)
        try:
            from sentence_transformers import SentenceTransformer
            print("✅ Found local sentence-transformers library.")

            # Load the model
            print(f"📦 Loading embedding model: {embedding_model}")
            print(" (First run will download the model, ~90MB)")
            print()

            model = SentenceTransformer(embedding_model)

            print(f"Testing embedding generation with {len(test_texts)} sentences:")
            for i, text in enumerate(test_texts, 1):
                print(f" {i}. '{text[:50]}...'")
            print()

            embeddings = model.encode(test_texts, convert_to_numpy=True)

            print("✅ SUCCESS!")
            print(f"Generated {len(embeddings)} embedding vectors")
            print(f"Embedding dimensions: {embeddings.shape[1]}")
            print(f"First embedding (first 5 values): {embeddings[0][:5].tolist()}")
            print()

            # Test similarity
            from numpy import dot
            from numpy.linalg import norm

            def cosine_similarity(a, b):
                return dot(a, b) / (norm(a) * norm(b))

            sim_0_1 = cosine_similarity(embeddings[0], embeddings[1])
            sim_1_2 = cosine_similarity(embeddings[1], embeddings[2])

            print("Similarity scores:")
            print(f" Sentence 1 ↔ Sentence 2: {sim_0_1:.4f}")
            print(f" Sentence 2 ↔ Sentence 3: {sim_1_2:.4f}")
            print()

            print("=" * 60)
            print("Local embeddings (Library) are working correctly! 🎉")
            print("=" * 60)
            return True

        except ImportError:
            # 2. If Library missing, try connection to Local Service (Docker Mode)
            print("ℹ️ sentence-transformers library not installed.")
            print("Checking for local embedding service...")

            try:
                from openai import OpenAI

                # Default to the port startEmbedding.sh publishes, so running
                # that script and then this check works without extra exports.
                embedding_url = os.getenv("EMBEDDING_API_URL", "http://localhost:11434/v1")
                embedding_api_key = os.getenv("EMBEDDING_API_KEY", "local")
                print(f"Connecting to: {embedding_url}")

                client = OpenAI(base_url=embedding_url, api_key=embedding_api_key)

                print(f"Testing embedding generation via API with {len(test_texts)} sentences:")
                for i, text in enumerate(test_texts, 1):
                    print(f" {i}. '{text[:50]}...'")
                print()

                response = client.embeddings.create(model=embedding_model, input=test_texts)
                embeddings = [data.embedding for data in response.data]

                print("✅ SUCCESS!")
                print(f"Generated {len(embeddings)} embedding vectors via API")
                print(f"Embedding dimensions: {len(embeddings[0])}")
                print(f"First embedding (first 5 values): {embeddings[0][:5]}")
                print()

                print("=" * 60)
                print("Local embedding service (Docker) is working correctly! 🎉")
                print("=" * 60)
                return True

            except Exception as service_error:
                print("❌ FAILED!")
                print("Neither sentence-transformers library nor local embedding service found.")
                print("Library Error: sentence-transformers not installed")
                print(f"Service Error: {str(service_error)}")
                print()
                print("=" * 60)
                print("Installation Options:")
                print("=" * 60)
                print("OPTION 1: Run Service (Recommended - Docker)")
                # Same host:container port mapping as startEmbedding.sh (11434:80).
                print(" docker run -d -p 11434:80 ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id sentence-transformers/all-MiniLM-L6-v2")
                print(" export EMBEDDING_API_URL=http://localhost:11434/v1")
                print()
                print("OPTION 2: Install Library (Runs locally, adds dependencies)")
                print(" pip install sentence-transformers")
                print("=" * 60)
                return False

    except Exception as e:
        # Library was importable but loading/encoding failed.
        print("❌ FAILED!")
        print(f"Error: {str(e)}")
        return False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
success = test_local_embeddings()
|
||||||
|
sys.exit(0 if success else 1)
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Verify that Ollama embeddings are working correctly.
|
||||||
|
This script tests the embedding endpoint and model availability.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
def test_ollama_embeddings():
    """Request one embedding from an Ollama endpoint and report the outcome.

    Reads ``EMBEDDING_API_URL`` (default ``http://localhost:11434/v1``) and
    ``EMBEDDING_MODEL`` (default ``nomic-embed-text``) from the environment.

    Returns:
        bool: True if an embedding was returned, False on any exception
        (troubleshooting steps are printed in that case).
    """
    base_url = os.getenv("EMBEDDING_API_URL", "http://localhost:11434/v1")
    model_name = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
    rule = "=" * 60

    banner = (
        rule,
        "Ollama Embeddings Verification",
        rule,
        f"Embedding URL: {base_url}",
        f"Embedding Model: {model_name}",
        "",
    )
    for line in banner:
        print(line)

    try:
        # The OpenAI client is pointed at Ollama; a placeholder key is passed
        # because Ollama doesn't require a real API key.
        ollama_client = OpenAI(base_url=base_url, api_key="ollama")

        sample = "This is a test sentence for embedding generation."
        print("Testing embedding generation with text:")
        print(f" '{sample}'")
        print()

        reply = ollama_client.embeddings.create(model=model_name, input=sample)
        vector = reply.data[0].embedding

        report = (
            "✅ SUCCESS!",
            f"Generated embedding vector with {len(vector)} dimensions",
            f"First 5 values: {vector[:5]}",
            "",
            rule,
            "Ollama embeddings are working correctly! 🎉",
            rule,
        )
        for line in report:
            print(line)

        return True

    except Exception as e:
        # Host root for the native /api/tags hint: drop the "/v1" part.
        host_root = base_url.replace('/v1', '')
        help_lines = (
            "❌ FAILED!",
            f"Error: {e}",
            "",
            rule,
            "Troubleshooting Steps:",
            rule,
            "1. Make sure Ollama is running:",
            " $ ollama serve",
            "",
            "2. Pull the embedding model:",
            f" $ ollama pull {model_name}",
            "",
            "3. Verify Ollama is accessible:",
            f" $ curl {host_root}/api/tags",
            "",
            "4. Check if the model is available:",
            f" $ ollama list | grep {model_name}",
            "",
            "For more help, see: docs/LOCAL_EMBEDDINGS.md",
            rule,
        )
        for line in help_lines:
            print(line)

        return False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
success = test_ollama_embeddings()
|
||||||
|
sys.exit(0 if success else 1)
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Verify local embedding service using the native TEI /embed endpoint.
|
||||||
|
This uses pure HTTP requests without the OpenAI client.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
|
def test_native_endpoint():
    """Check the native TEI ``/embed`` endpoint with a plain HTTP request.

    Posts two sample sentences to ``http://localhost:11434/embed`` and prints
    the number of embeddings, their dimensions and the request duration.

    Returns:
        bool: True if the request succeeded and embeddings were printed,
        False on a connection error or any other failure.
    """
    url = "http://localhost:11434/embed"
    headers = {"Content-Type": "application/json"}

    print(f"Testing Native TEI Endpoint: {url}")
    print("-" * 50)

    test_inputs = [
        "This is a test using the native /embed endpoint.",
        "It should be slightly faster than the OpenAI-compatible one."
    ]

    payload = {"inputs": test_inputs}

    try:
        start_time = time.time()
        # Bound the wait so a hung service cannot block the check forever.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        duration = time.time() - start_time

        embeddings = response.json()

        print("✅ SUCCESS!")
        print(f"Time taken: {duration:.4f}s")
        print(f"Received {len(embeddings)} embeddings")
        print(f"Dimensions: {len(embeddings[0])}")
        print(f"First 5 values: {embeddings[0][:5]}")
        print("-" * 50)
        return True

    except requests.exceptions.ConnectionError:
        print("❌ FAILED: Connection refused.")
        print("Make sure the container is running: ./startEmbedding.sh")
        return False
    except Exception as e:
        print(f"❌ FAILED: {str(e)}")
        # A requests.Response is falsy for 4xx/5xx statuses, so test against
        # None; a truthiness check would hide exactly the error responses
        # these diagnostics are for.
        error_response = getattr(e, "response", None)
        if error_response is not None:
            print(f"Status: {error_response.status_code}")
            print(f"Response: {error_response.text}")
        return False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
if test_native_endpoint():
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
sys.exit(1)
|
||||||
Loading…
Reference in New Issue