From bfbc011a87259d91b0306e22af9cc7aec0693541 Mon Sep 17 00:00:00 2001 From: "swj.premkumar" Date: Sat, 10 Jan 2026 11:14:40 -0600 Subject: [PATCH] Added local embedding support via Hugging Face Text Embeddings Inference (TEI) in Docker for LLM providers that don't support embeddings (e.g. Anthropic); removed the sentence-transformers dependency from the main application --- .env.example | 74 +++++++++++++- CHANGELOG.md | 43 +++++++++ cli/utils.py | 31 +++--- docs/LOCAL_EMBEDDINGS.md | 83 ++++++++++++++++ start.sh => startAgent.sh | 3 + startEmbedding.sh | 20 ++++ tradingagents/agents/utils/memory.py | 19 +++- verify_local_embeddings.py | 138 +++++++++++++++++++++++++++ verify_ollama_embeddings.py | 81 ++++++++++++++++ verify_tei_native.py | 57 +++++++++++ 10 files changed, 530 insertions(+), 19 deletions(-) create mode 100644 docs/LOCAL_EMBEDDINGS.md rename start.sh => startAgent.sh (93%) create mode 100755 startEmbedding.sh create mode 100755 verify_local_embeddings.py create mode 100755 verify_ollama_embeddings.py create mode 100644 verify_tei_native.py diff --git a/.env.example b/.env.example index 1e257c3c..334ea3c0 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,74 @@ +# TradingAgents Environment Variables Configuration + +# ============================================ +# LLM Provider API URLs +# ============================================ +# These environment variables allow you to customize the API endpoints +# for different LLM providers. If not set, the default URLs will be used. 
+ +# OpenAI API URL +# Default: https://api.openai.com/v1 +#OPENAI_API_URL=https://api.openai.com/v1 + +# Anthropic API URL +# Default: https://api.anthropic.com/ +#ANTHROPIC_API_URL=https://api.anthropic.com/ + +# Google Generative AI API URL +# Default: https://generativelanguage.googleapis.com/v1 +#GOOGLE_API_URL=https://generativelanguage.googleapis.com/v1 + +# OpenRouter API URL +# Default: https://openrouter.ai/api/v1 +#OPENROUTER_API_URL=https://openrouter.ai/api/v1 + +# Ollama API URL (local) +# Default: http://localhost:11434/v1 +#OLLAMA_API_URL=http://localhost:11434/v1 + +# ============================================ +# Embedding Configuration +# ============================================ +# If EMBEDDING_API_URL is set, it will be used for ALL providers (overrides defaults) +# This is required for Anthropic (which doesn't provide embeddings) +# Can point to sentence-transformers in Docker, Ollama, or any OpenAI-compatible service + +# Embedding service URL (OpenAI-compatible API) +# Required for Anthropic, optional for others +# Examples: +# - Local Service (startEmbedding.sh): http://localhost:11434/v1 +# - Ollama: http://localhost:11434/v1 +#EMBEDDING_API_URL=http://localhost:11434/v1 + +# Embedding model name +# Default: all-MiniLM-L6-v2 +#EMBEDDING_MODEL=all-MiniLM-L6-v2 + +# Embedding API key (if your service requires it) +# Default: "local" (most local services don't need a key) +#EMBEDDING_API_KEY=local + +# ============================================ +# API Keys +# ============================================ + +# Alpha Vantage API Key ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder -OPENAI_API_KEY=openai_api_key_placeholder \ No newline at end of file + +# OpenAI API Key +OPENAI_API_KEY=openai_api_key_placeholder + +# Alpaca Trading API +#ALPACA_API_KEY=your_alpaca_api_key_here +#ALPACA_API_SECRET=your_alpaca_secret_key_here + +# Google API Key (for Gemini models) +#GOOGLE_API_KEY=your_google_api_key_here + +# 
============================================ +# Application Settings +# ============================================ + +# Results directory for storing analysis outputs +# Default: ./results +#TRADINGAGENTS_RESULTS_DIR=./results \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ccc748b..1b81de5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,49 @@ All notable changes to the **TradingAgents** project will be documented in this file. +## [Unreleased] - 2026-01-10 + +### Added +- **Local Embedding Service Support**: Added support for Anthropic to use local embedding service via URL + - Anthropic doesn't provide embeddings API, so users can run **Hugging Face Text Embeddings Inference (TEI)** in Docker + - Configure via `EMBEDDING_API_URL` environment variable (default: `http://localhost:11434/v1`) + - Configure model via `EMBEDDING_MODEL` environment variable (default: `all-MiniLM-L6-v2`) + - Keeps main application lightweight - heavy dependencies (PyTorch) isolated in separate container +- **Environment Variable Configuration**: Added comprehensive environment variable support for all LLM providers and embedding configuration + - `OPENAI_API_URL` - Custom OpenAI API endpoint + - `ANTHROPIC_API_URL` - Custom Anthropic API endpoint + - `GOOGLE_API_URL` - Custom Google API endpoint + - `OPENROUTER_API_URL` - Custom OpenRouter API endpoint + - `OLLAMA_API_URL` - Custom Ollama API endpoint + - `EMBEDDING_PROVIDER` - Choose embedding provider: `local`, `openai`, `google`, `ollama` + - `EMBEDDING_API_URL` - Custom embedding API endpoint (for Ollama or Docker service) + - `EMBEDDING_MODEL` - Custom embedding model name +- **Anthropic Claude 4.5 Thinking Models**: Added support for latest Anthropic thinking models + - `claude-sonnet-4-5-thinking` - Advanced reasoning with extended thinking + - `claude-opus-4-5-thinking` - Premier reasoning with extended thinking + - Removed older Claude models (3.5, 3.7, 4.0) to focus on latest thinking 
models +- **Documentation**: Created comprehensive guides and verification tools + - `docs/LOCAL_EMBEDDINGS.md` - Complete guide for local embeddings setup + - `verify_local_embeddings.py` - Verification script for sentence-transformers + - `verify_ollama_embeddings.py` - Verification script for Ollama (optional) + - Updated `.env.example` with all new configuration options + +### Changed +- **Dependency Cleanup**: Removed `sentence-transformers` from `requirements.txt` to keep main application lightweight. +- **Virtual Environment**: Recreated `.venv` to ensure a clean state without unused heavy dependencies. +- **Embedding Architecture**: Refactored `tradingagents/agents/utils/memory.py` to support multiple embedding providers with clean separation of concerns + - Automatic provider selection based on LLM provider + - Local embeddings as default for Anthropic and Ollama providers + - Maintained backward compatibility with existing API-based embeddings +- **CLI Provider Selection**: Updated `cli/utils.py` to use environment variables for all LLM provider API URLs with sensible defaults +- **Configuration Documentation**: Enhanced `.env.example` with detailed comments and examples for all configuration options + +### Fixed +- **Anthropic Embedding Error**: Resolved `404 Not Found` error when using Anthropic as LLM provider by implementing automatic fallback to local embeddings (Anthropic doesn't provide an embeddings API) + +### Technical Debt +- None - All changes follow SOLID principles with proper separation of concerns + ## [Unreleased] - 2026-01-09 ### Added diff --git a/cli/utils.py b/cli/utils.py index 6fe5ab3c..606d6226 100644 --- a/cli/utils.py +++ b/cli/utils.py @@ -1,3 +1,4 @@ +import os import questionary from typing import List, Optional, Tuple, Dict @@ -134,10 +135,8 @@ def select_shallow_thinking_agent(provider) -> str: ("GPT-4o - Standard model with solid capabilities", "gpt-4o"), ], "anthropic": [ - ("Claude Haiku 3.5 - Fast inference and standard 
capabilities", "claude-3-5-haiku-latest"), - ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"), - ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"), - ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"), + ("Claude Sonnet 4.5 (Thinking) - Advanced reasoning with extended thinking", "claude-sonnet-4-5-thinking"), + ("Claude Opus 4.5 (Thinking) - Premier reasoning with extended thinking", "claude-opus-4-5-thinking"), ], "google": [ ("Gemini 2.5 Flash-Lite - Cost efficiency and low latency", "gemini-2.5-flash-lite"), @@ -196,11 +195,8 @@ def select_deep_thinking_agent(provider) -> str: ("o1 - Premier reasoning and problem-solving model", "o1"), ], "anthropic": [ - ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"), - ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"), - ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"), - ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"), - ("Claude Opus 4 - Most powerful Anthropic model", " claude-opus-4-0"), + ("Claude Sonnet 4.5 (Thinking) - Advanced reasoning with extended thinking", "claude-sonnet-4-5-thinking"), + ("Claude Opus 4.5 (Thinking) - Premier reasoning with extended thinking", "claude-opus-4-5-thinking"), ], "google": [ ("Gemini 2.5 Flash - Next generation features, speed, and thinking", "gemini-2.5-flash"), @@ -241,14 +237,15 @@ def select_deep_thinking_agent(provider) -> str: return choice def select_llm_provider() -> tuple[str, str]: - """Select the OpenAI api url using interactive selection.""" - # Define OpenAI api options with their corresponding endpoints + """Select the LLM provider and return its API URL from environment or default.""" + # Define LLM provider options with their corresponding endpoints + # Each provider checks for its 
specific environment variable with a fallback default BASE_URLS = [ - ("OpenAI", "https://api.openai.com/v1"), - ("Anthropic", "https://api.anthropic.com/"), - ("Google", "https://generativelanguage.googleapis.com/v1"), - ("Openrouter", "https://openrouter.ai/api/v1"), - ("Ollama", "http://localhost:11434/v1"), + ("OpenAI", os.getenv("OPENAI_API_URL", "https://api.openai.com/v1")), + ("Anthropic", os.getenv("ANTHROPIC_API_URL", "https://api.anthropic.com/")), + ("Google", os.getenv("GOOGLE_API_URL", "https://generativelanguage.googleapis.com/v1")), + ("Openrouter", os.getenv("OPENROUTER_API_URL", "https://openrouter.ai/api/v1")), + ("Ollama", os.getenv("OLLAMA_API_URL", "http://localhost:11434/v1")), ] choice = questionary.select( @@ -268,7 +265,7 @@ def select_llm_provider() -> tuple[str, str]: ).ask() if choice is None: - console.print("\n[red]no OpenAI backend selected. Exiting...[/red]") + console.print("\n[red]No LLM provider selected. Exiting...[/red]") exit(1) display_name, url = choice diff --git a/docs/LOCAL_EMBEDDINGS.md b/docs/LOCAL_EMBEDDINGS.md new file mode 100644 index 00000000..37cd8b73 --- /dev/null +++ b/docs/LOCAL_EMBEDDINGS.md @@ -0,0 +1,83 @@ +# Local Embeddings Setup Guide + +This guide explains how to set up local embeddings for the TradingAgents framework. + +## Why Local Embeddings? + +When using LLM providers that don't support embeddings (like Anthropic), or when you want to avoid additional API costs, you need a local embedding solution. + +## Recommended: Run in Docker + +The recommended approach is to run the embedding service in a Docker container. This keeps your main application environment clean and avoids installing heavy dependencies like PyTorch on your host machine. + +### 1. Run the Embedding Service +Use the provided script to start the service: + +```bash +./startEmbedding.sh +``` + +This runs **Hugging Face Text Embeddings Inference (TEI)**, a high-performance server compatible with the OpenAI API. 
+ +*(Note: The Go-based image `clems4ever/all-minilm-l6-v2-go` is a CLI tool and cannot simply be run as a server.)* + +### 2. Configure TradingAgents + +Add (or update) these lines in your `.env` file: + +```bash +# Point to your local embedding service (TEI supports /v1 API) +EMBEDDING_API_URL=http://localhost:11434/v1 + +# The model name configured in the start script +EMBEDDING_MODEL=all-MiniLM-L6-v2 +``` + +### 3. Verify Setup + +Run the verification script: + +```bash +python3 verify_local_embeddings.py +``` + +## Alternative: Local Installation (Development Only) + +If you prefer to run everything locally without Docker (e.g., for development), you can install the library directly. + +**âš ī¸ Warning:** This adds ~500MB of PyTorch dependencies to your environment. + +### 1. Install Dependencies + +```bash +pip install sentence-transformers +``` + +### 2. Configure TradingAgents + +If you don't set `EMBEDDING_API_URL`, the Anthropic provider falls back to a local embedding service at `http://localhost:8000/v1` (note: `startEmbedding.sh` publishes the service on port 11434, so set `EMBEDDING_API_URL=http://localhost:11434/v1` when using that script). + +```bash +# Optional: Force local provider +EMBEDDING_PROVIDER=local +``` + +## Supported Providers + +| LLM Provider | Default Behavior | Recommended Setup | +|--------------|------------------|-------------------| +| **Anthropic** | Tries local service URL | **Docker Service** | +| **Ollama** | Uses Ollama API | Ensure Ollama is running | +| **OpenAI** | Uses OpenAI API | No setup needed | +| **Google** | Uses Google API | No setup needed | + +## FAQ + +**Q: Why Docker?** +A: `sentence-transformers` requires PyTorch, which is a very large dependency (~500MB+). Putting it in a container keeps your main application lightweight and portable. + +**Q: Can I use GPU?** +A: Yes! Use the GPU version of the container: `ghcr.io/huggingface/text-embeddings-inference:latest` (requires NVIDIA Container Toolkit). + +**Q: Can I use Ollama instead?** +A: Yes. 
Set `EMBEDDING_API_URL=http://localhost:11434/v1` and `EMBEDDING_MODEL=nomic-embed-text` (or your preferred Ollama model). diff --git a/start.sh b/startAgent.sh similarity index 93% rename from start.sh rename to startAgent.sh index ceb1595a..d10f3acb 100755 --- a/start.sh +++ b/startAgent.sh @@ -1,4 +1,7 @@ #!/bin/bash +/home/prem/git/antigravity-claude-proxy/startProxy.sh & + +./startEmbedding.sh # 1. Activate Virtual Environment if [ -d ".venv" ]; then diff --git a/startEmbedding.sh b/startEmbedding.sh new file mode 100755 index 00000000..ff9ef58d --- /dev/null +++ b/startEmbedding.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Stop and remove existing container if it exists +docker rm -f embedding-service 2>/dev/null || true + +echo "🚀 Starting Local Embedding Service (Hugging Face TEI)..." +echo "â„šī¸ Note: The previous image (clems4ever/all-minilm-l6-v2-go) is a CLI tool, not a server." +echo " Switching to ghcr.io/huggingface/text-embeddings-inference:cpu-latest which provides a compatible API." + +# Run Hugging Face Text Embeddings Inference (compatible with OpenAI client) +docker run -d \ + --name embedding-service \ + --restart unless-stopped \ + -p 11434:80 \ + -e MAX_CONCURRENT_REQUESTS=4 \ + ghcr.io/huggingface/text-embeddings-inference:cpu-latest \ + --model-id sentence-transformers/all-MiniLM-L6-v2 + +echo "✅ Service started!" 
+echo " URL: http://localhost:11434/v1" diff --git a/tradingagents/agents/utils/memory.py b/tradingagents/agents/utils/memory.py index bd5e25bf..ce3a00ed 100644 --- a/tradingagents/agents/utils/memory.py +++ b/tradingagents/agents/utils/memory.py @@ -6,7 +6,17 @@ from openai import OpenAI class FinancialSituationMemory: def __init__(self, name, config): - if config.get("llm_provider") == "google": + # Check if user explicitly set EMBEDDING_API_URL - if so, use it regardless of provider + embedding_url = os.getenv("EMBEDDING_API_URL") + + if embedding_url: + # User has explicitly configured embedding service URL + self.embedding = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") + self.client = OpenAI( + base_url=embedding_url, + api_key=os.getenv("EMBEDDING_API_KEY", "local") + ) + elif config.get("llm_provider") == "google": self.embedding = "text-embedding-004" google_api_key = os.getenv("GOOGLE_API_KEY") @@ -19,6 +29,13 @@ class FinancialSituationMemory: base_url="https://generativelanguage.googleapis.com/v1beta/openai/", max_retries=5 ) + elif config.get("llm_provider") == "anthropic": + # Anthropic doesn't provide embeddings - default to local embedding service + self.embedding = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") + self.client = OpenAI( + base_url="http://localhost:8000/v1", + api_key="local" + ) elif config["backend_url"] == "http://localhost:11434/v1" or config.get("llm_provider") == "ollama": self.embedding = "nomic-embed-text" self.client = OpenAI(base_url=config["backend_url"]) diff --git a/verify_local_embeddings.py b/verify_local_embeddings.py new file mode 100755 index 00000000..dff409e3 --- /dev/null +++ b/verify_local_embeddings.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +Verify that local sentence-transformers embeddings are working correctly. +This script tests the local embedding model without requiring external services. 
+""" + +import os +import sys + +def test_local_embeddings(): + """Test local sentence-transformers embeddings""" + + embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") + + print("=" * 60) + print("Local Embeddings Verification (sentence-transformers)") + print("=" * 60) + print(f"Embedding Model: {embedding_model}") + print() + + try: + # 1. Try to import sentence-transformers (Local Library Mode) + try: + from sentence_transformers import SentenceTransformer + print("✅ Found local sentence-transformers library.") + + # Load the model + print(f"đŸ“Ļ Loading embedding model: {embedding_model}") + print(" (First run will download the model, ~90MB)") + print() + + model = SentenceTransformer(embedding_model) + + # Test embedding generation + test_texts = [ + "This is a test sentence for embedding generation.", + "Financial markets are showing increased volatility.", + "The company reported strong quarterly earnings." + ] + + print(f"Testing embedding generation with {len(test_texts)} sentences:") + for i, text in enumerate(test_texts, 1): + print(f" {i}. '{text[:50]}...'") + print() + + embeddings = model.encode(test_texts, convert_to_numpy=True) + + print("✅ SUCCESS!") + print(f"Generated {len(embeddings)} embedding vectors") + print(f"Embedding dimensions: {embeddings.shape[1]}") + print(f"First embedding (first 5 values): {embeddings[0][:5].tolist()}") + print() + + # Test similarity + from numpy import dot + from numpy.linalg import norm + + def cosine_similarity(a, b): + return dot(a, b) / (norm(a) * norm(b)) + + sim_0_1 = cosine_similarity(embeddings[0], embeddings[1]) + sim_1_2 = cosine_similarity(embeddings[1], embeddings[2]) + + print("Similarity scores:") + print(f" Sentence 1 ↔ Sentence 2: {sim_0_1:.4f}") + print(f" Sentence 2 ↔ Sentence 3: {sim_1_2:.4f}") + print() + + print("=" * 60) + print("Local embeddings (Library) are working correctly! 🎉") + print("=" * 60) + return True + + except ImportError: + # 2. 
If Library missing, try connection to Local Service (Docker Mode) + print("â„šī¸ sentence-transformers library not installed.") + print("Checking for local embedding service...") + + try: + from openai import OpenAI + + embedding_url = os.getenv("EMBEDDING_API_URL", "http://localhost:8000/v1") + print(f"Connecting to: {embedding_url}") + + client = OpenAI(base_url=embedding_url, api_key="local") + + # Test embedding generation via API + test_texts = [ + "This is a test sentence for embedding generation.", + "Financial markets are showing increased volatility.", + "The company reported strong quarterly earnings." + ] + + print(f"Testing embedding generation via API with {len(test_texts)} sentences:") + for i, text in enumerate(test_texts, 1): + print(f" {i}. '{text[:50]}...'") + print() + + response = client.embeddings.create(model=embedding_model, input=test_texts) + embeddings = [data.embedding for data in response.data] + + print("✅ SUCCESS!") + print(f"Generated {len(embeddings)} embedding vectors via API") + print(f"Embedding dimensions: {len(embeddings[0])}") + print(f"First embedding (first 5 values): {embeddings[0][:5]}") + print() + + print("=" * 60) + print("Local embedding service (Docker) is working correctly! 
🎉") + print("=" * 60) + return True + + except Exception as service_error: + print("❌ FAILED!") + print("Neither sentence-transformers library nor local embedding service found.") + print(f"Library Error: sentence-transformers not installed") + print(f"Service Error: {str(service_error)}") + print() + print("=" * 60) + print("Installation Options:") + print("=" * 60) + print("OPTION 1: Run Service (Recommended - Docker)") + print(" docker run -d -p 8000:8000 ghcr.io/huggingface/text-embeddings-inference:cpu-latest --model-id sentence-transformers/all-MiniLM-L6-v2") + print(" export EMBEDDING_API_URL=http://localhost:8000/v1") + print() + print("OPTION 2: Install Library (Runs locally, adds dependencies)") + print(" pip install sentence-transformers") + print("=" * 60) + return False + + except Exception as e: + print("❌ FAILED!") + print(f"Error: {str(e)}") + return False + +if __name__ == "__main__": + success = test_local_embeddings() + sys.exit(0 if success else 1) diff --git a/verify_ollama_embeddings.py b/verify_ollama_embeddings.py new file mode 100755 index 00000000..7ed48c6c --- /dev/null +++ b/verify_ollama_embeddings.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" +Verify that Ollama embeddings are working correctly. +This script tests the embedding endpoint and model availability. 
+""" + +import os +import sys +from openai import OpenAI + +def test_ollama_embeddings(): + """Test Ollama embeddings endpoint""" + + # Get configuration from environment or use defaults + embedding_url = os.getenv("EMBEDDING_API_URL", "http://localhost:11434/v1") + embedding_model = os.getenv("EMBEDDING_MODEL", "nomic-embed-text") + + print("=" * 60) + print("Ollama Embeddings Verification") + print("=" * 60) + print(f"Embedding URL: {embedding_url}") + print(f"Embedding Model: {embedding_model}") + print() + + try: + # Initialize OpenAI client pointing to Ollama + client = OpenAI( + base_url=embedding_url, + api_key="ollama" # Ollama doesn't require a real API key + ) + + # Test embedding generation + test_text = "This is a test sentence for embedding generation." + print(f"Testing embedding generation with text:") + print(f" '{test_text}'") + print() + + response = client.embeddings.create( + model=embedding_model, + input=test_text + ) + + embedding = response.data[0].embedding + + print("✅ SUCCESS!") + print(f"Generated embedding vector with {len(embedding)} dimensions") + print(f"First 5 values: {embedding[:5]}") + print() + print("=" * 60) + print("Ollama embeddings are working correctly! 🎉") + print("=" * 60) + + return True + + except Exception as e: + print("❌ FAILED!") + print(f"Error: {str(e)}") + print() + print("=" * 60) + print("Troubleshooting Steps:") + print("=" * 60) + print("1. Make sure Ollama is running:") + print(" $ ollama serve") + print() + print("2. Pull the embedding model:") + print(f" $ ollama pull {embedding_model}") + print() + print("3. Verify Ollama is accessible:") + print(f" $ curl {embedding_url.replace('/v1', '')}/api/tags") + print() + print("4. 
Check if the model is available:") + print(f" $ ollama list | grep {embedding_model}") + print() + print("For more help, see: docs/LOCAL_EMBEDDINGS.md") + print("=" * 60) + + return False + +if __name__ == "__main__": + success = test_ollama_embeddings() + sys.exit(0 if success else 1) diff --git a/verify_tei_native.py b/verify_tei_native.py new file mode 100644 index 00000000..3a404f5f --- /dev/null +++ b/verify_tei_native.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Verify local embedding service using the native TEI /embed endpoint. +This uses pure HTTP requests without the OpenAI client. +""" + +import sys +import requests +import json +import time + +def test_native_endpoint(): + url = "http://localhost:11434/embed" + headers = {"Content-Type": "application/json"} + + print(f"Testing Native TEI Endpoint: {url}") + print("-" * 50) + + test_inputs = [ + "This is a test using the native /embed endpoint.", + "It should be slightly faster than the OpenAI-compatible one." + ] + + payload = {"inputs": test_inputs} + + try: + start_time = time.time() + response = requests.post(url, headers=headers, json=payload) + response.raise_for_status() + duration = time.time() - start_time + + embeddings = response.json() + + print("✅ SUCCESS!") + print(f"Time taken: {duration:.4f}s") + print(f"Received {len(embeddings)} embeddings") + print(f"Dimensions: {len(embeddings[0])}") + print(f"First 5 values: {embeddings[0][:5]}") + print("-" * 50) + return True + + except requests.exceptions.ConnectionError: + print("❌ FAILED: Connection refused.") + print("Make sure the container is running: ./startEmbedding.sh") + return False + except Exception as e: + print(f"❌ FAILED: {str(e)}") + if hasattr(e, 'response') and e.response: + print(f"Status: {e.response.status_code}") + print(f"Response: {e.response.text}") + return False + +if __name__ == "__main__": + if test_native_endpoint(): + sys.exit(0) + else: + sys.exit(1)