slurm init

parent ff1296a860 · commit 43ae0453aa
.env.slurm.template
@@ -0,0 +1,39 @@
# TradingAgents SLURM Configuration
# Copy this file to .env and customize for your cluster environment

# LLM Configuration
LLM_PROVIDER=ollama
LLM_BACKEND_URL=http://localhost:11434/v1
DEEP_THINK_LLM=llama3.2
QUICK_THINK_LLM=llama3.2

# Alternative: OpenAI/OpenRouter Configuration
# LLM_PROVIDER=openai
# LLM_BACKEND_URL=https://api.openai.com/v1
# DEEP_THINK_LLM=gpt-4
# QUICK_THINK_LLM=gpt-3.5-turbo
# OPENAI_API_KEY=your_openai_api_key_here

# Alternative: Anthropic Configuration
# LLM_PROVIDER=anthropic
# DEEP_THINK_LLM=claude-3-sonnet-20240229
# QUICK_THINK_LLM=claude-3-haiku-20240307
# ANTHROPIC_API_KEY=your_anthropic_api_key_here

# TradingAgents Configuration
TRADINGAGENTS_RESULTS_DIR=./results
MAX_DEBATE_ROUNDS=2
MAX_RISK_DISCUSS_ROUNDS=2
ONLINE_TOOLS=true

# Data Sources API Keys (optional but recommended)
FINNHUB_API_KEY=your_finnhub_api_key
REDDIT_CLIENT_ID=your_reddit_client_id
REDDIT_CLIENT_SECRET=your_reddit_client_secret
REDDIT_USER_AGENT=TradingAgents/1.0

# SLURM Cluster Specific
SLURM_PARTITION=cpu
SLURM_GPU_PARTITION=gpu
SLURM_MAX_TIME=08:00:00
SLURM_DEFAULT_MEMORY=16G
.gitignore
@@ -8,4 +8,5 @@ eval_data/
*.egg-info/
.env
*.log
results
results
.idea
@@ -0,0 +1,341 @@
# TradingAgents SLURM Cluster Guide

This guide explains how to run the TradingAgents framework on a SLURM cluster environment.

## Overview

The TradingAgents framework has been configured to run efficiently on SLURM clusters, with the following features:

- **Multi-job support**: Single analysis, batch processing, and GPU-accelerated runs
- **Resource management**: Optimized CPU, memory, and GPU allocation
- **Environment isolation**: Python virtual environments and dependency management
- **Result collection**: Structured output and error handling
- **LLM flexibility**: Support for various LLM providers (OpenAI, Anthropic, Ollama, etc.)

## Files Created

| File                       | Purpose                                       |
| -------------------------- | --------------------------------------------- |
| `slurm_setup.sh`           | Environment setup and dependency installation |
| `slurm_single_analysis.sh` | Single stock analysis job                     |
| `slurm_batch_analysis.sh`  | Batch analysis for multiple stocks            |
| `slurm_gpu_analysis.sh`    | GPU-accelerated analysis with local models    |
| `slurm_manager.sh`         | Job management and utility script             |
| `.env.slurm.template`      | Environment configuration template            |

## Quick Start

### 1. Initial Setup

```bash
# Make the manager script executable
chmod +x slurm_manager.sh

# Set up the environment and create directories
./slurm_manager.sh setup

# Submit the setup job to install dependencies
./slurm_manager.sh submit-setup
```

### 2. Configure Environment

Edit the `.env` file (created from the template) to configure your LLM provider:

```bash
# For Ollama (local models)
LLM_PROVIDER=ollama
LLM_BACKEND_URL=http://localhost:11434/v1
DEEP_THINK_LLM=llama3.2
QUICK_THINK_LLM=llama3.2

# For OpenAI
LLM_PROVIDER=openai
OPENAI_API_KEY=your_api_key_here
DEEP_THINK_LLM=gpt-4
QUICK_THINK_LLM=gpt-3.5-turbo

# For Anthropic
LLM_PROVIDER=anthropic
ANTHROPIC_API_KEY=your_api_key_here
DEEP_THINK_LLM=claude-3-sonnet-20240229
QUICK_THINK_LLM=claude-3-haiku-20240307
```

### 3. Submit Jobs

```bash
# Single stock analysis
./slurm_manager.sh submit-single AAPL

# Batch analysis (multiple stocks)
./slurm_manager.sh submit-batch

# GPU-accelerated analysis
./slurm_manager.sh submit-gpu TSLA
```

### 4. Monitor Jobs

```bash
# Check all recent jobs
./slurm_manager.sh status

# Check a specific job
./slurm_manager.sh status 12345

# View job output
./slurm_manager.sh output 12345

# View job errors
./slurm_manager.sh output 12345 err
```

### 5. Collect Results

```bash
# View results for all symbols
./slurm_manager.sh results

# View results for a specific symbol
./slurm_manager.sh results AAPL

# View results for a specific date
./slurm_manager.sh results AAPL 2024-01-15
```

## Job Types

### 1. Single Analysis (`slurm_single_analysis.sh`)

- **Purpose**: Analyze a single stock symbol
- **Resources**: 8 CPUs, 16GB RAM, 4 hours
- **Usage**: Best for focused analysis or testing

```bash
sbatch slurm_single_analysis.sh SYMBOL DATE
# or
./slurm_manager.sh submit-single SYMBOL DATE
```

### 2. Batch Analysis (`slurm_batch_analysis.sh`)

- **Purpose**: Analyze multiple stocks in parallel
- **Resources**: Array job with up to 5 concurrent tasks
- **Default symbols**: SPY, QQQ, AAPL, MSFT, GOOGL, AMZN, TSLA, NVDA, META, NFLX
- **Usage**: Efficient for portfolio-wide analysis

```bash
sbatch slurm_batch_analysis.sh
# or
./slurm_manager.sh submit-batch
```

### 3. GPU Analysis (`slurm_gpu_analysis.sh`)

- **Purpose**: GPU-accelerated analysis with local models
- **Resources**: 1 GPU, 8 CPUs, 32GB RAM, 8 hours
- **Usage**: Best for Ollama or other local LLM providers

```bash
sbatch slurm_gpu_analysis.sh SYMBOL DATE
# or
./slurm_manager.sh submit-gpu SYMBOL DATE
```

## Resource Requirements

### Minimum Requirements

- **CPU Jobs**: 4-8 cores, 8-16GB RAM
- **GPU Jobs**: 1 GPU, 8 cores, 32GB RAM
- **Storage**: ~1GB for dependencies, variable for results/cache

### Recommended Partitions

- **CPU Partition**: For most analysis jobs
- **GPU Partition**: For local LLM acceleration
- **High-Memory Partition**: For large-scale batch processing (verify availability with the command below)
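
Partition names differ between clusters; the `cpu` and `gpu` names used throughout these scripts come from `.env.slurm.template` and may not exist on yours. A quick way to check what is actually available, with each partition's time limit, per-node memory, and GPUs:

```bash
# %P = partition, %l = time limit, %m = memory per node (MB), %G = GRES (GPUs)
sinfo -o "%P %l %m %G"
```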

## LLM Provider Configuration

### Ollama (Recommended for Clusters)

- Runs locally on compute nodes
- No external API dependencies
- GPU acceleration support
- Models: llama3.2, mistral, etc. (verify with the commands below)
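
A quick sanity check before submitting, assuming Ollama is installed on the node and serving on its default port (11434):

```bash
# List the models the local Ollama server currently has
curl -s http://localhost:11434/api/tags

# Pull a model referenced in .env if it is missing
ollama pull llama3.2
```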

### OpenAI/OpenRouter

- Requires API key and internet access
- Fast inference
- Usage costs apply
- Models: gpt-4, gpt-3.5-turbo, etc.

### Anthropic

- Requires API key and internet access
- High-quality reasoning
- Usage costs apply
- Models: claude-3-sonnet, claude-3-haiku

## File Structure

```
TradingAgents/
├── slurm_*.sh          # SLURM job scripts
├── slurm_manager.sh    # Job management utility
├── .env                # Environment configuration
├── logs/               # Job output and error logs
├── results/            # Analysis results by symbol/date
├── venv/               # Python virtual environment
└── data_cache/         # Cached market data
```

## Error Handling and Exit Behavior

### **Automatic Script Exit**

✅ **Yes, scripts will exit automatically on failures**, with the following behavior:

#### **1. Bash Script Level**

- **`set -euo pipefail`**: Scripts exit immediately on any command failure
- **`-e`**: Exit on any command that returns a non-zero status
- **`-u`**: Exit on any reference to an undefined variable
- **`-o pipefail`**: Exit if any command in a pipeline fails (see the sketch below)
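
A toy sketch of what each flag does; every commented line is a point where the script would terminate (assuming `missing.txt` does not exist):

```bash
#!/bin/bash
set -euo pipefail

false                     # -e: a non-zero exit status aborts the script here
echo "$UNDEFINED_VAR"     # -u: referencing an unset variable aborts
cat missing.txt | wc -l   # -o pipefail: the failed cat fails the whole pipeline
```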

#### **2. Python Script Level**

- **Exception handling**: All Python errors are caught and logged
- **Explicit exit**: `sys.exit(1)` on any analysis failure
- **Error logging**: Failures are saved to JSON files for debugging

#### **3. SLURM Level**

- **Job state**: Failed jobs are marked as FAILED in SLURM
- **Resource cleanup**: Allocated resources are automatically released
- **Log preservation**: Output and error logs are saved for investigation

### **What Happens on Failure**

1. **Immediate termination** of the failing script
2. **Error information saved** to `results/[SYMBOL]/[DATE]/error_[JOB_ID].json`
3. **SLURM job status** set to FAILED
4. **Exit code 1** returned to the SLURM scheduler
5. **Resources released** back to the cluster

## Troubleshooting

### Common Issues

1. **Job Fails to Start**

   - Check SLURM partition availability: `sinfo`
   - Verify resource requirements match cluster limits
   - Ensure the environment setup job completed successfully

2. **Python Dependencies Missing**

   - Run the setup job: `./slurm_manager.sh submit-setup`
   - Check the setup job output: `./slurm_manager.sh output SETUP_JOB_ID`

3. **LLM Connection Issues**

   - Verify API keys in the `.env` file
   - Check network connectivity for external providers
   - For Ollama, ensure GPU resources are available

4. **Out of Memory Errors**

   - Increase the memory allocation in the job scripts
   - Reduce `max_debate_rounds` in the configuration
   - Use the GPU partition for memory-intensive models

5. **Script Exit Issues**

   - Check exit codes: `sacct -j JOB_ID --format=JobID,State,ExitCode`
   - Review error logs: `./slurm_manager.sh output JOB_ID err`
   - Verify all prerequisites are met before job submission

### Debugging

```bash
# Check job status and exit codes
squeue -u $USER
sacct -j JOB_ID --format=JobID,State,ExitCode,Reason

# View detailed job information
scontrol show job JOB_ID

# Check node resources
sinfo -N -l

# View job output in real time
tail -f logs/trading_JOB_ID.out

# Check for error files
find results -name "error_*.json" -exec echo "Found error in: {}" \; -exec cat {} \;
```

## Customization

### Modify Stock Lists

Edit the `SYMBOLS` array in `slurm_batch_analysis.sh`. If you change the number of symbols, also update the `#SBATCH --array` range in the same script to match (see below):

```bash
SYMBOLS=("AAPL" "MSFT" "GOOGL" "AMZN" "TSLA")
```
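
For the five-symbol list above, the matching directive would be:

```bash
#SBATCH --array=1-5%5   # one task per symbol; %5 caps concurrent tasks at 5
```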

### Adjust Resources

Modify the SLURM directives in the job scripts:

```bash
#SBATCH --cpus-per-task=16   # More CPUs
#SBATCH --mem=64G            # More memory
#SBATCH --time=12:00:00      # Longer runtime
```

### Configure Analysis Parameters

Edit the config in the Python scripts:

```python
config["max_debate_rounds"] = 3        # More thorough analysis
config["max_risk_discuss_rounds"] = 3  # More risk assessment
config["online_tools"] = True          # Enable web scraping
```

## Best Practices

1. **Start Small**: Test with a single analysis before batch jobs
2. **Monitor Resources**: Check CPU/memory usage during jobs
3. **Batch Wisely**: Use array jobs for multiple symbols
4. **Cache Data**: Leverage data caching to reduce API calls
5. **Log Everything**: Review job logs for optimization opportunities
6. **Backup Results**: Copy important results to permanent storage (see the sketch below)
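
As a sketch of the backup step, with the destination standing in for whatever permanent storage your site provides:

```bash
# Mirror the results tree to permanent storage (destination path is hypothetical)
rsync -av results/ /project/your-group/tradingagents-results/
```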

## Performance Tips

1. **Use Local Models**: Ollama reduces API latency and costs
2. **Parallel Processing**: Leverage array jobs for batch analysis
3. **Resource Matching**: Match job resources to actual needs
4. **Data Locality**: Store frequently accessed data on fast storage (see the sketch below)
5. **Network Optimization**: Use cluster-internal services when possible
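
A sketch of the data-locality idea, assuming the cluster exposes node-local scratch via `$SLURM_TMPDIR` (site-specific; some sites use `$TMPDIR` or a fixed path, so adjust for your environment):

```bash
# Stage the shared cache onto node-local scratch at job start
SCRATCH="${SLURM_TMPDIR:-/tmp/$USER}"
mkdir -p "$SCRATCH"
rsync -a data_cache/ "$SCRATCH/data_cache/"

# ... run the analysis against the staged copy, then sync any updates back ...
rsync -a "$SCRATCH/data_cache/" data_cache/
```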

## Security Considerations

1. **API Keys**: Store sensitive keys in the `.env` file, not in scripts
2. **File Permissions**: Ensure job scripts and data have appropriate permissions (see below)
3. **Network Access**: Some clusters restrict external API access
4. **Data Privacy**: Be aware of data residency requirements for financial data
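
On a shared login node, a minimal tightening could be:

```bash
chmod 600 .env           # API keys readable by your user only
chmod 700 logs results   # keep job logs and results private to you
```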

## Support

For issues specific to:

- **SLURM**: Consult your cluster documentation or administrator
- **TradingAgents**: Check the main repository issues and documentation
- **LLM Providers**: Refer to the respective provider documentation

slurm_batch_analysis.sh
@@ -0,0 +1,140 @@
#!/bin/bash
#SBATCH --job-name=trading-agents-batch
#SBATCH --output=logs/batch_%A_%a.out
#SBATCH --error=logs/batch_%A_%a.err
#SBATCH --time=06:00:00
#SBATCH --cpus-per-task=8
#SBATCH --mem=16G
#SBATCH --partition=cpu
#SBATCH --array=1-10%5

# Exit on any error, undefined variable, or pipe failure
set -euo pipefail

# Batch analysis for multiple stocks.
# This script runs trading analysis for multiple symbols in parallel;
# the %5 in the --array directive limits the array to 5 concurrent tasks.

echo "Starting TradingAgents batch analysis..."
echo "Job ID: ${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}"
echo "Node: $SLURM_NODELIST"
echo "Started at: $(date)"

# Load necessary modules
module load python/3.10

# Set up environment
WORK_DIR=${SLURM_SUBMIT_DIR}
cd "$WORK_DIR"

# Activate virtual environment
source venv/bin/activate

# Set environment variables (the :- default keeps set -u happy when PYTHONPATH is unset)
export PYTHONPATH="${WORK_DIR}:${PYTHONPATH:-}"
export TRADINGAGENTS_RESULTS_DIR="${WORK_DIR}/results"

# Define the array of stocks to analyze; keep the length in sync with #SBATCH --array
SYMBOLS=("SPY" "QQQ" "AAPL" "MSFT" "GOOGL" "AMZN" "TSLA" "NVDA" "META" "NFLX")

# Get the symbol for this array task (task IDs are 1-based, the array is 0-based)
SYMBOL=${SYMBOLS[$((SLURM_ARRAY_TASK_ID-1))]}
DATE=$(date +%Y-%m-%d)

echo "Processing symbol: $SYMBOL (Task ${SLURM_ARRAY_TASK_ID})"

# Create the results directory for this symbol
RESULTS_DIR="${WORK_DIR}/results/${SYMBOL}/${DATE}"
mkdir -p "$RESULTS_DIR"

# Generate a Python script for this analysis (the unquoted EOF lets the shell
# substitute $SYMBOL, $DATE, $SLURM_ARRAY_TASK_ID, and $RESULTS_DIR into the code)
cat > "batch_analysis_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.py" << EOF
import os
import sys
import json
from datetime import datetime
from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def main():
    symbol = "$SYMBOL"
    date = "$DATE"
    task_id = "$SLURM_ARRAY_TASK_ID"

    print(f"Batch analysis - Task {task_id}: {symbol} on {date}")

    # Create a custom config for the SLURM environment
    config = DEFAULT_CONFIG.copy()

    # Adjust for the cluster environment
    config["results_dir"] = os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results")
    config["max_debate_rounds"] = 2
    config["max_risk_discuss_rounds"] = 2
    config["online_tools"] = True

    # Use environment variables for the LLM configuration
    config["llm_provider"] = os.getenv("LLM_PROVIDER", "ollama")
    config["backend_url"] = os.getenv("LLM_BACKEND_URL", "http://localhost:11434/v1")
    config["deep_think_llm"] = os.getenv("DEEP_THINK_LLM", "llama3.2")
    config["quick_think_llm"] = os.getenv("QUICK_THINK_LLM", "llama3.2")

    try:
        # Initialize trading agents
        ta = TradingAgentsGraph(debug=True, config=config)

        # Run analysis
        print(f"Running trading analysis for {symbol}...")
        state, decision = ta.propagate(symbol, date)

        # Save results
        results = {
            "symbol": symbol,
            "date": date,
            "decision": decision,
            "array_job_id": os.getenv("SLURM_ARRAY_JOB_ID"),
            "task_id": task_id,
            "node": os.getenv("SLURM_NODELIST"),
            "completed_at": datetime.now().isoformat()
        }

        output_file = f"$RESULTS_DIR/batch_results_task_{task_id}.json"
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)

        print(f"Analysis completed for {symbol}. Results saved to: {output_file}")
        print(f"Decision: {decision}")

    except Exception as e:
        print(f"Error during analysis of {symbol}: {str(e)}")
        # Save error information
        error_info = {
            "symbol": symbol,
            "date": date,
            "error": str(e),
            "array_job_id": os.getenv("SLURM_ARRAY_JOB_ID"),
            "task_id": task_id,
            "failed_at": datetime.now().isoformat()
        }

        error_file = f"$RESULTS_DIR/error_task_{task_id}.json"
        with open(error_file, 'w') as f:
            json.dump(error_info, f, indent=2)

        sys.exit(1)

if __name__ == "__main__":
    main()
EOF

# Run the analysis
echo "Running Python analysis script for $SYMBOL..."
python "batch_analysis_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.py"

# Clean up the temporary script
rm "batch_analysis_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.py"

echo "Task ${SLURM_ARRAY_TASK_ID} for $SYMBOL completed at: $(date)"

slurm_gpu_analysis.sh
@@ -0,0 +1,165 @@
#!/bin/bash
#SBATCH --job-name=trading-agents-gpu
#SBATCH --output=logs/gpu_trading_%j.out
#SBATCH --error=logs/gpu_trading_%j.err
#SBATCH --time=08:00:00
#SBATCH --gpus=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
#SBATCH --partition=gpu

# Exit on any error, undefined variable, or pipe failure
set -euo pipefail

# GPU-accelerated analysis using local LLM models.
# This script is useful when running with Ollama or other local models
# that can benefit from GPU acceleration.

echo "Starting TradingAgents GPU analysis..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-unset}"   # :- default avoids a set -u abort if no GPU is bound
echo "Started at: $(date)"

# Parse command line arguments
SYMBOL=${1:-"SPY"}
DATE=${2:-$(date +%Y-%m-%d)}

echo "Analyzing symbol: $SYMBOL for date: $DATE"

# Load necessary modules
module load python/3.10
module load cuda/11.8

# Set up environment
WORK_DIR=${SLURM_SUBMIT_DIR}
cd "$WORK_DIR"

# Activate virtual environment
source venv/bin/activate

# Export the cluster configuration so LLM_PROVIDER is visible to this shell
# (load_dotenv() only loads .env inside the Python process)
if [ -f ".env" ]; then
    set -a
    source .env
    set +a
fi

# Set environment variables
export PYTHONPATH="${WORK_DIR}:${PYTHONPATH:-}"
export TRADINGAGENTS_RESULTS_DIR="${WORK_DIR}/results"

# Set up Ollama if using local models
OLLAMA_PID=""
if [ "${LLM_PROVIDER:-}" == "ollama" ]; then
    # Start the Ollama server on this node
    export OLLAMA_HOST=0.0.0.0:11434
    export OLLAMA_GPU_LAYERS=999  # Use all GPU layers

    # Start Ollama in the background
    ollama serve &
    OLLAMA_PID=$!

    # Wait for Ollama to start
    sleep 10

    # Pull the required model if it doesn't exist
    ollama pull llama3.2 || echo "Model llama3.2 already exists or failed to pull"
fi

# Create the results directory for this job
RESULTS_DIR="${WORK_DIR}/results/${SYMBOL}/${DATE}"
mkdir -p "$RESULTS_DIR"

# Generate a Python script for the GPU analysis (unquoted EOF: the shell
# substitutes $SYMBOL, $DATE, and $RESULTS_DIR into the code)
cat > "gpu_analysis_${SLURM_JOB_ID}.py" << EOF
import os
import sys
import json
import torch
from datetime import datetime
from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def main():
    symbol = "$SYMBOL"
    date = "$DATE"

    print(f"Starting GPU-accelerated analysis for {symbol} on {date}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU device: {torch.cuda.get_device_name()}")
        print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

    # Create a custom config for the GPU SLURM environment
    config = DEFAULT_CONFIG.copy()

    # Adjust for the GPU cluster environment
    config["results_dir"] = os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results")
    config["max_debate_rounds"] = 3  # More rounds for thorough analysis
    config["max_risk_discuss_rounds"] = 3
    config["online_tools"] = True

    # Configure for the GPU-accelerated LLM
    config["llm_provider"] = os.getenv("LLM_PROVIDER", "ollama")
    config["backend_url"] = os.getenv("LLM_BACKEND_URL", "http://localhost:11434/v1")
    config["deep_think_llm"] = os.getenv("DEEP_THINK_LLM", "llama3.2")
    config["quick_think_llm"] = os.getenv("QUICK_THINK_LLM", "llama3.2")

    try:
        # Initialize trading agents
        ta = TradingAgentsGraph(debug=True, config=config)

        # Run analysis
        print("Running GPU-accelerated trading analysis...")
        state, decision = ta.propagate(symbol, date)

        # Save results
        results = {
            "symbol": symbol,
            "date": date,
            "decision": decision,
            "job_id": os.getenv("SLURM_JOB_ID"),
            "node": os.getenv("SLURM_NODELIST"),
            "gpu_used": torch.cuda.is_available(),
            "completed_at": datetime.now().isoformat()
        }

        output_file = f"$RESULTS_DIR/gpu_analysis_results_{os.getenv('SLURM_JOB_ID')}.json"
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)

        print(f"GPU analysis completed. Results saved to: {output_file}")
        print(f"Decision: {decision}")

    except Exception as e:
        print(f"Error during GPU analysis: {str(e)}")
        # Save error information
        error_info = {
            "symbol": symbol,
            "date": date,
            "error": str(e),
            "job_id": os.getenv("SLURM_JOB_ID"),
            "gpu_available": torch.cuda.is_available(),
            "failed_at": datetime.now().isoformat()
        }

        error_file = f"$RESULTS_DIR/gpu_error_{os.getenv('SLURM_JOB_ID')}.json"
        with open(error_file, 'w') as f:
            json.dump(error_info, f, indent=2)

        sys.exit(1)

if __name__ == "__main__":
    main()
EOF

# Run the analysis
echo "Running GPU Python analysis script..."
python "gpu_analysis_${SLURM_JOB_ID}.py"

# Clean up
rm "gpu_analysis_${SLURM_JOB_ID}.py"

# Stop Ollama if we started it (the :- default avoids a set -u abort when we didn't)
if [ -n "${OLLAMA_PID:-}" ]; then
    kill $OLLAMA_PID
fi

echo "GPU job completed at: $(date)"

slurm_manager.sh
@@ -0,0 +1,321 @@
#!/bin/bash

# TradingAgents SLURM Job Management Script
# This script provides convenience functions for managing TradingAgents jobs on SLURM

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print colored output
print_status() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

print_header() {
    echo -e "${BLUE}=== $1 ===${NC}"
}

# Check if required files exist
check_prerequisites() {
    print_header "Checking Prerequisites"

    local missing_files=()

    if [ ! -f "requirements.txt" ]; then
        missing_files+=("requirements.txt")
    fi

    if [ ! -f "slurm_setup.sh" ]; then
        missing_files+=("slurm_setup.sh")
    fi

    if [ ! -f ".env.slurm.template" ]; then
        missing_files+=(".env.slurm.template")
    fi

    if [ ${#missing_files[@]} -ne 0 ]; then
        print_error "Missing required files: ${missing_files[*]}"
        return 1
    fi

    print_status "All required files found"
    return 0
}

# Setup environment
setup_environment() {
    print_header "Setting Up Environment"

    # Create necessary directories
    mkdir -p logs results data_cache

    # Copy the environment template if .env doesn't exist
    if [ ! -f ".env" ]; then
        cp .env.slurm.template .env
        print_status "Created .env file from template. Please customize it for your environment."
    fi

    # Make scripts executable
    chmod +x slurm_*.sh

    print_status "Environment setup completed"
}

# Submit setup job
submit_setup() {
    print_header "Submitting Setup Job"

    if [ ! -f "slurm_setup.sh" ]; then
        print_error "slurm_setup.sh not found"
        return 1
    fi

    local job_id=$(sbatch slurm_setup.sh | grep -o '[0-9]*')
    print_status "Setup job submitted with ID: $job_id"
    echo "$job_id"
}

# Submit single analysis job
submit_single_analysis() {
    local symbol=${1:-"SPY"}
    local date=${2:-$(date +%Y-%m-%d)}

    print_header "Submitting Single Analysis Job"
    print_status "Symbol: $symbol, Date: $date"

    if [ ! -f "slurm_single_analysis.sh" ]; then
        print_error "slurm_single_analysis.sh not found"
        return 1
    fi

    local job_id=$(sbatch slurm_single_analysis.sh "$symbol" "$date" | grep -o '[0-9]*')
    print_status "Single analysis job submitted with ID: $job_id"
    echo "$job_id"
}

# Submit batch analysis job
submit_batch_analysis() {
    print_header "Submitting Batch Analysis Job"

    if [ ! -f "slurm_batch_analysis.sh" ]; then
        print_error "slurm_batch_analysis.sh not found"
        return 1
    fi

    local job_id=$(sbatch slurm_batch_analysis.sh | grep -o '[0-9]*')
    print_status "Batch analysis job submitted with ID: $job_id"
    echo "$job_id"
}

# Submit GPU analysis job
submit_gpu_analysis() {
    local symbol=${1:-"SPY"}
    local date=${2:-$(date +%Y-%m-%d)}

    print_header "Submitting GPU Analysis Job"
    print_status "Symbol: $symbol, Date: $date"

    if [ ! -f "slurm_gpu_analysis.sh" ]; then
        print_error "slurm_gpu_analysis.sh not found"
        return 1
    fi

    local job_id=$(sbatch slurm_gpu_analysis.sh "$symbol" "$date" | grep -o '[0-9]*')
    print_status "GPU analysis job submitted with ID: $job_id"
    echo "$job_id"
}

# Check job status
check_job_status() {
    local job_id=$1

    if [ -z "$job_id" ]; then
        print_error "Job ID required"
        return 1
    fi

    print_header "Job Status for ID: $job_id"
    squeue -j "$job_id" --format="%.18i %.9P %.20j %.8u %.8T %.10M %.6D %R"
}

# Show recent jobs (squeue --name takes a comma-separated list of exact names, not a glob)
show_recent_jobs() {
    print_header "Recent TradingAgents Jobs"
    squeue -u "$USER" --name=trading-agents-setup,trading-agents-single,trading-agents-batch,trading-agents-gpu --format="%.18i %.9P %.20j %.8u %.8T %.10M %.6D %R"
}

# Cancel job
cancel_job() {
    local job_id=$1

    if [ -z "$job_id" ]; then
        print_error "Job ID required"
        return 1
    fi

    print_header "Cancelling Job: $job_id"
    scancel "$job_id"
    print_status "Job $job_id cancelled"
}

# View job output
view_job_output() {
    local job_id=$1
    local output_type=${2:-"out"} # "out" or "err"

    if [ -z "$job_id" ]; then
        print_error "Job ID required"
        return 1
    fi

    local output_file
    if [ "$output_type" == "err" ]; then
        output_file="logs/trading_${job_id}.err"
    else
        output_file="logs/trading_${job_id}.out"
    fi

    if [ -f "$output_file" ]; then
        print_header "Job $job_id Output ($output_type)"
        tail -f "$output_file"
    else
        print_error "Output file not found: $output_file"
    fi
}

# Check for failed jobs and show errors
check_failed_jobs() {
    print_header "Checking for Failed Jobs"

    # Get failed jobs from sacct (--name also takes a comma-separated list)
    local failed_jobs=$(sacct -u "$USER" --name=trading-agents-setup,trading-agents-single,trading-agents-batch,trading-agents-gpu --state=FAILED --format=JobID,State,ExitCode --noheader --parsable2 | cut -d'|' -f1)

    if [ -z "$failed_jobs" ]; then
        print_status "No failed jobs found"
        return 0
    fi

    echo "$failed_jobs" | while read -r job_id; do
        if [ -n "$job_id" ]; then
            print_warning "Failed job: $job_id"

            # Look for error files
            local error_files=$(find results -name "error_${job_id}.json" 2>/dev/null)
            if [ -n "$error_files" ]; then
                echo "$error_files" | while read -r error_file; do
                    echo "Error details from: $error_file"
                    if command -v jq >/dev/null 2>&1; then
                        jq '.' "$error_file" 2>/dev/null || cat "$error_file"
                    else
                        cat "$error_file"
                    fi
                done
            else
                echo "No error details found for job $job_id"
            fi
            echo ""
        fi
    done
}

# Collect and display results
collect_results() {
    local symbol=${1:-"*"}
    local date=${2:-$(date +%Y-%m-%d)}

    print_header "Collecting Results"
    print_status "Symbol: $symbol, Date: $date"

    find results -name "*.json" -path "*/$symbol/$date/*" | while read -r file; do
        echo "Found result: $file"
        if command -v jq >/dev/null 2>&1; then
            jq '.decision' "$file" 2>/dev/null || echo "  (Could not parse decision)"
        fi
    done
}

# Main function
main() {
    case "$1" in
        "setup")
            check_prerequisites && setup_environment
            ;;
        "submit-setup")
            submit_setup
            ;;
        "submit-single")
            submit_single_analysis "$2" "$3"
            ;;
        "submit-batch")
            submit_batch_analysis
            ;;
        "submit-gpu")
            submit_gpu_analysis "$2" "$3"
            ;;
        "status")
            if [ -n "$2" ]; then
                check_job_status "$2"
            else
                show_recent_jobs
            fi
            ;;
        "cancel")
            cancel_job "$2"
            ;;
        "output")
            view_job_output "$2" "$3"
            ;;
        "results")
            collect_results "$2" "$3"
            ;;
        "check-failed")
            check_failed_jobs
            ;;
        "help"|"--help"|"-h"|"")
            cat << EOF
TradingAgents SLURM Job Manager

Usage: $0 <command> [arguments]

Commands:
  setup                      - Set up the environment and create necessary directories
  submit-setup               - Submit the environment setup job
  submit-single [SYM] [DATE] - Submit a single analysis job (default: SPY, today)
  submit-batch               - Submit a batch analysis job for multiple symbols
  submit-gpu [SYM] [DATE]    - Submit a GPU-accelerated analysis job
  status [JOB_ID]            - Show job status (specific job or all recent jobs)
  cancel <JOB_ID>            - Cancel a specific job
  output <JOB_ID> [err]      - View job output (stdout or stderr)
  results [SYM] [DATE]       - Collect and display results
  check-failed               - Check for failed jobs and show error details
  help                       - Show this help message

Examples:
  $0 setup                # Initial setup
  $0 submit-single AAPL   # Analyze AAPL for today
  $0 submit-batch         # Analyze multiple stocks
  $0 status 12345         # Check status of job 12345
  $0 output 12345         # View output of job 12345
  $0 results AAPL         # Show results for AAPL

EOF
            ;;
        *)
            print_error "Unknown command: $1"
            print_status "Use '$0 help' for usage information"
            exit 1
            ;;
    esac
}

# Run main function with all arguments
main "$@"

slurm_setup.sh
@@ -0,0 +1,53 @@
#!/bin/bash
#SBATCH --job-name=trading-agents-setup
#SBATCH --output=setup_%j.out
#SBATCH --error=setup_%j.err
#SBATCH --time=02:00:00
#SBATCH --cpus-per-task=4
#SBATCH --mem=8G
#SBATCH --partition=cpu

# Exit on any error, undefined variable, or pipe failure
set -euo pipefail

# TradingAgents SLURM Setup Script
# This script sets up the environment for running TradingAgents on a SLURM cluster

echo "Setting up TradingAgents environment on SLURM cluster..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "Started at: $(date)"

# Load necessary modules (adjust based on your cluster's available modules)
module load python/3.10
module load git

# Set up working directory
WORK_DIR=${SLURM_SUBMIT_DIR}
cd "$WORK_DIR"

# Create a Python virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
    echo "Creating Python virtual environment..."
    python -m venv venv
fi

# Activate virtual environment
source venv/bin/activate

# Upgrade pip
pip install --upgrade pip

# Install dependencies
echo "Installing Python dependencies..."
pip install -r requirements.txt

# Create necessary directories
mkdir -p results logs data_cache

# Set environment variables (the :- default keeps set -u happy when PYTHONPATH is unset)
export PYTHONPATH="${WORK_DIR}:${PYTHONPATH:-}"

echo "Environment setup completed at: $(date)"

slurm_single_analysis.sh
@@ -0,0 +1,154 @@
#!/bin/bash
#SBATCH --job-name=trading-agents-single
#SBATCH --output=logs/trading_%j.out
#SBATCH --error=logs/trading_%j.err
#SBATCH --time=04:00:00
#SBATCH --cpus-per-task=8
#SBATCH --mem=16G
#SBATCH --partition=cpu

# Exit on any error, undefined variable, or pipe failure
set -euo pipefail

# Single stock analysis job
# Usage: sbatch slurm_single_analysis.sh SYMBOL DATE

echo "Starting TradingAgents single analysis..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "Started at: $(date)"

# Parse command line arguments
SYMBOL=${1:-"SPY"}
DATE=${2:-$(date +%Y-%m-%d)}

echo "Analyzing symbol: $SYMBOL for date: $DATE"

# Load necessary modules
if ! module load python/3.10; then
    echo "ERROR: Failed to load Python module"
    exit 1
fi

# Set up environment
WORK_DIR=${SLURM_SUBMIT_DIR}
cd "$WORK_DIR" || { echo "ERROR: Cannot access work directory $WORK_DIR"; exit 1; }

# Activate virtual environment
if [ ! -f "venv/bin/activate" ]; then
    echo "ERROR: Virtual environment not found. Run setup first."
    exit 1
fi

if ! source venv/bin/activate; then
    echo "ERROR: Failed to activate virtual environment"
    exit 1
fi

# Set environment variables (the :- default keeps set -u happy when PYTHONPATH is unset)
export PYTHONPATH="${WORK_DIR}:${PYTHONPATH:-}"
export TRADINGAGENTS_RESULTS_DIR="${WORK_DIR}/results"

# Set SLURM-specific configurations
export SLURM_JOB_MODE=true
export SLURM_CPUS_AVAILABLE=$SLURM_CPUS_PER_TASK

# Create the results directory for this job
RESULTS_DIR="${WORK_DIR}/results/${SYMBOL}/${DATE}"
if ! mkdir -p "$RESULTS_DIR"; then
    echo "ERROR: Failed to create results directory: $RESULTS_DIR"
    exit 1
fi

# Generate a Python script for this analysis (unquoted EOF: the shell
# substitutes $SYMBOL, $DATE, and $RESULTS_DIR into the code)
cat > "slurm_analysis_${SLURM_JOB_ID}.py" << EOF
import os
import sys
import json
from datetime import datetime
from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def main():
    symbol = "$SYMBOL"
    date = "$DATE"

    print(f"Starting analysis for {symbol} on {date}")

    # Create a custom config for the SLURM environment
    config = DEFAULT_CONFIG.copy()

    # Adjust for the cluster environment
    config["results_dir"] = os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results")
    config["max_debate_rounds"] = 2  # Increase for more thorough analysis
    config["max_risk_discuss_rounds"] = 2
    config["online_tools"] = True

    # Use environment variables for the LLM configuration
    config["llm_provider"] = os.getenv("LLM_PROVIDER", "ollama")
    config["backend_url"] = os.getenv("LLM_BACKEND_URL", "http://localhost:11434/v1")
    config["deep_think_llm"] = os.getenv("DEEP_THINK_LLM", "llama3.2")
    config["quick_think_llm"] = os.getenv("QUICK_THINK_LLM", "llama3.2")

    try:
        # Initialize trading agents
        ta = TradingAgentsGraph(debug=True, config=config)

        # Run analysis
        print("Running trading analysis...")
        state, decision = ta.propagate(symbol, date)

        # Save results
        results = {
            "symbol": symbol,
            "date": date,
            "decision": decision,
            "job_id": os.getenv("SLURM_JOB_ID"),
            "node": os.getenv("SLURM_NODELIST"),
            "completed_at": datetime.now().isoformat()
        }

        output_file = f"$RESULTS_DIR/analysis_results_{os.getenv('SLURM_JOB_ID')}.json"
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)

        print(f"Analysis completed. Results saved to: {output_file}")
        print(f"Decision: {decision}")

    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        # Save error information
        error_info = {
            "symbol": symbol,
            "date": date,
            "error": str(e),
            "job_id": os.getenv("SLURM_JOB_ID"),
            "failed_at": datetime.now().isoformat()
        }

        error_file = f"$RESULTS_DIR/error_{os.getenv('SLURM_JOB_ID')}.json"
        with open(error_file, 'w') as f:
            json.dump(error_info, f, indent=2)

        sys.exit(1)

if __name__ == "__main__":
    main()
EOF

# Run the analysis
echo "Running Python analysis script..."
if ! python "slurm_analysis_${SLURM_JOB_ID}.py"; then
    echo "ERROR: Python analysis script failed"
    rm -f "slurm_analysis_${SLURM_JOB_ID}.py"
    exit 1
fi

# Clean up the temporary script
rm "slurm_analysis_${SLURM_JOB_ID}.py"

echo "Job completed successfully at: $(date)"