AI_Secretary_System/.env.docker.example at main · DebuggingMax/AI_Secretary_System · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# =============================================================================
# AI Secretary System - Docker Environment Configuration
# =============================================================================
# Copy this file to .env and configure for your deployment
#
# Usage:
#   cp .env.docker.example .env
#   # Edit .env with your values
#   docker compose up -d
# =============================================================================

# -----------------------------------------------------------------------------
# LLM Configuration
# -----------------------------------------------------------------------------
# Selects which language-model backend is used and which model it serves.
# Write values without surrounding quotes and keep comments on their own lines.

# LLM backend. Accepted values:
#   vllm                 - local GPU inference through vLLM
#   cloud:{provider_id}  - a cloud AI provider; replace {provider_id} with the
#                          provider's ID
# The legacy value "gemini" is still accepted: it is auto-migrated to a cloud
# provider on startup.
LLM_BACKEND=vllm

# Deployment mode. Accepted values:
#   full   - all features
#   cloud  - no GPU/hardware
#   local  - explicit full
DEPLOYMENT_MODE=full

# Model served by vLLM (GPU mode only).
# Options: Qwen/Qwen2.5-7B-Instruct-AWQ, meta-llama/Meta-Llama-3.1-8B-Instruct
# The Llama option is a gated model: set HF_TOKEN (see "Model Access") to use it.
VLLM_MODEL=Qwen/Qwen2.5-7B-Instruct-AWQ

# vLLM model name, used with LoRA adapters. Shipped empty in this template.
VLLM_MODEL_NAME=

# Gemini API key, used for cloud fallback or CPU mode.
# Treat as a secret: never commit a populated .env file.
GEMINI_API_KEY=

# Secretary persona. Accepted values: "anna" or "marina".
SECRETARY_PERSONA=anna

# -----------------------------------------------------------------------------
# Security
# -----------------------------------------------------------------------------
# The defaults below are development-friendly; review each one before exposing
# the service publicly.

# Secret used to sign admin JWTs. Auto-generated if left empty; setting it
# explicitly is recommended for production.
# A long random value works well, e.g. the output of: openssl rand -hex 32
# NOTE(review): presumably an auto-generated secret is not persisted, so admin
# tokens would stop validating after a restart - confirm.
ADMIN_JWT_SECRET=

# Rate limiting (slowapi).
# Set RATE_LIMIT_ENABLED to "false" to disable rate limiting.
# Each limit is written as <count>/<period>, e.g. 60/minute.
# NOTE(review): the suffixes suggest DEFAULT is the general limit and
# AUTH/CHAT/TTS/STT apply to those endpoint groups - confirm against the app.
RATE_LIMIT_ENABLED=true
RATE_LIMIT_DEFAULT=60/minute
RATE_LIMIT_AUTH=10/minute
RATE_LIMIT_CHAT=30/minute
RATE_LIMIT_TTS=20/minute
RATE_LIMIT_STT=20/minute

# CORS allowed origins, comma-separated.
# "*" allows every origin and is meant for development only.
# For production, list exact origins: https://example.com,https://app.example.com
CORS_ORIGINS=*

# Security headers.
# Set SECURITY_HEADERS_ENABLED to "false" to disable security headers.
SECURITY_HEADERS_ENABLED=true
# X-Frame-Options value: DENY (blocks all framing) or SAMEORIGIN (allows
# framing by pages from the same origin).
X_FRAME_OPTIONS=DENY

# -----------------------------------------------------------------------------
# External Access
# -----------------------------------------------------------------------------

# Port to expose for the orchestrator service.
# NOTE(review): presumably the host-side port published by docker-compose;
# change it if 8002 is already in use on the host - confirm against the
# compose file.
ORCHESTRATOR_PORT=8002

# -----------------------------------------------------------------------------
# Model Access
# -----------------------------------------------------------------------------

# Hugging Face access token. Required for gated models like Llama.
# Shipped empty in this template.
# Treat as a secret: never commit a populated .env file.
HF_TOKEN=

# -----------------------------------------------------------------------------
# Advanced (usually no need to change)
# -----------------------------------------------------------------------------
# Both values below are set automatically in docker-compose. The commented
# lines are shown for reference; uncomment one only to point at a different
# service or path.

# Redis URL (set automatically in docker-compose)
# REDIS_URL=redis://redis:6379/0

# Database URL (set automatically in docker-compose)
# NOTE(review): in SQLAlchemy-style SQLite URLs, three slashes denote a path
# relative to the working directory (data/secretary.db) and four slashes an
# absolute path (/data/secretary.db) - confirm which one is intended here.
# DATABASE_URL=sqlite+aiosqlite:///data/secretary.db