# docker-compose.yml (forked from KoljaB/RealtimeVoiceChat)
# The top-level 'version' key is intentionally omitted; Compose v2 warns that it is obsolete.
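#
# Prerequisite note (an assumption about the host, not from the original file):
# the GPU 'deploy.resources.reservations.devices' blocks below only take effect
# when the NVIDIA Container Toolkit is installed on the Docker host. A quick
# sanity check, run on the host before 'docker compose up' (the CUDA image tag
# is illustrative; any CUDA base image works for this test):
#
#   docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
#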
services:
  # Your FastAPI Application Service
  app:
    build: .  # Build the image using the SIMPLIFIED Dockerfile
    image: realtime-voice-chat:latest  # Name the image built by 'build:'
    container_name: realtime-voice-chat-app
    ports:
      - "8000:8000"
    environment:
      # Point to the 'ollama' service
      - OLLAMA_BASE_URL=http://ollama:11434
      # --- Other App Environment Variables ---
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      - MAX_AUDIO_QUEUE_SIZE=${MAX_AUDIO_QUEUE_SIZE:-50}
      - NVIDIA_VISIBLE_DEVICES=all  # For the app's PyTorch/DeepSpeed/etc.
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - HF_HOME=/home/appuser/.cache/huggingface
      - TORCH_HOME=/home/appuser/.cache/torch
    volumes:
      # Optional: mount code for live development
      # - ./code:/app/code
      # Mount cache directories
      - huggingface_cache:/home/appuser/.cache/huggingface
      - torch_cache:/home/appuser/.cache/torch
    depends_on:
      - ollama
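    # Note (a sketch, not in the original file): 'depends_on' as a plain list
    # only orders container startup; it does not wait for Ollama to be ready
    # to serve requests. If the healthcheck in the 'ollama' service below is
    # enabled, the list form above can be replaced with the long form to gate
    # the app on readiness:
    # depends_on:
    #   ollama:
    #     condition: service_healthy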
    deploy:  # GPU access for the app
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu, compute, utility]
    restart: unless-stopped
  # Ollama Server Service (Using Official Image)
  ollama:
    # --- Use the official Ollama image ---
    image: ollama/ollama:latest
    container_name: realtime-voice-chat-ollama
    # --- No 'build:' section needed here ---
    # command: ["ollama", "serve"]  # Usually the default command/entrypoint
    volumes:
      # Persist Ollama models and data
      - ollama_data:/root/.ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # Make GPUs visible inside the container
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # OLLAMA_MODELS can override the model directory; it points inside the volume
      # - OLLAMA_MODELS=/root/.ollama/models
    deploy:  # GPU access for the Ollama service
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu, compute, utility]
    # healthcheck:
    #   # Check if the Ollama API is responsive
    #   test: ["CMD", "wget", "--quiet", "--spider", "--tries=1", "--timeout=10", "http://localhost:11434/api/tags"]
    #   interval: 15s
    #   timeout: 10s
    #   retries: 12
    #   start_period: 45s  # Give it time to start
    restart: unless-stopped
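  # Usage note (an assumption about the workflow, not from the original file):
  # the official image starts the Ollama server but ships with no models. Once
  # the stack is up, pull whichever model the app expects into the 'ollama_data'
  # volume ('<model-name>' is a placeholder, not taken from this file):
  #
  #   docker compose exec ollama ollama pull <model-name>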
# Define named volumes for persistent data
volumes:
  ollama_data:
    driver: local
  huggingface_cache:
    driver: local
  torch_cache:
    driver: local
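
# Typical workflow (a sketch using standard Docker Compose v2 commands; none of
# this is taken from the original file). Run from the directory containing this
# file:
#
#   docker compose up --build -d   # build the app image and start both services
#   docker compose logs -f app     # follow the application logs
#   docker compose down            # stop; named volumes (models, caches) persist
#   docker compose down -v         # stop and also delete the named volumes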