# docker-compose.yml (forked from KoljaB/RealtimeVoiceChat)
# The top-level 'version' key is intentionally omitted; Compose v2 warns that it is obsolete.
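#
# Prerequisite note (an assumption about the host, not from the original file):
# the GPU 'deploy.resources.reservations.devices' blocks below only take effect
# when the NVIDIA Container Toolkit is installed on the Docker host. A quick
# sanity check, run on the host before 'docker compose up' (the CUDA image tag
# is illustrative; any CUDA base image works for this test):
#
#   docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
#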
services:
  # Your FastAPI Application Service
  app:
    build: .  # Build the image using the SIMPLIFIED Dockerfile
    image: realtime-voice-chat:latest  # Name the image built by 'build:'
    container_name: realtime-voice-chat-app
    ports:
      - "8000:8000"
    environment:
      # Point to the 'ollama' service
      - OLLAMA_BASE_URL=http://ollama:11434
      # --- Other App Environment Variables ---
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      - MAX_AUDIO_QUEUE_SIZE=${MAX_AUDIO_QUEUE_SIZE:-50}
      - NVIDIA_VISIBLE_DEVICES=all  # For the app's PyTorch/DeepSpeed/etc.
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - HF_HOME=/home/appuser/.cache/huggingface
      - TORCH_HOME=/home/appuser/.cache/torch
    volumes:
      # Optional: mount code for live development
      # - ./code:/app/code
      # Mount cache directories
      - huggingface_cache:/home/appuser/.cache/huggingface
      - torch_cache:/home/appuser/.cache/torch
    depends_on:
      - ollama
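    # Note (a sketch, not in the original file): 'depends_on' as a plain list
    # only orders container startup; it does not wait for Ollama to be ready
    # to serve requests. If the healthcheck in the 'ollama' service below is
    # enabled, the list form above can be replaced with the long form to gate
    # the app on readiness:
    # depends_on:
    #   ollama:
    #     condition: service_healthy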
    deploy:  # GPU access for the app
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu, compute, utility]
    restart: unless-stopped
  # Ollama Server Service (Using Official Image)
  ollama:
    # --- Use the official Ollama image ---
    image: ollama/ollama:latest
    container_name: realtime-voice-chat-ollama
    # --- No 'build:' section needed here ---
    # command: ["ollama", "serve"]  # Usually the default command/entrypoint
    volumes:
      # Persist Ollama models and data
      - ollama_data:/root/.ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # Make GPUs visible inside the container
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # OLLAMA_MODELS can override the model directory; it points inside the volume
      # - OLLAMA_MODELS=/root/.ollama/models
    deploy:  # GPU access for the Ollama service
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu, compute, utility]
    # healthcheck:
    #   # Check if the Ollama API is responsive
    #   test: ["CMD", "wget", "--quiet", "--spider", "--tries=1", "--timeout=10", "http://localhost:11434/api/tags"]
    #   interval: 15s
    #   timeout: 10s
    #   retries: 12
    #   start_period: 45s  # Give it time to start
    restart: unless-stopped
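  # Usage note (an assumption about the workflow, not from the original file):
  # the official image starts the Ollama server but ships with no models. Once
  # the stack is up, pull whichever model the app expects into the 'ollama_data'
  # volume ('<model-name>' is a placeholder, not taken from this file):
  #
  #   docker compose exec ollama ollama pull <model-name>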
# Define named volumes for persistent data
volumes:
  ollama_data:
    driver: local
  huggingface_cache:
    driver: local
  torch_cache:
    driver: local
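
# Typical workflow (a sketch using standard Docker Compose v2 commands; none of
# this is taken from the original file). Run from the directory containing this
# file:
#
#   docker compose up --build -d   # build the app image and start both services
#   docker compose logs -f app     # follow the application logs
#   docker compose down            # stop; named volumes (models, caches) persist
#   docker compose down -v         # stop and also delete the named volumes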