-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsingle_gpu_setup.yaml
More file actions
298 lines (241 loc) · 5.08 KB
/
single_gpu_setup.yaml
File metadata and controls
298 lines (241 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# Single GPU Setup Configuration
# ThemisDB v1.4.0-alpha
# Use case: Development, small-scale training, inference
# Server Configuration
server:
  # Listener: bind address, port, worker count and connection cap.
  host: "0.0.0.0"
  port: 8080
  workers: 4
  max_connections: 1000

  # Request handling
  request_timeout: 300  # seconds
  keepalive_timeout: 75  # unit not stated in this file; presumably seconds - confirm

  # Upper bounds on payload sizes
  max_request_size: "100MB"
  max_response_size: "100MB"
# GPU Configuration
gpu:
  enabled: true

  # Exactly one device is listed for the single-GPU setup.
  devices:
    - id: 0
      name: "GPU-0"
      memory_limit: 0.9  # fraction of VRAM to use (0.9 = 90%)
      compute_capability: 8.6

  # GPU settings
  persistence_mode: true
  compute_mode: default
  power_limit: null  # null = keep the default power limit

  # Memory management
  memory_pool:
    enabled: true
    initial_size_mb: 2048
    max_split_size_mb: 512

  # VRAM optimization
  vram:
    secure_clear: true
    unified_memory: false
    # NOTE(review): nesting reconstructed; oom_protection may instead belong
    # directly under `gpu` - confirm against the consumer's schema.
    oom_protection:
      enabled: true
      reserved_memory_mb: 1024
# LLM Configuration
llm:
  enabled: true
  backend: cuda

  # Which model file to load and how to interpret it
  model_path: /models/llama-2-7b-chat.gguf
  model_type: llama

  # Context window and batching
  context_length: 4096
  batch_size: 512
  n_gpu_layers: 35  # all layers on GPU

  # Performance
  threads: 8
  use_mmap: true
  use_mlock: false

  # Sampling parameters used for inference
  inference:
    temperature: 0.7
    top_p: 0.9
    top_k: 40
    repeat_penalty: 1.1

  # KV cache (max_tokens matches context_length above)
  kv_cache:
    enabled: true
    max_tokens: 4096
# LoRA Configuration
lora:
  enabled: true
  adapter_path: /adapters
  max_adapters: 4
  gpu_enabled: true

  # Adapter management: nothing is preloaded (empty list); caching is on.
  preload_adapters: []
  cache_adapters: true

  # LoRA hyperparameters
  rank: 16
  alpha: 32
  dropout: 0.05
# Training Configuration
training:
  enabled: true

  # Batch configuration.
  # The values below satisfy batch_size = micro_batch_size *
  # gradient_accumulation_steps (8 * 2 = 16); presumably the trainer expects
  # that relationship - keep them in sync when changing one (confirm).
  batch_size: 16
  micro_batch_size: 8
  gradient_accumulation_steps: 2

  # Precision
  precision: fp16
  mixed_precision:
    enabled: true
    loss_scale: dynamic

  # Optimization
  optimizer: adamw
  # Floats in scientific notation are written with an explicit ".0" mantissa:
  # YAML 1.1 resolvers (e.g. PyYAML) load a dot-less form such as 3e-4 as a
  # string, while 3.0e-4 is a float under both YAML 1.1 and 1.2.
  learning_rate: 3.0e-4
  weight_decay: 0.01

  # Learning rate schedule
  lr_schedule:
    type: cosine_with_warmup
    warmup_steps: 500
    min_lr: 3.0e-5  # same explicit-mantissa form as learning_rate

  # Gradient management
  gradient_clipping:
    enabled: true
    max_norm: 1.0
  gradient_checkpointing:
    enabled: true
    checkpoint_segments: 2

  # Checkpoints
  checkpoint:
    enabled: true
    save_interval: 500
    max_keep: 3
    output_dir: /data/checkpoints
    compression: true
    async_save: true

  # Validation
  validation:
    enabled: true
    interval: 100
    split: 0.1
# Inference Configuration
inference:
  enabled: true

  # Batching: one request per batch; continuous batching is switched off.
  batch_size: 1
  continuous_batching:
    enabled: false
    max_wait_time_ms: 50

  # Performance
  precision: fp16

  # Generation settings
  max_tokens: 512
  stream: true
# Storage Configuration
storage:
  # Data directory
  data_dir: /data/themisdb

  # RocksDB settings
  # NOTE(review): nesting reconstructed; `compression` and `block_cache_size`
  # are assumed to be RocksDB options - confirm against the consumer's schema.
  rocksdb:
    max_open_files: 1000
    max_background_jobs: 4
    write_buffer_size: "64MB"
    max_write_buffer_number: 3

    # Compression
    compression: lz4

    # Cache
    block_cache_size: "512MB"

  # Encryption (optional) - disabled in this setup
  encryption:
    enabled: false
    algorithm: AES-256-GCM
# Network Configuration
network:
  # HTTP/REST API (same port value as server.port)
  http:
    enabled: true
    port: 8080

  # Binary protocol
  binary:
    enabled: true
    port: 18765

  # TLS (optional - enable for production)
  tls:
    enabled: false
    cert_file: /etc/themisdb/certs/server.crt
    key_file: /etc/themisdb/certs/server.key
# Monitoring Configuration
metrics:
  enabled: true
  port: 4318
  path: /metrics

  # How often each metric family is collected
  gpu_metrics_interval: "1s"
  training_metrics_interval: "1s"
  inference_metrics_interval: "100ms"

  # Exporters (the endpoint reuses the port and path configured above)
  exporters:
    - type: prometheus
      endpoint: http://localhost:4318/metrics
# Logging Configuration
logging:
  level: info
  format: json
  output: /var/log/themisdb/app.log

  # Rotation
  rotation:
    max_size: "100MB"
    max_age: 7  # unit not stated in this file; presumably days - confirm
    max_backups: 5
    compress: true

  # Audit logging goes to its own file
  audit:
    enabled: true
    output: /var/log/themisdb/audit.log
# Security Configuration
# Everything in this section is switched off in this setup; the flags marked
# below should be enabled for production.
security:
  # Authentication
  authentication:
    enabled: false  # enable for production
    method: api_key

  # Authorization
  authorization:
    enabled: false  # enable for production

  # GPU access control
  gpu:
    access_control:
      enabled: false
      mode: permissive
# Backup Configuration
backup:
  enabled: true

  # Schedule (cron expressions; quoted because they contain `*`)
  schedule:
    full_backup: "0 2 * * 0"  # 02:00 every Sunday
    incremental: "0 2 * * 1-6"  # 02:00 Monday through Saturday

  # Retention
  retention:
    full: 2
    incremental: 7

  # Destination
  destination: /backup/themisdb
# Resource Limits
limits:
  # Memory
  max_memory_mb: 32768  # 32 GB

  # Disk
  max_disk_usage_gb: 500

  # Request concurrency and queue depth
  max_concurrent_requests: 100
  max_queue_size: 1000
# Development Settings
# All development and testing switches are off in this setup.
development:
  debug_mode: false
  profiling: false
  hot_reload: false

  # Testing
  test_mode: false
# Example Usage:
# 1. Copy this file to /etc/themisdb/config.yaml
# 2. Adjust paths for your environment
# 3. Enable security features for production
# 4. Start service: sudo systemctl start themisdb