---
# Docker Compose stack: ktransformers LLM inference server serving
# DeepSeek-R1 671B (GGUF, UD-Q2_K_XL quant) across 2 NVIDIA GPUs.
version: "3.8"  # NOTE(review): `version` is obsolete under Compose V2; kept for legacy tooling

services:
  ktransformers:
    image: docker.citory.tech/public/ktransformers:0.2.2rc1
    container_name: ktransformers
    # NVIDIA container runtime; GPU count is reserved under deploy: below.
    runtime: nvidia
    # Interactive TTY + open stdin so the container can be attached for debugging.
    tty: true
    stdin_open: true
    ports:
      # Quote port mappings — unquoted digit:digit strings risk YAML 1.1
      # sexagesimal parsing and are a documented Compose pitfall.
      - "10580:10580"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 2
              capabilities:
                - gpu
    # Host IPC namespace — presumably for large shared-memory segments
    # used in multi-GPU inference; confirm the image requires it.
    ipc: host
    volumes:
      # Model weights (read by --gguf_path / --model_path below).
      - /home/deepgeek/data/data_local/server/ktransformers/models:/workspace/models
      # Web UI assets mounted over the image's bundled copy.
      - /home/deepgeek/data/data_local/server/ktransformers/ktransformers/website:/workspace/ktransformers/ktransformers/website
    env_file:
      - .env
    restart: unless-stopped
    # Exec-form entrypoint in block style for readable diffs; number- and
    # boolean-looking arguments are quoted so they stay strings.
    entrypoint:
      - python3
      - /workspace/ktransformers/ktransformers/server/main.py
      - --gguf_path
      - /workspace/models/DeepSeek-R1-GGUF/DeepSeek-R1-UD-Q2_K_XL
      - --model_path
      - /workspace/models/DeepSeek-R1
      - --model_name
      - "deepseek-r1:671b"
      - --cpu_infer
      - "94"
      - --optimize_config_path
      - /workspace/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu.yaml
      - --max_new_tokens
      - "8192"
      - --cache_lens
      - "32768"
      - --total_context
      - "32768"
      - --cache_q4
      - "true"
      - --temperature
      - "0.6"
      - --top_p
      - "0.95"
      - --force_think
      - --no-use_cuda_graph
      - --host
      - 0.0.0.0
      - --port
      - "10580"

# Dockge UI extension: link shown on the stack's dashboard card.
x-dockge:
  urls:
    - http://local.citory.tech:10580

networks: {}