stacks/ktransformers/compose.yaml

version: "3.8"
services:
ktransformers:
image: docker.citory.tech/public/ktransformers:0.2.2rc1
container_name: ktransformers
runtime: nvidia
tty: true
stdin_open: true
ports:
- 10580:10580
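    # Reserve two NVIDIA GPUs for the container; this needs the NVIDIA
    # Container Toolkit on the host, alongside runtime: nvidia above.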
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 2
              capabilities:
                - gpu
    ipc: host
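    # Bind-mount the model weights and the bundled web UI from the host.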
    volumes:
      - /home/deepgeek/data/data_local/server/ktransformers/models:/workspace/models
      - /home/deepgeek/data/data_local/server/ktransformers/ktransformers/website:/workspace/ktransformers/ktransformers/website
    env_file:
      - .env
    restart: unless-stopped
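    # Launch the KTransformers API server directly: DeepSeek-R1 671B from the
    # UD-Q2_K_XL GGUF weights, 94 CPU inference threads, the DeepSeek-V3
    # multi-GPU optimization rules, a 32768-token cache/context, q4 KV cache,
    # and CUDA graphs disabled.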
    entrypoint: [
      "python3", "/workspace/ktransformers/ktransformers/server/main.py",
      "--gguf_path", "/workspace/models/DeepSeek-R1-GGUF/DeepSeek-R1-UD-Q2_K_XL",
      "--model_path", "/workspace/models/DeepSeek-R1",
      "--model_name", "deepseek-r1:671b",
      "--cpu_infer", "94",
      "--optimize_config_path", "/workspace/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu.yaml",
      "--max_new_tokens", "8192",
      "--cache_lens", "32768",
      "--total_context", "32768",
      "--cache_q4", "true",
      "--temperature", "0.6",
      "--top_p", "0.95",
      "--force_think",
      "--no-use_cuda_graph",
      "--host", "0.0.0.0",
      "--port", "10580"
    ]
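# Dockge metadata: link shown for this stack in the Dockge UI.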
x-dockge:
  urls:
    - http://local.citory.tech:10580
networks: {}
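# Quick smoke test once the stack is up (`docker compose up -d`). This is a
# sketch: it assumes the KTransformers server exposes an OpenAI-compatible
# /v1/chat/completions endpoint on the published port, which this file does
# not itself confirm.
#
#   curl http://local.citory.tech:10580/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "deepseek-r1:671b",
#          "messages": [{"role": "user", "content": "Hello"}]}'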