feat(*): 增加了一些模型

This commit is contained in:
2025-03-27 21:19:43 +08:00
parent a55bb3a448
commit a513dd8a91
62 changed files with 2420 additions and 109 deletions

3
ktransformers/.env Normal file
View File

@@ -0,0 +1,3 @@
TRANSFORMERS_OFFLINE=0
HF_HUB_OFFLINE=0
TORCH_CUDA_ARCH_LIST=8.9

View File

@@ -0,0 +1,47 @@
version: "3.8"
services:
ktransformers:
image: docker.citory.tech/public/ktransformers:0.2.2rc1
container_name: ktransformers
runtime: nvidia
tty: true
stdin_open: true
ports:
- 10580:10580
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 2
capabilities:
- gpu
ipc: host
volumes:
- /home/deepgeek/data/data_local/server/ktransformers/models:/workspace/models
- /home/deepgeek/data/data_local/server/ktransformers/ktransformers/website:/workspace/ktransformers/ktransformers/website
env_file:
- .env
restart: unless-stopped
entrypoint: [
"python3", "/workspace/ktransformers/ktransformers/server/main.py",
"--gguf_path", "/workspace/models/DeepSeek-R1-GGUF/DeepSeek-R1-UD-Q2_K_XL",
"--model_path", "/workspace/models/DeepSeek-R1",
"--model_name", "deepseek-r1:671b",
"--cpu_infer", "94",
"--optimize_config_path", "/workspace/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu.yaml",
"--max_new_tokens", "8192",
"--cache_lens", "32768",
"--total_context", "32768",
"--cache_q4", "true",
"--temperature", "0.6",
"--top_p", "0.95",
"--force_think",
"--no-use_cuda_graph",
"--host", "0.0.0.0",
"--port", "10580"
]
x-dockge:
urls:
- http://local.citory.tech:10580
networks: {}