feat(*):增加了一些模型
This commit is contained in:
		
							
								
								
									
										12
									
								
								vllm-qwen25-72b-instruct-awq/compose.yaml
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										12
									
								
								vllm-qwen25-72b-instruct-awq/compose.yaml
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
@@ -18,12 +18,12 @@ services:
     runtime: nvidia
     volumes:
       - /home/deepgeek/data/data_local/server/vllm/models:/models
-    restart: always
-    command: --served-model-name Qwen2.5-72B-Instruct-AWQ --model /models/Qwen/Qwen2.5-72B-Instruct-AWQ
-      --trust-remote-code --host 0.0.0.0 --port 8080 --max-model-len 8192
-      --tensor-parallel-size 2 --gpu_memory_utilization 0.9 --enforce-eager
-      --dtype auto --swap-space 8 --enable-auto-tool-choice --tool-call-parser
-      llama3_json
+    restart: unless-stopped
+    command: --served-model-name Qwen2.5:72b --model
+      /models/Qwen/Qwen2.5-72B-Instruct-AWQ --trust-remote-code --host 0.0.0.0
+      --port 8080 --max-model-len 8192 --tensor-parallel-size 2
+      --gpu_memory_utilization 0.9 --enforce-eager --dtype auto --swap-space 8
+      --enable-auto-tool-choice --tool-call-parser llama3_json
 x-dockge:
   urls:
     - http://local.citory.tech:10580
		Reference in New Issue
	
	Block a user