mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
Merge pull request #1640 from Jzz1943/main
support vLLM >=0.11.0 (V1 engine) for better performance
This commit is contained in:
@@ -152,14 +152,18 @@ python example.py
|
|||||||
```
|
```
|
||||||
|
|
||||||
#### CosyVoice2 vllm Usage
|
#### CosyVoice2 vllm Usage
|
||||||
If you want to use vllm for inference, please install `vllm==v0.9.0`. Older vllm versions do not support CosyVoice2 inference.
|
CosyVoice2 now supports **vLLM 0.11.x+ (V1 engine)** and **vLLM 0.9.0 (legacy)**.
|
||||||
|
Older vllm versions (<0.9.0) do not support CosyVoice2 inference, and versions in between (e.g., 0.10.x) are not tested.
|
||||||
|
|
||||||
Notice that `vllm==v0.9.0` has a lot of specific requirements, for example `torch==2.7.0`. You can create a new env in case your hardware does not support vllm and your old env gets corrupted.
|
Notice that `vllm==v0.9.0` has a lot of specific requirements, for example `torch==2.7.0`. You can create a new env in case your hardware does not support vllm and your old env gets corrupted.
|
||||||
|
|
||||||
``` sh
|
``` sh
|
||||||
conda create -n cosyvoice_vllm --clone cosyvoice
|
conda create -n cosyvoice_vllm --clone cosyvoice
|
||||||
conda activate cosyvoice_vllm
|
conda activate cosyvoice_vllm
|
||||||
|
# for vllm==0.9.0
|
||||||
pip install vllm==v0.9.0 transformers==4.51.3 numpy==1.26.4 -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
|
pip install vllm==v0.9.0 transformers==4.51.3 numpy==1.26.4 -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
|
||||||
|
# for vllm>=0.11.0
|
||||||
|
pip install vllm==v0.11.0 transformers==4.57.1 numpy==1.26.4 -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
|
||||||
python vllm_example.py
|
python vllm_example.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,15 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
||||||
|
from typing import Optional
|
||||||
|
from packaging.version import parse as vparse
|
||||||
|
import vllm
|
||||||
|
|
||||||
|
# vLLM-0.11.0+ only support V1 engine
|
||||||
|
VLLM_V1_ENGINE_ONLY: bool = vparse(vllm.__version__) >= vparse("0.11.0")
|
||||||
|
if VLLM_V1_ENGINE_ONLY:
|
||||||
|
from vllm.v1.sample.metadata import SamplingMetadata
|
||||||
|
|
||||||
from vllm.model_executor.models.qwen2 import *
|
from vllm.model_executor.models.qwen2 import *
|
||||||
|
|
||||||
|
|
||||||
@@ -87,10 +96,14 @@ class CosyVoice2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
|||||||
def compute_logits(
|
def compute_logits(
|
||||||
self,
|
self,
|
||||||
hidden_states: torch.Tensor,
|
hidden_states: torch.Tensor,
|
||||||
sampling_metadata: SamplingMetadata,
|
sampling_metadata: Optional[SamplingMetadata] = None,
|
||||||
) -> Optional[torch.Tensor]:
|
) -> Optional[torch.Tensor]:
|
||||||
logits = self.logits_processor(self.lm_head, hidden_states,
|
if VLLM_V1_ENGINE_ONLY:
|
||||||
sampling_metadata, self.lm_head.bias)
|
logits = self.logits_processor(self.lm_head, hidden_states,
|
||||||
|
self.lm_head.bias)
|
||||||
|
else:
|
||||||
|
logits = self.logits_processor(self.lm_head, hidden_states,
|
||||||
|
sampling_metadata, self.lm_head.bias)
|
||||||
return logits
|
return logits
|
||||||
|
|
||||||
def load_weights(self, weights: Iterable[tuple[str,
|
def load_weights(self, weights: Iterable[tuple[str,
|
||||||
|
|||||||
Reference in New Issue
Block a user