From 7f9030254a333e029b4741de72c2cd8cdcfaca07 Mon Sep 17 00:00:00 2001
From: Hongji Zhu
Date: Mon, 20 May 2024 19:05:17 +0800
Subject: [PATCH] fix web_demo_2.5 for int4

---
Review notes (free-form text after the three-dash line; it is not part of
the commit message):

* What the change does: when the model path contains 'int4', the model is
  loaded without the float16 cast, and on the 'mps' device the script
  prints an error and stops instead of loading. All other model paths keep
  the previous float16 load followed by the move to `device`.
* This copy was rebuilt from a version whose line breaks and indentation
  had been collapsed. The 4-space nesting of the added lines is inferred;
  in particular `model = model.to(device=device)` is assumed to sit inside
  the `else:` branch (consistent with the subject line) - TODO confirm
  against the upstream commit.
* The hunk header counts (-25,10 +25,15) require three more context lines
  than the collapsed text showed; they are restored as blank lines, one
  before "# Load model" and two after "model.eval()". Keep the two blank
  lines at the end of this patch or the hunk will be rejected as short.
* NOTE(review): the bare `exit()` is the helper installed by the `site`
  module and, called without an argument, ends the process with status 0
  even though an error was just printed. A follow-up could raise
  SystemExit with the message so callers see a non-zero status.

 web_demo_2.5.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/web_demo_2.5.py b/web_demo_2.5.py
index 4d11356..cc80a82 100644
--- a/web_demo_2.5.py
+++ b/web_demo_2.5.py
@@ -25,10 +25,15 @@ assert device in ['cuda', 'mps']
 
 # Load model
 model_path = 'openbmb/MiniCPM-Llama3-V-2_5'
-model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
+if 'int4' in model_path:
+    if device == 'mps':
+        print('Error: running int4 model with bitsandbytes on Mac is not supported right now.')
+        exit()
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+else:
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
+    model = model.to(device=device)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-
-model = model.to(device=device)
 model.eval()
 
 