From e47c6ab5aa88c0327a33a861d838b550ddaba609 Mon Sep 17 00:00:00 2001
From: root <403644786@qq.com>
Date: Fri, 12 Jul 2024 16:09:49 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E4=BB=A3=E7=A0=81?=
 =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86=E6=B3=A8=E9=87=8A=E5=92=8C?=
 =?UTF-8?q?=E8=A7=84=E8=8C=83=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 quantize/bnb_quantize.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/quantize/bnb_quantize.py b/quantize/bnb_quantize.py
index 4f0d20c..106b974 100644
--- a/quantize/bnb_quantize.py
+++ b/quantize/bnb_quantize.py
@@ -40,12 +40,12 @@ quantization_config = BitsAndBytesConfig(
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     model_path,
-    device_map="cuda:0", # 分配模型到GPU0
+    device_map=device, # 分配模型到device
     quantization_config=quantization_config,
     trust_remote_code=True
 )
-gpu_usage = GPUtil.getGPUs()[0].memoryUsed
-
+
+gpu_usage = GPUtil.getGPUs()[0].memoryUsed
 start=time.time()
 response = model.chat(
     image=Image.open(image_path).convert("RGB"),
@@ -58,7 +58,7 @@ response = model.chat(
     tokenizer=tokenizer
 ) # 模型推理
 print('量化后输出',response)
-print('量化后用时',time.time()-start)
+print('量化后推理用时',time.time()-start)
 print(f"量化后显存占用: {round(gpu_usage/1024,2)}GB")
 
 """