From e47c6ab5aa88c0327a33a861d838b550ddaba609 Mon Sep 17 00:00:00 2001
From: root <403644786@qq.com>
Date: Fri, 12 Jul 2024 16:09:49 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E4=BB=A3=E7=A0=81?=
 =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86=E6=B3=A8=E9=87=8A=E5=92=8C?=
 =?UTF-8?q?=E8=A7=84=E8=8C=83=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 quantize/bnb_quantize.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/quantize/bnb_quantize.py b/quantize/bnb_quantize.py
index 4f0d20c..106b974 100644
--- a/quantize/bnb_quantize.py
+++ b/quantize/bnb_quantize.py
@@ -40,12 +40,12 @@ quantization_config = BitsAndBytesConfig(
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     model_path,
-    device_map="cuda:0",  # 分配模型到GPU0
+    device_map=device,  # 分配模型到device
     quantization_config=quantization_config,
     trust_remote_code=True
 )
-gpu_usage = GPUtil.getGPUs()[0].memoryUsed 
-        
+
+gpu_usage = GPUtil.getGPUs()[0].memoryUsed  
 start=time.time()
 response = model.chat(
     image=Image.open(image_path).convert("RGB"),
@@ -58,7 +58,7 @@ response = model.chat(
     tokenizer=tokenizer
 ) # 模型推理
 print('量化后输出',response)
-print('量化后用时',time.time()-start)
+print('量化后推理用时',time.time()-start)
 print(f"量化后显存占用: {round(gpu_usage/1024,2)}GB")
 
 """