Merge pull request #153 from whn09/main

Combine dtype and device in from_pretrained to save CPU memory
Author: Hongji Zhu
Date: 2024-05-31 11:23:13 +08:00
Committed by: GitHub


@@ -31,8 +31,7 @@ if 'int4' in model_path:
         exit()
     model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 else:
-    model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
-    model = model.to(device=device)
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.float16, device_map=device)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model.eval()
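
For context (not part of the commit), a minimal sketch of the before/after loading patterns under the stated assumptions: the checkpoint name and device are placeholders, and passing device_map to from_pretrained requires the accelerate package.

```python
# Sketch only; model path and device are placeholders, not values from the diff.
import torch
from transformers import AutoModel

model_path = 'openbmb/MiniCPM-Llama3-V-2_5'  # placeholder checkpoint
device = 'cuda'                              # or 'mps' / 'cpu'

# Before: the checkpoint is first materialized on CPU (float32 by default),
# then cast to float16 and moved, so peak CPU usage is roughly the full
# fp32 model.
# model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
# model = model.to(device=device)

# After: weights are loaded directly in float16 and placed on the target
# device, skipping the intermediate fp32 copy on CPU.
model = AutoModel.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map=device,
)
model.eval()
```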