diff --git a/docs/swift_train_and_infer.md b/docs/swift_train_and_infer.md
index 1e74607..693efe6 100644
--- a/docs/swift_train_and_infer.md
+++ b/docs/swift_train_and_infer.md
@@ -21,20 +21,20 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_type minicpm-v-v2_5-chat
 2. You can also pass the additional arguments below when running inference:
 ```
-model_id_or_path # 可以写huggingface的模型id或者本地模型地址
-infer_backend ['AUTO', 'vllm', 'pt'] # 后段推理,默认auto
-dtype ['bf16', 'fp16', 'fp32', 'AUTO'] # 计算精度
-max_length # 最大长度
-max_new_tokens: int = 2048 #最多生成多少token
-do_sample: bool = True # 是否采样
-temperature: float = 0.3 # 生成时的温度系数
+model_id_or_path # Can be the model ID from Hugging Face or the local path to the model
+infer_backend ['AUTO', 'vllm', 'pt'] # Backend for inference; the default is AUTO
+dtype ['bf16', 'fp16', 'fp32', 'AUTO'] # Computational precision
+max_length # Maximum sequence length
+max_new_tokens: int = 2048 # Maximum number of tokens to generate
+do_sample: bool = True # Whether to sample during generation
+temperature: float = 0.3 # Sampling temperature during generation
 top_k: int = 20
 top_p: float = 0.7
-repetition_penalty: float = 1.
-num_beams: int = 1
-stop_words: List[str] = None
-quant_method ['bnb', 'hqq', 'eetq', 'awq', 'gptq', 'aqlm'] # 模型的量化方式
-quantization_bit [0, 1, 2, 3, 4, 8] 默认是0,代表不使用量化
+repetition_penalty: float = 1. # Penalty for repetition
+num_beams: int = 1 # Number of beams for beam search
+stop_words: List[str] = None # List of stop words
+quant_method ['bnb', 'hqq', 'eetq', 'awq', 'gptq', 'aqlm'] # Quantization method for the model
+quantization_bit [0, 1, 2, 3, 4, 8] # Default is 0, which means no quantization is used
 ```
 3. Example:
 ``` shell
@@ -48,36 +48,36 @@
 The following demonstrates using Python code to initiate inference with the MiniCPM-Llama3-V-2_5 model:
 
 ```python
 import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' # 设置显卡数
+os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' # Specify which GPUs are visible
 from swift.llm import (
     get_model_tokenizer, get_template, inference, ModelType,
     get_default_template_type, inference_stream
-) # 导入必要模块
+) # Import the necessary modules
-from swift.utils import seed_everything # 设置随机种子
+from swift.utils import seed_everything # Set the random seed
 import torch
 
 model_type = ModelType.minicpm_v_v2_5_chat
-template_type = get_default_template_type(model_type) # 获取模板类型,主要是用于特殊token的构造和图像的处理流程
+template_type = get_default_template_type(model_type) # Obtain the template type, used mainly for constructing special tokens and the image processing workflow
 print(f'template_type: {template_type}')
 
 model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16,
-                                       model_id_or_path='/root/ld/ld_model_pretrain/MiniCPM-Llama3-V-2_5',
-                                       model_kwargs={'device_map': 'auto'}) # 加载模型,并设置模型类型,模型路径,模型参数,设备分配等,计算精度等等
+                                       model_id_or_path='/root/ld/ld_model_pretrain/MiniCPM-Llama3-V-2_5',
+                                       model_kwargs={'device_map': 'auto'}) # Load the model and tokenizer, specifying the model type, model path, computational precision, and device allocation
 model.generation_config.max_new_tokens = 256
-template = get_template(template_type, tokenizer) # 根据模版类型构造模板
+template = get_template(template_type, tokenizer) # Construct the template based on the template type
 seed_everything(42)
 
-images = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road.png'] # 图片地址
-query = '距离各城市多远?'
-response, history = inference(model, template, query, images=images) # 推理获得结果
+images = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road.png'] # Image URL
+query = 'How far is it from each city?'
+response, history = inference(model, template, query, images=images) # Run inference to obtain the result
 print(f'query: {query}')
 print(f'response: {response}')
 
-# 流式
-query = '距离最远的城市是哪?'
-gen = inference_stream(model, template, query, history, images=images) # 调用流式输出接口
+# Streaming output
+query = 'Which city is the farthest?'
+gen = inference_stream(model, template, query, history, images=images) # Call the streaming output interface
 print_idx = 0
 print(f'query: {query}\nresponse: ', end='')
 for response, history in gen:
@@ -92,9 +92,9 @@
 print(f'history: {history}')
 SWIFT supports training on a local dataset; the training steps are as follows:
 1. Format the training data like this:
 ```jsonl
-{"query": "这张图片描述了什么", "response": "这张图片有一个大熊猫", "images": ["local_image_path"]}
-{"query": "这张图片描述了什么", "response": "这张图片有一个大熊猫", "history": [], "images": ["image_path"]}
-{"query": "竹子好吃么", "response": "看大熊猫的样子挺好吃呢", "history": [["这张图有什么", "这张图片有大熊猫"], ["大熊猫在干嘛", "吃竹子"]], "images": ["image_url"]}
+{"query": "What does this picture describe?", "response": "There is a giant panda in this picture.", "images": ["local_image_path"]}
+{"query": "What does this picture describe?", "response": "There is a giant panda in this picture.", "history": [], "images": ["image_path"]}
+{"query": "Is bamboo tasty?", "response": "It seems pretty tasty judging by the panda's expression.", "history": [["What's in this picture?", "There's a giant panda in this picture."], ["What is the panda doing?", "Eating bamboo."]], "images": ["image_url"]}
 ```
 2. LoRA Tuning:
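For reference on the JSONL training format introduced in the last hunk: each line is one standalone JSON object, where `query`/`response` form the current turn, `history` holds earlier `[query, response]` pairs, and `images` lists local paths or URLs. As a minimal sketch (not part of the SWIFT docs; the file name `train.jsonl` and the image path are placeholders), records in this shape can be written with Python's standard library:

```python
import json

# Hypothetical records covering the two shapes shown above:
# a single-turn sample and a multi-turn sample with history.
records = [
    {
        "query": "What does this picture describe?",
        "response": "There is a giant panda in this picture.",
        "images": ["/path/to/panda.jpg"],  # placeholder local path
    },
    {
        "query": "Is bamboo tasty?",
        "response": "It seems pretty tasty judging by the panda's expression.",
        "history": [
            ["What's in this picture?", "There's a giant panda in this picture."],
            ["What is the panda doing?", "Eating bamboo."],
        ],
        "images": ["/path/to/panda.jpg"],  # placeholder local path
    },
]

# JSONL means one JSON object per line; ensure_ascii=False keeps any
# non-ASCII text (e.g. Chinese queries) human-readable in the file.
with open("train.jsonl", "w", encoding="utf-8") as f:
    for record in records:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
```

The resulting file can then be supplied as the local dataset for the LoRA tuning step above.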