This commit is contained in:
root
2025-07-29 08:39:41 +00:00
parent 1b8d194b67
commit 07cbc51cd1
8 changed files with 165 additions and 157 deletions

View File

@@ -35,8 +35,7 @@ def parse_arguments():
type=str,
default='auto',
choices=['auto', 'float16', 'bfloat16', 'float32'],
-        help=
-        "The data type for the model weights and activations if not quantized. "
+        help="The data type for the model weights and activations if not quantized. "
"If 'auto', the data type is automatically inferred from the source model; "
"however, if the source dtype is float32, it is converted to float16.")
parser.add_argument(
@@ -49,8 +48,7 @@ def parse_arguments():
'--disable_weight_only_quant_plugin',
default=False,
action="store_true",
-        help=
-        'By default, using plugin implementation for weight quantization. Enabling disable_weight_only_quant_plugin flag will use ootb implementation instead of plugin.'
+        help='By default, using plugin implementation for weight quantization. Enabling disable_weight_only_quant_plugin flag will use ootb implementation instead of plugin.'
'You must also use --use_weight_only for that argument to have an impact.'
)
parser.add_argument(
@@ -60,16 +58,14 @@ def parse_arguments():
nargs='?',
default='int8',
choices=['int8', 'int4', 'int4_gptq'],
-        help=
-        'Define the precision for the weights when using weight-only quantization.'
+        help='Define the precision for the weights when using weight-only quantization.'
'You must also use --use_weight_only for that argument to have an impact.'
)
parser.add_argument(
'--calib_dataset',
type=str,
default='ccdv/cnn_dailymail',
-        help=
-        "The huggingface dataset name or the local directory of the dataset for calibration."
+        help="The huggingface dataset name or the local directory of the dataset for calibration."
)
parser.add_argument(
"--smoothquant",
@@ -83,31 +79,27 @@ def parse_arguments():
'--per_channel',
action="store_true",
default=False,
-        help=
-        'By default, we use a single static scaling factor for the GEMM\'s result. '
+        help='By default, we use a single static scaling factor for the GEMM\'s result. '
'per_channel instead uses a different static scaling factor for each channel. '
'The latter is usually more accurate, but a little slower.')
parser.add_argument(
'--per_token',
action="store_true",
default=False,
-        help=
-        'By default, we use a single static scaling factor to scale activations in the int8 range. '
+        help='By default, we use a single static scaling factor to scale activations in the int8 range. '
'per_token chooses at run time, and for each token, a custom scaling factor. '
'The latter is usually more accurate, but a little slower.')
parser.add_argument(
'--int8_kv_cache',
default=False,
action="store_true",
-        help=
-        'By default, we use dtype for KV cache. int8_kv_cache chooses int8 quantization for KV'
+        help='By default, we use dtype for KV cache. int8_kv_cache chooses int8 quantization for KV'
)
parser.add_argument(
'--per_group',
default=False,
action="store_true",
-        help=
-        'By default, we use a single static scaling factor to scale weights in the int4 range. '
+        help='By default, we use a single static scaling factor to scale weights in the int4 range. '
'per_group chooses at run time, and for each group, a custom scaling factor. '
'The flag is built for GPTQ/AWQ quantization.')
@@ -121,16 +113,14 @@ def parse_arguments():
'--use_parallel_embedding',
action="store_true",
default=False,
-        help=
-        'By default embedding parallelism is disabled. By setting this flag, embedding parallelism is enabled'
+        help='By default embedding parallelism is disabled. By setting this flag, embedding parallelism is enabled'
)
parser.add_argument(
'--embedding_sharding_dim',
type=int,
default=0,
choices=[0, 1],
-        help=
-        'By default the embedding lookup table is sharded along vocab dimension (embedding_sharding_dim=0). '
+        help='By default the embedding lookup table is sharded along vocab dimension (embedding_sharding_dim=0). '
'To shard it along hidden dimension, set embedding_sharding_dim=1'
'Note: embedding sharing is only enabled when embedding_sharding_dim = 0'
)
@@ -147,15 +137,13 @@ def parse_arguments():
'--moe_tp_size',
type=int,
default=-1,
-        help=
-        'N-way tensor parallelism size for MOE, default is tp_size, which will do tp-only for MoE'
+        help='N-way tensor parallelism size for MOE, default is tp_size, which will do tp-only for MoE'
)
parser.add_argument(
'--moe_ep_size',
type=int,
default=-1,
-        help=
-        'N-way expert parallelism size for MOE, default is 1, which will do tp-only for MoE'
+        help='N-way expert parallelism size for MOE, default is 1, which will do tp-only for MoE'
)
args = parser.parse_args()
return args
@@ -249,7 +237,7 @@ def convert_and_save_hf(args):
trust_remote_code=True)
quant_config, override_fields = update_quant_config_from_hf(
quant_config, hf_config, override_fields)
-    except:
+    except BaseException:
logger.warning("AutoConfig cannot load the huggingface config.")
if args.smoothquant is not None or args.int8_kv_cache:
@@ -339,4 +327,4 @@ def main():
if __name__ == '__main__':
main()
main()

View File

@@ -1,4 +1,4 @@
-#! /usr/bin/env python3
+# /usr/bin/env python3
from argparse import ArgumentParser
from string import Template
@@ -59,8 +59,7 @@ if __name__ == "__main__":
parser.add_argument("file_path", help="path of the .pbtxt to modify")
parser.add_argument(
"substitutions",
-        help=
-        "substitutions to perform, in the format variable_name_1:value_1,variable_name_2:value_2..."
+        help="substitutions to perform, in the format variable_name_1:value_1,variable_name_2:value_2..."
)
parser.add_argument("--in_place",
"-i",

View File

@@ -46,7 +46,6 @@ def parse_arguments(args=None):
parser.add_argument('--top_k', type=int, default=50)
parser.add_argument('--top_p', type=float, default=0.95)
return parser.parse_args(args=args)
@@ -60,7 +59,7 @@ def parse_input(tokenizer,
input_ids = tokenizer.encode(
curr_text)
batch_input_ids.append(input_ids)
batch_input_ids = [
torch.tensor(x, dtype=torch.int32) for x in batch_input_ids
]