Skip to content

Commit 048f57f

Browse files
authored
add EP in PTQ (#15015)
Signed-off-by: jenchen13 <[email protected]>
1 parent 2224289 commit 048f57f

File tree

2 files changed

+4
-0
lines changed

2 files changed

+4
-0
lines changed

nemo/collections/llm/api.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -471,6 +471,7 @@ def ptq(
471 471   export_config: ExportConfig,
472 472   calibration_tp: int = 1,
473 473   calibration_pp: int = 1,
    474 + calibration_ep: int = 1,
474 475   num_layers_in_first_pipeline_stage: int | None = None,
475 476   num_layers_in_last_pipeline_stage: int | None = None,
476 477   devices: int | None = None,
@@ -558,6 +559,7 @@ def ptq(
558 559   pipeline_model_parallel_size=calibration_pp,
559 560   num_layers_in_first_pipeline_stage=num_layers_in_first_pipeline_stage,
560 561   num_layers_in_last_pipeline_stage=num_layers_in_last_pipeline_stage,
    562 + expert_model_parallel_size=calibration_ep,
561 563   devices=devices,
562 564   num_nodes=num_nodes,
563 565   inference_only=True,

scripts/llm/ptq.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ def get_args():
34 34   )
35 35   parser.add_argument("--decoder_type", type=str, help="Decoder type for TensorRT-Model-Optimizer")
36 36   parser.add_argument("-ctp", "--calibration_tp", "--calib_tp", type=int, default=1)
   37 + parser.add_argument("-cep", "--calibration_ep", "--calib_ep", type=int, default=1)
37 38   parser.add_argument("-cpp", "--calibration_pp", "--calib_pp", type=int, default=1)
38 39   parser.add_argument(
39 40   "--num_layers_in_first_pipeline_stage",
@@ -167,6 +168,7 @@ def main():
167 168   export_config=export_config,
168 169   calibration_tp=args.calibration_tp,
169 170   calibration_pp=args.calibration_pp,
    171 + calibration_ep=args.calibration_ep,
170 172   num_layers_in_first_pipeline_stage=args.num_layers_in_first_pipeline_stage,
171 173   num_layers_in_last_pipeline_stage=args.num_layers_in_last_pipeline_stage,
172 174   devices=args.devices,

0 commit comments

Comments
 (0)