1 | | -from transformers import AutoProcessor, AutoModelForCausalLM |
| 1 | +from transformers import AutoTokenizer, AutoModelForCausalLM |
2 | 2 |
3 | 3 | from llmcompressor import oneshot |
4 | 4 | from llmcompressor.modifiers.quantization import QuantizationModifier |
|
7 | 7 | MODEL_ID = "/proving-grounds/engine/hub_cache/models--moonshotai--Kimi-Linear-48B-A3B-Instruct/snapshots/fd1de6347c9d3896f6df8edc529c68942bdd58f6" |
8 | 8 |
9 | 9 | # Load model. |
10 | | -model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto") |
11 | | -processor = AutoProcessor.from_pretrained(MODEL_ID) |
| 10 | +model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", trust_remote_code=True) |
| 11 | +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) |
12 | 12 |
13 | 13 | # Configure the quantization algorithm and scheme. |
14 | 14 | # In this case, we: |
|
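The recipe definition falls in the elided hunk (file lines 15–27). For orientation only, the stock FP8-dynamic recipe from llmcompressor's quantization examples is sketched below; the exact `targets` and `ignore` lists in this commit may differ. Because `FP8_DYNAMIC` computes activation scales per token at runtime, `oneshot` needs no calibration dataset, which is why the next hunk calls it with just `model` and `recipe`.

```python
# Sketch of a typical FP8-dynamic recipe (assumed, not this commit's exact lines):
# quantize the weights of all Linear layers to FP8 with dynamic per-token
# activation scales, leaving the output head in higher precision.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head"],
)
```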
28 | 28 | # Apply quantization. |
29 | 29 | oneshot(model=model, recipe=recipe) |
30 | 30 |
| 31 | +""" |
31 | 32 | print("========== SAMPLE GENERATION ==============") |
32 | 33 | dispatch_for_generation(model) |
33 | 34 | input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to( |
|
36 | 37 | output = model.generate(input_ids, max_new_tokens=20) |
37 | 38 | print(tokenizer.decode(output[0])) |
38 | 39 | print("==========================================") |
| 40 | +""" |
39 | 41 |
40 | 42 | # Save to disk in compressed-tensors format. |
41 | | -SAVE_DIR = "/raid/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC" |
| 43 | +SAVE_DIR = "/proving-grounds/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC" |
42 | 44 | model.save_pretrained(SAVE_DIR) |
43 | | -processor.save_pretrained(SAVE_DIR) |
| 45 | +tokenizer.save_pretrained(SAVE_DIR) |
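With the in-process sample generation wrapped in a module-level string literal (and so never executed), the saved checkpoint can be smoke-tested out of process instead. A minimal sketch using vLLM, assuming the installed build supports the Kimi-Linear architecture and compressed-tensors FP8 checkpoints:

```python
from vllm import LLM, SamplingParams

# Load the compressed-tensors FP8 checkpoint from the SAVE_DIR used above.
llm = LLM(
    model="/proving-grounds/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct-FP8-DYNAMIC",
    trust_remote_code=True,
)
outputs = llm.generate(["Hello my name is"], SamplingParams(max_tokens=20))
print(outputs[0].outputs[0].text)
```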