Skip to content

Commit b2a3a3d

Browse files
committed
update
1 parent ae47c45 commit b2a3a3d

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

examples/quantization_w8a8_fp8/kimi_linear_fp8.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from transformers import AutoProcessor, AutoModelForCausalLM
1+
from transformers import AutoTokenizer, AutoModelForCausalLM
22

33
from llmcompressor import oneshot
44
from llmcompressor.modifiers.quantization import QuantizationModifier
@@ -7,8 +7,8 @@
77
MODEL_ID = "//proving-grounds/engine/hub_cache/models--moonshotai--Kimi-Linear-48B-A3B-Instruct/snapshots/fd1de6347c9d3896f6df8edc529c68942bdd58f6"
88

99
# Load model.
10-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
11-
processor = AutoProcessor.from_pretrained(MODEL_ID)
10+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", trust_remote_code=True)
11+
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
1212

1313
# Configure the quantization algorithm and scheme.
1414
# In this case, we:
@@ -28,6 +28,7 @@
2828
# Apply quantization.
2929
oneshot(model=model, recipe=recipe)
3030

31+
"""
3132
print("========== SAMPLE GENERATION ==============")
3233
dispatch_for_generation(model)
3334
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
@@ -36,8 +37,9 @@
3637
output = model.generate(input_ids, max_new_tokens=20)
3738
print(tokenizer.decode(output[0]))
3839
print("==========================================")
40+
"""
3941

4042
# Save to disk in compressed-tensors format.
41-
SAVE_DIR = "/raid/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC"
43+
SAVE_DIR = "/proving-grounds/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC"
4244
model.save_pretrained(SAVE_DIR)
43-
processor.save_pretrained(SAVE_DIR)
45+
tokenizer.save_pretrained(SAVE_DIR)

0 commit comments

Comments (0)