1 | | -from transformers import AutoProcessor, AutoModelForCausalLM |
| 1 | +from transformers import AutoTokenizer, AutoModelForCausalLM |
2 | 2 |
3 | 3 | from llmcompressor import oneshot |
4 | 4 | from llmcompressor.modifiers.quantization import QuantizationModifier |
|
7 | 7 | MODEL_ID = "/proving-grounds/engine/hub_cache/models--moonshotai--Kimi-Linear-48B-A3B-Instruct/snapshots/fd1de6347c9d3896f6df8edc529c68942bdd58f6" |
8 | 8 |
9 | 9 | # Load model. |
10 | | -model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto") |
11 | | -processor = AutoProcessor.from_pretrained(MODEL_ID) |
| 10 | +model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", trust_remote_code=True) |
| 11 | +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) |
12 | 12 |
13 | 13 | # Configure the quantization algorithm and scheme. |
14 | 14 | # In this case, we: |
|
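The recipe definition falls in the elided hunk (file lines 15–27). For orientation only, the stock FP8-dynamic recipe from llmcompressor's quantization examples is sketched below; the exact `targets` and `ignore` lists in this commit may differ. Because `FP8_DYNAMIC` computes activation scales per token at runtime, `oneshot` needs no calibration dataset, which is why the next hunk calls it with just `model` and `recipe`.

```python
# Sketch of a typical FP8-dynamic recipe (assumed, not this commit's exact lines):
# quantize the weights of all Linear layers to FP8 with dynamic per-token
# activation scales, leaving the output head in higher precision.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head"],
)
```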
28 | 28 | # Apply quantization. |
29 | 29 | oneshot(model=model, recipe=recipe) |
30 | 30 |
| 31 | +""" |
31 | 32 | print("========== SAMPLE GENERATION ==============") |
32 | 33 | dispatch_for_generation(model) |
33 | 34 | input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to( |
|
36 | 37 | output = model.generate(input_ids, max_new_tokens=20) |
37 | 38 | print(tokenizer.decode(output[0])) |
38 | 39 | print("==========================================") |
| 40 | +""" |
39 | 41 |
40 | 42 | # Save to disk in compressed-tensors format. |
41 | | -SAVE_DIR = "/raid/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC" |
| 43 | +SAVE_DIR = "/proving-grounds/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct" + "-FP8-DYNAMIC" |
42 | 44 | model.save_pretrained(SAVE_DIR) |
43 | | -processor.save_pretrained(SAVE_DIR) |
| 45 | +tokenizer.save_pretrained(SAVE_DIR) |
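With the in-process sample generation wrapped in a module-level string literal (and so never executed), the saved checkpoint can be smoke-tested out of process instead. A minimal sketch using vLLM, assuming the installed build supports the Kimi-Linear architecture and compressed-tensors FP8 checkpoints:

```python
from vllm import LLM, SamplingParams

# Load the compressed-tensors FP8 checkpoint from the SAVE_DIR used above.
llm = LLM(
    model="/proving-grounds/engine/hub_cache/Kimi-Linear-48B-A3B-Instruct-FP8-DYNAMIC",
    trust_remote_code=True,
)
outputs = llm.generate(["Hello my name is"], SamplingParams(max_tokens=20))
print(outputs[0].outputs[0].text)
```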