18 changes: 11 additions & 7 deletions course/en/chapter11/section3.ipynb
@@ -25,7 +25,7 @@
"outputs": [],
"source": [
"# Install the requirements in Google Colab\n",
"# !pip install transformers datasets trl huggingface_hub\n",
"# !pip install transformers==4.57.3 datasets==4.4.2 trl==0.26.2 huggingface_hub\n",
"\n",
"# Authenticate to Hugging Face\n",
"\n",
@@ -44,7 +44,7 @@
"# Import necessary libraries\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"from datasets import load_dataset\n",
"from trl import SFTConfig, SFTTrainer, setup_chat_format\n",
"from trl import SFTConfig, SFTTrainer\n",
"import torch\n",
"\n",
"device = (\n",
@@ -60,8 +60,12 @@
").to(device)\n",
"tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)\n",
"\n",
"# Set up the chat format\n",
"model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)\n",
"# Tokenizer Chat Template\n",
"tokenizer.chat_template = (\n",
" \"{% for message in messages %}\"\n",
" \"{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}\"\n",
" \"{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}\"\n",
")\n",
"\n",
"# Set our name for the finetune to be saved &/ uploaded to\n",
"finetune_name = \"SmolLM2-FT-MyDataset\"\n",
@@ -153,20 +157,20 @@
" learning_rate=5e-5, # Common starting point for fine-tuning\n",
" logging_steps=10, # Frequency of logging training metrics\n",
" save_steps=100, # Frequency of saving model checkpoints\n",
" evaluation_strategy=\"steps\", # Evaluate the model at regular intervals\n",
" eval_strategy=\"steps\", # Evaluate the model at regular intervals\n",
" eval_steps=50, # Frequency of evaluation\n",
" use_mps_device=(\n",
" True if device == \"mps\" else False\n",
" ), # Use MPS for mixed precision training\n",
" hub_model_id=finetune_name, # Set a unique name for your model\n",
")\n",
"\n",
"# Initialize the SFTTrainer\n",
" # Initialize the SFTTrainer\n",
"trainer = SFTTrainer(\n",
" model=model,\n",
" args=sft_config,\n",
" train_dataset=ds[\"train\"],\n",
" tokenizer=tokenizer,\n",
" processing_class=tokenizer,\n",
" eval_dataset=ds[\"test\"],\n",
")\n",
"\n",
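Not part of the diff: a minimal sketch of how the updated section3 cells fit together, assuming the tokenizer, trainer, and finetune_name defined above; the example messages are illustrative, not taken from the course dataset.

# Sketch only: verify the manually assigned chat template, then run fine-tuning
messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
]
print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# -> one "<|im_start|>role\n...<|im_end|>" block per message, plus a trailing
#    "<|im_start|>assistant" header added by add_generation_prompt

trainer.train()                           # evaluates every 50 steps per eval_strategy/eval_steps
trainer.save_model(f"./{finetune_name}")  # save the fine-tuned checkpoint locally
# trainer.push_to_hub()                   # optional: upload under the hub_model_id set above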
30 changes: 17 additions & 13 deletions course/en/chapter11/section4.ipynb
@@ -41,7 +41,7 @@
"outputs": [],
"source": [
"# Install the requirements in Google Colab\n",
"# !pip install transformers datasets trl huggingface_hub\n",
"# !pip install transformers==4.57.3 datasets==4.4.2 trl==0.26.2 peft==0.18.0 huggingface_hub\n",
"\n",
"# Authenticate to Hugging Face\n",
"\n",
@@ -135,7 +135,7 @@
"# Import necessary libraries\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"from datasets import load_dataset\n",
"from trl import SFTConfig, SFTTrainer, setup_chat_format\n",
"from trl import SFTConfig, SFTTrainer\n",
"import torch\n",
"\n",
"device = (\n",
@@ -152,8 +152,12 @@
").to(device)\n",
"tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)\n",
"\n",
"# Set up the chat format\n",
"model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)\n",
"# Tokenizer Chat Template\n",
"tokenizer.chat_template = (\n",
" \"{% for message in messages %}\"\n",
" \"{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}\"\n",
" \"{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}\"\n",
")\n",
"\n",
"# Set our name for the finetune to be saved &/ uploaded to\n",
"finetune_name = \"SmolLM2-FT-MyDataset\"\n",
@@ -223,6 +227,8 @@
},
"outputs": [],
"source": [
"max_seq_length = 1512 # max sequence length for model and packing of the dataset\n",
"\n",
"# Training configuration\n",
"# Hyperparameters based on QLoRA paper recommendations\n",
"args = SFTConfig(\n",
@@ -250,6 +256,12 @@
" # Integration settings\n",
" push_to_hub=False, # Don't push to HuggingFace Hub\n",
" report_to=\"none\", # Disable external logging\n",
" max_length=max_seq_length, # Maximum sequence length\n",
" packing=True, # Enable input packing for efficiency\n",
" dataset_kwargs={\n",
" \"add_special_tokens\": False, # Special tokens handled by template\n",
" \"append_concat_token\": False, # No additional separator needed\n",
" },\n",
")"
]
},
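Not part of the diff: with packing=True, TRL concatenates multiple tokenized examples into sequences of up to max_length tokens so less compute is spent on padding, and the dataset_kwargs entries skip extra special tokens because the chat template already inserts the <|im_start|>/<|im_end|> markers. A minimal sketch of the relocated options in isolation, assuming the trl version pinned above (the output_dir value is a placeholder):

# Sketch only: in recent TRL releases these options belong to SFTConfig,
# not to the SFTTrainer constructor
from trl import SFTConfig

config_sketch = SFTConfig(
    output_dir="./sft_output",  # placeholder output directory (assumed)
    max_length=1512,            # maximum sequence length after tokenization
    packing=True,               # pack short examples into full-length sequences
)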
@@ -270,21 +282,13 @@
},
"outputs": [],
"source": [
"max_seq_length = 1512 # max sequence length for model and packing of the dataset\n",
"\n",
"# Create SFTTrainer with LoRA configuration\n",
"trainer = SFTTrainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=dataset[\"train\"],\n",
" peft_config=peft_config, # LoRA configuration\n",
" max_seq_length=max_seq_length, # Maximum sequence length\n",
" tokenizer=tokenizer,\n",
" packing=True, # Enable input packing for efficiency\n",
" dataset_kwargs={\n",
" \"add_special_tokens\": False, # Special tokens handled by template\n",
" \"append_concat_token\": False, # No additional separator needed\n",
" },\n",
" processing_class=tokenizer\n",
")"
]
},
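Not part of the diff: the peft_config passed to SFTTrainer above is defined in an earlier cell of the notebook that this diff does not touch. A purely illustrative sketch of such a LoRA configuration; the values below are assumptions, not the notebook's actual settings.

# Sketch only: an illustrative LoRA configuration; every value here is assumed
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,                         # rank of the low-rank update matrices (assumed)
    lora_alpha=32,                # scaling factor for the LoRA updates (assumed)
    lora_dropout=0.05,            # dropout applied inside the LoRA layers (assumed)
    target_modules="all-linear",  # apply LoRA to all linear projection layers (assumed)
    task_type="CAUSAL_LM",        # causal language modeling task
)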