diff --git a/course/en/chapter11/section3.ipynb b/course/en/chapter11/section3.ipynb
index 7dd5cdd8b..cf2519302 100644
--- a/course/en/chapter11/section3.ipynb
+++ b/course/en/chapter11/section3.ipynb
@@ -25,7 +25,7 @@
    "outputs": [],
    "source": [
     "# Install the requirements in Google Colab\n",
-    "# !pip install transformers datasets trl huggingface_hub\n",
+    "# !pip install transformers==4.57.3 datasets==4.4.2 trl==0.26.2 huggingface_hub\n",
     "\n",
     "# Authenticate to Hugging Face\n",
     "\n",
@@ -44,7 +44,7 @@
     "# Import necessary libraries\n",
     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
     "from datasets import load_dataset\n",
-    "from trl import SFTConfig, SFTTrainer, setup_chat_format\n",
+    "from trl import SFTConfig, SFTTrainer\n",
     "import torch\n",
     "\n",
     "device = (\n",
@@ -60,8 +60,12 @@
     ").to(device)\n",
     "tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)\n",
     "\n",
-    "# Set up the chat format\n",
-    "model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)\n",
+    "# Tokenizer Chat Template\n",
+    "tokenizer.chat_template = (\n",
+    "    \"{% for message in messages %}\"\n",
+    "    \"{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}\"\n",
+    "    \"{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}\"\n",
+    ")\n",
     "\n",
     "# Set our name for the finetune to be saved &/ uploaded to\n",
     "finetune_name = \"SmolLM2-FT-MyDataset\"\n",
@@ -153,7 +157,7 @@
     "    learning_rate=5e-5,  # Common starting point for fine-tuning\n",
     "    logging_steps=10,  # Frequency of logging training metrics\n",
     "    save_steps=100,  # Frequency of saving model checkpoints\n",
-    "    evaluation_strategy=\"steps\",  # Evaluate the model at regular intervals\n",
+    "    eval_strategy=\"steps\",  # Evaluate the model at regular intervals\n",
     "    eval_steps=50,  # Frequency of evaluation\n",
     "    use_mps_device=(\n",
     "        True if device == \"mps\" else False\n",
@@ -161,12 +165,12 @@
     "    hub_model_id=finetune_name,  # Set a unique name for your model\n",
     ")\n",
     "\n",
     "# Initialize the SFTTrainer\n",
     "trainer = SFTTrainer(\n",
     "    model=model,\n",
     "    args=sft_config,\n",
     "    train_dataset=ds[\"train\"],\n",
-    "    tokenizer=tokenizer,\n",
+    "    processing_class=tokenizer,\n",
     "    eval_dataset=ds[\"test\"],\n",
     ")\n",
     "\n",
diff --git a/course/en/chapter11/section4.ipynb b/course/en/chapter11/section4.ipynb
index 9028d4771..94aedea90 100644
--- a/course/en/chapter11/section4.ipynb
+++ b/course/en/chapter11/section4.ipynb
@@ -41,7 +41,7 @@
    "outputs": [],
    "source": [
     "# Install the requirements in Google Colab\n",
-    "# !pip install transformers datasets trl huggingface_hub\n",
+    "# !pip install transformers==4.57.3 datasets==4.4.2 trl==0.26.2 peft==0.18.0 huggingface_hub\n",
     "\n",
     "# Authenticate to Hugging Face\n",
     "\n",
@@ -135,7 +135,7 @@
     "# Import necessary libraries\n",
     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
     "from datasets import load_dataset\n",
-    "from trl import SFTConfig, SFTTrainer, setup_chat_format\n",
+    "from trl import SFTConfig, SFTTrainer\n",
     "import torch\n",
     "\n",
     "device = (\n",
@@ -152,8 +152,12 @@
     ").to(device)\n",
     "tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)\n",
     "\n",
-    "# Set up the chat format\n",
-    "model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)\n",
+    "# Tokenizer Chat Template\n",
+    "tokenizer.chat_template = (\n",
+    "    \"{% for message in messages %}\"\n",
+    "    \"{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}\"\n",
+    "    \"{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}\"\n",
+    ")\n",
     "\n",
     "# Set our name for the finetune to be saved &/ uploaded to\n",
     "finetune_name = \"SmolLM2-FT-MyDataset\"\n",
@@ -223,6 +227,8 @@
    },
    "outputs": [],
    "source": [
+    "max_seq_length = 1512  # max sequence length for model and packing of the dataset\n",
+    "\n",
     "# Training configuration\n",
     "# Hyperparameters based on QLoRA paper recommendations\n",
     "args = SFTConfig(\n",
@@ -250,6 +256,12 @@
     "    # Integration settings\n",
     "    push_to_hub=False,  # Don't push to HuggingFace Hub\n",
     "    report_to=\"none\",  # Disable external logging\n",
+    "    max_length=max_seq_length,  # Maximum sequence length\n",
+    "    packing=True,  # Enable input packing for efficiency\n",
+    "    dataset_kwargs={\n",
+    "        \"add_special_tokens\": False,  # Special tokens handled by template\n",
+    "        \"append_concat_token\": False,  # No additional separator needed\n",
+    "    },\n",
     ")"
    ]
   },
@@ -270,21 +282,13 @@
    },
    "outputs": [],
    "source": [
-    "max_seq_length = 1512  # max sequence length for model and packing of the dataset\n",
-    "\n",
     "# Create SFTTrainer with LoRA configuration\n",
     "trainer = SFTTrainer(\n",
     "    model=model,\n",
     "    args=args,\n",
     "    train_dataset=dataset[\"train\"],\n",
     "    peft_config=peft_config,  # LoRA configuration\n",
-    "    max_seq_length=max_seq_length,  # Maximum sequence length\n",
-    "    tokenizer=tokenizer,\n",
-    "    packing=True,  # Enable input packing for efficiency\n",
-    "    dataset_kwargs={\n",
-    "        \"add_special_tokens\": False,  # Special tokens handled by template\n",
-    "        \"append_concat_token\": False,  # No additional separator needed\n",
-    "    },\n",
+    "    processing_class=tokenizer\n",
     ")"
    ]
   },
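
Two sketches follow for reviewers; neither is part of the patch. First, the manual ChatML template that replaces trl's setup_chat_format helper can be sanity-checked in isolation. The sketch assumes the HuggingFaceTB/SmolLM2-135M checkpoint that the notebooks load (the model_name assignment sits outside these hunks); its tokenizer already carries <|im_start|> and <|im_end|> in its vocabulary, so no new tokens or embedding resize are needed:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")

    # Same ChatML template the patch sets in both notebooks
    tokenizer.chat_template = (
        "{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
        "{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
    )

    messages = [{"role": "user", "content": "Hello!"}]
    print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
    # <|im_start|>user
    # Hello!<|im_end|>
    # <|im_start|>assistant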
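Second, the trainer-side changes track the pinned TRL release: SFTTrainer's tokenizer argument became processing_class, and max_seq_length (renamed max_length), packing, and dataset_kwargs moved onto SFTConfig, as the section4 hunks show. A minimal smoke test of the new calling convention, under the same checkpoint assumption (output_dir is a hypothetical scratch path, and the tiny dataset exists only to build the trainer and run one step):

    from datasets import Dataset
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import SFTConfig, SFTTrainer

    model_name = "HuggingFaceTB/SmolLM2-135M"  # assumed; set outside these hunks
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tiny plain-text dataset; SFTTrainer consumes the "text" column directly
    train_dataset = Dataset.from_list([{"text": "Hello world."}] * 8)

    args = SFTConfig(
        output_dir="./sft-smoke-test",  # hypothetical scratch directory
        max_steps=1,
        report_to="none",
        max_length=512,  # was SFTTrainer(max_seq_length=...)
        packing=True,  # was SFTTrainer(packing=...)
    )

    trainer = SFTTrainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        processing_class=tokenizer,  # was SFTTrainer(tokenizer=...)
    )
    trainer.train()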