Skip to content

Commit 11518ea

Browse files
Merge pull request #135 from MervinPraison/develop
praisonai train final release
2 parents b14eb76 + 1bead59 commit 11518ea

File tree

9 files changed

+63
-21
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
FROM python:3.11-slim
22
WORKDIR /app
33
COPY . .
4-
RUN pip install flask praisonai==0.0.59rc11 gunicorn markdown
4+
RUN pip install flask praisonai==0.0.59 gunicorn markdown
55
EXPOSE 8080
66
CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]

docs/api/praisonai/deploy.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ <h2 id="raises">Raises</h2>
110110
file.write("FROM python:3.11-slim\n")
111111
file.write("WORKDIR /app\n")
112112
file.write("COPY . .\n")
113-
file.write("RUN pip install flask praisonai==0.0.59rc11 gunicorn markdown\n")
113+
file.write("RUN pip install flask praisonai==0.0.59 gunicorn markdown\n")
114114
file.write("EXPOSE 8080\n")
115115
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
116116

praisonai.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ class Praisonai < Formula
33

44
desc "AI tools for various AI applications"
55
homepage "https://github.com/MervinPraison/PraisonAI"
6-
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/0.0.59rc11.tar.gz"
6+
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/0.0.59.tar.gz"
77
sha256 "1828fb9227d10f991522c3f24f061943a254b667196b40b1a3e4a54a8d30ce32" # Replace with actual SHA256 checksum
88
license "MIT"
99

praisonai/cli.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -133,18 +133,24 @@ def main(self):
133133
package_root = os.path.dirname(os.path.abspath(__file__))
134134
config_yaml_destination = os.path.join(os.getcwd(), 'config.yaml')
135135

136-
# Generate config.yaml using the function
137-
config = generate_config(
138-
model_name=args.model,
139-
hf_model_name=args.hf,
140-
ollama_model_name=args.ollama,
141-
dataset=[{
142-
"name": args.dataset
143-
}]
144-
)
145-
with open('config.yaml', 'w') as f:
146-
yaml.dump(config, f, default_flow_style=False, indent=2)
147-
136+
# Create config.yaml only if it doesn't exist or --model or --dataset is provided
137+
if not os.path.exists(config_yaml_destination) or args.model or args.dataset:
138+
config = generate_config(
139+
model_name=args.model,
140+
hf_model_name=args.hf,
141+
ollama_model_name=args.ollama,
142+
dataset=[{
143+
"name": args.dataset
144+
}]
145+
)
146+
with open('config.yaml', 'w') as f:
147+
yaml.dump(config, f, default_flow_style=False, indent=2)
148+
149+
# Overwrite huggingface_save and ollama_save if --hf or --ollama are provided
150+
if args.hf:
151+
config["huggingface_save"] = "true"
152+
if args.ollama:
153+
config["ollama_save"] = "true"
148154

149155
if 'init' in sys.argv:
150156
from praisonai.setup.setup_conda_env import main as setup_conda_main

praisonai/deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def create_dockerfile(self):
5656
file.write("FROM python:3.11-slim\n")
5757
file.write("WORKDIR /app\n")
5858
file.write("COPY . .\n")
59-
file.write("RUN pip install flask praisonai==0.0.59rc11 gunicorn markdown\n")
59+
file.write("RUN pip install flask praisonai==0.0.59 gunicorn markdown\n")
6060
file.write("EXPOSE 8080\n")
6161
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
6262

praisonai/inc/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ def generate_config(
3838
"""Generates the configuration for PraisonAI with dynamic overrides."""
3939

4040
config = {
41-
"ollama_save": ollama_save or "false",
42-
"huggingface_save": huggingface_save or "false",
43-
"train": train or "false",
41+
"ollama_save": ollama_save or "true",
42+
"huggingface_save": huggingface_save or "true",
43+
"train": train or "true",
4444

4545
"model_name": model_name or "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
4646
"hf_model_name": hf_model_name or "mervinpraison/llama-3.1-tamilan-8B-test",

praisonai/setup/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ model_parameters: "8b"
1010
max_seq_length: 2048
1111
load_in_4bit: true
1212
lora_r: 16
13-
lora_target_modules:
13+
lora_target_modules:
1414
- "q_proj"
1515
- "k_proj"
1616
- "v_proj"

praisonai/train.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ def train_model(self):
142142
),
143143
)
144144
trainer.train()
145+
self.model.save_pretrained("lora_model") # Local saving
146+
self.tokenizer.save_pretrained("lora_model")
145147

146148
def inference(self, instruction, input_text):
147149
FastLanguageModel.for_inference(self.model)
@@ -158,6 +160,17 @@ def inference(self, instruction, input_text):
158160
inputs = self.tokenizer([alpaca_prompt.format(instruction, input_text, "")], return_tensors="pt").to("cuda")
159161
outputs = self.model.generate(**inputs, max_new_tokens=64, use_cache=True)
160162
print(self.tokenizer.batch_decode(outputs))
163+
164+
def load_model(self):
165+
"""Loads the model and tokenizer using the FastLanguageModel library."""
166+
from unsloth import FastLanguageModel
167+
model, tokenizer = FastLanguageModel.from_pretrained(
168+
model_name=self.config["output_dir"],
169+
max_seq_length=2048,
170+
dtype=None,
171+
load_in_4bit=self.config["load_in_4bit"],
172+
)
173+
return model, tokenizer
161174

162175
def save_model_merged(self):
163176
if os.path.exists(self.config["hf_model_name"]):
@@ -176,9 +189,22 @@ def push_model_gguf(self):
176189
quantization_method=self.config["quantization_method"],
177190
token=os.getenv('HF_TOKEN')
178191
)
192+
193+
def save_model_gguf(self):
194+
self.model.save_pretrained_gguf(
195+
self.config["hf_model_name"],
196+
self.tokenizer,
197+
quantization_method="q4_k_m"
198+
)
179199

180200
def prepare_modelfile_content(self):
181201
output_model = self.config["hf_model_name"]
202+
gguf_path = f"{output_model}/unsloth.Q4_K_M.gguf"
203+
204+
# Check if the GGUF file exists. If not, generate it ## TODO Multiple Quantisation other than Q4_K_M.gguf
205+
if not os.path.exists(gguf_path):
206+
self.model, self.tokenizer = self.load_model()
207+
self.save_model_gguf()
182208
return f"""FROM {output_model}/unsloth.Q4_K_M.gguf
183209
184210
TEMPLATE \"\"\"Below are some instructions that describe some tasks. Write responses that appropriately complete each request.{{{{ if .Prompt }}}}
@@ -215,10 +241,20 @@ def run(self):
215241
self.train_model()
216242

217243
if self.config.get("huggingface_save", "true").lower() == "true":
244+
self.model, self.tokenizer = self.load_model()
218245
self.save_model_merged()
219246

220247
if self.config.get("huggingface_save_gguf", "true").lower() == "true":
248+
self.model, self.tokenizer = self.load_model()
221249
self.push_model_gguf()
250+
251+
# if self.config.get("save_gguf", "true").lower() == "true": ## TODO
252+
# self.model, self.tokenizer = self.load_model()
253+
# self.save_model_gguf()
254+
255+
# if self.config.get("save_merged", "true").lower() == "true": ## TODO
256+
# self.model, self.tokenizer = self.load_model()
257+
# self.save_model_merged()
222258

223259
if self.config.get("ollama_save", "true").lower() == "true":
224260
self.create_and_push_ollama_model()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "PraisonAI"
3-
version = "0.0.59rc11"
3+
version = "0.0.59"
44
description = "PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration."
55
authors = ["Mervin Praison"]
66
license = ""

0 commit comments

Comments
 (0)