Merge pull request #831 from project-anuvaad/wfmrefactoring

MrigankTarento · web-flow · commit 1318fa8943f6 · 2023-11-09T12:58:38.000+05:30
Bulk Call - Auto Translation Completed Fix
diff --git a/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/configs/wfmconfig.py b/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/configs/wfmconfig.py
@@ -81,3 +81,8 @@
 #Specific variables
 granularity_list = ["manualEditingStartTime","manualEditingEndTime","parallelDocumentUpload","reviewerInProgress","reviewerCompleted"]
 workflowCodesTranslation = ["DP_WFLOW_FBT","WF_A_FCBMTKTR","DP_WFLOW_FBTTR","WF_A_FTTKTR"]
+
+#Service URLs
+# Base URLs of the services called from service/pipelinecalls.py. Each one can be
+# overridden through the environment variable of the same name. Endpoint paths are
+# appended by plain string concatenation, so a value must keep its trailing "/".
+# Document converter: used for the document-exporter endpoint.
+DOCUMENT_CONVERTER_SERVER_URL=os.environ.get("DOCUMENT_CONVERTER_SERVER_URL","http://anuvaad-etl-document-converter:5001/")
+# File uploader: used for the serve-file and upload-file endpoints.
+ZUUL_ROUTES_FU_URL = os.environ.get("ZUUL_ROUTES_FU_URL","http://anuvaad-suploader:5001/")
+# Workflow manager: used for the async workflow initiate endpoint.
+ZUUL_ROUTES_WFM_URL = os.environ.get("ZUUL_ROUTES_WFM_URL","http://anuvaad-etl-wf-manager:5001/")
diff --git a/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/controller/wfmcontroller.py b/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/controller/wfmcontroller.py
@@ -203,6 +203,19 @@ def active_docs():
         log_exception("Something went wrong: " + str(e), None, e)
         return {"status": "FAILED", "message": "Something went wrong"}, 400
 
+@wfmapp.route(context_path + '/v1/translate_pipeline', methods=["POST"])
+def translate_pipeline():
+    """Run the digitization-to-translation pipeline for the posted JSON body.
+
+    The body is passed through add_headers() and then handed to
+    WFMService.digitization_translation_pipeline().
+
+    Returns:
+        ({"response": <service result>}, 200) when the pipeline succeeded;
+        ({"response": <service result>}, 400) when the service returned nothing
+        or an error dict ({"status": "Error", ...});
+        ({"status": "FAILED", ...}, 400) when an exception was raised.
+    """
+    try:
+        service = WFMService()
+        data = add_headers(request.get_json(), request)
+        response = service.digitization_translation_pipeline(data)
+        # The service reports a failed step as a truthy error dict, so an
+        # emptiness check alone would send those failures out as HTTP 200.
+        failed = not response or (
+            isinstance(response, dict) and response.get("status") == "Error"
+        )
+        return {"response": response}, (400 if failed else 200)
+    except Exception as e:
+        log_exception("Something went wrong: " + str(e), None, e)
+        return {"status": "FAILED", "message": "Something went wrong"}, 400
+
 # Fetches required headers from the request and adds it to the body.
 def add_headers(data, api_request):
     headers = {
diff --git a/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/service/pipelinecalls.py b/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/service/pipelinecalls.py
@@ -0,0 +1,91 @@
+from anuvaad_auditor import log_info, log_error
+import json
+import requests
+from configs.wfmconfig import app_context
+from configs.wfmconfig import DOCUMENT_CONVERTER_SERVER_URL, ZUUL_ROUTES_FU_URL, ZUUL_ROUTES_WFM_URL
+import traceback
+
+class PipelineCalls:
+    """Outbound HTTP calls used by the digitization-to-translation pipeline.
+
+    Each method returns None when the remote call answers outside 200-204 or
+    raises an exception that the method itself handles (and logs).
+    """
+
+    @staticmethod
+    def _forwarded_headers(headers):
+        """Build the x-* HTTP headers from the request metadata dict.
+
+        Raises KeyError when one of the five metadata keys is missing.
+        """
+        return {
+            "x-user-id": headers["userID"],
+            "x-org-id": headers["orgID"],
+            "x-roles": headers["roles"],
+            "x-request-id": headers["requestID"],
+            "x-session-id": headers["sessionID"]
+        }
+
+    @staticmethod
+    def _succeeded(response):
+        """Return True for the 200-204 status codes treated as success here."""
+        return 200 <= response.status_code <= 204
+
+    def document_export(self,user_id,record_id,filetype,headers):
+        """Ask the document converter to export a record.
+
+        Args:
+            user_id: sent as "user_id" in the JSON request body.
+            record_id: sent as "record_id".
+            filetype: sent as "file_type".
+            headers: accepted for call compatibility but not forwarded; only a
+                JSON Content-Type header is sent.
+
+        Returns:
+            The "translated_document" value of the JSON response on a 200-204
+            status, otherwise None. Any exception is logged, not raised.
+        """
+        log_info("Performing Document Export",app_context)
+        payload = json.dumps({
+            "record_id": record_id,
+            "user_id": user_id,
+            "file_type": filetype
+        })
+        request_headers = {
+            'Content-Type': 'application/json'
+        }
+        try:
+            url = DOCUMENT_CONVERTER_SERVER_URL + "anuvaad-etl/document-converter/v0/document-exporter"
+            # NOTE(review): no timeout is set here, unlike upload_files/translate;
+            # requests then waits without limit if the converter stalls.
+            response = requests.request("POST", url, headers=request_headers, data=payload)
+            log_info(f"Document Export Response {response.status_code}",app_context)
+            if self._succeeded(response):
+                return response.json()["translated_document"]
+        except Exception as e:
+            log_error(f"Error during document conversion : {traceback.format_exc()}",app_context,e)
+        return None
+
+    def download_file(self,download_path):
+        """Fetch a file from the file uploader's serve-file endpoint.
+
+        Args:
+            download_path: appended unchanged as the "filename" query value.
+
+        Returns:
+            The response body as bytes on a 200-204 status, otherwise None.
+            Any exception is logged, not raised.
+        """
+        log_info("Performing File Download",app_context)
+        url = ZUUL_ROUTES_FU_URL + "anuvaad-api/file-uploader/v0/serve-file?filename=" + download_path
+        try:
+            # NOTE(review): no timeout is set on this request either.
+            response = requests.request("GET", url)
+            if self._succeeded(response):
+                return response.content
+        except Exception as e:
+            log_error(f"Error during file download : {traceback.format_exc()}",app_context,e)
+        return None
+
+    def upload_files(self,filepath,headers):
+        """Upload a local file to the file uploader and return its file id.
+
+        Args:
+            filepath: path of the local file to send as the "file" form part.
+            headers: metadata dict with userID, orgID, roles, requestID and
+                sessionID; forwarded as x-* HTTP headers.
+
+        Returns:
+            The "data" value of the JSON response on a 200-204 status,
+            otherwise None. Only requests exceptions are handled (logged);
+            a missing metadata key or an unreadable file raises to the caller.
+        """
+        request_headers = self._forwarded_headers(headers)
+        log_info("Performing Upload File",app_context)
+        try:
+            url = ZUUL_ROUTES_FU_URL + "anuvaad-api/file-uploader/v0/upload-file"
+            # The context manager closes the handle even when the POST raises.
+            with open(filepath,'rb') as upload_handle:
+                req = requests.post(timeout=120,url=url,files={'file': upload_handle},headers=request_headers)
+            if self._succeeded(req):
+                return req.json()["data"]
+            return None
+        except requests.exceptions.RequestException as e:
+            log_error(f"Error during file upload : {traceback.format_exc()}",app_context,e)
+        return None
+
+    def translate(self,file_name,file_id,payload,headers):
+        """Start the async translation workflow for an uploaded file.
+
+        Args:
+            file_name: stored as the job name.
+            file_id: uploaded file id; stored as the first file's path, and
+                the text after its last "." is stored as that file's type.
+            payload: workflow request body; modified in place before sending.
+            headers: metadata dict with userID, orgID, roles, requestID and
+                sessionID; forwarded as x-* HTTP headers.
+
+        Returns:
+            The parsed JSON response on a 200-204 status, otherwise None.
+            Only requests exceptions are handled (logged).
+        """
+        request_headers = self._forwarded_headers(headers)
+
+        payload["jobName"] = file_name
+        payload["files"][0]["path"] = file_id
+        # Split on "." to get the extension; a bare split() separates on
+        # whitespace and would store the whole file id as the type.
+        payload["files"][0]["type"] = file_id.split(".")[-1]
+
+        # Perform translation
+        log_info(f"Performing Translation {file_id}",app_context)
+        try:
+            url = ZUUL_ROUTES_WFM_URL+"anuvaad-etl/wf-manager/v1/workflow/async/initiate"
+            req = requests.post(timeout=120,url=url,json=payload, headers=request_headers)
+            if self._succeeded(req):
+                return req.json()
+            return None
+        except requests.exceptions.RequestException as e:
+            log_error(f"Error during translation : {traceback.format_exc()}",app_context,e)
+        return None
+
diff --git a/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/service/wfmservice.py b/anuvaad-etl/anuvaad-workflow-mgr/etl-wf-manager/service/wfmservice.py
@@ -10,6 +10,7 @@
 from anuvaad_auditor.errorhandler import post_error_wf, post_error, log_exception
 from anuvaad_auditor.loghandler import log_info, log_error
 from repository.redisrepo import REDISRepository
+from service.pipelinecalls import PipelineCalls
 from configs.wfmconfig import app_context, workflowCodesTranslation
 import datetime 
 
@@ -19,6 +20,7 @@
 wfmutils = WFMUtils()
 validator = WFMValidator()
 redisRepo = REDISRepository()
+pipelineCalls = PipelineCalls()
 
 class WFMService:
     def __init__(self):
@@ -389,6 +391,9 @@ def get_job_details_bulk(self, req_criteria, skip_pagination, isReviewer=False):
                             jobIDs.append(jobID)
                     if len(jobIDs) > 0:
                         criteria["jobID"] = {"$in": jobIDs}
+            if 'inputFileName' in req_criteria.keys():
+                jobName_pattern = req_criteria["inputFileName"]
+                criteria["input.jobName"] = {"$regex": jobName_pattern}
             if 'orgIDs' in req_criteria.keys():
                 if req_criteria["orgIDs"]:
                     orgIDs = []
@@ -404,7 +409,13 @@ def get_job_details_bulk(self, req_criteria, skip_pagination, isReviewer=False):
                         if currentStat:
                             currentStatus.append(currentStat)
                     if len(currentStatus) > 0:
-                        criteria["granularity.currentStatus"] = {"$in": currentStatus}
+                        if "auto_translation_completed" in currentStatus:
+                            criteria["$or"] = [
+                                {"granularity.currentStatus": {"$in": currentStatus}},
+                                {"granularity.currentStatus": {"$exists": False}}
+                            ]
+                        else:
+                            criteria["granularity.currentStatus"] = {"$in": currentStatus}
             if 'filterByStartTime' in req_criteria.keys():
                 if 'startTimeStamp' in req_criteria['filterByStartTime'].keys() and 'endTimeStamp' in req_criteria['filterByStartTime'].keys():
                             criteria["startTime"] = { "$gte": req_criteria['filterByStartTime']['startTimeStamp'], "$lte": req_criteria['filterByStartTime']['endTimeStamp']}
@@ -581,4 +592,92 @@ def get_active_doc_count(self):
             response = redisRepo.get_active_count()
             return response
         except Exception as e:
-            log_exception("Active Job Status Retrieval: {wf_async_input['jobID']} " + str(e), None, e)
+            log_exception("Active Job Status Retrieval: {wf_async_input['jobID']} " + str(e), None, e)
+
+    def digitization_translation_pipeline(self,data):
+        """Export a digitized record, re-upload it and start its translation.
+
+        Steps: document export -> file download -> local staging under
+        ./upload_files -> file upload -> async translation workflow. For the
+        image file types jpg, bmp, png, svg and jpeg the export is requested as
+        a pdf: "%7C" in record_id is replaced by "|" and the extension of
+        file_name becomes "pdf".
+
+        Args:
+            data: request body. Reads "record_id", "user_id", "file_type",
+                "file_name", "translation_async_flow" and "metadata" (a dict
+                holding userID, orgID, roles, requestID and sessionID, which
+                PipelineCalls forwards as x-* headers). Example body, shown
+                without "metadata":
+
+                {
+                    "record_id": "A_FWLBOD20TESOT-hnHgN-1693227866067%7C0-16932280318450673.json",
+                    "user_id": "d225fb2cd78a45078518356548f396ff1686290705872",
+                    "file_type": "pdf",
+                    "file_name": "name.pdf",
+                    "translation_async_flow" : {
+                        "workflowCode": "WF_A_FCBMTKTR",
+                        "jobName": "1958_1_1150_1155_updated.pdf",
+                        "jobDescription": "",
+                        "files": [
+                            {
+                                "path": "01c440b8-8aac-4352-9b44-c3fbf1ddca6e.pdf",
+                                "type": "pdf",
+                                "locale": "en",
+                                "model": {
+                                    "uuid": "687baea0-4512-4fb9-9264-5c7b368afc59",
+                                    "is_primary": true,
+                                    "model_id": 103,
+                                    "model_name": "English-Hindi IndicTrans Model-1",
+                                    "source_language_code": "en",
+                                    "source_language_name": "English",
+                                    "target_language_code": "hi",
+                                    "target_language_name": "Hindi",
+                                    "description": "AAI4B en-hi model-1(indictrans/fairseq)",
+                                    "status": "ACTIVE",
+                                    "connection_details": {
+                                        "kafka": {
+                                            "input_topic": "KAFKA_AAI4B_NMT_TRANSLATION_INPUT_TOPIC",
+                                            "output_topic": "KAFKA_AAI4B_NMT_TRANSLATION_OUTPUT_TOPIC"
+                                        },
+                                        "translation": {
+                                            "api_endpoint": "AAIB_NMT_TRANSLATE_ENDPOINT",
+                                            "host": "AAI4B_NMT_HOST"
+                                        },
+                                        "interactive": {
+                                            "api_endpoint": "AAIB_NMT_IT_ENDPOINT",
+                                            "host": "AAI4B_NMT_HOST"
+                                        }
+                                    },
+                                    "interactive_translation": true
+                                },
+                                "context": "JUDICIARY",
+                                "modifiedSentences": "a"
+                            }
+                        ]
+                    }
+                }
+
+        Returns:
+            Whatever pipelineCalls.translate() returns once the file has been
+            uploaded (None if that call fails);
+            {"status": "Error", "reason": ...} when record_id is missing or the
+            export, download or upload step fails;
+            None when any other exception occurs (it is logged, not raised).
+        """
+        try:
+            if "record_id" not in data:
+                return {"status" : "Error", "reason":"record_id missing"}
+            record_id = data["record_id"]
+            file_type = data["file_type"]
+            file_name = data["file_name"]
+            metadata = data["metadata"]
+            if file_type in ["jpg","bmp","png","svg","jpeg"]:
+                record_id = record_id.replace("%7C","|")
+                file_type = "pdf"
+                # splitext swaps only the trailing extension; str.replace would also
+                # rewrite earlier occurrences ("png_scan.png" -> "pdf_scan.pdf").
+                file_name = os.path.splitext(file_name)[0] + ".pdf"
+
+            document = pipelineCalls.document_export(data["user_id"],record_id,file_type,metadata)
+            if document is None:
+                return {"status":"Error","reason":"Document Export Failed"}
+            file_content = pipelineCalls.download_file(document)
+            if file_content is None:
+                return {"status":"Error","reason":"File Download Failed"}
+
+            # exist_ok avoids the check-then-create race between concurrent requests.
+            os.makedirs("upload_files", exist_ok=True)
+            # basename() keeps a client-supplied name inside the staging directory.
+            staged_path = "./upload_files/" + os.path.basename(file_name)
+            try:
+                with open(staged_path, "wb") as staged:
+                    staged.write(file_content)
+                file_id = pipelineCalls.upload_files(staged_path,metadata)
+            finally:
+                # Remove the staged copy whether or not the upload succeeded.
+                try:
+                    os.remove(staged_path)
+                except FileNotFoundError:
+                    pass  # the file was never created
+                except OSError as e:
+                    log_error("Exception during file deletion",app_context,e)
+            if file_id is None:
+                return {"status":"Error","reason":"File Upload Failed"}
+
+            response = pipelineCalls.translate(file_name,file_id,data["translation_async_flow"],metadata)
+            return response
+        except Exception as e:
+            # Argument order matches the other log_error calls: message, context, exception.
+            log_error(f"Exception occurred {e}",app_context,e)
+        return None
+