Implemented frog selection options #25

proycon · proycon · commit 5b777470dbf1 · 2018-04-10T16:11:12.000+02:00
diff --git a/webservice/picclservice/picclservice.py b/webservice/picclservice/picclservice.py
@@ -396,8 +396,13 @@
         ChoiceParameter('distance','How many edits?','Search a distance of N characters for variants',choices=[('2','Up to two edits'),('1','Only one edit')]) #old TICCL -L
     ]),
     ('Automatic Linguistic Enrichment', [
-        BooleanParameter('tok','Perform Tokenisation',"Perform tokenisation."),
-        BooleanParameter('frog','Perform Linguistic Enrichment',"Performs tokenisation, Part-of-Speech tagging, lemmatisation, named entity recognition and more. This is implemented only for Dutch (uses Frog)!!!")
+        BooleanParameter('tok','Tokenisation',"Perform tokenisation", default=True),
+        BooleanParameter('pos','Part-of-Speech Tagging',"Part-of-speech Tagging (for Dutch only!)",default=True),
+        BooleanParameter('lemma','Lemmatisation',"Lemmatisation (for Dutch only!)", default=True),
+        BooleanParameter('morph','Morphological Analysis',"Morphological Analysis (for Dutch only!)", default=False),
+        BooleanParameter('ner','Named Entity Recognition',"Named Entity Recognition", default=False),
+        BooleanParameter('parser','Dependency Parser',"Dependency parser (for Dutch only!)", default=False),
+        BooleanParameter('chunker','Chunker / Shallow-parser Parser',"Chunker / Shallow parser (for Dutch only!)", default=False),
     ]),
     #('Focus Word Selection', [
     #    IntegerParameter('minlength','Minimum Word Length','Integer between zero and one hundred',default=5,minvalue=0, maxvalue=100), #old ticcl -x
@@ -421,7 +426,7 @@
 ]
 
 
-# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========
+# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========
 
 #The dispatcher to use (defaults to clamdispatcher.py), you almost never want to change this
 #DISPATCHER = 'clamdispatcher.py'
diff --git a/webservice/picclservice/picclservice_wrapper.py b/webservice/picclservice/picclservice_wrapper.py
@@ -205,15 +205,36 @@ def nextflowout(prefix):
     frog_inputdir = 'ocr_output'
     textclass_opts = "--inputclass \"OCR\" --outputclass \"current\"" #extra textclass opts for both frog and/or ucto
 
-
-if 'frog' in clamdata and clamdata['frog']:
+frog = False
+if lang == "nld":
+    for key in ('pos','lemma','morph','ner','parser','chunker'):
+        if key in clamdata and clamdata[key]:
+            frog = True
+    if frog:
+        skip = ""
+        #PoS can't be skipped
+        if 'lemma' not in clamdata or not clamdata['lemma']:
+            skip += 'l'
+        if 'parser' not in clamdata or not clamdata['parser']:
+            skip += 'mp'
+        if 'morph' not in clamdata or not clamdata['morph']:
+            skip += 'a'
+        if 'ner' not in clamdata or not clamdata['ner']:
+            skip += 'n'
+        if 'chunker' not in clamdata or not clamdata['chunker']:
+            skip += 'c'
+        if skip:
+            skip = "--skip=" + skip
+
+if frog:
     print("Running Frog...",file=sys.stderr)
     clam.common.status.write(statusfile, "Running Frog Pipeline (linguistic enrichment)",75) # status update
-    if os.system(run_piccl + "frog.nf " + textclass_opts + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log"  ) != 0:
+    if os.system(run_piccl + "frog.nf " + textclass_opts + " " + skip + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log"  ) != 0:
         fail('frog')
     nextflowout('frog')
 elif 'tok' in clamdata and clamdata['tok']:
     clam.common.status.write(statusfile, "Running Tokeniser (ucto)",75) # status update
+
     if os.system(run_piccl + "tokenize.nf " + textclass_opts + " --language " + shellsafe(lang,'"') + " --inputformat folia --inputdir " + shellsafe(frog_inputdir,'"') + " --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >ucto.nextflow.out.log 2>ucto.nextflow.err.log"  ) != 0:
         fail('ucto')
     nextflowout('ucto')
diff --git a/webservice/setup.py b/webservice/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name = "PICCL",
-    version = "0.5",
+    version = "0.5.1",
     author = "Martin Reynaert, Maarten van Gompel",
     author_email = "reynaert@uvt.nl",
     description = ("Webservice for PICCL"),