Skip to content

Commit 5b77747

Browse files
committed
Implemented frog selection options #25
1 parent 89d29f1 commit 5b77747

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

webservice/picclservice/picclservice.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -396,8 +396,13 @@
396396
ChoiceParameter('distance','How many edits?','Search a distance of N characters for variants',choices=[('2','Up to two edits'),('1','Only one edit')]) #old TICCL -L
397397
]),
398398
('Automatic Linguistic Enrichment', [
399-
BooleanParameter('tok','Perform Tokenisation',"Perform tokenisation."),
400-
BooleanParameter('frog','Perform Linguistic Enrichment',"Performs tokenisation, Part-of-Speech tagging, lemmatisation, named entity recognition and more. This is implemented only for Dutch (uses Frog)!!!")
399+
BooleanParameter('tok','Tokenisation',"Perform tokenisation", default=True),
400+
BooleanParameter('pos','Part-of-Speech Tagging',"Part-of-speech Tagging (for Dutch only!)",default=True),
401+
BooleanParameter('lemma','Lemmatisation',"Lemmatisation (for Dutch only!)", default=True),
402+
BooleanParameter('morph','Morphological Analysis',"Morphological Analysis (for Dutch only!)", default=False),
403+
BooleanParameter('ner','Named Entity Recognition',"Named Entity Recognition", default=False),
404+
BooleanParameter('parser','Dependency Parser',"Dependency parser (for Dutch only!)", default=False),
405+
BooleanParameter('chunker','Chunker / Shallow-parser Parser',"Chunker / Shallow parser (for Dutch only!)", default=False),
401406
]),
402407
#('Focus Word Selection', [
403408
# IntegerParameter('minlength','Minimum Word Length','Integer between zero and one hundred',default=5,minvalue=0, maxvalue=100), #old ticcl -x
@@ -421,7 +426,7 @@
421426
]
422427

423428

424-
# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========
429+
# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========
425430

426431
#The dispatcher to use (defaults to clamdispatcher.py), you almost never want to change this
427432
#DISPATCHER = 'clamdispatcher.py'

webservice/picclservice/picclservice_wrapper.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,15 +205,36 @@ def nextflowout(prefix):
205205
frog_inputdir = 'ocr_output'
206206
textclass_opts = "--inputclass \"OCR\" --outputclass \"current\"" #extra textclass opts for both frog and/or ucto
207207

208-
209-
if 'frog' in clamdata and clamdata['frog']:
208+
frog = False
209+
if lang == "nld":
210+
for key in ('pos','lemma','morph','ner','parser','chunker'):
211+
if key in clamdata and clamdata[key]:
212+
frog = True
213+
if frog:
214+
skip = ""
215+
#PoS can't be skipped
216+
if 'lemma' not in clamdata or not clamdata['lemma']:
217+
skip += 'l'
218+
if 'parser' not in clamdata or not clamdata['parser']:
219+
skip += 'mp'
220+
if 'morph' not in clamdata or not clamdata['morph']:
221+
skip += 'a'
222+
if 'ner' not in clamdata or not clamdata['ner']:
223+
skip += 'n'
224+
if 'chunker' not in clamdata or not clamdata['chunker']:
225+
skip += 'c'
226+
if skip:
227+
skip = "--skip=" + skip
228+
229+
if frog:
210230
print("Running Frog...",file=sys.stderr)
211231
clam.common.status.write(statusfile, "Running Frog Pipeline (linguistic enrichment)",75) # status update
212-
if os.system(run_piccl + "frog.nf " + textclass_opts + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0:
232+
if os.system(run_piccl + "frog.nf " + textclass_opts + " " + skip + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0:
213233
fail('frog')
214234
nextflowout('frog')
215235
elif 'tok' in clamdata and clamdata['tok']:
216236
clam.common.status.write(statusfile, "Running Tokeniser (ucto)",75) # status update
237+
217238
if os.system(run_piccl + "tokenize.nf " + textclass_opts + " --language " + shellsafe(lang,'"') + " --inputformat folia --inputdir " + shellsafe(frog_inputdir,'"') + " --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >ucto.nextflow.out.log 2>ucto.nextflow.err.log" ) != 0:
218239
fail('ucto')
219240
nextflowout('ucto')

webservice/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
setup(
1212
name = "PICCL",
13-
version = "0.5",
13+
version = "0.5.1",
1414
author = "Martin Reynaert, Maarten van Gompel",
1515
author_email = "[email protected]",
1616
description = ("Webservice for PICCL"),

0 commit comments

Comments
 (0)