Skip to content

Commit f2f5ede

Browse files
author
Frankie Robertson
committed
Factor out fetchers
1 parent 57e262a commit f2f5ede

File tree

7 files changed

+123
-95
lines changed

7 files changed

+123
-95
lines changed

Dockerfile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,22 @@ RUN set -ex && pipenv install --deploy --system
4646
# NLTK resources
4747
RUN python -c "from nltk import download as d; d('wordnet'); d('omw'); d('punkt')"
4848

49+
RUN mkdir /app/fetchers
50+
4951
# UKB
50-
COPY ./ukb.py /app/
52+
COPY ./fetchers/ukb.py /app/fetchers/
5153
COPY ./support/ukb /app/support/ukb
52-
RUN python ukb.py fetch
54+
RUN python fetchers/ukb.py fetch
5355

5456
# SupWSD
55-
COPY ./supwsd.py /app/
57+
COPY ./fetchers/supwsd.py /app/fetchers/
5658
COPY ./support/supWSD /app/support/supWSD
57-
RUN bash -c 'source "/root/.sdkman/bin/sdkman-init.sh" && python supwsd.py fetch'
59+
RUN bash -c 'source "/root/.sdkman/bin/sdkman-init.sh" && python fetchers/supwsd.py'
5860

5961
# Context2Vec
60-
COPY ./ctx2vec.py /app/
62+
COPY ./fetchers/ctx2vec.py /app/fetchers/
6163
COPY ./support/context2vec /app/support/context2vec
62-
RUN python ctx2vec.py fetch
64+
RUN python fetchers/ctx2vec.py
6365

6466
# Evaluation framework setup
6567
COPY . /app

ctx2vec.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import tempfile
2-
import os
32
import click
43
from plumbum import local
5-
from plumbum.cmd import git, python, pipenv, ln
6-
from shutil import copyfile
4+
from plumbum.cmd import python, pipenv, ln
75
from os.path import join as pjoin
86

97

@@ -12,24 +10,6 @@ def ctx2vec():
1210
pass
1311

1412

15-
@ctx2vec.command()
16-
@click.option("--gpu/--no-gpu")
17-
def fetch(gpu):
18-
os.makedirs("systems", exist_ok=True)
19-
with local.cwd("systems"):
20-
git("clone", "https://github.com/orenmel/context2vec.git")
21-
22-
subdir = "gpu" if gpu else "nogpu"
23-
for fn in ["Pipfile", "Pipfile.lock"]:
24-
copyfile(
25-
"support/context2vec/{}/{}".format(subdir, fn),
26-
"systems/context2vec/{}".format(fn),
27-
)
28-
29-
with local.cwd("systems/context2vec"):
30-
pipenv("install")
31-
32-
3313
def get_xml_key_pair(xml_path, key_path):
3414
tempdir = tempfile.mkdtemp(prefix="ctx2vec")
3515
pair_path = pjoin(tempdir, "corpus")

fetchers/ctx2vec.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import click
2+
from plumbum import local
3+
from plumbum.cmd import git, pipenv
4+
from shutil import copyfile
5+
import os
6+
7+
8+
# Fetcher entry point for context2vec: clone the upstream repository under
# systems/, copy in the Pipfile/Pipfile.lock pair for the selected variant and
# run `pipenv install` inside the checkout.
# --gpu picks support/context2vec/gpu, otherwise support/context2vec/nogpu.
# (Documented with comments on purpose: a docstring here would become the
# click --help text.)
@click.command()
@click.option("--gpu/--no-gpu")
def ctx2vec(gpu):
    os.makedirs("systems", exist_ok=True)
    with local.cwd("systems"):
        git("clone", "https://github.com/orenmel/context2vec.git")

    if gpu:
        subdir = "gpu"
    else:
        subdir = "nogpu"
    # Both paths are relative, so they resolve against the directory the
    # script was launched from (the cwd change above has been undone here).
    for fn in ("Pipfile", "Pipfile.lock"):
        src = "support/context2vec/{}/{}".format(subdir, fn)
        dst = "systems/context2vec/{}".format(fn)
        copyfile(src, dst)

    with local.cwd("systems/context2vec"):
        pipenv("install")
24+
25+
26+
if __name__ == "__main__":
27+
ctx2vec()

fetchers/supwsd.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from plumbum import local
2+
from plumbum.cmd import git
3+
import click
4+
import os
5+
from finntk.emb.word2vec import vecs as word2vec_res
6+
from finntk.emb.fasttext import vecs as fasttext_res
7+
from os.path import join as pjoin
8+
9+
10+
# Fetcher entry point for SupWSD: build the program first, then export the
# embeddings. (Documented with comments on purpose: a docstring here would
# become the click --help text.)
@click.command()
def supwsd():
    for step in (fetch_program, fetch_emb):
        step()
14+
15+
16+
def fetch_emb():
    """Export the word2vec and fastText vectors into ``support/emb``.

    The directory is created if missing. Each model is obtained from its
    finntk resource module and written with ``save_word2vec_format`` as
    ``word2vec.txt`` and ``fasttext.txt`` respectively.
    """
    out_dir = "support/emb"
    os.makedirs(out_dir, exist_ok=True)
    exports = (
        (word2vec_res.get_vecs, "word2vec.txt"),
        (fasttext_res.get_fi, "fasttext.txt"),
    )
    # word2vec is exported before fastText, one model at a time.
    for load_vectors, filename in exports:
        load_vectors().save_word2vec_format(pjoin(out_dir, filename))
21+
22+
23+
def fetch_program():
    """Clone the supWSD fork into ``systems/`` and build it with Maven.

    After cloning, the ``fixes-sep-24-1`` ref is checked out and
    ``mvn package`` is run inside the checkout.
    """
    # Imported here rather than at module level; presumably so the module can
    # still be imported where Maven is not on PATH -- TODO confirm.
    from plumbum.cmd import mvn

    os.makedirs("systems", exist_ok=True)
    with local.cwd("systems"):
        git("clone", "https://github.com/frankier/supWSD.git")

    # Same directory the nested form would reach: systems/ then supWSD/.
    with local.cwd("systems/supWSD"):
        git("checkout", "fixes-sep-24-1")
        mvn("package")
32+
33+
34+
if __name__ == "__main__":
35+
supwsd()

fetchers/ukb.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from plumbum import local
2+
from plumbum.cmd import python, make, bash, git
3+
import click
4+
import os
5+
from os.path import abspath
6+
from finntk.wordnet.reader import fiwn_encnt
7+
from finntk.wordnet.utils import fi2en_post
8+
9+
10+
# Root click group for this script; the `fetch` and `mkwndict` subcommands
# below attach to it. It does no work itself. (A docstring is avoided because
# click would show it as the --help text.)
@click.group()
def ukb():
    pass
13+
14+
15+
# `fetch` subcommand: clone UKB under systems/, configure and build it in
# ukb/src, run the graph preparation script from support/ukb with UKB_PATH
# pointing at the built sources, then write the output of this script's own
# `mkwndict --en-synset-ids` subcommand to support/ukb/wndict.fi.txt.
# All relative paths assume the script is launched from the project root.
@ukb.command()
def fetch():
    os.makedirs("systems", exist_ok=True)
    with local.cwd("systems"):
        git("clone", "https://github.com/asoroa/ukb.git")
        with local.cwd("ukb/src"):
            # Looked up while inside ukb/src so "./configure" resolves there.
            configure = local["./configure"]
            configure()
            make()
    # Prepare
    ukb_src = abspath("systems/ukb/src")
    with local.env(UKB_PATH=ukb_src), local.cwd("support/ukb"):
        bash("./prepare_wn30graph.sh")
    # Re-invoke this same file as a subprocess and redirect its stdout.
    dump_wndict = python[__file__, "mkwndict", "--en-synset-ids"]
    (dump_wndict > "support/ukb/wndict.fi.txt")()
28+
29+
30+
# `mkwndict` subcommand: print one dictionary line per lemma name to stdout,
# in the form "<lemma_name>\t<synset_id>:<count> <synset_id>:<count> ...".
# With --en-synset-ids each synset id is passed through fi2en_post before
# printing; with --fi-synset-ids the id from fiwn_encnt.ss2of is used as-is.
# (Documented with comments on purpose: a docstring here would become the
# click --help text.)
@ukb.command()
@click.option("--en-synset-ids/--fi-synset-ids")
def mkwndict(en_synset_ids):
    for lemma_name in fiwn_encnt.all_lemma_names():
        # Skip empty names up front so no synset lookups or formatting are
        # done for a line that would never be printed.
        if not lemma_name:
            continue
        synsets = []
        for lemma in fiwn_encnt.lemmas(lemma_name):
            post_synset_id = fiwn_encnt.ss2of(lemma.synset())
            if en_synset_ids:
                post_synset_id = fi2en_post(post_synset_id)
            synsets.append("{}:{}".format(post_synset_id, lemma.count()))
        print("{}\t{}".format(lemma_name, " ".join(synsets)))
47+
48+
49+
if __name__ == "__main__":
50+
ukb()

supwsd.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
1-
import os
21
import click
32
from plumbum import local
4-
from plumbum.cmd import git, java
3+
from plumbum.cmd import java
54
from string import Template
65
from os.path import join as pjoin
7-
from finntk.emb.word2vec import vecs as word2vec_res
8-
from finntk.emb.fasttext import vecs as fasttext_res
96

107

118
SUPWSD_JAR = "target/supwsd-toolkit-1.0.0.jar"
@@ -16,32 +13,6 @@ def supwsd():
1613
pass
1714

1815

19-
@supwsd.command()
20-
def fetch():
21-
fetch_program.callback()
22-
fetch_emb.callback()
23-
24-
25-
@supwsd.command()
26-
def fetch_emb():
27-
emb = "support/emb"
28-
os.makedirs(emb, exist_ok=True)
29-
word2vec_res.get_vecs().save_word2vec_format(pjoin(emb, "word2vec.txt"))
30-
fasttext_res.get_fi().save_word2vec_format(pjoin(emb, "fasttext.txt"))
31-
32-
33-
@supwsd.command()
34-
def fetch_program():
35-
from plumbum.cmd import mvn
36-
37-
os.makedirs("systems", exist_ok=True)
38-
with local.cwd("systems"):
39-
git("clone", "https://github.com/frankier/supWSD.git")
40-
with local.cwd("supWSD"):
41-
git("checkout", "fixes-sep-24-1")
42-
mvn("package")
43-
44-
4516
@supwsd.command()
4617
@click.argument("work_dir")
4718
@click.argument("vec_path", required=False)

ukb.py

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
import os
22
import sys
33
import click
4-
from os.path import abspath
54
from plumbum import local
6-
from plumbum.cmd import python, make, bash, git
5+
from plumbum.cmd import python
76
from stiff.utils.xml import iter_sentences
87
from stiff.data.constants import UNI_POS_WN_MAP
9-
from finntk.wordnet.reader import fiwn_encnt
10-
from finntk.wordnet.utils import fi2en_post
118

129

1310
def get_ukb():
@@ -82,39 +79,5 @@ def clean_keyfile(keyin, keyout):
8279
keyout.write("\n")
8380

8481

85-
@ukb.command()
86-
def fetch():
87-
os.makedirs("systems", exist_ok=True)
88-
with local.cwd("systems"):
89-
git("clone", "https://github.com/asoroa/ukb.git")
90-
with local.cwd("ukb/src"):
91-
local["./configure"]()
92-
make()
93-
# Prepare
94-
with local.env(UKB_PATH=abspath("systems/ukb/src")):
95-
with local.cwd("support/ukb"):
96-
bash("./prepare_wn30graph.sh")
97-
(python[__file__, "mkwndict", "--en-synset-ids"] > "support/ukb/wndict.fi.txt")()
98-
99-
100-
@ukb.command()
101-
@click.option("--en-synset-ids/--fi-synset-ids")
102-
def mkwndict(en_synset_ids):
103-
lemma_names = fiwn_encnt.all_lemma_names()
104-
105-
for lemma_name in lemma_names:
106-
lemmas = fiwn_encnt.lemmas(lemma_name)
107-
synsets = []
108-
for lemma in lemmas:
109-
synset = lemma.synset()
110-
post_synset_id = fiwn_encnt.ss2of(synset)
111-
if en_synset_ids:
112-
post_synset_id = fi2en_post(post_synset_id)
113-
synsets.append("{}:{}".format(post_synset_id, lemma.count()))
114-
if not lemma_name:
115-
continue
116-
print("{}\t{}".format(lemma_name, " ".join(synsets)))
117-
118-
11982
if __name__ == "__main__":
12083
ukb()

0 commit comments

Comments
 (0)