Skip to content

Commit ce48455

Browse files
committed
json-logging
1 parent 5df0c88 commit ce48455

File tree

3 files changed

+49
-26
lines changed

3 files changed

+49
-26
lines changed

addlink-query_links.py

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@
1010

1111
PATH_mwaddlink=""
1212

13+
## logging via json
14+
#https://github.com/bobbui/json-logging-python
15+
import json_logging, logging, sys
16+
LOG_LEVEL = logging.DEBUG
17+
# log is initialized without a web framework name
18+
json_logging.init_non_web(enable_json=True)
19+
logger = logging.getLogger("logger")
20+
logger.setLevel(LOG_LEVEL)
21+
logger.addHandler(logging.StreamHandler(sys.stdout))
22+
1323
def main():
1424
parser = argparse.ArgumentParser()
1525

@@ -30,65 +40,75 @@ def main():
3040
type = float,
3141
help="threshold value for links to be recommended")
3242

33-
parser.add_argument("--output","-o",
34-
default="",
35-
type=str,
36-
help="if None, print to terminal, otherwise write result to file")
37-
3843
args = parser.parse_args()
3944
lang = args.lang.replace('wiki','')
4045
page_title = normalise_title(args.page)
4146
threshold = args.threshold
42-
output_path = args.output
4347

48+
logger.info('Getting link recommendations for article %s in %swiki with link-threshold %s'%(page_title, lang,threshold))
49+
50+
## open the trained model
51+
logger.info('Loading the trained model')
4452
try:
45-
46-
anchors = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.anchors.sqlite".format(lang)) )
47-
pageids = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.pageids.sqlite".format(lang)) )
53+
anchors = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.anchors.sqlite".format(lang)) )
54+
pageids = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.pageids.sqlite".format(lang)))
4855
redirects = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.redirects.sqlite".format(lang)) )
4956
word2vec = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.w2v.filtered.sqlite".format(lang)) )
5057
nav2vec = SqliteDict(os.path.join(PATH_mwaddlink,"data/{0}/{0}.nav.filtered.sqlite".format(lang)) )
5158
## load trained model
5259
n_cpus_max = min([int(multiprocessing.cpu_count()/4),8])
5360
model = xgb.XGBClassifier(n_jobs =n_cpus_max ) # init model
54-
model.load_model(os.path.join(PATH_mwaddlink,"data/{0}/{0}.linkmodel.bin".format(lang))) # load data
61+
model.load_model(os.path.join(PATH_mwaddlink,"data/{0}/{0}.linkmodel_v2.bin".format(lang))) # load data
5562
except:
56-
print('Link recommendation model not available for %swiki. try another language.'%lang)
57-
63+
# logging
64+
logger.error('Could not open trained model in %swiki. try another language.'%lang)
5865

66+
## querying the API to get the wikitext for the page
67+
logger.info('Getting the wikitext of the article')
5968
try:
6069
page_dict = getPageDict(page_title,lang)
6170
wikitext = page_dict['wikitext']
6271
pageid = page_dict['pageid']
6372
revid = page_dict['revid']
6473
except:
6574
wikitext = ""
66-
print("""Not able to retrieve article '%s' in %swiki. try another article."""%(page_title,lang))
75+
logger.error("""Not able to retrieve article '%s' in %swiki. try another article."""%(page_title,lang))
76+
77+
## processing the wikitext to generate link recommendations
78+
logger.info('Processing wikitext to get link recommendations')
6779
try:
6880
added_links = process_page(wikitext, page_title, anchors, pageids, redirects, word2vec,nav2vec, model, threshold = threshold, return_wikitext = False)
6981
except:
70-
print("""Not able to get links-recommendations for article '%s' in %swiki. """%(page_title,lang))
71-
anchors.close()
72-
pageids.close()
73-
redirects.close()
74-
word2vec.close()
75-
nav2vec.close()
82+
logger.error("""Not able to process article '%s' in %swiki. try another article."""%(page_title,lang))
83+
84+
## closing the sqlite-backed lookup dictionaries
85+
try:
86+
anchors.close()
87+
pageids.close()
88+
redirects.close()
89+
word2vec.close()
90+
nav2vec.close()
91+
except:
92+
logger.warning('Could not close model in %swiki.'%lang)
93+
7694

95+
## reporting the number of recommended links
96+
logger.info('Number of links from recommendation model: %s'%len(added_links))
97+
if len(added_links) == 0:
98+
logger.info('Model did not yield any links to recommend. Try a lower link-threshold (e.g. -t 0.2)')
99+
77100
dict_return = {
78101
'page_title':page_title,
79102
'lang':lang,
80103
'pageid':pageid,
81104
'revid':revid,
82105
'no_added_links':len(added_links),
83106
'added_links':added_links,
84-
85107
}
86108
json_out = json.dumps(dict_return, indent=4)
87-
if len(output_path) == 0:
88-
print(json_out)
89-
else:
90-
with open(output_path,'w') as fout:
91-
fout.write(json_out+'\n')
109+
logger.info('Recommended links: %s',dict_return)
110+
print('--- Recommended links ---')
111+
print(json_out)
92112

93113
if __name__ == "__main__":
94114
main()

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ jedi==0.17.2
2323
jieba==0.42.1
2424
Jinja2==2.11.2
2525
joblib==0.16.0
26+
json-logging==1.2.10
2627
jsonable==0.3.1
2728
jsonschema==3.2.0
2829
jupyter==1.0.0
2930
jupyter-client==6.1.7
3031
jupyter-console==6.2.0
3132
jupyter-core==4.6.3
3233
jupyterlab-pygments==0.1.2
34+
line-profiler==3.0.2
3335
lmdb==1.0.0
3436
marisa-trie==0.7.5
3537
MarkupSafe==1.1.1
@@ -86,6 +88,6 @@ wcwidth==0.2.5
8688
webencodings==0.5.1
8789
widgetsnbextension==3.5.1
8890
wikipedia2vec==1.0.4
89-
wikitextparser==0.45.1
91+
wikitextparser==0.44.0
9092
xgboost==1.2.0
9193
zipp==3.2.0

requirements_query.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ chardet==3.0.4
33
click==7.1.2
44
idna==2.10
55
joblib==0.16.0
6+
json-logging==1.2.10
67
mwparserfromhell==0.5.4
78
nltk==3.5
89
numpy==1.19.2

0 commit comments

Comments
 (0)