## Base path for the mwaddlink data files (empty = current working directory).
PATH_mwaddlink = ""

## logging via json
# https://github.com/bobbui/json-logging-python
import json_logging
import logging
import sys

LOG_LEVEL = logging.DEBUG

# log is initialized without a web framework name; every record is emitted
# as JSON on stdout so surrounding tooling can collect it.
json_logging.init_non_web(enable_json=True)
logger = logging.getLogger("logger")
logger.setLevel(LOG_LEVEL)
logger.addHandler(logging.StreamHandler(sys.stdout))
13
23
# NOTE(review): this region is a scraped unified-diff view (interleaved
# old/new gutter numbers and +/- markers), not runnable Python; the notes
# below annotate the logical code visible in the "+" and context lines.
# main(): CLI entry point — parses args, loads per-language sqlite
# dictionaries and an xgboost link model, fetches an article's wikitext,
# computes link recommendations, and prints them as JSON.
def main ():
14
24
parser = argparse .ArgumentParser ()
15
25
@@ -30,65 +40,75 @@ def main():
30
40
type = float ,
31
41
help = "threshold value for links to be recommended" )
32
42
33
- parser .add_argument ("--output" ,"-o" ,
34
- default = "" ,
35
- type = str ,
36
- help = "if None, print to terminal, otherwise write result to file" )
37
-
38
43
args = parser .parse_args ()
39
44
lang = args .lang .replace ('wiki' ,'' )
40
45
page_title = normalise_title (args .page )
41
46
threshold = args .threshold
42
- output_path = args .output
43
47
48
+ logger .info ('Getting link recommendations for article %s in %swiki with link-threshold %s' % (page_title , lang ,threshold ))
49
+
50
+ ## open the trained model
51
+ logger .info ('Loading the trained model' )
44
52
try :
45
-
46
- anchors = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.anchors.sqlite" .format (lang )) )
47
- pageids = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.pageids.sqlite" .format (lang )) )
53
+ anchors = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.anchors.sqlite" .format (lang )) )
54
+ pageids = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.pageids.sqlite" .format (lang )))
48
55
redirects = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.redirects.sqlite" .format (lang )) )
49
56
word2vec = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.w2v.filtered.sqlite" .format (lang )) )
50
57
nav2vec = SqliteDict (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.nav.filtered.sqlite" .format (lang )) )
51
58
## load trained model
52
59
n_cpus_max = min ([int (multiprocessing .cpu_count ()/ 4 ),8 ])
53
60
model = xgb .XGBClassifier (n_jobs = n_cpus_max ) # init model
54
- model .load_model (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.linkmodel .bin" .format (lang ))) # load data
61
+ model .load_model (os .path .join (PATH_mwaddlink ,"data/{0}/{0}.linkmodel_v2 .bin" .format (lang ))) # load data
55
62
except :
# NOTE(review): bare except — swallows everything (incl. KeyboardInterrupt)
# and execution continues, so anchors/pageids/.../model stay undefined and
# their use below raises NameError. Catch specific exceptions and exit (or
# re-raise) here instead.
56
- print ( 'Link recommendation model not available for %swiki. try another language.' % lang )
57
-
63
+ # logging
64
+ logger . error ( 'Could not open trained model in %swiki. try another language.' % lang )
58
65
66
+ ## querying the API to get the wikitext for the page
67
+ logger .info ('Getting the wikitext of the article' )
59
68
try :
60
69
page_dict = getPageDict (page_title ,lang )
61
70
wikitext = page_dict ['wikitext' ]
62
71
pageid = page_dict ['pageid' ]
63
72
revid = page_dict ['revid' ]
64
73
except :
# NOTE(review): on failure only wikitext gets a fallback; pageid/revid stay
# undefined but are referenced unconditionally in dict_return below —
# NameError risk. Also another bare except.
65
74
wikitext = ""
66
- print ("""Not able to retrieve article '%s' in %swiki. try another article.""" % (page_title ,lang ))
75
+ logger .error ("""Not able to retrieve article '%s' in %swiki. try another article.""" % (page_title ,lang ))
76
+
77
+ ## querying the API to get the wikitext for the page
# NOTE(review): stale copy-pasted heading — this step processes the
# wikitext to produce recommendations; it does not query the API.
78
+ logger .info ('Processing wikitext to get link recommendations' )
67
79
try :
68
80
added_links = process_page (wikitext , page_title , anchors , pageids , redirects , word2vec ,nav2vec , model , threshold = threshold , return_wikitext = False )
69
81
except :
# NOTE(review): added_links stays undefined when process_page fails, yet
# len(added_links) is evaluated unconditionally further down.
70
- print ("""Not able to get links-recommendations for article '%s' in %swiki. """ % (page_title ,lang ))
71
- anchors .close ()
72
- pageids .close ()
73
- redirects .close ()
74
- word2vec .close ()
75
- nav2vec .close ()
82
+ logger .error ("""Not able to process article '%s' in %swiki. try another article.""" % (page_title ,lang ))
83
+
84
+ ## closing model
# NOTE(review): heading says "model" but this closes the sqlite dicts; a
# try/finally (or `with`/contextlib.ExitStack) would guarantee cleanup even
# when the earlier steps fail.
85
+ try :
86
+ anchors .close ()
87
+ pageids .close ()
88
+ redirects .close ()
89
+ word2vec .close ()
90
+ nav2vec .close ()
91
+ except :
92
+ logger .warning ('Could not close model in %swiki.' % lang )
93
+
76
94
95
+ ## querying the API to get the wikitext for the page
# NOTE(review): stale copy-pasted heading again — this block only reports
# the recommendation count; also prefer lazy logger args
# (logger.info('...: %s', len(added_links))) over eager %-formatting.
96
+ logger .info ('Number of links from recommendation model: %s' % len (added_links ))
97
+ if len (added_links ) == 0 :
98
+ logger .info ('Model did not yield any links to recommend. Try a lower link-threshold (e.g. -t 0.2)' )
99
+
77
100
dict_return = {
78
101
'page_title' :page_title ,
79
102
'lang' :lang ,
80
103
'pageid' :pageid ,
81
104
'revid' :revid ,
82
105
'no_added_links' :len (added_links ),
83
106
'added_links' :added_links ,
84
-
85
107
}
86
108
json_out = json .dumps (dict_return , indent = 4 )
87
- if len (output_path ) == 0 :
88
- print (json_out )
89
- else :
90
- with open (output_path ,'w' ) as fout :
91
- fout .write (json_out + '\n ' )
109
+ logger .info ('Recommended links: %s' ,dict_return )
110
+ print ('--- Recommended links ---' )
111
+ print (json_out )
92
112
93
113
if __name__ == "__main__" :
94
114
main ()
0 commit comments