-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
213 lines (190 loc) · 7.73 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# Location of the virtual environment, plus the interpreter and installer
# that live inside it.
VENV := my_venv
PYTHON := $(VENV)/bin/python
PIP := $(VENV)/bin/pip
# Prepend PYTHON_PATH (the current directory) to PYTHONPATH and export the
# result to every command run from a recipe.
PYTHON_PATH := .
export PYTHONPATH := $(PYTHON_PATH):$(PYTHONPATH)
# None of the names below is a file to build: they are commands.
# See https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html
# for what phony targets are and how to use them.
# Please keep the names alphabetically sorted for easier maintenance.
.PHONY: article-country article-descriptions articlequality
.PHONY: articletopic-outlink-predictor articletopic-outlink-transformer
.PHONY: clean clone-descartes clone-wmf-kserve-numpy-200 download-nltk-punkt
.PHONY: language-identification logo-detection readability reference-need
.PHONY: revertrisk-language-agnostic revertrisk-multilingual
.PHONY: run run-server
# Default goal: a plain `make` starts the revertrisk-language-agnostic model
# server.
# NOTE: Make uses the first target in a Makefile as the default, so this rule
# has to stay above every other regular target.
run: revertrisk-language-agnostic
# Generic entry point that starts any model server.
# Prerequisites: the virtual environment and the downloaded model file(s).
# MODEL_PATH and MODEL_NAME are handed to model.py through its environment.
# Model-specific extras are emitted only when the matching variable is set:
#   - MAX_FEATURE_VALS: articlequality uses it to load the features data file.
#   - PREDICTOR_PORT: articletopic-outlink runs both a predictor and a
#     transformer, so the predictor gets an explicit --http_port.
run-server: $(VENV)/bin/activate $(MODEL_PATH)
	MODEL_PATH=$(MODEL_PATH) \
	    MODEL_NAME=$(MODEL_NAME) \
	    $(if $(MAX_FEATURE_VALS), MAX_FEATURE_VALS=$(MAX_FEATURE_VALS)) \
	    $(PYTHON) \
	    $(MODEL_SERVER_PARENT_DIR)/$(MODEL_SERVER_DIR)/model.py \
	    $(if $(PREDICTOR_PORT), --http_port=$(PREDICTOR_PORT))
# Remove local state: the __pycache__ directory, the virtual environment and,
# when MODEL_TYPE is given, the downloaded models under models/$(MODEL_TYPE).
clean:
	rm -rf __pycache__
	rm -rf $(VENV)
	@if [ -n "$(MODEL_TYPE)" ]; then \
	    echo "Cleaning models in directory models/$(MODEL_TYPE) ..."; \
	    rm -rf models/$(MODEL_TYPE); \
	else \
	    echo "No MODEL_TYPE specified. Skipping model-specific cleanup."; \
	fi
### Targets for running servers locally (plus direct deps)
# Run the article-country model server locally.
# Delegates to `run-server` in a sub-make; ACCEPT_REGEX restricts the download
# to the two data files it names.
article-country:
	@$(MAKE) run-server \
	    MODEL_NAME="article-country" \
	    MODEL_SERVER_PARENT_DIR="src/models/article_country" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR=".." \
	    MODEL_URL="article-country/20240901015102/" \
	    MODEL_PATH="models/article-country/20240901015102/" \
	    DATA_PATH="models/article-country/20240901015102/" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="'(category-countries.tsv.gz|ne_10m_admin_0_map_units.geojson)'"
# Run the article-descriptions model server locally.
# Needs the descartes checkout first (clone-descartes), then delegates to
# `run-server`; ACCEPT_REGEX limits the download to the two named models.
article-descriptions: clone-descartes
	@$(MAKE) run-server \
	    MODEL_NAME="article-descriptions" \
	    MODEL_SERVER_PARENT_DIR="src/models/article_descriptions" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="." \
	    MODEL_URL="article-descriptions/" \
	    MODEL_PATH="models/article-descriptions/" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="'(bert-base-multilingual-uncased|mbart-large-cc25)'"
# Fetch the descartes repository (branch 1.0.1) used by article-descriptions.
# Does nothing when the checkout directory is already present.
clone-descartes:
	@test -d "src/models/article_descriptions/model_server/descartes" || \
	    git clone https://github.com/wikimedia/descartes.git --branch 1.0.1 src/models/article_descriptions/model_server/descartes
# Run the articlequality model server locally.
# Needs the wmf kserve fork first (clone-wmf-kserve-numpy-200), then delegates
# to `run-server`; MAX_FEATURE_VALS points at the feature values file.
articlequality: clone-wmf-kserve-numpy-200
	@$(MAKE) run-server \
	    MODEL_NAME="articlequality" \
	    MODEL_SERVER_PARENT_DIR="src/models/articlequality" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR=".." \
	    MODEL_URL="articlequality/language-agnostic/20240801111508/model.pkl" \
	    MODEL_PATH="models/articlequality/language-agnostic/20240801111508/model.pkl" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="." \
	    MAX_FEATURE_VALS="src/models/articlequality/data/feature_values.tsv"
# Fetch the wmf kserve fork built against numpy v2.0.0 (branch numpy-200),
# used by articlequality. Does nothing when the checkout directory exists.
clone-wmf-kserve-numpy-200:
	@test -d "src/models/articlequality/kserve_repository" || \
	    git clone --branch numpy-200 https://github.com/wikimedia/kserve.git src/models/articlequality/kserve_repository
# Run the articletopic-outlink predictor locally.
# Delegates to `run-server`; PREDICTOR_PORT makes the predictor listen on
# HTTP port 8181, the port the transformer target connects to.
articletopic-outlink-predictor:
	@$(MAKE) run-server \
	    MODEL_NAME="outlink-topic-model" \
	    MODEL_SERVER_PARENT_DIR="src/models/outlink_topic_model" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="." \
	    MODEL_URL="articletopic/outlink/20221111111111/model.bin" \
	    MODEL_PATH="models/articletopic/outlink/20221111111111/model.bin" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="." \
	    PREDICTOR_PORT=8181
# Run the articletopic-outlink transformer locally.
# It talks to the predictor on localhost:8181 (see PREDICTOR_PORT in
# articletopic-outlink-predictor). This target has no prerequisite that
# creates the virtual environment, so the environment must already exist.
articletopic-outlink-transformer:
	. $(VENV)/bin/activate && $(PYTHON) \
	    src/models/outlink_topic_model/transformer/transformer.py \
	    --predictor_host="localhost:8181" \
	    --model_name="outlink-topic-model"
# Run the language-identification (langid) model server locally.
# Delegates to `run-server`; model.py sits directly in the parent directory,
# hence MODEL_SERVER_DIR=".".
language-identification:
	@$(MAKE) run-server \
	    MODEL_NAME="langid" \
	    MODEL_SERVER_PARENT_DIR="src/models/langid" \
	    MODEL_SERVER_DIR="." \
	    DEP_DIR="." \
	    MODEL_URL="langid/lid201-model.bin" \
	    MODEL_PATH="models/langid/lid201-model.bin" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
# Run the logo-detection model server locally by delegating to `run-server`
# with the settings of the 20240417132942 model.
logo-detection:
	@$(MAKE) run-server \
	    MODEL_NAME="logo-detection" \
	    MODEL_SERVER_PARENT_DIR="src/models/logo_detection" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="." \
	    MODEL_URL="logo-detection/20240417132942/logo_max_all.keras" \
	    MODEL_PATH="models/logo-detection/20240417132942/logo_max_all.keras" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
# Run the readability model server locally.
# The sub-make is given two goals: download-nltk-punkt (tokenizer data) and
# run-server, both with the settings below.
readability:
	@$(MAKE) download-nltk-punkt run-server \
	    MODEL_NAME="readability" \
	    MODEL_SERVER_PARENT_DIR="src/models/readability_model" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="." \
	    MODEL_URL="readability/multilingual/20240805140437/model.bin" \
	    MODEL_PATH="models/readability/multilingual/20240805140437/model.bin" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
# Fetch the NLTK Punkt sentence tokenizer needed by readability.
# Runs with the virtual environment's interpreter, so the environment is a
# prerequisite.
download-nltk-punkt: $(VENV)/bin/activate
	@$(PYTHON) -m nltk.downloader punkt
# Run the revertrisk-language-agnostic model server locally (this is also what
# the default `run` goal starts). Delegates to `run-server`; requirements are
# taken from the "revertrisk" sub-directory of the model server.
revertrisk-language-agnostic:
	@$(MAKE) run-server \
	    MODEL_NAME="revertrisk-language-agnostic" \
	    MODEL_SERVER_PARENT_DIR="src/models/revert_risk_model" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="revertrisk" \
	    MODEL_URL="revertrisk/language-agnostic/20231117132654/model.pkl" \
	    MODEL_PATH="models/revertrisk/language-agnostic/20231117132654/model.pkl" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
# Run the reference-need model server locally by delegating to `run-server`
# with the settings of the 20240903095237 model.
reference-need:
	@$(MAKE) run-server \
	    MODEL_NAME="reference-need" \
	    MODEL_SERVER_PARENT_DIR="src/models/reference_quality" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="." \
	    MODEL_URL="reference-quality/reference-need/20240903095237/model.pkl" \
	    MODEL_PATH="models/reference-quality/reference-need/20240903095237/model.pkl" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
# Run the revertrisk-multilingual model server locally. Delegates to
# `run-server`; requirements are taken from the "multilingual" sub-directory
# of the model server.
revertrisk-multilingual:
	@$(MAKE) run-server \
	    MODEL_NAME="revertrisk-multilingual" \
	    MODEL_SERVER_PARENT_DIR="src/models/revert_risk_model" \
	    MODEL_SERVER_DIR="model_server" \
	    DEP_DIR="multilingual" \
	    MODEL_URL="revertrisk/multilingual/20230810110019/model.pkl" \
	    MODEL_PATH="models/revertrisk/multilingual/20230810110019/model.pkl" \
	    CUT_DIRS=2 \
	    ACCEPT_REGEX="."
### Subtargets used by multiple other targets
# Create the virtual environment and install dependencies.
#
# venv_requirements is the model-specific requirements file. To support local
# runs, articlequality installs requirements_local_run.txt; every other model
# installs its plain requirements.txt. The same file serves as prerequisite
# and as pip input, so editing the file that is actually installed refreshes
# the environment.
venv_requirements_name := $(if $(filter articlequality,$(MODEL_NAME)),requirements_local_run.txt,requirements.txt)
venv_requirements := $(MODEL_SERVER_PARENT_DIR)/$(MODEL_SERVER_DIR)/$(DEP_DIR)/$(venv_requirements_name)

# `python3 -m venv` writes $@ before any package is installed. If a pip step
# fails, $@ is removed again so that a half-provisioned environment is not
# treated as up to date on the next run.
$(VENV)/bin/activate: $(venv_requirements)
	python3 -m venv $(VENV)
	$(PIP) install --upgrade pip \
	    && $(PIP) install -r python/requirements.txt \
	    && $(PIP) install -r $< \
	    || { rm -f $@; exit 1; }
# Download the model file(s) from the published wmf-ml-models tree.
# Files land under ./models: host directories are not created, the first
# CUT_DIRS remote path components are dropped, index pages are rejected and
# only URLs matching ACCEPT_REGEX are fetched. --continue resumes partial
# downloads.
$(MODEL_PATH):
	mkdir -p $(MODEL_SERVER_PARENT_DIR)/models
	wget --recursive --no-host-directories --continue \
	    --directory-prefix=models --cut-dirs=$(CUT_DIRS) \
	    --reject "index.html*" --accept-regex $(ACCEPT_REGEX) \
	    https://analytics.wikimedia.org/published/wmf-ml-models/$(MODEL_URL)