Skip to content

Commit f9a3159

Browse files
authored
Fix stopwords error and fix upload to google storage (#1786)
* fix file path so Google Cloud Storage upload works
* fix: hard-code the stop words list
* fix how Google Storage secrets are accessed
1 parent 8edcacf commit f9a3159

File tree

3 files changed

+34
-20
lines changed

3 files changed

+34
-20
lines changed

api/namex/analytics/solr.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -900,15 +900,11 @@ def remove_stopwords_designations(cls, name):
900900
'llp', 'ltd.', 'ltd', 'ltee', 'sencrl', 'societe a responsabilite limitee',
901901
'societe en nom collectif a responsabilite limitee', 'limited', 'srl', 'ulc', 'unlimited liability company']
902902

903-
stop_words = []
904-
try:
905-
with open('stopwords.txt') as stop_words_file:
906-
stop_words = []
907-
for line in stop_words_file.readlines():
908-
if line.find('#') == -1:
909-
stop_words.append(line.strip('\n').strip())
910-
except Exception as err:
911-
current_app.logger.error(err)
903+
# TODO: load these stop words from a data file or configuration instead of hard-coding them.
904+
stop_words = [
905+
'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'corp', 'if', 'in', 'incorporation', 'into', 'is', 'it',
906+
'no', 'not', 'of', 'on', 'or', 'such', 'that', 'the', 'their', 'then', 'there', 'these', 'they', 'this', 'to'
907+
]
912908

913909
# remove designations if they are at the end of the name
914910
for designation in designations:

jobs/nr-duplicates-report/nr_duplicates_report/config.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import os
22
from dotenv import load_dotenv, find_dotenv
3+
import base64
34

45
# this will load all the environment variables from a .env file located in the project root (api)
56
load_dotenv(find_dotenv())
@@ -33,6 +34,23 @@ class Config(object):
3334
OCP_RELAY_URL = os.getenv('OCP_RELAY_URL', '')
3435

3536
# Email
36-
NOTIFY_API_URL = f"{os.getenv("NOTIFY_API_URL", "") + os.getenv("NOTIFY_API_VERSION", "")}/notify"
37+
NOTIFY_API_URL = f"{os.getenv('NOTIFY_API_URL', '') + os.getenv('NOTIFY_API_VERSION', '')}/notify"
3738
REPORT_RECIPIENTS = os.getenv('REPORT_RECIPIENTS', '')
3839
ERROR_EMAIL_RECIPIENTS = os.getenv('ERROR_EMAIL_RECIPIENTS', '')
40+
41+
# Google Storage
42+
TYPE = os.getenv('TYPE', '')
43+
PROJECT_ID = os.getenv('PROJECT_ID', '')
44+
CLIENT_ID = os.getenv('CLIENT_ID', '')
45+
CLIENT_EMAIL = os.getenv('CLIENT_EMAIL', '')
46+
AUTH_URI = os.getenv('AUTH_URI', '')
47+
TOKEN_URI = os.getenv('TOKEN_URI', '')
48+
AUTH_PROVIDER_X509_cert_URL = os.getenv('AUTH_PROVIDER_X509_cert_URL', '')
49+
CLIENT_X509_CERT_URL = os.getenv('CLIENT_X509_CERT_URL', '')
50+
PRIVATE_KEY_ID = os.getenv('PRIVATE_KEY_ID', '')
51+
try:
52+
pk_raw = os.getenv('PRIVATE_KEY', '').strip()
53+
pk_padded = pk_raw + '=' * (-len(pk_raw) % 4)
54+
PRIVATE_KEY = base64.b64decode(pk_padded).decode()
55+
except Exception:
56+
PRIVATE_KEY = os.getenv('PRIVATE_KEY', '')

jobs/nr-duplicates-report/nr_duplicates_report/daily/nr-duplicates.ipynb

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -273,16 +273,16 @@
273273
" if not result_frame.empty:\n",
274274
" # Config data dictionary\n",
275275
" dictionary = { \n",
276-
" \"type\": os.getenv('TYPE', ''),\n",
277-
" \"project_id\": os.getenv('PROJECT_ID', ''),\n",
278-
" \"private_key_id\": os.getenv('PRIVATE_KEY_ID', ''),\n",
279-
" \"private_key\": os.getenv('PRIVATE_KEY', ''),\n",
280-
" \"client_email\": os.getenv('CLIENT_EMAIL', ''),\n",
281-
" \"client_id\": os.getenv('CLIENT_ID', ''),\n",
282-
" \"auth_uri\": os.getenv('AUTH_URI', ''),\n",
283-
" \"token_uri\": os.getenv('TOKEN_URI', ''),\n",
284-
" \"auth_provider_x509_cert_url\": os.getenv('AUTH_PROVIDER_X509_cert_URL', ''),\n",
285-
" \"client_x509_cert_url\": os.getenv('CLIENT_X509_CERT_URL', '')\n",
276+
" \"type\": Config.TYPE,\n",
277+
" \"project_id\": Config.PROJECT_ID,\n",
278+
" \"private_key_id\": Config.PRIVATE_KEY_ID,\n",
279+
" \"private_key\": Config.PRIVATE_KEY,\n",
280+
" \"client_email\": Config.CLIENT_EMAIL,\n",
281+
" \"client_id\": Config.CLIENT_ID,\n",
282+
" \"auth_uri\": Config.AUTH_URI,\n",
283+
" \"token_uri\": Config.TOKEN_URI,\n",
284+
" \"auth_provider_x509_cert_url\": Config.AUTH_PROVIDER_X509_cert_URL,\n",
285+
" \"client_x509_cert_url\": Config.CLIENT_X509_CERT_URL\n",
286286
" }\n",
287287
"\n",
288288
" file_path = os.path.join(os.getcwd(), \"nr_duplicates_report/data/\") + \"service_key.json\"\n",

0 commit comments

Comments
 (0)