-
Notifications
You must be signed in to change notification settings - Fork 1
/
update-search-index.py
86 lines (65 loc) · 2.78 KB
/
update-search-index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import os
from algoliasearch.search_client import SearchClient
def read_local_index(file_location):
    """Load a local JSON index file and key its records by ``objectID``.

    Args:
        file_location: Path to a JSON file containing an array of objects,
            each of which has an ``"objectID"`` key.

    Returns:
        dict mapping each record's ``objectID`` to the full record.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if a record has no ``"objectID"``.
    """
    # Use a context manager so the handle is closed deterministically, and
    # decode as UTF-8 (the JSON interchange encoding) instead of relying on
    # the platform's locale default.
    with open(file_location, "r", encoding="utf-8") as index_file:
        return {item["objectID"]: item for item in json.load(index_file)}
def read_index(index):
    """Collect the records yielded by ``index.browse_objects`` into a dict.

    Args:
        index: Object exposing ``browse_objects(params)`` that yields
            records, each carrying an ``"objectID"`` key.

    Returns:
        dict mapping ``objectID`` to the corresponding record.
    """
    # Empty query, all attributes requested.
    browse_params = {"query": "", "attributesToRetrieve": ["*"]}
    records_by_id = {}
    for record in index.browse_objects(browse_params):
        records_by_id[record["objectID"]] = record
    return records_by_id
def count_index(index):
    """Return the ``nbHits`` value of an empty-query search on ``index``.

    Args:
        index: Object exposing ``search(query)`` that returns a mapping
            containing an ``"nbHits"`` entry.
    """
    return index.search('')["nbHits"]
def free_space(index, posts, delete_n):
    """Delete the ``delete_n`` oldest records of ``posts`` from ``index``.

    Args:
        index: Object exposing ``delete_objects(object_ids)`` whose return
            value offers ``wait()`` (as the Algolia v2 index responses do).
        posts: Mapping of objectID -> record. Every record must carry
            ``"objectID"`` and a mutually comparable ``"created_at"``.
        delete_n: How many of the oldest records to remove. Values <= 0 are
            treated as "nothing to delete".

    Returns:
        None.

    Raises:
        KeyError: if a record lacks ``"created_at"`` or ``"objectID"``.
    """
    if delete_n <= 0:
        # A negative count would make the slice below select "everything
        # except the newest |delete_n| records" and wipe most of the index.
        return

    hits = sorted(posts.values(), key=lambda hit: hit["created_at"])
    oldest_n = [hit["objectID"] for hit in hits[:delete_n]]
    if not oldest_n:
        return

    # Block until the deletion has been applied so the freed space is
    # actually available before the caller starts adding new objects.
    index.delete_objects(oldest_n).wait()
# --- Connect and load both sides of the comparison --------------------------
# Fail fast with a readable message instead of handing None credentials to
# the client.
app_id = os.getenv("ALGOLIA_APP_ID")
admin_key = os.getenv("ALGOLIA_ADMIN_KEY")
if not app_id or not admin_key:
    raise SystemExit("ALGOLIA_APP_ID and ALGOLIA_ADMIN_KEY must be set")

# Name of the index that mirrors the local article export.
ARTICLES_INDEX = "support.mysurvey.solutions"

client = SearchClient.create(app_id, admin_key)

local_articles = read_local_index("public/algolia.json")
remote_articles = read_index(client.init_index(ARTICLES_INDEX))

posts_index = client.init_index("discourse-posts")
posts = read_index(posts_index)

# --- Current usage: objects across all four indices -------------------------
total_objects = 0
total_objects += count_index(client.init_index("discourse-tags"))
total_objects += count_index(client.init_index("discourse-users"))
total_objects += len(remote_articles)
total_objects += len(posts)

# --- Diff local vs. remote articles by objectID -----------------------------
local_ids = set(local_articles)
remote_ids = set(remote_articles)

intersection = local_ids.intersection(remote_ids)
# Present on both sides with identical content: nothing to do.
to_be_ignored = [
    object_id
    for object_id in intersection
    if local_articles[object_id] == remote_articles[object_id]
]
to_be_updated = intersection.difference(to_be_ignored)
to_be_added = local_ids.difference(remote_ids)
to_be_deleted = remote_ids.difference(local_ids)

print(f"Remote articles {len(remote_ids)}")
print(f"Local articles {len(local_ids)}")
print(f"Articles to be added {len(to_be_added)}")
print(f"Articles to be deleted {len(to_be_deleted)}")
print(f"Articles to be updated {len(to_be_updated)}")
print(f"Unchanged articles {len(to_be_ignored)}")
print(f"Current quota usage {total_objects}")

# --- Make room: drop the oldest posts if the sync would exceed the quota ----
quota = 10000
reserve_n = 300  # we want to have reserve

# Projected object count after the sync, plus the reserve, minus the quota.
posts_to_delete = total_objects - quota + reserve_n + len(to_be_added) - len(to_be_deleted)

if posts_to_delete > 0:
    print(f"Deleteing {posts_to_delete} posts")
    free_space(posts_index, posts, posts_to_delete)

# --- Apply the article changes ----------------------------------------------
batch_operations = [{
    "action": "deleteObject",
    "indexName": ARTICLES_INDEX,
    "body": {"objectID": object_id}
} for object_id in to_be_deleted]

# Delete objects first so that we don't run over the quota. Wait for the
# deletions to be applied before adding anything, and skip the request
# entirely when there is nothing to delete.
if batch_operations:
    client.multiple_batch(batch_operations).wait()

batch_operations = [{
    "action": "addObject",
    "indexName": ARTICLES_INDEX,
    "body": local_articles[object_id]
} for object_id in to_be_added]

batch_operations.extend({
    "action": "updateObject",
    "indexName": ARTICLES_INDEX,
    "body": local_articles[object_id]
} for object_id in to_be_updated)

# Avoid sending an empty batch when everything is already in sync.
if batch_operations:
    client.multiple_batch(batch_operations)