Skip to content

Commit b49b645

Browse files
authored
CMR-7185 Fixing autocomplete functionality by adding platform hierarc… (#1473)
* CMR-7185 Fixing autocomplete functionality by adding platform hierarchy and allowing skipped fields in science keywords and platforms. * CMR-7185 fixing api documentation.
1 parent 5721b5e commit b49b645

File tree

16 files changed

+783
-366
lines changed

16 files changed

+783
-366
lines changed

common-lib/src/cmr/common/util.clj

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,3 +1024,11 @@
10241024
(= "Statute Miles" unit) (* value statute-miles->meters-conversion-factor)
10251025
(= "Nautical Miles" unit) (* value nautical-miles->meters-conversion-factor)
10261026
:else value))
1027+
1028+
(defn remove-nil-tail
  "Remove trailing nils from a list or vector.

  Only nil counts as removable: a trailing false is a real value and is kept.
  When there is nothing to strip, coll itself is returned unchanged (so a nil
  input yields nil); otherwise a lazy sequence of the leading elements is
  returned. Runs in a single pass over coll rather than re-walking it once per
  trailing nil."
  [coll]
  (let [total (count coll)
        ;; Number of elements that remain once the trailing run of nils is gone.
        kept (count (drop-while nil? (reverse coll)))]
    (if (= kept total)
      coll
      (take kept coll))))

common-lib/test/cmr/common/test/util.clj

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,3 +877,34 @@
877877
nil
878878
nil
879879
nil))
880+
881+
(deftest remove-nil-tail-test
  ;; Each case below is: description, input collection, expected result.
  (util/are3 [input expected]
    (is (= expected (util/remove-nil-tail input)))

    "No nils"
    [:a :b :c]
    [:a :b :c]

    "Trailing nils"
    [:a :b :c nil nil nil]
    [:a :b :c]

    "Mixed nils"
    [:a nil :b nil :c nil nil nil]
    [:a nil :b nil :c]

    "Leading nils"
    [nil :a :b]
    [nil :a :b]

    "All nils"
    [nil nil nil]
    []

    "Single nil"
    [nil]
    []

    "Empty collection"
    []
    []

    "List"
    '(:a nil :b nil :c nil nil)
    '(:a nil :b nil :c)

    "nil as input"
    nil
    nil))

dev-system/resources/kms_examples/platforms

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ Basis,Category,Sub_Category,Short_Name,Long_Name,UUID
1212
"Space-based Platforms","Earth Observation Satellites","Defense Meteorological Satellite Program(DMSP)","DMSP 5B/F3","Defense Meteorological Satellite Program-F3","7ed12e98-95b1-406c-a58a-f4bbfa405269"
1313
"Space-based Platforms","Earth Observation Satellites","SMAP-like","SMAP","Soil Moisture Active and Passive Observatory","7ee03239-24ff-433e-ab7e-8be8b9b2636b"
1414
"Space-based Platforms","Earth Observation Satellites","","Aqua","Earth Observing System, Aqua","ea7fd15d-190d-43f3-bdd3-75f5d88dc3f8"
15+
"Space-based Platforms","Earth Observation Satellites","","Terra","Earth Observing System, Terra (AM-1)","80eca755-c564-4616-b910-a4c4387b7c54"
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
(ns cmr.indexer.services.autocomplete
2+
"Provide functions to index concept"
3+
(:require
4+
[camel-snake-kebab.core :as camel-snake-kebab]
5+
[clj-time.core :refer [now]]
6+
[clojure.string :as string]
7+
[cmr.common.config :refer [defconfig]]
8+
[cmr.common.log :as log :refer [debug info warn error]]
9+
[cmr.common.util :as util :refer [defn-timed]]
10+
[cmr.indexer.data.concept-parser :as cp]
11+
[cmr.indexer.data.concepts.collection.collection-util :as collection-util]
12+
[cmr.indexer.data.concepts.collection.humanizer :as humanizer]
13+
[cmr.indexer.data.elasticsearch :as es]
14+
[cmr.indexer.data.index-set :as idx-set]
15+
[cmr.indexer.services.index-service :as service]
16+
[cmr.transmit.metadata-db :as meta-db]
17+
[cmr.transmit.search :as search]))
18+
19+
;; Retention window for autocomplete suggestions, read when pruning stale documents.
(defconfig autocomplete-suggestion-age-limit
  "Age in hours that we allow autocomplete suggestions to persist to avoid stale data."
  {:default 24
   :type Long})
23+
24+
(defn- keywords->elastic-docs
  "Builds one autocomplete elastic doc from a single hierarchical keyword map.

  The values of keywords are looked up in keyword-hierarchy order and trailing nils are
  dropped; levels in the middle of the hierarchy may still be nil (skipped). The remaining
  values are joined with \":\" to form :fields and the last remaining value becomes :value.
  The :_id is the hash of the lower-cased joined string suffixed with \"_\" and type.

  Returns nil when keywords is not a non-empty map."
  [index type keywords keyword-hierarchy public-collection? permitted-group-ids modified-date]
  (when (and (map? keywords) (seq keywords))
    (let [levels (util/remove-nil-tail (for [level keyword-hierarchy]
                                         (get keywords level)))
          joined (string/join ":" levels)
          doc-id (hash (str (string/lower-case joined) "_" type))]
      {:_id doc-id
       :type type
       :value (last levels)
       :fields joined
       :_index index
       :contains-public-collections public-collection?
       :permitted-group-ids permitted-group-ids
       :modified modified-date})))
47+
48+
(defn- science-keywords->elastic-docs
  "Builds the autocomplete elastic doc for one science keyword map, typed \"science_keywords\".

  The hierarchy runs topic, term, variable levels 1-3, then detailed variable; levels may be
  nil (skipped)."
  [index science-keywords public-collection? permitted-group-ids modified-date]
  (keywords->elastic-docs index
                          "science_keywords"
                          science-keywords
                          [:topic
                           :term
                           :variable-level-1
                           :variable-level-2
                           :variable-level-3
                           :detailed-variable]
                          public-collection?
                          permitted-group-ids
                          modified-date))
66+
67+
(defn- platform-keywords->elastic-docs
  "Builds the autocomplete elastic doc for one platform keyword map, typed \"platforms\".

  The hierarchy runs basis, category, sub-category, short-name; levels such as
  sub-category may be nil (skipped)."
  [index platform-keywords public-collection? permitted-group-ids modified-date]
  (keywords->elastic-docs index
                          "platforms"
                          platform-keywords
                          [:basis :category :sub-category :short-name]
                          public-collection?
                          permitted-group-ids
                          modified-date))
81+
82+
(defn- suggestion-doc
  "Creates elasticsearch suggestion doc(s) from one humanized value map.

  When key-name contains \"science-keywords\" or \"platforms2-humanized\" the value map is
  treated as a keyword hierarchy and a single doc (or nil) is returned. For any other
  key-name a lazy sequence with one doc per map entry is returned, skipping entries whose
  key name contains \"-lowercase\".

  A \"guest\" entry in permissions marks the doc as containing public collections; the
  remaining permissions are stored as a comma-separated string, or nil when there are none."
  [index permissions key-name value-map]
  (let [guest? #(= "guest" %)
        public-collection? (boolean (some guest? permissions))
        permitted-group-ids (not-empty (string/join "," (remove guest? permissions)))
        modified-date (str (now))]
    (cond
      (seq (re-find #"science-keywords" key-name))
      (science-keywords->elastic-docs
       index value-map public-collection? permitted-group-ids modified-date)

      (seq (re-find #"platforms2-humanized" key-name))
      (platform-keywords->elastic-docs
       index value-map public-collection? permitted-group-ids modified-date)

      :else
      (for [entry value-map
            :when (not (string/includes? (name (key entry)) "-lowercase"))
            :let [v (val entry)
                  ;; The snake-cased keyword is stringified by string/replace, which is
                  ;; why the leading ":" is stripped together with "_humanized".
                  type (string/replace (camel-snake-kebab/->snake_case_keyword key-name)
                                       #"_humanized|:"
                                       "")
                  id (hash (str (string/lower-case v) "_" type))]]
        {:type type
         :_id id
         :value v
         :fields v
         :_index index
         :contains-public-collections public-collection?
         :permitted-group-ids permitted-group-ids
         :modified modified-date}))))
131+
132+
(defn- get-suggestion-docs
  "Given the humanized fields of one collection (plus its :id and :permissions), returns a
  lazy sequence with one element per humanized field; each element is the sequence of
  non-nil results of suggestion-doc for that field's values.

  Before building docs, nil-valued keys and :priority are removed from every value map, and
  the field name has its \".humanized\"/\".humanized2\"/\".humanized_2\", \"-sn\" and \"-id\"
  parts stripped."
  [index humanized-fields]
  (let [permissions (:permissions humanized-fields)
        facet-fields (dissoc humanized-fields :id :permissions)]
    (for [field facet-fields
          :let [key-name (string/replace (name (key field))
                                         #"(\.humanized(_?2)?|-sn|-id)"
                                         "")
                ->doc (fn [value-map]
                        (suggestion-doc index
                                        permissions
                                        key-name
                                        (dissoc (util/remove-nil-keys value-map) :priority)))]]
      (keep ->doc (val field)))))
151+
152+
(defn- anti-value?
  "Returns true when term is blank (including nil) or, compared case-insensitively, contains
  \"none\", \"not provided\" or \"not applicable\"; false otherwise."
  [term]
  (if (string/blank? term)
    true
    (some? (re-find #"(none|not (provided|applicable))"
                    (string/lower-case term)))))
159+
160+
(defn anti-value-suggestion?
  "Returns whether the :value of an autocomplete suggestion is an anti-value.
  See also [[anti-value?]]"
  [suggestion]
  (anti-value? (:value suggestion)))
166+
167+
(defn- parse-collection
  "Parses a collection concept via cp/parse-concept.

  Any Exception is caught and logged together with the collection's concept-id; in that
  case the result is whatever the error logging call returns (callers treat it as nil —
  NOTE(review): confirm the log macro returns nil)."
  [context collection]
  (try
    (cp/parse-concept context collection)
    (catch Exception e
      (let [concept-id (:concept-id collection)
            message (.getMessage e)]
        (error (format "An error occurred while parsing collection for autocomplete with concept-id [%s]: %s"
                       concept-id
                       message))))))
176+
177+
(defn- get-humanized-collections
  "Returns the humanized elastic fields for the given parsed collection with the old flat
  :platform-sn-humanized entry removed, since that facet is no longer supported for
  autocomplete."
  [context collection]
  (-> (humanizer/collection-humanizers-elastic context collection)
      (dissoc :platform-sn-humanized)))
182+
183+
(defn- collections->suggestion-docs
  "Convert collection concept metadata to UMM-C and pull facet fields
  to be indexed as autocomplete suggestion docs.

  Deleted collections and collections that fail to parse are skipped. Permissions and
  humanized fields are built together for each collection, so the permissions attached to
  a set of suggestions always come from the same collection as the fields. (Building the
  two lists separately and zipping them by position misaligns them as soon as one
  collection is skipped.)

  Returns a lazy, flattened sequence of suggestion docs with anti-value suggestions removed."
  [context collections provider-id]
  (let [{:keys [index-names]} (idx-set/get-concept-type-index-names context)
        index (get-in index-names [:autocomplete :autocomplete])
        ;; Humanized fields merged over {:id :permissions} for ONE collection, or nil when
        ;; the collection is deleted or cannot be parsed.
        humanized-with-permissions
        (fn [collection]
          (when-not (:deleted collection)
            (when-some [parsed (parse-collection context collection)]
              (merge {:id (:concept-id collection)
                      :permissions (collection-util/get-coll-permitted-group-ids
                                    context provider-id collection)}
                     (get-humanized-collections context parsed)))))]
    (->> collections
         (keep humanized-with-permissions)
         (map #(get-suggestion-docs index %))
         flatten
         (remove anti-value-suggestion?))))
205+
206+
(defn-timed reindex-autocomplete-suggestions-for-provider
  "Reindex autocomplete suggestions for a given provider.

  Walks the provider's latest collection revisions in batches of service/REINDEX_BATCH_SIZE,
  bulk-indexes the suggestion docs derived from each batch, and returns the total number of
  collections (not suggestion docs) that were processed."
  [context provider-id]
  (info "Reindexing autocomplete suggestions for provider" provider-id)
  (let [index-batch! (fn [indexed-so-far coll-batch]
                       (->> (collections->suggestion-docs context coll-batch provider-id)
                            (es/bulk-index-autocomplete-suggestions context))
                       (+ indexed-so-far (count coll-batch)))]
    (reduce index-batch!
            0
            (meta-db/find-in-batches context
                                     :collection
                                     service/REINDEX_BATCH_SIZE
                                     {:provider-id provider-id :latest true}))))
221+
222+
(defn prune-stale-autocomplete-suggestions
  "Delete any autocomplete suggestions that were modified outside the retention period.

  Issues a delete-by-query against the autocomplete index for documents whose modified
  field is less than \"now-<limit>h/h\", where <limit> is (autocomplete-suggestion-age-limit).
  NOTE(review): the cutoff string is presumably Elasticsearch date math — confirm."
  [context]
  (info (format "Pruning autocomplete suggestions older than %d hours."
                (autocomplete-suggestion-age-limit)))
  (let [index (-> (idx-set/get-concept-type-index-names context)
                  :index-names
                  (get-in [:autocomplete :autocomplete]))
        mapping-type ((idx-set/get-concept-mapping-types context) :collection)
        cutoff (format "now-%dh/h" (autocomplete-suggestion-age-limit))
        modified-field (service/query-field->elastic-field :modified :suggestion)]
    (es/delete-by-query context
                        index
                        mapping-type
                        {:range {modified-field {:lt cutoff}}})))

indexer-app/src/cmr/indexer/services/event_handler.clj

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[cmr.indexer.config :as config]
77
[cmr.indexer.data.collection-granule-aggregation-cache :as cgac]
88
[cmr.indexer.data.concepts.deleted-granule :as deleted-granule]
9+
[cmr.indexer.services.autocomplete :as autocomplete]
910
[cmr.indexer.services.index-service :as indexer]
1011
[cmr.message-queue.queue.queue-protocol :as queue-protocol]))
1112

@@ -28,7 +29,11 @@
2829

2930
(defmethod handle-provider-event :provider-autocomplete-suggestion-reindexing
3031
[context {:keys [provider-id]}]
31-
(indexer/reindex-autocomplete-suggestions-for-provider context provider-id))
32+
(autocomplete/reindex-autocomplete-suggestions-for-provider context provider-id))
33+
34+
;; Prunes stale autocomplete suggestions; the event payload itself is not used.
(defmethod handle-provider-event :autocomplete-suggestion-prune
  [context _msg]
  (autocomplete/prune-stale-autocomplete-suggestions context))
3237

3338
(defmethod handle-provider-event :refresh-collection-granule-aggregation-cache
3439
[context {:keys [granules-updated-in-last-n]}]
@@ -114,4 +119,3 @@
114119
(queue-protocol/subscribe queue-broker
115120
(config/deleted-granule-index-queue-name)
116121
#(handle-ingest-event context true %)))))
117-

0 commit comments

Comments
 (0)