Skip to content

Commit de0e4d5

Browse files
authored
Merge pull request #360 from MITLibraries/TIMX-410-provenance-mappings
TIMX 410 - add TIMDEX provenance to Opensearch mapping
2 parents 37c30fe + e5c3197 commit de0e4d5

File tree

6 files changed

+110
-43
lines changed

6 files changed

+110
-43
lines changed

Makefile

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ publish-stage:
9696
# Local Opensearch commands
9797
##############################
9898

99-
local-opensearch: # Run a local instance of Opensearch via Docker Compose
100-
docker pull opensearchproject/opensearch:latest
101-
docker pull opensearchproject/opensearch-dashboards:latest
102-
docker compose --env-file .env up
99+
local-opensearch-start: # Start local instance of Opensearch
100+
docker compose --env-file .env up
101+
102+
local-opensearch-stop: # Stop local instance of Opensearch
103+
docker compose --env-file .env stop
104+
105+
local-opensearch-teardown: # Teardown local instance of Opensearch (includes data volume)
106+
docker compose --env-file .env down -v

compose.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
version: "3.8"
21
services:
32
opensearch:
43
image: opensearchproject/opensearch:latest

config/opensearch_mappings.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,32 @@
418418
"type": "text",
419419
"index": "false"
420420
},
421+
"timdex_provenance": {
422+
"type": "nested",
423+
"include_in_parent": "true",
424+
"properties": {
425+
"source": {
426+
"type": "keyword",
427+
"normalizer": "lowercase"
428+
},
429+
"run_date": {
430+
"type": "text",
431+
"fields": {
432+
"as_date": {
433+
"type": "date",
434+
"format": "yyyy-MM-dd"
435+
}
436+
}
437+
},
438+
"run_id": {
439+
"type": "keyword",
440+
"normalizer": "lowercase"
441+
},
442+
"run_record_offset": {
443+
"type": "long"
444+
}
445+
}
446+
},
421447
"title": {
422448
"type": "text",
423449
"fields": {

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import vcr
33
from click.testing import CliRunner
44

5-
from tim.opensearch import configure_opensearch_client
5+
import tim.opensearch as tim_os
66

77
EXIT_CODES = {
88
"success": 0,
@@ -27,7 +27,7 @@ def _test_env(monkeypatch):
2727

2828
@pytest.fixture
2929
def test_opensearch_client():
30-
return configure_opensearch_client("localhost")
30+
return tim_os.configure_opensearch_client("localhost")
3131

3232

3333
@pytest.fixture

tests/fixtures/cassettes/opensearch/bulk_index_create_records.yaml

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ interactions:
1919
of publication\",\"value\":\"New York (State)\"}],\"notes\":[{\"value\":[\"Paquito
2020
d' Rivera, saxophone ; Paquito d' Rivera, soprano saxophone.\",\"Description
2121
based on hard copy version record.\"]}],\"physical_description\":\"1 online
22-
resource (1 sound file)\",\"publication_information\":[\"[New York, N.Y.] :
23-
Chesky Records, p2008.\"],\"source\":\"MIT Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990026671500206761\",\"subjects\":[{\"value\":[\"Jazz.\",\"Latin
22+
resource (1 sound file)\",\"publishers\":[{\"name\":\"Chesky Records\",\"date\":\"2008\",\"location\":\"New
23+
York, N.Y.\"}],\"source\":\"MIT Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990026671500206761\",\"subjects\":[{\"value\":[\"Jazz.\",\"Latin
2424
jazz.\",\"Clarinet music (Jazz)\",\"Saxophone music (Jazz)\"]}],\"timdex_record_id\":\"mit:alma:990026671500206761\",\"title\":\"Spice
25-
it up! the best of Paquito D'Rivera.\"}\n{\"index\":{\"_id\":\"mit:alma:990027672770206761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"TX724.5.A1\",\"641.595\"],\"content_type\":[\"Text\"],\"contents\":[\"Breakfast
25+
it up! the best of Paquito D'Rivera.\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":0}}\n{\"index\":{\"_id\":\"mit:alma:990027672770206761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"TX724.5.A1\",\"641.595\"],\"content_type\":[\"Text\"],\"contents\":[\"Breakfast
2626
-- Lunch & small eats -- Date night in -- Celebrations & gatherings -- On the
2727
side -- Sweet -- Drinks.\"],\"contributors\":[{\"kind\":\"author\",\"value\":\"McTernan,
2828
Cynthia Chen, author.\"}],\"dates\":[{\"kind\":\"Date of publication\",\"value\":\"2018\"}],\"edition\":\"First
@@ -32,9 +32,9 @@ interactions:
3232
(hardback)\"},{\"kind\":\"isbn\",\"value\":\"9781635650020 (hardback)\"},{\"kind\":\"oclc\",\"value\":\"1019737335\"},{\"kind\":\"oclc\",\"value\":\"1061147498\"},{\"kind\":\"lccn\",\"value\":\"2018287279\"}],\"languages\":[\"English\"],\"literary_form\":\"nonfiction\",\"locations\":[{\"kind\":\"Place
3333
of publication\",\"value\":\"New York (State)\"}],\"notes\":[{\"value\":[\"Cynthia
3434
Chen McTernan.\",\"Includes index.\"]}],\"physical_description\":\"285 pages
35-
: color illustrations ; 27 cm\",\"publication_information\":[\"New York : Rodale
36-
Books, an imprint of the Crown Publishing Group, a division of Penguin Random
37-
House LLC, [2018]\",\"\xA92018\"],\"source\":\"MIT Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990027672770206761\",\"subjects\":[{\"value\":[\"Asian
35+
: color illustrations ; 27 cm\",\"publishers\":[{\"name\":\"Rodale Books, an
36+
imprint of the Crown Publishing Group, a division of Penguin Random House LLC\",\"date\":\"2018\",\"location\":\"New
37+
York\"},{\"date\":\"\xA92018\"}],\"source\":\"MIT Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990027672770206761\",\"subjects\":[{\"value\":[\"Asian
3838
American cooking.\"]}],\"summary\":[\"In A Common Table, Two Red Bowls blogger
3939
Cynthia Chen McTernan shares more than 80 Asian-inspired, modern recipes that
4040
marry food from her Chinese roots, Southern upbringing, and Korean mother-in-law's
@@ -57,16 +57,17 @@ interactions:
5757
the food we make and eat is rarely the product of one culture or moment, but
5858
is richly interwoven--and though some dishes might seem new or different, they
5959
are often more alike than they appear. -- Amazon.\"],\"timdex_record_id\":\"mit:alma:990027672770206761\",\"title\":\"A
60-
common table : 80 recipes and stories from my shared cultures /\"}\n{\"index\":{\"_id\":\"mit:alma:9933052979806761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"SB351.P3\",\"633\"],\"content_type\":[\"Text\"],\"contributors\":[{\"kind\":\"contributor\",\"value\":\"American
60+
common table : 80 recipes and stories from my shared cultures /\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":1}}\n{\"index\":{\"_id\":\"mit:alma:9933052979806761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"SB351.P3\",\"633\"],\"content_type\":[\"Text\"],\"contributors\":[{\"kind\":\"contributor\",\"value\":\"American
6161
Peanut Research and Education Society.\"}],\"dates\":[{\"kind\":\"Date of publication\",\"value\":\"2005\"}],\"identifiers\":[{\"kind\":\"issn\",\"value\":\"1943-7668\"},{\"kind\":\"oclc\",\"value\":\"232113616\"},{\"kind\":\"lccn\",\"value\":\"2008202156\"}],\"languages\":[\"English\"],\"literary_form\":\"fiction\",\"locations\":[{\"kind\":\"Place
6262
of publication\",\"value\":\"Oklahoma\"}],\"notes\":[{\"value\":[\"Refereed/Peer-reviewed\",\"Electronic
6363
reproduction. [S.l.] : HathiTrust Digital Library, 2010.\",\"Latest issue consulted:
6464
Vol. 35, issue 1 (Jan./June 2008).\",\"Description based on print version record.\"]}],\"numbering\":\"Began
6565
with v. 32, issue 1 (Jan./June 2005).\",\"physical_description\":\"1 online
66-
resource\",\"publication_frequency\":[\"Semiannual\"],\"publication_information\":[\"Perkins,
67-
OK : American Peanut Research and Education Society\"],\"source\":\"MIT Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma9933052979806761\",\"subjects\":[{\"value\":[\"Peanuts
66+
resource\",\"publication_frequency\":[\"Semiannual\"],\"publishers\":[{\"name\":\"American
67+
Peanut Research and Education Society\",\"location\":\"Perkins, OK\"}],\"source\":\"MIT
68+
Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma9933052979806761\",\"subjects\":[{\"value\":[\"Peanuts
6869
Periodicals.\",\"(OCoLC)fst01055999 Peanuts.\"]}],\"timdex_record_id\":\"mit:alma:9933052979806761\",\"title\":\"Peanut
69-
science.\"}\n{\"index\":{\"_id\":\"mit:dspace:1721.1-113566\",\"_index\":\"test-index\"}}\n{\"citation\":\"Ranjram,
70+
science.\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":2}}\n{\"index\":{\"_id\":\"mit:dspace:1721.1-113566\",\"_index\":\"test-index\"}}\n{\"citation\":\"Ranjram,
7071
Mike K., Intae Moon, and David J. Perreault. 'Variable-Inverter-Rectifier-Transformer:
7172
A Hybrid Electronic and Magnetic Structure Enabling
 Adjustable High Step-Down
7273
Conversion Ratios.' 2017 IEEE Workshop on Control and Modeling for Power Electronics
@@ -102,7 +103,7 @@ interactions:
102103
9, 12V) validates the operating principle and modelling of the proposed structure
103104
and achieves conversion efficiencies between 93.4% and
 95.7% at 25-36 W.\"],\"timdex_record_id\":\"mit:dspace:1721.1-113566\",\"title\":\"Variable-Inverter-Rectifier-Transformer:
104105
A Hybrid Electronic and Magnetic Structure Enabling Adjustable High Step-Down
105-
Conversion Ratios\"}\n{\"index\":{\"_id\":\"mit:archivesspace:VC.0002\",\"_index\":\"test-index\"}}\n{\"citation\":\"Charles
106+
Conversion Ratios\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":3}}\n{\"index\":{\"_id\":\"mit:archivesspace:VC.0002\",\"_index\":\"test-index\"}}\n{\"citation\":\"Charles
106107
J. Connick Stained Glass Foundation Collection, VC-0002, box X. Massachusetts
107108
Institute of Technology, Department of Distinctive Collections, Cambridge, Massachusetts.\",\"content_type\":[\"Archival
108109
collection\"],\"contents\":[\"This collection is organized into ten series\",\"Series
@@ -163,7 +164,7 @@ interactions:
163164
ArchivesSpace\",\"source_link\":\"https://archivesspace.mit.edu/repositories/2/resources/1\",\"subjects\":[{\"kind\":\"LCSH\",\"value\":[\"Glass
164165
painting and staining\"]},{\"kind\":\"NAF\",\"value\":[\"Connick, Charles J.
165166
(Charles Jay)\"]}],\"timdex_record_id\":\"mit:archivesspace:VC.0002\",\"title\":\"Charles
166-
J. Connick Stained Glass Foundation Collection\"}\n{\"index\":{\"_id\":\"mit:alma:990011240870206761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"SB106.B56.C76
167+
J. Connick Stained Glass Foundation Collection\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":4}}\n{\"index\":{\"_id\":\"mit:alma:990011240870206761\",\"_index\":\"test-index\"}}\n{\"call_numbers\":[\"SB106.B56.C76
167168
2002\",\"631.5/233\"],\"contents\":[\"1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.
168169
13. 14. 15. Overview of Crop Biotechnology / Defining Biotechnology: Increasingly
169170
Important and Increasingly Difficult / Genetically Modified Crop Approvals and
@@ -199,33 +200,34 @@ interactions:
199200
2002\",\"collection\":\"Off Campus Collection\",\"format\":\"Print volume\",\"location\":\"Library
200201
Storage Annex\"}],\"identifiers\":[{\"kind\":\"isbn\",\"value\":\"0841237662
201202
(alk. paper)\"},{\"kind\":\"oclc\",\"value\":\"49383680\"},{\"kind\":\"lccn\",\"value\":\"2002018690\"}],\"languages\":[\"English\"],\"links\":[{\"kind\":\"Hathi
202-
Trust\",\"url\":\"http://catalog.hathitrust.org/api/volumes/oclc/49383680.html\"},{\"kind\":\"unknown\",\"url\":\"http://dx.doi.org/10.1021/bk-2002-0829\"}],\"literary_form\":\"nonfiction\",\"locations\":[{\"geopoint\":[-77.025955,38.942142],\"kind\":\"Place
203+
Trust\",\"url\":\"http://catalog.hathitrust.org/api/volumes/oclc/49383680.html\"},{\"kind\":\"unknown\",\"url\":\"http://dx.doi.org/10.1021/bk-2002-0829\"}],\"literary_form\":\"nonfiction\",\"locations\":[{\"geoshape\":\"BBOX
204+
(-77.11806895668957,-76.90988990509905, 38.99435963428633, 38.79162154730547)\",\"kind\":\"Place
203205
of publication\",\"value\":\"District of Columbia\"}],\"notes\":[{\"value\":[\"K.
204206
Rajasekaran, editor, T.J. Jacks, editor, J.W. Finley, editor.\",\"\\\"Product
205207
of a 3-day symposium held during the 219th American Chemical Society (ACS) national
206208
meeting in San Francisco, California in 2000\\\"--P. x.\",\"Includes bibliographical
207209
references and indexes.\"]}],\"physical_description\":\"xi, 259 p. : ill. ;
208-
24 cm.\",\"publication_information\":[\"Washington, DC : American Chemical Society
209-
: Distributed by Oxford University Press, c2002.\"],\"related_items\":[{\"description\":\"ACS
210+
24 cm.\",\"publishers\":[{\"name\":\"American Chemical Society : Distributed
211+
by Oxford University Press\",\"date\":\"c2002\",\"location\":\"Washington, DC\"}],\"related_items\":[{\"description\":\"ACS
210212
symposium series ; 829.\",\"relationship\":\"In series\"}],\"source\":\"MIT
211213
Alma\",\"source_link\":\"https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990011240870206761\",\"subjects\":[{\"value\":[\"Plant
212214
biotechnology Congresses.\",\"Crops Congresses. Genetic engineering\"]}],\"timdex_record_id\":\"mit:alma:990011240870206761\",\"title\":\"Crop
213-
biotechnology /\"}\n"
215+
biotechnology /\",\"timdex_provenance\":{\"source\":\"alma\",\"run_date\":\"2025-01-01\",\"run_id\":\"run-abc-123\",\"run_record_offset\":5}}\n"
214216
headers:
215217
Content-Length:
216-
- '19794'
218+
- '20552'
217219
content-type:
218220
- application/json
219221
user-agent:
220-
- opensearch-py/2.0.0 (Python 3.10.6)
222+
- opensearch-py/2.8.0 (Python 3.12.2)
221223
method: POST
222224
uri: http://localhost:9200/_bulk
223225
response:
224226
body:
225-
string: '{"took":153,"errors":false,"items":[{"index":{"_index":"test-index","_type":"_doc","_id":"mit:alma:990026671500206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_type":"_doc","_id":"mit:alma:990027672770206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":1,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_type":"_doc","_id":"mit:alma:9933052979806761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":2,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_type":"_doc","_id":"mit:dspace:1721.1-113566","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":3,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_type":"_doc","_id":"mit:archivesspace:VC.0002","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":4,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_type":"_doc","_id":"mit:alma:990011240870206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":5,"_primary_term":1,"status":201}}]}'
227+
string: '{"took":40,"errors":false,"items":[{"index":{"_index":"test-index","_id":"mit:alma:990026671500206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_id":"mit:alma:990027672770206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":1,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_id":"mit:alma:9933052979806761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":2,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_id":"mit:dspace:1721.1-113566","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":3,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_id":"mit:archivesspace:VC.0002","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":4,"_primary_term":1,"status":201}},{"index":{"_index":"test-index","_id":"mit:alma:990011240870206761","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":5,"_primary_term":1,"status":201}}]}'
226228
headers:
227229
content-length:
228-
- '1278'
230+
- '1187'
229231
content-type:
230232
- application/json; charset=UTF-8
231233
status:
@@ -239,7 +241,7 @@ interactions:
239241
content-type:
240242
- application/json
241243
user-agent:
242-
- opensearch-py/2.0.0 (Python 3.10.6)
244+
- opensearch-py/2.8.0 (Python 3.12.2)
243245
method: POST
244246
uri: http://localhost:9200/test-index/_refresh
245247
response:

0 commit comments

Comments
 (0)