Skip to content

Commit 1998c95

Browse files
committed
Create lookup file maps from Strapi, not Allegro data (RPB-154)
1 parent 392021c commit 1998c95

9 files changed

+14681
-14676
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,4 +22,5 @@ RPB-Export_HBZ_SWN.txt
2222
RPB-Export_HBZ_Syst.txt
2323
RPB-Export_HBZ_ZSS.txt
2424
conf/RPBEXP/*.ZIP
25+
conf/strapi-export.tar.gz
2526
nohup.out*

conf/maps/gndId-to-rppdId.tsv

Lines changed: 13541 additions & 13539 deletions
Large diffs are not rendered by default.

conf/maps/rppdId-with-label.tsv

Lines changed: 1124 additions & 1124 deletions
Large diffs are not rendered by default.

conf/rppd-rppdId-with-label-map.fix

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@ unless any_contain("gndIdentifier", "Keine")
22
reject()
33
end
44

5-
copy_field("rppdId", "rppdUri")
6-
prepend("rppdUri", "https://rppd.lobid.org/")
5+
copy_field("rppdId", "uri")
6+
prepend("uri", "https://rppd.lobid.org/")
77
replace_all("preferredName", "\\/\\s(ca\\.|um)?-?\\s?\\d.+$", "")
88
trim("preferredName")
99

10-
retain("rppdId", "rppdUri", "preferredName")
10+
retain("rppdId", "preferredName", "uri")

conf/rppd-rppdId-with-label-map.flux

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
FLUX_DIR + "output/output-rppd-strapi.ndjson"
1+
FLUX_DIR + "output/rppd-export.jsonl"
22
| open-file
33
| as-lines
4-
| decode-json
4+
| decode-json(recordPath="data")
55
| fix(FLUX_DIR + "rppd-rppdId-with-label-map.fix")
66
| encode-csv(includeheader="true", noquotes="true",separator="\t")
77
| write(FLUX_DIR + "maps/rppdId-with-label.tsv")

conf/rppd-to-gnd-map.fix

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@ if any_contain("gndIdentifier", "Keine")
22
reject()
33
end
44

5-
prepend("gndIdentifier", "https://d-nb.info/gnd/")
5+
copy_field("gndIdentifier", "uri")
6+
prepend("uri", "https://d-nb.info/gnd/")
67
replace_all("preferredName", "\\/\\s(ca\\.|um)?-?\\s?\\d.+$", "")
78
trim("preferredName")
8-
retain("rppdId", "gndIdentifier", "preferredName")
9+
10+
retain("rppdId", "preferredName", "uri")

conf/rppd-to-gnd-mapping.flux

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
FLUX_DIR + "output/output-rppd-strapi.ndjson"
1+
FLUX_DIR + "output/rppd-export.jsonl"
22
| open-file
33
| as-lines
4-
| decode-json
4+
| decode-json(recordPath="data")
55
| fix(FLUX_DIR + "rppd-to-gnd-map.fix")
66
| encode-csv(includeheader="true", noquotes="true",separator="\t")
77
| write(FLUX_DIR + "maps/gndId-to-rppdId.tsv")

conf/rppd-to-lobid.fix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ do once("map")
2626
put_filemap("conf/maps/gndGeographicName.tsv", "gnd_spatial_map", key_column:"0", value_column:"1", sep_char: "\t", expected_columns:"2")
2727

2828
# maps für lookup relatedPerson
29-
put_filemap("conf/maps/gndId-to-rppdId.tsv", "map_rel_preferredName",key_column:"1",value_column:"2", sep_char: "\t", expected_columns:"-1")
29+
put_filemap("conf/maps/gndId-to-rppdId.tsv", "map_rel_preferredName",key_column:"2",value_column:"1", sep_char: "\t", expected_columns:"-1")
3030
put_filemap("conf/maps/rppdId-with-label.tsv", "map_rel_rppdLabel",key_column:"0",value_column:"1", sep_char: "\t", expected_columns:"-1")
3131

3232
# maps für depiction

transformRppd.sh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,13 @@ set -u
33

44
bash transformBeacons.sh
55
rm conf/output/bulk/rppd/*
6-
sbt "runMain rpb.ETL conf/rppd-to-strapi.flux IN_FILE=RPB-Export_HBZ_Bio.txt OUT_FILE=output-rppd-strapi.ndjson"
7-
sbt "runMain rpb.ETL conf/rppd-to-gnd-mapping.flux"
8-
sbt "runMain rpb.ETL conf/rppd-rppdId-with-label-map.flux"
96
# Here, we used to import Allegro data:
7+
# sbt "runMain rpb.ETL conf/rppd-to-strapi.flux IN_FILE=RPB-Export_HBZ_Bio.txt OUT_FILE=output-rppd-strapi.ndjson"
108
# sbt "runMain rpb.ETL conf/rppd-to-lobid.flux"
119
# But now we use the Strapi export:
1210
zgrep -a '"type":"api::person.person"' conf/strapi-export.tar.gz > conf/output/rppd-export.jsonl
11+
sbt "runMain rpb.ETL conf/rppd-to-gnd-mapping.flux"
12+
sbt "runMain rpb.ETL conf/rppd-rppdId-with-label-map.flux"
1313
sbt "runMain rpb.ETL conf/rppd-to-lobid.flux IN_FILE=rppd-export.jsonl RECORD_PATH=data"
1414

1515
# Indexing happens in rppd/transformAndIndexRppd.sh (lobid-gnd repo, branch 'rppd'), which calls this script

0 commit comments

Comments
 (0)