Skip to content

Commit bb13c82

Browse files
committed
Update transformation to current lobid fix RPB-225
1 parent deedcd5 commit bb13c82

File tree

6 files changed

+53
-20
lines changed

6 files changed

+53
-20
lines changed

conf/hebisMarc2lobid-transformation/fix/describedBy.fix

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ nothing() # currently no transformation for describedBy is needed.
4141
# substring("@initialCataloguingDate","0","6")
4242
# end
4343
#
44-
# if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000
44+
# if any_match("@initialCataloguingDate","^[0-4]\\d(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 2000-01-01 to 2049-12-31 ( e.g. matching 491231)
4545
# prepend("@initialCataloguingDate","20")
46-
# elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000
46+
# elsif any_match("@initialCataloguingDate","\\d{2}(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 1900-01-01 to 1999-12-31 ( e.g. matching 991231)
4747
# prepend("@initialCataloguingDate","19")
4848
# else
4949
# copy_field("MNG .b","@initialCataloguingDate")
@@ -67,16 +67,18 @@ nothing() # currently no transformation for describedBy is needed.
6767
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
6868
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01")
6969
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01")
70+
# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateCreated")
71+
# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateModified")
7072
#
7173
# add_array("describedBy.resultOf.object.type[]", "DataFeedItem")
7274
#
7375
# copy_field("almaMmsId","describedBy.resultOf.object.label")
7476
# prepend("describedBy.resultOf.object.label","hbz-Ressource ")
7577
# append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML")
7678
#
77-
# add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1")
79+
# add_field("describedBy.resultOf.object.inDataset.id", "https://datahub.io/dataset/hbz_unioncatalog")
7880
#
79-
# add_field("describedBy.resultOf.object.inDataset.label", "Hebis SRU")
81+
# add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog")
8082
#
8183
# add_array("describedBy.license[]")
8284
# add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" )
@@ -115,4 +117,3 @@ nothing() # currently no transformation for describedBy is needed.
115117
# end
116118
#
117119
# uniq("describedBy.resultOf.object.modifiedBy[]")
118-
#

conf/hebisMarc2lobid-transformation/fix/macros.fix

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,3 +585,22 @@ do put_macro("lobidResourcesFallbackLabel")
585585
end
586586
end
587587

588+
# Validate leap days: a date of the form "YYYY-02-29" whose year is not a
# leap year is rewritten to "YYYY-02-28".
#
# Parameter:
#   date - path of the field holding the date string to check.
#
# Only values matching "....-02-29" are examined; all other values are left
# untouched.
do put_macro("leapYearChecker")
  if any_match("$[date]","....-02-29")
    # Gregorian rule, independent of the century:
    #   - any non-century year divisible by 4   -> [0-9]{2}(0[48]|[2468][048]|[13579][26])
    #   - century years only if divisible by 400 -> ([02468][048]|[13579][26])00
    # The earlier pattern only knew the centuries 18, 19 and 20, so a valid
    # leap day such as 1796-02-29 was wrongly turned into 1796-02-28.
    unless any_match("$[date]","([0-9]{2}(0[48]|[2468][048]|[13579][26])|([02468][048]|[13579][26])00)-02-29")
      replace_all("$[date]","(....-02)-29","$1-28")
    end
  end
end
596+
597+
598+
# DE Sol1 holding tester. Records that carry a "zdbId" field (ZDB records)
# are excluded.
#
# Parameter:
#   holdingId - path of the field whose value is tested for ending in "7830".
#
# Effect: adds the field "$i.deSol1Bridge" with the value "true" when the
# record has no "zdbId" and the holding id matches.
# NOTE(review): "$i" is not a macro parameter; presumably it resolves to the
# loop variable of the calling "do list" bind - confirm at the call sites.
do put_macro("deSol1BridgeTester")
  unless exists("zdbId")
    if any_match("$[holdingId]",".*7830$")
      add_field("$i.deSol1Bridge","true")
    end
  end
end

conf/hebisMarc2lobid-transformation/fix/otherFields.fix

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ replace_all("extent", " ", " ")
112112
# 500 - General Note (R) Subfield: $a (NR)
113113
add_array("note[]")
114114
do list(path:"500 ", "var": "$i")
115-
copy_field("$i.a", "note[].$append")
115+
unless any_contain("$i.a","In:")
116+
copy_field("$i.a", "note[].$append")
117+
end
116118
end
117119
uniq("note[]")
118120

@@ -130,6 +132,9 @@ do list(path:"520[ 23] ", "var": "$i")
130132
copy_field("$i.[ab]", "abstract[].$append")
131133
end
132134

135+
replace_all("abstract[].*","^<!\\[CDATA\\[(.*)\\]\\]>$","$1")
136+
replace_all("abstract[].*","<[\\/]?.{1,2}>","")
137+
133138
# 502 - Dissertation Note (R) Subfield: $a (R)
134139
add_array("thesisInformation[]")
135140
do list(path:"502 ", "var": "$i")

conf/hebisMarc2lobid-transformation/fix/relatedRessourcesAndLinks.fix

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ unless any_match("leader", "^.{7}[ad].*")
8686
do list(path: "773??", "var": "$i")
8787
unless any_equal("$i.9","LOCAL")
8888
do list(path: "$i.w", "var": "$j")
89-
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
89+
add_hash("isPartOf[].$append")
9090
add_array("isPartOf[].$last.hasSuperordinate[]")
9191
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
9292
if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$")
@@ -108,7 +108,7 @@ end
108108
# A 490 field with first indicator 1 has an identical entry in 830, so only 490 fields with first indicator 0 are handled here.
109109

110110
do list(path: "4900?", "var": "$i")
111-
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
111+
add_hash("isPartOf[].$append")
112112
add_array("isPartOf[].$last.hasSuperordinate[]")
113113
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
114114
add_array("isPartOf[].$last.hasSuperordinate[].$last.label")
@@ -130,18 +130,19 @@ end
130130
# The element is repeatable; local entries carry subfield $M.
131131

132132
do list(path: "830??", "var": "$i")
133-
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
133+
add_hash("isPartOf[].$append")
134134
add_array("isPartOf[].$last.hasSuperordinate[]")
135135
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
136136
if all_match("$i.w", "^\\((?:DE-600|DE-605)\\)(.*)$")
137137
copy_field("$i.w", "isPartOf[].$last.hasSuperordinate[].$last.id")
138138
end
139-
add_array("isPartOf[].$last.hasSuperordinate[].$last.label")
140139
do list(path:"$i.a", "var":"$j")
141140
copy_field("$j", "isPartOf[].$last.hasSuperordinate[].$last.label.$append")
142141
end
143142
join_field("isPartOf[].$last.hasSuperordinate[].$last.label", " / ")
144-
copy_field("$i.v", "isPartOf[].$last.numbering")
143+
unless is_empty("isPartOf[].$last.hasSuperordinate[].1")
144+
copy_field("$i.v", "isPartOf[].$last.numbering")
145+
end
145146
end
146147

147148
do list(path: "4901?", "var": "$j")
@@ -164,7 +165,7 @@ if any_match("leader", "^.{7}[ad].*")
164165
do list(path: "773??", "var": "$i")
165166
unless any_equal("$i.9","LOCAL")
166167
do list(path: "$i.w", "var": "$j")
167-
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
168+
add_hash("isPartOf[].$append")
168169
add_array("isPartOf[].$last.hasSuperordinate[]")
169170
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
170171
if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$")
@@ -192,21 +193,26 @@ if any_match("leader", "^.{7}[ad].*")
192193
end
193194
end
194195

195-
do list(path: "isPartOf[].*.hasSuperordinate[]", "var": "$i")
196-
unless exists("$i.label")
197-
copy_field("@title", "$i.label")
196+
do list(path: "isPartOf[]","var":"$i")
197+
unless is_empty("$i.hasSuperordinate[].1")
198+
do list(path:"$i.hasSuperordinate[]", "var": "$j") ## This is the fallback for isPartOf[].*.hasSuperordinate[].*.label
199+
unless exists("$j.label")
200+
copy_field("@title", "$j.label")
201+
end
202+
end
203+
add_array("$i.type[]", "IsPartOfRelation")
198204
end
199205
end
200206

201207
replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-605\\)(.*)$", "http://lobid.org/resources/$1#!")
202208
replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!")
203209

204210
replace_all("isPartOf[].*.numbering", "^[©]|\\s?[,.:;/=]?$", "")
205-
do list(path:"isPartOf[]","var":"$i")
206-
call_macro("lobidResourcesFallbackLabel",field:"$i.hasSuperordinate[]")
207-
end
211+
208212

209213
uniq("isPartOf[]")
214+
215+
210216
replace_all("containedIn[].*.id", "^\\(DE-605\\)(.*)$", "http://lobid.org/resources/$1#!")
211217
replace_all("containedIn[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!")
212218
replace_all("containedIn[].*.label","<<|>>","")

conf/hebisMarc2lobid-transformation/fix/titleRelatedFields.fix

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,8 @@ do list(path: "publication[]", "var": "$i")
326326
replace_all("$i.location[].*", "^\\[(.*)\\]$", "$1")
327327
replace_all("$i.location[].*", "\\s?[,:;]$", "")
328328
replace_all("$i.publishedBy[].*", "^[©]|\\s?[,:;/=]?$", "")
329+
call_macro("leapYearChecker",date:"$i.startDate")
330+
call_macro("leapYearChecker",date:"$i.endDate")
329331
uniq("$i.location[]")
330332
end
331333

conf/output/test-hebis-to-lobid-output-9.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
"label" : "OCLC Ressource"
2222
} ],
2323
"isPartOf" : [ {
24-
"type" : [ "IsPartOfRelation" ],
2524
"hasSuperordinate" : [ {
2625
"label" : "Beiträge zur Geschichte des Gau-Algesheimer Raumes"
2726
} ],
28-
"numbering" : "42"
27+
"numbering" : "42",
28+
"type" : [ "IsPartOfRelation" ]
2929
} ],
3030
"language" : [ {
3131
"id" : "http://id.loc.gov/vocabulary/iso639-2/ger",

0 commit comments

Comments
 (0)