Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions app/models/child_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ def checksum_matches?
sha512_checksum == access_sha512_checksum
# goobi
elsif checksum.present?
checksum == Digest::SHA1.file(access_primary_path).to_s
checksum == access_sha1_checksum
# ladybird
elsif sha256_checksum.present?
sha256_checksum == Digest::SHA256.file(access_primary_path).to_s
sha256_checksum == access_sha256_checksum
# ladybird
elsif md5_checksum.present?
md5_checksum == Digest::MD5.file(access_primary_path).to_s
md5_checksum == access_md5_checksum
else
false
end
Expand All @@ -130,6 +130,18 @@ def access_sha512_checksum
Digest::SHA512.file(access_primary_path).to_s
end

# Computes the SHA-256 digest of the file found at +access_primary_path+.
#
# @return [String] the digest rendered as a hexadecimal string
def access_sha256_checksum
  digest = Digest::SHA256.file(access_primary_path)
  digest.hexdigest
end

# Computes the SHA-1 digest of the file found at +access_primary_path+.
#
# @return [String] the digest rendered as a hexadecimal string
def access_sha1_checksum
  digest = Digest::SHA1.file(access_primary_path)
  digest.hexdigest
end

# Computes the MD5 digest of the file found at +access_primary_path+.
#
# @return [String] the digest rendered as a hexadecimal string
def access_md5_checksum
  digest = Digest::MD5.file(access_primary_path)
  digest.hexdigest
end

# Reports the size of the file at +access_primary_path+.
#
# File.size? is deliberately not used here: it returns nil for an empty file,
# whereas this method reports 0 for a file that exists but has no content.
#
# @return [Integer, nil] size in bytes, or nil when no file exists at the path
def access_file_size
  # `File` is a constant and can never be nil, so plain `.size` replaces `&.size`.
  File.size(access_primary_path) if File.exist?(access_primary_path)
end
Expand Down
42 changes: 27 additions & 15 deletions app/models/concerns/updatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ def update_child_objects_caption
attach_item(parent_object)
add_admin_set_to_bp(sets, parent_object)
save!
child_object.caption = row['caption'] unless row['caption'].nil?
child_object.label = row['label'] unless row['label'].nil?
processed_fields = validate_child_field(child_object, row)
child_object.update!(processed_fields)
processing_event_for_child(child_object)
Expand Down Expand Up @@ -166,12 +164,16 @@ def remove_child_blanks(row, child_object)

# CHECKS CHILD OBJECT ATTRIBUTES
# Builds the attribute hash used to update one child object from one CSV row.
#
# Caption and label go through +valid_regular_child_fields+; the four checksum
# columns go through +valid_checksum_child_fields+, because their CSV column
# names differ from the attribute names they are stored under.
#
# @param child_object the child object the row refers to
# @param row the CSV row, indexed by column name
# @return [Hash] attribute name (Symbol) => value to store; the second value
#   returned by +remove_child_blanks+ is merged in last, so it wins on a key clash
def validate_child_field(child_object, row)
  regular_fields = %w[caption label]
  # CSV column name => child object attribute the value is stored in
  checksum_fields = { 'sha512' => 'sha512_checksum', 'sha256' => 'sha256_checksum', 'sha1' => 'checksum', 'md5' => 'md5_checksum' }
  row, blanks = remove_child_blanks(row, child_object)

  processed_fields = {}
  regular_fields.each do |field|
    processed_fields[field.to_sym] = valid_regular_child_fields(row, field, child_object)
  end
  checksum_fields.each do |column_name, attribute|
    processed_fields[attribute.to_sym] = valid_checksum_child_fields(row, column_name, attribute, child_object)
  end
  processed_fields.merge!(blanks)
end
Expand Down Expand Up @@ -235,22 +237,13 @@ def update_child_objects_checksum
attach_item(parent_object)
add_admin_set_to_bp(sets, parent_object)
save!
child_object.sha512_checksum = row['sha512'] unless row['sha512'].nil?
child_object.sha256_checksum = row['sha256'] unless row['sha256'].nil?
child_object.checksum = row['sha1'] unless row['sha1'].nil?
child_object.md5_checksum = row['md5'] unless row['md5'].nil?
processed_fields = validate_child_field(child_object, row)
child_object.update!(processed_fields)
processing_event_for_child(child_object)
processed_child_objects_count += 1
end
unique_po = po_arr.uniq(&:oid)
unique_po.each do |parent_object|
trigger_setup_metadata(parent_object)
processing_event_for_parent(parent_object)
end
batch_processing_event("#{processed_child_objects_count} child objects updated.", "Complete")
batch_processing_event("Child objects that were not updated: [#{child_objects_not_updated}].", "Complete")
batch_processing_event("Child objects that were not updated: #{child_objects_not_updated}.", "Complete")
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity
Expand Down Expand Up @@ -288,7 +281,7 @@ def remote_po_path(oid, metadata_source)
"#{metadata_source}/#{oid}.json"
end

# CHECKS PARENT OBJECT PERMITTED ATTRIBUTES
# CHECKS PARENT OBJECT REGULAR FIELDS
def valid_regular_fields(row, field_value, parent_object)
if row[field_value].present? && row[field_value] != parent_object.send(field_value)
row[field_value]
Expand All @@ -297,7 +290,7 @@ def valid_regular_fields(row, field_value, parent_object)
end
end

# CHECKS CHILD OBJECT PERMITTED ATTRIBUTES
# CHECKS CHILD OBJECT REGULAR FIELDS
def valid_regular_child_fields(row, field_value, child_object)
if row[field_value].present? && row[field_value] != child_object.send(field_value)
row[field_value]
Expand All @@ -306,6 +299,18 @@ def valid_regular_child_fields(row, field_value, child_object)
end
end

# CHECKS CHILD OBJECT CHECKSUM FIELDS
# Decides which checksum value should be stored on a child object for one CSV column.
#
# A value supplied in the row is only kept when it equals the digest the child
# object computes through its +access_*_checksum+ reader. When it differs, the
# computed digest is stored instead and +process_invalid_checksum_event+ is called.
#
# @param row the CSV row, indexed by column name
# @param column_name [String] CSV column holding the checksum ('sha512', 'sha256', 'sha1', 'md5')
# @param attribute [String] child object attribute the checksum is stored in
# @param child_object the child object being updated
# @return the currently stored value when the row value is blank or unchanged,
#   the row value when it matches the computed digest, otherwise the computed digest
def valid_checksum_child_fields(row, column_name, attribute, child_object)
  supplied = row[column_name]
  stored = child_object.send(attribute)
  return stored unless supplied.present? && supplied != stored

  # The digest readers are named after the CSV column (access_sha1_checksum),
  # not after the attribute: the 'sha1' column is stored in `checksum`, and
  # building the name from the attribute would call `access_checksum`.
  # The attribute-based name is kept as a fallback for any other pairing.
  reader = "access_#{column_name}_checksum"
  reader = "access_#{attribute}" unless child_object.respond_to?(reader, true)

  # Call the reader once and reuse the result instead of re-digesting per comparison.
  computed = child_object.send(reader)
  return supplied if computed == supplied

  process_invalid_checksum_event(column_name, child_object)
  computed
end

# CHECKS CONTROLLED VOCABULARY
def valid_controlled_vocab_fields(row, column_name, vocab, parent_object)
if row[column_name].present? && row[column_name] != parent_object.send(column_name) && (ParentObject.send(vocab).include? row[column_name])
Expand Down Expand Up @@ -334,6 +339,13 @@ def process_invalid_vocab_event(column_name, row_value, oid)
end
# rubocop:enable Layout/LineLength

# Records on a child object that its checksum was taken from the access primary
# file rather than from the value supplied for +column_name+.
#
# Sets the receiver as the child's current batch process, finds or creates the
# batch connection for the child and saves it, then emits the processing event.
#
# @param column_name [String] name of the checksum column, used in the event message
# @param child_object the child object the event is recorded for
# @return whatever +child_object.processing_event+ returns
def process_invalid_checksum_event(column_name, child_object)
  child_object.current_batch_process = self
  connection = batch_connections.find_or_create_by(connectable: child_object)
  child_object.current_batch_connection = connection
  child_object.current_batch_connection.save!

  reason = "Child #{child_object.oid} was updated with the #{column_name} checksum value, read from the access primary original image file."
  child_object.processing_event(reason, 'update-complete')
end

# KICKS OFF SETUP METADATA JOB AND ATTACHES BATCH PROCESS TO PARENT
def trigger_setup_metadata(parent_object)
parent_object.current_batch_process = self
Expand Down
4 changes: 4 additions & 0 deletions spec/fixtures/csv/checksum_child_object_valid.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
oid,sha512,sha256,sha1,md5
10736292,ddfcbb8f70ba901e979acbe0c5a716e2cb1784dae560ea396471a277caa4ce58f796adb6d64cbbdb3be3d2a2c436bc26c45b878e33a7f083c3b72272e01595b0
67890,ddfcbb8f70ba901e979acbe0c5a716e2cb1784dae560ea396471a277caa4ce58f796adb6d64cbbdb3be3d2a2c436bc26c45b878e33a7f083c3b72272e01595b0
12,ddfcbb8f70ba901e979acbe0c5a716e2cb1784dae560ea396471a277caa4ce58f796adb6d64cbbdb3be3d2a2c436bc26c45b878e33a7f083c3b72272e01595b0
Binary file not shown.
Binary file not shown.
Binary file not shown.
61 changes: 54 additions & 7 deletions spec/models/batch_process_update_child_objects_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,25 @@
let(:role) { FactoryBot.create(:role, name: editor) }
let(:caption_label_csv_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "update_child_object_caption.csv")) }
let(:caption_label_csv_blank_value_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "update_child_object_blank.csv")) }
let(:checksum_csv_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "checksum_child_object.csv")) }
let(:checksum_csv_valid_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "checksum_child_object_valid.csv")) }
let(:checksum_csv_invalid_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "checksum_child_object_invalid.csv")) }
let(:checksum_csv_blank_value_upload) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "checksum_child_object_blank.csv")) }
let(:parent_object) { FactoryBot.create(:parent_object, oid: 2_002_826, admin_set_id: admin_set.id) }
let(:initial_fixity_value) { FFaker::Number.number(digits: 20) }
let(:tif_sha512_fixity_value) { "ddfcbb8f70ba901e979acbe0c5a716e2cb1784dae560ea396471a277caa4ce58f796adb6d64cbbdb3be3d2a2c436bc26c45b878e33a7f083c3b72272e01595b0" }
let(:child_object) { FactoryBot.create(:child_object, oid: 10_736_292, caption: "caption", label: "label", parent_object: parent_object, sha512_checksum: initial_fixity_value) }
let(:child_object_2) { FactoryBot.create(:child_object, oid: 67_890, caption: "co2 caption", label: "co2 label", parent_object: parent_object, sha512_checksum: initial_fixity_value) }
let(:child_object_3) { FactoryBot.create(:child_object, oid: 12, caption: "co3 caption", label: "co3 label", parent_object: parent_object, sha512_checksum: initial_fixity_value) }

# Runs each example with enqueued jobs performed and with the OCR bucket and
# access primary mount pointed at test values. The previous ENV values are
# restored in an `ensure` so an exception raised while running the example
# cannot leak the overrides into later examples.
around do |example|
  perform_enqueued_jobs do
    original_ocr_bucket = ENV['OCR_DOWNLOAD_BUCKET']
    original_access_mount = ENV['ACCESS_PRIMARY_MOUNT']
    begin
      ENV['OCR_DOWNLOAD_BUCKET'] = "yul-dc-ocr-test"
      ENV['ACCESS_PRIMARY_MOUNT'] = File.join("spec", "fixtures", "images", "access_primaries")
      example.run
    ensure
      ENV['OCR_DOWNLOAD_BUCKET'] = original_ocr_bucket
      ENV['ACCESS_PRIMARY_MOUNT'] = original_access_mount
    end
  end
end

Expand Down Expand Up @@ -105,19 +110,61 @@

with_versioning do
# Uploads the CSV of valid checksums and checks that each child's stored
# sha512 value is replaced by the value from the CSV, with a single
# "has been updated" event per child.
it "can update child checksum based on csv import" do
  # begin test with incorrect fixity values saved to the child object
  expect(child_object.sha512_checksum).to eq initial_fixity_value.to_s
  expect(child_object_2.sha512_checksum).to eq initial_fixity_value.to_s
  expect(child_object_3.sha512_checksum).to eq initial_fixity_value.to_s
  # only the valid CSV is attached; an earlier assignment of a different
  # upload was overwritten before the save and has been dropped
  checksum_batch_process.file = checksum_csv_valid_upload
  checksum_batch_process.save
  updated_child_object = ChildObject.find(10_736_292)
  updated_child_object_two = ChildObject.find(67_890)
  updated_child_object_three = ChildObject.find(12)
  # all tifs are the same image so matching checksums is expected
  # (expectations against three other hard-coded digests contradicted these and were removed)
  expect(updated_child_object.sha512_checksum).to eq tif_sha512_fixity_value
  expect(updated_child_object_two.sha512_checksum).to eq tif_sha512_fixity_value
  expect(updated_child_object_three.sha512_checksum).to eq tif_sha512_fixity_value
  expect(checksum_batch_process.batch_ingest_events.count).to eq 2
  expect(checksum_batch_process.batch_ingest_events.first.reason).to eq "3 child objects updated."
  expect(checksum_batch_process.batch_ingest_events.last.reason).to eq "Child objects that were not updated: []."
  expect(updated_child_object.events_for_batch_process(checksum_batch_process).count).to eq 1
  expect(updated_child_object.events_for_batch_process(checksum_batch_process).first.reason).to eq "Child 10736292 has been updated"
  expect(updated_child_object_two.events_for_batch_process(checksum_batch_process).count).to eq 1
  expect(updated_child_object_two.events_for_batch_process(checksum_batch_process).first.reason).to eq "Child 67890 has been updated"
  expect(updated_child_object_three.events_for_batch_process(checksum_batch_process).count).to eq 1
  expect(updated_child_object_three.events_for_batch_process(checksum_batch_process).first.reason).to eq "Child 12 has been updated"
end

# rubocop:disable Layout/LineLength
# Uploads the CSV of invalid checksums and checks that each child ends up with
# the expected tif digest rather than the CSV value, and that each child gets
# both the "read from the access primary" event and the "has been updated" event.
it "cannot update child checksum with incorrect values" do
  # begin test with incorrect fixity values saved to the child object
  [child_object, child_object_2, child_object_3].each do |child|
    expect(child.sha512_checksum).to eq initial_fixity_value.to_s
  end

  checksum_batch_process.file = checksum_csv_invalid_upload
  checksum_batch_process.save

  oids = [10_736_292, 67_890, 12]
  updated_children = oids.map { |oid| ChildObject.find(oid) }

  # all tifs are the same image so matching checksums is expected
  updated_children.each do |updated|
    expect(updated.sha512_checksum).to eq tif_sha512_fixity_value
  end

  batch_events = checksum_batch_process.batch_ingest_events
  expect(batch_events.count).to eq 2
  expect(batch_events.first.reason).to eq "3 child objects updated."
  expect(batch_events.last.reason).to eq "Child objects that were not updated: []."

  # ensure child object page has message to user on what succeeded or failed
  oids.zip(updated_children).each do |oid, updated|
    child_events = updated.events_for_batch_process(checksum_batch_process)
    expect(child_events.count).to eq 2
    expect(child_events.first.reason).to eq "Child #{oid} was updated with the sha512 checksum value, read from the access primary original image file."
    expect(child_events.last.reason).to eq "Child #{oid} has been updated"
  end
end
# rubocop:enable Layout/LineLength

it "cannot update child checksum with _blank_ values" do
expect(child_object_2.sha512_checksum).to eq initial_fixity_value.to_s
checksum_batch_process.file = checksum_csv_blank_value_upload
Expand All @@ -132,7 +179,7 @@
# Gives the user only the viewer role on the admin set, then attaches the
# valid checksum CSV to the batch process and saves it.
before do
  user.add_role(:viewer, admin_set)
  checksum_batch_process.user_id = user.id
  # a preceding assignment of a different upload was overwritten here before
  # the save, so only this one is kept
  checksum_batch_process.file = checksum_csv_valid_upload
  checksum_batch_process.save
end

Expand All @@ -141,7 +188,7 @@
child_object = ChildObject.find(10_736_292)
expect(child_object.sha512_checksum).to eq initial_fixity_value.to_s
expect(checksum_batch_process.batch_ingest_events.first.reason).to eq "Skipping row [2] with child oid: 10736292, user does not have permission to update."
expect(checksum_batch_process.batch_ingest_events.last.reason).to eq "Child objects that were not updated: [[\"10736292\", \"67890\", \"12\"]]."
expect(checksum_batch_process.batch_ingest_events.last.reason).to eq "Child objects that were not updated: [\"10736292\", \"67890\", \"12\"]."
end
end
end
Expand Down