Skip to content

Commit 42fcd98

Browse files
authored
Merge pull request #106 from sneakers-the-rat/handle-10_5555-prefix
Special case `10.5555` DOIs (and fix ambiguous DOI checker results)
2 parents c695650 + fe6b0c3 commit 42fcd98

File tree

3 files changed

+82
-19
lines changed

3 files changed

+82
-19
lines changed

app/lib/doi_checker.rb

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,52 @@ def initialize(entries=[])
99
end
1010

1111
def check_dois
12-
doi_summary = {ok: [], missing: [], invalid: []}
12+
doi_summary = {ok: [], skip: [], missing: [], invalid: []}
1313

1414
if @entries.any?
1515
@entries.each do |entry|
16-
if entry.has_field?('doi') && !entry.doi.empty?
16+
# handle special cases first
17+
special_case = self.handle_special_case(entry)
18+
if special_case
19+
doi_validity = special_case
20+
elsif entry.has_field?('doi') && !entry.doi.empty?
21+
# Validate entries with DOIs
1722
doi_validity = validate_doi(entry.doi.value)
18-
doi_summary[doi_validity[:validity]].push(doi_validity[:msg])
19-
# If there's no DOI present, check Crossref to see if we can find a candidate DOI for this entry.
2023
elsif entry.has_field?('title')
21-
candidate_doi = crossref_lookup(entry.title.value)
22-
truncated_title = entry.title.to_s[0,50]
23-
truncated_title += "..." if truncated_title.length < entry.title.to_s.length
24-
if candidate_doi == "CROSSREF-ERROR"
25-
doi_summary[:missing].push("Errored finding suggestions for \"#{truncated_title}\", please try later")
26-
elsif candidate_doi
27-
doi_summary[:missing].push("#{candidate_doi} may be a valid DOI for title: #{truncated_title}")
28-
else
29-
doi_summary[:missing].push("No DOI given, and none found for title: #{truncated_title}")
30-
end
24+
# Try and find candidate entries if doi absent, but title present
25+
doi_validity = handle_missing_doi(entry)
3126
else
32-
doi_summary[:missing].push("Entry without DOI or title found")
27+
doi_validity = {validity: :missing, msg: "Entry without DOI or title found"}
3328
end
29+
30+
doi_summary[doi_validity[:validity]].push(doi_validity[:msg])
3431
end
3532
end
3633

3734
doi_summary
35+
end
36+
37+
# any special case should return false if not applicable, and an object like
38+
# {:validity => :ok, :msg => "whatever"} otherwise.
39+
# Add additional special cases as private methods and chain in a tidy sequence plz <3
40+
def handle_special_case(entry)
41+
validity = acm_105555_prefix(entry) and return validity
42+
false
43+
end
44+
45+
46+
# If there's no DOI present, check Crossref to see if we can find a candidate DOI for this entry.
47+
def handle_missing_doi(entry)
48+
candidate_doi = crossref_lookup(entry.title.value)
49+
truncated_title = entry.title.to_s[0,50]
50+
truncated_title += "..." if truncated_title.length < entry.title.to_s.length
51+
if candidate_doi == "CROSSREF-ERROR"
52+
{ validity: :missing, msg: "Errored finding suggestions for \"#{truncated_title}\", please try later" }
53+
elsif candidate_doi
54+
{ validity: :missing, msg: "#{candidate_doi} may be a valid DOI for title: #{truncated_title}" }
55+
else
56+
{ validity: :skip, msg: "No DOI given, and none found for title: #{truncated_title}" }
57+
end
3858
end
3959

4060
def validate_doi(doi_string)
@@ -112,4 +132,16 @@ def levenshtein_distance(s, t)
112132
def similar?(string_1, string_2)
113133
levenshtein_distance(string_1, string_2) < 3
114134
end
135+
136+
private
137+
138+
def acm_105555_prefix(entry)
139+
if entry.has_field?('doi') && entry.doi.include?("10.5555/")
140+
{ validity: :invalid, msg: "#{entry.doi} is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/{doi} in the {url} field" }
141+
elsif entry.has_field?('url') && entry.url.include?("https://dl.acm.org/doi/10.5555")
142+
{ validity: :skip, msg: "#{entry.url} - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves" }
143+
else
144+
false
145+
end
146+
end
115147
end

app/responses/doi_checks.erb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
```
22
Reference check summary (note 'MISSING' DOIs are suggestions that need verification):
33
<% doi_summary.each do |type, messages| -%>
4-
5-
<%= type.to_s.upcase %> DOIs
6-
4+
<% if type.to_s === "ok" %>
5+
✅ - <%= type.to_s.upcase %> DOIs
6+
<% elsif type.to_s === "skip" %>
7+
❔ - <%= type.to_s.upcase %> DOIs
8+
<% else %>
9+
❌ - <%= type.to_s.upcase %> DOIs
10+
<% end %>
711
<% if messages.empty? -%>
812
- None
913
<% else -%>

spec/doi_checker_spec.rb

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393

9494
expect(doi_summary[:ok]).to be_empty
9595
expect(doi_summary[:invalid]).to be_empty
96-
expect(doi_summary[:missing][0]).to eq("No DOI given, and none found for title: #{title}")
96+
expect(doi_summary[:skip][0]).to eq("No DOI given, and none found for title: #{title}")
9797
end
9898

9999
it "should report entries with no DOI or title as missing both" do
@@ -107,6 +107,33 @@
107107
end
108108
end
109109

110+
describe "#handle_special_case" do
111+
it "should treat DOIs with a 10.5555 prefix as invalid" do
112+
entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy")
113+
validity = subject.handle_special_case(entry)
114+
expect(validity[:validity]).to eq(:invalid)
115+
expect(validity[:msg]).to include("replace with https://dl.acm.org/doi")
116+
end
117+
118+
it "should treat URLs with a 10.5555 prefix as a skip" do
119+
entry = BibTeX::Entry.new(url: "https://dl.acm.org/doi/10.5555/2827719.2827740")
120+
validity = subject.handle_special_case(entry)
121+
expect(validity[:validity]).to eq(:skip)
122+
expect(validity[:msg]).to eq("https://dl.acm.org/doi/10.5555/2827719.2827740 - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves")
123+
end
124+
125+
it "should handle special cases separately from normal DOI checking" do
126+
entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy")
127+
doi_checker = DOIChecker.new([entry])
128+
129+
doi_summary = doi_checker.check_dois
130+
expect(doi_summary[:ok]).to be_empty
131+
expect(doi_summary[:missing]).to be_empty
132+
expect(doi_summary[:skip]).to be_empty
133+
expect(doi_summary[:invalid][0]).to include("is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/")
134+
end
135+
end
136+
110137
describe "#validate_doi" do
111138

112139
it "should invalidate empty doi strings" do

0 commit comments

Comments
 (0)