Skip to content

Commit e90b301

Browse files
committed
Deal with gcld3 problems
Makes the tests and code work without gcld3 and protobuf-compiler. Makes gcld3 an optional extra in pyproject.toml. Tests that use gcld3 now call pytest.skip if their imports fail. At some point gcld3 was added to the requirements, but it needs protobuf-compiler, which cannot be installed with pip. This caused problems for users of arxiv-base who didn't need gcld3. metadata.py needs gcld3.
1 parent cdb6378 commit e90b301

File tree

4 files changed

+234
-63
lines changed

4 files changed

+234
-63
lines changed

.github/workflows/pullreqeust_tests.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ jobs:
2121
poetry --version
2222
# Check if poetry lock file is current with pyproject.toml
2323
poetry check --lock
24+
# Must be able to install without the QA extra (gcld3, which needs protobuf-compiler)
2425
poetry install --with=dev --no-ansi
26+
# Install with QA/gcld3
27+
poetry install --with=dev --no-ansi --extras qa
2528
2629
- name: Run db/test with MySQL docker
2730
run: |

arxiv/metadata/tests/test_metacheck.py

Lines changed: 41 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11

22
import pytest
33

4-
from arxiv.metadata import FieldName
5-
from arxiv.metadata import Disposition
6-
from arxiv.metadata import metacheck
7-
8-
from arxiv.metadata.metacheck import combine_dispositions
4+
try:
5+
from arxiv.metadata import FieldName
6+
from arxiv.metadata import Disposition
7+
from arxiv.metadata import metacheck
8+
9+
from arxiv.metadata.metacheck import combine_dispositions
10+
except ModuleNotFoundError:
11+
pytest.skip(
12+
""""gcld3 and/or protobuf-compile are not installed.
13+
To run these tests, install with:
14+
sudo apt install -y protobuf-compiler
15+
poetry install --with-dev --extras qa""", allow_module_level=True)
916

1017
############################################################
1118
TITLE = FieldName.TITLE
@@ -23,18 +30,18 @@
2330

2431
############################################################
2532
# Helper functions for unit tests
26-
33+
2734
def check_result( result, expected_result ):
2835
# result should be ( OK, [] ) or (HOLD, [message, message...]) or (WARN, ...)
2936
if expected_result is None:
3037
assert result[0] == OK
3138
else:
3239
assert result[0] == expected_result[0]
33-
assert result[1] != None
40+
assert result[1] != None
3441
# assert len(result[1]) == len( expected_result[1] )
3542
assert result[1] == expected_result[1]
3643
#
37-
44+
3845

3946
############################################################
4047
# Tests for internal (helper) function combine_dispositions
@@ -44,13 +51,13 @@ def test_combine_dispositions():
4451
assert HOLD == combine_dispositions( HOLD, WARN )
4552
assert HOLD == combine_dispositions( WARN, HOLD )
4653
assert HOLD == combine_dispositions( HOLD, OK )
47-
assert HOLD == combine_dispositions( OK, HOLD )
54+
assert HOLD == combine_dispositions( OK, HOLD )
4855
assert WARN == combine_dispositions( WARN, WARN )
4956
assert WARN == combine_dispositions( WARN, OK )
5057
assert WARN == combine_dispositions( OK, WARN )
5158
assert OK == combine_dispositions( OK, OK )
5259

53-
############################################################l
60+
############################################################l
5461
##### TITLE field checks
5562

5663
TITLE_TESTS = [
@@ -75,7 +82,7 @@ def test_combine_dispositions():
7582
('NOT EVEN BORDERLINE ALL CAPS TITLE',
7683
(WARN, ["Title: excessive capitalization"])),
7784
('BORDERLINE All Caps TITLE', None),
78-
('BORDERLINE ALL caps TITLE',
85+
('BORDERLINE ALL caps TITLE',
7986
(WARN, ["Title: excessive capitalization"])),
8087
('This is a title WITH ONE LONG WORD CAPITALIZED', None),
8188
('This is a title WITH SOME EXTRAEXTRA LONG WORDS CAPITALIZED',
@@ -113,25 +120,25 @@ def test_titles(test):
113120
title, expected_result = test
114121
result = metacheck.check( { TITLE: title } );
115122
check_result(result[TITLE], expected_result)
116-
123+
117124
############################################################
118125
##### Detailed tests for AUTHORS field
119126

120127
AUTHORS_TESTS = [
121128
('Fred Smith', None),
122129
('Fred Smith, Joe Bloggs', None),
123-
# We don't check for ellipsis, but
130+
# We don't check for ellipsis, but
124131
# ('Fred Smith, Joe Bloggs, ...',
125132
# (WARN, ["Authors: ends with punctuation"])),
126133
('Fred Smith, \\ Joe Bloggs', None ),
127-
('Fred Smith, \\\\ Joe Bloggs',
134+
('Fred Smith, \\\\ Joe Bloggs',
128135
(WARN, ["Authors: contains TeX line break"])),
129-
('Fred Smith*, Joe Bloggs#, Bob Briggs^, Jill Camana@, and Rebecca MacInnon',
136+
('Fred Smith*, Joe Bloggs#, Bob Briggs^, Jill Camana@, and Rebecca MacInnon',
130137
(WARN, [
131138
"Authors: contains bad character '*'",
132139
"Authors: contains bad character '#'",
133140
"Authors: contains bad character '^'",
134-
"Authors: contains bad character '@'",
141+
"Authors: contains bad character '@'",
135142
])),
136143
(' Jane Austen ',
137144
(WARN, [
@@ -148,8 +155,8 @@ def test_titles(test):
148155
('Person with <sup>1</sup>',
149156
(WARN, ['Authors: contains HTML',
150157
# 'Authors: no caps in name',
151-
'Authors: name should not contain digits',
152-
])),
158+
'Authors: name should not contain digits',
159+
])),
153160
('Jane Smith<br/>Joe linebreaks<br />Alice Third',
154161
(WARN, ['Authors: contains HTML'])),
155162
('C. Sivaram (1) and Kenath Arun (2) ((1) Indian Institute of Astrophysics, Bangalore, (2) Christ Junior College, Bangalore)', None), # should not flag physics in astrophys as inappropriate
@@ -158,7 +165,7 @@ def test_titles(test):
158165
('Jaganathan SR', None),
159166
('Sylvie ROUX', None), # ?
160167
('S ROUX', None),
161-
('SYLVIE ROUX',
168+
('SYLVIE ROUX',
162169
(WARN, ["Author name is in all caps"])),
163170
('Sylvie roux', None), # ?
164171
('sylvie roux',
@@ -216,7 +223,7 @@ def test_titles(test):
216223
('Adrienne Bloss, Audie Cornish, and ChatGPT',
217224
(WARN, [
218225
"Authors: lone surname",
219-
"Authors: name should not contain chatgpt",
226+
"Authors: name should not contain chatgpt",
220227
])),
221228
# ('Paul R.~Archer', "Authors: tilde as hard space?"),
222229
# "Authors: includes semicolon not in affiliation, comma intended?"
@@ -230,7 +237,7 @@ def test_titles(test):
230237
('T. Zaj\\k{a}c', None),
231238
('(T. Zaj\\k{a}c',
232239
(WARN, ["Authors: unbalanced brackets"])),
233-
]
240+
]
234241

235242
@pytest.mark.parametrize("test", AUTHORS_TESTS)
236243
def test_authors(test):
@@ -255,7 +262,7 @@ def test_authors(test):
255262
('Some words\\\\\\\\ more words',
256263
(WARN, ['Abstract: contains TeX line break'])),
257264
# (MathJax now handles "$3$-coloring")
258-
('Work \\cite{8} established a connection between the edge $3$-coloring', None),
265+
('Work \\cite{8} established a connection between the edge $3$-coloring', None),
259266
# Not yet:
260267
# ('he abstract is sometimes missing a first letter, warn if starts with lower',
261268
# (WARN, ['Abstract: starts with lower case']
@@ -271,15 +278,15 @@ def test_authors(test):
271278
'Abstract: contains \\texttt',
272279
'Abstract: contains unnecessary escape: \\#',
273280
'Abstract: contains unnecessary escape: \\%',
274-
])),
281+
])),
275282
('This ] is bad',
276283
(WARN, ['Abstract: unbalanced brackets'])),
277284
('Учењето со засилување е разноврсна рамка за учење за решавање на сложени задачи од реалниот свет. Конечно, разговараме за отворените предизвици на техниките за анализа за RL алгоритми.',
278285
(WARN, ['Abstract does not appear to be English'])),
279286
('El aprendizaje por refuerzo es un marco versátil para aprender a resolver tareas complejas del mundo real. Sin embargo, las influencias en el rendimiento de aprendizaje de los algoritmos de aprendizaje por refuerzo suelen comprenderse mal en la práctica.',
280287
(WARN, ['Abstract does not appear to be English'])),
281288
]
282-
289+
283290
@pytest.mark.parametrize("test", ABSTRACT_TESTS)
284291
def test_abstracts(test):
285292
(abs, expected_result) = test
@@ -292,7 +299,7 @@ def test_abstracts(test):
292299

293300
COMMENTS_TESTS = [
294301
('',None),
295-
('A comment',None),
302+
('A comment',None),
296303
('15 pages, 6 figures',None),
297304
# ('15 pages, 6 figures,',(HOLD,['Comments: ends with punctuation (,)'])],
298305
# ['15 pages, 6 figures:',(HOLD,['Comments: ends with punctuation (:)'])],
@@ -308,7 +315,7 @@ def test_abstracts(test):
308315
'Comments: contains \\texttt',
309316
'Comments: contains unnecessary escape: \\#',
310317
'Comments: contains unnecessary escape: \\%',
311-
])),
318+
])),
312319
('This ] is bad',
313320
(WARN, ['Comments: unbalanced brackets'])),
314321
]
@@ -346,7 +353,7 @@ def test_report_num(test):
346353
JREF_TESTS = [
347354
# ['ibid',"Journal-ref: inappropriate word: ibid"],
348355
('Proceedings of the 34th "The Web Conference" (WWW 2025)', None),
349-
('JACM volume 1 issue 3, Jan 2024', None),
356+
('JACM volume 1 issue 3, Jan 2024', None),
350357
('1975',
351358
(WARN, ["jref: too short"])),
352359
('Science 1.1',
@@ -369,7 +376,7 @@ def test_jrefs(test):
369376
# print( jref, result )
370377
check_result(result[JOURNAL_REF], expected_result)
371378

372-
############################################################
379+
############################################################
373380
# (related DOI?) DOI field checks
374381

375382
DOI_TESTS = [
@@ -400,27 +407,26 @@ def test_doi(test):
400407

401408
UNBALANCED_BRACKETS_TESTS = [
402409
'this [ is wrong',
403-
'this [ } wrong',
404-
'this [ ) wrong',
410+
'this [ } wrong',
411+
'this [ ) wrong',
405412
'this ( is wrong',
406413
'this ( ] wrong',
407-
'this ( } wrong',
414+
'this ( } wrong',
408415
'this { is wrong',
409416
'this { ) wrong',
410417
'this { ] wrong',
411-
'this is ) ( wrong',
412-
'this is } wrong',
418+
'this is ) ( wrong',
419+
'this is } wrong',
413420
'this is ] [ wrong',
414421
]
415422

416423
@pytest.mark.parametrize("s", BALANCED_BRACKETS_TESTS)
417424
def test_balanced_brackets(s):
418425
assert metacheck.all_brackets_balanced(s) == True
419426

420-
@pytest.mark.parametrize("s", BALANCED_BRACKETS_TESTS)
427+
@pytest.mark.parametrize("s", BALANCED_BRACKETS_TESTS)
421428
def test_unbalanced_brackets(s):
422429
assert metacheck.all_brackets_balanced(s) == True
423430

424431
# pyenv activate arxiv-base-3-11
425432
# python -m pytest arxiv/metadata/tests/test_metacheck.py
426-

0 commit comments

Comments
 (0)