Skip to content

Commit 939288e

Browse files
Merge pull request #52 from UIUCLibrary/dev
Dev
2 parents 014ba65 + 4de6d06 commit 939288e

File tree

6 files changed

+15
-27
lines changed

6 files changed

+15
-27
lines changed

galatea/clean_tsv.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,7 @@ def clean_tsv(
204204
modified_data = []
205205
dialect = get_tsv_dialect(tsv_file)
206206
field_names = galatea.tsv.get_field_names(source)
207+
row: TableRow[Marc_Entry]
207208
for row in iter_tsv_fp(tsv_file, dialect):
208209
transformed_row = transform_row_and_merge(
209210
row.entry,

galatea/cli.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323

2424
logger = logging.getLogger(__name__)
2525

26+
2627
def get_versions_from_package() -> Optional[str]:
2728
"""Get version information from the package metadata."""
2829
try:

galatea/modifiers.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
if typing.TYPE_CHECKING:
1313
from galatea.marc import MarcEntryDataTypes
1414

15+
1516
def split_and_modify(
1617
entry: MarcEntryDataTypes,
1718
funcs: List[Callable[[MarcEntryDataTypes], MarcEntryDataTypes]],
@@ -117,6 +118,7 @@ def regex_transform(
117118
return None
118119
return re.sub(pattern, replacement, entry)
119120

121+
120122
@functools.cache
121123
def _get_relator_term_regex():
122124
terms = (
@@ -125,6 +127,7 @@ def _get_relator_term_regex():
125127
relator_terms_in_regex = "|".join(terms)
126128
return fr"({relator_terms_in_regex})\.?"
127129

130+
128131
def remove_relator_terms(entry: MarcEntryDataTypes):
129132
"""Remove any relator terms from the string.
130133

galatea/validate_authorized_terms.py

Lines changed: 4 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -89,26 +89,6 @@ def check_terms(name: str, cache: CachedApiCheck) -> bool:
8989
return cache.get_data(name).status_code == 200
9090

9191

92-
# def _iter_things_to_check(
93-
# source: pathlib.Path,
94-
# ) -> Iterator[Tuple[int, str, str]]:
95-
# field_names = {
96-
# "260$a",
97-
# "264$a",
98-
# }
99-
# with open(source) as tsv_file:
100-
# dialect = get_tsv_dialect(tsv_file)
101-
# for row in iter_tsv_file(source, dialect):
102-
# for field_name in field_names:
103-
# field = row.entry[field_name]
104-
# if not field:
105-
# continue
106-
# field = field.strip()
107-
# for name in field.split("||"):
108-
# cleaned_string = name.strip()
109-
# yield row.line_number, field_name, cleaned_string
110-
#
111-
11292
def optional_rate_limited_iterator(
11393
iterable: Iterable[T],
11494
bypass_sleep_func: Callable[[T], bool] = lambda *_: False,
@@ -137,6 +117,7 @@ def optional_rate_limited_iterator(
137117
start_time = time.time()
138118
yield results
139119

120+
140121
class IterTerms(collections.abc.Iterable):
141122
tsv_file_row_iterator = iter_tsv_file
142123

@@ -145,10 +126,11 @@ def __init__(self, source):
145126
self.field_names = set()
146127

147128
def iter_rows(self):
148-
with open(self._source) as tsv_file:
129+
with open(self._source, encoding="utf-8") as tsv_file:
149130
dialect = get_tsv_dialect(tsv_file)
150131
for row in IterTerms.tsv_file_row_iterator(self._source, dialect):
151132
yield row
133+
152134
def __iter__(self):
153135
for row in self.iter_rows():
154136
for field_name in self.field_names:
@@ -160,6 +142,7 @@ def __iter__(self):
160142
cleaned_string = name.strip()
161143
yield row.line_number, field_name, cleaned_string
162144

145+
163146
def validate_authorized_terms(source: pathlib.Path) -> None:
164147
"""Validate Authorized terms.
165148

pyproject.toml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,14 @@ requires-python = ">= 3.10"
66
authors = [
77
{name="University Library at The University of Illinois at Urbana Champaign: Preservation Services", email="[email protected]"}
88
]
9-
license = { file="LICENSE"}
9+
license = "NCSA"
10+
license-files = ["LICENSE"]
1011
readme = { file= "README.md", content-type="text/markdown" }
1112
classifiers = [
1213
"Development Status :: 3 - Alpha",
1314
"Environment :: Console",
1415
"Natural Language :: English",
1516
"Intended Audience :: Science/Research",
16-
"License :: OSI Approved :: University of Illinois/NCSA Open Source License",
1717
"Programming Language :: Python :: 3",
1818
"Topic :: System :: Archiving"
1919

@@ -27,7 +27,7 @@ dependencies = [
2727
galatea = "galatea.cli:main"
2828

2929
[build-system]
30-
requires = ["setuptools>=75.1.0"]
30+
requires = ["setuptools>=77.0.0"]
3131
build-backend = "setuptools.build_meta"
3232

3333
[tool.setuptools]
@@ -59,4 +59,5 @@ ignore = ["D203"]
5959
convention = "google"
6060

6161
[tool.ruff.lint.per-file-ignores]
62-
"tests/**" = ["D"]
62+
"tests/**" = ["D"]
63+
"docs/conf.py" = ["D"]

tests/test_clean_tsv.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
import pathlib
2-
from unittest.mock import Mock, mock_open, patch, ANY, create_autospec
2+
from unittest.mock import Mock, mock_open, patch, ANY
33

44
import pytest
55
import galatea.tsv
66
import galatea.clean_tsv
77
from galatea import clean_tsv, modifiers
8-
import io
98

109
def test_make_empty_strings_none_removes_empty_strings():
1110
record = {"1": "somedata", "50": ""}

0 commit comments

Comments
 (0)