Skip to content

Commit 8072624

Browse files
committed
Add ruff and pre-commit config and fix issues
1 parent 43f1d91 commit 8072624

29 files changed

+276
-181
lines changed

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,3 @@ __pycache__/
2020
/testing/local
2121
/testing/public
2222
/testing/archive
23-

.pre-commit-config.yaml

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
repos:
2+
- repo: meta
3+
hooks:
4+
- id: check-hooks-apply
5+
- repo: https://github.com/pre-commit/pre-commit-hooks
6+
rev: v4.4.0
7+
hooks:
8+
- id: check-ast
9+
- id: end-of-file-fixer
10+
exclude: \.sql$|\.svg$
11+
- id: trailing-whitespace
12+
- id: debug-statements
13+
- repo: https://github.com/charliermarsh/ruff-pre-commit
14+
rev: v0.0.284
15+
hooks:
16+
- id: ruff
17+
args: [--fix, --exit-non-zero-on-fix]

README.md

+57-57
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ isimip-publisher run <path>
160160
For all commands a list of files with absolute paths (as a line-separated txt file) can be provided to restrict the files processed, e.g.:
161161

162162
```bash
163-
isimip-publisher -e exclude.txt -i include.txt run <path>
163+
isimip-publisher -e exclude.txt -i include.txt run <path>
164164
```
165165

166166
Default values for the optional arguments are set in the code, but can also be provided via:
@@ -242,104 +242,104 @@ The database schema is automatically created when `insert_datasets` or `init` is
242242

243243
```
244244
Table "public.datasets"
245-
Column | Type | Collation | Nullable | Default
245+
Column | Type | Collation | Nullable | Default
246246
-------------+-----------------------------+-----------+----------+---------
247-
id | uuid | | not null |
248-
target_id | uuid | | |
249-
name | text | | not null |
250-
path | text | | not null |
251-
version | character varying(8) | | not null |
252-
size | bigint | | not null |
253-
specifiers | jsonb | | not null |
254-
identifiers | text[] | | not null |
255-
public | boolean | | not null |
256-
tree_path | text | | |
257-
rights | text | | |
258-
created | timestamp without time zone | | |
259-
updated | timestamp without time zone | | |
260-
published | timestamp without time zone | | |
247+
id | uuid | | not null |
248+
target_id | uuid | | |
249+
name | text | | not null |
250+
path | text | | not null |
251+
version | character varying(8) | | not null |
252+
size | bigint | | not null |
253+
specifiers | jsonb | | not null |
254+
identifiers | text[] | | not null |
255+
public | boolean | | not null |
256+
tree_path | text | | |
257+
rights | text | | |
258+
created | timestamp without time zone | | |
259+
updated | timestamp without time zone | | |
260+
published | timestamp without time zone | | |
261261
archived | timestamp without time zone | | |
262262
```
263263

264264
```
265265
Table "public.files"
266-
Column | Type | Collation | Nullable | Default
266+
Column | Type | Collation | Nullable | Default
267267
---------------+-----------------------------+-----------+----------+---------
268-
id | uuid | | not null |
269-
dataset_id | uuid | | |
270-
target_id | uuid | | |
271-
name | text | | not null |
272-
path | text | | not null |
273-
version | character varying(8) | | not null |
274-
size | bigint | | not null |
275-
checksum | text | | not null |
276-
checksum_type | text | | not null |
277-
netcdf_header | jsonb | | |
278-
specifiers | jsonb | | not null |
279-
identifiers | text[] | | not null |
280-
created | timestamp without time zone | | |
281-
updated | timestamp without time zone | | |
268+
id | uuid | | not null |
269+
dataset_id | uuid | | |
270+
target_id | uuid | | |
271+
name | text | | not null |
272+
path | text | | not null |
273+
version | character varying(8) | | not null |
274+
size | bigint | | not null |
275+
checksum | text | | not null |
276+
checksum_type | text | | not null |
277+
netcdf_header | jsonb | | |
278+
specifiers | jsonb | | not null |
279+
identifiers | text[] | | not null |
280+
created | timestamp without time zone | | |
281+
updated | timestamp without time zone | | |
282282
```
283283

284284
```
285285
Table "public.resources"
286-
Column | Type | Collation | Nullable | Default
286+
Column | Type | Collation | Nullable | Default
287287
----------+-----------------------------+-----------+----------+---------
288-
id | uuid | | not null |
289-
doi | text | | not null |
290-
title | text | | not null |
291-
version | text | | |
292-
paths | text[] | | not null |
293-
datacite | jsonb | | not null |
294-
created | timestamp without time zone | | |
295-
updated | timestamp without time zone | | |
288+
id | uuid | | not null |
289+
doi | text | | not null |
290+
title | text | | not null |
291+
version | text | | |
292+
paths | text[] | | not null |
293+
datacite | jsonb | | not null |
294+
created | timestamp without time zone | | |
295+
updated | timestamp without time zone | | |
296296
```
297297

298298
The many-to-many relation between `datasets` and `resources` is implemented using a separate table:
299299

300300
```
301301
Table "public.resources_datasets"
302-
Column | Type | Collation | Nullable | Default
302+
Column | Type | Collation | Nullable | Default
303303
-------------+------+-----------+----------+---------
304-
resource_id | uuid | | |
305-
dataset_id | uuid | | |
304+
resource_id | uuid | | |
305+
dataset_id | uuid | | |
306306
```
307307

308308
Additional tables are created for the search and tree functionality of the repository.
309309

310310
```
311311
Table "public.search"
312-
Column | Type | Collation | Nullable | Default
312+
Column | Type | Collation | Nullable | Default
313313
------------+-----------------------------+-----------+----------+---------
314-
dataset_id | uuid | | not null |
315-
vector | tsvector | | not null |
316-
created | timestamp without time zone | | |
317-
updated | timestamp without time zone | | |
314+
dataset_id | uuid | | not null |
315+
vector | tsvector | | not null |
316+
created | timestamp without time zone | | |
317+
updated | timestamp without time zone | | |
318318
```
319319

320320
```
321321
Table "public.trees"
322-
Column | Type | Collation | Nullable | Default
322+
Column | Type | Collation | Nullable | Default
323323
-----------+-----------------------------+-----------+----------+---------
324-
id | uuid | | not null |
325-
tree_dict | jsonb | | not null |
326-
created | timestamp without time zone | | |
324+
id | uuid | | not null |
325+
tree_dict | jsonb | | not null |
326+
created | timestamp without time zone | | |
327327
updated | timestamp without time zone | | |
328328
```
329329

330330
Two materialized views are used to allow a fast lookup to all `identifiers` (with the list of corresponding specifiers), as well as all `words` (the list of tokens for the search):
331331

332332
```
333333
Materialized view "public.identifiers"
334-
Column | Type | Collation | Nullable | Default
334+
Column | Type | Collation | Nullable | Default
335335
------------+------+-----------+----------+---------
336-
identifier | text | | |
337-
specifiers | json | | |
336+
identifier | text | | |
337+
specifiers | json | | |
338338
```
339339

340340
```
341341
Materialized view "public.words"
342-
Column | Type | Collation | Nullable | Default
342+
Column | Type | Collation | Nullable | Default
343343
--------+------+-----------+----------+---------
344-
word | text | | |
344+
word | text | | |
345345
```

isimip_publisher/commands.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,9 @@ def link_datasets():
208208

209209
for file in dataset.files:
210210
target_file_path = str(settings.TARGET_PATH / Path(file.path).relative_to(settings.PATH))
211-
database.insert_file_link(session, settings.VERSION, target_file_path, file.dataset.path, file.name, file.path,
212-
file.size, file.checksum, file.checksum_type, file.netcdf_header, file.specifiers)
211+
database.insert_file_link(session, settings.VERSION, target_file_path, file.dataset.path,
212+
file.name, file.path, file.size, file.checksum, file.checksum_type,
213+
file.netcdf_header, file.specifiers)
213214

214215
session.commit()
215216
database.update_tree(session, settings.PATH, settings.TREE)

isimip_publisher/config.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import logging
2-
32
from datetime import datetime
43
from pathlib import Path
54

65
from isimip_utils.config import Settings as BaseSettings
76
from isimip_utils.decorators import cached_property
8-
from isimip_utils.fetch import (fetch_definitions, fetch_pattern, fetch_resource,
9-
fetch_schema, fetch_tree)
7+
from isimip_utils.fetch import fetch_definitions, fetch_pattern, fetch_resource, fetch_schema, fetch_tree
108
from isimip_utils.utils import parse_filelist
119

1210
logger = logging.getLogger(__name__)
@@ -38,8 +36,8 @@ def setup(self, args):
3836

3937
try:
4038
datetime.strptime(self.VERSION, '%Y%m%d')
41-
except ValueError:
42-
raise AssertionError("Incorrect version format, should be YYYYMMDD")
39+
except ValueError as e:
40+
raise AssertionError("Incorrect version format, should be YYYYMMDD") from e
4341

4442
@cached_property
4543
def REMOTE_PATH(self):
@@ -107,7 +105,7 @@ def TREE(self):
107105
return fetch_tree(self.PROTOCOL_LOCATIONS.split(), self.PATH)
108106

109107

110-
class Store(object):
108+
class Store:
111109

112110
_shared_state = {}
113111

isimip_publisher/main.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,37 @@
22

33
from isimip_utils.parser import ArgumentParser
44

5-
from .commands import (archive_datasets, check, clean, fetch_files,
6-
insert_datasets, insert_doi, link_datasets, link_files,
7-
link_links, list_local, list_public, list_public_links,
8-
list_remote, list_remote_links, match_local,
9-
match_public, match_public_links, match_remote,
10-
match_remote_links, publish_datasets, register_doi,
11-
update_datasets, update_doi, update_search, update_tree,
12-
update_views, write_link_jsons, write_local_jsons,
13-
write_public_jsons)
5+
from .commands import (
6+
archive_datasets,
7+
check,
8+
clean,
9+
fetch_files,
10+
insert_datasets,
11+
insert_doi,
12+
link_datasets,
13+
link_files,
14+
link_links,
15+
list_local,
16+
list_public,
17+
list_public_links,
18+
list_remote,
19+
list_remote_links,
20+
match_local,
21+
match_public,
22+
match_public_links,
23+
match_remote,
24+
match_remote_links,
25+
publish_datasets,
26+
register_doi,
27+
update_datasets,
28+
update_doi,
29+
update_search,
30+
update_tree,
31+
update_views,
32+
write_link_jsons,
33+
write_local_jsons,
34+
write_public_jsons,
35+
)
1436
from .config import RIGHTS_CHOICES, settings
1537

1638

isimip_publisher/models.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66

77
from isimip_utils.checksum import get_checksum, get_checksum_type
88
from isimip_utils.decorators import cached_property
9-
from isimip_utils.netcdf import (open_dataset_read, get_dimensions,
10-
get_global_attributes, get_variables)
9+
from isimip_utils.netcdf import get_dimensions, get_global_attributes, get_variables, open_dataset_read
1110

1211
from .utils.files import get_size
1312

1413
logger = logging.getLogger(__name__)
1514

1615

17-
class Dataset(object):
16+
class Dataset:
1817

1918
def __init__(self, name=None, path=None, specifiers=None):
2019
self.name = name
@@ -47,7 +46,7 @@ def validate(self, schema):
4746
raise e
4847

4948

50-
class File(object):
49+
class File:
5150

5251
def __init__(self, dataset=None, name=None, path=None, abspath=None, specifiers=None):
5352
self.dataset = dataset

isimip_publisher/tests/test_commands.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import shutil
3-
from pathlib import Path
43
from datetime import datetime
4+
from pathlib import Path
55

66
import pytest
77
from dotenv import load_dotenv
@@ -261,21 +261,24 @@ def test_insert_datasets(setup, local_files, db, script_runner):
261261

262262

263263
def test_link_links(setup, remote_links, script_runner):
264-
response = script_runner.run(['isimip-publisher', 'link_links', 'round/product/sector/model', 'round/product/sector2/model'])
264+
response = script_runner.run(['isimip-publisher', 'link_links',
265+
'round/product/sector/model', 'round/product/sector2/model'])
265266
assert response.success, response.stderr
266267
assert not response.stdout
267268
assert response.stderr.strip().startswith('link_links')
268269

269270

270271
def test_link_files(setup, remote_files, script_runner):
271-
response = script_runner.run(['isimip-publisher', 'link_files', 'round/product/sector/model', 'round/product/sector2/model'])
272+
response = script_runner.run(['isimip-publisher', 'link_files',
273+
'round/product/sector/model', 'round/product/sector2/model'])
272274
assert response.success, response.stderr
273275
assert not response.stdout
274276
assert response.stderr.strip().startswith('link_files')
275277

276278

277279
def test_link_datasets(setup, public_links, script_runner):
278-
response = script_runner.run(['isimip-publisher', 'link_datasets', 'round/product/sector/model', 'round/product/sector2/model'])
280+
response = script_runner.run(['isimip-publisher', 'link_datasets',
281+
'round/product/sector/model', 'round/product/sector2/model'])
279282
assert response.success, response.stderr
280283
assert not response.stdout
281284
assert response.stderr.strip().startswith('link_datasets')
@@ -338,7 +341,8 @@ def test_clean(setup, script_runner):
338341

339342

340343
def test_insert_doi(setup, db, public_datasets, script_runner):
341-
response = script_runner.run(['isimip-publisher', 'insert_doi', 'testing/resources/test.json', 'round/product/sector/model'])
344+
response = script_runner.run(['isimip-publisher', 'insert_doi',
345+
'testing/resources/test.json', 'round/product/sector/model'])
342346
assert response.success, response.stderr
343347
assert not response.stdout
344348
assert not response.stderr

0 commit comments

Comments
 (0)