Skip to content

Commit 509f617

Browse files
Swiddis1over137
authored and committed
Update dictformats to use jsonl
1 parent ff40b53 commit 509f617

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

vocabsieve/dictformats.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
})
2929

3030
supported_dict_extensions = [
31-
".json", ".ifo", ".mdx", ".dsl", ".dz", ".csv", ".tsv", ".xz", ".bz2", ".gz"
31+
".json", ".jsonl", ".ifo", ".mdx", ".dsl", ".dz", ".csv", ".tsv", ".xz", ".bz2", ".gz"
3232
]
3333

3434

@@ -84,7 +84,7 @@ def dictinfo(path) -> dict[str, str]:
8484
return {"type": "audiolib", "basename": basename, "path": path}
8585
if ext not in supported_dict_extensions:
8686
raise NotImplementedError("Unsupported format")
87-
if ext in ('.json', '.xz', '.bz2', '.gz'):
87+
if ext in ('.json', '.jsonl', '.xz', '.bz2', '.gz'):
8888
with zopen(path) as f:
8989
try:
9090
d = json.load(f)
@@ -251,6 +251,10 @@ def parseKaikki(path, lang) -> dict[str, str]:
251251
'''
252252
print("Parsing Kaikki wiktionary dump at " + path)
253253
items: list[tuple[str, str]] = []
254+
255+
if path.endswith(".json"):
256+
logger.warning("Legacy Kaikki JSON dump detected, this may cause issues. New exports have a .jsonl suffix")
257+
254258
with zopen(path) as f:
255259
logger.debug("Parsing Kaikki wiktionary dump at " + path)
256260
logger.debug("Only importing entries in language " + lang)

0 commit comments

Comments
 (0)