|
28 | 28 | })
|
29 | 29 |
|
30 | 30 | supported_dict_extensions = [
|
31 |
| - ".json", ".ifo", ".mdx", ".dsl", ".dz", ".csv", ".tsv", ".xz", ".bz2", ".gz" |
| 31 | + ".json", ".jsonl", ".ifo", ".mdx", ".dsl", ".dz", ".csv", ".tsv", ".xz", ".bz2", ".gz" |
32 | 32 | ]
|
33 | 33 |
|
34 | 34 |
|
@@ -84,7 +84,7 @@ def dictinfo(path) -> dict[str, str]:
|
84 | 84 | return {"type": "audiolib", "basename": basename, "path": path}
|
85 | 85 | if ext not in supported_dict_extensions:
|
86 | 86 | raise NotImplementedError("Unsupported format")
|
87 |
| - if ext in ('.json', '.xz', '.bz2', '.gz'): |
| 87 | + if ext in ('.json', '.jsonl', '.xz', '.bz2', '.gz'): |
88 | 88 | with zopen(path) as f:
|
89 | 89 | try:
|
90 | 90 | d = json.load(f)
|
@@ -251,6 +251,10 @@ def parseKaikki(path, lang) -> dict[str, str]:
|
251 | 251 | '''
|
252 | 252 | print("Parsing Kaikki wiktionary dump at " + path)
|
253 | 253 | items: list[tuple[str, str]] = []
|
| 254 | + |
| 255 | + if path.endswith(".json"): |
| 256 | + logger.warning("Legacy Kaikki JSON dump detected, this may cause issues. New exports have a .jsonl suffix") |
| 257 | + |
254 | 258 | with zopen(path) as f:
|
255 | 259 | logger.debug("Parsing Kaikki wiktionary dump at " + path)
|
256 | 260 | logger.debug("Only importing entries in language " + lang)
|
|
0 commit comments