Skip to content

Commit 0da4f18

Browse files
ghisvail authored and percevalw committed
fix: replace unidecode with anyascii
1 parent e2cc771 commit 0da4f18

File tree

3 files changed

+6
-6
lines changed

3 files changed

+6
-6
lines changed

nlstruct/data_utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import numpy as np
66
import regex
7-
from unidecode import unidecode
7+
from anyascii import anyascii
88

99
import functools
1010
import textwrap
@@ -369,10 +369,10 @@ def regex_multisub_with_spans(patterns, replacements, text, deltas=None, return_
369369

370370
def run_unidecode(text, return_deltas=False):
371371
if not return_deltas:
372-
return unidecode(text), None
372+
return anyascii(text), None
373373
begins, ends, deltas = [], [], []
374374
new_text = ""
375-
for i, (old_char, new_char) in enumerate((char, unidecode(char)) for char in text):
375+
for i, (old_char, new_char) in enumerate((char, anyascii(char)) for char in text):
376376
if len(old_char) != len(new_char):
377377
begins.append(i)
378378
ends.append(i + 1)

nlstruct/datasets/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
import torch
1313
from sklearn.datasets._base import _sha256
1414
from tqdm import tqdm
15-
from unidecode import unidecode
15+
from anyascii import anyascii
1616
import warnings
1717

1818
from ..data_utils import mix, loop
@@ -101,7 +101,7 @@ def __init__(self,
101101
if synonym_preprocess_fn is not None:
102102
synonym = synonym_preprocess_fn(synonym)
103103
if do_unidecode:
104-
synonym = unidecode(synonym)
104+
synonym = anyascii(synonym)
105105
if subs:
106106
for pattern, replacement in subs:
107107
synonym = re.sub(pattern, replacement, synonym)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@
2727
"fire",
2828
"torch>=1.0.0",
2929
"numpy",
30-
"unidecode>=1.1.2",
3130
"einops>=0.3.0",
31+
"anyascii>=0.3.2",
3232
"transformers>=4.3.0",
3333
"tqdm>=4.56.0",
3434
"scikit-learn",

0 commit comments

Comments
 (0)