diff --git a/requirements.txt b/requirements.txt index 08a17b7..36ed34a 100755 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ gensim>=3.4.0,<4.0 jupyter>=1.0.0,<3.0.0 # pandas>=0.23.4,<0.30.0 matplotlib>=2.2.3,<2.3.0 nltk>=3.2.5,<3.5.0 -nlup==0.5 +# nlup==0.5 pandas>=0.24.2,<0.25.0 pip>=18.0 plotly>=3.2.1,<3.4.0 diff --git a/setup.cfg b/setup.cfg index bcce2d1..ce1a929 100755 --- a/setup.cfg +++ b/setup.cfg @@ -38,7 +38,7 @@ include_package_data = True package_dir = =src # Add here dependencies of your project (semicolon-separated), e.g. -install_requires = coverage; future; fuzzywuzzy; jupyter; matplotlib; nltk; nlup==0.5; pandas; pip; pypandoc; python-Levenshtein; python-slugify; scikit-learn; scipy; plotly; seaborn; tqdm; wheel; gensim; +install_requires = coverage; future; fuzzywuzzy; jupyter; matplotlib; nltk; pandas; pip; pypandoc; python-Levenshtein; python-slugify; scikit-learn; scipy; plotly; seaborn; tqdm; wheel; gensim; # Add here test requirements (semicolon-separated) tests_require = pytest; pytest-cov diff --git a/src/pugnlp/detector_morse.py b/src/pugnlp/detector_morse.py index 992dc5a..19dbf0a 100755 --- a/src/pugnlp/detector_morse.py +++ b/src/pugnlp/detector_morse.py @@ -31,14 +31,17 @@ from future import standard_library standard_library.install_aliases() # noqa +from collections import namedtuple import logging from re import finditer, match, search -from collections import namedtuple - -from nlup import case_feature, isnumberlike, listify, BinaryAveragedPerceptron, BinaryConfusion, IO, JSONable from .penn_treebank_tokenizer import word_tokenize + logger = logging.getLogger(__name__) +try: + from nlup import case_feature, isnumberlike, listify, BinaryAveragedPerceptron, BinaryConfusion, IO, JSONable +except ImportError: + logger.error("detector_morse disabled because Kyle Gorman's nlup sentence boundary detector has not been installed.") # FIXME(kbg) can surely avoid full-blown tokenization diff --git a/src/pugnlp/segmentation.py b/src/pugnlp/segmentation.py index 674d444..8f52b99 100755 --- a/src/pugnlp/segmentation.py +++ b/src/pugnlp/segmentation.py @@ -12,20 +12,25 @@ import os import re from itertools import chain +import logging -import nltk.stem - -from pugnlp.detector_morse import Detector -from pugnlp.detector_morse import slurp -from pugnlp.futil import find_files -# from .penn_treebank_tokenizer import word_tokenize -import nlup +import nltk.stem +from .detector_morse import Detector +from .detector_morse import slurp +from .futil import find_files from .constants import DATA_PATH from .futil import generate_files from .util import stringify, passthrough +from .regexes import CRE_TOKEN, RE_NONWORD + +# from .penn_treebank_tokenizer import word_tokenize -from pugnlp.regexes import CRE_TOKEN, RE_NONWORD +logger = logging.getLogger(__name__) +try: + import nlup +except ImportError: + logger.error("detector_morse disabled because Kyle Gorman's nlup sentence boundary detector has not been installed.") class Split(object):