-
Notifications
You must be signed in to change notification settings - Fork 4
/
setup.py
89 lines (82 loc) · 2.77 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from setuptools import setup
from setuptools.command.install import install as _install
from os import path
# to get pylint to shut up
__appname__ = None
__appversion__ = None
# __appname__, __appversion__, and friends come from here
exec(open("otmt/version.py").read())
# Python packaging info: http://python-packaging.readthedocs.io/en/latest/index.html
# More Python packaging info: http://python-packaging-user-guide.readthedocs.io/tutorials/distributing-packages/
# Python version info: https://www.python.org/dev/peps/pep-0440/
# Thanks https://stackoverflow.com/questions/26799894/installing-nltk-data-in-setup-py-script
# for detailing how to install nltk data as part of setup.py
# Thanks https://blog.niteoweb.com/setuptools-run-custom-code-in-setup-py/
# for detailing how to fix what that solution broke
class Install(_install):
def run(self):
_install.run(self)
import nltk
nltk.download("stopwords")
nltk.download("punkt")
here = path.abspath(path.dirname(__file__))
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
setup(
name=__appname__.lower(),
cmdclass={'install': Install},
version=__appversion__,
description='Tools for determining if web archive collecions are Off-Topic',
long_description_content_type="text/markdown",
long_description=long_description,
url='https://github.com/oduwsdl/off-topic-memento-toolkit',
author='Shawn M. Jones',
author_email='[email protected]',
license='MIT',
packages=['otmt'],
scripts=[
'bin/detect_off_topic',
'bin/exclude_duplicates',
'bin/select_by_language',
'bin/slice_by_datetime',
'bin/cluster_by_simhash',
'bin/select_high_quality'
],
install_requires=[
'aiu',
'bs4',
'distance',
'gensim',
'html5lib',
'justext',
'langdetect',
'lxml',
'nltk',
'numpy',
'paramiko',
'requests',
'requests_cache',
'requests_futures',
'scikit-learn',
'scipy',
'simhash',
'warcio'
],
setup_requires=['nltk'],
test_suite="tests",
zip_safe=True,
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
'Intended Audience :: Science/Research',
'Topic :: Internet :: WWW/HTTP',
'Topic :: Text Processing',
'Topic :: Scientific/Engineering',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Utilities',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.6',
],
keywords='webarchives memento similarity offtopic'
)