-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsetup.py
114 lines (98 loc) · 3.91 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python
from distutils.core import setup, Extension
import os.path
# Cython is an optional build dependency.  have_pyrex records whether its
# build_ext command could be imported; the rest of this script uses the flag
# to choose between building from src/Stemmer.pyx and src/Stemmer.c.
try:
    from Cython.Distutils import build_ext
except ImportError:
    # Only a failed import means "Cython unavailable"; anything else
    # (KeyboardInterrupt, SystemExit, ...) must propagate rather than be
    # silently treated as a missing dependency.
    have_pyrex = False
else:
    have_pyrex = True
# Directory which libstemmer sources are unpacked in.
library_dir = 'libstemmer_c'

# Directories in libstemmer which contain libstemmer sources (ie, not
# examples, etc).
library_core_dirs = ('src_c', 'runtime', 'libstemmer', 'include')

# Read the manifest of files in libstemmer.  Each manifest line is stripped
# once; a line is kept when it names a .c file (optionally followed by a
# " \" line continuation) whose directory part is one of library_core_dirs.
# The continuation marker is removed and the path is made relative to this
# script by prefixing library_dir.  The manifest is opened in a with-block
# so the file handle is closed as soon as the list has been built.
with open(os.path.join(library_dir, 'mkinc_utf8.mak')) as manifest:
    src_files = [
        os.path.join(library_dir, entry.replace(' \\', ''))
        for entry in (line.strip() for line in manifest)
        if len(entry) > 2
        and entry.endswith(('.c \\', '.c'))
        and os.path.split(entry)[0] in library_core_dirs
    ]
# Header search path for the extension: the wrapper's own src directory
# plus the include directory inside the libstemmer tree.
include_dirs = ('src', os.path.join(library_dir, 'include'))

# Choose which wrapper source is compiled alongside the libstemmer files.
if not have_pyrex:
    # No Cython build_ext available: compile the C source directly and
    # leave distutils' default commands in place.
    src_files.append('src/Stemmer.c')
    cmdclass = {}
else:
    # Build from the .pyx source and register the imported build_ext
    # command so distutils knows how to handle it.
    src_files.append('src/Stemmer.pyx')
    cmdclass = {'build_ext': build_ext}
# Long-form description handed to setup() as the package's long_description.
# The text is kept as a raw triple-quoted literal; the leading and trailing
# newlines that the literal layout introduces are stripped afterwards.
_long_description_text = r"""
Stemming algorithms
PyStemmer provides access to efficient algorithms for calculating a
"stemmed" form of a word. This is a form with most of the common
morphological endings removed; hopefully representing a common
linguistic base form. This is most useful in building search engines
and information retrieval software; for example, a search with stemming
enabled should be able to find a document containing "cycling" given the
query "cycles".
PyStemmer provides algorithms for several (mainly european) languages,
by wrapping the libstemmer library from the Snowball project in a Python
module.
It also provides access to the classic Porter stemming algorithm for
english: although this has been superceded by an improved algorithm, the
original algorithm may be of interest to information retrieval
researchers wishing to reproduce results of earlier experiments.
"""
long_description = _long_description_text.strip()
# Release version; also interpolated into the download URL below.
version_str = '1.2.0'

# Package metadata and build configuration.  src_files, include_dirs,
# cmdclass and long_description are the module-level values assembled
# earlier in this script.
setup(
    name='PyStemmer',
    version=version_str,
    author='Richard Boulton',
    author_email='richard@tartarus.org',
    maintainer='Richard Boulton',
    maintainer_email='richard@tartarus.org',
    url='http://snowball.tartarus.org/',
    download_url='http://snowball.tartarus.org/wrappers/PyStemmer-%s.tar.gz' % version_str,
    description='Snowball stemming algorithms, for information retrieval',
    long_description=long_description,
    platforms=["any"],
    # The license metadata field is a single string, so both licenses are
    # given as one comma-separated value rather than as a list (a list
    # would be written out as its Python repr, or rejected outright by
    # stricter metadata writers).
    license="MIT, BSD",
    keywords=[
        "python",
        "information retrieval",
        "language processing",
        "morphological analysis",
        "stemming algorithms",
        "stemmers",
    ],
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "License :: OSI Approved :: BSD License",
        "Natural Language :: Danish",
        "Natural Language :: Dutch",
        "Natural Language :: English",
        "Natural Language :: Finnish",
        "Natural Language :: French",
        "Natural Language :: German",
        "Natural Language :: Italian",
        "Natural Language :: Norwegian",
        "Natural Language :: Portuguese",
        "Natural Language :: Russian",
        "Natural Language :: Spanish",
        "Natural Language :: Swedish",
        "Operating System :: OS Independent",
        "Programming Language :: C",
        "Programming Language :: Other",
        "Programming Language :: Python",
        "Topic :: Database",
        "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
        "Topic :: Text Processing :: Indexing",
        "Topic :: Text Processing :: Linguistic",
    ],
    # A single extension module named Stemmer, compiled from the libstemmer
    # sources plus the wrapper source selected earlier in this script.
    ext_modules=[
        Extension('Stemmer', src_files, include_dirs=include_dirs),
    ],
    cmdclass=cmdclass,
)