forked from openstate/open-raadsinformatie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
misc.py
124 lines (100 loc) · 3.45 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import datetime
import importlib
import json
import re

import translitcodec
def load_sources_config(filename):
    """Load a JSON file containing the configuration of the available
    sources.

    :param filename: the filename of the JSON file, or an already opened
        file-like object (anything with a ``read`` method) to read the
        JSON document from.
    :type filename: str or file-like object.
    :returns: the decoded JSON document.
    :raises IOError: if the file can not be opened or read; the
        exception's ``strerror`` is wrapped in a message saying that the
        sources configuration could not be loaded.
    """
    # Duck-type on ``read`` instead of comparing against the ``file`` type,
    # so StringIO objects and other file-likes are accepted as well.
    if hasattr(filename, 'read'):
        return json.load(filename)

    try:
        with open(filename) as json_file:
            return json.load(json_file)
    except IOError as e:
        # Keep the original exception (type, errno, traceback) and only
        # make its message say which operation failed.
        e.strerror = 'Unable to load sources configuration file (%s)' % (
            e.strerror,)
        raise
def load_object(path):
    """Load an object given its absolute object path, and return it.

    The object can be a class, function, variable or instance.

    :param path: absolute object path (i.e. 'ocd_backend.extractor.BaseExtractor')
    :type path: str.
    :returns: the attribute named by the last dotted component of
        ``path``, looked up on the module named by everything before it.
    :raises ValueError: if ``path`` does not contain a dot.
    :raises ImportError: if importing the module part fails.
    :raises NameError: if the module has no attribute with that name.
    """
    module, dot, name = path.rpartition('.')
    if not dot:
        raise ValueError("Error loading object '%s': not a full path" % path)

    try:
        # import_module returns the leaf module itself, which is what the
        # former ``__import__(module, {}, {}, [''])`` trick was used for.
        mod = importlib.import_module(module)
    except ImportError as e:
        raise ImportError("Error loading object '%s': %s" % (path, e))

    try:
        return getattr(mod, name)
    except AttributeError:
        raise NameError("Module '%s' doesn't define any object named '%s'" % (
            module, name))
def try_convert(conv, value):
    """Convert ``value`` with ``conv``, falling back to the input itself.

    :param conv: callable that is called with ``value`` as its only
        argument (e.g. ``int``).
    :param value: the value to convert.
    :returns: ``conv(value)``, or ``value`` unchanged when the conversion
        raises ``ValueError`` or ``TypeError``.
    """
    try:
        return conv(value)
    except (ValueError, TypeError):
        # TypeError covers inputs of an unsupported type, e.g. int(None),
        # which should fall back just like an unparseable string does.
        return value
def parse_date(regexen, date_str):
    """
    Turn a messy date string into a ``(granularity, date)`` pair.

    `regexen` is of the form [ (regex, (granularity, groups -> datetime)) ].
    The entries are tried in order and the first regex that matches at the
    start of `date_str` decides the outcome. ``(0, None)`` is returned when
    `date_str` is empty, when no regex matches, or when the converter of
    the matching entry raises ``ValueError``.
    """
    unparseable = (0, None)
    if not date_str:
        return unparseable

    for pattern, (granularity, to_date) in regexen:
        match = re.match(pattern, date_str)
        if not match:
            continue
        try:
            return granularity, to_date(match.groups())
        except ValueError:
            # The first matching pattern is final: later patterns are not
            # tried when its converter rejects the captured groups.
            return unparseable

    return unparseable
def parse_date_span(regexen, date1_str, date2_str):
    """
    Parse a start & end date into a (less) granular date

    `regexen` is of the form [ (regex, (granularity, groups -> datetime)) ]

    Both strings are parsed with ``parse_date``. The result is always based
    on the start date:

    * if either date could not be parsed, or both parse to the same
      granularity and date, the start date's own ``(granularity, date)``
      is returned;
    * otherwise the granularity depends on the number of days between the
      two dates: 4 for less than 5*365 days, 3 for less than 50*365 days,
      2 for anything longer.
    """
    date1_gran, date1 = parse_date(regexen, date1_str)
    date2_gran, date2 = parse_date(regexen, date2_str)

    # Without two parsed dates there is no span to measure. Checking the
    # start date as well prevents subtracting ``None`` from the end date
    # when only the end date could be parsed.
    if date1 is None or not date2:
        return date1_gran, date1

    # TODO: integrate both granularities
    if (date1_gran, date1) == (date2_gran, date2):
        return date1_gran, date1

    span_days = (date2 - date1).days
    if span_days < 5 * 365:
        return 4, date1
    if span_days < 50 * 365:
        return 3, date1
    return 2, date1
class DatetimeJSONEncoder(json.JSONEncoder):
    """
    ``json.JSONEncoder`` subclass that additionally serializes
    ``datetime.datetime``, ``datetime.date`` and ``datetime.timedelta``
    objects.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Dates and datetimes are rendered with their ``isoformat()``. A
        timedelta is added to ``datetime.datetime.min`` and the time-of-day
        part of the result is rendered with ``isoformat()``. Any other
        object is handed to the base class implementation.
        """
        if isinstance(o, (datetime.datetime, datetime.date)):
            return o.isoformat()
        if isinstance(o, datetime.timedelta):
            anchored = datetime.datetime.min + o
            return anchored.time().isoformat()
        return super(DatetimeJSONEncoder, self).default(o)
# Runs of tabs, spaces and punctuation characters that separate slug words.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """Build a slug from ``text`` (intended to be ASCII-only).

    The text is lowercased and split on ``_punct_re``; every piece is
    encoded with the ``translit/long`` codec, empty results are dropped
    and the remaining pieces are joined with ``delim``.

    NOTE(review): the ``translit/long`` codec is presumably registered by
    the module-level ``translitcodec`` import -- confirm before removing
    that import.
    """
    pieces = _punct_re.split(text.lower())
    transliterated = [piece.encode('translit/long') for piece in pieces]
    return unicode(delim.join([piece for piece in transliterated if piece]))