-
Notifications
You must be signed in to change notification settings - Fork 2
/
clippingsParserHelpers.py
68 lines (59 loc) · 1.8 KB
/
clippingsParserHelpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import string
# --- constants ---
# Line prefixes, grouped by the kind of entry each one introduces. Every
# kind has an "at location" and an "on page" variant.
NOTES = ['- Your Note at location ', '- Your Note on page ']
BOOKMARKS = ['- Your Bookmark at location ', '- Your Bookmark on page ']
STARTS_OF_HIGHLIGHT = [
    '- Your Highlight at location ',
    '- Your Highlight on page ',
]
# Bookmark and highlight prefixes merged into one new list
# (note prefixes are deliberately not part of it).
SKIPPABLE_PHRASES = [*BOOKMARKS, *STARTS_OF_HIGHLIGHT]
# Separator string and the heading prefix used for highlights.
END_OF_HIGHLIGHT = '=========='
HIGHLIGHT_HEADER = '## Highlight #'
# --- end of constants ---
def indexOf(obj, elem) -> int:
    """
    Return the position of `elem` inside `obj`, or -1 when it is not there.

    The regular `index` method of Python raises an exception if the element
    is not found. This wrapper does the traditional returning of -1 instead.

    ## Parameters
    - `obj` - the list or string in which the element needs to be found
    - `elem` - the element that needs to be found
    ## Returns
    - index as `int` if the element is found
    - -1 if not found, or if the lookup is impossible (`obj` has no `index`
      method, or `elem` has a type `obj.index` does not accept)
    """
    try:
        return obj.index(elem)
    except (ValueError, TypeError, AttributeError):
        # ValueError: elem is absent. TypeError / AttributeError: the lookup
        # itself cannot be performed. Naming the exceptions (rather than a
        # bare `except:`) lets KeyboardInterrupt and SystemExit propagate.
        return -1
def elemInLine(listOfElems: list, line: str) -> bool:
    """
    Returns `True` if at least one element of `listOfElems` is present at
    the very start of `line`.
    Returns `False` otherwise (including when `listOfElems` is empty).

    ## Parameters
    - `listOfElems` - candidate prefixes; entries that are not strings are
      ignored
    - `line` - the string whose beginning is checked
    ## Returns
    - `True` / `False` as described above
    """
    # Non-string entries can never be a prefix of a string; drop them so
    # that `startswith` does not raise on them.
    prefixes = tuple(elem for elem in listOfElems if isinstance(elem, str))
    # `startswith` accepts a tuple and only inspects the head of `line`,
    # instead of searching the whole line for each candidate.
    return line.startswith(prefixes)
# Single-pass translation table: removes the byte-order mark and maps
# typographic punctuation to plain ASCII equivalents.
_LINE_CLEANUP_TABLE = str.maketrans({
    '\ufeff': None,      # byte-order mark -> removed
    '\u2019': '\u0027',  # replace ’ with '
    '\u201C': '\u0022',  # replace “ with "
    '\u201D': '\u0022',  # replace ” with "
    '\u2014': '--',      # replace — with --
})
# Set form of string.printable for constant-time membership checks.
_PRINTABLE_CHARS = frozenset(string.printable)


def line_cleanup(line: str) -> str:
    """
    Normalize one line of text to stripped, printable ASCII.

    Steps, in order:
    1. Remove byte-order marks and replace curly quotes / em dashes with
       their ASCII counterparts.
    2. Strip leading and trailing whitespace.
    3. Drop every character that is not in `string.printable`. Any other
       non-ASCII character (for example accented letters) is removed too.

    ## Parameters
    - `line` - the raw line to clean
    ## Returns
    - the cleaned line as `str`
    """
    # The byte-order mark is not whitespace, so it has to be removed
    # *before* stripping; otherwise whitespace next to it survives.
    line = line.translate(_LINE_CLEANUP_TABLE).strip()
    # The filtering idea is borrowed from:
    # https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
    return ''.join(c for c in line if c in _PRINTABLE_CHARS)