-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.py
104 lines (83 loc) · 5.13 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from remarkable import RemarkableOCR, colors, RemarkableOCRResearch, plotting
from PIL import Image
import more_itertools
import random
# Operation Moonglow; annotated by David Bernat
image_filename = "remarkable/_db/docs/moonglow.jpg"
im = Image.open(image_filename)

##################################################################
# using data
##################################################################
# run OCR on the page; the result is iterated below as a list of per-token dicts
data = RemarkableOCR.ocr(image_filename)

# we can debug using an image
RemarkableOCR.create_debug_image(im, data).show()

# hey. what are all the "c" words? (tokens whose text contains "sea", case-insensitively)
cwords = [d for d in data if "sea" in d["text"].lower()]
# show() is called only for its side effect; its return value is not kept so that
# cwords stays bound to the filtered token list
RemarkableOCR.create_debug_image(im, cwords).show()

# nevermind; apply filters because this is a book page
# removes annotations on the edges; which are often numerous
data = RemarkableOCR.filter_assumption_blocks_of_text(data)

# tokens flagged as the first or last of their line outline the text margins
margins = [d for d in data if d["is_first_in_line"] or d["is_last_in_line"]]
RemarkableOCR.create_debug_image(im, margins).show()

# transforms data to a space-separated string; adding new-lines at paragraph breaks.
readable = RemarkableOCR.readable_lines(data)
print(readable)
##################################################################
# highlighting
##################################################################
# size the highlight bar from the document's character statistics; per the author's
# note, passing None instead lets the library use the max/min height of each sequence.
# NOTE(review): "wm"/"ws" look like a mean and a spread of character size - confirm
# against RemarkableOCR.document_statistics.
doc_stats = RemarkableOCR.document_statistics(data)
char_stats = doc_stats["char"]
wm = char_stats["wm"]
ws = char_stats["ws"]
height_px = wm + 6 * ws

# simple phrase search (lowercase, punctuation removed); one result is expected for
# each of the four phrases
phrases = [
    "the Space Age",
    "US Information Agency",
    "US State Department",
    "Neil Armstrong",
]
found = RemarkableOCR.find_statements(phrases, data)

# one highlight config per phrase, in the same order as `phrases`
configs = [
    {"highlight_color": colors.starlight},
    {"highlight_color": colors.green},
    {"highlight_color": colors.starlight},
    {"highlight_color": colors.orange, "highlight_alpha": 0.40},
]
highlight = RemarkableOCR.highlight_statements(im, found, data, configs, height_px=height_px)
highlight.show()

# we can redact our secret activities shh :)
# a fully opaque black bar is drawn on top of the already-highlighted image
secret_phrases = ["I spent the summer reading memos, reports, letters"]
secret_found = RemarkableOCR.find_statements(secret_phrases, data)
redaction = {"highlight_color": colors.black, "highlight_alpha": 1.0}
redacted = RemarkableOCR.highlight_statements(highlight, secret_found, data, redaction, height_px=height_px)
redacted.show()
##################################################################
# research & development
##################################################################
# we can use large reoccurrences of words (about ten sentences) to estimate typographical information about individual
# characters, including their typographical baseline and x_height, and typical dimensions of individual characters.
# this statistical procedure is very robust with, and tested with, mostly uniform text fonts (i.e., book pages).
data, typo = RemarkableOCRResearch.enrich_typographical_statistics(data)
if typo is None:
    raise RuntimeError("typography failed to converge. please contribute this image to an issue")
RemarkableOCRResearch.create_typography_debug_image(im, data).show()

# we can use computer vision to estimate whether images have handwritten underlining; because the typographical features
# provide very helpful constraints on where underlining occurs this feature is only available when typography converges.
data = RemarkableOCRResearch.enrich_handwritten_features(im, data)
hwords = [d for d in data if d["is_highlighted"]]
RemarkableOCR.create_debug_image(im, hwords).show()

# we can also analyze the specific character instances estimated by typographical features. first we show all letters t.
# second we organize the char_bboxes by character and sort by widest character, choosing a random example of each. third
# we have a little fun by generating arbitrary sentences (not recommended for hostage taking or love letters, please).
# this demo uses a utility that takes a list of images and plots them in a tile grid left to right top to bottom.

# typo["char_bboxes"] is iterated as a list of words, each a list of per-character dicts
# carrying a "char" and a "bbox"; flatten it once for the steps below.
all_char_boxes = [t for word in typo["char_bboxes"] for t in word]

# first: every instance of the letter t
t_data = [t for t in all_char_boxes if t["char"] == "t"]
images = [im.crop(dct["bbox"]) for dct in t_data]
plotting.tile_images(images, tile_wh=[None, 100], n_width=20).show()

# second: group the bounding boxes by character, then pick one random sample per
# character, ordered from the widest character to the narrowest.
char_boxes_by_char = more_itertools.map_reduce(all_char_boxes, lambda item: item["char"], lambda item: item["bbox"])
font_char_widths = typo["font_char_widths"]
chars_by_width = sorted(font_char_widths, key=font_char_widths.get, reverse=True)
random.seed(0)  # fixed seed so the sampled glyphs are reproducible between runs
chars_data = [random.choice(char_boxes_by_char[c]) for c in chars_by_width]
images = [im.crop(bbox) for bbox in chars_data]
plotting.tile_images(images, tile_wh=[None, 100], n_width=11).show()

# third: spell a sentence out of sampled glyphs, one tiled strip per word.
# NOTE(review): a character of the quote that has no sample in char_boxes_by_char raises
# KeyError here - confirm the page provides every glyph (capitals and punctuation included).
quote = "Same road, no cars. It's magic."
images = []
for word in quote.split(" "):
    # split(" ") never yields a word containing a space, so every character of the
    # word becomes exactly one tile and n_width=len(word) keeps the word on one row
    chars_data = [random.choice(char_boxes_by_char[c]) for c in word]
    as_images = [im.crop(bbox) for bbox in chars_data]
    images.append(plotting.tile_images(as_images, tile_wh=[None, 100], pad_wh=[0, 0], n_width=len(word)))
plotting.tile_images(images, tile_wh=[None, 100], pad_wh=[60, 5], n_width=2).show()