Skip to content

Commit

Permalink
Remove "ann_type" argument (all annotation types are now always handled)
Browse files Browse the repository at this point in the history
  • Loading branch information
Azeirah committed Nov 26, 2024
1 parent 3b3c1ea commit 9565edb
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 53 deletions.
1 change: 0 additions & 1 deletion deployment/remarks-server/remarks_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os, os.path

default_args = {
"ann_type": ["scribbles", "highlights"],
"combined_pdf": True,
"combined_md": True,
"md_hl_format": "whole_block",
Expand Down
6 changes: 0 additions & 6 deletions remarks/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ def main():
help="Base directory for all files created (*.pdf, *.png, *.md, and/or *.svg)",
metavar="OUTPUT_DIRECTORY",
)
parser.add_argument(
"--ann_type",
help="Force remarks to handle only a specific type of annotation: highlights or scribbles. If none is specified, remarks will handle both by default",
default=["scribbles", "highlights"],
metavar="ANNOTATION_TYPE",
)
parser.add_argument(
"--skip_combined_pdf",
dest="combined_pdf",
Expand Down
63 changes: 18 additions & 45 deletions remarks/remarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
get_ann_max_bound, determine_document_dimensions,
)
from .conversion.text import (
check_if_text_extractable,
extract_groups_from_smart_hl,
)
from .dimensions import REMARKABLE_PDF_EXPORT, REMARKABLE_DOCUMENT
Expand Down Expand Up @@ -86,7 +85,6 @@ def run_remarks(
def process_document(
metadata_path,
out_path,
ann_type=None,
combined_pdf=False,
):
document = Document(metadata_path)
Expand All @@ -105,8 +103,6 @@ def process_document(
) in document.pages():
print(f"processing page {page_idx}, {page_uuid}")

has_ann_hl = False

# Create a new PDF document to hold the page that will be annotated
work_doc = fitz.open()

Expand Down Expand Up @@ -142,33 +138,22 @@ def process_document(
# - https://pymupdf.readthedocs.io/en/latest/page.html#Page.show_pdf_page
# - https://pymupdf.readthedocs.io/en/latest/document.html#Document.insert_pdf

is_text_extractable = check_if_text_extractable(
pdf_src[page_idx],
)
(ann_data, has_ann_hl), version = parse_rm_file(rm_annotation_file)
x_max, y_max, x_min, y_min = get_ann_max_bound(ann_data)
offset_x = 0
offset_y = 0
is_ann_out_page = True
if version == "V6":
offset_x = RM_WIDTH / 2
if dims.height >= (RM_HEIGHT + 88 * 3):
offset_y = 3 * 88 # why 3 * text_offset? No clue, ask ReMarkable.
if abs(x_min) + abs(x_max) > 1872:
scale = REMARKABLE_DOCUMENT.width / (max(x_max, 1872) - min(x_min, 0))
ann_data = rescale_parsed_data(ann_data, scale, offset_x, offset_y)
else:
scale = REMARKABLE_DOCUMENT.height / (max(y_max, 2048) - min(y_min, 0))
ann_data = rescale_parsed_data(ann_data, scale, offset_x, offset_y)

is_ann_out_page = False

scale = 1
if "scribbles" in ann_type and has_annotations:
(ann_data, has_ann_hl), version = parse_rm_file(rm_annotation_file)
x_max, y_max, x_min, y_min = get_ann_max_bound(ann_data)
offset_x = 0
offset_y = 0
is_ann_out_page = True
if version == "V6":
offset_x = RM_WIDTH / 2
if dims.height >= (RM_HEIGHT + 88 * 3):
offset_y = 3 * 88 # why 3 * text_offset? No clue, ask ReMarkable.
if abs(x_min) + abs(x_max) > 1872:
scale = REMARKABLE_DOCUMENT.width / (max(x_max, 1872) - min(x_min, 0))
ann_data = rescale_parsed_data(ann_data, scale, offset_x, offset_y)
else:
scale = REMARKABLE_DOCUMENT.height / (max(y_max, 2048) - min(y_min, 0))
ann_data = rescale_parsed_data(ann_data, scale, offset_x, offset_y)
if "highlights" not in ann_type and has_ann_hl:
logging.info(
"- Found highlighted text on page #{page_idx} but `--ann_type` flag is set to `scribbles` only, so we won't bother with it"
)

if ann_data:
if "text" in ann_data:
Expand All @@ -179,22 +164,10 @@ def process_document(
if has_annotations:
ann_page = draw_annotations_on_pdf(ann_data, ann_page)

if (
"highlights" in ann_type
and has_ann_hl
and is_text_extractable
):
pass
elif "highlights" in ann_type and has_ann_hl and document.doc_type == "pdf":
logging.info(
f"- Found highlights on page #{page_idx} but couldn't extract them to Markdown."
)

smart_hl_groups = []
if "highlights" in ann_type and has_smart_highlights:
if has_smart_highlights:
smart_hl_data = load_json_file(rm_highlights_file)
ann_page = add_smart_highlight_annotations(smart_hl_data, ann_page, scale)
smart_hl_groups = extract_groups_from_smart_hl(smart_hl_data)
add_smart_highlight_annotations(smart_hl_data, ann_page, scale)
extract_groups_from_smart_hl(smart_hl_data)

# If there are annotations outside the original page limits
# that we've just (re)created from scratch
Expand Down
2 changes: 1 addition & 1 deletion test_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from syrupy.extensions.single_file import SingleFileSnapshotExtension
import remarks

default_args = {"ann_type": ["scribbles", "highlights"], "combined_pdf": True}
default_args = {"combined_pdf": True}


class JPEGImageExtension(SingleFileSnapshotExtension):
Expand Down

0 comments on commit 9565edb

Please sign in to comment.