Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated dependencies #12

Merged
merged 6 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Fixed pylint warnings
  • Loading branch information
ubmarco committed Dec 20, 2021
commit 7354c625f86c4051ef881294bbf091d60a47585e
13 changes: 5 additions & 8 deletions libpdf/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,13 @@ def main_api( # pylint: disable=too-many-arguments
for visual_incl_element in visual_debug_include_elements:
if visual_incl_element not in RENDER_ELEMENTS:
raise ValueError(
'Given visual included elements {} not in {}'.format(visual_incl_element, RENDER_ELEMENTS),
f'Given visual included elements {visual_incl_element} not in {RENDER_ELEMENTS}',
)
if visual_debug_exclude_elements:
for visual_excl_element in visual_debug_exclude_elements:
if visual_excl_element not in RENDER_ELEMENTS:
raise ValueError(
'Given visual excluded elements {} not in {}'.format(visual_excl_element, RENDER_ELEMENTS),
f'Given visual excluded elements {visual_excl_element} not in {RENDER_ELEMENTS}',
)
if visual_debug_include_elements and visual_debug_exclude_elements:
raise ValueError('Can not visual include and exclude at the same time.')
Expand Down Expand Up @@ -330,15 +330,12 @@ def handle_parse_result(self, ctx, opts, args):
"""Handle parse result."""
if (not self.depends_on.intersection(opts)) and self.name in opts:
raise click.UsageError(
"Illegal usage: '{}' depends on "
"'{}' which is not given.".format(self.name, ', '.join(self.depends_on)),
f"Illegal usage: '{self.name}' depends on '{', '.join(self.depends_on)}' which is not given.",
)
if self.mutually_exclusive.intersection(opts) and self.name in opts:
raise click.UsageError(
"Illegal usage: '{}' is mutually exclusive with '{}' which is also given.".format(
self.name,
', '.join(self.mutually_exclusive),
),
f"Illegal usage: '{self.name}' is mutually exclusive with '{', '.join(self.mutually_exclusive)}' "
"which is also given."
)

return super().handle_parse_result(ctx, opts, args)
Expand Down
16 changes: 7 additions & 9 deletions libpdf/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class FoldedStr(str):

def folded_str_representer(dumper, text):
"""Warp function of the representer."""
return dumper.represent_scalar(u'tag', text, style='>')
return dumper.represent_scalar('tag', text, style='>')


yaml.add_representer(FoldedStr, folded_str_representer)
Expand Down Expand Up @@ -257,8 +257,7 @@ def smart_page_crop_header_footer( # pylint: disable=too-many-branches, too-man
):
element_cnt += 1
# on one page several header elements may have same y coordination but count only once
if element_cnt > 1:
element_cnt = 1
element_cnt = min(element_cnt, 1)
page_cnt = page_cnt + element_cnt
# occur on more than HEADER_FOOTER_OCCURRENCE_PERCENTAGE pages, considered as header element
# and remove from list
Expand Down Expand Up @@ -291,8 +290,7 @@ def smart_page_crop_header_footer( # pylint: disable=too-many-branches, too-man
):
element_cnt += 1
# on one page several footer elements may have same y coordination but count only once
if element_cnt > 1:
element_cnt = 1
element_cnt = min(element_cnt, 1)
page_cnt = page_cnt + element_cnt
# occur on more than HEADER_FOOTER_OCCURRENCE_PERCENTAGE pages, considered as footer element
# and remove from list
Expand Down Expand Up @@ -343,9 +341,9 @@ def check_false_positive_header_footer(pdf, elements_list): # pylint: disable=t
# search the lowest element height on each page
element_low_pos_dict = {}
for page_num, elements in elements_page_dict.items():
lowest_element_pos = float('{:.4f}'.format(elements[0].position.y0)) # restrict to 4 digits precision
lowest_element_pos = float(f'{elements[0].position.y0:.4f}') # restrict to 4 digits precision
for element in elements:
lowest_element_pos = min(lowest_element_pos, float('{:.4f}'.format(element.position.y0)))
lowest_element_pos = min(lowest_element_pos, float(f'{element.position.y0:.4f}'))
element_low_pos_dict[page_num] = lowest_element_pos

start_page_low_pos = list(element_low_pos_dict)[0]
Expand Down Expand Up @@ -376,7 +374,7 @@ def check_false_positive_header_footer(pdf, elements_list): # pylint: disable=t
UNIQUE_HEADER_OR_FOOTER_ELEMENTS_PERCENTAGE * len(pdf.pages),
):
for idx, element in enumerate(elements_list):
if float('{:.4f}'.format(element.position.y0)) == header_low_pos:
if float(f'{element.position.y0:.4f}') == header_low_pos:
del elements_list[idx]
# recursively check again, to find the next min_low_pos, which will determine the header/footer boundary
if elements_list:
Expand All @@ -386,7 +384,7 @@ def check_false_positive_header_footer(pdf, elements_list): # pylint: disable=t
elements_list.pop()
else:
for idx, element in enumerate(elements_list):
if float('{:.4f}'.format(element.position.y0)) == header_low_pos:
if float(f'{element.position.y0:.4f}') == header_low_pos:
del elements_list[idx]
# recursively check again, to find the next min_low_pos, which will determine the header/footer boundary
if elements_list:
Expand Down
3 changes: 1 addition & 2 deletions libpdf/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@

def get_level_name(verbose):
"""Return the log levels for the CLI verbosity flag in words."""
if verbose > 3:
verbose = 3
verbose = min(verbose, 3)
level_dict = {
0: 'ERROR/FATAL/CRITICAL',
1: 'WARNING',
Expand Down
4 changes: 2 additions & 2 deletions libpdf/models/model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ def check(self):
def __repr__(self):
"""Overwrite the object representation for better debugging."""
if hasattr(self, 'id_'):
return '{0}({1!r})'.format(self.__class__.__name__, self.id_) # pylint: disable=no-member
return '{0}()'.format(self.__class__.__name__)
return f'{self.__class__.__name__}({self.id_!r})' # pylint: disable=no-member
return f'{self.__class__.__name__}()'
4 changes: 2 additions & 2 deletions libpdf/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class MyRepresenter(RoundTripRepresenter): # pylint: disable=too-few-public-met

def represent_mapping(self, tag, mapping, flow_style=None):
"""Override represent_mapping."""
tag = u'tag:yaml.org,2002:map'
tag = 'tag:yaml.org,2002:map'

return RoundTripRepresenter.represent_mapping(self, tag, mapping, flow_style=flow_style)

Expand Down Expand Up @@ -161,7 +161,7 @@ def output_dump(output_format: str, output_path: str, objects: ApiObjects):
if output_dir:
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
with open(output_path, 'w') as file:
with open(output_path, 'w', encoding='utf-8') as file:
if output_format == 'json':
json_string = json.dumps(output_dict, default=json_datetime_converter, indent=2, sort_keys=False)
file.write(json_string)
Expand Down
2 changes: 1 addition & 1 deletion libpdf/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class FoldedStr(str):

def folded_str_representer(dumper, text):
"""Warp function of the representer."""
return dumper.represent_scalar(u'tag', text, style='>')
return dumper.represent_scalar('tag', text, style='>')


def extract_pdf_table(pdf, pages_list: List[Page], figure_list: List[Figure]):
Expand Down
3 changes: 1 addition & 2 deletions libpdf/textbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,7 @@ def chapter_examiner(chapter: Dict, lt_textboxes: List[LTTextBox], page: Page) -
# This assumption may not work in PDFs with multiple columns.
y0 = chapter['position']['y1'] - (page.height / 4)
y1 = chapter['position']['y1'] + (page.height / 4)
if y0 < 0:
y0 = 0
y0 = max(y0, 0)
if y1 > page.height:
y1 = page.height

Expand Down
8 changes: 4 additions & 4 deletions libpdf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,10 +811,10 @@ def visual_debug_pdfminer(pdf_path, vd_pdfminer_output):
def extract_layout(path_pdf, idx_single_page=None):
"""Use pdfminer.six to extract LTContainer layout boxes."""
LOG.info('Extracting layout ...')
file_pointer = open(path_pdf, 'rb')

# init pdfminer elements
parser = PDFParser(file_pointer)
parser = None
with open(path_pdf, 'rb') as file_pointer:
# init pdfminer elements
parser = PDFParser(file_pointer)
doc = PDFDocument(parser)
rsrcmgr = PDFResourceManager()
laparams = LAParams(char_margin=6, line_margin=0.4)
Expand Down