Skip to content

Commit

Permalink
Merge pull request #16 from VikParuchuri/dev
Browse files Browse the repository at this point in the history
Fix PDF flattening
  • Loading branch information
VikParuchuri authored Oct 25, 2024
2 parents c88e23c + a8605c2 commit 10d979b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
3 changes: 2 additions & 1 deletion pdftext/extraction.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+from itertools import repeat
 from typing import List
 from concurrent.futures import ProcessPoolExecutor
 import math
Expand Down Expand Up
@@ -54,7 +55,7 @@ def _get_pages(pdf_path, page_range=None, flatten_pdf=False, workers=None):
     page_range_chunks = [page_range[i * pages_per_worker:(i + 1) * pages_per_worker] for i in range(workers)]

     with ProcessPoolExecutor(max_workers=workers, initializer=worker_init, initargs=(pdf_path, flatten_pdf)) as executor:
-        pages = list(executor.map(_get_page_range, page_range_chunks))
+        pages = list(executor.map(_get_page_range, page_range_chunks, repeat(flatten_pdf)))

     ordered_pages = [page for sublist in pages for page in sublist]

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pdftext"
-version = "0.3.17"
+version = "0.3.18"
 description = "Extract structured text from pdfs quickly"
 authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
 license = "Apache-2.0"
Expand Down

0 comments on commit 10d979b

Please sign in to comment.