diff --git a/CHANGELOG.md b/CHANGELOG.md index 1db85fc8..1d77591d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ -## 0.7.37-dev1 +## 0.7.37-dev2 * refactor: remove layout analysis related code * enhancement: Hide warning about table transformer weights not being loaded +* fix(layout): Use TemporaryDirectory instead of NamedTemporaryFile for Windows support ## 0.7.36 diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index a55dc982..16e23bb0 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.7.37-dev1" # pragma: no cover +__version__ = "0.7.37-dev2" # pragma: no cover diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index 517a2dba..944f2548 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -3,7 +3,7 @@ import os import tempfile from pathlib import PurePath -from typing import BinaryIO, Collection, List, Optional, Union, cast +from typing import Any, BinaryIO, Collection, List, Optional, Union, cast import numpy as np import pdf2image @@ -323,15 +323,19 @@ def from_image( def process_data_with_model( data: BinaryIO, model_name: Optional[str], - **kwargs, + **kwargs: Any, ) -> DocumentLayout: - """Processes pdf file in the form of a file handler (supporting a read method) into a - DocumentLayout by using a model identified by model_name.""" - with tempfile.NamedTemporaryFile() as tmp_file: - tmp_file.write(data.read()) - tmp_file.flush() # Make sure the file is written out + """Process PDF as file-like object `data` into a `DocumentLayout`. + + Uses the model identified by `model_name`. + """ + with tempfile.TemporaryDirectory() as tmp_dir_path: + file_path = os.path.join(tmp_dir_path, "document.pdf") + with open(file_path, "wb") as f: + f.write(data.read()) + f.flush() layout = process_file_with_model( - tmp_file.name, + file_path, model_name, **kwargs, ) @@ -345,7 +349,7 @@ def process_file_with_model( is_image: bool = False, fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, pdf_image_dpi: int = 200, - **kwargs, + **kwargs: Any, ) -> DocumentLayout: """Processes pdf file with name filename into a DocumentLayout by using a model identified by model_name."""