Skip to content

Commit

Permalink
fix(layout): Windows support (#376)
Browse files Browse the repository at this point in the history
**Summary**
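Remove use of `tempfile.NamedTemporaryFile`, whose file was passed on by name and reopened while the original handle was still open. This fails on Windows, where a named temporary file cannot be opened a second time while it is still open. Write the data to a file inside a `tempfile.TemporaryDirectory` instead.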

Fixes #303
  • Loading branch information
scanny authored Aug 1, 2024
1 parent 4662c4f commit 7804e0d
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
## 0.7.37-dev1
## 0.7.37-dev2

* refactor: remove layout analysis related code
* enhancement: Hide warning about table transformer weights not being loaded
* fix(layout): Use TemporaryDirectory instead of NamedTemporaryFile for Windows support

## 0.7.36

Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.37-dev1" # pragma: no cover
__version__ = "0.7.37-dev2" # pragma: no cover
22 changes: 13 additions & 9 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import tempfile
from pathlib import PurePath
from typing import BinaryIO, Collection, List, Optional, Union, cast
from typing import Any, BinaryIO, Collection, List, Optional, Union, cast

import numpy as np
import pdf2image
Expand Down Expand Up @@ -323,15 +323,19 @@ def from_image(
def process_data_with_model(
data: BinaryIO,
model_name: Optional[str],
**kwargs,
**kwargs: Any,
) -> DocumentLayout:
"""Processes pdf file in the form of a file handler (supporting a read method) into a
DocumentLayout by using a model identified by model_name."""
with tempfile.NamedTemporaryFile() as tmp_file:
tmp_file.write(data.read())
tmp_file.flush() # Make sure the file is written out
"""Process PDF as file-like object `data` into a `DocumentLayout`.
Uses the model identified by `model_name`.
"""
with tempfile.TemporaryDirectory() as tmp_dir_path:
file_path = os.path.join(tmp_dir_path, "document.pdf")
with open(file_path, "wb") as f:
f.write(data.read())
f.flush()
layout = process_file_with_model(
tmp_file.name,
file_path,
model_name,
**kwargs,
)
Expand All @@ -345,7 +349,7 @@ def process_file_with_model(
is_image: bool = False,
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
pdf_image_dpi: int = 200,
**kwargs,
**kwargs: Any,
) -> DocumentLayout:
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
model_name."""
Expand Down

0 comments on commit 7804e0d

Please sign in to comment.