Skip to content

Commit

Permalink
fix(layout): Windows support
Browse files Browse the repository at this point in the history
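Remove use of `tempfile.NamedTemporaryFile`, whose temporary file was then
reopened by name while it was still open. This fails on Windows, where an
open named temporary file cannot be opened a second time. Write the data to
a file inside a `tempfile.TemporaryDirectory` instead.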
  • Loading branch information
scanny committed Aug 1, 2024
1 parent 4662c4f commit b51f1f3
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
## 0.7.37-dev1
## 0.7.37-dev2

* refactor: remove layout analysis related code
* enhancement: Hide warning about table transformer weights not being loaded
* fix(layout): Use TemporaryDirectory instead of NamedTemporaryFile for Windows support

## 0.7.36

Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.37-dev1" # pragma: no cover
__version__ = "0.7.37-dev2" # pragma: no cover
22 changes: 13 additions & 9 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import tempfile
from pathlib import PurePath
from typing import BinaryIO, Collection, List, Optional, Union, cast
from typing import Any, BinaryIO, Collection, List, Optional, Union, cast

import numpy as np
import pdf2image
Expand Down Expand Up @@ -323,15 +323,19 @@ def from_image(
def process_data_with_model(
data: BinaryIO,
model_name: Optional[str],
**kwargs,
**kwargs: Any,
) -> DocumentLayout:
"""Processes pdf file in the form of a file handler (supporting a read method) into a
DocumentLayout by using a model identified by model_name."""
with tempfile.NamedTemporaryFile() as tmp_file:
tmp_file.write(data.read())
tmp_file.flush() # Make sure the file is written out
"""Process PDF as file-like object `data` into a `DocumentLayout`.
Uses the model identified by `model_name`.
"""
with tempfile.TemporaryDirectory() as tmp_dir_path:
file_path = os.path.join(tmp_dir_path, "document.pdf")
with open(file_path, "wb") as f:
f.write(data.read())
f.flush()
layout = process_file_with_model(
tmp_file.name,
file_path,
model_name,
**kwargs,
)
Expand All @@ -345,7 +349,7 @@ def process_file_with_model(
is_image: bool = False,
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
pdf_image_dpi: int = 200,
**kwargs,
**kwargs: Any,
) -> DocumentLayout:
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
model_name."""
Expand Down

0 comments on commit b51f1f3

Please sign in to comment.