# test_pdf_markdown_reader.py -- tests for PDFMarkdownReader.
import os
import pytest
from llama_index.core.schema import Document as LlamaIndexDocument
# Verify that the llama_index pieces these tests rely on can be imported,
# and fail with an actionable message when they cannot.
try:
    from llama_index.core.readers.base import BaseReader
    from llama_index.core.schema import Document as LlamaIndexDocument

    print("All imports are successful.")
except ImportError as err:
    # The exception type is kept as NotImplementedError for backward
    # compatibility; the original ImportError is chained so the root cause
    # (which module failed to import) stays visible in the traceback.
    raise NotImplementedError(
        "Please install 'llama_index'; it is needed to run these tests."
    ) from err
from pymupdf4llm.pymupdf4llm.llama_index.pdf_markdown_reader import PDFMarkdownReader
PDF = "input.pdf"
def _get_test_file_path(file_name: str, __file__: str = __file__) -> str:
file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"..",
"..",
".." "helpers",
file_name,
)
file_path = os.path.normpath(file_path)
return file_path
def test_load_data():
    """Loading the sample PDF returns a list of LlamaIndex documents."""
    # Arrange
    reader = PDFMarkdownReader()
    sample_pdf = _get_test_file_path(PDF, __file__)
    metadata = {"test_key": "test_value"}

    # Act
    loaded = reader.load_data(sample_pdf, metadata)

    # Assert: the result is a list and every entry is a LlamaIndex Document.
    assert isinstance(loaded, list)
    assert all(isinstance(entry, LlamaIndexDocument) for entry in loaded)
def test_load_data_with_invalid_file_path():
    """``load_data`` raises when called with the made-up path ``fake/path``."""
    # Arrange
    reader = PDFMarkdownReader()
    metadata = {"test_key": "test_value"}
    bogus_path = "fake/path"

    # Act & Assert: any exception type is accepted here.
    with pytest.raises(Exception):
        reader.load_data(bogus_path, metadata)
def test_load_data_with_invalid_extra_info():
    """``load_data`` raises ``TypeError`` when ``extra_info`` is a plain string."""
    # Arrange
    reader = PDFMarkdownReader()
    bad_metadata = "invalid_extra_info"
    sample_pdf = _get_test_file_path(PDF, __file__)

    # Act & Assert
    with pytest.raises(TypeError):
        reader.load_data(sample_pdf, bad_metadata)
@pytest.mark.asyncio
async def test_aload_data_with_invalid_file_path():
    """``aload_data`` raises when awaited with the made-up path ``Fake/path``."""
    # Arrange
    reader = PDFMarkdownReader()
    metadata = {"test_key": "test_value"}
    bogus_path = "Fake/path"

    # Act & Assert: any exception type is accepted here.
    with pytest.raises(Exception):
        await reader.aload_data(bogus_path, metadata)
@pytest.mark.asyncio
async def test_aload_data_with_invalid_extra_info():
    """``aload_data`` raises ``TypeError`` when ``extra_info`` is a plain string."""
    # Arrange
    reader = PDFMarkdownReader()
    bad_metadata = "invalid_extra_info"
    sample_pdf = _get_test_file_path(PDF, __file__)

    # Act & Assert
    with pytest.raises(TypeError):
        await reader.aload_data(sample_pdf, bad_metadata)