From ad5c3245fd5a8551b076fe0047a36db4c88a8cf5 Mon Sep 17 00:00:00 2001 From: Rajendra Kadam Date: Fri, 26 Jan 2024 03:51:56 +0530 Subject: [PATCH] Fix the failing unit tests. (#13) Update unit test for lazy_load. Format code using ruff. --- .../document_loaders/test_pebblo.py | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/libs/community/tests/unit_tests/document_loaders/test_pebblo.py b/libs/community/tests/unit_tests/document_loaders/test_pebblo.py index 72e58751d6bd0..cde952461f0ea 100644 --- a/libs/community/tests/unit_tests/document_loaders/test_pebblo.py +++ b/libs/community/tests/unit_tests/document_loaders/test_pebblo.py @@ -2,34 +2,42 @@ from pathlib import Path from langchain_core.documents import Document -from langchain_community.document_loaders import CSVLoader -EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent / "examples/") +from langchain_community.document_loaders import CSVLoader, PyPDFLoader + +EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent.parent / "examples/") + def test_pebblo_import() -> None: """Test that the Pebblo safe loader can be imported.""" from langchain_community.document_loaders import PebbloSafeLoader # noqa: F401 + def test_empty_filebased_loader() -> None: """Test basic file based csv loader.""" # Setup from langchain_community.document_loaders import PebbloSafeLoader + file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_empty.csv") expected_docs: list = [] # Exercise loader = PebbloSafeLoader( CSVLoader(file_path=file_path), - "dummy_app_name", "dummy_owner","dummy_description" - ) + "dummy_app_name", + "dummy_owner", + "dummy_description", + ) result = loader.load() # Assert assert result == expected_docs + def test_csv_loader_load_valid_data() -> None: # Setup from langchain_community.document_loaders import PebbloSafeLoader + file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_nominal.csv") expected_docs = [ Document( @@ -45,37 +53,33 @@ def test_csv_loader_load_valid_data() -> None: # Exercise loader = PebbloSafeLoader( CSVLoader(file_path=file_path), - "dummy_app_name", "dummy_owner","dummy_description" - ) + "dummy_app_name", + "dummy_owner", + "dummy_description", + ) result = loader.load() # Assert assert result == expected_docs -def test_csv_lazy_load(): - # Setup + +def test_pdf_lazy_load(): + # Setup from langchain_community.document_loaders import PebbloSafeLoader - file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_nominal.csv") - expected_docs = [ - Document( - page_content="column1: value1\ncolumn2: value2\ncolumn3: value3", - metadata={"source": file_path, "row": 0}, - ), - Document( - page_content="column1: value4\ncolumn2: value5\ncolumn3: value6", - metadata={"source": file_path, "row": 1}, - ), - ] + + file_path = os.path.join( + EXAMPLE_DOCS_DIRECTORY, "multi-page-forms-sample-2-page.pdf" + ) # Exercise loader = PebbloSafeLoader( - CSVLoader(file_path=file_path), - "dummy_app_name", "dummy_owner","dummy_description" - ) + PyPDFLoader(file_path=file_path), + "dummy_app_name", + "dummy_owner", + "dummy_description", + ) - result = [] - for doc in loader.lazy_load(): - result.extend(doc) + result = list(loader.lazy_load()) # Assert - assert result == expected_docs + assert len(result) == 2