Skip to content

Commit f3722a4

Browse files
authored
update document parse base url & remove fragile test (#50)
* update document parse base url
* fix expected element count in parse test
* update version
* remove fragile tests dependent on model outputs
1 parent 7428fbf commit f3722a4

File tree

3 files changed

+2
-41
lines changed

3 files changed

+2
-41
lines changed

libs/upstage/langchain_upstage/document_parse_parsers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
logger = logging.getLogger("pypdf")
1414
logger.setLevel(logging.ERROR)
1515

16-
DOCUMENT_PARSE_BASE_URL = "https://api.upstage.ai/v1/document-ai/document-parse"
16+
DOCUMENT_PARSE_BASE_URL = "https://api.upstage.ai/v1/document-digitization"
1717
DEFAULT_NUM_PAGES = 10
1818
DOCUMENT_PARSE_DEFAULT_MODEL = "document-parse"
1919

libs/upstage/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "langchain-upstage"
3-
version = "0.7.0rc0"
3+
version = "0.7.0rc1"
44
description = "An integration package connecting Upstage and LangChain"
55
authors = []
66
readme = "README.md"
Lines changed: 0 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,6 @@
11
from pathlib import Path
2-
from typing import get_args
3-
4-
import pytest
52

63
from langchain_upstage.document_parse import UpstageDocumentParseLoader
7-
from langchain_upstage.document_parse_parsers import (
8-
OCR,
9-
Category,
10-
OutputFormat,
11-
SplitType,
12-
)
134

145
EXAMPLE_PDF_PATH = Path(__file__).parent.parent / "examples/solar.pdf"
156

@@ -24,33 +15,3 @@ def test_file_not_found_error() -> None:
2415
assert False
2516
except FileNotFoundError:
2617
assert True
27-
28-
29-
@pytest.mark.parametrize("output_format", get_args(OutputFormat))
30-
@pytest.mark.parametrize("split", get_args(SplitType))
31-
@pytest.mark.parametrize("ocr", get_args(OCR))
32-
@pytest.mark.parametrize("coordinates", [True, False])
33-
@pytest.mark.parametrize("base64_encoding", ["paragraph"])
34-
def test_document_parse(
35-
output_format: OutputFormat,
36-
split: SplitType,
37-
ocr: OCR,
38-
coordinates: bool,
39-
base64_encoding: Category,
40-
) -> None:
41-
loader = UpstageDocumentParseLoader(
42-
file_path=EXAMPLE_PDF_PATH,
43-
output_format=output_format,
44-
split=split,
45-
ocr=ocr,
46-
coordinates=coordinates,
47-
base64_encoding=[base64_encoding],
48-
)
49-
documents = loader.load()
50-
if split == "element":
51-
if ocr == "auto":
52-
assert len(documents) == 14
53-
else:
54-
assert len(documents) == 15
55-
else:
56-
assert len(documents) == 1

0 commit comments

Comments
 (0)