Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.6.0"
".": "0.7.0"
}
4 changes: 2 additions & 2 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 52
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/contextual-ai%2Fsunrise-17bdb8a33fb4fcade827bba868bd65cd30c64b1d09b4a6d83c3e37a8439ed37f.yml
openapi_spec_hash: bc325b52f3b20d8c56e0be5de88f2dc3
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/contextual-ai%2Fsunrise-8d75c58c83d13f67b6a125c3eb4639d213c91aec7dbb6e06f0cd5bdfc074d54e.yml
openapi_spec_hash: 47795284631814d0f8eb42f6a0d5a3b3
config_hash: 1ecef0ff4fd125bbc00eec65e3dd4798
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# Changelog

## 0.7.0 (2025-05-13)

Full Changelog: [v0.6.0...v0.7.0](https://github.com/ContextualAI/contextual-client-python/compare/v0.6.0...v0.7.0)

### Features

* **api:** update via SDK Studio ([656a0e1](https://github.com/ContextualAI/contextual-client-python/commit/656a0e19d78fe677a1a859bff114511acd58fa87))


### Bug Fixes

* **package:** support direct resource imports ([109de24](https://github.com/ContextualAI/contextual-client-python/commit/109de24d9c76aaa1d90fff8dfc816e5cfbfab50a))
* **tests:** correct number examples ([cb94e10](https://github.com/ContextualAI/contextual-client-python/commit/cb94e101a87b8abec57d46667fecef7a3079765f))


### Chores

* **internal:** avoid errors for isinstance checks on proxies ([581e581](https://github.com/ContextualAI/contextual-client-python/commit/581e581480ac98e0fed61eacc36e90a44e3b99fc))

## 0.6.0 (2025-05-08)

Full Changelog: [v0.5.1...v0.6.0](https://github.com/ContextualAI/contextual-client-python/compare/v0.5.1...v0.6.0)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "contextual-client"
version = "0.6.0"
version = "0.7.0"
description = "The official Python library for the Contextual AI API"
dynamic = ["readme"]
license = "Apache-2.0"
Expand Down
5 changes: 5 additions & 0 deletions src/contextual/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

import typing as _t

from . import types
from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes
from ._utils import file_from_path
Expand Down Expand Up @@ -78,6 +80,9 @@
"DefaultAsyncHttpxClient",
]

if not _t.TYPE_CHECKING:
from ._utils._resources_proxy import resources as resources

_setup_logging()

# Update the __module__ attribute for exported symbols so that
Expand Down
5 changes: 4 additions & 1 deletion src/contextual/_utils/_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]:
# Overrides `__class__` so the proxy reports the class of the object it stands in
# for; `isinstance()` consults `__class__`, so this is what such checks observe.
@property # type: ignore
@override
def __class__(self) -> type: # pyright: ignore
# NOTE(review): this unguarded call repeats the guarded call just below and looks
# like the removed side of this diff hunk — confirm. As written, an exception
# raised here propagates before the try/except is ever reached.
proxied = self.__get_proxied__()
try:
proxied = self.__get_proxied__()
except Exception:
# Resolving the proxied object failed: fall back to the proxy's own type
# instead of letting the error escape from a plain `__class__` lookup.
return type(self)
# The proxied object is itself a LazyProxy: return its concrete type directly
# rather than reading its `__class__` (presumably to avoid going through that
# proxy's own `__class__` override — confirm).
if issubclass(type(proxied), LazyProxy):
return type(proxied)
return proxied.__class__
Expand Down
24 changes: 24 additions & 0 deletions src/contextual/_utils/_resources_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from typing import Any
from typing_extensions import override

from ._proxy import LazyProxy


class ResourcesProxy(LazyProxy[Any]):
    """Lazy stand-in for the `contextual.resources` module.

    Lets callers write `import contextual` and then reference
    `contextual.resources`, while deferring the actual import of
    `contextual.resources` until it is needed.
    """

    @override
    def __load__(self) -> Any:
        """Import `contextual.resources` and return the module object."""
        from importlib import import_module

        return import_module("contextual.resources")


# Module-level proxy object published under the name `resources`.
# `__as_proxied__` is inherited from LazyProxy (not shown here) — presumably it
# returns the proxy typed as the proxied module; confirm against `_proxy.py`.
resources = ResourcesProxy().__as_proxied__()
2 changes: 1 addition & 1 deletion src/contextual/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "contextual"
__version__ = "0.6.0" # x-release-please-version
__version__ = "0.7.0" # x-release-please-version
86 changes: 46 additions & 40 deletions src/contextual/resources/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,39 +65,42 @@ def create(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ParseCreateResponse:
"""Parse a file into a structured Markdown representation.
"""Parse a file into a structured Markdown and/or JSON.

The file size must be
less than 100MB and the number of pages must be less than 400.
Files must be less than
100MB and 400 pages. We use LibreOffice to convert DOC(X) and PPT(X) files to
PDF, which may affect page count.

See our [blog post](https://contextual.ai/blog/document-parser-for-rag) and
[code examples](https://github.com/ContextualAI/examples/blob/main/03-standalone-api/04-parse/parse.ipynb).
Email [parse-feedback@contextual.ai](mailto:parse-feedback@contextual.ai) with
any feedback or questions.

Args:
raw_file: The file to be parsed. The file type must be PDF, DOC / DOCX, PPT / PPTX.

enable_document_hierarchy: Controls parsing heading levels (e.g. H1, H2, H3) at higher quality. Adds a
table of contents to the output with the structure of the entire parsed
document. Not permitted in 'basic' parsing_mode, or if page_range is not
continuous and/or does not start from page zero.
enable_document_hierarchy: Adds a table of contents to the output with the structure of the entire parsed
document. This feature is in beta. Controls parsing heading levels (e.g. H1, H2,
H3) at higher quality. Not permitted in `basic` parsing_mode, or if page_range
is not continuous and/or does not start from page zero.

enable_split_tables: Controls whether tables are split into multiple tables by row with the headers
propagated. Use for improving LLM comprehension of very large tables. Not
permitted in 'basic' parsing_mode.
permitted in `basic` parsing_mode.

figure_caption_mode: Controls how thorough figure captions are. 'concise' is short and minimizes
chances of hallucinations. 'detailed' is more thorough and can include
commentary. Not permitted in 'basic' parsing_mode.
figure_caption_mode: Controls how thorough figure captions are. `concise` is short and minimizes
chances of hallucinations. `detailed` is more thorough and can include
commentary; this mode is in beta. Not permitted in `basic` parsing_mode.

max_split_table_cells: Threshold number of table cells beyond which large tables are split if
`enable_split_tables` is True. Not permitted in 'basic' parsing_mode.
`enable_split_tables` is True. Not permitted in `basic` parsing_mode.

page_range: Optional string representing page range to be parsed. Format: comma-separated
indexes (0-based) e.g. '0,1,2,5,6' or ranges (inclusive of both ends) e.g.
'0-2,5,6'
indexes (0-based, e.g. `0,1,2,5,6`), or ranges inclusive of both ends (e.g.
`0-2,5,6`)

parse_mode: The settings to use for parsing. 'basic' is for simple, text-only documents.
'standard' is for complex documents with images, complex hierarchy, and/or no
parse_mode: The settings to use for parsing. `basic` is for simple, text-only documents.
`standard` is for complex documents with images, complex hierarchy, and/or no
natively encoded textual data (e.g. for scanned documents).

extra_headers: Send extra headers
Expand Down Expand Up @@ -156,11 +159,11 @@ def job_results(
job_id: Unique ID of the parse job

output_types: The desired output format(s) of the parsed file. Must be `markdown-document`,
`markdown-per-page`, and/or `blocks-per-page`. `markdown-document` parses the
whole document into a single concatenated markdown output. `markdown-per-page`
provides markdown output per page. `blocks-per-page` provides a structured JSON
`markdown-per-page`, and/or `blocks-per-page`. Specify multiple values to get
multiple formats in the response. `markdown-document` parses the whole document
into a single concatenated markdown output. `markdown-per-page` provides
markdown output per page. `blocks-per-page` provides a structured JSON
representation of the content blocks on each page, sorted by reading order.
Specify multiple values to get multiple formats in the response.

extra_headers: Send extra headers

Expand Down Expand Up @@ -298,39 +301,42 @@ async def create(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ParseCreateResponse:
"""Parse a file into a structured Markdown representation.
"""Parse a file into a structured Markdown and/or JSON.

The file size must be
less than 100MB and the number of pages must be less than 400.
Files must be less than
100MB and 400 pages. We use LibreOffice to convert DOC(X) and PPT(X) files to
PDF, which may affect page count.

See our [blog post](https://contextual.ai/blog/document-parser-for-rag) and
[code examples](https://github.com/ContextualAI/examples/blob/main/03-standalone-api/04-parse/parse.ipynb).
Email [parse-feedback@contextual.ai](mailto:parse-feedback@contextual.ai) with
any feedback or questions.

Args:
raw_file: The file to be parsed. The file type must be PDF, DOC / DOCX, PPT / PPTX.

enable_document_hierarchy: Controls parsing heading levels (e.g. H1, H2, H3) at higher quality. Adds a
table of contents to the output with the structure of the entire parsed
document. Not permitted in 'basic' parsing_mode, or if page_range is not
continuous and/or does not start from page zero.
enable_document_hierarchy: Adds a table of contents to the output with the structure of the entire parsed
document. This feature is in beta. Controls parsing heading levels (e.g. H1, H2,
H3) at higher quality. Not permitted in `basic` parsing_mode, or if page_range
is not continuous and/or does not start from page zero.

enable_split_tables: Controls whether tables are split into multiple tables by row with the headers
propagated. Use for improving LLM comprehension of very large tables. Not
permitted in 'basic' parsing_mode.
permitted in `basic` parsing_mode.

figure_caption_mode: Controls how thorough figure captions are. 'concise' is short and minimizes
chances of hallucinations. 'detailed' is more thorough and can include
commentary. Not permitted in 'basic' parsing_mode.
figure_caption_mode: Controls how thorough figure captions are. `concise` is short and minimizes
chances of hallucinations. `detailed` is more thorough and can include
commentary; this mode is in beta. Not permitted in `basic` parsing_mode.

max_split_table_cells: Threshold number of table cells beyond which large tables are split if
`enable_split_tables` is True. Not permitted in 'basic' parsing_mode.
`enable_split_tables` is True. Not permitted in `basic` parsing_mode.

page_range: Optional string representing page range to be parsed. Format: comma-separated
indexes (0-based) e.g. '0,1,2,5,6' or ranges (inclusive of both ends) e.g.
'0-2,5,6'
indexes (0-based, e.g. `0,1,2,5,6`), or ranges inclusive of both ends (e.g.
`0-2,5,6`)

parse_mode: The settings to use for parsing. 'basic' is for simple, text-only documents.
'standard' is for complex documents with images, complex hierarchy, and/or no
parse_mode: The settings to use for parsing. `basic` is for simple, text-only documents.
`standard` is for complex documents with images, complex hierarchy, and/or no
natively encoded textual data (e.g. for scanned documents).

extra_headers: Send extra headers
Expand Down Expand Up @@ -389,11 +395,11 @@ async def job_results(
job_id: Unique ID of the parse job

output_types: The desired output format(s) of the parsed file. Must be `markdown-document`,
`markdown-per-page`, and/or `blocks-per-page`. `markdown-document` parses the
whole document into a single concatenated markdown output. `markdown-per-page`
provides markdown output per page. `blocks-per-page` provides a structured JSON
`markdown-per-page`, and/or `blocks-per-page`. Specify multiple values to get
multiple formats in the response. `markdown-document` parses the whole document
into a single concatenated markdown output. `markdown-per-page` provides
markdown output per page. `blocks-per-page` provides a structured JSON
representation of the content blocks on each page, sorted by reading order.
Specify multiple values to get multiple formats in the response.

extra_headers: Send extra headers

Expand Down
25 changes: 13 additions & 12 deletions src/contextual/types/parse_create_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,44 +14,45 @@ class ParseCreateParams(TypedDict, total=False):
"""The file to be parsed. The file type must be PDF, DOC / DOCX, PPT / PPTX."""

enable_document_hierarchy: bool
"""Controls parsing heading levels (e.g.

H1, H2, H3) at higher quality. Adds a table of contents to the output with the
structure of the entire parsed document. Not permitted in 'basic' parsing_mode,
or if page_range is not continuous and/or does not start from page zero.
"""
Adds a table of contents to the output with the structure of the entire parsed
document. This feature is in beta. Controls parsing heading levels (e.g. H1, H2,
H3) at higher quality. Not permitted in `basic` parsing_mode, or if page_range
is not continuous and/or does not start from page zero.
"""

enable_split_tables: bool
"""
Controls whether tables are split into multiple tables by row with the headers
propagated. Use for improving LLM comprehension of very large tables. Not
permitted in 'basic' parsing_mode.
permitted in `basic` parsing_mode.
"""

figure_caption_mode: Literal["concise", "detailed"]
"""Controls how thorough figure captions are.

'concise' is short and minimizes chances of hallucinations. 'detailed' is more
thorough and can include commentary. Not permitted in 'basic' parsing_mode.
`concise` is short and minimizes chances of hallucinations. `detailed` is more
thorough and can include commentary; this mode is in beta. Not permitted in
`basic` parsing_mode.
"""

max_split_table_cells: int
"""
Threshold number of table cells beyond which large tables are split if
`enable_split_tables` is True. Not permitted in 'basic' parsing_mode.
`enable_split_tables` is True. Not permitted in `basic` parsing_mode.
"""

page_range: str
"""Optional string representing page range to be parsed.

Format: comma-separated indexes (0-based) e.g. '0,1,2,5,6' or ranges (inclusive
of both ends) e.g. '0-2,5,6'
Format: comma-separated indexes (0-based, e.g. `0,1,2,5,6`), or ranges inclusive
of both ends (e.g. `0-2,5,6`)
"""

parse_mode: Literal["basic", "standard"]
"""The settings to use for parsing.

'basic' is for simple, text-only documents. 'standard' is for complex documents
`basic` is for simple, text-only documents. `standard` is for complex documents
with images, complex hierarchy, and/or no natively encoded textual data (e.g.
for scanned documents).
"""
4 changes: 2 additions & 2 deletions src/contextual/types/parse_job_results_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ class ParseJobResultsParams(TypedDict, total=False):
"""The desired output format(s) of the parsed file.

Must be `markdown-document`, `markdown-per-page`, and/or `blocks-per-page`.
Specify multiple values to get multiple formats in the response.
`markdown-document` parses the whole document into a single concatenated
markdown output. `markdown-per-page` provides markdown output per page.
`blocks-per-page` provides a structured JSON representation of the content
blocks on each page, sorted by reading order. Specify multiple values to get
multiple formats in the response.
blocks on each page, sorted by reading order.
"""
Loading