Skip to content

feat: Support input/output files by introducing FileReference Pydantic annotation #240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion examples/helloworld/tesseract_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,30 @@

from pydantic import BaseModel, Field

from tesseract_core.runtime import FileReference


class InputSchema(BaseModel):
    # Name interpolated into the greeting produced by `apply`.
    name: str = Field(description="Name of the person you want to greet.")
    # Path to the file that `apply` opens and reads.
    input_file: FileReference = Field(description="A file that can be used as input.")


class OutputSchema(BaseModel):
    # Greeting text assembled by `apply`.
    greeting: str = Field(description="A greeting!")
    # Path of the file that `apply` writes before returning.
    output_file: FileReference = Field(description="We'll dump some output here.")


def apply(inputs: InputSchema) -> OutputSchema:
    """Greet a person and demonstrate reading and writing files via FileReference.

    Reads ``inputs.input_file``, prints its content, writes a fixed message to
    ``test_out.txt`` alongside the input file, and returns the greeting
    together with the path of the file that was written.
    """
    # Read the file to demonstrate usage of FileReference.
    with inputs.input_file.open() as f:
        file_content = f.read()
    print(f"File content: {file_content}")

    # Create output file "test_out.txt" in the same directory as the input file.
    output_file = inputs.input_file.with_name("test_out.txt")
    with output_file.open("w") as f:
        f.write("This is some output content.")

    # NOTE: the earlier one-line `return OutputSchema(greeting=...)` is gone on
    # purpose; it skipped the file handling and omitted `output_file`.
    return OutputSchema(
        greeting=f"Hello {inputs.name}! Read file: '{inputs.input_file}' with content '{file_content}'",
        output_file=output_file,
    )
2 changes: 2 additions & 0 deletions tesseract_core/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .schema_types import (
Array,
Differentiable,
FileReference,
Float16,
Float32,
Float64,
Expand All @@ -50,6 +51,7 @@
__all__ = [
"Array",
"Differentiable",
"FileReference",
"Float16",
"Float32",
"Float64",
Expand Down
56 changes: 56 additions & 0 deletions tesseract_core/runtime/schema_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from abc import ABCMeta
from enum import IntEnum
from functools import partial
from pathlib import Path
from typing import (
Annotated,
Any,
Expand Down Expand Up @@ -356,6 +357,61 @@ def is_differentiable(obj: Any) -> bool:
return False


class FileReference:
Copy link
Contributor

@nmheim nmheim Jun 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just reading this before our meeting and leaving my thoughts here so they do not get lost (sorry for commenting on a draft). Maybe we can do this with a field validator?

from typing import Annotated
from pydantic import AfterValidator, RootModel

def is_relative(path: Path) -> Path:
    """Return *path* expressed relative to the current working directory.

    The path is resolved first, so relative inputs such as ``./test.txt`` and
    absolute inputs located inside the working directory are both accepted.

    Raises:
        ValueError: If the resolved path lies outside the working directory.
    """
    # Resolve both sides: `is_relative_to` is a purely lexical comparison and
    # would otherwise reject every relative input.
    cwd = Path.cwd().resolve()
    resolved = path.resolve()
    if not resolved.is_relative_to(cwd):
        raise ValueError(
            f"FileReference path must be relative to the current working directory: {path}"
        )
    return resolved.relative_to(cwd)


class FileReference(RootModel):
    root: Annotated[Path, AfterValidator(is_relative)]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I played around with RootModel a bit. The key issue I run into is that for a schema like

class InputSchema(BaseModel):
    name: str = Field(description="Name of the person you want to greet.")
    input_file: FileReference = Field(description="A file that can be used as input.")

the server gets the validated model

name='Osborne' input_file=Apply_FileReference(root=PosixPath('test.txt'))

I don't see a straightforward way to get rid of the nested model and resolve straight to a path. In order to avoid nesting, my understanding is that we actually have to go with the custom type (which also has the upside that we can even pass plain strings and automatically convert them to Path objects).

Copy link
Contributor

@nmheim nmheim Jun 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, does this help?

In [4]: m = InputSchema(name="Osborne", input_file="./test.txt")

In [5]: m.model_dump()
Out[5]: {'name': 'Osborne', 'input_file': PosixPath('test.txt')}

In [6]: m.input_file.model_dump()
Out[6]: PosixPath('test.txt')

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, that gets rid of the nested model! The catch is that in apply, the user gets an InputSchema (not a JSON/dict), so we can't dump the input before passing to user code (and I don't think the user should have to unpack inputs).

I've pushed the RootModel experiment to https://github.com/pasteurlabs/tesseract-core/tree/linus/feat-file-schema-root-model

You can give it a try by serving the helloworld example and

curl -d '{"inputs": {"name": "Osborne", "input_file": "test.txt"}}' -H "Content-Type: application/json" http://127.0.0.1:8080/apply

"""Type annotation for a file path that must be relative and exist on the filesystem."""

@classmethod
def __get_pydantic_core_schema__(
    cls,
    _source_type: Any,
    _handler: GetCoreSchemaHandler,
) -> core_schema.CoreSchema:
    """Get the core schema for the FileReference type.

    Validation is a two-step chain: the raw value must be either a string or
    a ``Path`` instance, and is then passed to ``cls.validate``. On
    serialization, strings and ``Path`` objects are emitted as plain strings;
    any other value is passed through unchanged.
    """
    # Step 1: accept only str or Path input.
    accepted_input = core_schema.union_schema(
        [
            core_schema.str_schema(),
            core_schema.is_instance_schema(Path),
        ]
    )
    # Step 2: hand the accepted value to the class-level validator.
    path_check = core_schema.with_info_plain_validator_function(cls.validate)

    return core_schema.chain_schema(
        [accepted_input, path_check],
        serialization=core_schema.plain_serializer_function_ser_schema(
            lambda v, _info: str(v) if isinstance(v, (str, Path)) else v,
            info_arg=True,
        ),
    )

@classmethod
def validate(cls, value: Any, _info: Any) -> Path:
    """Validate a file reference and normalise it to a cwd-relative path.

    The value is resolved to an absolute path first, so both relative inputs
    and absolute inputs located inside the current working directory are
    accepted.

    Args:
        value: Anything ``Path`` can be constructed from (a string or a Path).
        _info: Validation info supplied by pydantic; unused.

    Returns:
        The path relative to the current working directory.

    Raises:
        ValueError: If the path lies outside the current working directory,
            does not exist, or is not a regular file.
    """
    # Resolve the working directory too, so both sides of the comparison are
    # in canonical form, and look it up only once.
    cwd = Path.cwd().resolve()
    path = Path(value).resolve()
    if not path.is_relative_to(cwd):
        raise ValueError(
            f"FileReference path must be relative to the current working directory: {path}"
        )

    # Turn into a relative path wrt the current working directory.
    path = path.relative_to(cwd)
    if not path.exists():
        raise ValueError(f"FileReference path does not exist: {path}")
    if not path.is_file():
        raise ValueError(f"FileReference path is not a file: {path}")
    return path

@classmethod
def __get_pydantic_json_schema__(
    cls, core_schema: core_schema.CoreSchema, handler: Any
) -> JsonSchemaValue:
    """Return a fixed JSON schema describing the type as a path string.

    Neither argument is consulted; the same dictionary is returned every
    time.
    """
    # Base it on string type and add a custom description.
    # NOTE(review): the `core_schema` parameter shadows the module of the same
    # name inside this body; harmless here because the body never uses it.
    return {
        "type": "string",
        "format": "relative-path",
        "description": "A relative filesystem path (must not be absolute)",
    }


# Export concrete scalar types
Float16 = Array[(), "float16"]
Float32 = Array[(), "float32"]
Expand Down
Loading