Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified data/compare_ome_arrow_only_runs.parquet
Binary file not shown.
Binary file modified data/compare_ome_arrow_only_summary.parquet
Binary file not shown.
Binary file modified images/compare_ome_arrow_only_summary.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"pandas>=2.2",
"poethepoet>=0.38",
"pyarrow>=15",
"tifffile>=2024.7.24",
"vortex-data>=0.56",
"zarr>=3.1.5",
]
Expand Down
68 changes: 68 additions & 0 deletions src/benchmarks/compare_ome_arrow_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

Setup:
- Artifacts are written under `data/` (git-ignored).
- TIFF benchmarks require `tifffile` (included in project deps).
- If you want the OME-Zarr timings, install the extra deps:
`uv pip install bioio-ome-zarr zarr numcodecs`.
"""
Expand Down Expand Up @@ -59,6 +60,7 @@
VORTEX_PATH = DATA_DIR / "ome_only.vortex"
DUCK_PATH = DATA_DIR / "ome_only.duckdb"
OME_ZARR_DIR = DATA_DIR / "ome_zarr_runs"
TIFF_DIR = DATA_DIR / "tiff_runs"
LANCE_TABLE = "bench"
DUCK_TABLE = "bench"

Expand Down Expand Up @@ -96,6 +98,7 @@ def _pkg_version(name: str, default: str = "missing") -> str:
"bioio_ome_zarr": _pkg_version("bioio-ome-zarr"),
"zarr": _pkg_version("zarr"),
"numcodecs": _pkg_version("numcodecs"),
"tifffile": _pkg_version("tifffile"),
}
FORMAT_VERSIONS = {
"Parquet (pyarrow, zstd)": f"pyarrow {VERSIONS['pyarrow']}",
Expand Down Expand Up @@ -413,6 +416,52 @@ def ome_zarr_random_read_native(indices, base_path=OME_ZARR_DIR):
return out


# TIFF helpers — one .tiff file per image inside a shared directory (reported as "dir-per-image")
def tiff_available() -> bool:
    """Report whether the ``tifffile`` package can be imported.

    Returns:
        ``True`` when ``import tifffile`` succeeds, ``False`` when the
        import raises any ``Exception``.
    """
    try:
        import tifffile  # noqa: F401
    except Exception:
        # Any import-time failure is treated as "not available".
        return False
    return True


def tiff_write_all(arrays, base_path=TIFF_DIR):
    """Write every array in ``arrays`` to its own TIFF file under ``base_path``.

    ``base_path`` is first handed to ``drop_path`` (helper defined elsewhere
    in this file; presumably clears previous output — confirm) and then
    (re)created. Files are named ``img_00000.tiff``, ``img_00001.tiff``, ...
    following the iteration order of ``arrays``.

    Args:
        arrays: Iterable of images accepted by ``tifffile.imwrite``.
        base_path: Directory that receives the TIFF files.

    Raises:
        RuntimeError: If ``tifffile`` cannot be imported.
    """
    if not tiff_available():
        raise RuntimeError("tifffile is required for TIFF benchmarks.")
    drop_path(base_path)
    base_path.mkdir(parents=True, exist_ok=True)
    from tifffile import imwrite

    for position, image in enumerate(arrays):
        # Zero-padded index keeps lexicographic order equal to write order.
        target = base_path / f"img_{position:05d}.tiff"
        imwrite(str(target), image)


def tiff_read_all(base_path=TIFF_DIR):
    """Load every ``*.tiff`` file found directly under ``base_path``.

    Args:
        base_path: Directory to scan for ``*.tiff`` files.

    Returns:
        A list with the result of ``tifffile.imread`` for each file, in
        sorted path order.

    Raises:
        RuntimeError: If ``tifffile`` cannot be imported.
    """
    if not tiff_available():
        raise RuntimeError("tifffile is required for TIFF benchmarks.")
    from tifffile import imread

    # Sorting the glob results makes the read order deterministic.
    return [imread(str(entry)) for entry in sorted(base_path.glob("*.tiff"))]


def tiff_random_read(indices, base_path=TIFF_DIR):
    """Load the TIFF files at the given positions of the sorted file listing.

    Args:
        indices: Iterable of integer positions into the sorted list of
            ``*.tiff`` files under ``base_path``.
        base_path: Directory to scan for ``*.tiff`` files.

    Returns:
        A list with the result of ``tifffile.imread`` for each in-range
        index, in the order the indices were given. Indices below zero or
        at/after the number of files are skipped without error.

    Raises:
        RuntimeError: If ``tifffile`` cannot be imported.
    """
    if not tiff_available():
        raise RuntimeError("tifffile is required for TIFF benchmarks.")
    from tifffile import imread

    candidates = sorted(base_path.glob("*.tiff"))
    total = len(candidates)
    # Out-of-range positions are dropped rather than raising IndexError.
    return [
        imread(str(candidates[position]))
        for position in indices
        if 0 <= position < total
    ]


if RUN_BENCHMARKS:
format_configs = [
{
Expand Down Expand Up @@ -462,6 +511,24 @@ def ome_zarr_random_read_native(indices, base_path=OME_ZARR_DIR):
},
]

if tiff_available():
format_configs.append(
{
"name": "TIFF (dir-per-image)",
"path": TIFF_DIR,
"write": lambda arrays, path=TIFF_DIR: tiff_write_all(arrays, path),
"read": lambda path=TIFF_DIR: tiff_read_all(path),
"random_read": lambda path=TIFF_DIR, indices=None: tiff_random_read(
indices, path
),
"table": ome_arrays, # list of numpy arrays
"random_repeats": RANDOM_READ_REPEATS,
"version": f"tifffile {VERSIONS.get('tifffile', '')}",
}
)
else:
raise RuntimeError("TIFF format requires tifffile. Install it to proceed.")

if ome_zarr_native_available():
format_configs.append(
{
Expand Down Expand Up @@ -552,6 +619,7 @@ def ome_zarr_random_read_native(indices, base_path=OME_ZARR_DIR):
"Lance (lancedb)": "#C86A1B",
"Vortex": "#2E7D4F",
"DuckDB (file table)": "#B23B3B",
"TIFF (dir-per-image)": "#5A6B3A",
"OME-Zarr (dir-per-image)": "#7A5A3C",
}
colors = [COLOR_MAP.get(name, "#BAB0AC") for name in summary["format"]]
Expand Down
2 changes: 2 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.