Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Run performance benchmarks
#
# Builds the Criterion benchmark target(s) with cargo-codspeed inside a GDAL
# container and runs them through the CodSpeed action.
name: Benchmarks

on:
  push:
    branches: ["main"]
  release:
    types: [published]
  pull_request:
    types: [opened, reopened, synchronize]
    branches: ["main"]

env:
  CARGO_TERM_COLOR: always

jobs:
  rust-bench:
    runs-on: ubuntu-24.04
    container:
      image: ghcr.io/osgeo/gdal:ubuntu-small-3.11.5
      options: --privileged
    permissions:
      contents: read # required for actions/checkout
      id-token: write # required for OIDC authentication with CodSpeed

    steps:
      - name: Install dev dependencies and setup git
        # apt-get is used instead of apt because apt's command-line interface
        # is not guaranteed to be stable for scripted use.
        # The workspace path is quoted so the git config call survives paths
        # containing whitespace.
        run: |
          apt-get update
          apt-get install -y build-essential cmake git libclang-dev pkg-config
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Download and extract files
        # Converts the remote Sentinel-2 TCI image into a local LZW-compressed,
        # tiled GeoTIFF at benches/TCI_lzw.tif, the file the benchmark reads.
        run: |
          gdal raster convert --co COMPRESS=LZW --co TILED=YES --co PREDICTOR=2 https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/12/S/UF/2022/6/S2B_12SUF_20220609_0_L2A/TCI.tif benches/TCI_lzw.tif
          ls -lh benches/

      - name: Setup rust toolchain, cache and cargo-codspeed binary
        uses: moonrepo/setup-rust@v1
        with:
          channel: stable
          # NOTE(review): caching is switched off here, so `cache-target`
          # presumably has no effect and the step name's mention of a cache
          # looks stale - confirm against the setup-rust action inputs.
          cache: false
          cache-target: release
          bins: cargo-codspeed

      - name: Build the benchmark target(s)
        run: cargo codspeed build

      - name: Run the benchmarks
        uses: CodSpeedHQ/action@v4
        with:
          mode: simulation
          run: cargo codspeed run
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ tokio = { version = "1.43.0", optional = true, default-features = false, feature
weezl = "0.1.0"

[dev-dependencies]
criterion = { package = "codspeed-criterion-compat", version = "4.1.0" }
object_store = { version = "0.12", features = ["http"] }
rayon = "1.11.0"
tiff = "0.9.1"
tokio = { version = "1.9", features = [
"macros",
Expand All @@ -42,3 +44,7 @@ reqwest = ["dep:reqwest"]
object_store = ["dep:object_store"]

[package.metadata.cargo-all-features]

[[bench]]
name = "read_tiff"
harness = false
97 changes: 97 additions & 0 deletions benches/read_tiff.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//! Benchmarks on reading a GeoTIFF
use std::path::PathBuf;
use std::sync::Arc;

use async_tiff::decoder::DecoderRegistry;
use async_tiff::error::{AsyncTiffError, AsyncTiffResult};
use async_tiff::metadata::{PrefetchBuffer, TiffMetadataReader};
use async_tiff::reader::ObjectReader;
use async_tiff::{ImageFileDirectory, Tile};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use object_store::path::Path;
use object_store::{parse_url, ObjectStore};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use rayon::ThreadPoolBuilder;
use reqwest::Url;
use tokio::runtime;

/// Reads and decodes every tile of the TIFF file at `fpath`.
///
/// The function runs in three stages:
/// 1. `fpath` is canonicalized, converted to a file URL and passed to
///    `parse_url`; the resulting store and path are wrapped in an
///    `ObjectReader`.
/// 2. A current-thread Tokio runtime drives an async block that opens the
///    metadata through a `PrefetchBuffer` (constructed with `32 * 1024`),
///    reads all IFDs, builds the index pair of every tile position and
///    fetches all tiles of the first IFD.
/// 3. The fetched tiles are decoded on a dedicated 4-thread rayon pool and the
///    decoded output is collected into a single `Vec<u8>`.
///
/// The decoded bytes are only length-checked and then dropped; the function
/// returns `Ok(())`.
///
/// # Errors
///
/// Returns an error if canonicalizing the path, `parse_url`, building the
/// Tokio runtime or building the rayon thread pool fails.
///
/// # Panics
///
/// Panics if the path cannot be converted to a URL, if the async block yields
/// an error, if a tile fails to decode, or if any of the hard-coded
/// assertions fail (exactly 1 IFD, 1849 tiles, 363528192 decoded bytes).
/// Those assertions tie this function to one specific input file.
fn read_tiff(fpath: &str) -> AsyncTiffResult<()> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we split this up into a few helper functions? Maybe start with splitting

fn open_tiff(path: &str) -> ObjectReader

hopefully as we expand the benchmarks we can reuse that part

let abs_path: PathBuf = std::path::Path::new(fpath).canonicalize()?;
// NOTE(review): `expect` takes a plain string and performs no formatting, so
// the panic message contains the literal text "{abs_path}", not the path.
let tif_url: Url = Url::from_file_path(abs_path).expect("Failed to parse url: {abs_path}");
let (store, path): (Box<dyn ObjectStore>, Path) = parse_url(&tif_url)?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether this or just using LocalFileSystem::new is easier. Up to you


let reader = ObjectReader::new(Arc::new(store), path);
let decoder_registry = DecoderRegistry::default();

// Initialize async runtime
let runtime = runtime::Builder::new_current_thread()
.enable_all()
.build()?;

// Get list of tiles in TIFF file stream (using tokio async runtime)
let tiles: Vec<Tile> = runtime
.block_on(async {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be great to move this entire async fn into a top-level function, and then just call

let tiles = runtime.block_on(read_tiles(reader, path))

or something like that

// Read metadata header
let prefetch_reader = PrefetchBuffer::new(reader.clone(), 32 * 1024).await?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we merge #140 first, we'll have to change this import.

let mut metadata_reader = TiffMetadataReader::try_open(&prefetch_reader).await?;

// Read Image File Directories
let ifds: Vec<ImageFileDirectory> =
metadata_reader.read_all_ifds(&prefetch_reader).await?;

// NOTE(review): the assertion panics unless exactly one IFD was read, so the
// `ok_or` error on `first()` below cannot be reached once it has passed.
assert_eq!(ifds.len(), 1); // should have only 1 IFD
let ifd: &ImageFileDirectory = ifds.first().ok_or(AsyncTiffError::General(
"unable to read first IFD".to_string(),
))?;

let (x_count, y_count) = ifd.tile_count().ok_or(AsyncTiffError::General(
"unable to get IFD count".to_string(),
))?;
// dbg!(x_count, y_count); // 43 * 43 = 1849
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Convert into assert_eq or remove?


// Get cartesian product of x and y tile ids
// `x_ids` repeats each x index `y_count` times while `y_ids` cycles through
// 0..y_count once per x index, so (x_ids[k], y_ids[k]) visits every tile
// position exactly once.
let x_ids: Vec<usize> = (0..x_count)
.flat_map(|i| (0..y_count).map(move |_j| i))
.collect();
let y_ids: Vec<usize> = (0..x_count).flat_map(|_i| 0..y_count).collect();

let tiles: Vec<Tile> = ifd.fetch_tiles(&x_ids, &y_ids, &reader).await?;
assert_eq!(tiles.len(), 1849);

Ok::<Vec<Tile>, AsyncTiffError>(tiles)
})
// NOTE(review): any error produced inside the async block panics here instead
// of being propagated through this function's `AsyncTiffResult`.
.unwrap();

// Do actual decoding of TIFF tile data (multi-threaded using rayon)
let pool = ThreadPoolBuilder::new()
.num_threads(4)
.build()
.map_err(|err| AsyncTiffError::External(Box::new(err)))?;

// NOTE(review): a tile that fails to decode panics inside the pool via
// `unwrap` rather than surfacing as an error.
let tile_bytes: Vec<u8> = pool.install(|| {
tiles
.into_par_iter()
.flat_map_iter(|tile| tile.decode(&decoder_registry).unwrap())
.collect()
});
Comment on lines +65 to +76
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rayon probably defaults to LIFO instead of FIFO here (xref #41). Will change it to use a FIFO scope in #133 (need to figure out how though).

// NOTE(review): 363528192 = 1849 * 196608 and 196608 = 256 * 256 * 3, so each
// of the 1849 tiles presumably decodes to a full 256x256x3 buffer, including
// edge tiles that extend past the image bounds - TODO confirm the tile size.
assert_eq!(tile_bytes.len(), 363528192); // should be 361681200, why not?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm what's happening here? Can you explain why you expect the number to be different?

Is it possible there's an issue with whether this is compressed/decompressed? Or whether there's a step in the decoding missing?

Copy link
Member Author

@weiji14 weiji14 Nov 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be 10980x10980x3bands = 361681200 bytes (since this is u8 dtype, so 1 byte per pixel). The number is not lower, but higher, so the decompression should have worked, but not sure what's causing the deviation. I should probably try to find a way to piece the bytes together and display it to see what's going on.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe read it in python to inspect it?


Ok(())
}

/// Registers the `read_tiff` benchmark group with Criterion.
///
/// Throughput is reported against the on-disk size of the benchmark TIFF, and
/// every iteration performs one complete `read_tiff` pass over that file.
///
/// # Panics
///
/// Panics if the benchmark file's metadata cannot be read, or if any
/// `read_tiff` call returns an error.
pub fn criterion_benchmark(c: &mut Criterion) {
    // Single definition of the input path so the size probe and the benchmark
    // body cannot drift apart.
    const TIFF_PATH: &str = "benches/TCI_lzw.tif";

    let mut group = c.benchmark_group("read_tiff");

    let fsize: u64 = std::fs::metadata(TIFF_PATH)
        .unwrap_or_else(|err| panic!("failed to read metadata of {}: {}", TIFF_PATH, err))
        .len();
    // Original author's note: 55MB filesize.
    group.throughput(Throughput::BytesDecimal(fsize));

    // CPU decoding using async-tiff
    group.sample_size(30);
    group.bench_function("async-tiff", |b| {
        // Fail loudly on error: a discarded `Err` would otherwise be timed as
        // if it were a (very fast) successful read.
        b.iter(|| read_tiff(TIFF_PATH).expect("read_tiff failed during benchmark"))
    });
    group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);