Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[data][tests] Update image processing benchmarks to use ImageNet #39207

Merged
merged 16 commits into from
Sep 5, 2023
Merged
Prev Previous commit
Next Next commit
update
Signed-off-by: Stephanie Wang <swang@cs.berkeley.edu>
  • Loading branch information
stephanie-wang committed Sep 5, 2023
commit 48bf64bf6b7d164b20232fd42c11c36b01f2cfe1
22 changes: 5 additions & 17 deletions release/nightly_tests/dataset/image_loader_microbenchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def tf_crop_and_flip(image_buffer, num_channels=3):
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
shape = tf.shape(image_buffer)
if len(shape) == 4:
if len(shape) == num_channels + 1:
shape = shape[1:]

bbox = tf.constant(
Expand Down Expand Up @@ -196,18 +196,7 @@ def get_transform(to_torch_tensor):
)
return transform

transform = torchvision.transforms.Compose(
[
torchvision.transforms.RandomResizedCrop(
size=DEFAULT_IMAGE_SIZE,
scale=(0.05, 1.0),
ratio=(0.75, 1.33),
),
torchvision.transforms.RandomHorizontalFlip(),
# torchvision.transforms.ToTensor(),
]
)

# Capture `transform` in the map UDFs.
transform = get_transform(False)
stephanie-wang marked this conversation as resolved.
Show resolved Hide resolved

def crop_and_flip_image(row):
Expand All @@ -229,11 +218,10 @@ def decode_image_crop_and_flip(row):
# NUM_CHANNELS = 3
# row["image"] = np.frombuffer(row["image"], dtype=np.uint8).reshape((NUM_CHANNELS, row["height"], row["width"])) #
row["image"] = Image.frombytes("RGB", (row["height"], row["width"]), row["image"])
del row["width"]
del row["height"]
# Convert back to np to avoid storing an np.object array.
row["image"] = np.array(transform(row["image"]))
return row
return {
"image": np.array(transform(row["image"]))
}


class MdsDatasource(ray.data.datasource.FileBasedDatasource):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ rm -rf "$MOSAIC_DIR"
rm -rf "$TFRECORDS_DIR"
rm -rf "$PARQUET_DIR"

# Download 1GB dataset from S3 to local disk.
aws s3 sync s3://imagenetmini1000/1gb "$DIR"

# Download 1GB dataset from S3 to local disk so we can preprocess with mosaic.
aws s3 sync s3://imagenetmini1000/1gb $DIR
stephanie-wang marked this conversation as resolved.
Show resolved Hide resolved
# Generated with
Expand Down