Skip to content

feat: add blob.exif function support #1703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions bigframes/blob/_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,38 @@ def udf(self):
return self._session.read_gbq_function(udf_name)


def exif_func(src_obj_ref_rt: str) -> str:
    """Download an image and return its EXIF tags as a JSON object string.

    Args:
        src_obj_ref_rt: JSON string holding a readable URL for the image
            under ``["access_urls"]["read_url"]``.

    Returns:
        A JSON object string mapping each EXIF tag name (or the numeric tag
        id when Pillow has no name for it) to its value. ``"{}"`` when the
        image carries no EXIF data. Values that JSON cannot represent
        (for example ``bytes`` or rational numbers) are stored as
        ``str(value)``.

    Raises:
        requests.HTTPError: If the download answers with a 4xx/5xx status.
    """
    # Imports are function-local so the body is self-contained.
    # NOTE(review): presumably required because the function source is
    # shipped to a remote runtime -- confirm before hoisting them.
    import io
    import json

    from PIL import ExifTags, Image
    import requests
    from requests import adapters

    src_url = json.loads(src_obj_ref_rt)["access_urls"]["read_url"]

    # The context manager closes the pooled connections once the download
    # is finished.
    with requests.Session() as session:
        session.mount("https://", adapters.HTTPAdapter(max_retries=3))
        response = session.get(src_url, timeout=30)
        # Fail on HTTP errors here instead of letting Pillow choke on an
        # error page with a misleading "cannot identify image" message.
        response.raise_for_status()
        bts = response.content

    image = Image.open(io.BytesIO(bts))
    exif_data = image.getexif()

    exif_dict = {}
    for tag, value in exif_data.items():
        tag_name = ExifTags.TAGS.get(tag, tag)
        # EXIF values are not always JSON serializable; probe each value so
        # one exotic tag cannot make the final json.dumps fail for the
        # whole image.
        try:
            json.dumps(value)
        except (TypeError, ValueError):
            value = str(value)
        exif_dict[tag_name] = value

    return json.dumps(exif_dict)


# Pairs exif_func with the package names it imports at call time
# ("pillow" is the distribution that provides the PIL module).
# NOTE(review): presumably FunctionDef uses this list to install the
# dependencies where the function runs -- confirm against FunctionDef.
exif_func_def = FunctionDef(exif_func, ["pillow", "requests"])


# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string.
def image_blur_func(
src_obj_ref_rt: str, dst_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str
Expand Down
40 changes: 40 additions & 0 deletions bigframes/operations/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,46 @@ def get_runtime_json_str(
runtime = self._get_runtime(mode=mode, with_metadata=with_metadata)
return runtime._apply_unary_op(ops.ToJSONString())

def exif(
    self,
    *,
    connection: Optional[str] = None,
    max_batching_rows: int = 8192,
    container_cpu: Union[float, int] = 0.33,
    container_memory: str = "512Mi",
) -> bigframes.series.Series:
    """Extract EXIF data from each blob. Only image types are supported for now.

    Args:
        connection (str or None, default None): BQ connection used for the function's internet transactions. If None, uses the default connection of the session.
        max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
        container_cpu (int or float, default 0.33): Number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
        container_memory (str, default "512Mi"): Container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.

    Returns:
        bigframes.series.Series: JSON series of key-value pairs.
    """

    import bigframes.bigquery as bbq
    import bigframes.blob._functions as blob_func

    resolved_connection = self._resolve_connection(connection)

    # One-column frame of read-mode runtime JSON strings: the UDF input.
    runtime_df = self.get_runtime_json_str(mode="R").to_frame()

    transform = blob_func.TransformFunction(
        blob_func.exif_func_def,
        session=self._block.session,
        connection=resolved_connection,
        max_batching_rows=max_batching_rows,
        container_cpu=container_cpu,
        container_memory=container_memory,
    )
    exif_udf = transform.udf()

    # The UDF returns JSON text; parse it into JSON values before returning.
    exif_json_text = self._df_apply_udf(runtime_df, exif_udf)
    return bbq.parse_json(exif_json_text)

def image_blur(
self,
ksize: tuple[int, int],
Expand Down
2 changes: 1 addition & 1 deletion notebooks/experimental/multimodal_dataframe.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
},
"source": [
"### 1. Create Multimodal DataFrame\n",
"There are several ways to create Multimodal DataFrame. The easiest way is from the wiledcard paths."
"There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths."
]
},
{
Expand Down
24 changes: 24 additions & 0 deletions tests/system/large/blob/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,30 @@ def images_output_uris(images_output_folder: str) -> list[str]:
]


def test_blob_exif(
    bq_connection: str,
    test_session: bigframes.Session,
):
    """blob.exif() returns the EXIF tags of the test image as a JSON series."""
    source_df = test_session.from_glob_path(
        "gs://bigframes_blob_test/images_exif/*",
        name="blob_col",
        connection=bq_connection,
    )
    blob_series = source_df["blob_col"]

    actual = blob_series.blob.exif(connection=bq_connection)
    expected = bpd.Series(
        ['{"ExifOffset": 47, "Make": "MyCamera"}'],
        session=test_session,
        dtype=dtypes.JSON_DTYPE,
    )

    # Materialize both sides locally; only the values are compared, not the
    # dtype or the index type.
    actual_pd = actual.to_pandas()
    expected_pd = expected.to_pandas()
    pd.testing.assert_series_equal(
        actual_pd,
        expected_pd,
        check_dtype=False,
        check_index_type=False,
    )


def test_blob_image_blur_to_series(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down