Skip to content

Commit 3f79528

Browse files
authored
feat: add blob.exif function support (#1703)
1 parent 4389f4c commit 3f79528

File tree

4 files changed

+97
-1
lines changed

4 files changed

+97
-1
lines changed

bigframes/blob/_functions.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,38 @@ def udf(self):
108108
return self._session.read_gbq_function(udf_name)
109109

110110

111+
def exif_func(src_obj_ref_rt: str) -> str:
    """Download an image and return its EXIF metadata as a JSON string.

    All imports live inside the function body; presumably this is because the
    function is shipped and executed as a standalone remote function, so keep
    the body self-contained (no module-level helpers).

    Args:
        src_obj_ref_rt: ObjectRefRuntime serialized as a JSON string. The
            image is fetched from its ``access_urls.read_url`` entry.

    Returns:
        A JSON object string mapping EXIF tag names to their values. Tags
        missing from ``PIL.ExifTags.TAGS`` keep their numeric id as the key.
        Values that JSON cannot represent natively are emitted as strings.
        An image without EXIF data yields ``"{}"``.

    Raises:
        requests.HTTPError: If the download returns an HTTP error status.
        KeyError: If the input JSON lacks ``access_urls.read_url``.
    """
    import io
    import json

    from PIL import ExifTags, Image
    import requests
    from requests import adapters

    src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
    src_url = src_obj_ref_rt_json["access_urls"]["read_url"]

    # Use the session as a context manager so pooled connections are released.
    with requests.Session() as session:
        session.mount("https://", adapters.HTTPAdapter(max_retries=3))
        response = session.get(src_url, timeout=30)
        # Surface HTTP failures directly instead of handing an error body to
        # PIL, which would fail with a misleading image-decoding error.
        response.raise_for_status()
        bts = response.content

    exif_dict = {}
    with Image.open(io.BytesIO(bts)) as image:
        exif_data = image.getexif()
        if exif_data:
            for tag, value in exif_data.items():
                tag_name = ExifTags.TAGS.get(tag, tag)
                exif_dict[tag_name] = value

    # EXIF values are frequently bytes or rational objects that the json
    # module cannot encode; stringify those deliberately rather than raising
    # TypeError and failing the whole row.
    return json.dumps(exif_dict, default=str)
138+
139+
140+
# Pair exif_func with the third-party packages its body imports (PIL comes
# from "pillow"); presumably these are installed in the remote function's
# runtime -- confirm against FunctionDef.
exif_func_def = FunctionDef(exif_func, ["pillow", "requests"])
141+
142+
111143
# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string.
112144
def image_blur_func(
113145
src_obj_ref_rt: str, dst_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str

bigframes/operations/blob.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,46 @@ def get_runtime_json_str(
300300
runtime = self._get_runtime(mode=mode, with_metadata=with_metadata)
301301
return runtime._apply_unary_op(ops.ToJSONString())
302302

303+
def exif(
    self,
    *,
    connection: Optional[str] = None,
    max_batching_rows: int = 8192,
    container_cpu: Union[float, int] = 0.33,
    container_memory: str = "512Mi",
) -> bigframes.series.Series:
    """Extract EXIF metadata from the blobs. Only image types are supported for now.

    Args:
        connection (str or None, default None): BQ connection used for the function's internet transactions. If None, uses the default connection of the session.
        max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
        container_cpu (int or float, default 0.33): Number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
        container_memory (str, default "512Mi"): Container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.

    Returns:
        bigframes.series.Series: JSON series of key-value pairs.
    """
    import bigframes.bigquery as bbq
    import bigframes.blob._functions as blob_func

    connection = self._resolve_connection(connection)
    # Read-mode runtime JSON strings are the single input column of the UDF.
    runtime_df = self.get_runtime_json_str(mode="R").to_frame()

    transform = blob_func.TransformFunction(
        blob_func.exif_func_def,
        session=self._block.session,
        connection=connection,
        max_batching_rows=max_batching_rows,
        container_cpu=container_cpu,
        container_memory=container_memory,
    )
    exif_udf = transform.udf()

    # The UDF returns JSON text; parse it so callers get a JSON series.
    raw_result = self._df_apply_udf(runtime_df, exif_udf)
    return bbq.parse_json(raw_result)
342+
303343
def image_blur(
304344
self,
305345
ksize: tuple[int, int],

notebooks/experimental/multimodal_dataframe.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106
},
107107
"source": [
108108
"### 1. Create Multimodal DataFrame\n",
109-
"There are several ways to create Multimodal DataFrame. The easiest way is from the wiledcard paths."
109+
"There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths."
110110
]
111111
},
112112
{

tests/system/large/blob/test_function.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,30 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5151
]
5252

5353

54+
def test_blob_exif(
    bq_connection: str,
    test_session: bigframes.Session,
):
    """Check that ``blob.exif`` returns the expected EXIF JSON for the test image folder."""
    blob_df = test_session.from_glob_path(
        "gs://bigframes_blob_test/images_exif/*",
        name="blob_col",
        connection=bq_connection,
    )
    blob_series = blob_df["blob_col"]

    actual = blob_series.blob.exif(connection=bq_connection)

    # A single row of EXIF JSON is expected from the test folder.
    expected_json_rows = ['{"ExifOffset": 47, "Make": "MyCamera"}']
    expected = bpd.Series(
        expected_json_rows,
        session=test_session,
        dtype=dtypes.JSON_DTYPE,
    )

    # Only the values matter here; dtype and index type may differ.
    pd.testing.assert_series_equal(
        actual.to_pandas(),
        expected.to_pandas(),
        check_dtype=False,
        check_index_type=False,
    )
76+
77+
5478
def test_blob_image_blur_to_series(
5579
images_mm_df: bpd.DataFrame,
5680
bq_connection: str,

0 commit comments

Comments
 (0)