Skip to content

Commit

Permalink
Exif Nested Metadata support for Image Dashboards (#2542)
Browse files Browse the repository at this point in the history
* exif metadata extension

* requirement update for latest pillow attributes

* gate update to avoid py 3.7

* python lint fixes

* isort lint fixes

* test fixes

* python lint fixes

* modified gate for matplotlib on windows

* lint fixes

* gate fix

* gate fix

* reverted gate

* test fix based on OS

* isort fix

* auto lintfix

* adding matplotlib install

* tweaking matplotlib install

* gate cleanup

* removed byte decoding

* multiple click support

* IC test fixes

* test fixes

* test fixes

* test fixes

* test fix

* test fix

* test changes

* auto lint fixes

* comment fixes

* comment fixes

* auto lint fixes
  • Loading branch information
Advitya17 authored Mar 5, 2024
1 parent 7292ba3 commit e2c7dd0
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export const FridgeImageClassificationModelDebugging = {
modelOverviewData: {
featureCohortView: {
firstFeatureToSelect: "mean_pixel_value",
multiFeatureCohorts: 3,
multiFeatureCohorts: 6,
secondFeatureToSelect: "Make",
singleFeatureCohorts: 3
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export const FridgeMultilabelModelDebugging = {
modelOverviewData: {
featureCohortView: {
firstFeatureToSelect: "mean_pixel_value",
multiFeatureCohorts: 3,
multiFeatureCohorts: 6,
secondFeatureToSelect: "Make",
singleFeatureCohorts: 3
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import { getOS } from "../../../../util/getOS";

const FeatureCohorts = getOS() === "Linux" ? [2, 3] : 3;
const FeatureCohorts = getOS() === "Linux" ? [3, 6] : 6;

export const FridgeObjectDetectionModelDebugging = {
causalAnalysisData: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,30 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
assertNumberOfChartRowsEqual(
datasetShape,
selectedFeatures,
defaultVisibleChart
defaultVisibleChart,
isVision
);
}
}

function assertNumberOfChartRowsEqual(
datasetShape: IModelAssessmentData,
selectedFeatures: number,
chartIdentifier: Locators
chartIdentifier: Locators,
isVision: boolean
): void {
const featureCohortView = datasetShape.modelOverviewData?.featureCohortView;
let expectedNumberOfCohorts = featureCohortView?.singleFeatureCohorts;
if (selectedFeatures > 1) {
expectedNumberOfCohorts = featureCohortView?.multiFeatureCohorts;
}
console.log(selectedFeatures);
console.log(expectedNumberOfCohorts);
if (Array.isArray(expectedNumberOfCohorts)) {
cy.get(getChartItems(chartIdentifier))
.its("length")
.should("be.gte", expectedNumberOfCohorts[0])
.and("be.lte", expectedNumberOfCohorts[1]);
} else if (isVision) {
cy.get(getChartItems(chartIdentifier)).its("length").should("be.gt", 2);
} else {
cy.get(getChartItems(chartIdentifier)).should(
"have.length",
Expand Down
2 changes: 1 addition & 1 deletion libs/e2e/src/util/comboBox.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ export function multiSelectComboBox(
item
)}`
)
.click();
.click({ multiple: true });
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
from typing import Optional

import pandas as pd
from PIL import Image
from PIL import ExifTags, Image
from PIL.ExifTags import TAGS
from PIL.TiffImagePlugin import IFDRational
from tqdm import tqdm

from responsibleai.feature_metadata import FeatureMetadata
from responsibleai_vision.common.constants import (ExtractedFeatures,
ImageColumns)
from responsibleai_vision.utils.image_reader import (
get_all_exif_feature_names, get_image_from_path,
IFD_CODE_LOOKUP, get_all_exif_feature_names, get_image_from_path,
get_image_pointer_from_path)

MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
Expand Down Expand Up @@ -91,35 +92,48 @@ def extract_features(image_dataset: pd.DataFrame,
return results, feature_names


def append_exif_features(image, row_feature_values, feature_names,
blacklisted_tags, feature_metadata):
def process_data(data,
                 tag,
                 feature_names,
                 feature_metadata,
                 row_feature_values,
                 blacklisted_tags):
    """Record a single EXIF value in the current row of extracted features.

    ``IFDRational`` values are converted to ``float`` first. Only ``str``,
    ``int`` and ``float`` values are recorded; any other type (for example
    ``bytes`` or tuples) is silently ignored.

    :param data: The raw EXIF value read for ``tag``.
    :param tag: The tag name, or the raw tag id when no name is known.
    :param feature_names: Ordered collection of known feature names. The
        position of ``tag`` in it selects the slot written in
        ``row_feature_values``.
    :param feature_metadata: Object exposing a ``categorical_features``
        list, which is extended in place with ``tag`` when first recorded.
    :param row_feature_values: Mutable sequence of feature values for the
        current row, updated in place.
    :param blacklisted_tags: Set-like collection of tags that have already
        triggered a warning, updated in place so that each unknown tag
        only warns once.
    """
    if isinstance(data, IFDRational):
        # Pillow deliberately allows rationals with a zero denominator
        # (e.g. 0/0) because they occur in real-world EXIF data. Dividing
        # them would raise ZeroDivisionError and abort feature extraction,
        # so they are mapped to NaN instead.
        if data.denominator == 0:
            data = float('nan')
        else:
            data = data.numerator / data.denominator
    if not isinstance(data, (str, int, float)):
        return
    if tag in feature_names:
        # NOTE(review): numeric values are registered as categorical here
        # as well as strings -- confirm that this is intended.
        if tag not in feature_metadata.categorical_features:
            feature_metadata.categorical_features.append(tag)
        row_feature_values[feature_names.index(tag)] = data
    elif tag not in blacklisted_tags:
        blacklisted_tags.add(tag)
        warnings.warn(
            f'Exif tag {tag} could not be found '
            'in the feature names. Ignoring tag '
            'from extracted metadata.')


def append_exif_features(image,
row_feature_values,
feature_names,
blacklisted_tags,
feature_metadata):
if isinstance(image, str):
image_pointer_path = get_image_pointer_from_path(image)
with Image.open(image_pointer_path) as im:
exifdata = im.getexif()
for tag_id in exifdata:
# get the tag name, instead of human unreadable tag id
tag = str(TAGS.get(tag_id, tag_id))
data = exifdata.get(tag_id)
# decode bytes
if isinstance(data, bytes):
data = data.decode()
if len(data) > MAX_CUSTOM_LEN:
data = data[:MAX_CUSTOM_LEN] + '...'
if isinstance(data, str):
if tag in feature_names:
if tag not in feature_metadata.categorical_features:
feature_metadata.categorical_features.append(tag)
tag_index = feature_names.index(tag)
row_feature_values[tag_index] = data
else:
# in theory this should now never happen with
# latest code, but adding this check for safety
if tag not in blacklisted_tags:
blacklisted_tags.add(tag)
warnings.warn(
f'Exif tag {tag} could not be found '
'in the feature names. Ignoring tag '
'from extracted metadata.')
elif isinstance(data, int) or isinstance(data, float):
row_feature_values[feature_names.index(tag)] = data
if tag_id in IFD_CODE_LOOKUP:
ifd_data = exifdata.get_ifd(tag_id)
for nested_tag_id, data in ifd_data.items():
tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \
or ExifTags.TAGS.get(nested_tag_id, None) \
or nested_tag_id
process_data(data, tag, feature_names,
feature_metadata, row_feature_values,
blacklisted_tags)
else:
tag = str(TAGS.get(tag_id, tag_id))
data = exifdata.get(tag_id)
process_data(data, tag, feature_names, feature_metadata,
row_feature_values, blacklisted_tags)
27 changes: 18 additions & 9 deletions responsibleai_vision/responsibleai_vision/utils/image_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import requests
from numpy import asarray
from PIL import Image
from PIL import ExifTags, Image
from PIL.ExifTags import TAGS
from requests.adapters import HTTPAdapter, Retry

Expand All @@ -20,6 +20,8 @@
# domain mapped session for reuse
_requests_sessions = {}

# Maps the value of every ExifTags.IFD member to that member's name.
# Membership in this dict is how EXIF readers decide that a tag id refers
# to a nested IFD block, whose entries are then read with get_ifd().
IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD}


def _get_retry_session(url):
domain = urlparse(url.lower()).netloc
Expand Down Expand Up @@ -88,15 +90,22 @@ def get_all_exif_feature_names(image_dataset):
with Image.open(image_pointer_path) as im:
exifdata = im.getexif()
for tag_id in exifdata:
# get the tag name, instead of human unreadable tag id
tag = TAGS.get(tag_id, tag_id)
if tag not in image_dataset.columns:
data = exifdata.get(tag_id)
if isinstance(data, str) or \
isinstance(data, int) or \
isinstance(data, float) or \
isinstance(data, bytes):
# nesting for IFD block tags
if tag_id in IFD_CODE_LOOKUP:
ifd_data = exifdata.get_ifd(tag_id)

for nested_tag_id in ifd_data:
nested_tag = ExifTags.GPSTAGS.get(nested_tag_id,
None) \
or ExifTags.TAGS.get(nested_tag_id, None) \
or nested_tag_id
exif_feature_names.add(nested_tag)
else:
# get the tag name, instead of human unreadable tag id
tag = TAGS.get(tag_id, tag_id)
if tag not in image_dataset.columns:
exif_feature_names.add(tag)

return list(exif_feature_names)


Expand Down
20 changes: 16 additions & 4 deletions responsibleai_vision/tests/test_feature_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,22 @@

MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
FRIDGE_METADATA_FEATURES = [
'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
'Software', 'Orientation'
]
'SensingMethod', 'GPSVersionID', 'ISOSpeedRatings', 'SceneType',
'SceneCaptureType', 'SubjectDistance', 'CustomRendered',
'SubjectDistanceRange', 'DigitalZoomRatio', 'ApertureValue',
'ImageWidth', 'GPSDOP', 'MaxApertureValue', 'ColorSpace',
'FocalLengthIn35mmFilm', 'ExposureMode', 'Saturation', 'ExposureTime',
'ExifImageHeight', 'FNumber', 'YCbCrPositioning', 'Make', 'MeteringMode',
'ExposureBiasValue', 'ExposureProgram', 'ComponentsConfiguration',
'ExifImageWidth', 'ExifInteroperabilityOffset', 'BrightnessValue',
'ImageLength', 'FlashPixVersion', 'SubsecTimeOriginal', 'Model',
'SubsecTimeDigitized', 'ResolutionUnit', 'DateTimeOriginal', 'XResolution',
'FocalLength', 'Sharpness', 'GPSLongitude', 'Contrast', 'Software',
'GPSLatitude', 'MakerNote', 'GPSDateStamp', 'GPSAltitude',
'GPSProcessingMethod', 'GPSTimeStamp', 'GPSLatitudeRef', 'WhiteBalance',
'GPSLongitudeRef', 'Flash', 'SubsecTime', 'YResolution',
'DateTimeDigitized', 'DateTime', 'GPSAltitudeRef', 'Orientation',
'ShutterSpeedValue', 'ExifVersion']


def validate_extracted_features(extracted_features, feature_names,
Expand Down
4 changes: 2 additions & 2 deletions responsibleai_vision/tests/test_image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ def test_retry_sessions_retries_on_conn_failure(self, request_mock):
def test_get_all_exif_feature_names(self):
image_dataset = load_fridge_object_detection_dataset().head(2)
exif_feature_names = get_all_exif_feature_names(image_dataset)
assert len(exif_feature_names) == 10 if platform.system() == "Linux" \
else 11
num_features = 49 if platform.system() == "Linux" else 60
assert len(exif_feature_names) == num_features

def test_generate_od_error_labels(self):
true_y = np.array([[[3, 142, 257, 395, 463, 0]],
Expand Down

0 comments on commit e2c7dd0

Please sign in to comment.