diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts index 566b4f2159..27a199b385 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts @@ -23,7 +23,7 @@ export const FridgeImageClassificationModelDebugging = { modelOverviewData: { featureCohortView: { firstFeatureToSelect: "mean_pixel_value", - multiFeatureCohorts: 3, + multiFeatureCohorts: 6, secondFeatureToSelect: "Make", singleFeatureCohorts: 3 }, diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts index c68afa0cc3..79c853089c 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts @@ -23,7 +23,7 @@ export const FridgeMultilabelModelDebugging = { modelOverviewData: { featureCohortView: { firstFeatureToSelect: "mean_pixel_value", - multiFeatureCohorts: 3, + multiFeatureCohorts: 6, secondFeatureToSelect: "Make", singleFeatureCohorts: 3 }, diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts index 0bd580b2ac..41c8c6537f 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts @@ -3,7 +3,7 @@ import { getOS } from "../../../../util/getOS"; -const FeatureCohorts = getOS() === "Linux" ? [2, 3] : 3; +const FeatureCohorts = getOS() === "Linux" ? [3, 6] : 6; export const FridgeObjectDetectionModelDebugging = { causalAnalysisData: { diff --git a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts index df5651cefc..e5ca0843e7 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts @@ -40,7 +40,8 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr assertNumberOfChartRowsEqual( datasetShape, selectedFeatures, - defaultVisibleChart + defaultVisibleChart, + isVision ); } } @@ -48,20 +49,21 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr function assertNumberOfChartRowsEqual( datasetShape: IModelAssessmentData, selectedFeatures: number, - chartIdentifier: Locators + chartIdentifier: Locators, + isVision: boolean ): void { const featureCohortView = datasetShape.modelOverviewData?.featureCohortView; let expectedNumberOfCohorts = featureCohortView?.singleFeatureCohorts; if (selectedFeatures > 1) { expectedNumberOfCohorts = featureCohortView?.multiFeatureCohorts; } - console.log(selectedFeatures); - console.log(expectedNumberOfCohorts); if (Array.isArray(expectedNumberOfCohorts)) { cy.get(getChartItems(chartIdentifier)) .its("length") .should("be.gte", expectedNumberOfCohorts[0]) .and("be.lte", expectedNumberOfCohorts[1]); + } else if (isVision) { + cy.get(getChartItems(chartIdentifier)).its("length").should("be.gt", 2); } else { cy.get(getChartItems(chartIdentifier)).should( "have.length", diff --git a/libs/e2e/src/util/comboBox.ts b/libs/e2e/src/util/comboBox.ts index 863af87c0e..3c35147a04 100644 --- a/libs/e2e/src/util/comboBox.ts +++ b/libs/e2e/src/util/comboBox.ts @@ -38,5 +38,5 @@ export function multiSelectComboBox( item )}` ) - .click(); + .click({ multiple: true }); } diff --git a/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py b/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py index 3080ecbc96..13c7876b1e 100644 --- a/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py +++ b/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py @@ -7,15 +7,16 @@ from typing import Optional import pandas as pd -from PIL import Image +from PIL import ExifTags, Image from PIL.ExifTags import TAGS +from PIL.TiffImagePlugin import IFDRational from tqdm import tqdm from responsibleai.feature_metadata import FeatureMetadata from responsibleai_vision.common.constants import (ExtractedFeatures, ImageColumns) from responsibleai_vision.utils.image_reader import ( - get_all_exif_feature_names, get_image_from_path, + IFD_CODE_LOOKUP, get_all_exif_feature_names, get_image_from_path, get_image_pointer_from_path) MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value @@ -91,35 +92,48 @@ def extract_features(image_dataset: pd.DataFrame, return results, feature_names -def append_exif_features(image, row_feature_values, feature_names, - blacklisted_tags, feature_metadata): +def process_data(data, + tag, + feature_names, + feature_metadata, + row_feature_values, + blacklisted_tags): + if isinstance(data, IFDRational): + data = data.numerator / data.denominator + if isinstance(data, (str, int, float)): + if tag in feature_names: + if tag not in feature_metadata.categorical_features: + feature_metadata.categorical_features.append(tag) + row_feature_values[feature_names.index(tag)] = data + elif tag not in blacklisted_tags: + blacklisted_tags.add(tag) + warnings.warn( + f'Exif tag {tag} could not be found ' + 'in the feature names. Ignoring tag ' + 'from extracted metadata.') + + +def append_exif_features(image, + row_feature_values, + feature_names, + blacklisted_tags, + feature_metadata): if isinstance(image, str): image_pointer_path = get_image_pointer_from_path(image) with Image.open(image_pointer_path) as im: exifdata = im.getexif() for tag_id in exifdata: - # get the tag name, instead of human unreadable tag id - tag = str(TAGS.get(tag_id, tag_id)) - data = exifdata.get(tag_id) - # decode bytes - if isinstance(data, bytes): - data = data.decode() - if len(data) > MAX_CUSTOM_LEN: - data = data[:MAX_CUSTOM_LEN] + '...' - if isinstance(data, str): - if tag in feature_names: - if tag not in feature_metadata.categorical_features: - feature_metadata.categorical_features.append(tag) - tag_index = feature_names.index(tag) - row_feature_values[tag_index] = data - else: - # in theory this should now never happen with - # latest code, but adding this check for safety - if tag not in blacklisted_tags: - blacklisted_tags.add(tag) - warnings.warn( - f'Exif tag {tag} could not be found ' - 'in the feature names. Ignoring tag ' - 'from extracted metadata.') - elif isinstance(data, int) or isinstance(data, float): - row_feature_values[feature_names.index(tag)] = data + if tag_id in IFD_CODE_LOOKUP: + ifd_data = exifdata.get_ifd(tag_id) + for nested_tag_id, data in ifd_data.items(): + tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \ + or ExifTags.TAGS.get(nested_tag_id, None) \ + or nested_tag_id + process_data(data, tag, feature_names, + feature_metadata, row_feature_values, + blacklisted_tags) + else: + tag = str(TAGS.get(tag_id, tag_id)) + data = exifdata.get(tag_id) + process_data(data, tag, feature_names, feature_metadata, + row_feature_values, blacklisted_tags) diff --git a/responsibleai_vision/responsibleai_vision/utils/image_reader.py b/responsibleai_vision/responsibleai_vision/utils/image_reader.py index c0f9499156..61337daf5d 100644 --- a/responsibleai_vision/responsibleai_vision/utils/image_reader.py +++ b/responsibleai_vision/responsibleai_vision/utils/image_reader.py @@ -10,7 +10,7 @@ import requests from numpy import asarray -from PIL import Image +from PIL import ExifTags, Image from PIL.ExifTags import TAGS from requests.adapters import HTTPAdapter, Retry @@ -20,6 +20,8 @@ # domain mapped session for reuse _requests_sessions = {} +IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD} + def _get_retry_session(url): domain = urlparse(url.lower()).netloc @@ -88,15 +90,22 @@ def get_all_exif_feature_names(image_dataset): with Image.open(image_pointer_path) as im: exifdata = im.getexif() for tag_id in exifdata: - # get the tag name, instead of human unreadable tag id - tag = TAGS.get(tag_id, tag_id) - if tag not in image_dataset.columns: - data = exifdata.get(tag_id) - if isinstance(data, str) or \ - isinstance(data, int) or \ - isinstance(data, float) or \ - isinstance(data, bytes): + # nesting for IFD block tags + if tag_id in IFD_CODE_LOOKUP: + ifd_data = exifdata.get_ifd(tag_id) + + for nested_tag_id in ifd_data: + nested_tag = ExifTags.GPSTAGS.get(nested_tag_id, + None) \ + or ExifTags.TAGS.get(nested_tag_id, None) \ + or nested_tag_id + exif_feature_names.add(nested_tag) + else: + # get the tag name, instead of human unreadable tag id + tag = TAGS.get(tag_id, tag_id) + if tag not in image_dataset.columns: exif_feature_names.add(tag) + return list(exif_feature_names) diff --git a/responsibleai_vision/tests/test_feature_extractors.py b/responsibleai_vision/tests/test_feature_extractors.py index 46849af024..78e57d20b7 100644 --- a/responsibleai_vision/tests/test_feature_extractors.py +++ b/responsibleai_vision/tests/test_feature_extractors.py @@ -13,10 +13,22 @@ MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value FRIDGE_METADATA_FEATURES = [ - 'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model', - 'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning', - 'Software', 'Orientation' -] + 'SensingMethod', 'GPSVersionID', 'ISOSpeedRatings', 'SceneType', + 'SceneCaptureType', 'SubjectDistance', 'CustomRendered', + 'SubjectDistanceRange', 'DigitalZoomRatio', 'ApertureValue', + 'ImageWidth', 'GPSDOP', 'MaxApertureValue', 'ColorSpace', + 'FocalLengthIn35mmFilm', 'ExposureMode', 'Saturation', 'ExposureTime', + 'ExifImageHeight', 'FNumber', 'YCbCrPositioning', 'Make', 'MeteringMode', + 'ExposureBiasValue', 'ExposureProgram', 'ComponentsConfiguration', + 'ExifImageWidth', 'ExifInteroperabilityOffset', 'BrightnessValue', + 'ImageLength', 'FlashPixVersion', 'SubsecTimeOriginal', 'Model', + 'SubsecTimeDigitized', 'ResolutionUnit', 'DateTimeOriginal', 'XResolution', + 'FocalLength', 'Sharpness', 'GPSLongitude', 'Contrast', 'Software', + 'GPSLatitude', 'MakerNote', 'GPSDateStamp', 'GPSAltitude', + 'GPSProcessingMethod', 'GPSTimeStamp', 'GPSLatitudeRef', 'WhiteBalance', + 'GPSLongitudeRef', 'Flash', 'SubsecTime', 'YResolution', + 'DateTimeDigitized', 'DateTime', 'GPSAltitudeRef', 'Orientation', + 'ShutterSpeedValue', 'ExifVersion'] def validate_extracted_features(extracted_features, feature_names, diff --git a/responsibleai_vision/tests/test_image_utils.py b/responsibleai_vision/tests/test_image_utils.py index 7369a9a522..07f54da8a0 100644 --- a/responsibleai_vision/tests/test_image_utils.py +++ b/responsibleai_vision/tests/test_image_utils.py @@ -96,8 +96,8 @@ def test_retry_sessions_retries_on_conn_failure(self, request_mock): def test_get_all_exif_feature_names(self): image_dataset = load_fridge_object_detection_dataset().head(2) exif_feature_names = get_all_exif_feature_names(image_dataset) - assert len(exif_feature_names) == 10 if platform.system() == "Linux" \ - else 11 + num_features = 49 if platform.system() == "Linux" else 60 + assert len(exif_feature_names) == num_features def test_generate_od_error_labels(self): true_y = np.array([[[3, 142, 257, 395, 463, 0]],