Skip to content

Commit 90880e1

Browse files
dameikle authored and tballison committed
TIKA-2630: Wrong height and width metadata for JPEG images (#255)
* TIKA-2630:
  - Added extraction of image height/width from ExifSubIFDDirectory for compressed images
  - Include directory name as key qualifier for Exif directories to avoid clashes
* TIKA-2630: Tidied up code
1 parent 51aa831 commit 90880e1

File tree

4 files changed: 29 additions and 10 deletions (+29 / -10 lines changed)

tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,11 @@ public void handle(Directory directory, Metadata metadata)
260260
throws MetadataException {
261261
if (directory.getTags() != null) {
262262
for (Tag tag : directory.getTags()) {
263-
metadata.set(tag.getTagName(), tag.getDescription());
263+
if (directory instanceof ExifDirectoryBase) {
264+
metadata.set(directory.getName() + ":" + tag.getTagName(), tag.getDescription());
265+
} else {
266+
metadata.set(tag.getTagName(), tag.getDescription());
267+
}
264268
}
265269
}
266270
}
@@ -288,7 +292,11 @@ public void handle(Directory directory, Metadata metadata)
288292
} else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
289293
value = Boolean.FALSE.toString();
290294
}
291-
metadata.set(name, value);
295+
if (directory instanceof ExifDirectoryBase) {
296+
metadata.set(directory.getName() + ":" + name, value);
297+
} else {
298+
metadata.set(name, value);
299+
}
292300
}
293301
}
294302
}
@@ -493,6 +501,17 @@ public void handlePhotoTags(Directory directory, Metadata metadata) {
493501
metadata.set(Metadata.IMAGE_LENGTH,
494502
trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
495503
}
504+
505+
// For Compressed Images read from ExifSubIFDDirectory
506+
if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)) {
507+
metadata.set(Metadata.IMAGE_WIDTH,
508+
trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)));
509+
}
510+
// Guard on the HEIGHT tag: the body below reads TAG_EXIF_IMAGE_HEIGHT, so checking the width tag here was a copy-paste slip.
if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_HEIGHT)) {
511+
metadata.set(Metadata.IMAGE_LENGTH,
512+
trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_HEIGHT)));
513+
}
514+
496515
}
497516

498517
/**

tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ public void testJPEG() throws Exception {
6565
parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
6666

6767
// Core EXIF/TIFF tags
68-
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
69-
assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
68+
assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
69+
assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
7070
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
7171
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
7272

@@ -86,7 +86,7 @@ public void testJPEG() throws Exception {
8686
// Check that EXIF/TIFF tags come through with their raw values too
8787
// (This may be removed for Tika 1.0, as we support more of them
8888
// with explicit Metadata entries)
89-
assertEquals("Canon EOS 40D", metadata.get("Model"));
89+
assertEquals("Canon EOS 40D", metadata.get("Exif IFD0:Model"));
9090

9191
// Common tags
9292
assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
@@ -120,8 +120,8 @@ public void testJPEGGeo() throws Exception {
120120
assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
121121

122122
// Core EXIF/TIFF tags
123-
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
124-
assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
123+
assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
124+
assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
125125
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
126126
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
127127

tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ public void getNormalMetadataToo() throws Exception {
255255
m = getXML("testTIFF.tif").metadata;
256256
assertEquals("100", m.get(Metadata.IMAGE_WIDTH));
257257
assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
258-
assertEquals("72 dots per inch", m.get("Y Resolution"));
258+
assertEquals("72 dots per inch", m.get("Exif IFD0:Y Resolution"));
259259
}
260260

261261
//TODO: add unit tests for jp2/jpx/ppm TIKA-2174

tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,8 @@ public void testRegularImages() throws Exception {
468468
assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
469469
assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
470470

471-
assertEquals(51, meta_jpg.names().length);
472-
assertEquals(115, meta_jpg_exif.names().length);
471+
assertEquals(50, meta_jpg.names().length);
472+
assertEquals(116, meta_jpg_exif.names().length);
473473
}
474474

475475
@Test

0 commit comments

Comments
 (0)