@@ -15,8 +15,10 @@ def bytesToInt(bytes, alignmentIndicator, signed=False) :
1515 return i
1616
# Decode a byte sequence into a string, dropping a single trailing null byte if present.
# IFD string elements are stored with a terminating 0x00 which is not part of the text.
# Only one trailing null is removed; any earlier nulls are kept in the result.
# An empty input yields an empty string.
def bytesToASCIIString(bytes) :
    # Compare a one-byte slice rather than indexing [-1], so empty input doesn't raise IndexError
    if bytes[-1:] == b'\x00' :
        bytes = bytes[:-1]
    return bytes.decode()
2022
2123# Extract first n bytes up to a 0 byte, expect this to be an ASCII string identifying the type of App Segment, e.g. "Exif"
2224def getAppSegmentIdentifier (segment ) :
@@ -83,7 +85,7 @@ def readEntropyCodedDataSegment(f) :
8385#
8486
8587#http://gvsoft.no-ip.org/exif/exif-explanation.html
86- def processExifSegment (dict , info , segment ) :
88+ def processExifSegment (info , segment ) :
8789
8890 # Expect first six bytes to be 'Exif\x00\x00'
8991 ExifIdentifierLength = 6
@@ -106,38 +108,79 @@ def processExifSegment(dict, info, segment) :
106108 firstIFDOffset = bytesToInt (TIFFHeader [4 :8 ], byteAlignmentIndicator )
107109 #print(firstIFDOffset)
108110
109- # Then a chained set of IFD blocks, which sometimes contain embedded pointers to further specialised IFD blocks,
110- # which we record and look at after processing the main chain of IFDs.
111- embeddedIFDOffsets = [] # List of (IFD type, offset) tuples
112111 nextIFDOffset = firstIFDOffset
113112 IFDCount = 0
113+
114+ # Dictionary to record each IFD, keyed by IFD name, storing the detailed IFD dictionary as the value
115+ dict = {}
116+
114117 while nextIFDOffset != 0 :
118+ IFDname = "IFD" + str (IFDCount )
119+ #print("Handling main chain IFD:", IFDname)
120+ IFDentries , nextIFDOffset = processIFD (TIFF , nextIFDOffset , byteAlignmentIndicator )
121+ dict [IFDname ] = IFDentries
115122 IFDCount += 1
116- print ("Handling main chain IFD:" , IFDCount )
117- entries , nextIFDOffset = processIFD (TIFF , nextIFDOffset , byteAlignmentIndicator )
118- # Look for IFD elements known to indicate an embedded IFD offset
119- for entry in entries :
120- if entry ['tag' ] in [34853 , 34665 ] :
121- embeddedIFDOffsets .append ( (entry ['tag' ], entry ['value' ]) )
122- dict [entry ['tag' ]] = entry
123- # ???? Can a tag be repeated across the set of IFDs ? If so what to do - put them in a list ?
124- # ???? NB Issue affecrs embeddedIFDOffsets method too, perhaps handle in there as well
125-
126- if len (embeddedIFDOffsets ) > 0 :
127- print ("Found embedded IFDs within this IFD:" , embeddedIFDOffsets )
128- for (id , os ) in embeddedIFDOffsets :
129- print ("Looking at embedded IFD:" , id )
130- # ???? Need to store results. Do we ever expect a non-zero next-in-chain value ?
131- processIFD (TIFF , os , byteAlignmentIndicator )
132123
124+ # Search for embedded IFD elements within the IFDs we've already identified. Assume only one embedded IFD
125+ # of each type. IFDs can be nested by more than one level, so keep going as long as we find new IFDs
126+
127+ continueLooking = True
128+ while (continueLooking ) :
129+ newIFDinfo = []
130+ for knownIFDname , d in dict .items () :
131+ for embeddedIFDtag , embeddedIFDname in knownEmbeddedIFDs ().items () :
132+ # This will re-search all IFDs each time through the loop, not just ones we've added last time
133+ # around, so ignore embedded IFDs we've already picked up. (Assuming they only exist in one place.)
134+ if embeddedIFDtag in d and embeddedIFDname not in dict :
135+ IFDname = embeddedIFDname
136+ #print("Handling embedded IFD:", IFDname)
137+ embeddedIFDOffset = d [embeddedIFDtag ]['value' ]
138+ embeddedIFDentries , nextIFDOffset = processIFD (TIFF , embeddedIFDOffset , byteAlignmentIndicator )
139+ # Put info about embedded IFD onto a list, we can't put it directly in the main dictionary
140+ # while looping over the dictionary,
141+ newIFDinfo .append ( (embeddedIFDname , embeddedIFDentries ) )
142+ if nextIFDOffset != 0000 :
143+ print ("*** - unexpected next IFD offset in IFD" , embeddedIFDname )
144+ # Can now add the new IFD(s) to the main dictionary
145+ for additionalIFDname , IFDentries in newIFDinfo :
146+ dict [additionalIFDname ] = IFDentries
147+ continueLooking = len (newIFDinfo ) > 0
148+
149+ return dict
150+
# Tags whose element value is used as the offset of a further, embedded IFD, mapped to
# the name under which that embedded IFD is recorded. A fresh dictionary is built on each call.
def knownEmbeddedIFDs() :
    pointerTags = (
        (34665, "Exif"),
        (34853, "GPS"),
        (40965, "Interoperability"),
    )
    return {tagNumber: IFDname for tagNumber, IFDname in pointerTags}
157+
# Add an IFD element (itself a dictionary) to an IFD-level dictionary, keyed by the element's tag.
# If the tag is not yet present the element is stored directly. If the tag is already present,
# the entry becomes (or is extended as) a list holding every element seen for that tag, in order.
# The IFD dictionary is modified in place; nothing is returned.
def addToIFDDictionary(IFDdict, element) :
    tag = element['tag']
    if tag not in IFDdict :
        IFDdict[tag] = element
        return

    existing = IFDdict[tag]
    if isinstance(existing, list) :
        # Already multiple entries for this tag - append to the existing list
        existing.append(element)
    else :
        # Second occurrence of this tag - convert the single element to a list of elements.
        # Must index by the tag value itself, not the literal string 'tag'.
        IFDdict[tag] = [existing, element]
175+
133176# Each IFD (Image File Directory) consists of:
134177# - a two-byte int giving the number of directory elements
135178# - the 12-byte elements
136179# - a four-byte offset to the start of the next IFD in this chain, or 0000 if the end of the chain
137180def processIFD (TIFF , IFDOffset , byteAlignmentIndicator ) :
138181
139182 # List of dictionaries for output, one per IFD element
140- IFDEntries = []
183+ IFDEntries = {}
141184
142185 IFDBytes = TIFF [IFDOffset :]
143186 # 2 byte value indicating the number of elements
@@ -150,7 +193,7 @@ def processIFD(TIFF, IFDOffset, byteAlignmentIndicator) :
150193 for n in range (0 , elementCount ) :
151194 thisElementBytes = elementBytes [elementSize * n : elementSize * (n + 1 )]
152195 element = processIFDElement (n , thisElementBytes , TIFF , byteAlignmentIndicator )
153- IFDEntries . append ( element )
196+ addToIFDDictionary ( IFDEntries , element )
154197
155198 # The final four bytes are either an offset to the next IFD in the chain, or 0000 if no more IFDs in this chain
156199 nextOffsetBytesPosition = 2 + elementSize * elementCount
@@ -170,106 +213,101 @@ def processIFDElement(elementNo, element, TIFF, byteAlignmentIndicator) :
170213
171214 tag = bytesToInt (element [0 :2 ], byteAlignmentIndicator )
172215 dataFormat = bytesToInt (element [2 :4 ], byteAlignmentIndicator )
173- numComponents = bytesToInt (element [4 :8 ], byteAlignmentIndicator )
216+ componentCount = bytesToInt (element [4 :8 ], byteAlignmentIndicator )
174217 dataBytes = element [8 :12 ]
175218 dataBytesAsOffset = bytesToInt (dataBytes , byteAlignmentIndicator )
176219
177220 implemented = True
178221 dataValue = "-"
179222 # 1 = unsigned byte, 1 byte per component
180223 if dataFormat == 1 :
181- if numComponents == 1 :
224+ if componentCount == 1 :
182225 dataValue = bytesToInt (dataBytes [0 :1 ], byteAlignmentIndicator )
183- elif numComponents <= 4 :
226+ elif componentCount <= 4 :
184227 dataValue = []
185- for i in range (0 , numComponents ) :
228+ for i in range (0 , componentCount ) :
186229 dataValue .append (bytesToInt (dataBytes [i :i + 1 ], byteAlignmentIndicator ))
187- elif numComponents > 4 :
230+ elif componentCount > 4 :
188231 dataValue = []
189- for i in range (0 , numComponents ) :
232+ for i in range (0 , componentCount ) :
190233 offset = dataBytesAsOffset + i
191234 dataValue .append (bytesToInt (TIFF [offset :offset + 1 ], byteAlignmentIndicator ))
192- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , "(ubyte), num:" , numComponents , ", val:" , dataValue )
235+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, "(ubyte), num:", componentCount , ", val:", dataValue)
193236 # 2 = ASCII string, 1 byte per character
194237 elif dataFormat == 2 :
195- if numComponents <= 4 :
196- dataValue = bytesToASCIIString (dataBytes [0 :numComponents ])
238+ if componentCount <= 4 :
239+ dataValue = bytesToASCIIString (dataBytes [0 :componentCount ])
197240 else :
198- dataValue = bytesToASCIIString (TIFF [dataBytesAsOffset :dataBytesAsOffset + numComponents ])
199- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , "(String), num:" , numComponents , ", val:" , dataValue )
241+ dataValue = bytesToASCIIString (TIFF [dataBytesAsOffset :dataBytesAsOffset + componentCount ])
242+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, "(String), num:", componentCount , ", val:", dataValue)
200243 # 3 = unsigned short, 2 bytes per component
201244 elif dataFormat == 3 :
202- if numComponents == 1 :
245+ if componentCount == 1 :
203246 dataValue = bytesToInt (dataBytes [0 :2 ], byteAlignmentIndicator )
204- elif numComponents == 2 :
247+ elif componentCount == 2 :
205248 dataValue = [ bytesToInt (dataBytes [0 :2 ], byteAlignmentIndicator ), bytesToInt (dataBytes [2 :4 ], byteAlignmentIndicator ) ]
206- elif numComponents > 2 :
249+ elif componentCount > 2 :
207250 dataValue = []
208- for i in range (0 , numComponents ) :
251+ for i in range (0 , componentCount ) :
209252 offset = dataBytesAsOffset + i * 2
210253 dataValue .append (bytesToInt (TIFF [offset :offset + 2 ], byteAlignmentIndicator ))
211- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , "(ushort), num:" , numComponents , ", val:" , dataValue )
254+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, "(ushort), num:", componentCount , ", val:", dataValue)
212255 # 4 = unsigned long, 9 = signed long; 4 bytes per component
213256 elif dataFormat in [4 , 9 ] :
214257 signed = dataFormat == 9
215258 desc = "(ulong)" if dataFormat == 4 else "(long)"
216- if numComponents == 1 :
259+ if componentCount == 1 :
217260 dataValue = bytesToInt (dataBytes [0 :4 ], byteAlignmentIndicator , signed )
218- elif numComponents > 1 :
261+ elif componentCount > 1 :
219262 dataValue = []
220- for i in range (0 , numComponents ) :
263+ for i in range (0 , componentCount ) :
221264 offset = dataBytesAsOffset + i * 4
222265 dataValue .append (bytesToInt (TIFF [offset :offset + 2 ], byteAlignmentIndicator , signed ))
223- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , desc , ", num:" , numComponents , ", val:" , dataValue )
266+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, desc, ", num:", componentCount , ", val:", dataValue)
224267 elif dataFormat in [5 , 10 ] :
225268 signed = dataFormat == 10
226269 desc = "(urational)" if dataFormat == 5 else "(rational)"
227270 values = []
228- for i in range (0 , numComponents ) :
271+ for i in range (0 , componentCount ) :
229272 offset = dataBytesAsOffset + i * 8
230273 numerator = bytesToInt (TIFF [offset :offset + 4 ], byteAlignmentIndicator , signed )
231274 denominator = bytesToInt (TIFF [offset + 4 :offset + 8 ], byteAlignmentIndicator , signed )
232275 values .append ( (numerator , denominator ) )
233- if numComponents == 1 :
276+ if componentCount == 1 :
234277 dataValue = values [0 ]
235278 else :
236279 dataValue = values
237- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , desc , ", num:" , numComponents , ", val:" , dataValue )
238- # 7 = General purpose undefined. 1 byte per component
280+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, desc, ", num:", componentCount , ", val:", dataValue)
281+ # 7 = General purpose ' undefined' type . 1 byte per component
239282 elif dataFormat == 7 :
240- if numComponents == 1 :
283+ if componentCount == 1 :
241284 dataValue = dataBytes [0 :1 ]
242- elif numComponents <= 4 :
285+ elif componentCount <= 4 :
243286 dataValue = []
244- for i in range (0 , numComponents ) :
287+ for i in range (0 , componentCount ) :
245288 dataValue .append (dataBytes [i :i + 1 ])
246- elif numComponents > 4 :
289+ elif componentCount > 4 :
247290 dataValue = []
248- for i in range (0 , numComponents ) :
291+ for i in range (0 , componentCount ) :
249292 offset = dataBytesAsOffset + i
250293 dataValue .append (TIFF [offset :offset + 1 ])
251- print (".. IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , "(undefined), num:" , numComponents , ", val:" , dataValue [0 :12 ])
294+ # print(".. IFD item no:", elementNo, "tag:", tag, ", dataFormat:", dataFormat, "(undefined), num:", componentCount , ", val:", dataValue[0:12])
252295 else :
253296 implemented = False
254297
255- # ???? Remove prints
256- # Fill in dictionary some more, allow for repeated values ?
257- # Caller to handle different IFDs vs repeated values in different IFDs ????
258- # NB Thumbnail vs full image ????
259-
260- # Structure to allow easy look by tag ?
261-
298+ # Put values for the element into a dictionary and return it
262299 entry = {}
263300 entry ['tag' ] = tag
301+ entry ['index' ] = elementNo
302+ entry ['format' ] = dataFormat
303+ entry ['count' ] = componentCount
264304 entry ['value' ] = dataValue
265305
266- if implemented :
267- pass
268- else :
269- print ("*** IFD data type not implemented: IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , ", num:" , numComponents , ", bytes:" , dataBytes )
270-
271- return entry
306+ if not implemented :
307+ entry ['unhandled' ] = True
308+ print ("*** IFD data type not implemented: IFD item no:" , elementNo , "tag:" , tag , ", dataFormat:" , dataFormat , ", num:" , componentCount , ", bytes:" , dataBytes )
272309
310+ return entry
273311
274312#
275313#############################################
@@ -419,7 +457,10 @@ def processFile(filename) :
419457 appName = info ['app' ]
420458 dict = {}
421459 if appName == "Exif" :
422- processExifSegment (dict , info , data )
460+ ExifDict = processExifSegment (info , data )
461+ print ("Extracted these IFDs from the Exif segment:" )
462+ for n , d in ExifDict .items () :
463+ print ("- " , n , ":" , len (d ), "item(s)" )
423464 elif appName == "JFIF" :
424465 processJFIFSegment (dict , info , data )
425466 elif appName == "ICC_PROFILE" :
0 commit comments