Skip to content

Commit a46f2c5

Browse files
committed
Handle the case where a stream-length is reported incorrectly in a dictionary.
1 parent 6974f2f commit a46f2c5

File tree

2 files changed

+69
-9
lines changed

2 files changed

+69
-9
lines changed

PdfSharpCore/Pdf.IO/Lexer.cs

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
using System.IO;
3535
using PdfSharpCore.Internal;
3636
using PdfSharpCore.Pdf.Internal;
37+
using System.Collections.Generic;
38+
using System.Linq;
3739

3840
namespace PdfSharpCore.Pdf.IO
3941
{
@@ -170,6 +172,19 @@ public Symbol ScanNextToken()
170172
/// Reads the raw content of a stream.
171173
/// </summary>
172174
/// <param name="length">Number of bytes to read from the start of the stream data.</param>
/// <returns>
/// A buffer of exactly <paramref name="length"/> bytes. If the input ends before
/// that many bytes are available, the unread tail of the buffer stays zero.
/// </returns>
public byte[] ReadStream(int length)
{
    long pos = MoveToStartOfStream();
    _pdfSteam.Position = pos;

    byte[] bytes = new byte[length];

    // Stream.Read is allowed to return fewer bytes than requested even when more
    // data follows, so keep reading until the buffer is full or the input ends.
    int read = 0;
    while (read < length)
    {
        int chunk = _pdfSteam.Read(bytes, read, length - read);
        if (chunk <= 0)
            break; // end of input reached before 'length' bytes were available
        read += chunk;
    }
    Debug.Assert(read == length);

    // Synchronize idxChar etc.
    Position = pos + length;
    return bytes;
}
186+
187+
internal long MoveToStartOfStream()
173188
{
174189
long pos;
175190

@@ -187,15 +202,45 @@ public byte[] ReadStream(int length)
187202
}
188203
else
189204
pos = _idxChar + 1;
205+
return pos;
206+
}
190207

191-
_pdfSteam.Position = pos;
192-
byte[] bytes = new byte[length];
193-
int read = _pdfSteam.Read(bytes, 0, length);
194-
Debug.Assert(read == length);
208+
/// <summary>
/// Scans the input, starting at the current character, for the specified marker.
/// <para>
/// Returns the bytes from the current position up to the start of the marker or,
/// when the marker does not occur, up to the end of the input.
/// </para>
/// <para>
/// Afterwards the current character is the one right after the marker (if found)
/// or the end-of-file character.
/// </para>
/// </summary>
/// <param name="marker">The non-empty byte sequence to scan for.</param>
/// <param name="markerFound">Receives <c>true</c> if the marker was found; otherwise <c>false</c>.</param>
/// <returns>The bytes that precede the marker, or all remaining bytes if the marker was not found.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="marker"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="marker"/> is empty.</exception>
internal byte[] ScanUntilMarker(byte[] marker, out bool markerFound)
{
    markerFound = false;
    if (marker == null)
        throw new System.ArgumentNullException(nameof(marker));
    if (marker.Length == 0)
        throw new System.ArgumentException("The marker must contain at least one byte.", nameof(marker));

    var result = new List<byte>();
    byte lastMarkerByte = marker[marker.Length - 1];

    while (_currChar != Chars.EOF)
    {
        // Collect every byte first and test afterwards whether the collected data
        // ends with the marker. Unlike restarting the comparison after a partial
        // match, this also finds markers whose prefix repeats (e.g. "aab" in "aaab")
        // and never loses partially matched bytes when the input ends.
        byte current = (byte)_currChar;
        result.Add(current);
        ScanNextChar(false);

        // Cheap pre-check: a full comparison is only needed when the byte just
        // read could be the last byte of the marker.
        if (current != lastMarkerByte || result.Count < marker.Length)
            continue;

        int tailStart = result.Count - marker.Length;
        int matched = 0;
        while (matched < marker.Length && result[tailStart + matched] == marker[matched])
            matched++;

        if (matched == marker.Length)
        {
            // The marker itself is not part of the returned data.
            result.RemoveRange(tailStart, marker.Length);
            markerFound = true;
            break;
        }
    }

    return result.ToArray();
}
200245

201246
/// <summary>
@@ -722,7 +767,6 @@ public char MoveToNonWhiteSpace()
722767
}
723768
return _currChar;
724769
}
725-
726770
// #if DEBUG
727771
// public string SurroundingsOfCurrentPosition(bool hex)
728772
// {

PdfSharpCore/Pdf.IO/Parser.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
using PdfSharpCore.Exceptions;
3636
using PdfSharpCore.Internal;
3737
using PdfSharpCore.Pdf.Advanced;
38+
using PdfSharpCore.Pdf.Internal;
3839
using PdfSharpCore.Pdf.IO.enums;
3940

4041
namespace PdfSharpCore.Pdf.IO
@@ -272,6 +273,7 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
272273
#if true_
273274
ReadStream(dict);
274275
#else
276+
var startOfStream = _lexer.Position;
275277
int length = GetStreamLength(dict);
276278
byte[] bytes = _lexer.ReadStream(length);
277279
#if true_
@@ -301,7 +303,21 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
301303
#endif
302304
PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
303305
dict.Stream = stream;
304-
ReadSymbol(Symbol.EndStream);
306+
try
307+
{
308+
ReadSymbol(Symbol.EndStream);
309+
}
310+
catch (PdfReaderException)
311+
{
312+
// stream length may be incorrect, scan byte by byte up to the "endstream" keyword
313+
_lexer.Position = startOfStream;
314+
_lexer.Position = _lexer.MoveToStartOfStream();
315+
bytes = _lexer.ScanUntilMarker(PdfEncoders.RawEncoding.GetBytes("\nendstream"), out var markerFound);
316+
if (!markerFound)
317+
throw;
318+
stream = new PdfDictionary.PdfStream(bytes, dict);
319+
dict.Stream = stream;
320+
}
305321
symbol = ScanNextToken();
306322
#endif
307323
if (symbol == Symbol.Eof)

0 commit comments

Comments
 (0)