Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion peepdf/JSAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,18 @@ def isJavascript(content):
results = 0
length = len(content)
smallScriptLength = 100
badChars = 0

if re.findall(reJSscript, content, re.DOTALL | re.IGNORECASE):
return True

for char in content:
if (ord(char) < 32 and char not in ['\n', '\r', '\t', '\f', '\x00']) or ord(char) >= 127:
return False
badChars += 1

# More than 20% of the content are bad chars
if badChars >= len(content) * 0.2:
return False

for string in jsStrings:
cont = content.count(string)
Expand Down
Binary file added tests/files/js_in_pdf.js
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test_pee.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@
import peepdf.main
import pytest

def test_js_detect():
    """Check isJavascript() against every decoded stream in the fixture.

    Parses "tests/files/js_in_pdf.js" with forceMode and looseMode enabled
    and manualAnalysis disabled, then walks every version of the parsed
    document. For each indirect object that wraps a stream, the decoded
    stream must be detected as JavaScript exactly when it contains the
    text "function docOpened()".
    """
    parser = peepdf.PDFCore.PDFParser()
    status, pdf = parser.parse(
        "tests/files/js_in_pdf.js",
        forceMode=True,
        looseMode=True,
        manualAnalysis=False,
    )
    # NOTE(review): a falsy first return value presumably signals a
    # successful parse -- confirm against PDFParser.parse.
    assert not status

    # `updates + 1` bodies are visited: indices 0 through pdf.updates.
    for version in xrange(pdf.updates + 1):
        for entry in pdf.body[version].objects.values():
            # Only indirect objects are inspected.
            if not isinstance(entry, peepdf.PDFCore.PDFIndirectObject):
                continue

            inner = entry.getObject()
            # Only stream objects carry a decoded payload to classify.
            if not isinstance(inner, peepdf.PDFCore.PDFStream):
                continue

            decoded = inner.decodedStream
            detected = peepdf.JSAnalysis.isJavascript(decoded)

            # The marker text decides the expected classification.
            if "function docOpened()" in decoded:
                assert detected
            else:
                assert not detected
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the other stream without function docOpened() in it?


def test_whitespace_after_opening():
p = peepdf.PDFCore.PDFParser()
r, f = p.parse(
Expand Down