Skip to content

feat: add keywords filter #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,6 @@ ENV/
.ropeproject

# mkdocs documentation
/site
/site

.idea
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,7 @@ $ python handler.py

# Changelog

1.0 - Release
```
1.0 - Release
1.1 - Add keywords search
```
14 changes: 7 additions & 7 deletions css/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
padding: 10px;
}

#results { font-family: Verdana; font-size: 14px; }
#results { font-family: Verdana; font-size: 14px; }
#results h1 {
font-size: 150%;
margin: 0;
padding: 5px;
}
#results h2 { font-size: 125%; color: #666; }
}
#results h2 { font-size: 125%; color: #666; }
#results .file {
padding: 5px;
}
#results .link {
padding: 3px;
#results .link {
padding: 3px;
border: 1px solid #555;
background: #eee none repeat scroll 0 0;
}
}
#results .highlight { color: #000; background: yellow; }

#results .result {
Expand All @@ -35,7 +35,7 @@
background-color: #000 !important;
border-color: #000 !important;
}
.navbar-inverse .navbar-collapse,
.navbar-inverse .navbar-collapse,
.navbar-inverse .navbar-form {
background-color: #000;
}
Expand Down
86 changes: 59 additions & 27 deletions handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import print_function
import tornado.ioloop, tornado.web, tornado.autoreload
from tornado.escape import json_encode, json_decode
import json

import safeurl, types, sys, re, mimetypes, glob, jsbeautifier, urlparse, pycurl
import calendar, time, datetime
Expand All @@ -15,7 +16,7 @@
#------------------------------------------------------------

class BaseHandler(tornado.web.RequestHandler):

def get_current_user(self):
    """Return the current user, which this handler always reports as an empty list."""
    return list()

Expand All @@ -39,7 +40,7 @@ def get_current_user(self):
class MainHandler(BaseHandler):
def initialize(self):
return

def get(self):
self.render(
'templates/index.html',
Expand All @@ -52,13 +53,13 @@ def get(self):
class ViewAboutHandler(BaseHandler):
    """Handler that renders the about page template."""

    def initialize(self):
        """Do nothing; this handler needs no setup."""
        pass

    def get(self):
        """Render ``templates/about.html`` in response to a GET request."""
        template = 'templates/about.html'
        self.render(template)


#------------------------------------------------------------
# /parse/ajax
#------------------------------------------------------------
Expand All @@ -77,21 +78,21 @@ def find_str(self, s, char):
return index
index += 1
return -1

def findEntireLine(self, contents, str):
    """Locate the first line of ``contents`` that contains ``str``.

    ``contents`` is split on "\\n" and scanned top to bottom.

    Returns a tuple ``(line, lineNum, linkPos)`` where ``line`` is the
    matching line's text, ``lineNum`` is its zero-based index and
    ``linkPos`` is whatever ``self.find_str(line, str)`` reports
    (presumably the offset of ``str`` inside the line -- verify against
    ``find_str``). Falls through and returns ``None`` when no line
    contains ``str``.
    """
    for lineNum, item in enumerate(contents.split("\n")):
        if str not in item:
            continue
        return item, lineNum, self.find_str(item, str)

def parseForLinks(self, contents):
discoveredLinks = []
outputLinks = []
# ugh lol
regex = r"[^/][`'\"]([\/][a-zA-Z0-9_.-]+)+(?!([gimuy]*[,;\s])|\/\2)"
links = re.finditer(regex, contents)
links = re.finditer(regex, contents)
for link in links:
linkStr = link.group(0)
# discoveredLinks list to avoid dupes and complex dupe checks
Expand All @@ -107,6 +108,33 @@ def parseForLinks(self, contents):
})
return outputLinks

def parseForKeywords(self, contents, keywords=None):
    """Collect the lines of ``contents`` that mention any of ``keywords``.

    Each keyword is matched literally (it is passed through ``re.escape``)
    and wrapped in ``.*`` on both sides, so every match spans the text
    around the keyword up to the line breaks (``.`` does not match a
    newline). A matched text is reported once, even when several keywords
    hit it.

    Args:
        contents: The text to search.
        keywords: Iterable of literal keyword strings. ``None`` or an
            empty list means "no keyword search".

    Returns:
        A list of dicts with the keys ``"line"``, ``"link"``,
        ``"lineNum"`` and ``"linkPos"``, in order of discovery. The last
        three values of each entry come from ``self.findEntireLine``.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean "no keywords".
    if not keywords:
        return []

    # A set gives O(1) duplicate checks; the result order lives in outputLinks.
    discoveredLinks = set()
    outputLinks = []

    for keyword in keywords:
        # An empty keyword would degrade to ".*.*" and match every line.
        if not keyword:
            continue
        pattern = re.compile(r".*" + re.escape(keyword) + r".*")
        for match in pattern.finditer(contents):
            linkStr = match.group(0)
            if linkStr in discoveredLinks:
                continue
            discoveredLinks.add(linkStr)
            # findEntireLine returns None when it cannot locate the text;
            # skip instead of failing on the tuple unpack.
            located = self.findEntireLine(contents, linkStr)
            if located is None:
                continue
            entireLine, lineNum, linkPos = located
            outputLinks.append({
                "line": entireLine,
                "link": linkStr,
                "lineNum": lineNum,
                "linkPos": linkPos
            })
    return outputLinks

def getFormattedTimestamp(self):
d = datetime.datetime.now()
formatted = "{}_{}_{}_{}-{}".format(d.month, d.day, d.year, d.hour, d.minute)
Expand All @@ -115,33 +143,31 @@ def getFormattedTimestamp(self):
def formatHTMLOutput(self, html):
    """Return ``html`` appended to an initially empty output string.

    Args:
        html: The HTML fragment to emit.

    Returns:
        The accumulated output, which is currently just ``html``.
    """
    # The previous version read ``output`` before assigning it, so every
    # call raised UnboundLocalError. Start from an empty buffer instead.
    output = ""
    output = output + html
    return output

def beautifyJS(self, content):
    """Return ``content`` after passing it through ``jsbeautifier.beautify``."""
    beautified = jsbeautifier.beautify(content)
    return beautified

def isLongLine(self, line):
if len(line)>1000:
return True
return False

def fileRoutine(self, url, content):
return len(line)>1000

def fileRoutine(self, url, content, keywords):
html = ""

# beautify the JS for cleaner parsing
# note: this can be slow against large JS files and can lead to failure
prettyContent = self.beautifyJS(content)

# parse all the links out
parsedLinks = self.parseForLinks(prettyContent)
parsedLinks = self.parseForLinks(prettyContent) + self.parseForKeywords(prettyContent, keywords)

# if we have results, start building HTML
if parsedLinks:
print("Discovered {} links in {}".format(len(parsedLinks), url))
# generate HTML output
# html = html+'<h1>{}</h1><div class="file">'.format(url)
html = html+'<div class="file">'
for link in parsedLinks:
html = html+"<h2>{}</h2>".format(link["link"][1:])
html = html+"<h2>{}</h2>".format(link["link"][0:].replace("<", "&lt;"))
# Get positions for highlighting
startPos = link["linkPos"]
endPos = link["linkPos"]+len(link["link"])
Expand Down Expand Up @@ -179,38 +205,44 @@ def fetchURL(self, url, headers=[]):
res = sc.execute(url)
return res

def parseLinks(self, url, headers=[]):
def parseLinks(self, url, headers=[], keywords=[]):
html = ""
file = self.fetchURL(url, headers)
html = html + self.fileRoutine(url, file)
html = html + self.fileRoutine(url, file, keywords)
return html

def post(self):

error = False
errorMsg = ""

url = self.get_argument("url")
headers = self.get_argument("headers", [])
keywords = self.get_argument("keywords", False)
if (keywords == False):
keywords = []
else:
keywords = json.loads(keywords)


if error == False:
data = self.parseLinks(url, headers)

data = self.parseLinks(url, headers, keywords)

# set content-type
self.set_header('Content-Type', 'application/json')

# output
self.write(json_encode({
"url": url,
"output": data,
}))

else:

self.write("error")


#------------------------------------------------------------
# Main
#------------------------------------------------------------
Expand Down
19 changes: 16 additions & 3 deletions js/parsejs.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ class Result {
this.result = result;
this.output = output;
}
}
}

/**
 * Toggle the visibility of the element with id "headers".
 */
function toggleHeaders() {
    var headersPanel = $("#headers");
    headersPanel.toggle();
}

/**
 * Toggle the visibility of the element with id "keywords".
 */
function toggleKeywords() {
    var keywordsPanel = $("#keywords");
    keywordsPanel.toggle();
}

function getCustomHeaders() {
var headers = $('#customHeaders').val().split("\n");
var customHeaders = [];
Expand All @@ -27,10 +31,19 @@ function getCustomHeaders() {
return null;
}

/**
 * Return the values of `vals` with empty strings and repeats removed.
 *
 * The first occurrence of each value is kept, in its original order.
 * A missing (null/undefined) `vals` yields an empty array.
 *
 * @param {Array} vals - Values to filter, e.g. the lines of a textarea.
 * @returns {Array} The distinct, non-empty values.
 */
function removeDuplicateAndEmpty(vals) {
    // A Set gives O(1) duplicate checks instead of a linear $.inArray scan
    // per element, and removes the jQuery dependency.
    var seen = new Set();
    var uniqueVals = [];
    (vals || []).forEach(function(el) {
        if (el === '' || seen.has(el)) {
            return;
        }
        seen.add(el);
        uniqueVals.push(el);
    });
    return uniqueVals;
}

function parseJS(url) {
$.post("/parse/ajax", {
url: url,
headers: getCustomHeaders(),
keywords: JSON.stringify(removeDuplicateAndEmpty($('#customKeywords').val().split("\n")))
}, function(data) {
var succeeded = false;
// success if we have output
Expand Down Expand Up @@ -142,7 +155,7 @@ function hideResults() {

$(function() {
rebuildResults();
$('#hideResults').change(function() {
$('#hideResults').change(function() {
toggleResults();
});
});
});
33 changes: 30 additions & 3 deletions templates/index.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{% module Template("header.html") %}

<h1>Parse URLs</h1>

<form method="post" name="parser">
<div class="form-group">
<label for="urls">URLs</label>
Expand All @@ -11,6 +11,32 @@ <h1>Parse URLs</h1>
<label for="urls">Headers</label>
<textarea class="form-control" id="customHeaders" name="headers" rows="3"></textarea>
</div>
<div class="form-group" id="keywords">
<label for="customKeywords">Keywords</label>
<textarea class="form-control" id="customKeywords" name="keywords" rows="3">
document.URL
document.documentURI
document.baseURI
location.href
location.search
location.hash
location.pathname
location
document.cookie
document.referrer
window.name
history.pushState(
history.replaceState(
localStorage
sessionStorage
.onEventName
document.write
document.writeln
.innerHTML
window.location
document.location
</textarea>
</div>
<div class="form-group">
<div class="checkbox-inline">
<label>
Expand All @@ -21,15 +47,16 @@ <h1>Parse URLs</h1>
<div class="form-group">
<button type="button" class="btn btn-primary" onclick="parseURLs();">JSParse</button>
<button type="button" class="btn btn-primary" onclick="toggleHeaders();">Custom Headers</button>
<button type="button" class="btn btn-primary" onclick="toggleKeywords();">Extra Keywords</button>
<a href="https://github.com/nahamsec/JSParser/issues/new" target="_new" class="btn btn-white">Report an issue</a>
</div>
</form>

<div class="progress">
<div class="progress-bar progress-bar-success progress-bar-striped" id="progress" role="progressbar" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>

<div id="results">
</div>

{% module Template("footer.html") %}