#!/usr/bin/env python3
# Author: HACKE-RC, commonly known as RC
# Description: WebDork by RC - a Python tool that automates Google dorking to find information disclosures.
# Developer contact: @coder_rc on Twitter. You can request new features by tagging me in a tweet.
# Proxy integration is by me (ka1hatsu).
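#
# Usage examples (the company name is a placeholder):
#   python3 webdork.py -cn "ExampleCorp" --show
#   python3 webdork.py -cn "ExampleCorp" --browser
#   python3 webdork.py -cn "ExampleCorp" --proxy 127.0.0.1:8080 -o results.txt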
# Importing modules
import argparse
import time
import webbrowser
from random import choice
from sys import exit
from urllib.parse import unquote

import requests
from bs4 import BeautifulSoup
dorkscontainer = []  # holds every generated dork
all_links = []       # holds the links scraped from Google result pages
class Scanner:
    @staticmethod
    def banner() -> None:
        print(r"""
__        __   _     ____             _
\ \      / /__| |__ |  _ \  ___  _ __| | __
 \ \ /\ / / _ \ '_ \| | | |/ _ \| '__| |/ /
  \ V  V /  __/ |_) | |_| | (_) | |  |   <
   \_/\_/ \___|_.__/|____/ \___/|_|  |_|\_\
""")
        print("\t\t\t\t\tv1.0")
        print("------------ WebDork by RC ------------")
        print("----- github.com/HACKE-RC/webdork -----")
    @staticmethod
    def createdork(prefix: str, site: str, args: argparse.Namespace) -> None:
        fulldork = prefix + site + f'%20"{args.company_name}"'
        dorkscontainer.append(fulldork)
        if args.verbose:
            print(f"[v] Added the dork {unquote(fulldork)} to dorks list. [v]")
    @staticmethod
    def browseropen(args: argparse.Namespace, dork: str) -> None:
        dork = f"https://www.google.com/search?q={dork}"
        if args.verbose:
            print(f"[v] Opening the dork {dork} results in browser. [v]")
        webbrowser.open(dork)
        time.sleep(4)  # brief pause so the browser isn't flooded with tabs
    @staticmethod
    def getuseragent() -> str:
        """
        Returns a random user agent.
        """
        all_agents = ("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
                      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
                      "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
                      "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
        return choice(all_agents)
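
    # Rotating the user agent per request (the scraping loop below sends
    # headers = {'user-agent': Scanner.getuseragent()}) makes the traffic look
    # less uniform to Google's rate limiting.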
    @staticmethod
    def writeresults(list_name, filename: str) -> None:
        """
        Appends each item of a list or tuple to a file, one per line.
        """
        with open(filename, "a") as file:
            for item in list_name:
                if item != ":":
                    file.write(item + "\n")
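
    # Example (illustrative): Scanner.writeresults(["https://a.example", "https://b.example"], "dorkresults.txt")
    # appends both URLs to dorkresults.txt, one per line.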
    @staticmethod
    def filtergoogle(url_list) -> list:
        """
        Takes a list of URLs and removes the garbage URLs that Google adds to its results page.
        """
        actual_results = []
        for url in url_list:
            if ".google." in url or url == ":":
                continue
            if "://webcache." in url:
                # Google cache links embed the original URL after ":http";
                # strip the cache prefix and keep only the real target.
                actual_results.append(url[url.index(":http") + 1:])
            else:
                actual_results.append(url)
        return actual_results
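
    # Example (illustrative): a cached result such as
    #   "https://webcache.googleusercontent.com/search?q=cache:abc:https://example.com/page"
    # is reduced to "https://example.com/page", while any ".google." link is dropped entirely.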
    @staticmethod
    def showresults(iterable_name) -> None:
        """
        Takes an iterable and prints all of its items.
        """
        for item in iterable_name:
            if item != ":":
                print(item)
parser = argparse.ArgumentParser(description="A python tool to automatically dork on a given company's name.")
parser.add_argument('-cn', "--company-name", type=str,
                    metavar="Company name", help='Name of the company',
                    required=True)
parser.add_argument("-bw", '--browser', help="Search the dorks in browser.", action="store_true")
parser.add_argument("--show", help="Print results from the dorks.", action="store_true")
parser.add_argument("-o", metavar="Output", type=str, help="Output filename (default is dorkresults.txt).",
                    default="dorkresults.txt")
parser.add_argument("-v", "--verbose", help="Turn verbose mode on.", action="store_true")
parser.add_argument("-s", "--silent", help="Just save the results without printing anything.", action="store_true")
parser.add_argument("--no-save-output", help="Don't save the output to a file.", action="store_true")
parser.add_argument("--proxy", type=str, help="Specify proxy address and port as <address>:<port>")
args = parser.parse_args()
if args.verbose and args.silent:
    print("[ ERR ] Verbose and silent mode can't be turned on at the same time. [ ERR ]\nTry:\n\tRemoving the -v"
          "/--verbose or -s/--silent switch.")
    exit(2)
proxies = None
if args.proxy:
    # requests routes both schemes through the proxy; the proxy itself is
    # reached over plain HTTP, which is what most intercepting proxies expect.
    proxies = {
        'http': f'http://{args.proxy}',
        'https': f'http://{args.proxy}'
    }
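# Example (hypothetical): --proxy 127.0.0.1:8080 routes every request through a
# local intercepting proxy such as Burp Suite listening on that port.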
siteprefix = "site:"
domains = ["codepad.co", "scribd.com", "npmjs.com", "npm.runkit.com", "libraries.io", "ycombinator.com", "coggle.it",
           "papaly.com", "trello.com", "prezi.com", "jsdelivr.net", "codepen.io", "codeshare.io", "sharecode.io",
           "pastebin.com", "repl.it", "productforums.google.com", "gitter.im", "bitbucket.org", "*.atlassian.net", "*.jira.com"]
inurlprefix = "inurl:"
# Keywords for inurl: dorks
inurlkeywords = ["gitlab"]
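# With -cn "ExampleCorp" (a placeholder), createdork() below produces dorks such as
# site:pastebin.com%20"ExampleCorp" and inurl:gitlab%20"ExampleCorp".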
if not args.silent:
    Scanner.banner()
if args.verbose:
    print("[v] Creating dorks... [v]")
for domain in domains:
    Scanner.createdork(siteprefix, domain, args)
for inurlkeyword in inurlkeywords:
    Scanner.createdork(inurlprefix, inurlkeyword, args)
for dork in dorkscontainer:
    if args.browser:
        Scanner.browseropen(args, dork)
    if args.show:
        if args.verbose and not args.silent:
            print(f"Getting the results from dork -> {unquote(dork)}")
        headers = {'user-agent': Scanner.getuseragent()}
        dork = dork.replace(" ", "+").replace(":", r"%3A")
        url = f"https://www.google.com/search?q={dork}"
        try:
            r = requests.get(url, headers=headers, proxies=proxies)
        except requests.exceptions.RequestException as e:
            print(f"Error occurred: {e}")
            continue
        soup = BeautifulSoup(r.text, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            # Keep only real external links; Google uses "#" and relative
            # paths for its own navigation elements.
            if href and href != "#" and href.startswith('http'):
                all_links.append(href)
all_links = Scanner.filtergoogle(all_links)
if not args.no_save_output:
    Scanner.writeresults(all_links, args.o)
if args.show:
    Scanner.showresults(all_links)