# easy_search.py
# Requires Python 3: GoogleScraper needs Python 3, and its selenium module has
# been modified to make it possible to bypass the CAPTCHA ("yanzhengma").
# easy_search.py is more capable than combineGoogleScraper_myurlhandle.py,
# because the latter can only get the http_url list, while easy_search.py can
# get all three kinds of URL list and their .txt files.
#
#
# /root/myenv2/bin/python3.5m is the normal python3
# /root/myenv/bin/python3.5 is the python with the modified GoogleScraper script
#
# Function:
# This script produces GoogleScraper_origin_http_domain_url_list.txt, a file
# of URLs that can be thrown directly into sqlmap to start digging for SQL
# injections. The URLs come from a Google search: when you run easy_search.py
# you supply the keyword(s) for the Google search, and the URLs returned by
# that search are written to GoogleScraper_origin_http_domain_url_list.txt.
import sys
def save_url_to_file(url_list, name):
    """Append each URL from ``url_list`` to the file ``name``, skipping duplicates.

    A URL is a duplicate when it already appears as a line of the file or
    earlier in ``url_list``. The file is created when it does not exist, even
    if ``url_list`` is empty. Every URL that is actually written is also
    printed to stdout.

    Args:
        url_list: iterable of URL strings to store.
        name: path of the text file to append to.
    """
    # Append mode creates the file if it is missing without touching content.
    with open(name, "a"):
        pass

    # Read the existing lines once instead of re-reading the file per URL.
    # Default text mode translates "\r\n" and "\r" to "\n" on read.
    with open(name, "r") as existing:
        lines = existing.readlines()
    seen = {line.rstrip("\n") for line in lines}

    # If the file does not end with a line terminator, the next URL would be
    # glued onto the last line; terminate that line first.
    needs_separator = bool(lines) and not lines[-1].endswith("\n")

    # newline="" disables newline translation so that exactly "\r\n" is
    # written on every platform.
    with open(name, "a", newline="") as out:
        if needs_separator:
            out.write("\r\n")
        for ur in url_list:
            if ur in seen:
                continue
            seen.add(ur)
            print(ur)
            out.write(ur + "\r\n")
def _scheme_prefix_and_domain(url):
    """Return ``(prefix, domain)`` for ``url``.

    ``prefix`` is "https://" when the URL starts with it and "http://"
    otherwise. ``domain`` is everything between the prefix and the first "/".
    """
    prefix = "https://" if url.startswith("https://") else "http://"
    # Strip the prefix only when it is really there; the whole URL is kept,
    # including its last character, so "https://example.com" -> "example.com".
    remainder = url[len(prefix):] if url.startswith(prefix) else url
    return prefix, remainder.split("/")[0]


def myGoogleScraper_get_urls_from_query(query, want):
    """Search Google for ``query`` via GoogleScraper and collect the result URLs.

    Three lists are built from the result links and saved with
    ``save_url_to_file``:

    * ``GoogleScraper_domain_list.txt``: unique domains
    * ``GoogleScraper_http_domain_list.txt``: unique domains with their
      "http://" or "https://" prefix
    * ``GoogleScraper_origin_http_domain_url_list.txt``: every result URL

    Afterwards all firefox processes are killed.

    Args:
        query: the search keyword(s) handed to GoogleScraper.
        want: name of the list to return: 'GoogleScraper_domain_list',
            'GoogleScraper_http_domain_list' or
            'GoogleScraper_origin_http_domain_url_list', each with or without
            a trailing '.txt'. Any other value selects the full URL list.

    Returns:
        The selected list of strings. It is empty when the search raised
        GoogleSearchError.
    """
    from GoogleScraper import scrape_with_config, GoogleSearchError
    import os

    # See GoogleScraper's config.cfg file for possible values.
    config = {
        'use_own_ip': True,
        'keyword': query,
        # google, yahoo, baidu, bing... are ok, see the GoogleScraper source.
        'search_engines': ['google'],
        'num_pages_for_keyword': 50,
        'scrape_method': 'selenium',
        'sel_browser': 'firefox',
        'do_caching': False,
    }
    try:
        search = scrape_with_config(config)
    except GoogleSearchError as e:
        print(e)
        # Without a result object there is nothing to iterate; carry on with
        # empty lists so the files are still created and firefox is cleaned up.
        serps = []
    else:
        serps = search.serps

    domain_list = []
    http_domain_list = []
    origin_url_list = []
    seen_domains = set()  # O(1) membership test alongside the ordered list
    for serp in serps:
        for link in serp.links:
            url = link.link
            if not url:
                # NOTE(review): presumably some result entries carry no URL;
                # confirm against GoogleScraper's Link model.
                continue
            prefix, domain = _scheme_prefix_and_domain(url)
            if domain and domain not in seen_domains:
                seen_domains.add(domain)
                domain_list.append(domain)
                http_domain_list.append(prefix + domain)
            # Unlike the domain lists, every URL is kept even when its domain
            # was already seen: sqlmap needs as many distinct URLs as possible.
            origin_url_list.append(url)

    save_url_to_file(domain_list, "GoogleScraper_domain_list.txt")
    save_url_to_file(http_domain_list, "GoogleScraper_http_domain_list.txt")
    save_url_to_file(origin_url_list, "GoogleScraper_origin_http_domain_url_list.txt")

    # Kill every process whose `ps` line mentions firefox, in case the browser
    # got stuck. The original author found plain `pkill firefox` insufficient
    # here, so the pipeline is kept as is.
    os.system('''ps -aux | grep firefox | awk '{print $2}' | xargs kill -9''')

    lists_by_name = {
        'GoogleScraper_domain_list': domain_list,
        'GoogleScraper_http_domain_list': http_domain_list,
        'GoogleScraper_origin_http_domain_url_list': origin_url_list,
    }
    if not isinstance(want, str):
        return origin_url_list
    # Accept the list name with or without the ".txt" suffix.
    list_name = want[:-len('.txt')] if want.endswith('.txt') else want
    return lists_by_name.get(list_name, origin_url_list)
def usage():
    """Print a short usage message with an example invocation, then exit with status 0."""
    help_template = '''input your keyword directly after the script,
example:%s site:www.xxx.ooo inurl:php?id='''
    script_name = sys.argv[0]
    print(help_template % script_name)
    sys.exit(0)
def main():
    """Build the search keyword from the command line and run the scrape.

    All arguments after the script name are joined with single spaces to form
    the keyword. When no keyword is given, the usage text is printed and the
    script exits instead of searching for an empty string.
    """
    keyword = " ".join(sys.argv[1:])
    if not keyword.strip():
        usage()
    myGoogleScraper_get_urls_from_query(
        keyword, want='GoogleScraper_origin_http_domain_url_list')
if __name__ == '__main__':
    # sys.argv always contains at least the script name, so "no keyword
    # supplied" means fewer than two entries (the old `== 0` check never fired).
    if len(sys.argv) < 2:
        usage()
    main()