description = [[
Spiders a web server and displays its directory structure along with
number and types of files in each folder. Note that files listed as
having an 'Other' extension are ones that have no extension or that
are a root document.
]]
---
-- @usage
-- nmap --script http-sitemap-generator -p 80 <host>
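--
-- A run that raises the crawl limits (values are illustrative; the script
-- arguments are documented below):
-- nmap --script http-sitemap-generator --script-args 'http-sitemap-generator.maxdepth=5,http-sitemap-generator.maxpagecount=40' -p 80 <host>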
--
--
-- @output
-- PORT STATE SERVICE REASON
-- 80/tcp open http syn-ack
-- | http-sitemap-generator:
-- | Directory structure:
-- | /
-- | Other: 1
-- | /images/
-- | png: 1
-- | /shared/css/
-- | css: 1
-- | /shared/images/
-- | gif: 1; png: 1
-- | Longest directory structure:
-- | Depth: 2
-- | Dir: /shared/css/
-- | Total files found (by extension):
-- |_ Other: 1; css: 1; gif: 1; png: 2
--
-- @args http-sitemap-generator.maxdepth the maximum number of directories beneath
--       the initial URL to spider. A negative value disables the limit.
--       (default: 3)
-- @args http-sitemap-generator.maxpagecount the maximum number of pages to visit.
--       A negative value disables the limit. (default: 20)
-- @args http-sitemap-generator.url the URL to start spidering. This is a URL
--       relative to the scanned host, e.g. /default.html (default: /)
-- @args http-sitemap-generator.withinhost only spider URLs within the same host.
--       (default: true)
-- @args http-sitemap-generator.withindomain only spider URLs within the same
--       domain. This widens the scope from <code>withinhost</code> and cannot
--       be used in combination with it. (default: false)
--
author = "Piotr Olma"
license = "Same as Nmap--See http://nmap.org/book/man-legal.html"
categories = {"discovery", "intrusive"}
local shortport = require 'shortport'
local http = require 'http'
local stdnse = require 'stdnse'
local url = require 'url'
local httpspider = require 'httpspider'
local string = require 'string'
local table = require 'table'
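
-- Run against open TCP ports 80/443 or any port whose service was identified as HTTP or HTTPS.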
portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")
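
-- Records one occurrence of value v under key k in the nested counter table d,
-- creating the per-key table on first use; in this script d maps
-- directory -> { extension -> count }.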
local function dict_add(d, k, v)
  if not d[k] then
    d[k] = {}
    d[k][v] = 1
  elseif d[k][v] then
    d[k][v] = d[k][v]+1
  else
    d[k][v] = 1
  end
end
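
-- Applies function f to every element of the array t and returns a new array
-- holding the results.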
local function map(f, t)
  local new_t = {}
  for _,v in ipairs(t) do
    new_t[#new_t+1] = f(v)
  end
  return new_t
end
local function sort_dirs(t)
  local keys_table = {}
  for k,_ in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  local newdirs = {}
  map(function(d) newdirs[#newdirs+1]={d, t[d]} end, keys_table)
  return newdirs
end
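
-- Returns an array of "key: value" strings built from the table t, sorted by key.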
local function sort_by_keys(t)
  local keys_table = {}
  for k,_ in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  return map(function(e) return e..": "..tostring(t[e]) end, keys_table)
end
local function internal_table_to_output(t)
  local output = {}
  for _,dir in ipairs(t) do
    local ext_and_occurrences = sort_by_keys(dir[2])
    output[#output+1] = {name=dir[1], table.concat(ext_and_occurrences, "; ")}
  end
  return output
end
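
-- Extracts the extension from a file path; paths without one (including
-- root documents) are reported as "Other".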
local function get_file_extension(f)
  return string.match(f, ".-/.-%.([^/%.]*)$") or "Other"
end
-- removes /../ from paths; for example
-- normalize_path("/a/v/../../da/as/d/a/a/aa/../") -> "/da/as/d/a/a/"
local function normalize_path(p)
  local n=0
  repeat
    p, n = string.gsub(p, "/[^/]-/%.%.", "")
  until n==0
  return p
end
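
-- Crawls the target, recording the directory and extension of every unique
-- path that returns a 200, then reports the directory structure, the deepest
-- directory found and per-extension totals.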
function action(host, port)
  local starting_url = stdnse.get_script_args('http-sitemap-generator.url') or "/"

  -- create a new crawler instance, starting at the configured URL
  local crawler = httpspider.Crawler:new( host, port, starting_url, { scriptname = SCRIPT_NAME, noblacklist=true, useheadfornonwebfiles=true } )
  if ( not(crawler) ) then
    return
  end

  local visited = {}
  local dir_structure = {}
  local total_ext = {}
  local longest_dir_structure = {dir="/", depth=0}

  while(true) do
    local status, r = crawler:crawl()
    if ( not(status) ) then
      if ( r.err ) then
        return stdnse.format_output(true, ("ERROR: %s"):format(r.reason))
      else
        break
      end
    end

    if r.response.status and r.response.status == 200 then
      -- check if we've already visited this file
      local path = normalize_path(r.url.path)
      if not visited[path] then
        local ext = get_file_extension(path)
        if total_ext[ext] then total_ext[ext]=total_ext[ext]+1 else total_ext[ext]=1 end
        local dir = normalize_path(r.url.dir)
        local _,dir_depth = string.gsub(dir,"/","/")
        -- check if this path is the longest one
        dir_depth = dir_depth - 1 -- first '/'
        if dir_depth > longest_dir_structure["depth"] then
          longest_dir_structure["dir"] = dir
          longest_dir_structure["depth"] = dir_depth
        end
        dict_add(dir_structure, dir, ext)
        -- when withinhost=false, then maybe we'd like to include the full url
        -- with each path listed in the output
        visited[path] = true
      end
    end
  end

  local out = internal_table_to_output(sort_dirs(dir_structure))
  local tot = sort_by_keys(total_ext)
  out =
  {
    "Directory structure:", out,
    {name="Longest directory structure:", "Depth: "..tostring(longest_dir_structure.depth), "Dir: "..longest_dir_structure.dir},
    {name="Total files found (by extension):", table.concat(tot, "; ")}
  }
  return stdnse.format_output(true, out)
end