@@ -49,7 +49,7 @@ def absurl(index, relpath=None, normpath=None):
49
49
return index
50
50
51
51
52
- def get (index , relpath = None , verbose = True , usecache = True , verify = True , ignore_error = False ):
52
+ def get (index , relpath = None , verbose = True , usecache = True , verify = True , ignore_error = False , username = None , password = None ):
53
53
global webpage2html_cache
54
54
if index .startswith ('http' ) or (relpath and relpath .startswith ('http' )):
55
55
full_path = absurl (index , relpath )
@@ -68,8 +68,13 @@ def get(index, relpath=None, verbose=True, usecache=True, verify=True, ignore_er
68
68
headers = {
69
69
'User-Agent' : 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
70
70
}
71
+
72
+ auth = None
73
+ if username and password :
74
+ auth = requests .auth .HTTPBasicAuth (username , password )
75
+
71
76
try :
72
- response = requests .get (full_path , headers = headers , verify = verify )
77
+ response = requests .get (full_path , headers = headers , verify = verify , auth = auth )
73
78
if verbose :
74
79
log ('[ GET ] %d - %s' % (response .status_code , response .url ))
75
80
if not ignore_error and (response .status_code >= 400 or response .status_code < 200 ):
@@ -202,12 +207,13 @@ def repl(matchobj):
202
207
203
208
204
209
def generate (index , verbose = True , comment = True , keep_script = False , prettify = False , full_url = True , verify = True ,
205
- errorpage = False , ** kwargs ):
210
+ errorpage = False , username = None , password = None , ** kwargs ):
206
211
"""
207
212
given a index url such as http://www.google.com, http://custom.domain/index.html
208
213
return generated single html
209
214
"""
210
- html_doc , extra_data = get (index , verbose = verbose , verify = verify , ignore_error = errorpage )
215
+ html_doc , extra_data = get (index , verbose = verbose , verify = verify , ignore_error = errorpage ,
216
+ username = username , password = password )
211
217
212
218
if extra_data and extra_data .get ('url' ):
213
219
index = extra_data ['url' ]
@@ -364,6 +370,8 @@ def main():
364
370
parser .add_argument ('-s' , '--script' , action = 'store_true' , help = "keep javascript in the generated html" )
365
371
parser .add_argument ('-k' , '--insecure' , action = 'store_true' , help = "ignore the certificate" )
366
372
parser .add_argument ('-o' , '--output' , help = "save output to" )
373
+ parser .add_argument ('-u' , '--username' , help = "use HTTP basic auth with specified username" )
374
+ parser .add_argument ('-p' , '--password' , help = "use HTTP basic auth with specified password" )
367
375
parser .add_argument ('--errorpage' , action = 'store_true' , help = "crawl an error page" )
368
376
parser .add_argument ("url" , help = "the website to store" )
369
377
args = parser .parse_args ()
0 commit comments