Skip to content

Commit 23c1c93

Browse files
committed
add username and password options
1 parent cff046b commit 23c1c93

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

webpage2html.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def absurl(index, relpath=None, normpath=None):
4949
return index
5050

5151

52-
def get(index, relpath=None, verbose=True, usecache=True, verify=True, ignore_error=False):
52+
def get(index, relpath=None, verbose=True, usecache=True, verify=True, ignore_error=False, username=None, password=None):
5353
global webpage2html_cache
5454
if index.startswith('http') or (relpath and relpath.startswith('http')):
5555
full_path = absurl(index, relpath)
@@ -68,8 +68,13 @@ def get(index, relpath=None, verbose=True, usecache=True, verify=True, ignore_er
6868
headers = {
6969
'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
7070
}
71+
72+
auth = None
73+
if username and password:
74+
auth = requests.auth.HTTPBasicAuth(username, password)
75+
7176
try:
72-
response = requests.get(full_path, headers=headers, verify=verify)
77+
response = requests.get(full_path, headers=headers, verify=verify, auth=auth)
7378
if verbose:
7479
log('[ GET ] %d - %s' % (response.status_code, response.url))
7580
if not ignore_error and (response.status_code >= 400 or response.status_code < 200):
@@ -202,12 +207,13 @@ def repl(matchobj):
202207

203208

204209
def generate(index, verbose=True, comment=True, keep_script=False, prettify=False, full_url=True, verify=True,
205-
errorpage=False, **kwargs):
210+
errorpage=False, username=None, password=None, **kwargs):
206211
"""
207212
given a index url such as http://www.google.com, http://custom.domain/index.html
208213
return generated single html
209214
"""
210-
html_doc, extra_data = get(index, verbose=verbose, verify=verify, ignore_error=errorpage)
215+
html_doc, extra_data = get(index, verbose=verbose, verify=verify, ignore_error=errorpage,
216+
username=username, password=password)
211217

212218
if extra_data and extra_data.get('url'):
213219
index = extra_data['url']
@@ -364,6 +370,8 @@ def main():
364370
parser.add_argument('-s', '--script', action='store_true', help="keep javascript in the generated html")
365371
parser.add_argument('-k', '--insecure', action='store_true', help="ignore the certificate")
366372
parser.add_argument('-o', '--output', help="save output to")
373+
parser.add_argument('-u', '--username', help="use HTTP basic auth with specified username")
374+
parser.add_argument('-p', '--password', help="use HTTP basic auth with specified password")
367375
parser.add_argument('--errorpage', action='store_true', help="crawl an error page")
368376
parser.add_argument("url", help="the website to store")
369377
args = parser.parse_args()

0 commit comments

Comments
 (0)