Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ WORKDIR /webarchive
ENV INIT_COLLECTION ''

ENV VOLUME_DIR /webarchive
ENV UWSGI_MOUNT '/=/pywb/pywb/apps/wayback.py'

#USER archivist
COPY docker-entrypoint.sh ./
Expand All @@ -31,4 +32,3 @@ EXPOSE 8080

ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["uwsgi", "/uwsgi/uwsgi.ini"]

2 changes: 2 additions & 0 deletions docs/manual/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,8 @@ For example, to deploy pywb under the ``/wayback`` subdirectory, the ``uwsgi.ini
mount = /wayback=./pywb/apps/wayback.py
manage-script-name = true

Alternatively, the same result can be achieved by setting the ``UWSGI_MOUNT`` environment variable, e.g. to ``/wayback=/pywb/pywb/apps/wayback.py``.
This is especially handy when running the Docker image.

.. _example-deploy:

Expand Down
24 changes: 21 additions & 3 deletions pywb/apps/static_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
from pywb.utils.wbexception import NotFoundException


# =================================================================
def is_subpath(parent_path, child_path):
    """Return True if ``child_path`` is ``parent_path`` itself or lies beneath it.

    Both paths are made absolute and normalized with ``os.path.abspath``
    before comparison, so ``..`` segments in ``child_path`` cannot be used to
    escape ``parent_path``. The check is purely lexical: symbolic links are
    not resolved.

    :param str parent_path: directory that is expected to contain the child
    :param str child_path: path to test
    :return: True if the normalized child is inside (or equal to) the
        normalized parent, False otherwise
    :rtype: bool
    """
    parent = os.path.abspath(parent_path)
    child = os.path.abspath(child_path)
    try:
        return os.path.commonpath([parent, child]) == parent
    except ValueError:
        # commonpath() raises ValueError when the paths have nothing
        # comparable in common (e.g. different drives on Windows); such a
        # child is by definition not inside the parent.
        return False


#=================================================================
# Static Content Handler
#=================================================================
Expand All @@ -23,15 +30,26 @@ def __call__(self, environ, url_str):
if url.endswith('/'):
url += 'index.html'

full_path = environ.get('pywb.static_dir')
if full_path:
full_path = os.path.join(full_path, url)
full_path = None
env_static_dir = environ.get('pywb.static_dir')

if env_static_dir:
full_path = os.path.join(env_static_dir, url)

# Prevent path traversal
if not is_subpath(env_static_dir, full_path):
raise NotFoundException('Requested a static file outside of static_dir')

if not os.path.isfile(full_path):
full_path = None

if not full_path:
full_path = os.path.join(self.static_path, url)

# Prevent path traversal
if not is_subpath(self.static_path, full_path):
raise NotFoundException('Requested a static file outside of static_dir')

try:
data = self.block_loader.load(full_path)

Expand Down
18 changes: 17 additions & 1 deletion pywb/warcserver/inputrequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import base64
import cgi
import json
import math
import sys


Expand Down Expand Up @@ -328,7 +329,22 @@ def _parser(json_obj, name=""):
_parser(v, name)

elif name:
data[get_key(name)] = str(json_obj)
if isinstance(json_obj, bool) and json_obj:
data[get_key(name)] = "true"
elif isinstance(json_obj, bool):
data[get_key(name)] = "false"
elif json_obj is None:
data[get_key(name)] = "null"
elif isinstance(json_obj, float):
# Treat floats like JavaScript's Number.prototype.toString(),
# drop decimal if float represents a whole number.
fraction, _ = math.modf(json_obj)
if fraction == 0.0:
data[get_key(name)] = str(int(json_obj))
else:
data[get_key(name)] = str(json_obj)
else:
data[get_key(name)] = str(json_obj)

_parser(json.loads(string))
return urlencode(data)
Expand Down
27 changes: 16 additions & 11 deletions pywb/warcserver/test/test_inputreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,44 +82,49 @@ def test_post_req(self):
class TestPostQueryExtract(object):
@classmethod
def setup_class(cls):
cls.post_data = b'foo=bar&dir=%2Fbaz'
cls.post_data = b'foo=bar&dir=%2Fbaz&do=true&re=false&re=null'
cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,'

def test_post_extract_1(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
len(self.post_data), BytesIO(self.post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'

assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz'
assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'

def test_post_extract_json(self):
post_data = b'{"a": "b", "c": {"a": 2}, "d": "e"}'
post_data = b'{"a": "b", "c": {"a": 2}, "d": "e", "f": true, "g": [false, null]}'
mq = MethodQueryCanonicalizer('POST', 'application/json',
len(post_data), BytesIO(post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e&f=true&g=false&g.2_=null'

post_data = b'{"type": "event", "id": 44.0, "float": 35.7, "values": [true, false, null], "source": {"type": "component", "id": "a+b&c= d", "values": [3, 4]}}'
mq = MethodQueryCanonicalizer('POST', 'application/json',
len(post_data), BytesIO(post_data))

assert mq.append_query('http://example.com/events') == 'http://example.com/events?__wb_method=POST&type=event&id=44&float=35.7&values=true&values.2_=false&values.3_=null&type.2_=component&id.2_=a%2Bb%26c%3D+d&values.4_=3&values.5_=4'

def test_put_extract_method(self):
mq = MethodQueryCanonicalizer('PUT', 'application/x-www-form-urlencoded',
len(self.post_data), BytesIO(self.post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz&do=true&re=false&re=null'

def test_post_extract_non_form_data_1(self):
mq = MethodQueryCanonicalizer('POST', 'application/octet-stream',
len(self.post_data), BytesIO(self.post_data))

#base64 encoded data
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='

def test_post_extract_non_form_data_2(self):
mq = MethodQueryCanonicalizer('POST', 'text/plain',
len(self.post_data), BytesIO(self.post_data))

#base64 encoded data
assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='

def test_post_extract_length_invalid_ignore(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
Expand All @@ -136,13 +141,13 @@ def test_post_extract_length_too_short(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
len(self.post_data) - 4, BytesIO(self.post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=%2'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re='

def test_post_extract_length_too_long(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
len(self.post_data) + 4, BytesIO(self.post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'

def test_post_extract_malformed_form_data(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
Expand All @@ -155,7 +160,7 @@ def test_post_extract_no_boundary_in_multipart_form_mimetype(self):
mq = MethodQueryCanonicalizer('POST', 'multipart/form-data',
len(self.post_data), BytesIO(self.post_data))

assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='


def test_options(self):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ tldextract
python-dateutil
markupsafe>=2.1.1
ua_parser
py3AMF
4 changes: 0 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ def generate_git_hash_py(pkg, filename='git_hash.py'):
def load_requirements(filename):
    """Read a pip-style requirements file and return its requirement strings.

    Blank lines and comment lines (starting with ``#``) are skipped, so an
    empty file yields an empty list instead of ``['']``. No extra packages
    are appended here: every dependency, including ``py3AMF``, is expected
    to be listed in the requirements file itself.

    :param str filename: path of the requirements file to read
    :return: requirement specifiers in file order
    :rtype: list[str]
    """
    with open(filename, 'rt') as fh:
        stripped = (line.strip() for line in fh)
        # Materialize the list while the file is still open.
        return [line for line in stripped
                if line and not line.startswith('#')]


Expand Down
9 changes: 9 additions & 0 deletions uwsgi.ini
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,13 @@ env = GEVENT_MONKEY_PATCH=1

# specify config file here
env = PYWB_CONFIG_FILE=config.yaml

if-not-env = UWSGI_MOUNT
wsgi = pywb.apps.wayback
endif =

# Set the path to which pywb should be mounted
if-env = UWSGI_MOUNT
mount = %(_)
manage-script-name = true
endif =
Loading