Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public void renderResource(HttpServletRequest httpRequest,
* @throws IOException per usual
* @throws WaybackException if Wayback data specific, anticipated exceptions
* occur
* @deprecated 2016-03-08 Use one-Resource version above with CompositeResource.
*/
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.WaybackException;
import org.archive.wayback.proxy.ProxyHttpsResultURIConverter;
import org.archive.wayback.replay.CompositeResource;
import org.archive.wayback.replay.HttpHeaderOperation;
import org.archive.wayback.replay.HttpHeaderProcessor;
import org.archive.wayback.replay.JSPExecutor;
Expand Down Expand Up @@ -89,25 +90,26 @@ public ArchivalUrlSAXRewriteReplayRenderer(HttpHeaderProcessor httpHeaderProcess
this.httpHeaderProcessor = httpHeaderProcessor;
}

/**
 * Single-resource rendering entry point.
 * <p>Forwards to the two-resource overload, passing {@code resource} as both
 * the HTTP-headers resource and the payload resource.</p>
 * <p>Assumes this is only called for appropriate doc types: html.</p>
 *
 * @param resource resource used for both HTTP headers and payload
 * @throws ServletException propagated from the two-resource overload
 * @throws IOException propagated from the two-resource overload
 * @throws WaybackException propagated from the two-resource overload
 */
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, WaybackException {
// same Resource instance serves as headers source and payload source
renderResource(httpRequest, httpResponse, wbRequest, result, resource,
resource, uriConverter, results);
}

/**
 * Two-resource rendering entry point.
 * <p>Collapses the headers/payload pair into one {@link Resource} and hands
 * it to the single-resource overload: when both arguments are the very same
 * instance it is used directly, otherwise the pair is wrapped in a
 * {@link CompositeResource}.</p>
 *
 * @param httpHeadersResource resource supplying the HTTP headers
 * @param payloadResource resource supplying the content
 */
@Override
public void renderResource(HttpServletRequest httpRequest,
        HttpServletResponse httpResponse, WaybackRequest wbRequest,
        CaptureSearchResult result, Resource httpHeadersResource,
        Resource payloadResource, ResultURIConverter uriConverter,
        CaptureSearchResults results)
        throws ServletException, IOException, WaybackException {
    final Resource effective;
    if (httpHeadersResource == payloadResource) {
        // identity comparison: a regular capture needs no wrapping
        effective = payloadResource;
    } else {
        effective = new CompositeResource(httpHeadersResource, payloadResource);
    }
    renderResource(httpRequest, httpResponse, wbRequest, result, effective,
            uriConverter, results);
}

Resource decodedResource = TextReplayRenderer.decodeResource(httpHeadersResource, payloadResource);
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, WaybackException {
// TODO: wrong
Resource decodedResource = TextReplayRenderer.decodeResource(resource);

// The URL of the page, for resolving in-page relative URLs:
// URL url = null;
Expand All @@ -119,7 +121,7 @@ public void renderResource(HttpServletRequest httpRequest,
// throw new IOException(e1.getMessage());
// }
// determine the character set used to encode the document bytes:
String charSet = charsetDetector.getCharset(httpHeadersResource, decodedResource, wbRequest);
String charSet = charsetDetector.getCharset(resource, decodedResource, wbRequest);

// set up the context:
final ReplayParseContext context = ReplayParseContext.create(
Expand Down Expand Up @@ -165,11 +167,11 @@ public void renderResource(HttpServletRequest httpRequest,


// copy the HTTP response code:
HttpHeaderOperation.copyHTTPMessageHeader(httpHeadersResource, httpResponse);
HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

// transform the original headers according to our headerProcessor:
Map<String,String> headers = HttpHeaderOperation.processHeaders(
httpHeadersResource, context, httpHeaderProcessor);
resource, context, httpHeaderProcessor);

// prepare several objects for the parse:

Expand Down
20 changes: 20 additions & 0 deletions wayback-core/src/main/java/org/archive/wayback/core/Resource.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
* @author Brad Tofel
*/
public abstract class Resource extends InputStream {

/**
* Upper-case header field name for {@code Content-Encoding}.
* For use with {@link #getHeader(String)}
*/
public static final String HTTP_CONTENT_ENCODING = "CONTENT-ENCODING";

private InputStream is;

Expand Down Expand Up @@ -115,6 +121,20 @@ public String getHeader(String headerName) {
return null;
}

/**
 * Return <i>content-encoding</i> of the payload.
 * <p>
 * Currently meaningful for HTTP response resource only,
 * and returns the value of HTTP {@code Content-Encoding}.
 * Composite resource would override this method to return
 * a value from appropriate member Resource.
 * </p>
 * <p>
 * This default implementation simply looks the value up through
 * {@link #getHeader(String)} using {@link #HTTP_CONTENT_ENCODING},
 * so it returns whatever that method returns for the name.
 * </p>
 * @return content-encoding header field value
 */
public String getContentEncoding() {
return getHeader(HTTP_CONTENT_ENCODING);
}

private void validate() throws IOException {
if(is == null) {
throw new IOException("No InputStream");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ public String getHeader(String headerName) {
else
return headersResource.getHeader(headerName);
}
/**
 * Returns {@code Content-Encoding} header field value
 * from the original record.
 * <p>The value is always read from {@code payloadResource} via
 * {@code getHeader(HTTP_CONTENT_ENCODING)}; {@code headersResource}
 * is not consulted by this method.</p>
 *
 * @return whatever {@code payloadResource.getHeader} returns for
 *         {@code HTTP_CONTENT_ENCODING}
 */
@Override
public String getContentEncoding() {
return payloadResource.getHeader(HTTP_CONTENT_ENCODING);
}
@Override
public void setChunkedEncoding() throws IOException {
payloadResource.setChunkedEncoding();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,35 +95,37 @@ protected abstract void updatePage(TextDocument page,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException;

/**
 * Single-resource rendering entry point.
 * <p>Forwards to the two-resource overload, passing {@code resource} as both
 * the HTTP-headers resource and the payload resource.</p>
 *
 * @param resource resource used for both HTTP headers and payload
 * @throws ServletException propagated from the two-resource overload
 * @throws IOException propagated from the two-resource overload
 * @throws BadContentException propagated from the two-resource overload
 */
@Override
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, BadContentException {
// same Resource instance serves as headers source and payload source
renderResource(httpRequest, httpResponse, wbRequest, result, resource, resource, uriConverter, results);
}

/**
 * Two-resource rendering entry point.
 * <p>Reduces the headers/payload pair to a single {@link Resource} before
 * delegating to the single-resource overload. A pair made of one and the
 * same instance is passed through unchanged; a pair of distinct instances
 * is wrapped in a {@link CompositeResource}.</p>
 *
 * @param httpHeadersResource resource supplying the HTTP headers
 * @param payloadResource resource supplying the content
 */
@Override
public void renderResource(HttpServletRequest httpRequest,
        HttpServletResponse httpResponse, WaybackRequest wbRequest,
        CaptureSearchResult result, Resource httpHeadersResource,
        Resource payloadResource, ResultURIConverter uriConverter,
        CaptureSearchResults results)
        throws ServletException, IOException, BadContentException {
    final boolean sameRecord = (httpHeadersResource == payloadResource);
    final Resource single;
    if (sameRecord) {
        single = payloadResource;
    } else {
        single = new CompositeResource(httpHeadersResource, payloadResource);
    }
    renderResource(httpRequest, httpResponse, wbRequest, result, single,
            uriConverter, results);
}

@Override
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, BadContentException {
// Decode resource (such as if gzip encoded)
Resource decodedResource = decodeResource(httpHeadersResource, payloadResource);
Resource decodedResource = decodeResource(resource);

ReplayParseContext context = ReplayParseContext.create(uriConverter, wbRequest, null, result, false);

HttpHeaderOperation.copyHTTPMessageHeader(httpHeadersResource, httpResponse);
HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

Map<String,String> headers = HttpHeaderOperation.processHeaders(
httpHeadersResource, context, httpHeaderProcessor);
resource, context, httpHeaderProcessor);

String charSet = charsetDetector.getCharset(httpHeadersResource,
decodedResource, wbRequest);
String charSet = charsetDetector.getCharset(resource, decodedResource,
wbRequest);

ResultURIConverter pageConverter = uriConverter;
// this feature was meant for using special ResultURIConverter for rewriting XML, but
Expand Down Expand Up @@ -213,8 +215,48 @@ public void setGuessedCharsetHeader(String guessedCharsetHeader) {
this.guessedCharsetHeader = guessedCharsetHeader;
}

/**
 * Returns a gzip-decoding wrapper Resource if {@code resource} has
 * {@code Content-Encoding: gzip}; returns {@code resource} as-is otherwise.
 * <p>The encoding is obtained from {@link Resource#getContentEncoding()} and
 * compared case-insensitively (and locale-independently) with
 * {@code GzipDecodingResource.GZIP}.</p>
 * <p>As a side-effect, {@code Content-Encoding} and
 * {@code Transfer-Encoding} headers are removed from the map returned by
 * {@code resource.getHttpHeaders()} (this happens only when
 * {@code resource} is gzip-compressed). When a {@code Content-Encoding}
 * header is removed, the encoding value is recorded in the same map under
 * {@code ORIG_ENCODING}.</p>
 * <p>TODO: XArchiveHttpHeaderProcessor also does HTTP header removal. Check for refactoring case.</p>
 * @param resource Resource to read HTTP headers and content from.
 * @return either wrapping decoder Resource or {@code resource}
 * @throws IOException declared for callers; presumably raised while setting
 *         up the decoding wrapper (not visible here)
 */
public static Resource decodeResource(Resource resource) throws IOException {
    // The encoding that matters is the one of the content actually being
    // read, so ask the resource itself rather than its header map: the two
    // may differ (e.g. for a composite resource).
    String encoding = resource.getContentEncoding();

    // equalsIgnoreCase() is null-safe for its argument and, unlike
    // toLowerCase() with the default locale, is not affected by locales
    // such as Turkish where "GZIP" lower-cases to "gz\u0131p".
    if (!GzipDecodingResource.GZIP.equalsIgnoreCase(encoding)) {
        // TODO: check for other encodings?
        return resource;
    }

    // If the rendered headers carry Content-Encoding, set it aside so the
    // replayed response does not advertise an encoding we are undoing.
    Map<String, String> revHeaders = resource.getHttpHeaders();
    String revEncoding = HttpHeaderOperation.getHeaderValue(
            revHeaders, HttpHeaderOperation.HTTP_CONTENT_ENCODING);
    if (revEncoding != null) {
        // NOTE(review): the value recorded is the content's encoding, not
        // the removed header value - confirm this is intended.
        revHeaders.put(ORIG_ENCODING, encoding);
        HttpHeaderOperation.removeHeader(revHeaders,
                HttpHeaderOperation.HTTP_CONTENT_ENCODING);
    }

    if (HttpHeaderOperation.isChunkEncoded(revHeaders)) {
        HttpHeaderOperation.removeHeader(revHeaders,
                HttpHeaderOperation.HTTP_TRANSFER_ENC_HEADER);
    }

    return new GzipDecodingResource(resource);
}

/**
Expand Down Expand Up @@ -242,12 +284,15 @@ public void setOverrideContentMimeType(String overrideContentMimeType) {
* {@code Transfer-Encoding} headers are removed from {@code headersResource} (this happens only when
* {@code headersResource} is gzip-compressed). It is assumed that {@code headersResource} and
* {@code payloadResource} are captures of identical response content.</p>
* <p>WARNING: this method does not work as intended with CompositeResource, whose
* {@code getHttpHeaders()} method returns HTTP headers from revisit record.</p>
* <p>TODO: XArchiveHttpHeaderProcessor also does HTTP header removal. Check for refactoring case.</p>
* @param headersResource Resource to read HTTP headers from.
* @param payloadResource Resource to read content from (same as {@code headerResource} for regular captures,
* different Resource if headersResource is a revisit record.)
* @return
* @throws IOException
* @deprecated 2016-03-08 use single-Resource version with CompositeResource
*/
public static Resource decodeResource(Resource headersResource,
Resource payloadResource) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,29 @@ public TransparentReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) {
@Override
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, BadContentException {
CaptureSearchResult result, Resource httpHeadersResource,
Resource payloadResource, ResultURIConverter uriConverter,
CaptureSearchResults results) throws ServletException, IOException,
BadContentException {
final Resource resource = httpHeadersResource == payloadResource ? payloadResource
: new CompositeResource(httpHeadersResource, payloadResource);
renderResource(httpRequest, httpResponse, wbRequest, result, resource,
resource, uriConverter, results);
uriConverter, results);
}

@Override
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource httpHeadersResource,
Resource payloadResource, ResultURIConverter uriConverter,
CaptureSearchResults results) throws ServletException, IOException,
BadContentException {
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, BadContentException {

HttpHeaderOperation.copyHTTPMessageHeader(httpHeadersResource, httpResponse);
HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

ReplayParseContext context = ReplayParseContext.create(uriConverter, wbRequest, null, result, false);

Map<String,String> headers = HttpHeaderOperation.processHeaders(
httpHeadersResource, context, httpHeaderProcessor);
resource, context, httpHeaderProcessor);

// HACKHACK: getContentLength() may not find the original content length
// if a HttpHeaderProcessor has mangled it too badly. Should this
Expand Down Expand Up @@ -111,7 +113,7 @@ public void renderResource(HttpServletRequest httpRequest,
OutputStream os = httpResponse.getOutputStream();
byte[] buffer = new byte[BUFFER_SIZE];
long total = 0;
for (int r = -1; (r = payloadResource.read(buffer, 0, BUFFER_SIZE)) != -1;) {
for (int r = -1; (r = resource.read(buffer, 0, BUFFER_SIZE)) != -1;) {
os.write(buffer, 0, r);
total += r;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.archive.wayback.exception.BadContentException;
import org.archive.wayback.exception.BetterRequestException;
import org.archive.wayback.exception.WaybackException;
import org.archive.wayback.replay.CompositeResource;
import org.archive.wayback.replay.HttpHeaderOperation;
import org.archive.wayback.replay.HttpHeaderProcessor;
import org.archive.wayback.replay.html.ReplayParseContext;
Expand Down Expand Up @@ -78,33 +79,34 @@ public SWFReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) {
this.httpHeaderProcessor = httpHeaderProcessor;
}

/**
 * Single-resource rendering entry point.
 * <p>Forwards to the two-resource overload, passing {@code resource} as both
 * the HTTP-headers resource and the payload resource.</p>
 *
 * @param resource resource used for both HTTP headers and payload
 * @throws ServletException propagated from the two-resource overload
 * @throws IOException propagated from the two-resource overload
 * @throws WaybackException propagated from the two-resource overload
 */
public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, WaybackException {
// same Resource instance serves as headers source and payload source
renderResource(httpRequest, httpResponse, wbRequest, result, resource,
resource, uriConverter, results);
}

/**
 * Two-resource rendering entry point.
 * <p>Picks the {@link Resource} to render from the headers/payload pair and
 * passes it to the single-resource overload: the payload resource itself
 * when both arguments are the same instance, a {@link CompositeResource}
 * over the two otherwise.</p>
 *
 * @param httpHeadersResource resource supplying the HTTP headers
 * @param payloadResource resource supplying the content
 */
@Override
public void renderResource(HttpServletRequest httpRequest,
        HttpServletResponse httpResponse, WaybackRequest wbRequest,
        CaptureSearchResult result, Resource httpHeadersResource,
        Resource payloadResource, ResultURIConverter uriConverter,
        CaptureSearchResults results)
        throws ServletException, IOException, WaybackException {
    final Resource target;
    if (httpHeadersResource != payloadResource) {
        // distinct instances: wrap them so one object serves both roles
        target = new CompositeResource(httpHeadersResource, payloadResource);
    } else {
        target = payloadResource;
    }
    renderResource(httpRequest, httpResponse, wbRequest, result, target,
            uriConverter, results);
}

public void renderResource(HttpServletRequest httpRequest,
HttpServletResponse httpResponse, WaybackRequest wbRequest,
CaptureSearchResult result, Resource resource,
ResultURIConverter uriConverter, CaptureSearchResults results)
throws ServletException, IOException, WaybackException {
try {

// copy HTTP response code:
HttpHeaderOperation.copyHTTPMessageHeader(httpHeadersResource, httpResponse);
HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

ReplayParseContext context = ReplayParseContext.create(uriConverter, wbRequest, null, result, false);

// load and process original headers:
Map<String, String> headers = HttpHeaderOperation.processHeaders(
httpHeadersResource, context, httpHeaderProcessor);
resource, context, httpHeaderProcessor);

// The URL of the resource, for resolving embedded relative URLs:
URL url = null;
Expand All @@ -122,7 +124,7 @@ public void renderResource(HttpServletRequest httpRequest,
Movie movie = getRobustMovie(RobustMovieDecoder.DECODE_RULE_NULLS);

try {
movie.decodeFromStream(payloadResource);
movie.decodeFromStream(resource);
} catch (DataFormatException e1) {
throw new BadContentException(e1.getLocalizedMessage());
}
Expand Down
Loading