Move multipart resource handling to core/fetch (1/2)

Currently a multipart/x-mixed-replace response is parsed in the content layer and dispatched to clients in blink. It is problematic because the parser calls callbacks in a strange way and special handling code is scattered from core/html to content/child. This change adds MultipartImageResourceParser in core/fetch. It is basically copied from multipart_response_delegate. BUG=570608 Review URL: https://codereview.chromium.org/1693183002 Cr-Commit-Position: refs/heads/master@{#380431}
cbchan · Mar 10, 2016 · 79aba5a · 79aba5a
1 parent 5b22c57
commit 79aba5a
Show file tree

Hide file tree

Showing 9 changed files with 861 additions and 0 deletions.
diff --git a/content/child/blink_platform_impl.cc b/content/child/blink_platform_impl.cc
@@ -467,6 +467,15 @@ bool BlinkPlatformImpl::portAllowed(const blink::WebURL& url) const {
   return net::IsPortAllowedForScheme(gurl.EffectiveIntPort(), gurl.scheme());
 }
 
+// Forwards to WebURLLoaderImpl::ParseMultipartHeadersFromBody() with the same
+// arguments and returns its result unchanged.
+bool BlinkPlatformImpl::parseMultipartHeadersFromBody(
+    const char* bytes,
+    size_t size,
+    blink::WebURLResponse* response,
+    size_t* end) const {
+  const bool parsed =
+      WebURLLoaderImpl::ParseMultipartHeadersFromBody(bytes, size, response,
+                                                      end);
+  return parsed;
+}
+
 blink::WebThread* BlinkPlatformImpl::createThread(const char* name) {
   scoped_ptr<WebThreadImplForWorkerScheduler> thread(
       new WebThreadImplForWorkerScheduler(name));

diff --git a/content/child/blink_platform_impl.h b/content/child/blink_platform_impl.h
@@ -96,6 +96,11 @@ class CONTENT_EXPORT BlinkPlatformImpl
   blink::WebURLError cancelledError(const blink::WebURL& url) const override;
   bool isReservedIPAddress(const blink::WebString& host) const override;
   bool portAllowed(const blink::WebURL& url) const override;
+  bool parseMultipartHeadersFromBody(const char* bytes,
+                                     size_t size,
+                                     blink::WebURLResponse* response,
+                                     size_t* end) const override;
+
   blink::WebThread* createThread(const char* name) override;
   blink::WebThread* currentThread() override;
   void recordAction(const blink::UserMetricsAction&) override;

diff --git a/content/child/web_url_loader_impl.cc b/content/child/web_url_loader_impl.cc
@@ -83,6 +83,17 @@ namespace content {
 
 namespace {
 
+// Response headers that are taken from the MIME part itself, not copied from
+// the original response, when generating a WebURLResponse for that payload.
+const char* const kReplaceHeaders[] = {
+  "content-type",
+  "content-length",
+  "content-disposition",
+  "content-range",
+  "range",
+  "set-cookie"
+};
+
 using HeadersVector = ResourceDevToolsInfo::HeadersVector;
 
 // Converts timing data from |load_timing| to the format used by WebKit.
@@ -1143,4 +1154,52 @@ void WebURLLoaderImpl::setLoadingTaskRunner(
   context_->SetWebTaskRunner(make_scoped_ptr(loading_task_runner->clone()));
 }
 
+// This function is implemented here because it uses net functions. It is
+// tested in
+// third_party/WebKit/Source/core/fetch/MultipartImageResourceParserTest.cpp.
+//
+// Looks for a header block at the start of |bytes| (|size| bytes long).
+// Returns false, leaving |response| and |*end| untouched, when
+// LocateEndOfAdditionalHeaders() reports a negative position. Otherwise
+// stores that position in |*end|, sets the MIME type and text encoding name
+// on |response|, replaces every header listed in kReplaceHeaders with the
+// values found in the parsed block, and returns true.
+bool WebURLLoaderImpl::ParseMultipartHeadersFromBody(
+    const char* bytes,
+    size_t size,
+    blink::WebURLResponse* response,
+    size_t* end) {
+  const int headers_length =
+      net::HttpUtil::LocateEndOfAdditionalHeaders(bytes, size, 0);
+  if (headers_length < 0)
+    return false;
+  *end = headers_length;
+
+  // HttpResponseHeaders requires a status line, so prepend a synthetic one
+  // to the headers eaten from the body.
+  std::string raw_headers("HTTP/1.1 200 OK\r\n");
+  raw_headers.append(bytes, headers_length);
+  scoped_refptr<net::HttpResponseHeaders> parsed_headers =
+      new net::HttpResponseHeaders(net::HttpUtil::AssembleRawHeaders(
+          raw_headers.c_str(), raw_headers.size()));
+
+  std::string mime_type;
+  parsed_headers->GetMimeType(&mime_type);
+  response->setMIMEType(WebString::fromUTF8(mime_type));
+
+  std::string charset;
+  parsed_headers->GetCharset(&charset);
+  response->setTextEncodingName(WebString::fromUTF8(charset));
+
+  // Copy headers listed in kReplaceHeaders to the response, dropping any
+  // value the response already carried for them.
+  for (const char* replaced_header : kReplaceHeaders) {
+    const std::string name(replaced_header);
+    const WebString web_name(WebString::fromLatin1(name));
+    response->clearHTTPHeaderField(web_name);
+
+    std::string value;
+    size_t iterator = 0;
+    while (parsed_headers->EnumerateHeader(&iterator, name, &value))
+      response->addHTTPHeaderField(web_name, WebString::fromLatin1(value));
+  }
+  return true;
+}
+
 }  // namespace content
diff --git a/content/child/web_url_loader_impl.h b/content/child/web_url_loader_impl.h
@@ -68,6 +68,12 @@ class CONTENT_EXPORT WebURLLoaderImpl
                          int intra_priority_value) override;
   void setLoadingTaskRunner(blink::WebTaskRunner* loading_task_runner) override;
 
+  // This is a utility function for multipart image resources.
+  static bool ParseMultipartHeadersFromBody(const char* bytes,
+                                            size_t size,
+                                            blink::WebURLResponse* response,
+                                            size_t* end);
+
  private:
   class Context;
   class RequestPeerImpl;

diff --git a/third_party/WebKit/Source/core/core.gypi b/third_party/WebKit/Source/core/core.gypi
@@ -1610,6 +1610,8 @@
             'fetch/LinkFetchResource.h',
             'fetch/MemoryCache.cpp',
             'fetch/MemoryCache.h',
+            'fetch/MultipartImageResourceParser.cpp',
+            'fetch/MultipartImageResourceParser.h',
             'fetch/RawResource.cpp',
             'fetch/RawResource.h',
             'fetch/Resource.cpp',
@@ -3959,6 +3961,7 @@
             'fetch/ImageResourceTest.cpp',
             'fetch/MemoryCacheTest.cpp',
             'fetch/MockImageResourceClient.cpp',
+            'fetch/MultipartImageResourceParserTest.cpp',
             'fetch/RawResourceTest.cpp',
             'fetch/ResourceFetcherTest.cpp',
             'fetch/ResourceLoaderOptionsTest.cpp',

diff --git a/third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.cpp b/third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.cpp
@@ -0,0 +1,201 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fetch/MultipartImageResourceParser.h"
+
+#include "public/platform/Platform.h"
+#include "public/platform/WebURLResponse.h"
+#include "wtf/NotFound.h"
+#include "wtf/text/WTFString.h"
+
+#include <algorithm>
+
+namespace blink {
+
+// Initializes m_originalResponse, m_boundary and m_client from the arguments
+// and makes sure the stored boundary begins with "--".
+MultipartImageResourceParser::MultipartImageResourceParser(const ResourceResponse& response, const Vector<char>& boundary, Client* client)
+    : m_originalResponse(response)
+    , m_boundary(boundary)
+    , m_client(client)
+{
+    // Some servers already report the boundary with a leading "--" (see
+    // https://crbug.com/5786), so only add the prefix when it is missing.
+    const bool hasDashPrefix = m_boundary.size() >= 2 && m_boundary[0] == '-' && m_boundary[1] == '-';
+    if (!hasDashPrefix)
+        m_boundary.prepend("--", 2);
+}
+
+// Appends |size| bytes starting at |bytes| to the internal buffer and parses
+// as much of the buffer as possible. Part headers are reported through
+// parseHeaders() and body bytes through m_client->multipartDataReceived().
+// Data that cannot be classified yet (an incomplete header block, or a tail
+// that may contain a truncated boundary) stays buffered for the next call.
+// Parsing stops as soon as isCancelled() is true after a client notification.
+void MultipartImageResourceParser::appendData(const char* bytes, size_t size)
+{
+    // m_sawLastBoundary means that we've already received the final boundary
+    // token. The server should stop sending us data at this point, but if it
+    // keeps sending, we just throw the extra data away.
+    if (m_sawLastBoundary)
+        return;
+    m_data.append(bytes, size);
+
+    if (m_isParsingTop) {
+        // Eat leading \r\n
+        size_t pos = pushOverLine(m_data, 0);
+        if (pos)
+            m_data.remove(0, pos);
+
+        if (m_data.size() < m_boundary.size() + 2) {
+            // We don't have enough data yet to make a boundary token.  Just
+            // wait until the next chunk of data arrives.
+            return;
+        }
+
+        // Some servers don't send a boundary token before the first chunk of
+        // data.  We handle this case anyway (Gecko does too).
+        if (0 != memcmp(m_data.data(), m_boundary.data(), m_boundary.size())) {
+            m_data.prepend("\n", 1);
+            m_data.prependVector(m_boundary);
+        }
+        m_isParsingTop = false;
+    }
+
+    // Headers
+    if (m_isParsingHeaders) {
+        // Eat leading \r\n
+        size_t pos = pushOverLine(m_data, 0);
+        if (pos)
+            m_data.remove(0, pos);
+
+        if (!parseHeaders()) {
+            // Get more data before trying again.
+            return;
+        }
+        // Successfully parsed headers.
+        m_isParsingHeaders = false;
+        if (isCancelled())
+            return;
+    }
+
+    size_t boundaryPosition;
+    while ((boundaryPosition = findBoundary(m_data, &m_boundary)) != kNotFound) {
+        // Strip out trailing \r\n characters in the buffer preceding the
+        // boundary on the same lines as does Firefox.
+        size_t dataSize = boundaryPosition;
+        if (boundaryPosition > 0 && m_data[boundaryPosition - 1] == '\n') {
+            dataSize--;
+            if (boundaryPosition > 1 && m_data[boundaryPosition - 2] == '\r') {
+                dataSize--;
+            }
+        }
+        if (dataSize) {
+            m_client->multipartDataReceived(m_data.data(), dataSize);
+            if (isCancelled())
+                return;
+        }
+        size_t boundaryEndPosition = boundaryPosition + m_boundary.size();
+        if (boundaryEndPosition < m_data.size() && '-' == m_data[boundaryEndPosition]) {
+            // This was the last boundary so we can stop processing.
+            m_sawLastBoundary = true;
+            m_data.clear();
+            return;
+        }
+
+        // We can now throw out data up through the boundary
+        size_t offset = pushOverLine(m_data, boundaryEndPosition);
+        m_data.remove(0, boundaryEndPosition + offset);
+
+        // Ok, back to parsing headers
+        if (!parseHeaders()) {
+            m_isParsingHeaders = true;
+            break;
+        }
+        // parseHeaders() has just notified the client. Re-check for
+        // cancellation, as is done after every other client notification in
+        // this function, before touching the client again.
+        if (isCancelled())
+            return;
+    }
+
+    // At this point, we should send over any data we have, but keep enough data
+    // buffered to handle a boundary that may have been truncated.
+    if (!m_isParsingHeaders && m_data.size() > m_boundary.size()) {
+        // If the last character is a new line character, go ahead and just send
+        // everything we have buffered.  This matches an optimization in Gecko.
+        size_t sendLength = m_data.size() - m_boundary.size();
+        if (m_data.last() == '\n')
+            sendLength = m_data.size();
+        m_client->multipartDataReceived(m_data.data(), sendLength);
+        m_data.remove(0, sendLength);
+    }
+}
+
+// Ends parsing: hands any remaining buffered body bytes to the client, empties
+// the buffer and marks the last boundary as seen. Asserts that the parser has
+// not been cancelled.
+void MultipartImageResourceParser::finish()
+{
+    ASSERT(!isCancelled());
+    // Whatever is still buffered is sent to the client, unless we stopped in
+    // the middle of a header block or already saw the final boundary; in
+    // those cases it is dropped.
+    const bool hasUnsentBody = !m_isParsingHeaders && !m_data.isEmpty() && !m_sawLastBoundary;
+    if (hasUnsentBody)
+        m_client->multipartDataReceived(m_data.data(), m_data.size());
+    m_sawLastBoundary = true;
+    m_data.clear();
+}
+
+// Returns how many line-break bytes start at |pos| in |data|: 0 when |pos| is
+// out of range or data[pos] is neither '\r' nor '\n', 2 when that byte is
+// directly followed by '\n', and 1 otherwise.
+size_t MultipartImageResourceParser::pushOverLine(const Vector<char>& data, size_t pos)
+{
+    // TODO(yhirano): This function has two problems. Fix them.
+    //  1. It eats "\n\n".
+    //  2. When the incoming data is not sufficient (i.e. data[pos] == '\r'
+    //     && data.size() == pos + 1), it should notify the caller.
+    if (pos >= data.size())
+        return 0;
+
+    const char first = data[pos];
+    if (first != '\r' && first != '\n')
+        return 0;
+
+    const bool lineFeedFollows = pos + 1 < data.size() && data[pos + 1] == '\n';
+    return lineFeedFollows ? 2 : 1;
+}
+
+// Tries to parse the headers of the next part from the front of m_data.
+// Returns false without modifying m_data when
+// Platform::parseMultipartHeadersFromBody() fails. On success the consumed
+// bytes are removed from m_data, the resulting response is passed to
+// m_client->onePartInMultipartReceived(), and true is returned.
+bool MultipartImageResourceParser::parseHeaders()
+{
+    // Create a WebURLResponse based on the original set of headers + the
+    // replacement headers. We only replace the same few headers that gecko
+    // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
+    WebURLResponse partResponse(m_originalResponse.url());
+    for (const auto& field : m_originalResponse.httpHeaderFields())
+        partResponse.addHTTPHeaderField(field.key, field.value);
+
+    size_t headerLength = 0;
+    const bool parsed = Platform::current()->parseMultipartHeadersFromBody(m_data.data(), m_data.size(), &partResponse, &headerLength);
+    if (!parsed)
+        return false;
+    m_data.remove(0, headerLength);
+
+    // To avoid recording every multipart load as a separate visit in
+    // the history database, we want to keep track of whether the response
+    // is part of a multipart payload.  We do want to record the first visit,
+    // so only parts after the first one are flagged as multipart payload.
+    const bool isSubsequentPart = !m_isFirstPart;
+    m_isFirstPart = false;
+    partResponse.setIsMultipartPayload(isSubsequentPart);
+
+    // Send the response!
+    m_client->onePartInMultipartReceived(partResponse.toResourceResponse());
+    return true;
+}
+
+// Returns the offset in |data| of the first occurrence of |*boundary|, or
+// kNotFound when |data| does not contain it.
+//
+// Boundaries are supposed to be preceded with --, but it looks like gecko
+// doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
+// When the match is directly preceded by "--", the returned offset is moved
+// back over those two dashes and "--" is prepended to |*boundary|.
+size_t MultipartImageResourceParser::findBoundary(const Vector<char>& data, Vector<char>* boundary)
+{
+    const char* const dataBegin = data.data();
+    const char* const dataEnd = dataBegin + data.size();
+    const char* const match = std::search(dataBegin, dataEnd, boundary->data(), boundary->data() + boundary->size());
+    if (match == dataEnd)
+        return kNotFound;
+
+    const size_t matchOffset = match - dataBegin;
+    // Back up over -- for backwards compat
+    // TODO(tc): Don't we only want to do this once?  Gecko code doesn't
+    // seem to care.
+    const bool precededByDashes = matchOffset >= 2 && data[matchOffset - 1] == '-' && data[matchOffset - 2] == '-';
+    if (!precededByDashes)
+        return matchOffset;
+
+    boundary->prepend("--", 2);
+    return matchOffset - 2;
+}
+
+DEFINE_TRACE(MultipartImageResourceParser)
+{
+    visitor->trace(m_client); // Report m_client to the tracing visitor.
+}
+
+} // namespace blink