Skip to content

Commit

Permalink
Revert of Improve linearized pdf load/show time. (patchset #18 id:340…
Browse files Browse the repository at this point in the history
…001 of https://codereview.chromium.org/2349753003/ )

Reason for revert:
https://build.chromium.org/p/chromium/builders/Win%20x64/builds/5423/steps/compile/logs/stdio

FAILED: obj/pdf/pdf_unittests/document_loader_unittest.obj
pdf\document_loader_unittest.cc(631): error C2131: expression did not evaluate to a constant
pdf\document_loader_unittest.cc(631): note: failure was caused by call of undefined function or one not declared 'constexpr'
pdf\document_loader_unittest.cc(631): note: see usage of 'chrome_pdf::DocumentLoader::default_request_size'

Original issue's description:
> Improve linearized pdf load/show time.
> Reduce Pdf Plugin's count of reconnects.
> Add tests for PDFPlugin DocumentLoader.
>
> DocumentLoader was split into separate components, and missing tests were added for them.
>
> The main ideas in this CL are:
>
> 1) Do not reset browser initiated connection at start (includes case when we can use range requests), if we request data near current downloading position.
> 2) Request as much data as we can on each request, and continue loading data using current range request. (like tape rewind)
> 3) Isolate RangeRequest logic into DocumentLoader. Method OnPendingRequestComplete is called when we receive requested data (main connection, or Range connection). (like tape playing without rewinding).
> 4) Fill this logic by tests.
>
> Example URL:
> http://www.major-landrover.ru/upload/attachments/f/9/f96aab07dab04ae89c8a509ec1ef2b31.pdf
> Comparison of changes:
> https://drive.google.com/file/d/0BzWfMBOuik2QNGg0SG93Y3lpUlE/view?usp=sharing
>
> Committed: https://crrev.com/7fd7423cdee0dba84faf480d10dd66dcb57110d9
> Cr-Commit-Position: refs/heads/master@{#427752}

TBR=jochen@chromium.org,raymes@chromium.org,spelchat@chromium.org,rsesek@chromium.org,art-snake@yandex-team.ru
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true

Review-Url: https://codereview.chromium.org/2458493002
Cr-Commit-Position: refs/heads/master@{#427772}
  • Loading branch information
leizleiz authored and Commit bot committed Oct 26, 2016
1 parent c87e52b commit eb1b563
Show file tree
Hide file tree
Showing 24 changed files with 742 additions and 2,985 deletions.
30 changes: 1 addition & 29 deletions pdf/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# found in the LICENSE file.

import("//build/config/features.gni")
import("//testing/test.gni")
import("//third_party/pdfium/pdfium.gni")

assert(enable_pdf)
Expand All @@ -18,10 +17,10 @@ static_library("pdf") {
"//ppapi/cpp:objects",
"//ppapi/cpp/private:internal_module",
"//ui/base",
"//ui/gfx/range",
]

sources = [
"chunk_stream.cc",
"chunk_stream.h",
"document_loader.cc",
"document_loader.h",
Expand All @@ -38,13 +37,6 @@ static_library("pdf") {
"pdf_engine.h",
"preview_mode_client.cc",
"preview_mode_client.h",
"range_set.cc",
"range_set.h",
"timer.cc",
"timer.h",
"url_loader_wrapper.h",
"url_loader_wrapper_impl.cc",
"url_loader_wrapper_impl.h",
]

# TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
Expand Down Expand Up @@ -78,23 +70,3 @@ static_library("pdf") {
defines += [ "PDF_ENABLE_XFA" ]
}
}

test("pdf_unittests") {
sources = [
"chunk_stream_unittest.cc",
"document_loader_unittest.cc",
"range_set_unittest.cc",
"run_all_unittests.cc",
]

deps = [
":pdf",
"//base",
"//base/test:test_support",
"//ppapi/c",
"//ppapi/cpp",
"//testing/gmock",
"//testing/gtest",
"//ui/gfx/range",
]
}
1 change: 0 additions & 1 deletion pdf/DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ include_rules = [
"+ppapi",
"+ui/base/window_open_disposition.h",
"+ui/events/keycodes/keyboard_codes.h",
"+ui/gfx/range/range.h",
"+v8/include/v8.h"
]
175 changes: 175 additions & 0 deletions pdf/chunk_stream.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "pdf/chunk_stream.h"

#include <stddef.h>
#include <string.h>

#define __STDC_LIMIT_MACROS
#ifdef _WIN32
#include <limits.h>
#else
#include <stdint.h>
#endif

#include <algorithm>

namespace chrome_pdf {

// A new stream is empty; the total stream size is unknown (0) until
// Preallocate() supplies it.
ChunkStream::ChunkStream() : stream_size_(0) {}

ChunkStream::~ChunkStream() {}

// Discards all buffered bytes and chunk bookkeeping and forgets the
// stream size, returning the object to its freshly-constructed state.
void ChunkStream::Clear() {
  data_.clear();
  chunks_.clear();
  stream_size_ = 0;
}

// Records the expected total stream size and reserves backing-store
// capacity up front so subsequent writes avoid repeated reallocation.
void ChunkStream::Preallocate(size_t stream_size) {
  stream_size_ = stream_size;
  data_.reserve(stream_size);
}

// Returns the current extent of the backing buffer, i.e. one past the
// highest byte offset written so far (gaps inside it may still be missing).
size_t ChunkStream::GetSize() const {
  return data_.size();
}

// Copies |size| bytes from |buffer| into the stream at |offset|, growing the
// backing store as needed, and records the written range in |chunks_|,
// merging it with any recorded ranges it overlaps or abuts. Returns false
// only when |offset| + |size| would overflow size_t.
bool ChunkStream::WriteData(size_t offset, void* buffer, size_t size) {
  if (SIZE_MAX - size < offset)
    return false;

  // A zero-length write is a no-op. Returning early also avoids growing
  // |data_| to |offset| and forming &data_[offset] with offset ==
  // data_.size(), which is out-of-bounds indexing even though memcpy would
  // copy nothing, and keeps useless zero-length entries out of |chunks_|.
  if (size == 0)
    return true;

  if (data_.size() < offset + size)
    data_.resize(offset + size);

  memcpy(&data_[offset], buffer, size);

  if (chunks_.empty()) {
    chunks_[offset] = size;
    return true;
  }

  // Find the first recorded chunk that could overlap or abut the new range.
  std::map<size_t, size_t>::iterator start = chunks_.upper_bound(offset);
  if (start != chunks_.begin())
    --start;  // start now points to the key equal or lower than offset.
  if (start->first + start->second < offset)
    ++start;  // start element is entirely before current chunk, skip it.

  std::map<size_t, size_t>::iterator end = chunks_.upper_bound(offset + size);
  if (start == end) {  // No chunks to merge.
    chunks_[offset] = size;
    return true;
  }

  --end;

  // Collapse [start, end] plus the new range into a single covering chunk.
  size_t new_offset = std::min<size_t>(start->first, offset);
  size_t new_size =
      std::max<size_t>(end->first + end->second, offset + size) - new_offset;

  chunks_.erase(start, ++end);

  chunks_[new_offset] = new_size;

  return true;
}

// Copies |size| bytes starting at |offset| into |buffer|. Returns false
// without touching |buffer| when any byte of the range has not been
// written yet (per IsRangeAvailable); returns true on success.
bool ChunkStream::ReadData(size_t offset, size_t size, void* buffer) const {
  if (!IsRangeAvailable(offset, size))
    return false;

  // Guard the copy: for a zero-length read at the very end of a chunk,
  // &data_[offset] can index one past the end of |data_|, which is
  // undefined behavior even though memcpy would copy nothing.
  if (size > 0)
    memcpy(buffer, &data_[offset], size);
  return true;
}

// Computes the sub-ranges of [offset, offset + size) that are not yet
// present in the stream. Returns false (leaving |ranges| untouched) when
// the whole range is already available; otherwise clears |ranges|, fills
// it with (offset, size) pairs covering every missing gap, and returns
// true.
bool ChunkStream::GetMissedRanges(
    size_t offset, size_t size,
    std::vector<std::pair<size_t, size_t> >* ranges) const {
  if (IsRangeAvailable(offset, size))
    return false;

  ranges->clear();
  // Nothing downloaded yet: the entire requested range is missing.
  if (chunks_.empty()) {
    ranges->push_back(std::pair<size_t, size_t>(offset, size));
    return true;
  }

  // Find the first recorded chunk that could intersect the range.
  std::map<size_t, size_t>::const_iterator start = chunks_.upper_bound(offset);
  if (start != chunks_.begin())
    --start;  // start now points to the key equal or lower than offset.
  if (start->first + start->second < offset)
    ++start;  // start element is entirely before current chunk, skip it.

  std::map<size_t, size_t>::const_iterator end =
      chunks_.upper_bound(offset + size);
  if (start == end) {  // No data in the current range available.
    ranges->push_back(std::pair<size_t, size_t>(offset, size));
    return true;
  }

  // Walk the intersecting chunks in order, emitting the gap (if any) before
  // each chunk and advancing |cur_offset| past that chunk's end.
  size_t cur_offset = offset;
  std::map<size_t, size_t>::const_iterator it;
  for (it = start; it != end; ++it) {
    if (cur_offset < it->first) {
      size_t new_size = it->first - cur_offset;
      ranges->push_back(std::pair<size_t, size_t>(cur_offset, new_size));
      cur_offset = it->first + it->second;
    } else if (cur_offset < it->first + it->second) {
      // |cur_offset| falls inside this chunk; skip past its end.
      cur_offset = it->first + it->second;
    }
  }

  // Add last chunk.
  if (cur_offset < offset + size)
    ranges->push_back(std::pair<size_t, size_t>(cur_offset,
                                                offset + size - cur_offset));

  return true;
}

// Returns true when every byte of [offset, offset + size) has already been
// written. Because WriteData() merges overlapping/abutting chunks, a fully
// available range is always contained within a single recorded chunk.
bool ChunkStream::IsRangeAvailable(size_t offset, size_t size) const {
  // Nothing recorded, or the range end would overflow size_t.
  if (chunks_.empty() || SIZE_MAX - size < offset)
    return false;

  // Locate the chunk starting at or before |offset|.
  std::map<size_t, size_t>::const_iterator containing =
      chunks_.upper_bound(offset);
  if (containing == chunks_.begin())
    return false;  // Every recorded chunk starts after |offset|.
  --containing;

  // Available iff that chunk extends through the last requested byte.
  const size_t chunk_end = containing->first + containing->second;
  return chunk_end >= offset + size;
}

// Returns the offset of the first byte that has not been written yet.
size_t ChunkStream::GetFirstMissingByte() const {
  // With no chunks at all, byte 0 is missing.
  if (chunks_.empty())
    return 0;

  std::map<size_t, size_t>::const_iterator first_chunk = chunks_.begin();
  if (first_chunk->first > 0)
    return 0;  // There is a hole before the first recorded chunk.

  // The stream starts with a chunk at offset 0; the first hole follows it.
  return first_chunk->second;
}

// Finds the first byte of the missing-byte interval that |offset| belongs
// to, i.e. the end of the chunk at or before |offset| (0 when no such
// chunk exists).
size_t ChunkStream::GetFirstMissingByteInInterval(size_t offset) const {
  if (chunks_.empty())
    return 0;

  std::map<size_t, size_t>::const_iterator preceding =
      chunks_.upper_bound(offset);
  if (preceding == chunks_.begin())
    return 0;  // No chunk starts at or before |offset|.

  --preceding;  // Chunk with the greatest start <= |offset|.
  return preceding->first + preceding->second;
}

// Returns the last byte of the missing-byte interval that |offset| belongs
// to: one before the start of the next chunk after |offset|, or the last
// byte of the stream when no later chunk exists.
// NOTE(review): when stream_size_ is 0 (Preallocate() not yet called) and
// no later chunk exists, stream_size_ - 1 wraps to SIZE_MAX — presumably
// callers only use this after the stream size is known; verify at call
// sites.
size_t ChunkStream::GetLastMissingByteInInterval(size_t offset) const {
  if (chunks_.empty())
    return stream_size_ - 1;

  std::map<size_t, size_t>::const_iterator next_chunk =
      chunks_.upper_bound(offset);
  if (next_chunk == chunks_.end())
    return stream_size_ - 1;  // No chunk starts after |offset|.

  return next_chunk->first - 1;
}

} // namespace chrome_pdf
103 changes: 23 additions & 80 deletions pdf/chunk_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,103 +6,46 @@
#define PDF_CHUNK_STREAM_H_

#include <stddef.h>
#include <string.h>

#include <algorithm>
#include <array>
#include <memory>
#include <map>
#include <utility>
#include <vector>

#include "pdf/range_set.h"

namespace chrome_pdf {

// This class collects a chunks of data into one data stream. Client can check
// if data in certain range is available, and get missing chunks of data.
template <uint32_t N>
class ChunkStream {
public:
static constexpr uint32_t kChunkSize = N;
using ChunkData = typename std::array<unsigned char, N>;

ChunkStream() {}
~ChunkStream() {}

void SetChunkData(uint32_t chunk_index, std::unique_ptr<ChunkData> data) {
if (!data)
return;
if (chunk_index >= data_.size()) {
data_.resize(chunk_index + 1);
}
if (!data_[chunk_index]) {
++filled_chunks_count_;
}
data_[chunk_index] = std::move(data);
filled_chunks_.Union(gfx::Range(chunk_index, chunk_index + 1));
}

bool ReadData(const gfx::Range& range, void* buffer) const {
if (!IsRangeAvailable(range)) {
return false;
}
unsigned char* data_buffer = static_cast<unsigned char*>(buffer);
uint32_t start = range.start();
while (start != range.end()) {
const uint32_t chunk_index = GetChunkIndex(start);
const uint32_t chunk_start = start % kChunkSize;
const uint32_t len =
std::min(kChunkSize - chunk_start, range.end() - start);
memcpy(data_buffer, data_[chunk_index]->data() + chunk_start, len);
data_buffer += len;
start += len;
}
return true;
}
ChunkStream();
~ChunkStream();

uint32_t GetChunkIndex(uint32_t offset) const { return offset / kChunkSize; }
void Clear();

gfx::Range GetChunksRange(uint32_t offset, uint32_t size) const {
return gfx::Range(GetChunkIndex(offset),
GetChunkIndex(offset + size + kChunkSize - 1));
}
void Preallocate(size_t stream_size);
size_t GetSize() const;

bool IsRangeAvailable(const gfx::Range& range) const {
if (!range.IsValid() || range.is_reversed() ||
(eof_pos_ > 0 && eof_pos_ < range.end()))
return false;
if (range.is_empty())
return true;
const gfx::Range chunks_range(GetChunkIndex(range.start()),
GetChunkIndex(range.end() + kChunkSize - 1));
return filled_chunks_.Contains(chunks_range);
}
bool WriteData(size_t offset, void* buffer, size_t size);
bool ReadData(size_t offset, size_t size, void* buffer) const;

void set_eof_pos(uint32_t eof_pos) { eof_pos_ = eof_pos; }
uint32_t eof_pos() const { return eof_pos_; }
// Returns vector of pairs where first is an offset, second is a size.
bool GetMissedRanges(size_t offset, size_t size,
std::vector<std::pair<size_t, size_t> >* ranges) const;
bool IsRangeAvailable(size_t offset, size_t size) const;
size_t GetFirstMissingByte() const;

const RangeSet& filled_chunks() const { return filled_chunks_; }
// Finds the first byte of the missing byte interval that offset belongs to.
size_t GetFirstMissingByteInInterval(size_t offset) const;
// Returns the last byte of the missing byte interval that offset belongs to.
size_t GetLastMissingByteInInterval(size_t offset) const;

bool IsComplete() const {
return eof_pos_ > 0 && IsRangeAvailable(gfx::Range(0, eof_pos_));
}

void Clear() {
data_.clear();
eof_pos_ = 0;
filled_chunks_.Clear();
filled_chunks_count_ = 0;
}
private:
std::vector<unsigned char> data_;

uint32_t filled_chunks_count() const { return filled_chunks_count_; }
uint32_t total_chunks_count() const {
return GetChunkIndex(eof_pos_ + kChunkSize - 1);
}
// Pair, first - begining of the chunk, second - size of the chunk.
std::map<size_t, size_t> chunks_;

private:
std::vector<std::unique_ptr<ChunkData>> data_;
uint32_t eof_pos_ = 0;
RangeSet filled_chunks_;
uint32_t filled_chunks_count_ = 0;
size_t stream_size_;
};

}; // namespace chrome_pdf
Expand Down
Loading

0 comments on commit eb1b563

Please sign in to comment.