Skip to content

Commit

Permalink
Improve linearized pdf load/show time.
Browse files Browse the repository at this point in the history
Reduce Pdf Plugin's count of reconnects.
Add tests for PDFPlugin DocumentLoader.

DocumentLoader was splitted into separate components, and missing tests was added for them.

The main ideas in this CL are:

1) Do not reset browser initiated connection at start (includes case when we can use range requests), if we request data near current downloading position.
2) Request as much data as we can on each request, and continue loading data using current range request. (like tape rewind)
3) Isolate RangeRequest logic into DocumentLoader.  Method OnPendingRequestComplete is called, when we receive requested data (main connection, or Range connection). (like tape playing without rewing).
4) Fill this logic by tests.

Example URL:
http://www.major-landrover.ru/upload/attachments/f/9/f96aab07dab04ae89c8a509ec1ef2b31.pdf
Comparison of changes:
https://drive.google.com/file/d/0BzWfMBOuik2QNGg0SG93Y3lpUlE/view?usp=sharing

Review-Url: https://codereview.chromium.org/2349753003
Cr-Commit-Position: refs/heads/master@{#427752}
  • Loading branch information
art-snake authored and Commit bot committed Oct 26, 2016
1 parent 46a480d commit 7fd7423
Show file tree
Hide file tree
Showing 24 changed files with 2,984 additions and 742 deletions.
30 changes: 29 additions & 1 deletion pdf/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# found in the LICENSE file.

import("//build/config/features.gni")
import("//testing/test.gni")
import("//third_party/pdfium/pdfium.gni")

assert(enable_pdf)
Expand All @@ -17,10 +18,10 @@ static_library("pdf") {
"//ppapi/cpp:objects",
"//ppapi/cpp/private:internal_module",
"//ui/base",
"//ui/gfx/range",
]

sources = [
"chunk_stream.cc",
"chunk_stream.h",
"document_loader.cc",
"document_loader.h",
Expand All @@ -37,6 +38,13 @@ static_library("pdf") {
"pdf_engine.h",
"preview_mode_client.cc",
"preview_mode_client.h",
"range_set.cc",
"range_set.h",
"timer.cc",
"timer.h",
"url_loader_wrapper.h",
"url_loader_wrapper_impl.cc",
"url_loader_wrapper_impl.h",
]

# TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
Expand Down Expand Up @@ -70,3 +78,23 @@ static_library("pdf") {
defines += [ "PDF_ENABLE_XFA" ]
}
}

test("pdf_unittests") {
sources = [
"chunk_stream_unittest.cc",
"document_loader_unittest.cc",
"range_set_unittest.cc",
"run_all_unittests.cc",
]

deps = [
":pdf",
"//base",
"//base/test:test_support",
"//ppapi/c",
"//ppapi/cpp",
"//testing/gmock",
"//testing/gtest",
"//ui/gfx/range",
]
}
1 change: 1 addition & 0 deletions pdf/DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ include_rules = [
"+ppapi",
"+ui/base/window_open_disposition.h",
"+ui/events/keycodes/keyboard_codes.h",
"+ui/gfx/range/range.h",
"+v8/include/v8.h"
]
175 changes: 0 additions & 175 deletions pdf/chunk_stream.cc

This file was deleted.

103 changes: 80 additions & 23 deletions pdf/chunk_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,103 @@
#define PDF_CHUNK_STREAM_H_

#include <stddef.h>
#include <string.h>

#include <map>
#include <utility>
#include <algorithm>
#include <array>
#include <memory>
#include <vector>

#include "pdf/range_set.h"

namespace chrome_pdf {

// This class collects a chunks of data into one data stream. Client can check
// if data in certain range is available, and get missing chunks of data.
template <uint32_t N>
class ChunkStream {
public:
ChunkStream();
~ChunkStream();
static constexpr uint32_t kChunkSize = N;
using ChunkData = typename std::array<unsigned char, N>;

void Clear();
ChunkStream() {}
~ChunkStream() {}

void Preallocate(size_t stream_size);
size_t GetSize() const;
void SetChunkData(uint32_t chunk_index, std::unique_ptr<ChunkData> data) {
if (!data)
return;
if (chunk_index >= data_.size()) {
data_.resize(chunk_index + 1);
}
if (!data_[chunk_index]) {
++filled_chunks_count_;
}
data_[chunk_index] = std::move(data);
filled_chunks_.Union(gfx::Range(chunk_index, chunk_index + 1));
}

bool WriteData(size_t offset, void* buffer, size_t size);
bool ReadData(size_t offset, size_t size, void* buffer) const;
bool ReadData(const gfx::Range& range, void* buffer) const {
if (!IsRangeAvailable(range)) {
return false;
}
unsigned char* data_buffer = static_cast<unsigned char*>(buffer);
uint32_t start = range.start();
while (start != range.end()) {
const uint32_t chunk_index = GetChunkIndex(start);
const uint32_t chunk_start = start % kChunkSize;
const uint32_t len =
std::min(kChunkSize - chunk_start, range.end() - start);
memcpy(data_buffer, data_[chunk_index]->data() + chunk_start, len);
data_buffer += len;
start += len;
}
return true;
}

// Returns vector of pairs where first is an offset, second is a size.
bool GetMissedRanges(size_t offset, size_t size,
std::vector<std::pair<size_t, size_t> >* ranges) const;
bool IsRangeAvailable(size_t offset, size_t size) const;
size_t GetFirstMissingByte() const;
uint32_t GetChunkIndex(uint32_t offset) const { return offset / kChunkSize; }

// Finds the first byte of the missing byte interval that offset belongs to.
size_t GetFirstMissingByteInInterval(size_t offset) const;
// Returns the last byte of the missing byte interval that offset belongs to.
size_t GetLastMissingByteInInterval(size_t offset) const;
gfx::Range GetChunksRange(uint32_t offset, uint32_t size) const {
return gfx::Range(GetChunkIndex(offset),
GetChunkIndex(offset + size + kChunkSize - 1));
}

private:
std::vector<unsigned char> data_;
bool IsRangeAvailable(const gfx::Range& range) const {
if (!range.IsValid() || range.is_reversed() ||
(eof_pos_ > 0 && eof_pos_ < range.end()))
return false;
if (range.is_empty())
return true;
const gfx::Range chunks_range(GetChunkIndex(range.start()),
GetChunkIndex(range.end() + kChunkSize - 1));
return filled_chunks_.Contains(chunks_range);
}

void set_eof_pos(uint32_t eof_pos) { eof_pos_ = eof_pos; }
uint32_t eof_pos() const { return eof_pos_; }

const RangeSet& filled_chunks() const { return filled_chunks_; }

// Pair, first - begining of the chunk, second - size of the chunk.
std::map<size_t, size_t> chunks_;
bool IsComplete() const {
return eof_pos_ > 0 && IsRangeAvailable(gfx::Range(0, eof_pos_));
}

size_t stream_size_;
void Clear() {
data_.clear();
eof_pos_ = 0;
filled_chunks_.Clear();
filled_chunks_count_ = 0;
}

uint32_t filled_chunks_count() const { return filled_chunks_count_; }
uint32_t total_chunks_count() const {
return GetChunkIndex(eof_pos_ + kChunkSize - 1);
}

private:
std::vector<std::unique_ptr<ChunkData>> data_;
uint32_t eof_pos_ = 0;
RangeSet filled_chunks_;
uint32_t filled_chunks_count_ = 0;
};

}; // namespace chrome_pdf
Expand Down
Loading

0 comments on commit 7fd7423

Please sign in to comment.