Skip to content

Commit

Permalink
Merge pull request #880 from openzim/split_file_open
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Apr 30, 2024
2 parents 51b8a4d + 48e64c7 commit 1e42f56
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 23 deletions.
53 changes: 36 additions & 17 deletions src/file_compound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "file_compound.h"

#include <errno.h>
#include <stdexcept>
#include <string.h>
#include <sys/stat.h>
#include <zim/tools.h>
Expand All @@ -40,30 +41,48 @@ void FileCompound::addPart(FilePart* fpart)
_fsize += fpart->size();
}

std::shared_ptr<FileCompound> FileCompound::openSinglePieceOrSplitZimFile(std::string filename) {
std::shared_ptr<FileCompound> fileCompound;
if (filename.size() > 6 && filename.substr(filename.size()-6) == ".zimaa") {
filename.resize(filename.size()-2);
} else {
try {
fileCompound = std::make_shared<FileCompound>(filename);
} catch(...) { }
}

if ( !fileCompound ) {
fileCompound = std::make_shared<FileCompound>(filename, FileCompound::MultiPartToken::Multi);
}
return fileCompound;
}

// Build a compound made of exactly one part: the file named `filename`.
// Any exception thrown while creating or registering that part propagates.
FileCompound::FileCompound(const std::string& filename)
  : _filename(filename)
  , _fsize(0)
{
  FilePart* const onlyPart = new FilePart(filename);
  addPart(onlyPart);
}

/**
 * Open a split zim file whose parts are named `base_filename` followed by a
 * two-letter suffix: "aa", "ab", ..., "az", "ba", ..., "zz".
 *
 * Parts are added in that order. The first std::runtime_error raised while
 * adding a part ends the enumeration (presumably a part that does not exist —
 * confirm against FilePart). Unlike the single-file constructor, this
 * overload never tries to open `base_filename` itself.
 *
 * @param base_filename Path common to all parts, without the two-letter suffix.
 * @param _token        Tag used only to select this overload; its value is unused.
 * @throws std::runtime_error if no part at all could be added.
 */
FileCompound::FileCompound(const std::string& base_filename, MultiPartToken _token):
  _filename(base_filename),
  _fsize(0)
{
  try {
    for (char ch0 = 'a'; ch0 <= 'z'; ++ch0)
    {
      const std::string fname0 = base_filename + ch0;
      for (char ch1 = 'a'; ch1 <= 'z'; ++ch1)
      {
        addPart(new FilePart(fname0 + ch1));
      }
    }
  } catch (const std::runtime_error&) {
    // This catch acts as a break for the double loop.
  }
  if (empty()) {
    // We haven't found any part
    throw std::runtime_error(Formatter() << "Error opening as a split file: " << base_filename);
  }
}

Expand Down
4 changes: 4 additions & 0 deletions src/file_compound.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "zim_types.h"
#include "debug.h"
#include <map>
#include <memory>
#include <vector>

namespace zim {
Expand Down Expand Up @@ -53,9 +54,12 @@ class FileCompound : private std::map<Range, FilePart*, less_range> {
public: // types
typedef const_iterator PartIterator;
typedef std::pair<PartIterator, PartIterator> PartRange;
enum class MultiPartToken { Multi };

public: // functions
static std::shared_ptr<FileCompound> openSinglePieceOrSplitZimFile(std::string filename);
explicit FileCompound(const std::string& filename);
explicit FileCompound(const std::string& filename, MultiPartToken token);

#ifndef _WIN32
explicit FileCompound(int fd);
Expand Down
15 changes: 10 additions & 5 deletions src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class Grouping
// FileImpl
//
// Open the zim archive designated by `fname`.
// Delegates to the FileCompound-based constructor, letting
// FileCompound::openSinglePieceOrSplitZimFile decide whether `fname` is a
// single-piece file or (the first chunk / base name of) a split archive.
FileImpl::FileImpl(const std::string& fname)
  : FileImpl(FileCompound::openSinglePieceOrSplitZimFile(fname))
{}

#ifndef _WIN32
Expand Down Expand Up @@ -207,6 +207,15 @@ class Grouping
throw ZimFileFormatError("error reading zim-file header.");
}

// This can happen for several reasons:
// - The zim file is corrupted (corrupted header)
// - The zim file is too small (ongoing download, truncated file...)
// - The zim file is embedded at the beginning of another file (and we try to open the file as a zim file)
// If opened through a FdInput, the size should be set in the FdInput.
if (header.hasChecksum() && (header.getChecksumPos() + 16) != size_type(zimReader->size())) {
throw ZimFileFormatError("Zim file(s) is of bad size or corrupted.");
}

auto pathPtrReader = sectionSubReader(*zimReader,
"Dirent pointer table",
offset_t(header.getPathPtrPos()),
Expand Down Expand Up @@ -297,10 +306,6 @@ class Grouping
throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
}
}

if (header.hasChecksum() && header.getChecksumPos() != (getFilesize().v-16) ) {
throw ZimFileFormatError("Checksum position is not valid");
}
}

offset_type FileImpl::getMimeListEndUpperLimit() const
Expand Down
27 changes: 26 additions & 1 deletion test/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,31 @@ TEST(ZimArchive, openRealZimArchive)
}
}

// Opening the first chunk ("<name>.zimaa") of a split archive must succeed
// and the resulting archive must pass its integrity check.
TEST(ZimArchive, openSplitZimArchive)
{
  const char* fname = "wikibooks_be_all_nopic_2017-02_splitted.zim";

  for (auto& testfile: getDataFilePath(fname)) {
    // Path of the first chunk of the split archive.
    const auto firstChunkPath = testfile.path + "aa";
    const TestContext ctx{ {"path", firstChunkPath } };

    std::unique_ptr<zim::Archive> archive;
    EXPECT_NO_THROW( archive.reset(new zim::Archive(firstChunkPath)) ) << ctx;

    // Nothing more to verify when the archive could not be opened.
    if ( !archive ) {
      continue;
    }
    EXPECT_TRUE( archive->check() ) << ctx;
  }
}

// Asking for "<name>.zimaa" when only the single-piece "<name>.zim" exists
// must fail with std::runtime_error instead of silently opening "<name>.zim".
TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive)
{
  const char* fname = "wikibooks_be_all_nopic_2017-02.zim";

  for (auto& testfile: getDataFilePath(fname)) {
    // Name of a first chunk that does not exist on disk.
    const auto missingChunkPath = testfile.path + "aa";
    const TestContext ctx{ {"path", missingChunkPath } };

    std::unique_ptr<zim::Archive> archive;
    EXPECT_THROW( archive.reset(new zim::Archive(missingChunkPath)), std::runtime_error) << ctx;
  }
}

TEST(ZimArchive, randomEntry)
{
const char* const zimfiles[] = {
Expand Down Expand Up @@ -434,7 +459,7 @@ TEST(ZimArchive, validate)

TEST_BROKEN_ZIM_NAME(
"invalid.invalid_checksumpos.zim",
"Checksum position is not valid\n"
"Zim file(s) is of bad size or corrupted.\n"
);

TEST_BROKEN_ZIM_NAME(
Expand Down

0 comments on commit 1e42f56

Please sign in to comment.