Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly try to open split files. #880

Merged
merged 2 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 36 additions & 17 deletions src/file_compound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "file_compound.h"

#include <errno.h>
#include <stdexcept>
#include <string.h>
#include <sys/stat.h>
#include <zim/tools.h>
Expand All @@ -40,30 +41,48 @@ void FileCompound::addPart(FilePart* fpart)
_fsize += fpart->size();
}

std::shared_ptr<FileCompound> FileCompound::openSinglePieceOrSplitZimFile(std::string filename) {
std::shared_ptr<FileCompound> fileCompound;
if (filename.size() > 6 && filename.substr(filename.size()-6) == ".zimaa") {
filename.resize(filename.size()-2);
} else {
try {
fileCompound = std::make_shared<FileCompound>(filename);
} catch(...) { }
}

if ( !fileCompound ) {
fileCompound = std::make_shared<FileCompound>(filename, FileCompound::MultiPartToken::Multi);
}
return fileCompound;
}

// Builds a compound made of exactly one part: the file named `filename`.
// There is no error handling here, so any exception raised while creating or
// adding that part reaches the caller.
FileCompound::FileCompound(const std::string& filename)
  : _filename(filename),
    _fsize(0)
{
  FilePart* const singlePart = new FilePart(filename);
  addPart(singlePart);
}

// Builds a compound from the parts of a split file.
//
// Parts are probed in order as base_filename + "aa", "ab", ..., "zz" and each
// one that can be opened is added to the compound. The probing stops at the
// first part for which creating/adding the FilePart throws a
// std::runtime_error.
//
// `base_filename` itself is never opened here: this constructor only handles
// the split case. `_token` is not read; it only selects this overload.
//
// Throws std::runtime_error if no part at all could be added.
FileCompound::FileCompound(const std::string& base_filename, MultiPartToken _token):
  _filename(base_filename),
  _fsize(0)
{
  try {
    for (char ch0 = 'a'; ch0 <= 'z'; ++ch0)
    {
      const std::string fname0 = base_filename + ch0;
      for (char ch1 = 'a'; ch1 <= 'z'; ++ch1)
      {
        addPart(new FilePart(fname0 + ch1));
      }
    }
  } catch (const std::runtime_error&) {
    // This catch acts as a break for the double loop.
  }
  if (empty()) {
    // We haven't found any part
    throw std::runtime_error(Formatter() << "Error opening as a split file: " << base_filename);
  }
}

Expand Down
4 changes: 4 additions & 0 deletions src/file_compound.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "zim_types.h"
#include "debug.h"
#include <map>
#include <memory>
#include <vector>

namespace zim {
Expand Down Expand Up @@ -53,9 +54,12 @@ class FileCompound : private std::map<Range, FilePart*, less_range> {
public: // types
typedef const_iterator PartIterator;
typedef std::pair<PartIterator, PartIterator> PartRange;
enum class MultiPartToken { Multi };

public: // functions
static std::shared_ptr<FileCompound> openSinglePieceOrSplitZimFile(std::string filename);
explicit FileCompound(const std::string& filename);
explicit FileCompound(const std::string& filename, MultiPartToken token);

#ifndef _WIN32
explicit FileCompound(int fd);
Expand Down
15 changes: 10 additions & 5 deletions src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class Grouping
// FileImpl
//
// Opens the archive named `fname`. Whether it is opened as a single file or
// as a split file is decided by FileCompound::openSinglePieceOrSplitZimFile;
// the resulting compound is handed to the delegated-to constructor.
FileImpl::FileImpl(const std::string& fname)
  : FileImpl(FileCompound::openSinglePieceOrSplitZimFile(fname))
{}

#ifndef _WIN32
Expand Down Expand Up @@ -207,6 +207,15 @@ class Grouping
throw ZimFileFormatError("error reading zim-file header.");
}

// This can happen for several reasons:
// - The zim file is corrupted (corrupted header)
// - The zim file is too small (ongoing download, truncated file...)
// - The zim file is embedded at the beginning of another file (and we try to open that file as a zim file)
// If opened through a FdInput, the size should be set in the FdInput.
if (header.hasChecksum() && (header.getChecksumPos() + 16) != size_type(zimReader->size())) {
throw ZimFileFormatError("Zim file(s) is of bad size or corrupted.");
}

auto pathPtrReader = sectionSubReader(*zimReader,
"Dirent pointer table",
offset_t(header.getPathPtrPos()),
Expand Down Expand Up @@ -297,10 +306,6 @@ class Grouping
throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
}
}

if (header.hasChecksum() && header.getChecksumPos() != (getFilesize().v-16) ) {
throw ZimFileFormatError("Checksum position is not valid");
}
}

offset_type FileImpl::getMimeListEndUpperLimit() const
Expand Down
27 changes: 26 additions & 1 deletion test/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,31 @@ TEST(ZimArchive, openRealZimArchive)
}
}

// Opening a split archive through the name of its first part ("<file>aa")
// must not throw, and the archive opened that way must pass check().
TEST(ZimArchive, openSplitZimArchive)
{
  const char* fname = "wikibooks_be_all_nopic_2017-02_splitted.zim";

  for (auto& testfile: getDataFilePath(fname)) {
    const auto firstPartPath = testfile.path + "aa";
    const TestContext ctx{ {"path", firstPartPath } };

    std::unique_ptr<zim::Archive> archive;
    EXPECT_NO_THROW( archive.reset(new zim::Archive(firstPartPath)) ) << ctx;
    if ( !archive ) {
      // Opening failed (already reported above); nothing left to check.
      continue;
    }
    EXPECT_TRUE( archive->check() ) << ctx;
  }
}

// Asking for "<file>aa" when the data file is the non-split archive "<file>"
// must fail with std::runtime_error instead of silently falling back to the
// non-split file.
TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive)
{
  const char* fname = "wikibooks_be_all_nopic_2017-02.zim";

  for (auto& testfile: getDataFilePath(fname)) {
    const auto missingPartPath = testfile.path + "aa";
    const TestContext ctx{ {"path", missingPartPath } };

    std::unique_ptr<zim::Archive> archive;
    EXPECT_THROW( archive.reset(new zim::Archive(missingPartPath)), std::runtime_error) << ctx;
  }
}

TEST(ZimArchive, randomEntry)
{
const char* const zimfiles[] = {
Expand Down Expand Up @@ -434,7 +459,7 @@ TEST(ZimArchive, validate)

TEST_BROKEN_ZIM_NAME(
"invalid.invalid_checksumpos.zim",
"Checksum position is not valid\n"
"Zim file(s) is of bad size or corrupted.\n"
);

TEST_BROKEN_ZIM_NAME(
Expand Down
Loading