diff --git a/meson.build b/meson.build index b12b485b..cf7e3c8f 100644 --- a/meson.build +++ b/meson.build @@ -30,6 +30,7 @@ else private_conf.set('ENABLE_USE_MMAP', get_option('USE_MMAP')) endif private_conf.set('ENABLE_USE_BUFFER_HEADER', get_option('USE_BUFFER_HEADER')) +private_conf.set('ENABLE_XAPIAN_FULLER', get_option('with_xapian_fuller')) static_linkage = get_option('static-linkage') static_linkage = static_linkage or get_option('default_library')=='static' diff --git a/meson_options.txt b/meson_options.txt index 253c7739..e23118f6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -22,5 +22,7 @@ option('tests', type : 'boolean', value : true, description : 'Build the tests.') option('with_xapian', type : 'boolean', value: true, description: 'Build libzim with xapian support') +option('with_xapian_fuller', type: 'boolean', value: true, + description: 'Create xapian archive using "FULLER" compaction.\nThis is a workaround for a compilation issue on Windows. This will be removed soon') option('test_data_dir', type : 'string', value: '', description: 'Where the test data are. If not set, meson will use a internal directory in the build dir. If you want to download the data in the specified directory you can use `meson download_test_data`. 
As a special value, you can pass `none` to deactivate test using external test data.') diff --git a/src/config.h.in b/src/config.h.in index b3018333..35e3115e 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -19,6 +19,8 @@ #mesondefine ENABLE_XAPIAN +#mesondefine ENABLE_XAPIAN_FULLER + #mesondefine ENABLE_USE_MMAP #mesondefine ENABLE_USE_BUFFER_HEADER diff --git a/src/tools.cpp b/src/tools.cpp index 17ef3369..8603bade 100644 --- a/src/tools.cpp +++ b/src/tools.cpp @@ -67,14 +67,14 @@ uint32_t zim::countWords(const std::string& text) unsigned int i = 0; // Find first word - while ( i < length && std::isspace(text[i]) ) i++; + while ( i < length && std::isspace(static_cast<unsigned char>(text[i])) ) i++; while ( i < length ) { // Find end of word - while ( i < length && !std::isspace(text[i]) ) i++; + while ( i < length && !std::isspace(static_cast<unsigned char>(text[i])) ) i++; numWords++; // Find start of next word - while ( i < length && std::isspace(text[i]) ) i++; + while ( i < length && std::isspace(static_cast<unsigned char>(text[i])) ) i++; } return numWords; } diff --git a/src/writer/xapianIndexer.cpp b/src/writer/xapianIndexer.cpp index 322b4550..f3492448 100644 --- a/src/writer/xapianIndexer.cpp +++ b/src/writer/xapianIndexer.cpp @@ -174,7 +174,12 @@ void XapianIndexer::indexTitle(const std::string& path, const std::string& title void XapianIndexer::indexingPostlude() { this->writableDatabase.commit(); - this->writableDatabase.compact(indexPath, Xapian::DBCOMPACT_SINGLE_FILE|Xapian::Compactor::FULLER); +#if defined ENABLE_XAPIAN_FULLER + auto flags = Xapian::DBCOMPACT_SINGLE_FILE|Xapian::Compactor::FULLER; +#else + auto flags = Xapian::DBCOMPACT_SINGLE_FILE; +#endif + this->writableDatabase.compact(indexPath, flags); this->writableDatabase.close(); } diff --git a/test/tooltesting.cpp b/test/tooltesting.cpp index aa46463b..fe05a1b4 100644 --- a/test/tooltesting.cpp +++ b/test/tooltesting.cpp @@ -83,7 +83,10 @@ namespace { auto accentedString(ss.str()); // Check our input data (that we have a 
char in the middle of a batch boundary) // Indexing is made on u16 - icu::UnicodeString ustring(accentedString.c_str()); + // `zim::removeAccents` calls `ucnv_setDefaultName` before creating the UnicodeString + // so it will be converted using the right encoding ("utf8"). + // But we don't so we need to be explicit on the encoding here. + icu::UnicodeString ustring(accentedString.c_str(), "utf8"); // Test input data. // "bépo" is 4 chars