Skip to content

Commit

Permalink
Better computation of getArticleCount.
Browse files Browse the repository at this point in the history
It matches what is done in libkiwix.
  • Loading branch information
mgautierfr committed Nov 2, 2022
1 parent 93d632c commit 7a40cce
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
4 changes: 1 addition & 3 deletions include/zim/archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ namespace zim
* The definition of "article" depends on the zim archive.
* On recent archives, this corresponds to all entries marked as "FRONT_ARTICLE"
* at creation time.
* On old archives, this correspond to all entries in 'A' namespace.
* Few archives may have been created without namespace but also without specific
* article listing. In this case, articles are all user entries.
* On old archives, this corresponds to all "text/html*" entries.
*
* @return the number of articles in the archive.
*/
Expand Down
14 changes: 9 additions & 5 deletions src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,19 @@ namespace zim
{
if (m_impl->hasFrontArticlesIndex()) {
return m_impl->getFrontEntryCount().v;
} else if (m_impl->hasNewNamespaceScheme()) {
return m_impl->getNamespaceEntryCount('C').v;
} else {
return m_impl->getNamespaceEntryCount('A').v;
try {
return countMimeType(
getMetadata("Counter"),
[](const std::string& mimetype) { return mimetype.find("text/html") == 0; }
);
} catch(const EntryNotFound& e) {
const char articleNs = m_impl->hasNewNamespaceScheme() ? 'C' : 'A';
return m_impl->getNamespaceEntryCount(articleNs).v;
}
}
}



entry_index_type Archive::getMediaCount() const
{
return countMimeType(
Expand Down
6 changes: 3 additions & 3 deletions test/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,9 @@ TEST(ZimArchive, articleNumber)
// Name mediaCount, withns nons
// {articles, userEntries, allEntries}, {articles, userEntries, allEntries}
{"small.zim", 1, { 1, 17, 17, }, { 1, 2, 16 }},
{"wikibooks_be_all_nopic_2017-02.zim", 34, { 70, 118, 118, }, { 66, 109, 123 }},
{"wikibooks_be_all_nopic_2017-02_splitted.zim", 34, { 70, 118, 118, }, { 66, 109, 123 }},
{"wikipedia_en_climate_change_nopic_2020-01.zim", 333, { 7253, 7646, 7646, }, { 1837, 7633, 7649 }}
{"wikibooks_be_all_nopic_2017-02.zim", 34, { 66, 118, 118, }, { 66, 109, 123 }},
{"wikibooks_be_all_nopic_2017-02_splitted.zim", 34, { 66, 118, 118, }, { 66, 109, 123 }},
{"wikipedia_en_climate_change_nopic_2020-01.zim", 333, { 1837, 7646, 7646, }, { 1837, 7633, 7649 }}
};
// "withns" zim files have no notion of user entries, so EntryCount == allEntryCount.
// For small.zim, there is always 1 article, whether the article is in the 'A' namespace or in a specific index.
Expand Down

0 comments on commit 7a40cce

Please sign in to comment.