diff --git a/src/match.h b/src/match.h index 9d1ed99..464772a 100644 --- a/src/match.h +++ b/src/match.h @@ -55,6 +55,11 @@ struct MatchBase { // indicate higher confidence that the matches are correct. CharCount part_index_sum = 0; + // The number of bytes that are shared between the beginning of the rightmost + // path component of the match and the rightmost path component of the + // current file. + CharCount cur_file_prefix_len = 0; + // The number of path components that must be traversed between the query and // item paths. CharCount path_distance = 0; @@ -68,6 +73,7 @@ struct MatchBase { std::string debug_string() const { return str_cat("prefix_score=", prefix_score, ", word_prefix_len=", word_prefix_len, ", part_index_sum=", part_index_sum, + ", cur_file_prefix_len=", cur_file_prefix_len, ", path_distance=", path_distance, ", unmatched_len=", unmatched_len); } @@ -95,6 +101,9 @@ bool operator<(Match const& lhs, Match const& rhs) { if (lhs.part_index_sum != rhs.part_index_sum) { return lhs.part_index_sum < rhs.part_index_sum; } + if (lhs.cur_file_prefix_len != rhs.cur_file_prefix_len) { + return lhs.cur_file_prefix_len > rhs.cur_file_prefix_len; + } if (lhs.path_distance != rhs.path_distance) { return lhs.path_distance < rhs.path_distance; } diff --git a/src/matcher.cc b/src/matcher.cc index 9494161..f40f528 100644 --- a/src/matcher.cc +++ b/src/matcher.cc @@ -29,15 +29,14 @@ namespace cpsm { Matcher::Matcher(boost::string_ref const query, MatcherOpts opts) : opts_(std::move(opts)) { - decompose_utf8_string(query, query_chars_); + decompose_utf8_string(query, query_); if (opts_.is_path) { // Store the index of the first character after the rightmost path // separator in the query. (Store an index rather than an iterator to keep // Matcher copyable/moveable.) 
query_key_begin_index_ = - std::find(query_chars_.crbegin(), query_chars_.crend(), - path_separator()).base() - - query_chars_.cbegin(); + std::find(query_.crbegin(), query_.crend(), path_separator()).base() - + query_.cbegin(); switch (opts_.query_path_mode) { case MatcherOpts::QueryPathMode::NORMAL: require_full_part_ = false; @@ -57,17 +56,9 @@ Matcher::Matcher(boost::string_ref const query, MatcherOpts opts) // Queries are smartcased (case-sensitive only if any uppercase appears in the // query). - is_case_sensitive_ = - std::any_of(query_chars_.begin(), query_chars_.end(), is_uppercase); + is_case_sensitive_ = std::any_of(query_.begin(), query_.end(), is_uppercase); cur_file_parts_ = path_components_of(opts_.cur_file); - // Keeping the filename in cur_file_parts_ causes the path distance metric to - // favor the currently open file. While we don't want to exclude the - // currently open file from being matched, it shouldn't be favored over its - // siblings on path distance. - if (!cur_file_parts_.empty()) { - cur_file_parts_.pop_back(); - } } bool Matcher::match_base(boost::string_ref const item, MatchBase& m, @@ -87,7 +78,6 @@ bool Matcher::match_base(boost::string_ref const item, MatchBase& m, std::vector item_parts; if (opts_.is_path) { item_parts = path_components_of(item); - m.path_distance = path_distance_between(cur_file_parts_, item_parts); } else { item_parts.push_back(item); } @@ -95,15 +85,16 @@ bool Matcher::match_base(boost::string_ref const item, MatchBase& m, m.unmatched_len = item_parts.back().size(); } - if (query_chars_.empty()) { + if (query_.empty()) { + match_path(item_parts, m); return true; } // Since for paths (the common case) we prefer rightmost path components, we // scan path components right-to-left. 
- auto query_it = query_chars_.crbegin(); - auto const query_end = query_chars_.crend(); - auto query_key_begin = query_chars_.cend(); + auto query_it = query_.crbegin(); + auto const query_end = query_.crend(); + auto query_key_begin = query_.cend(); // Index into item_parts, counting from the right. CharCount part_index = 0; for (boost::string_ref const item_part : @@ -149,22 +140,37 @@ bool Matcher::match_base(boost::string_ref const item, MatchBase& m, return false; } + // Fill path match data. + match_path(item_parts, m); + // Now do more refined matching on the key (the rightmost path component of // the item for a path match, and just the full item otherwise). match_key(*key_chars, query_key_begin, m); return true; } +void Matcher::match_path(std::vector const& item_parts, + MatchBase& m) const { + if (!opts_.is_path) { + return; + } + m.path_distance = path_distance_between(cur_file_parts_, item_parts); + if (!cur_file_parts_.empty() && !item_parts.empty()) { + m.cur_file_prefix_len = + common_prefix(cur_file_parts_.back(), item_parts.back()); + } +} + void Matcher::match_key(std::vector const& key, std::vector::const_iterator query_key, MatchBase& m) const { - auto const query_key_end = query_chars_.cend(); + auto const query_key_end = query_.cend(); if (query_key == query_key_end) { return; } bool const query_key_at_begin = - (query_key == (query_chars_.cbegin() + query_key_begin_index_)); - // key can't be empty since [query_key, query_chars_.end()) is non-empty. + (query_key == (query_.cbegin() + query_key_begin_index_)); + // key can't be empty since [query_key, query_.end()) is non-empty. 
const auto is_word_prefix = [&](std::size_t const i) -> bool { if (i == 0) { return true; diff --git a/src/matcher.h b/src/matcher.h index 1ec9be5..a6b8f34 100644 --- a/src/matcher.h +++ b/src/matcher.h @@ -68,13 +68,16 @@ class Matcher { std::vector* buf, std::vector* buf2) const; + void match_path(std::vector const& item_parts, + MatchBase& m) const; + void match_key(std::vector const& key, std::vector::const_iterator query_key, MatchBase& m) const; bool match_char(char32_t item, char32_t query) const; - std::vector query_chars_; + std::vector query_; std::size_t query_key_begin_index_; MatcherOpts opts_; bool is_case_sensitive_; diff --git a/src/path_util.cc b/src/path_util.cc index fc9c190..0b6931f 100644 --- a/src/path_util.cc +++ b/src/path_util.cc @@ -15,8 +15,6 @@ #include "path_util.h" -#include - // TODO: Support non-Unix non-UTF-8 paths. namespace cpsm { @@ -47,14 +45,7 @@ std::vector path_components_of(boost::string_ref path) { CharCount path_distance_between(std::vector const& x, std::vector const& y) { - auto const end = std::min(x.size(), y.size()); - CharCount common_ancestors; - for (common_ancestors = 0; common_ancestors < end; common_ancestors++) { - if (x[common_ancestors] != y[common_ancestors]) { - break; - } - } - return x.size() + y.size() - (2 * common_ancestors); + return x.size() + y.size() - (2 * common_prefix(x, y)); } } // namespace cpsm diff --git a/src/path_util.h b/src/path_util.h index 7e96515..9f3c86b 100644 --- a/src/path_util.h +++ b/src/path_util.h @@ -16,6 +16,7 @@ #ifndef CPSM_PATH_UTIL_H_ #define CPSM_PATH_UTIL_H_ +#include #include #include @@ -44,6 +45,26 @@ std::vector path_components_of(boost::string_ref path); CharCount path_distance_between(std::vector const& x, std::vector const& y); +// Returns the number of elements that are common at the beginning of the two +// given iterables. 
// Counts how many leading elements of `x` and `y` compare equal. Counting
// stops at the first differing pair, or as soon as either sequence is
// exhausted, so the result never exceeds the length of the shorter sequence.
//
// T must provide cbegin()/cend(), and its elements must be comparable with
// operator!=. Both arguments must have the same type.
template <typename T>
std::size_t common_prefix(T const& x, T const& y) {
  auto x_it = x.cbegin();
  auto y_it = y.cbegin();
  auto const x_end = x.cend();
  auto const y_end = y.cend();
  std::size_t prefix_len = 0;
  // Walk both sequences in lockstep; the element comparison is only reached
  // once both iterators are known to be dereferenceable.
  while (x_it != x_end && y_it != y_end) {
    if (*x_it != *y_it) {
      break;
    }
    ++x_it;
    ++y_it;
    ++prefix_len;
  }
  return prefix_len;
}