Skip to content

Commit

Permalink
Add cur_file_prefix_len
Browse files Browse the repository at this point in the history
cur_file_prefix_len gives a low-priority bonus to files sharing a prefix
with the currently open file, which selects for closely related files
(e.g. .h vs. .cc files, _test.cc files), especially for empty queries.
  • Loading branch information
nixprime committed Jun 2, 2015
1 parent 54c497d commit 2abec32
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 32 deletions.
9 changes: 9 additions & 0 deletions src/match.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ struct MatchBase {
// indicate higher confidence that the matches are correct.
CharCount part_index_sum = 0;

// The number of bytes that are shared between the beginning of the rightmost
// path component of the match and the rightmost path component of the
// current file.
CharCount cur_file_prefix_len = 0;

// The number of path components that must be traversed between the query and
// item paths.
CharCount path_distance = 0;
Expand All @@ -68,6 +73,7 @@ struct MatchBase {
// Renders every scoring field of this match as a single
// "name=value, name=value, ..." string, in the order the fields are declared.
std::string debug_string() const {
  std::string description = str_cat(
      "prefix_score=", prefix_score,
      ", word_prefix_len=", word_prefix_len,
      ", part_index_sum=", part_index_sum,
      ", cur_file_prefix_len=", cur_file_prefix_len,
      ", path_distance=", path_distance,
      ", unmatched_len=", unmatched_len);
  return description;
}
Expand Down Expand Up @@ -95,6 +101,9 @@ bool operator<(Match<T> const& lhs, Match<T> const& rhs) {
if (lhs.part_index_sum != rhs.part_index_sum) {
return lhs.part_index_sum < rhs.part_index_sum;
}
if (lhs.cur_file_prefix_len != rhs.cur_file_prefix_len) {
return lhs.cur_file_prefix_len > rhs.cur_file_prefix_len;
}
if (lhs.path_distance != rhs.path_distance) {
return lhs.path_distance < rhs.path_distance;
}
Expand Down
48 changes: 27 additions & 21 deletions src/matcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,14 @@ namespace cpsm {

Matcher::Matcher(boost::string_ref const query, MatcherOpts opts)
: opts_(std::move(opts)) {
decompose_utf8_string(query, query_chars_);
decompose_utf8_string(query, query_);
if (opts_.is_path) {
// Store the index of the first character after the rightmost path
// separator in the query. (Store an index rather than an iterator to keep
// Matcher copyable/moveable.)
query_key_begin_index_ =
std::find(query_chars_.crbegin(), query_chars_.crend(),
path_separator()).base() -
query_chars_.cbegin();
std::find(query_.crbegin(), query_.crend(), path_separator()).base() -
query_.cbegin();
switch (opts_.query_path_mode) {
case MatcherOpts::QueryPathMode::NORMAL:
require_full_part_ = false;
Expand All @@ -57,17 +56,9 @@ Matcher::Matcher(boost::string_ref const query, MatcherOpts opts)

// Queries are smartcased (case-sensitive only if any uppercase appears in the
// query).
is_case_sensitive_ =
std::any_of(query_chars_.begin(), query_chars_.end(), is_uppercase);
is_case_sensitive_ = std::any_of(query_.begin(), query_.end(), is_uppercase);

cur_file_parts_ = path_components_of(opts_.cur_file);
// Keeping the filename in cur_file_parts_ causes the path distance metric to
// favor the currently open file. While we don't want to exclude the
// currently open file from being matched, it shouldn't be favored over its
// siblings on path distance.
if (!cur_file_parts_.empty()) {
cur_file_parts_.pop_back();
}
}

bool Matcher::match_base(boost::string_ref const item, MatchBase& m,
Expand All @@ -87,23 +78,23 @@ bool Matcher::match_base(boost::string_ref const item, MatchBase& m,
std::vector<boost::string_ref> item_parts;
if (opts_.is_path) {
item_parts = path_components_of(item);
m.path_distance = path_distance_between(cur_file_parts_, item_parts);
} else {
item_parts.push_back(item);
}
if (!item_parts.empty()) {
m.unmatched_len = item_parts.back().size();
}

if (query_chars_.empty()) {
if (query_.empty()) {
match_path(item_parts, m);
return true;
}

// Since for paths (the common case) we prefer rightmost path components, we
// scan path components right-to-left.
auto query_it = query_chars_.crbegin();
auto const query_end = query_chars_.crend();
auto query_key_begin = query_chars_.cend();
auto query_it = query_.crbegin();
auto const query_end = query_.crend();
auto query_key_begin = query_.cend();
// Index into item_parts, counting from the right.
CharCount part_index = 0;
for (boost::string_ref const item_part :
Expand Down Expand Up @@ -149,22 +140,37 @@ bool Matcher::match_base(boost::string_ref const item, MatchBase& m,
return false;
}

// Fill path match data.
match_path(item_parts, m);

// Now do more refined matching on the key (the rightmost path component of
// the item for a path match, and just the full item otherwise).
match_key(*key_chars, query_key_begin, m);
return true;
}

// Fills in the path-derived fields of `m` for an item that has already been
// split into path components. Does nothing unless the matcher is operating on
// paths.
//
// - m.path_distance is always set, from the current file's components and the
//   item's components.
// - m.cur_file_prefix_len is set only when both component lists are
//   non-empty, to the length of the prefix shared by the last component of
//   the current file and the last component of the item.
void Matcher::match_path(std::vector<boost::string_ref> const& item_parts,
                         MatchBase& m) const {
  if (opts_.is_path) {
    m.path_distance = path_distance_between(cur_file_parts_, item_parts);

    bool const either_side_empty =
        cur_file_parts_.empty() || item_parts.empty();
    if (!either_side_empty) {
      auto const& cur_file_name = cur_file_parts_.back();
      auto const& item_name = item_parts.back();
      m.cur_file_prefix_len = common_prefix(cur_file_name, item_name);
    }
  }
}

void Matcher::match_key(std::vector<char32_t> const& key,
std::vector<char32_t>::const_iterator query_key,
MatchBase& m) const {
auto const query_key_end = query_chars_.cend();
auto const query_key_end = query_.cend();
if (query_key == query_key_end) {
return;
}
bool const query_key_at_begin =
(query_key == (query_chars_.cbegin() + query_key_begin_index_));
// key can't be empty since [query_key, query_chars_.end()) is non-empty.
(query_key == (query_.cbegin() + query_key_begin_index_));
// key can't be empty since [query_key, query_.end()) is non-empty.
const auto is_word_prefix = [&](std::size_t const i) -> bool {
if (i == 0) {
return true;
Expand Down
5 changes: 4 additions & 1 deletion src/matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,16 @@ class Matcher {
std::vector<char32_t>* buf,
std::vector<char32_t>* buf2) const;

void match_path(std::vector<boost::string_ref> const& item_parts,
MatchBase& m) const;

void match_key(std::vector<char32_t> const& key,
std::vector<char32_t>::const_iterator query_key,
MatchBase& m) const;

bool match_char(char32_t item, char32_t query) const;

std::vector<char32_t> query_chars_;
std::vector<char32_t> query_;
std::size_t query_key_begin_index_;
MatcherOpts opts_;
bool is_case_sensitive_;
Expand Down
11 changes: 1 addition & 10 deletions src/path_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

#include "path_util.h"

#include <algorithm>

// TODO: Support non-Unix non-UTF-8 paths.

namespace cpsm {
Expand Down Expand Up @@ -47,14 +45,7 @@ std::vector<boost::string_ref> path_components_of(boost::string_ref path) {

// Returns the number of path components separating x from y: the total number
// of components in both paths, minus the leading components they share
// (counted once for each path).
//
// NOTE(review): this rendering interleaves the earlier hand-rolled loop with
// the newer common_prefix() call. Both compute the same value; as written,
// the second return statement is unreachable.
CharCount path_distance_between(std::vector<boost::string_ref> const& x,
std::vector<boost::string_ref> const& y) {
// Only indices valid in both vectors can hold a shared component.
auto const end = std::min(x.size(), y.size());
CharCount common_ancestors;
// Count leading components that compare equal, stopping at the first mismatch.
for (common_ancestors = 0; common_ancestors < end; common_ancestors++) {
if (x[common_ancestors] != y[common_ancestors]) {
break;
}
}
return x.size() + y.size() - (2 * common_ancestors);
// Equivalent formulation that delegates the counting to common_prefix().
return x.size() + y.size() - (2 * common_prefix(x, y));
}

} // namespace cpsm
21 changes: 21 additions & 0 deletions src/path_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#ifndef CPSM_PATH_UTIL_H_
#define CPSM_PATH_UTIL_H_

#include <algorithm>
#include <vector>

#include <boost/utility/string_ref.hpp>
Expand Down Expand Up @@ -44,6 +45,26 @@ std::vector<boost::string_ref> path_components_of(boost::string_ref path);
CharCount path_distance_between(std::vector<boost::string_ref> const& x,
std::vector<boost::string_ref> const& y);

// Counts how many leading elements of the two iterables compare equal.
// Counting stops at the first pair of elements that differ, or as soon as
// either iterable runs out of elements. Elements are compared with
// operator!=.
template <typename T>
std::size_t common_prefix(T const& x, T const& y) {
  auto const lhs_end = x.cend();
  auto const rhs_end = y.cend();
  std::size_t shared = 0;
  for (auto lhs = x.cbegin(), rhs = y.cbegin();
       lhs != lhs_end && rhs != rhs_end && !(*lhs != *rhs); ++lhs, ++rhs) {
    ++shared;
  }
  return shared;
}

} // namespace cpsm

#endif /* CPSM_PATH_UTIL_H_ */

0 comments on commit 2abec32

Please sign in to comment.