Skip to content

Commit

Permalink
Auto merge of rust-lang#119331 - notriddle:notriddle/maxpatheditdista…
Browse files Browse the repository at this point in the history
…nce, r=GuillaumeGomez

rustdoc-search: count path edits with separate edit limit

Avoids strange-looking results like this one, where the path component seems to be ignored:

![image](https://github.com/rust-lang/rust/assets/1593513/f0ef077a-6e09-4d67-a29d-8cabc1495f66)

Since path edits and name edits are counted separately elsewhere, they should get their own limits, too. The biggest problem with combining them is that paths are checked loosely: not every path component is required to match. As a result, a short path that is matched loosely can easily produce "drunk typist" matches that make no sense, like this old result:

    std::collections::btree_map::itermut matching slice::itermut
    maxEditDistance = ("slice::itermut".length) / 3 = 14 / 3 = 4
    editDistance("std", "slice") = 4
    editDistance("itermut", "itermut") = 0
        4 + 0 <= 4 PASS

Of course, `slice::itermut` should not match items from `btree_map`, because `slice` should not match `std`.

The new result counts them separately:

    maxPathEditDistance = "slice".length / 3 = 5 / 3 = 1
    maxEditDistance = "itermut".length / 3 = 7 / 3 = 2
    editDistance("std", "slice") = 4
        4 <= 1 FAIL

Effectively, this makes path queries less "typo-resistant". The typo tolerance for paths is not zero, but it is now small enough that `vec` won't match the `v1` prelude.

This commit also adds substring matching to paths. It's stricter than the substring matching used for the main (non-path) part of the query, but still loose enough that the results I expect to match do match.

Queries without parent paths are unchanged.
  • Loading branch information
bors committed Dec 28, 2023
2 parents f4d794e + 0ea58e2 commit 5c0907b
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 44 deletions.
47 changes: 30 additions & 17 deletions src/librustdoc/html/static/js/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -1805,11 +1805,20 @@ function initSearch(rawSearchIndex) {
return unifyFunctionTypes([row], [elem], whereClause, mgens);
}

function checkPath(contains, ty, maxEditDistance) {
/**
* Compute an "edit distance" that ignores missing path elements.
* @param {string[]} contains search query path
* @param {Row} ty indexed item
* @returns {null|number} edit distance
*/
function checkPath(contains, ty) {
if (contains.length === 0) {
return 0;
}
let ret_dist = maxEditDistance + 1;
const maxPathEditDistance = Math.floor(
contains.reduce((acc, next) => acc + next.length, 0) / 3
);
let ret_dist = maxPathEditDistance + 1;
const path = ty.path.split("::");

if (ty.parent && ty.parent.name) {
Expand All @@ -1821,15 +1830,23 @@ function initSearch(rawSearchIndex) {
pathiter: for (let i = length - clength; i >= 0; i -= 1) {
let dist_total = 0;
for (let x = 0; x < clength; ++x) {
const dist = editDistance(path[i + x], contains[x], maxEditDistance);
if (dist > maxEditDistance) {
continue pathiter;
const [p, c] = [path[i + x], contains[x]];
if (Math.floor((p.length - c.length) / 3) <= maxPathEditDistance &&
p.indexOf(c) !== -1
) {
// discount distance on substring match
dist_total += Math.floor((p.length - c.length) / 3);
} else {
const dist = editDistance(p, c, maxPathEditDistance);
if (dist > maxPathEditDistance) {
continue pathiter;
}
dist_total += dist;
}
dist_total += dist;
}
ret_dist = Math.min(ret_dist, Math.round(dist_total / clength));
}
return ret_dist;
return ret_dist > maxPathEditDistance ? null : ret_dist;
}

function typePassesFilter(filter, type) {
Expand Down Expand Up @@ -2030,8 +2047,8 @@ function initSearch(rawSearchIndex) {
}

if (elem.fullPath.length > 1) {
path_dist = checkPath(elem.pathWithoutLast, row, maxEditDistance);
if (path_dist > maxEditDistance) {
path_dist = checkPath(elem.pathWithoutLast, row);
if (path_dist === null) {
return;
}
}
Expand All @@ -2045,7 +2062,7 @@ function initSearch(rawSearchIndex) {

const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance);

if (index === -1 && dist + path_dist > maxEditDistance) {
if (index === -1 && dist > maxEditDistance) {
return;
}

Expand Down Expand Up @@ -2100,13 +2117,9 @@ function initSearch(rawSearchIndex) {
}

function innerRunQuery() {
let queryLen = 0;
for (const elem of parsedQuery.elems) {
queryLen += elem.name.length;
}
for (const elem of parsedQuery.returned) {
queryLen += elem.name.length;
}
const queryLen =
parsedQuery.elems.reduce((acc, next) => acc + next.pathLast.length, 0) +
parsedQuery.returned.reduce((acc, next) => acc + next.pathLast.length, 0);
const maxEditDistance = Math.floor(queryLen / 3);

/**
Expand Down
1 change: 0 additions & 1 deletion tests/rustdoc-js-std/asrawfd.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ const EXPECTED = {
// Validate that type alias methods get the correct path.
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::linux::process::PidFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::fd::RawFd', 'name': 'as_raw_fd' },
],
};
42 changes: 42 additions & 0 deletions tests/rustdoc-js-std/path-maxeditdistance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// exact-check
const FILTER_CRATE = "std";
const EXPECTED = [
{
query: 'vec::intoiterator',
others: [
// trait std::iter::IntoIterator is not the first result
{ 'path': 'std::vec', 'name': 'IntoIter' },
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
],
},
{
query: 'vec::iter',
others: [
// std::net::ToSocketAttrs::iter should not show up here
{ 'path': 'std::vec', 'name': 'IntoIter' },
{ 'path': 'std::vec::Vec', 'name': 'from_iter' },
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'iter_mut' },
{ 'path': 'std::collections::VecDeque', 'name': 'from_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
],
},
{
query: 'slice::itermut',
others: [
// std::collections::btree_map::itermut should not show up here
{ 'path': 'std::slice', 'name': 'IterMut' },
{ 'path': 'std::slice', 'name': 'iter_mut' },
],
},
];
31 changes: 20 additions & 11 deletions tests/rustdoc-js-std/path-ordering.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
const EXPECTED = {
query: 'hashset::insert',
others: [
// ensure hashset::insert comes first
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
],
};
const EXPECTED = [
{
query: 'hashset::insert',
others: [
// ensure hashset::insert comes first
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
],
},
{
query: 'hash::insert',
others: [
// ensure hashset/hashmap::insert come first
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
],
},
];
1 change: 0 additions & 1 deletion tests/rustdoc-js/exact-match.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ const EXPECTED = {
'others': [
{ 'path': 'exact_match::Si', 'name': 'pc' },
{ 'path': 'exact_match::Psi', 'name': 'pc' },
{ 'path': 'exact_match::Si', 'name': 'pa' },
],
};
22 changes: 15 additions & 7 deletions tests/rustdoc-js/module-substring.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
const EXPECTED = {
'query': 'ig::pc',
'others': [
{ 'path': 'module_substring::Sig', 'name': 'pc' },
{ 'path': 'module_substring::Si', 'name': 'pc' },
],
};
const EXPECTED = [
{
'query': 'ig::pc',
'others': [
{ 'path': 'module_substring::Sig', 'name': 'pc' },
],
},
{
'query': 'si::pc',
'others': [
{ 'path': 'module_substring::Si', 'name': 'pc' },
{ 'path': 'module_substring::Sig', 'name': 'pc' },
],
},
];
35 changes: 35 additions & 0 deletions tests/rustdoc-js/path-maxeditdistance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// exact-check

const EXPECTED = [
{
'query': 'xxxxxxxxxxx::hocuspocusprestidigitation',
// do not match abracadabra::hocuspocusprestidigitation
'others': [],
},
{
// exact match
'query': 'abracadabra::hocuspocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3)
'query': 'arbacadarba::hocuspocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// truncate 5 chars, where maxEditDistance = 7 (21 / 3)
'query': 'abracadarba::hocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// truncate 9 chars, where maxEditDistance = 5 (17 / 3)
'query': 'abracadarba::hprestidigitation',
'others': [],
},
];
3 changes: 3 additions & 0 deletions tests/rustdoc-js/path-maxeditdistance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#![crate_name="abracadabra"]

pub struct HocusPocusPrestidigitation;
8 changes: 4 additions & 4 deletions tests/rustdoc-js/path-ordering.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// exact-check

const EXPECTED = {
'query': 'b::ccccccc',
'query': 'bbbbbb::ccccccc',
'others': [
// `ccccccc` is an exact match for all three of these.
// However `bbbbbb` is an exact match for the `bbbbbb` module and only
// a near match for the others, so it ought to go first.
{ 'path': 'path_ordering::bb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::aa', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::dd', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::bbbbbb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::abbbbb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::dbbbbb', 'name': 'Ccccccc' },
],
};
6 changes: 3 additions & 3 deletions tests/rustdoc-js/path-ordering.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
pub mod dd {
pub mod dbbbbb {
pub struct Ccccccc;
}
pub mod aa {
pub mod abbbbb {
pub struct Ccccccc;
}
pub mod bb {
pub mod bbbbbb {
pub struct Ccccccc;
}

0 comments on commit 5c0907b

Please sign in to comment.