diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index 2338931a18fd2..93709e4e830ad 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -14,7 +14,7 @@ function initSearch(searchIndex){} * pathWithoutLast: Array, * pathLast: string, * generics: Array, - * bindings: Map<(string|integer), Array>, + * bindings: Map<integer, Array<QueryElement>>, * }} */ let QueryElement; @@ -42,6 +42,7 @@ let ParserState; * totalElems: number, * literalSearch: boolean, * corrections: Array<{from: string, to: integer}>, + * typeFingerprint: Uint32Array, * }} */ let ParsedQuery; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 5d348d3f17635..6fce7650b4c14 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -238,6 +238,10 @@ function initSearch(rawSearchIndex) { * @type {Array} */ let searchIndex; + /** + * @type {Uint32Array} + */ + let functionTypeFingerprint; let currentResults; /** * Map from normalized type names to integers. Used to make type search @@ -1038,6 +1042,8 @@ function initSearch(rawSearchIndex) { correction: null, proposeCorrectionFrom: null, proposeCorrectionTo: null, + // bloom filter built from type ids + typeFingerprint: new Uint32Array(4), }; } @@ -1133,7 +1139,6 @@ function initSearch(rawSearchIndex) { query.error = err; return query; } - if (!query.literalSearch) { // If there is more than one element in the query, we switch to literalSearch in any // case. @@ -1329,25 +1334,6 @@ function initSearch(rawSearchIndex) { return 0; }); - let nameSplit = null; - if (parsedQuery.elems.length === 1) { - const hasPath = typeof parsedQuery.elems[0].path === "undefined"; - nameSplit = hasPath ? 
null : parsedQuery.elems[0].path; - } - - for (const result of result_list) { - // this validation does not make sense when searching by types - if (result.dontValidate) { - continue; - } - const name = result.item.name.toLowerCase(), - path = result.item.path.toLowerCase(), - parent = result.item.parent; - - if (!isType && !validateResult(name, path, nameSplit, parent)) { - result.id = -1; - } - } return transformResults(result_list); } @@ -1960,8 +1946,7 @@ function initSearch(rawSearchIndex) { * @param {integer} path_dist */ function addIntoResults(results, fullId, id, index, dist, path_dist, maxEditDistance) { - const inBounds = dist <= maxEditDistance || index !== -1; - if (dist === 0 || (!parsedQuery.literalSearch && inBounds)) { + if (dist <= maxEditDistance || index !== -1) { if (results.has(fullId)) { const result = results.get(fullId); if (result.dontValidate || result.dist <= dist) { @@ -2009,17 +1994,31 @@ function initSearch(rawSearchIndex) { const fullId = row.id; const searchWord = searchWords[pos]; - const in_args = row.type && row.type.inputs - && checkIfInList(row.type.inputs, elem, row.type.where_clause); - if (in_args) { - // path_dist is 0 because no parent path information is currently stored - // in the search index - addIntoResults(results_in_args, fullId, pos, -1, 0, 0, maxEditDistance); - } - const returned = row.type && row.type.output - && checkIfInList(row.type.output, elem, row.type.where_clause); - if (returned) { - addIntoResults(results_returned, fullId, pos, -1, 0, 0, maxEditDistance); + // tfpDist is a minimum possible type distance, where "type distance" is the number of + // atoms in the function not present in the query + const tfpDist = compareTypeFingerprints( + fullId, + parsedQuery.typeFingerprint + ); + if (tfpDist !== null) { + const in_args = row.type && row.type.inputs + && checkIfInList(row.type.inputs, elem, row.type.where_clause); + const returned = row.type && row.type.output + && checkIfInList(row.type.output, elem, 
row.type.where_clause); + if (in_args) { + results_in_args.max_dist = Math.max(results_in_args.max_dist || 0, tfpDist); + const maxDist = results_in_args.size < MAX_RESULTS ? + (tfpDist + 1) : + results_in_args.max_dist; + addIntoResults(results_in_args, fullId, pos, -1, tfpDist, 0, maxDist); + } + if (returned) { + results_returned.max_dist = Math.max(results_returned.max_dist || 0, tfpDist); + const maxDist = results_returned.size < MAX_RESULTS ? + (tfpDist + 1) : + results_returned.max_dist; + addIntoResults(results_returned, fullId, pos, -1, tfpDist, 0, maxDist); + } } if (!typePassesFilter(elem.typeFilter, row.ty)) { @@ -2078,6 +2077,17 @@ function initSearch(rawSearchIndex) { return; } + const tfpDist = compareTypeFingerprints( + row.id, + parsedQuery.typeFingerprint + ); + if (tfpDist === null) { + return; + } + if (results.size >= MAX_RESULTS && tfpDist > results.max_dist) { + return; + } + // If the result is too "bad", we return false and it ends this search. if (!unifyFunctionTypes( row.type.inputs, @@ -2096,7 +2106,8 @@ function initSearch(rawSearchIndex) { return; } - addIntoResults(results, row.id, pos, 0, 0, 0, Number.MAX_VALUE); + results.max_dist = Math.max(results.max_dist || 0, tfpDist); + addIntoResults(results, row.id, pos, 0, tfpDist, 0, Number.MAX_VALUE); } function innerRunQuery() { @@ -2216,14 +2227,17 @@ function initSearch(rawSearchIndex) { ); } + const fps = new Set(); for (const elem of parsedQuery.elems) { convertNameToId(elem); + buildFunctionTypeFingerprint(elem, parsedQuery.typeFingerprint, fps); } for (const elem of parsedQuery.returned) { convertNameToId(elem); + buildFunctionTypeFingerprint(elem, parsedQuery.typeFingerprint, fps); } - if (parsedQuery.foundElems === 1) { + if (parsedQuery.foundElems === 1 && parsedQuery.returned.length === 0) { if (parsedQuery.elems.length === 1) { const elem = parsedQuery.elems[0]; for (let i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { @@ -2239,28 +2253,24 @@ function 
initSearch(rawSearchIndex) { maxEditDistance ); } - } else if (parsedQuery.returned.length === 1) { - // We received one returned argument to check, so looking into returned values. - for (let i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { - const row = searchIndex[i]; - const in_returned = row.type && unifyFunctionTypes( - row.type.output, - parsedQuery.returned, - row.type.where_clause - ); - if (in_returned) { - addIntoResults( - results_others, - row.id, - i, - -1, - 0, - Number.MAX_VALUE - ); - } - } } } else if (parsedQuery.foundElems > 0) { + // Sort input and output so that generic type variables go first and + // types with generic parameters go last. + // That's because of the way unification is structured: it eats off + // the end, and hits a fast path if the last item is a simple atom. + const sortQ = (a, b) => { + const ag = a.generics.length === 0 && a.bindings.size === 0; + const bg = b.generics.length === 0 && b.bindings.size === 0; + if (ag !== bg) { + return ag - bg; + } + const ai = a.id > 0; + const bi = b.id > 0; + return ai - bi; + }; + parsedQuery.elems.sort(sortQ); + parsedQuery.returned.sort(sortQ); for (let i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { handleArgs(searchIndex[i], i, results_others); } @@ -2284,44 +2294,6 @@ function initSearch(rawSearchIndex) { return ret; } - /** - * Validate performs the following boolean logic. For example: - * "File::open" will give IF A PARENT EXISTS => ("file" && "open") - * exists in (name || path || parent) OR => ("file" && "open") exists in - * (name || path ) - * - * This could be written functionally, but I wanted to minimise - * functions on stack. 
- * - * @param {string} name - The name of the result - * @param {string} path - The path of the result - * @param {string} keys - The keys to be used (["file", "open"]) - * @param {Object} parent - The parent of the result - * - * @return {boolean} - Whether the result is valid or not - */ - function validateResult(name, path, keys, parent, maxEditDistance) { - if (!keys || !keys.length) { - return true; - } - for (const key of keys) { - // each check is for validation so we negate the conditions and invalidate - if (!( - // check for an exact name match - name.indexOf(key) > -1 || - // then an exact path match - path.indexOf(key) > -1 || - // next if there is a parent, check for exact parent match - (parent !== undefined && parent.name !== undefined && - parent.name.toLowerCase().indexOf(key) > -1) || - // lastly check to see if the name was an editDistance match - editDistance(name, key, maxEditDistance) <= maxEditDistance)) { - return false; - } - } - return true; - } - function nextTab(direction) { const next = (searchState.currentTab + direction + 3) % searchState.focusedByTab.length; searchState.focusedByTab[searchState.currentTab] = document.activeElement; @@ -2840,6 +2812,117 @@ ${item.displayPath}${name}\ }; } + /** + * Type fingerprints allow fast, approximate matching of types. + * + * This algo creates a compact representation of the type set using a Bloom filter. + * This fingerprint is used two ways: + * + * - It accelerates the matching algorithm by checking the function fingerprint against the + * query fingerprint. If any bits are set in the query but not in the function, it can't + * match. + * + * - The fourth section has the number of distinct items in the set. + * This is the distance function[^1], used for filtering and for sorting. + * + * [^1]: Distance is the relatively naive metric of counting the number of distinct items in + * the function that are not present in the query. 
+ * + * @param {FunctionType|QueryElement} type - a single type + * @param {Uint32Array} output - write the fingerprint to this data structure: uses 128 bits + * @param {Set} fps - Set of distinct items + */ + function buildFunctionTypeFingerprint(type, output, fps) { + let input = type.id; + // All forms of `[]` get collapsed down to one thing in the bloom filter. + // Differentiating between arrays and slices, if the user asks for it, is + // still done in the matching algorithm. + if (input === typeNameIdOfArray || input === typeNameIdOfSlice) { + input = typeNameIdOfArrayOrSlice; + } + // http://burtleburtle.net/bob/hash/integer.html + // ~~ is toInt32. It's used before adding, so + // the number stays in safe integer range. + const hashint1 = k => { + k = (~~k + 0x7ed55d16) + (k << 12); + k = (k ^ 0xc761c23c) ^ (k >>> 19); + k = (~~k + 0x165667b1) + (k << 5); + k = (~~k + 0xd3a2646c) ^ (k << 9); + k = (~~k + 0xfd7046c5) + (k << 3); + return (k ^ 0xb55a4f09) ^ (k >>> 16); + }; + const hashint2 = k => { + k = ~k + (k << 15); + k ^= k >>> 12; + k += k << 2; + k ^= k >>> 4; + k = Math.imul(k, 2057); + return k ^ (k >> 16); + }; + if (input !== null) { + const h0a = hashint1(input); + const h0b = hashint2(input); + // Less Hashing, Same Performance: Building a Better Bloom Filter + // doi=10.1.1.72.2442 + const h1a = ~~(h0a + Math.imul(h0b, 2)); + const h1b = ~~(h0a + Math.imul(h0b, 3)); + const h2a = ~~(h0a + Math.imul(h0b, 4)); + const h2b = ~~(h0a + Math.imul(h0b, 5)); + output[0] |= (1 << (h0a % 32)) | (1 << (h1b % 32)); + output[1] |= (1 << (h1a % 32)) | (1 << (h2b % 32)); + output[2] |= (1 << (h2a % 32)) | (1 << (h0b % 32)); + fps.add(input); + } + for (const g of type.generics) { + buildFunctionTypeFingerprint(g, output, fps); + } + const fb = { + id: null, + ty: 0, + generics: [], + bindings: new Map(), + }; + for (const [k, v] of type.bindings.entries()) { + fb.id = k; + fb.generics = v; + buildFunctionTypeFingerprint(fb, output, fps); + } + output[3] = 
fps.size; + } + + /** + * Compare the query fingerprint with the function fingerprint. + * + * @param {number} fullId - The function + * @param {Uint32Array} queryFingerprint - The query + * @returns {number|null} - Null if non-match, number if distance + * This function might return 0! + */ + function compareTypeFingerprints(fullId, queryFingerprint) { + const fh0 = functionTypeFingerprint[fullId * 4]; + const fh1 = functionTypeFingerprint[(fullId * 4) + 1]; + const fh2 = functionTypeFingerprint[(fullId * 4) + 2]; + const [qh0, qh1, qh2] = queryFingerprint; + // Approximate set intersection with bloom filters. + // This can be larger than reality, not smaller, because hashes have + // the property that if they've got the same value, they hash to the + // same thing. False positives exist, but not false negatives. + const [in0, in1, in2] = [fh0 & qh0, fh1 & qh1, fh2 & qh2]; + // Approximate the set of items in the query but not the function. + // This might be smaller than reality, but cannot be bigger. + // + // | in_ | qh_ | XOR | Meaning | + // | --- | --- | --- | ------------------------------------------------ | + // | 0 | 0 | 0 | Not present | + // | 1 | 0 | 1 | IMPOSSIBLE because `in_` is `fh_ & qh_` | + // | 1 | 1 | 0 | If one or both is false positive, false negative | + // | 0 | 1 | 1 | Since in_ has no false negatives, must be real | + if ((in0 ^ qh0) || (in1 ^ qh1) || (in2 ^ qh2)) { + return null; + } + return functionTypeFingerprint[(fullId * 4) + 3]; + } + function buildIndex(rawSearchIndex) { searchIndex = []; /** @@ -2859,6 +2942,22 @@ ${item.displayPath}${name}\ typeNameIdOfSlice = buildTypeMapIndex("slice"); typeNameIdOfArrayOrSlice = buildTypeMapIndex("[]"); + // Function type fingerprints are 128-bit bloom filters that are used to + // estimate the distance between function and query. + // This loop counts the number of items to allocate a fingerprint for. 
+ for (const crate in rawSearchIndex) { + if (!hasOwnPropertyRustdoc(rawSearchIndex, crate)) { + continue; + } + // Each item gets an entry in the fingerprint array, and the crate + // does, too + id += rawSearchIndex[crate].t.length + 1; + } + functionTypeFingerprint = new Uint32Array((id + 1) * 4); + + // This loop actually generates the search item indexes, including + // normalized names, type signature objects and fingerprints, and aliases. + id = 0; for (const crate in rawSearchIndex) { if (!hasOwnPropertyRustdoc(rawSearchIndex, crate)) { continue; @@ -3008,6 +3107,28 @@ ${item.displayPath}${name}\ } searchWords.push(word); const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath; + let type = null; + if (itemFunctionSearchTypes[i] !== 0) { + type = buildFunctionSearchType( + itemFunctionSearchTypes[i], + lowercasePaths + ); + if (type) { + const fp = functionTypeFingerprint.subarray(id * 4, (id + 1) * 4); + const fps = new Set(); + for (const t of type.inputs) { + buildFunctionTypeFingerprint(t, fp, fps); + } + for (const t of type.output) { + buildFunctionTypeFingerprint(t, fp, fps); + } + for (const w of type.where_clause) { + for (const t of w) { + buildFunctionTypeFingerprint(t, fp, fps); + } + } + } + } const row = { crate: crate, ty: itemTypes.charCodeAt(i) - charA, @@ -3015,10 +3136,7 @@ ${item.displayPath}${name}\ path: path, desc: itemDescs[i], parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined, - type: buildFunctionSearchType( - itemFunctionSearchTypes[i], - lowercasePaths - ), + type, id: id, normalizedName: word.indexOf("_") === -1 ? 
word : word.replace(/_/g, ""), deprecated: deprecatedItems.has(i), diff --git a/tests/rustdoc-js/assoc-type.js b/tests/rustdoc-js/assoc-type.js index 47776656e32c2..eec4e7a8258fb 100644 --- a/tests/rustdoc-js/assoc-type.js +++ b/tests/rustdoc-js/assoc-type.js @@ -7,16 +7,16 @@ const EXPECTED = [ 'query': 'iterator -> u32', 'correction': null, 'others': [ - { 'path': 'assoc_type', 'name': 'my_fn' }, { 'path': 'assoc_type::my', 'name': 'other_fn' }, + { 'path': 'assoc_type', 'name': 'my_fn' }, ], }, { 'query': 'iterator', 'correction': null, 'in_args': [ - { 'path': 'assoc_type', 'name': 'my_fn' }, { 'path': 'assoc_type::my', 'name': 'other_fn' }, + { 'path': 'assoc_type', 'name': 'my_fn' }, ], }, { @@ -26,8 +26,8 @@ const EXPECTED = [ { 'path': 'assoc_type', 'name': 'Something' }, ], 'in_args': [ - { 'path': 'assoc_type', 'name': 'my_fn' }, { 'path': 'assoc_type::my', 'name': 'other_fn' }, + { 'path': 'assoc_type', 'name': 'my_fn' }, ], }, // if I write an explicit binding, only it shows up diff --git a/tests/rustdoc-js/big-result.js b/tests/rustdoc-js/big-result.js new file mode 100644 index 0000000000000..07961d196f47d --- /dev/null +++ b/tests/rustdoc-js/big-result.js @@ -0,0 +1,39 @@ +// exact-check + +const EXPECTED = [ + { + 'query': 'First', + 'in_args': (function() { + // Generate the list of 200 items that should match. + const results = []; + function generate(lx, ly) { + for (const x of lx) { + for (const y of ly) { + results.push({ + 'path': `big_result::${y}`, + 'name': x, + }); + } + } + } + // Fewest parameters that still match go on top. 
+ generate( + ['u', 'v', 'w', 'x', 'y'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['p', 'q', 'r', 's', 't'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['k', 'l', 'm', 'n', 'o'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['f', 'g', 'h', 'i', 'j'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + return results; + })(), + }, +]; diff --git a/tests/rustdoc-js/big-result.rs b/tests/rustdoc-js/big-result.rs new file mode 100644 index 0000000000000..4dfecd6aaadd5 --- /dev/null +++ b/tests/rustdoc-js/big-result.rs @@ -0,0 +1,61 @@ +#![feature(concat_idents)] +#![allow(nonstandard_style)] +/// Generate 250 items that all match the query, starting with the longest. +/// Those long items should be dropped from the result set, and the short ones +/// should be shown instead. +macro_rules! generate { + ([$($x:ident),+], $y:tt, $z:tt) => { + $( + generate!(@ $x, $y, $z); + )+ + }; + (@ $x:ident , [$($y:ident),+], $z:tt) => { + pub struct $x; + $( + generate!(@@ $x, $y, $z); + )+ + }; + (@@ $x:ident , $y:ident, [$($z:ident: $zt:ident),+]) => { + impl $y { + pub fn $x($($z: $zt,)+) {} + } + } +} + +pub struct First; +pub struct Second; +pub struct Third; +pub struct Fourth; +pub struct Fifth; + +generate!( + [a, b, c, d, e], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third, d: Fourth, e: Fifth] +); + +generate!( + [f, g, h, i, j], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third, d: Fourth] +); + +generate!( + [k, l, m, n, o], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third] +); + +generate!( + // reverse it, just to make sure they're alphabetized + // in the result set when all else is equal + [t, s, r, q, p], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second] +); + +generate!( + [u, v, w, x, y], + [a, b, c, d, e, f, g, h, i, j], + [a: First] +); diff --git a/tests/rustdoc-js/full-path-function.js 
b/tests/rustdoc-js/full-path-function.js index 48be51b156fde..0464f7922174d 100644 --- a/tests/rustdoc-js/full-path-function.js +++ b/tests/rustdoc-js/full-path-function.js @@ -4,16 +4,16 @@ const EXPECTED = [ { 'query': 'sac -> usize', 'others': [ - { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, { 'path': 'full_path_function::b::Sac', 'name': 'len' }, { 'path': 'full_path_function::sac::Sac', 'name': 'len' }, + { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, ], }, { 'query': 'b::sac -> usize', 'others': [ - { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, { 'path': 'full_path_function::b::Sac', 'name': 'len' }, + { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, ], }, { diff --git a/tests/rustdoc-js/generics.js b/tests/rustdoc-js/generics.js index ebc92ccfc0575..b3ca0af3056a5 100644 --- a/tests/rustdoc-js/generics.js +++ b/tests/rustdoc-js/generics.js @@ -1,4 +1,5 @@ // exact-check +// ignore-order const EXPECTED = [ { diff --git a/tests/rustdoc-js/impl-trait.js b/tests/rustdoc-js/impl-trait.js index 00d67d639bd08..8bb3f2d3e99a5 100644 --- a/tests/rustdoc-js/impl-trait.js +++ b/tests/rustdoc-js/impl-trait.js @@ -39,8 +39,8 @@ const EXPECTED = [ { 'path': 'impl_trait', 'name': 'Aaaaaaa' }, ], 'in_args': [ - { 'path': 'impl_trait::Ccccccc', 'name': 'eeeeeee' }, { 'path': 'impl_trait::Ccccccc', 'name': 'fffffff' }, + { 'path': 'impl_trait::Ccccccc', 'name': 'eeeeeee' }, ], 'returned': [ { 'path': 'impl_trait', 'name': 'bbbbbbb' }, diff --git a/tests/rustdoc-js/type-parameters.js b/tests/rustdoc-js/type-parameters.js index e695f189bb672..e045409e507e5 100644 --- a/tests/rustdoc-js/type-parameters.js +++ b/tests/rustdoc-js/type-parameters.js @@ -1,20 +1,19 @@ // exact-check -// ignore-order const EXPECTED = [ { query: '-> trait:Some', others: [ - { path: 'foo', name: 'alef' }, { path: 'foo', name: 'alpha' }, + { path: 'foo', name: 'alef' }, ], }, { query: '-> generic:T', others: [ + { path: 'foo', name: 'beta' }, { path: 'foo', name: 
'bet' }, { path: 'foo', name: 'alef' }, - { path: 'foo', name: 'beta' }, ], }, { @@ -44,38 +43,40 @@ const EXPECTED = [ { query: 'Other, Other', others: [ - { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'generic:T', in_args: [ - { path: 'foo', name: 'bet' }, { path: 'foo', name: 'beta' }, - { path: 'foo', name: 'other' }, + { path: 'foo', name: 'bet' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'generic:Other', in_args: [ - { path: 'foo', name: 'bet' }, { path: 'foo', name: 'beta' }, - { path: 'foo', name: 'other' }, + { path: 'foo', name: 'bet' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'trait:Other', in_args: [ - { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'Other', in_args: [ + // because function is called "other", it's sorted first + // even though it has higher type distance { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, ],