Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve search relevance #8

Merged
merged 21 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions FORK.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ To help keep things trackable, the following branch naming conventions are used:
By using the syntax ` ```mermaid ` in any markdown file, dbt entities can enhance
their catalog page with any mermaid.js diagram.

- Prioritise exact file name matches over fuzzy matches when conducting a search.

- Upstream PR: https://github.com/dbt-labs/dbt-docs/pull/503
- Upstream issue: https://github.com/dbt-labs/dbt-docs/issues/217
- Fork PR: https://github.com/PicnicSupermarket/dbt-docs/pull/8

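Adds an additional step to the relevance calculation process. In addition to
the existing `overallWeight`, a separate `overallNameWeight` is now computed
and stored for the `name` field (an exact name match scores highest, followed
by prefix, suffix and substring matches). Finally, the results are sorted first
by `overallNameWeight` and then by `overallWeight`.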


## Changelog entry example

```
Expand Down
8 changes: 4 additions & 4 deletions src/app/components/search/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ <h1>
<small><span>{{ results.length }}</span> search results</small>
</h1>
<input type="checkbox" id="name" ng-model="checkboxStatus.show_names" ng-click="filterResults(results, checkboxStatus)" style = "margin-left:10px;margin-right:5px">
<label for="name" style="margin-right:25px">Name</label>
<label for="name" style="margin-right:25px">Name</label>
<input type="checkbox" id="desc" ng-model="checkboxStatus.show_descriptions" ng-click = "filterResults(results, checkboxStatus)" style = "margin-right:5px">
<label for="desc" style="margin-right:25px;">Description</label>
<label for="desc" style="margin-right:25px;">Description</label>
<input type="checkbox" id="column" ng-model="checkboxStatus.show_columns" ng-click = "filterResults(results, checkboxStatus)" style = "margin-right:5px">
<label for="column" style="margin-right:25px;">Column</label>
<label for="column" style="margin-right:25px;">Column</label>
<input type="checkbox" id="column_description" ng-model="checkboxStatus.show_column_descriptions" ng-click = "filterResults(results, checkboxStatus)" style = "margin-right:5px">
<label for="column_description" style="margin-right:25px;">Column Description</label>
<input type="checkbox" id="code" ng-model="checkboxStatus.show_code" ng-click = "filterResults(results, checkboxStatus)" style = "margin-right:5px">
Expand All @@ -33,7 +33,7 @@ <h1>
<div class="app-details">
<div class="app-frame app-pad">
<div class="results">
<div ng-repeat="result in results | filter:limit_search | orderBy:'overallWeight':true track by result.model.unique_id"
<div ng-repeat="result in results | filter:limit_search track by result.model.unique_id"
data-ui-state="getState(result.model)" data-ui-state-params="{unique_id: result.model.unique_id}"
ng-click="onSelect()"
class="result search-result a">
Expand Down
15 changes: 7 additions & 8 deletions src/app/components/search/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,9 @@ angular
if(!_.some(_.values(checkboxStatus))){
return results;
}

let finalResults = [];
let fileIDs = [];

const {show_names, show_descriptions, show_columns, show_column_descriptions, show_code, show_tags} = checkboxStatus;
_.each(results, function(result){
_.each(result.matches, function(match){
Expand Down Expand Up @@ -94,18 +93,18 @@ angular
});

scope.shorten = function(text) {
if(text != null && text.trim().length > 0 && scope.query != null && scope.query.trim().length > 0){
let modified = text.replace(/\s+/g, ' ');
//choose the first word in the search as the anchor for shortening.
if(text != null && text.trim().length > 0 && scope.query != null && scope.query.trim().length > 0){
let modified = text.replace(/\s+/g, ' ');
//choose the first word in the search as the anchor for shortening.
//Escaping in case the first token is "*" or another reserved regex character
let first_token = escapeRegExp(getQueryTokens(scope.query)[0]);
let first_token = escapeRegExp(getQueryTokens(scope.query)[0]);
let indexOfInstance = modified.search(new RegExp(first_token));
let startIndex = (indexOfInstance - 75) < 0 ? 0 : indexOfInstance - 75;
let endIndex = (indexOfInstance + 75) > modified.length ? modified.length : indexOfInstance + 75;
let shortened = "..." + modified.substring(startIndex, endIndex) + "...";
return shortened;
}
return text;
return text;
}

scope.highlight = function(text) {
Expand All @@ -116,7 +115,7 @@ angular
//e.g. "hello WORLD" changes to "(hello)|(world)"
let query_segments = getQueryTokens(scope.query);
let escaped_segments = query_segments.map(segment => escapeRegExp(segment));
let highlight_words = "(" + escaped_segments.join(")|(") + ")";
let highlight_words = "(" + escaped_segments.join(")|(") + ")";
return $sce.trustAsHtml(text.replace(new RegExp(highlight_words, 'gi'), '<span class="search-result-match">$&</span>'));
}

Expand Down
71 changes: 1 addition & 70 deletions src/app/main/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ angular
var state_changed = true;
if (from_state == to_state && prev_node_id == cur_node_id) {
state_changed = false;
}
}

if (state_changed && params.unique_id) {
var tree = projectService.updateSelected(params.unique_id);
Expand All @@ -145,75 +145,6 @@ angular
}
});

$scope.$watch('search.query', function(q) {
$scope.search.results = assignSearchRelevance(projectService.search(q));
});

/**
 * Scores every search result against the current `$scope.search.query`.
 *
 * Each result gets an `overallWeight`: the number of (case-insensitive,
 * possibly overlapping) occurrences of the query in each searchable model
 * field, multiplied by that field's weight and summed over all fields.
 * Results are annotated in place; their order is left untouched.
 *
 * @param {Array} results - search results, each exposing a `model` object.
 * @returns {Array} the same `results` collection that was passed in.
 */
function assignSearchRelevance(results){
    // An empty query matches everything equally; leave the results untouched.
    if ($scope.search.query === "") {
        return results;
    }

    // Weight applied to every hit found in the given model field.
    const fieldWeights = {
        "name": 10,
        "tags": 5,
        "description": 3,
        "raw_code": 2,
        "columns": 1
    };

    // Counts occurrences of `needle` in `haystack`, resuming one character
    // after each hit so that overlapping occurrences are counted as well.
    const countHits = function(haystack, needle) {
        let hits = 0;
        for (let at = haystack.indexOf(needle, 0); at != -1; at = haystack.indexOf(needle, at + 1)) {
            hits++;
        }
        return hits;
    };

    _.each(results, function(entry){
        entry.overallWeight = 0;
        _.each(Object.keys(fieldWeights), function(field){
            const fieldValue = entry.model[field];
            // Loose comparison on purpose: skips both undefined and null.
            if (fieldValue == undefined) {
                return;
            }

            const needle = ($scope.search.query).toLowerCase();
            let hits = 0;

            if (field === "columns") {
                _.each(fieldValue, function(column){
                    // Columns can be missing their name because of a Spark
                    // catalog bug (https://github.com/dbt-labs/dbt-spark/issues/295).
                    // Skipping nameless columns keeps the docs working both
                    // now and once that bug is fixed upstream.
                    if (column.name) {
                        hits += countHits(column.name.toLowerCase(), needle);
                    }
                });
            } else if (field === "tags") {
                _.each(fieldValue, function(tag){
                    hits += countHits(tag.toLowerCase(), needle);
                });
            } else {
                hits += countHits(fieldValue.toLowerCase(), needle);
            }

            entry.overallWeight += (hits * fieldWeights[field]);
        });
    });

    return results;
}

/*
INITIALIZE THE APPLICATION:
1. Set the selected model (if there is one) via the url
Expand Down
5 changes: 3 additions & 2 deletions src/app/services/project_service.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const angular = require('angular');
const $ = require('jquery');
const _ = require('lodash');
const { getQuoteChar } = require('./compat');
const { assignSearchRelevance } = require('./project_service_utils');

import merge from 'deepmerge';

Expand Down Expand Up @@ -334,7 +335,7 @@ angular
}

service.search = function(q) {
if (q.length == 0) {
if (q.length === 0) {
return _.map(service.project.searchable, function(model) {
return {
model: model,
Expand All @@ -353,7 +354,7 @@ angular
});
}
});
return res;
return assignSearchRelevance(res, q);
}

function clean_project_macros(macros, adapter) {
Expand Down
89 changes: 89 additions & 0 deletions src/app/services/project_service_utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
const _ = require('lodash');


// Relative importance of each searchable model field when scoring a result.
const SEARCH_CRITERIA_WEIGHTS = {
    "name": 10,
    "tags": 5,
    "description": 3,
    "raw_code": 2,
    "columns": 1
};

/**
 * Calls `visit` with every value of an array, a plain object (e.g. the
 * `columns` dictionary of a model) or a string. Null/undefined is a no-op.
 */
function forEachValue(collection, visit) {
    if (collection == null) {
        return;
    }
    Object.keys(collection).forEach(function(key) {
        visit(collection[key]);
    });
}

/**
 * Counts case-insensitive occurrences of `lowerQuery` in `text`.
 * The scan resumes one character after each hit, so overlapping occurrences
 * are counted too. Non-string values and an empty query yield 0.
 */
function countCaseInsensitiveMatches(text, lowerQuery) {
    if (typeof text !== "string" || lowerQuery === "") {
        return 0;
    }
    const haystack = text.toLowerCase();
    let count = 0;
    let index = haystack.indexOf(lowerQuery);
    while (index !== -1) {
        count++;
        index = haystack.indexOf(lowerQuery, index + 1);
    }
    return count;
}

/**
 * Grades how well a model name matches the (already lowercased) query:
 * exact match 10, prefix 5, suffix 3, substring 1, otherwise 0.
 * The comparison is case-insensitive for every grade, including "exact".
 */
function calculateNameMatchWeight(name, lowerQuery) {
    if (typeof name !== "string") {
        return 0;
    }
    const lowerName = name.toLowerCase();
    if (lowerName === lowerQuery) return 10;
    if (lowerName.startsWith(lowerQuery)) return 5;
    if (lowerName.endsWith(lowerQuery)) return 3;
    if (lowerName.includes(lowerQuery)) return 1;
    return 0;
}

/**
 * Scores search results against the query `q` and sorts them by relevance.
 *
 * Every result is annotated in place with:
 *  - `overallNameWeight`: the name-match grade (see calculateNameMatchWeight)
 *    multiplied by the weight of the "name" field;
 *  - `overallWeight`: the name contribution plus, for tags, description,
 *    raw_code and column names, the number of case-insensitive occurrences of
 *    the query multiplied by the field's weight.
 *
 * The array is then sorted in place, first by `overallNameWeight` and then by
 * `overallWeight`, both descending. Missing (undefined or null) fields
 * contribute nothing.
 *
 * @param {Array} results - search results, each exposing a `model` object.
 * @param {string} q - the raw search query.
 * @returns {Array} the same `results` array; returned untouched (no weights,
 *   no sorting) when the query is empty or missing.
 */
function assignSearchRelevance(results, q) {
    if (q == null || q === "") {
        return results;
    }

    // Lowercase once; every comparison below is case-insensitive.
    const query = q.toLowerCase();

    results.forEach(function(result) {
        result.overallWeight = 0;
        result.overallNameWeight = 0;

        Object.keys(SEARCH_CRITERIA_WEIGHTS).forEach(function(criteria) {
            const body = result.model[criteria];
            // Loose check on purpose: a null field must be skipped as well,
            // otherwise lowercasing/iterating it would throw.
            if (body == null) {
                return;
            }

            const weight = SEARCH_CRITERIA_WEIGHTS[criteria];
            let count = 0;

            if (criteria === "columns") {
                forEachValue(body, function(column) {
                    // There is a Spark bug where columns are missing from the
                    // catalog (https://github.com/dbt-labs/dbt-spark/issues/295).
                    // Skipping nameless columns keeps the docs working both
                    // now and once that bug is fixed upstream.
                    if (column && column.name) {
                        count += countCaseInsensitiveMatches(column.name, query);
                    }
                });
            } else if (criteria === "name") {
                // Names are graded by match quality instead of hit count so
                // that exact matches outrank prefix/suffix/substring matches.
                count = calculateNameMatchWeight(body, query);
                result.overallNameWeight += (count * weight);
            } else if (criteria === "tags") {
                forEachValue(body, function(tag) {
                    count += countCaseInsensitiveMatches(tag, query);
                });
            } else {
                count = countCaseInsensitiveMatches(body, query);
            }

            result.overallWeight += (count * weight);
        });
    });

    results.sort((a, b) => b.overallNameWeight - a.overallNameWeight || b.overallWeight - a.overallWeight);
    return results;
}

module.exports = {
assignSearchRelevance,
}
42 changes: 42 additions & 0 deletions src/app/services/project_service_utils.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
const proj_utils = require('./project_service_utils')

describe('Project Service Tests', () => {
    describe('assignSearchRelevance', () => {
        let results;

        beforeEach(() => {
            // Fresh fixture per test: assignSearchRelevance mutates and sorts
            // the array in place. The weights start at 0 on every result.
            results = [
                { model: { name: 'dm_test', tags: ["dm", "test", "test model"], columns: {"id": {"name": "id"}}, raw_code: "SELECT test from test" }, overallWeight: 0, overallNameWeight: 0 },
                { model: { name: 'ft_test_person', tags: ["test", "ft", "person"], columns: {"id": {"name": "id"}}, raw_code: "SELECT test, test from test" }, overallWeight: 0, overallNameWeight: 0 },
                { model: { name: 'test_event', tags: ["test", "event"], columns: {"test": {"name": "test"}} , raw_code: "SELECT id from abc" }, overallWeight: 0, overallNameWeight: 0 },
                // The weights belong on the result, not inside `model`.
                { model: { name: 'test_log', tags: ["test", "log"] }, overallWeight: 0, overallNameWeight: 0 },
                { model: { name: 'test', tags: [], columns: {} }, overallWeight: 0, overallNameWeight: 0 },
                { model: { name: 'n/a', tags: [], columns: {} }, overallWeight: 0, overallNameWeight: 0 },
            ];
        });

        it('should prioritize exact name matches', () => {
            proj_utils.assignSearchRelevance(results, 'test');

            // Exact name match: 10 * 10.
            expect(results[0].model.name).toBe('test');
            expect(results[0].overallNameWeight).toBe(100);
            expect(results[0].overallWeight).toBe(100);

            // Prefix match: 5 * 10, plus one tag hit (5) and one column hit (1).
            expect(results[1].model.name).toBe('test_event');
            expect(results[1].overallNameWeight).toBe(50);
            expect(results[1].overallWeight).toBe(56);

            // Prefix match: 5 * 10, plus one tag hit (5).
            expect(results[2].model.name).toBe('test_log');
            expect(results[2].overallNameWeight).toBe(50);
            expect(results[2].overallWeight).toBe(55);

            // Suffix match: 3 * 10, plus two tag hits (10) and two raw_code hits (4).
            expect(results[3].model.name).toBe('dm_test');
            expect(results[3].overallNameWeight).toBe(30);
            expect(results[3].overallWeight).toBe(44);

            // Substring match: 1 * 10, plus one tag hit (5) and three raw_code hits (6).
            expect(results[4].model.name).toBe('ft_test_person');
            expect(results[4].overallNameWeight).toBe(10);
            expect(results[4].overallWeight).toBe(21);

            // A model that does not match at all sinks to the bottom.
            expect(results[5].model.name).toBe('n/a');
            expect(results[5].overallNameWeight).toBe(0);
            expect(results[5].overallWeight).toBe(0);
        });

    });
});