Skip to content

Commit

Permalink
Desktop: Fixes #3153: Make GotoAnything work with East Asian characters (
Browse files Browse the repository at this point in the history
  • Loading branch information
ylc395 committed Jun 3, 2020
1 parent 720494d commit 5082181
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 8 deletions.
16 changes: 14 additions & 2 deletions CliClient/tests/services_SearchEngine.js
Original file line number Diff line number Diff line change
Expand Up @@ -257,22 +257,29 @@ describe('services_SearchEngine', function() {

it('should support queries with Chinese characters', asyncTest(async () => {
let rows;
const n1 = await Note.save({ title: '我是法国人' });
const n1 = await Note.save({ title: '我是法国人', body: '中文测试' });

await engine.syncTables();

expect((await engine.search('我')).length).toBe(1);
expect((await engine.search('法国人')).length).toBe(1);
expect((await engine.search('法国人*'))[0].fields.sort()).toEqual(['body', 'title']); // usually assume that keyword was matched in body
expect((await engine.search('测试')).length).toBe(1);
expect((await engine.search('测试'))[0].fields).toEqual(['body']);
expect((await engine.search('测试*'))[0].fields).toEqual(['body']);
}));

it('should support queries with Japanese characters', asyncTest(async () => {
let rows;
const n1 = await Note.save({ title: '私は日本語を話すことができません' });
const n1 = await Note.save({ title: '私は日本語を話すことができません', body: 'テスト' });

await engine.syncTables();

expect((await engine.search('日本')).length).toBe(1);
expect((await engine.search('できません')).length).toBe(1);
expect((await engine.search('できません*'))[0].fields.sort()).toEqual(['body', 'title']); // usually assume that keyword was matched in body
expect((await engine.search('テスト'))[0].fields.sort()).toEqual(['body']);

}));

it('should support queries with Korean characters', asyncTest(async () => {
Expand Down Expand Up @@ -302,10 +309,15 @@ describe('services_SearchEngine', function() {
await engine.syncTables();

expect((await engine.search('title:你好*')).length).toBe(1);
expect((await engine.search('title:你好*'))[0].fields).toEqual(['title']);
expect((await engine.search('body:法国人')).length).toBe(1);
expect((await engine.search('body:法国人'))[0].fields).toEqual(['body']);
expect((await engine.search('body:你好')).length).toBe(0);
expect((await engine.search('title:你好 body:法国人')).length).toBe(1);
expect((await engine.search('title:你好 body:法国人'))[0].fields.sort()).toEqual(['body', 'title']);
expect((await engine.search('title:你好 body:bla')).length).toBe(0);
expect((await engine.search('title:你好 我是')).length).toBe(1);
expect((await engine.search('title:你好 我是'))[0].fields.sort()).toEqual(['body', 'title']);
expect((await engine.search('title:bla 我是')).length).toBe(0);

// For non-alpha char, only the first field is looked at, the following ones are ignored
Expand Down
39 changes: 33 additions & 6 deletions ReactNativeClient/lib/services/SearchEngine.js
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,37 @@ class SearchEngine {
return occurenceCount / spread;
}

processResults_(rows, parsedQuery) {
processBasicSearchResults_(rows, parsedQuery) {
const valueRegexs = parsedQuery.keys.includes('_') ? parsedQuery.terms['_'].map(term => term.valueRegex || term.value) : [];
const isTitleSearch = parsedQuery.keys.includes('title');
const isOnlyTitle = parsedQuery.keys.length === 1 && isTitleSearch;

for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const offsets = row.offsets.split(' ').map(o => Number(o));
row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
row.fields = this.fieldNamesFromOffsets_(offsets);
const testTitle = regex => new RegExp(regex, 'ig').test(row.title);
const matchedFields = {
title: isTitleSearch || valueRegexs.some(testTitle),
body: !isOnlyTitle,
};

row.fields = Object.keys(matchedFields).filter(key => matchedFields[key]);
row.weight = 0;
}
}

processResults_(rows, parsedQuery, isBasicSearchResults = false) {
if (isBasicSearchResults) {
this.processBasicSearchResults_(rows, parsedQuery);
} else {
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const offsets = row.offsets.split(' ').map(o => Number(o));
row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
row.fields = this.fieldNamesFromOffsets_(offsets);
}
}


rows.sort((a, b) => {
if (a.fields.includes('title') && !b.fields.includes('title')) return -1;
if (!a.fields.includes('title') && b.fields.includes('title')) return +1;
Expand Down Expand Up @@ -383,6 +406,8 @@ class SearchEngine {
const searchOptions = {};

for (const key of parsedQuery.keys) {
if (parsedQuery.terms[key].length === 0) continue;

const term = parsedQuery.terms[key][0].value;
if (key === '_') searchOptions.anywherePattern = `*${term}*`;
if (key === 'title') searchOptions.titlePattern = `*${term}*`;
Expand Down Expand Up @@ -415,18 +440,20 @@ class SearchEngine {
query = this.normalizeText_(query);

const searchType = this.determineSearchType_(query, options.searchType);
const parsedQuery = this.parseQuery(query);

if (searchType === SearchEngine.SEARCH_TYPE_BASIC) {
// Non-alphabetical languages aren't supported by SQLite FTS (except with extensions, which are not available on all platforms)
return this.basicSearch(query);
const rows = await this.basicSearch(query);
this.processResults_(rows, parsedQuery, true);
return rows;
} else { // SEARCH_TYPE_FTS
// FTS will ignore all special characters, like "-" in the index. So if
// we search for "this-phrase" it won't find it because it will only
// see "this phrase" in the index. Because of this, we remove the dashes
// when searching.
// https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856
query = query.replace(/-/g, ' ');
const parsedQuery = this.parseQuery(query);
const sql = `
SELECT
notes_fts.id,
Expand Down

0 comments on commit 5082181

Please sign in to comment.