Skip to content

Commit

Permalink
Fix highlight regex, use same pattern in the default search factory f…
Browse files Browse the repository at this point in the history
…or matching
  • Loading branch information
Dananji committed Jun 26, 2024
1 parent 167e929 commit d2a7178
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 63 deletions.
5 changes: 3 additions & 2 deletions src/services/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ import { useRef, useEffect, useState, useMemo, useCallback, useContext } from 'r
import { PlayerDispatchContext } from '../context/player-context';
import { ManifestStateContext } from '../context/manifest-context';
import { getSearchService } from './iiif-parser';
import { markMatchedParts, getMatchedTranscriptLines, parseContentSearchResponse, getHitCountForCue } from './transcript-parser';
import { markMatchedParts, getMatchedTranscriptLines, parseContentSearchResponse, getHitCountForCue, buildQueryRegex } from './transcript-parser';

export const defaultMatcherFactory = (items) => {
const mappedItems = items.map(item => item.text.toLocaleLowerCase());
return (query, abortController) => {
const queryRegex = buildQueryRegex(query);
const qStr = query.trim().toLocaleLowerCase();
const matchedItems = mappedItems.reduce((results, mappedText, idx) => {
const matchOffset = mappedText.indexOf(qStr);
const matchOffset = mappedText.search(queryRegex);
if (matchOffset !== -1) {
const matchedItem = items[idx];
const matchCount = getHitCountForCue(matchedItem.text, query);
Expand Down
73 changes: 14 additions & 59 deletions src/services/search.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,6 @@ describe('useFilteredTranscripts', () => {

describe('custom behavior', () => {
describe('custom matcherFactory', () => {
test('matcher factory can be customized to customize how matches are found', async () => {
const matcherFactory = (items) => {
const mappedItems = items.map(item => ({ ...item, text: item.text.replaceAll(' ', '') }));
return defaultMatcherFactory(mappedItems);
};
const { resultRef, Component } = createTest({ matcherFactory, query: 'theparty' });

render(Component);
await waitFor(() => expect(resultRef.current.matchingIds).toEqual([0, 8]));
});
test('matcher factory can create an async matcher', async () => {
const matcherFactory = (items) => {
const matcher = defaultMatcherFactory(items);
Expand All @@ -104,7 +94,7 @@ describe('useFilteredTranscripts', () => {
};
const { resultRef, Component } = createTest({ matcherFactory, query: 'Gatsby' });
render(Component);
await waitFor(() => expect(resultRef.current.matchingIds).toEqual([1, 4, 5, 7]));
await waitFor(() => expect(resultRef.current.matchingIds).toEqual([5, 7]));
});
});

Expand All @@ -116,8 +106,8 @@ describe('useFilteredTranscripts', () => {
matchesOnly: true
});
render(Component);
await waitFor(() => expect(resultRef.current.ids).toEqual([4, 1, 5, 7]));
expect(resultRef.current.matchingIds).toEqual([4, 1, 5, 7]);
await waitFor(() => expect(resultRef.current.ids).toEqual([5, 7]));
expect(resultRef.current.matchingIds).toEqual([5, 7]);
});
test('without matchesOnly, ids will also be sorted', async () => {
const { resultRef, Component } = createTest({
Expand Down Expand Up @@ -146,7 +136,7 @@ describe('useFilteredTranscripts', () => {
});
render(Component);
await waitFor(() => expect(resultRef.current.ids).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8]));
expect(resultRef.current.matchingIds).toEqual([4, 1, 5, 7]);
expect(resultRef.current.matchingIds).toEqual([5, 7]);
});

});
Expand All @@ -166,27 +156,21 @@ describe('useFilteredTranscripts', () => {
test('when the search query is set, matchingIds will contain ids of matches', async () => {
const { resultRef, Component } = createTest({ query: 'Gatsby' });
render(Component);
await waitFor(() => expect(resultRef.current.matchingIds).toEqual([1, 4, 5, 7]));
await waitFor(() => expect(resultRef.current.matchingIds).toEqual([5, 7]));
});
test('when matchesOnly is true, only matching results are returned', async () => {
const { resultRef, Component } = createTest({ query: 'Gatsby', matchesOnly: true });
render(Component);
await waitFor(() => expect(resultRef.current.ids).toEqual([1, 4, 5, 7]));
await waitFor(() => expect(resultRef.current.ids).toEqual([5, 7]));
});
test('results included in the match set will include a match property for highlighting matches', async () => {
const { resultRef, Component } = createTest({ query: 'Gatsby' });
render(Component);
await waitFor(() => {
expect(resultRef.current.results[1].match).toEqual(
'I believe that on the first night I went to <span class="ramp--transcript_highlight">Gatsby</span>\'s house'
expect(resultRef.current.results[5].match).toEqual(
'Once there they were introduced by somebody who knew <span class="ramp--transcript_highlight">Gatsby</span>,'
);
});
expect(resultRef.current.results[4].match).toEqual(
'and somehow they ended up at <span class="ramp--transcript_highlight">Gatsby</span>\'s door.'
);
expect(resultRef.current.results[5].match).toEqual(
'Once there they were introduced by somebody who knew <span class="ramp--transcript_highlight">Gatsby</span>,'
);
expect(resultRef.current.results[7].match).toEqual(
'Sometimes they came and went without having met <span class="ramp--transcript_highlight">Gatsby</span> at all,'
);
Expand All @@ -201,28 +185,6 @@ describe('useFilteredTranscripts', () => {
id: 'http://example.com/1/search?q=bungle',
type: 'AnnotationPage',
items: [
{
id: 'http://example.com/canvas/1/search/1',
type: 'Annotation',
motivation: 'supplementing',
target: "http://example.com/canvas/1/transcript/1#t=00:01:11.900,00:01:22.000",
body: {
type: 'TextualBody',
value: "I believe that on the first night I went to <em>Gatsby</em>\'s house",
format: 'text/plain'
}
},
{
id: 'http://example.com/canvas/1/search/2',
type: 'Annotation',
motivation: 'supplementing',
target: "http://example.com/canvas/1/transcript/1#t=00:01:36.400,00:01:42.500",
body: {
type: 'TextualBody',
value: "and somehow they ended up at <em>Gatsby</em>\'s door.",
format: 'text/plain'
}
},
{
id: 'http://example.com/canvas/1/search/3',
type: 'Annotation',
Expand Down Expand Up @@ -273,42 +235,35 @@ describe('useFilteredTranscripts', () => {
const { resultRef, Component } = createTest({ matcherFactory, query: 'Gatsby' });
render(Component);
await waitFor(() => {
expect(resultRef.current.matchingIds).toEqual([1, 4, 5, 7]);
expect(resultRef.current.matchingIds).toEqual([5, 7]);
expect(resultRef.current.counts).toEqual([{
transcriptURL: 'http://example.com/canvas/1/transcript/1',
numberOfHits: 4
numberOfHits: 2
}]);
});
});
test('when matchesOnly is true, only matching results are returned', async () => {
const { resultRef, Component } = createTest({ matcherFactory, query: 'Gatsby', matchesOnly: true });
render(Component);
await waitFor(() => {
expect(resultRef.current.matchingIds).toEqual([1, 4, 5, 7]);
expect(resultRef.current.matchingIds).toEqual([5, 7]);
expect(resultRef.current.counts).toEqual([{
transcriptURL: 'http://example.com/canvas/1/transcript/1',
numberOfHits: 4
numberOfHits: 2
}]);
});
});
test('results included in the match set will include a match property for highlighting matches', async () => {
const { resultRef, Component } = createTest({ matcherFactory, query: 'Gatsby' });
render(Component);
await waitFor(() => {
expect(resultRef.current.results[1].match).toEqual(
'I believe that on the first night I went to <span class="ramp--transcript_highlight">Gatsby</span>\'s house'
expect(resultRef.current.results[5].match).toEqual(
'Once there they were introduced by somebody who knew <span class="ramp--transcript_highlight">Gatsby</span>,'
);
});
expect(resultRef.current.results[4].match).toEqual(
'and somehow they ended up at <span class="ramp--transcript_highlight">Gatsby</span>\'s door.'
);
expect(resultRef.current.results[5].match).toEqual(
'Once there they were introduced by somebody who knew <span class="ramp--transcript_highlight">Gatsby</span>,'
);
expect(resultRef.current.results[7].match).toEqual(
'Sometimes they came and went without having met <span class="ramp--transcript_highlight">Gatsby</span> at all,'
);

});
});
});
Expand Down
16 changes: 14 additions & 2 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ export const getMatchedTranscriptLines = (searchHits, query, transcripts) => {
* @returns matched cue with HTML tags added for marking the highlight
*/
export const markMatchedParts = (text, query) => {
const queryRegex = new RegExp(String.raw`${query}`, 'gi');
const queryRegex = buildQueryRegex(query);
return text.replace(queryRegex, `<span class="ramp--transcript_highlight">$&</span>`);
};

Expand All @@ -876,13 +876,25 @@ export const getHitCountForCue = (text, query, hasHighlight = false) => {
Use regex with any punctuation followed by a white space to split the query.
e.g. query: Mr. bungle => search response: <em>Mr</em>. <em>Bungle</em>
*/
const partialQ = query.split(/[.,:;!?]\s/)[0];
const partialQ = query.split(/[.,!?;:]\s/)[0];
const hitTerm = hasHighlight ? `<em>${partialQ}</em>` : partialQ;
const hightlighedTerm = new RegExp(String.raw`${hitTerm}`, 'gi');
const hitCount = [...text.matchAll(hightlighedTerm)]?.length;
return hitCount;
};

/**
 * Build a global, case-insensitive regular expression that matches the
 * search query only as a whole term. The expression omits matches where:
 * - the query is immediately followed by more word characters
 *   (e.g. query "Gatsby" does not match "Gatsbys")
 * - the query is part of a contraction/possessive
 *   (e.g. query "Gatsby" does not match "Gatsby's")
 *
 * The query is treated as literal text: regular expression metacharacters
 * in it are escaped, so input such as "c++" or "(" neither throws nor acts
 * as a pattern.
 * @param {String} query search query entered by user
 * @returns {RegExp} a regular expression with the 'g' and 'i' flags; for an
 * empty (or null/undefined) query the expression never matches
 */
export const buildQueryRegex = (query) => {
  const term = String(query ?? '');
  if (term === '') {
    // An empty pattern would match zero-length positions at every word end;
    // an empty negative lookahead can never match anything instead.
    return new RegExp('(?!)', 'gi');
  }

  // Escape regex metacharacters so the query is matched literally
  const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // A word boundary is only meaningful next to a word character; adding it
  // beside punctuation (e.g. the trailing "+" in "c++") would prevent matches.
  const leadingBoundary = /^\w/.test(term) ? '\\b' : '';
  const trailingBoundary = /\w$/.test(term) ? '\\b' : '';

  // The match must be followed by whitespace, sentence punctuation, or the
  // end of the text; this is what excludes contractions such as "Gatsby's".
  return new RegExp(
    `${leadingBoundary}${escapedTerm}${trailingBoundary}(?=[\\s.,!?;:]|$)`,
    'gi'
  );
};

// TODO:: Could be used for marking search hits in Word Doc transcripts?
// export const splitIntoElements = (htmlContent) => {
// // Create a temporary DOM element to parse the HTML
Expand Down

0 comments on commit d2a7178

Please sign in to comment.