Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): Filter out "removed" entities from autocomplete #2781

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions datahub-frontend/app/react/analytics/AnalyticsService.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package react.analytics;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.metadata.dao.exception.ESQueryException;
import graphql.BarSegment;
import graphql.DateInterval;
Expand Down Expand Up @@ -64,13 +65,12 @@ public AnalyticsService(final RestHighLevelClient elasticClient) {
public List<NamedLine> getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity,
Optional<String> dimension, // Length 1 for now
Map<String, List<String>> filters, Optional<String> uniqueOn) {

_logger.debug(
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,",
indexName, dateRange, granularity, dimension)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, Optional.of(dateRange));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange));

AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM)
.field("timestamp")
Expand Down Expand Up @@ -123,8 +123,7 @@ public List<NamedBar> getBarChart(String indexName, Optional<DateRange> dateRang
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

assert (dimensions.size() == 1 || dimensions.size() == 2);

AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);

AggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0)).missing(NA);
if (dimensions.size() == 2) {
Expand Down Expand Up @@ -170,13 +169,12 @@ private List<BarSegment> extractBarSegmentsFromAggregations(Aggregations aggrega

public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRange, String groupBy,
Map<String, List<String>> filters, Optional<String> uniqueOn, int maxRows) {

_logger.debug(
String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s",
indexName, dateRange, groupBy)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(groupBy).size(maxRows);
if (uniqueOn.isPresent()) {
Expand All @@ -201,8 +199,8 @@ public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRan
}

public int getHighlights(String indexName, Optional<DateRange> dateRange, Map<String, List<String>> filters,
Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);
uniqueOn.ifPresent(s -> filteredAgg.subAggregation(getUniqueQuery(s)));

SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg);
Expand Down Expand Up @@ -239,9 +237,11 @@ private Filter executeAndExtract(SearchRequest searchRequest) {
}
}

private AggregationBuilder getFilteredAggregation(Map<String, List<String>> filters, Optional<DateRange> dateRange) {
private AggregationBuilder getFilteredAggregation(Map<String, List<String>> mustFilters,
Map<String, List<String>> mustNotFilters, Optional<DateRange> dateRange) {
BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery();
filters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
mustFilters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
mustNotFilters.forEach((key, values) -> filteredQuery.mustNot(QueryBuilders.termsQuery(key, values)));
dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range)));
return AggregationBuilders.filter(FILTERED, filteredQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ private List<Highlight> getHighlights() {

int weeklyActiveUsers =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRange),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));

int weeklyActiveUsersLastWeek =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRangeLastWeek),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));

String bodyText = "";
if (weeklyActiveUsersLastWeek > 0) {
Expand All @@ -79,10 +79,11 @@ private List<Highlight> getHighlights() {
}

private Highlight getEntityMetadataStats(String title, String index) {
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(), Optional.empty());
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
int numEntitiesWithOwners =
_analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of("hasOwners", ImmutableList.of("true")),
Optional.empty());
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
String bodyText = "";
if (numEntities > 0) {
double percentChange = 100.0 * numEntitiesWithOwners / numEntities;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
Expand Down Expand Up @@ -65,13 +66,17 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable String fi
}

private QueryBuilder getQuery(@Nonnull String query, @Nullable String field) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
// Search for exact matches with higher boost and ngram matches
List<String> fieldNames = getAutocompleteFields(field).stream()
.flatMap(fieldName -> Stream.of(fieldName, fieldName + ".ngram"))
.collect(Collectors.toList());
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
queryBuilder.analyzer(ANALYZER);
return queryBuilder;
MultiMatchQueryBuilder autocompleteQueryBuilder =
QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
autocompleteQueryBuilder.analyzer(ANALYZER);
finalQuery.should(autocompleteQueryBuilder);
finalQuery.mustNot(QueryBuilders.matchQuery("removed", true));
return finalQuery;
}

// Get HighlightBuilder to highlight the matched field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import java.util.List;
import java.util.Map;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
Expand All @@ -22,11 +24,17 @@ public void testDefaultAutocompleteRequest() {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", null, null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("keyPart1"));
assertTrue(queryFields.containsKey("keyPart1.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
assertEquals(query.mustNot().size(), 1);
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);
Expand All @@ -40,11 +48,16 @@ public void testAutocompleteRequestWithField() {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", "field", null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("field"));
assertTrue(queryFields.containsKey("field.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);
Expand Down