Skip to content

Commit

Permalink
Add search index sort multi field
Browse files Browse the repository at this point in the history
The default sorting of the search index could be more intuitive. For
example, lowercase letters follow uppercase letters, which leads to odd
results: when sorting in ascending order, the letter `a` comes after `Z`.

By introducing a `sort` multi-field for selected fields, the indexed
string can be normalized to lowercase and, with `asciifolding`, folded
to plain ASCII, which yields a more intuitive sort order.

An even better approach would be to use the `icu_collation` filter, which
can be configured to use a collation table for a specific language.

Signed-off-by: Gregor Eichelberger <gregor.eichelberger@tuwien.ac.at>
  • Loading branch information
geichelberger committed Nov 6, 2023
1 parent 8982b71 commit 814274b
Show file tree
Hide file tree
Showing 9 changed files with 191 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,23 @@ public SearchQuery withSortOrder(String field, Order order) {
return this;
}

/**
 * Adds a sort criterion that targets the normalized {@code sort} multi field of the given field instead of the
 * field itself. Criteria are applied in the order in which they were added.
 *
 * @param field
 *          the name of the base field, must not be {@code null}; the sort field extension is appended to it
 * @param order
 *          the sort direction, must not be {@code null}
 * @return this search query, to allow call chaining
 */
public SearchQuery withNormalizerSortOrder(String field, Order order) {
  // Address the multi field, e.g. "title" becomes "title" + NORMALIZER_SORT_FIELD_NAME_EXTENSION.
  final String normalizedSortField = requireNonNull(field)
      .concat(IndexSchema.NORMALIZER_SORT_FIELD_NAME_EXTENSION);
  final Order direction = requireNonNull(order);
  sortOrders.put(normalizedSortField, direction);
  return this;
}

@Override
public Map<String, Order> getSortOrders() {
return Collections.unmodifiableMap(sortOrders);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,7 @@ public interface IndexSchema {
/** Accumulative text field with analysis targeted for fuzzy search */
String TEXT_FUZZY = "text" + FUZZY_FIELDNAME_EXTENSION;

/** Suffix appended to a field name to address its normalized sort multi field (e.g. {@code title.sort}) */
String NORMALIZER_SORT_FIELD_NAME_EXTENSION = ".sort";

}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@

"object": { "type" : "text", "index" : false, "store" : true },

"title": { "type" : "keyword" },
"title": {
"type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"start_date": { "type" : "keyword" },

Expand All @@ -23,15 +31,30 @@

"contributor": { "type" : "keyword" },

"presenter": { "type" : "keyword" },
"presenter": {
"type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"technical_presenters": { "type" : "keyword" },

"subject": { "type" : "keyword" },

"description": { "type" : "keyword" },

"location": { "type" : "keyword" },
"location": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"agent_id": { "type" : "keyword" },

Expand All @@ -41,7 +64,14 @@

"series_id": { "type" : "keyword" },

"series_name": { "type" : "keyword" },
"series_name": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"source": { "type" : "keyword" },

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
"lowercase"
]
}
},
"normalizer": {
"sort_normalizer": {
"type": "custom",
"char_filter": [],
"filter": ["lowercase", "asciifolding"]
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,103 @@

"object": { "type" : "text", "index": false, "store" : true },

"title": { "type" : "keyword" },
"title": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"series_json": { "type" : "keyword" },

"description": { "type" : "keyword" },

"subject": { "type" : "keyword" },

"language": { "type" : "keyword" },

"creator": { "type" : "keyword" },

"license": { "type" : "keyword" },
"description": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"subject": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"language": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"creator": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"license": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"access_policy": { "type" : "text" },

"managed_acl": { "type" : "keyword" },

"createdDateTime": { "type" : "date", "format" : "yyyy-MM-dd'T'HH:mm:ss'Z'" },

"organizers": { "copy_to" : "organizer", "type" : "keyword" },

"contributors": { "copy_to" : "contributor", "type" : "keyword" },

"publisher": { "type" : "keyword" },

"rights_holder": { "type" : "keyword" },
"organizers": { "copy_to" : "organizer", "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"contributors": { "copy_to" : "contributor", "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"publisher": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"rights_holder": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"theme": { "type" : "long" },

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,32 @@

"default": { "type" : "boolean" },

"creator": { "type" : "keyword" },

"name": { "type" : "keyword" },

"description": { "type" : "keyword" },
"creator": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"name": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"description": { "type" : "keyword",
"fields" : {
"sort": {
"type": "keyword",
"normalizer": "sort_normalizer"
}
}
},

"bumper_active": { "type" : "boolean" },

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1077,7 +1077,7 @@ public Order getDateSortOrder() {
* @return the enhanced search query
*/
public EventSearchQuery sortByTitle(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(EventIndexSchema.TITLE, order);
  return this;
}

Expand All @@ -1098,7 +1098,7 @@ public Order getTitleSortOrder() {
* @return the enhanced search query
*/
public EventSearchQuery sortByPresenter(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(EventIndexSchema.PRESENTER, order);
  return this;
}

Expand All @@ -1119,7 +1119,7 @@ public Order getPresentersSortOrder() {
* @return the updated query
*/
public EventSearchQuery sortByLocation(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(EventIndexSchema.LOCATION, order);
  return this;
}

Expand All @@ -1140,7 +1140,7 @@ public Order getLocationSortOrder() {
* @return the updated query
*/
public EventSearchQuery sortBySeriesName(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(EventIndexSchema.SERIES_NAME, order);
  return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ public Order getSeriesIdentifierSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortBySubject(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.SUBJECT, order);
  return this;
}

Expand All @@ -554,7 +554,7 @@ public Order getSeriesSubjectSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByCreator(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.CREATOR, order);
  return this;
}

Expand All @@ -575,7 +575,7 @@ public Order getSeriesCreatorSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByPublishers(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.PUBLISHERS, order);
  return this;
}

Expand All @@ -596,7 +596,7 @@ public Order getSeriesPublishersSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByDescription(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.DESCRIPTION, order);
  return this;
}

Expand All @@ -617,7 +617,7 @@ public Order getSeriesDescriptionSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByLanguage(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.LANGUAGE, order);
  return this;
}

Expand All @@ -638,7 +638,7 @@ public Order getSeriesLanguageSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByRightsHolder(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.RIGHTS_HOLDER, order);
  return this;
}

Expand All @@ -659,7 +659,7 @@ public Order getSeriesRightsHolderSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByLicense(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.LICENSE, order);
  return this;
}

Expand All @@ -680,7 +680,7 @@ public Order getSeriesLicenseSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByContributors(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.CONTRIBUTORS, order);
  return this;
}

Expand Down Expand Up @@ -743,7 +743,7 @@ public Order getSeriesDateSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByOrganizers(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.ORGANIZERS, order);
  return this;
}

Expand All @@ -764,7 +764,7 @@ public Order getSeriesOrganizersSortOrder() {
* @return the enhanced search query
*/
public SeriesSearchQuery sortByTitle(Order order) {
  // Sort on the normalized "sort" multi field only. Registering the raw keyword field first would make the
  // case-sensitive order the primary criterion and defeat the normalization.
  withNormalizerSortOrder(SeriesIndexSchema.TITLE, order);
  return this;
}

Expand Down
Loading

0 comments on commit 814274b

Please sign in to comment.