Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 41fc0dc

Browse files
bors[bot]KerollmopsManyTheFish
authored
Merge #309
309: Sort at query time r=Kerollmops a=Kerollmops This PR: - Makes the `Asc/Desc` criteria work with strings too, it first returns documents ordered by numbers then by strings, and finally the documents that can't be ordered. Note that it is lexicographically ordered and not ordered by character, which means that it doesn't know about wide and short characters i.e. `a`, `丹`, `▲`. - Changes the syntax for the `Asc/Desc` criterion by now using a colon to separate the name and the order i.e. `title:asc`, `price:desc`. - Add the `Sort` criterion at the third position in the ranking rules by default. - Add the `sort_criteria` method to the `Search` builder struct to let the users define the `Asc/Desc` sortable attributes they want to use at query time. Note that we need to check that the fields are registered in the sortable attributes before performing the search. - Introduce a new `InvalidSortableAttribute` user error that is raised when the sort criteria declared at query time are not part of the sortable attributes. - `@ManyTheFish` introduced integration tests for the dynamic Sort criterion. Fixes #305. Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: many <maxime@meilisearch.com>
2 parents 198c416 + d1df0d2 commit 41fc0dc

File tree

17 files changed

+701
-148
lines changed

17 files changed

+701
-148
lines changed

benchmarks/benches/search_songs.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ fn bench_songs(c: &mut criterion::Criterion) {
5252
milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
5353
let default_criterion = default_criterion.iter().map(|s| s.as_str());
5454
let asc_default: Vec<&str> =
55-
std::iter::once("asc(released-timestamp)").chain(default_criterion.clone()).collect();
55+
std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
5656
let desc_default: Vec<&str> =
57-
std::iter::once("desc(released-timestamp)").chain(default_criterion.clone()).collect();
57+
std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect();
5858

5959
let basic_with_quote: Vec<String> = BASE_CONF
6060
.queries
@@ -118,12 +118,12 @@ fn bench_songs(c: &mut criterion::Criterion) {
118118
},
119119
utils::Conf {
120120
group_name: "asc",
121-
criterion: Some(&["asc(released-timestamp)"]),
121+
criterion: Some(&["released-timestamp:asc"]),
122122
..BASE_CONF
123123
},
124124
utils::Conf {
125125
group_name: "desc",
126-
criterion: Some(&["desc(released-timestamp)"]),
126+
criterion: Some(&["released-timestamp:desc"]),
127127
..BASE_CONF
128128
},
129129

http-ui/src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ mod tests {
10301030
displayed_attributes: Setting::Set(vec!["name".to_string()]),
10311031
searchable_attributes: Setting::Set(vec!["age".to_string()]),
10321032
filterable_attributes: Setting::Set(hashset! { "age".to_string() }),
1033-
criteria: Setting::Set(vec!["asc(age)".to_string()]),
1033+
criteria: Setting::Set(vec!["age:asc".to_string()]),
10341034
stop_words: Setting::Set(btreeset! { "and".to_string() }),
10351035
synonyms: Setting::Set(hashmap! { "alex".to_string() => vec!["alexey".to_string()] }),
10361036
};
@@ -1058,7 +1058,7 @@ mod tests {
10581058
Token::Str("criteria"),
10591059
Token::Some,
10601060
Token::Seq { len: Some(1) },
1061-
Token::Str("asc(age)"),
1061+
Token::Str("age:asc"),
10621062
Token::SeqEnd,
10631063
Token::Str("stopWords"),
10641064
Token::Some,

milli/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ obkv = "0.2.0"
2525
once_cell = "1.5.2"
2626
ordered-float = "2.1.1"
2727
rayon = "1.5.0"
28-
regex = "1.4.3"
2928
roaring = "0.6.6"
3029
serde = { version = "1.0.123", features = ["derive"] }
3130
serde_json = { version = "1.0.62", features = ["preserve_order"] }

milli/src/criterion.rs

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,24 @@
11
use std::fmt;
22
use std::str::FromStr;
33

4-
use once_cell::sync::Lazy;
5-
use regex::Regex;
64
use serde::{Deserialize, Serialize};
75

86
use crate::error::{Error, UserError};
97

10-
static ASC_DESC_REGEX: Lazy<Regex> =
11-
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
12-
138
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
149
pub enum Criterion {
1510
/// Sorted by decreasing number of matched query terms.
1611
/// Query words at the front of an attribute is considered better than if it was at the back.
1712
Words,
1813
/// Sorted by increasing number of typos.
1914
Typo,
15+
/// Dynamically sort the documents at query time. None, one or multiple Asc/Desc sortable
16+
/// attributes can be used in place of this criterion at query time.
17+
Sort,
2018
/// Sorted by increasing distance between matched query terms.
2119
Proximity,
2220
/// Documents with query words contained in more important
23-
/// attributes are considred better.
21+
/// attributes are considered better.
2422
Attribute,
2523
/// Sorted by the similarity of the matched words with the query words.
2624
Exactness,
@@ -43,29 +41,46 @@ impl Criterion {
4341
impl FromStr for Criterion {
4442
type Err = Error;
4543

46-
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
47-
match txt {
44+
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
45+
match text {
4846
"words" => Ok(Criterion::Words),
4947
"typo" => Ok(Criterion::Typo),
48+
"sort" => Ok(Criterion::Sort),
5049
"proximity" => Ok(Criterion::Proximity),
5150
"attribute" => Ok(Criterion::Attribute),
5251
"exactness" => Ok(Criterion::Exactness),
53-
text => {
54-
let caps = ASC_DESC_REGEX
55-
.captures(text)
56-
.ok_or_else(|| UserError::InvalidCriterionName { name: text.to_string() })?;
57-
let order = caps.get(1).unwrap().as_str();
58-
let field_name = caps.get(2).unwrap().as_str();
59-
match order {
60-
"asc" => Ok(Criterion::Asc(field_name.to_string())),
61-
"desc" => Ok(Criterion::Desc(field_name.to_string())),
62-
text => {
63-
return Err(
64-
UserError::InvalidCriterionName { name: text.to_string() }.into()
65-
)
66-
}
67-
}
68-
}
52+
text => match AscDesc::from_str(text) {
53+
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
54+
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
55+
Err(error) => Err(error.into()),
56+
},
57+
}
58+
}
59+
}
60+
61+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
62+
pub enum AscDesc {
63+
Asc(String),
64+
Desc(String),
65+
}
66+
67+
impl AscDesc {
68+
pub fn field(&self) -> &str {
69+
match self {
70+
AscDesc::Asc(field) => field,
71+
AscDesc::Desc(field) => field,
72+
}
73+
}
74+
}
75+
76+
impl FromStr for AscDesc {
77+
type Err = UserError;
78+
79+
fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
80+
match text.rsplit_once(':') {
81+
Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
82+
Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
83+
_ => Err(UserError::InvalidCriterionName { name: text.to_string() }),
6984
}
7085
}
7186
}
@@ -74,6 +89,7 @@ pub fn default_criteria() -> Vec<Criterion> {
7489
vec![
7590
Criterion::Words,
7691
Criterion::Typo,
92+
Criterion::Sort,
7793
Criterion::Proximity,
7894
Criterion::Attribute,
7995
Criterion::Exactness,
@@ -87,11 +103,12 @@ impl fmt::Display for Criterion {
87103
match self {
88104
Words => f.write_str("words"),
89105
Typo => f.write_str("typo"),
106+
Sort => f.write_str("sort"),
90107
Proximity => f.write_str("proximity"),
91108
Attribute => f.write_str("attribute"),
92109
Exactness => f.write_str("exactness"),
93-
Asc(attr) => write!(f, "asc({})", attr),
94-
Desc(attr) => write!(f, "desc({})", attr),
110+
Asc(attr) => write!(f, "{}:asc", attr),
111+
Desc(attr) => write!(f, "{}:desc", attr),
95112
}
96113
}
97114
}

milli/src/error.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub enum UserError {
5858
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
5959
InvalidFilter(pest::error::Error<ParserRule>),
6060
InvalidFilterAttribute(pest::error::Error<ParserRule>),
61+
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
6162
InvalidStoreFile,
6263
MaxDatabaseSizeReached,
6364
MissingDocumentId { document: Object },
@@ -226,6 +227,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
226227
)
227228
}
228229
Self::InvalidFilterAttribute(error) => error.fmt(f),
230+
Self::InvalidSortableAttribute { field, valid_fields } => {
231+
let valid_names =
232+
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
233+
write!(
234+
f,
235+
"Attribute {} is not sortable, available sortable attributes are: {}",
236+
field, valid_names
237+
)
238+
}
229239
Self::MissingDocumentId { document } => {
230240
let json = serde_json::to_string(document).unwrap();
231241
write!(f, "document doesn't have an identifier {}", json)

milli/src/index.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub mod main_key {
2828
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
2929
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
3030
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
31+
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
3132
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
3233
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
3334
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
@@ -446,13 +447,45 @@ impl Index {
446447
Ok(fields_ids)
447448
}
448449

450+
/* sortable fields */
451+
452+
/// Writes the sortable fields names in the database.
453+
pub(crate) fn put_sortable_fields(
454+
&self,
455+
wtxn: &mut RwTxn,
456+
fields: &HashSet<String>,
457+
) -> heed::Result<()> {
458+
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
459+
}
460+
461+
/// Deletes the sortable fields names from the database.
462+
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
463+
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
464+
}
465+
466+
/// Returns the sortable fields names.
467+
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
468+
Ok(self
469+
.main
470+
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
471+
.unwrap_or_default())
472+
}
473+
474+
/// Identical to `sortable_fields`, but returns ids instead.
475+
pub fn sortable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
476+
let fields = self.sortable_fields(rtxn)?;
477+
let fields_ids_map = self.fields_ids_map(rtxn)?;
478+
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
479+
}
480+
449481
/* faceted documents ids */
450482

451483
/// Returns the faceted fields names.
452484
///
453-
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
485+
/// Faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
454486
pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
455487
let filterable_fields = self.filterable_fields(rtxn)?;
488+
let sortable_fields = self.sortable_fields(rtxn)?;
456489
let distinct_field = self.distinct_field(rtxn)?;
457490
let asc_desc_fields =
458491
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
@@ -461,6 +494,7 @@ impl Index {
461494
});
462495

463496
let mut faceted_fields = filterable_fields;
497+
faceted_fields.extend(sortable_fields);
464498
faceted_fields.extend(asc_desc_fields);
465499
if let Some(field) = distinct_field {
466500
faceted_fields.insert(field.to_owned());

milli/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::result::Result as StdResult;
2222
use fxhash::{FxHasher32, FxHasher64};
2323
use serde_json::{Map, Value};
2424

25-
pub use self::criterion::{default_criteria, Criterion};
25+
pub use self::criterion::{default_criteria, AscDesc, Criterion};
2626
pub use self::error::{
2727
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
2828
};

0 commit comments

Comments
 (0)