Support exists query for json
ppodolsky committed Jun 22, 2023
1 parent cf902b6 commit 629d0fc
Showing 12 changed files with 77 additions and 109 deletions.
6 changes: 3 additions & 3 deletions aiosumma/aiosumma/client.py
@@ -133,7 +133,7 @@ async def copy_documents(
self,
source_index_name: str,
target_index_name: str,
conflict_strategy: Optional[dict] = None,
conflict_strategy: Optional[str] = None,
request_id: Optional[str] = None,
session_id: Optional[str] = None,
) -> index_service_pb.CopyDocumentsResponse:
@@ -146,9 +146,9 @@ async def copy_documents(
conflict_strategy: recommended to set to DoNothing for large updates and maintain uniqueness in your application
request_id: request id
session_id: session id
Returns:
Commit scheduling result
"""
if isinstance(conflict_strategy, str):
conflict_strategy = index_service_pb.ConflictStrategy.Value(conflict_strategy)
return await self.stubs['index_api'].copy_documents(
index_service_pb.CopyDocumentsRequest(
source_index_name=source_index_name,
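
copy_documents now takes the conflict strategy as a plain string and resolves it to the protobuf ConflictStrategy enum internally. A minimal usage sketch in Python; the SummaClient import path, the already-connected client, the index names, and the exact enum value spelling DO_NOTHING are assumptions, not confirmed by this diff:

from aiosumma import SummaClient  # assumed import path for the async client


async def deduplicated_copy(client: SummaClient) -> None:
    # conflict_strategy is passed as a string; copy_documents() converts it
    # with index_service_pb.ConflictStrategy.Value(...), as shown above.
    response = await client.copy_documents(
        source_index_name="books_staging",  # hypothetical index names
        target_index_name="books",
        conflict_strategy="DO_NOTHING",  # assumed spelling; check ConflictStrategy in index_service.proto
    )
    print(response)

Passing the integer enum value directly should also keep working, since the isinstance(conflict_strategy, str) check only converts strings.
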
9 changes: 2 additions & 7 deletions aiosumma/aiosumma/proto/consumer_service_pb2.pyi
@@ -1,12 +1,7 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

from google.protobuf.internal import containers as _containers
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

9 changes: 2 additions & 7 deletions aiosumma/aiosumma/proto/dag_pb_pb2.pyi
@@ -1,12 +1,7 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

from google.protobuf.internal import containers as _containers
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

11 changes: 3 additions & 8 deletions aiosumma/aiosumma/proto/index_service_pb2.pyi
@@ -1,15 +1,10 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

import query_pb2 as _query_pb2
import utils_pb2 as _utils_pb2
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

Brotli: Compression
DESCRIPTOR: _descriptor.FileDescriptor
11 changes: 3 additions & 8 deletions aiosumma/aiosumma/proto/query_pb2.pyi
@@ -1,14 +1,9 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

import utils_pb2 as _utils_pb2
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

AsUsualTerms: MissingFieldPolicy
DESCRIPTOR: _descriptor.FileDescriptor
9 changes: 2 additions & 7 deletions aiosumma/aiosumma/proto/reflection_service_pb2.pyi
@@ -1,12 +1,7 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

from google.protobuf.internal import containers as _containers
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

9 changes: 2 additions & 7 deletions aiosumma/aiosumma/proto/search_service_pb2.pyi
@@ -1,13 +1,8 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Mapping as _Mapping
from typing import Optional as _Optional
from typing import Union as _Union

import query_pb2 as _query_pb2
from google.protobuf.internal import containers as _containers
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

10 changes: 3 additions & 7 deletions aiosumma/aiosumma/proto/unixfs_pb2.pyi
@@ -1,12 +1,8 @@
from typing import ClassVar as _ClassVar
from typing import Iterable as _Iterable
from typing import Optional as _Optional
from typing import Union as _Union

from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

5 changes: 2 additions & 3 deletions aiosumma/aiosumma/proto/utils_pb2.pyi
@@ -1,8 +1,7 @@
from typing import ClassVar as _ClassVar

from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from typing import ClassVar as _ClassVar

Asc: Order
DESCRIPTOR: _descriptor.FileDescriptor
2 changes: 1 addition & 1 deletion aiosumma/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "aiosumma"
version = "2.43.4"
version = "2.43.5"
authors = [{ name = "Pasha Podolsky", email = "ppodolsky@me.com" }]
description = "Async client for Summa Search"
readme = "README.md"
70 changes: 19 additions & 51 deletions summa-core/src/components/query_parser/proto_query_parser.rs
@@ -2,24 +2,25 @@ use std::ops::Bound;
use std::ops::Bound::Unbounded;
use std::str::FromStr;

use base64::Engine;
#[cfg(feature = "metrics")]
use opentelemetry::metrics::Counter;
#[cfg(feature = "metrics")]
use opentelemetry::Context;
#[cfg(feature = "metrics")]
use opentelemetry::{global, KeyValue};
use summa_proto::proto;
use tantivy::json_utils::{convert_to_fast_value_and_get_term, JsonTermWriter};
use tantivy::json_utils::JsonTermWriter;
use tantivy::query::{
AllQuery, BooleanQuery, BoostQuery, DisjunctionMaxQuery, EmptyQuery, MoreLikeThisQuery, Occur, PhraseQuery, Query, RangeQuery, RegexQuery, TermQuery,
AllQuery, BooleanQuery, BoostQuery, DisjunctionMaxQuery, EmptyQuery, MoreLikeThisQuery, Occur, PhrasePrefixQuery, PhraseQuery, Query, RangeQuery,
RegexQuery, TermQuery,
};
use tantivy::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};
use tantivy::{DateTime, Index, Score, Term};
use tantivy::{Index, Score, Term};
use tracing::info;

use crate::components::queries::ExistsQuery;
use crate::components::query_parser::morphology::MorphologyManager;
use crate::components::query_parser::utils::cast_field_to_typed_term;
use crate::components::query_parser::{QueryParser, QueryParserError};
use crate::configs::core::QueryParserConfig;
use crate::errors::{Error, SummaResult, ValidationError};
@@ -57,48 +58,11 @@ impl From<Option<proto::query_parser_config::DefaultMode>> for QueryParserDefaul
}
}

fn cast_value_to_term(field: Field, full_path: &str, field_type: &FieldType, value: &str) -> SummaResult<Term> {
Ok(match field_type {
FieldType::Str(_) => Term::from_field_text(field, value),
FieldType::JsonObject(json_options) => {
let mut term = Term::with_capacity(128);
let mut json_term_writer = JsonTermWriter::from_field_and_json_path(field, full_path, json_options.is_expand_dots_enabled(), &mut term);
convert_to_fast_value_and_get_term(&mut json_term_writer, value).unwrap_or_else(|| {
json_term_writer.set_str(value.trim_matches(|c| c == '\'' || c == '\"'));
json_term_writer.term().clone()
})
}
FieldType::I64(_) => Term::from_field_i64(
field,
i64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as i64")))?,
),
FieldType::U64(_) => Term::from_field_u64(
field,
u64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as u64")))?,
),
FieldType::F64(_) => Term::from_field_f64(
field,
f64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as f64")))?,
),
FieldType::Bytes(_) => Term::from_field_bytes(
field,
&base64::engine::general_purpose::STANDARD
.decode(value)
.map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as bytes")))?,
),
FieldType::Date(_) => Term::from_field_date(
field,
DateTime::from_timestamp_secs(i64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as date")))?),
),
_ => return Err(Error::InvalidSyntax("invalid range type".to_owned())),
})
}

fn cast_value_to_bound_term(field: Field, full_path: &str, field_type: &FieldType, value: &str, including: bool) -> SummaResult<Bound<Term>> {
fn cast_value_to_bound_term(field: &Field, full_path: &str, field_type: &FieldType, value: &str, including: bool) -> SummaResult<Bound<Term>> {
Ok(match value {
"*" => Unbounded,
value => {
let casted_value = cast_value_to_term(field, full_path, field_type, value)?;
let casted_value = cast_field_to_typed_term(field, full_path, field_type, value)?;
if including {
Bound::Included(casted_value)
} else {
@@ -207,8 +171,8 @@ impl ProtoQueryParser {
proto::query::Query::Range(range_query_proto) => {
let (field, full_path, field_entry) = self.field_and_field_entry(&range_query_proto.field)?;
let value = range_query_proto.value.as_ref().ok_or(ValidationError::MissingRange)?;
let left = cast_value_to_bound_term(field, full_path, field_entry.field_type(), &value.left, value.including_left)?;
let right = cast_value_to_bound_term(field, full_path, field_entry.field_type(), &value.right, value.including_right)?;
let left = cast_value_to_bound_term(&field, full_path, field_entry.field_type(), &value.left, value.including_left)?;
let right = cast_value_to_bound_term(&field, full_path, field_entry.field_type(), &value.right, value.including_right)?;
Box::new(RangeQuery::new_term_bounds(
range_query_proto.field.clone(),
field_entry.field_type().value_type(),
@@ -231,7 +195,10 @@ impl ProtoQueryParser {
let mut token_stream = tokenizer.token_stream(&phrase_query_proto.value);
let mut terms: Vec<(usize, Term)> = vec![];
while let Some(token) = token_stream.next() {
terms.push((token.position, cast_value_to_term(field, full_path, field_entry.field_type(), &token.text)?))
terms.push((
token.position,
cast_field_to_typed_term(&field, full_path, field_entry.field_type(), &token.text)?,
))
}
if terms.is_empty() {
Box::new(EmptyQuery)
Expand All @@ -247,7 +214,7 @@ impl ProtoQueryParser {
proto::query::Query::Term(term_query_proto) => {
let (field, full_path, field_entry) = self.field_and_field_entry(&term_query_proto.field)?;
Box::new(TermQuery::new(
cast_value_to_term(field, full_path, field_entry.field_type(), &term_query_proto.value)?,
cast_field_to_typed_term(&field, full_path, field_entry.field_type(), &term_query_proto.value)?,
field_entry.field_type().index_record_option().unwrap_or(IndexRecordOption::Basic),
))
}
@@ -296,10 +263,11 @@ impl ProtoQueryParser {
if full_path.is_empty() {
Box::new(ExistsQuery::new(field))
} else {
Box::new(TermQuery::new(
cast_value_to_term(field, full_path, field_entry.field_type(), "")?,
field_entry.field_type().index_record_option().unwrap_or(IndexRecordOption::Basic),
))
// Generalize this approach: position indexing is currently required and expand_dots_enabled is fixed to true
let mut term = Term::with_capacity(128);
let json_term_writer = JsonTermWriter::from_field_and_json_path(field, full_path, true, &mut term);
let prefix_term = json_term_writer.term().clone();
Box::new(PhrasePrefixQuery::new(vec![prefix_term]))
}
}
})
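
The new Exists branch keeps using ExistsQuery when the whole field is targeted and switches to a PhrasePrefixQuery built from a JsonTermWriter path prefix when a JSON subfield is given, so documents containing any value under that path match. A hedged client-side sketch in Python of what this enables; the import path follows the aiosumma/aiosumma/proto layout, the message and field names (Query.exists, ExistsQuery.field) are inferred from the match arm above and the generated stubs, and the dotted path is a made-up example:

from aiosumma.proto import query_pb2

# Exists over a whole field: summa-core answers this with ExistsQuery directly.
whole_field = query_pb2.Query(exists=query_pb2.ExistsQuery(field="metadata"))

# Exists over a JSON subfield: summa-core now builds a prefix term for the JSON
# path and wraps it in a PhrasePrefixQuery, so any document carrying a value
# under metadata.isbn matches.
json_subfield = query_pb2.Query(exists=query_pb2.ExistsQuery(field="metadata.isbn"))

print(whole_field)
print(json_subfield)

As the comment in the branch notes, this JSON path variant currently relies on position indexing being enabled for the field and hard-codes expand_dots_enabled to true.
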
35 changes: 35 additions & 0 deletions summa-core/src/components/query_parser/utils.rs
@@ -1,7 +1,14 @@
use std::str::FromStr;

use base64::Engine;
use prost::encoding::bool;
use tantivy::json_utils::{convert_to_fast_value_and_get_term, JsonTermWriter};
use tantivy::schema::{Field, FieldType};
use tantivy::Term;
use tantivy_common::DateTime;

use crate::errors::SummaResult;
use crate::Error;

pub fn cast_field_to_term(field: &Field, full_path: &str, field_type: &FieldType, value: &str, force_str: bool) -> Term {
match field_type {
@@ -22,3 +29,31 @@ pub fn cast_field_to_term(field: &Field, full_path: &str, field_type: &FieldType
_ => unreachable!(),
}
}

pub fn cast_field_to_typed_term(field: &Field, full_path: &str, field_type: &FieldType, value: &str) -> SummaResult<Term> {
Ok(match field_type {
FieldType::I64(_) => Term::from_field_i64(
*field,
i64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as i64")))?,
),
FieldType::U64(_) => Term::from_field_u64(
*field,
u64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as u64")))?,
),
FieldType::F64(_) => Term::from_field_f64(
*field,
f64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as f64")))?,
),
FieldType::Bytes(_) => Term::from_field_bytes(
*field,
&base64::engine::general_purpose::STANDARD
.decode(value)
.map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as bytes")))?,
),
FieldType::Date(_) => Term::from_field_date(
*field,
DateTime::from_timestamp_secs(i64::from_str(value).map_err(|_e| Error::InvalidSyntax(format!("cannot parse {value} as date")))?),
),
_ => cast_field_to_term(field, full_path, field_type, value, false),
})
}
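
cast_field_to_typed_term turns the string value of a term or range query into a typed tantivy Term (i64, u64, f64, bytes, date) and defers to the existing cast_field_to_term for everything else, including JSON fields. A hedged Python sketch of the client-side view, where values still travel as strings and the parser does the casting; the field names are hypothetical and the message shapes (Query.term, TermQuery.field/value) are inferred from the Rust match arms above:

from aiosumma.proto import query_pb2

# The value is a string on the wire; summa-core casts it into a typed Term
# (here i64) with cast_field_to_typed_term before building the TermQuery.
by_year = query_pb2.Query(term=query_pb2.TermQuery(field="year", value="2023"))

# A JSON subfield falls through to cast_field_to_term, which resolves the
# dotted path inside the JSON field before building the term.
by_isbn = query_pb2.Query(term=query_pb2.TermQuery(field="metadata.isbn", value="9781234567890"))

print(by_year)
print(by_isbn)

Range query bounds (value.left / value.right) go through the same casting, with "*" standing for an unbounded side.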
