Skip to content

Commit f3b65eb

Browse files
authored
Implement distance_feature for runtime dates (#60851)
This implements the `distance_feature` query for `date` valued `runtime_script`s. It produces the same numbers as running against an indexed date, but it doesn't have the same performance characteristics at all, which is normal for `runtime_script`s. `distance_feature` against an indexed field does a lot of work to refine the query as it goes, limiting the number of documents that it has to visit. We can't do that because we don't have an index. So we just spit out the same numbers and hope it is good enough.
1 parent 4a5d317 commit f3b65eb

File tree

9 files changed

+471
-97
lines changed

9 files changed

+471
-97
lines changed

x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/ScriptDateMappedFieldType.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.common.lucene.search.Queries;
1515
import org.elasticsearch.common.time.DateFormatter;
1616
import org.elasticsearch.common.time.DateMathParser;
17+
import org.elasticsearch.common.unit.TimeValue;
1718
import org.elasticsearch.index.mapper.DateFieldMapper;
1819
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
1920
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
@@ -23,6 +24,7 @@
2324
import org.elasticsearch.search.lookup.SearchLookup;
2425
import org.elasticsearch.xpack.runtimefields.DateScriptFieldScript;
2526
import org.elasticsearch.xpack.runtimefields.fielddata.ScriptDateFieldData;
27+
import org.elasticsearch.xpack.runtimefields.query.LongScriptFieldDistanceFeatureQuery;
2628
import org.elasticsearch.xpack.runtimefields.query.LongScriptFieldExistsQuery;
2729
import org.elasticsearch.xpack.runtimefields.query.LongScriptFieldRangeQuery;
2830
import org.elasticsearch.xpack.runtimefields.query.LongScriptFieldTermQuery;
@@ -86,6 +88,30 @@ private DateScriptFieldScript.LeafFactory leafFactory(SearchLookup lookup) {
8688
return scriptFactory.newFactory(script.getParams(), lookup);
8789
}
8890

91+
@Override
92+
public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
93+
checkAllowExpensiveQueries(context);
94+
return DateFieldType.handleNow(context, now -> {
95+
long originLong = DateFieldType.parseToLong(
96+
origin,
97+
true,
98+
null,
99+
dateTimeFormatter.toDateMathParser(),
100+
now,
101+
DateFieldMapper.Resolution.MILLISECONDS
102+
);
103+
TimeValue pivotTime = TimeValue.parseTimeValue(pivot, "distance_feature.pivot");
104+
return new LongScriptFieldDistanceFeatureQuery(
105+
script,
106+
leafFactory(context.lookup())::newInstance,
107+
name(),
108+
originLong,
109+
pivotTime.getMillis(),
110+
boost
111+
);
112+
});
113+
}
114+
89115
@Override
90116
public Query existsQuery(QueryShardContext context) {
91117
checkAllowExpensiveQueries(context);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.runtimefields.query;
8+
9+
import org.apache.lucene.index.LeafReaderContext;
10+
import org.apache.lucene.index.Term;
11+
import org.apache.lucene.search.DocIdSetIterator;
12+
import org.apache.lucene.search.Explanation;
13+
import org.apache.lucene.search.IndexSearcher;
14+
import org.apache.lucene.search.QueryVisitor;
15+
import org.apache.lucene.search.ScoreMode;
16+
import org.apache.lucene.search.Scorer;
17+
import org.apache.lucene.search.TwoPhaseIterator;
18+
import org.apache.lucene.search.Weight;
19+
import org.elasticsearch.common.CheckedFunction;
20+
import org.elasticsearch.script.Script;
21+
import org.elasticsearch.xpack.runtimefields.AbstractLongScriptFieldScript;
22+
23+
import java.io.IOException;
24+
import java.util.Objects;
25+
import java.util.Set;
26+
27+
public final class LongScriptFieldDistanceFeatureQuery extends AbstractScriptFieldQuery {
28+
private final CheckedFunction<LeafReaderContext, AbstractLongScriptFieldScript, IOException> leafFactory;
29+
private final long origin;
30+
private final long pivot;
31+
private final float boost;
32+
33+
public LongScriptFieldDistanceFeatureQuery(
34+
Script script,
35+
CheckedFunction<LeafReaderContext, AbstractLongScriptFieldScript, IOException> leafFactory,
36+
String fieldName,
37+
long origin,
38+
long pivot,
39+
float boost
40+
) {
41+
super(script, fieldName);
42+
this.leafFactory = leafFactory;
43+
this.origin = origin;
44+
this.pivot = pivot;
45+
this.boost = boost;
46+
}
47+
48+
@Override
49+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
50+
return new Weight(this) {
51+
@Override
52+
public boolean isCacheable(LeafReaderContext ctx) {
53+
return false;
54+
}
55+
56+
@Override
57+
public void extractTerms(Set<Term> terms) {}
58+
59+
@Override
60+
public Scorer scorer(LeafReaderContext context) throws IOException {
61+
return new DistanceScorer(this, leafFactory.apply(context), context.reader().maxDoc(), boost);
62+
}
63+
64+
@Override
65+
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
66+
AbstractLongScriptFieldScript script = leafFactory.apply(context);
67+
script.runForDoc(doc);
68+
long value = valueWithMinAbsoluteDistance(script);
69+
float weight = LongScriptFieldDistanceFeatureQuery.this.boost * boost;
70+
float score = score(weight, distanceFor(value));
71+
return Explanation.match(
72+
score,
73+
"Distance score, computed as weight * pivot / (pivot + abs(value - origin)) from:",
74+
Explanation.match(weight, "weight"),
75+
Explanation.match(pivot, "pivot"),
76+
Explanation.match(origin, "origin"),
77+
Explanation.match(value, "current value")
78+
);
79+
}
80+
};
81+
}
82+
83+
private class DistanceScorer extends Scorer {
84+
private final AbstractLongScriptFieldScript script;
85+
private final TwoPhaseIterator twoPhase;
86+
private final DocIdSetIterator disi;
87+
private final float weight;
88+
89+
protected DistanceScorer(Weight weight, AbstractLongScriptFieldScript script, int maxDoc, float boost) {
90+
super(weight);
91+
this.script = script;
92+
twoPhase = new TwoPhaseIterator(DocIdSetIterator.all(maxDoc)) {
93+
@Override
94+
public boolean matches() throws IOException {
95+
script.runForDoc(approximation().docID());
96+
return script.count() > 0;
97+
}
98+
99+
@Override
100+
public float matchCost() {
101+
return MATCH_COST;
102+
}
103+
};
104+
disi = TwoPhaseIterator.asDocIdSetIterator(twoPhase);
105+
this.weight = LongScriptFieldDistanceFeatureQuery.this.boost * boost;
106+
}
107+
108+
@Override
109+
public int docID() {
110+
return disi.docID();
111+
}
112+
113+
@Override
114+
public DocIdSetIterator iterator() {
115+
return disi;
116+
}
117+
118+
@Override
119+
public TwoPhaseIterator twoPhaseIterator() {
120+
return twoPhase;
121+
}
122+
123+
@Override
124+
public float getMaxScore(int upTo) throws IOException {
125+
return weight;
126+
}
127+
128+
@Override
129+
public float score() throws IOException {
130+
if (script.count() == 0) {
131+
return 0;
132+
}
133+
return LongScriptFieldDistanceFeatureQuery.this.score(weight, (double) minAbsoluteDistance(script));
134+
}
135+
}
136+
137+
long minAbsoluteDistance(AbstractLongScriptFieldScript script) {
138+
long minDistance = Long.MAX_VALUE;
139+
for (int i = 0; i < script.count(); i++) {
140+
minDistance = Math.min(minDistance, distanceFor(script.values()[i]));
141+
}
142+
return minDistance;
143+
}
144+
145+
long valueWithMinAbsoluteDistance(AbstractLongScriptFieldScript script) {
146+
long minDistance = Long.MAX_VALUE;
147+
long minDistanceValue = Long.MAX_VALUE;
148+
for (int i = 0; i < script.count(); i++) {
149+
long distance = distanceFor(script.values()[i]);
150+
if (distance < minDistance) {
151+
minDistance = distance;
152+
minDistanceValue = script.values()[i];
153+
}
154+
}
155+
return minDistanceValue;
156+
}
157+
158+
long distanceFor(long value) {
159+
long distance = Math.max(value, origin) - Math.min(value, origin);
160+
if (distance < 0) {
161+
// The distance doesn't fit into signed long so clamp it to MAX_VALUE
162+
return Long.MAX_VALUE;
163+
}
164+
return distance;
165+
}
166+
167+
float score(float weight, double distance) {
168+
return (float) (weight * (pivot / (pivot + distance)));
169+
}
170+
171+
@Override
172+
public String toString(String field) {
173+
StringBuilder b = new StringBuilder();
174+
if (false == fieldName().equals(field)) {
175+
b.append(fieldName()).append(":");
176+
}
177+
b.append(getClass().getSimpleName());
178+
b.append("(origin=").append(origin);
179+
b.append(",pivot=").append(pivot);
180+
b.append(",boost=").append(boost).append(")");
181+
return b.toString();
182+
183+
}
184+
185+
@Override
186+
public int hashCode() {
187+
return Objects.hash(super.hashCode(), origin, pivot);
188+
}
189+
190+
@Override
191+
public boolean equals(Object obj) {
192+
if (false == super.equals(obj)) {
193+
return false;
194+
}
195+
LongScriptFieldDistanceFeatureQuery other = (LongScriptFieldDistanceFeatureQuery) obj;
196+
return origin == other.origin && pivot == other.pivot;
197+
}
198+
199+
@Override
200+
public void visit(QueryVisitor visitor) {
201+
// No subclasses contain any Terms because those have to be strings.
202+
if (visitor.acceptField(fieldName())) {
203+
visitor.visitLeaf(this);
204+
}
205+
}
206+
207+
long origin() {
208+
return origin;
209+
}
210+
211+
long pivot() {
212+
return pivot;
213+
}
214+
215+
float boost() {
216+
return boost;
217+
}
218+
}

x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptMappedFieldTypeTestCase.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
package org.elasticsearch.xpack.runtimefields.mapper;
88

9+
import org.apache.lucene.index.IndexReader;
910
import org.elasticsearch.common.geo.ShapeRelation;
1011
import org.elasticsearch.index.mapper.MapperService;
1112
import org.elasticsearch.index.query.QueryShardContext;
@@ -124,4 +125,8 @@ private void assertQueryOnlyOnText(String queryName, ThrowingRunnable buildQuery
124125
)
125126
);
126127
}
128+
129+
protected String readSource(IndexReader reader, int docId) throws IOException {
130+
return reader.document(docId).getBinaryValue("_source").utf8ToString();
131+
}
127132
}

x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/ScriptDateMappedFieldTypeTests.java

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
import org.apache.lucene.index.RandomIndexWriter;
1313
import org.apache.lucene.index.SortedNumericDocValues;
1414
import org.apache.lucene.search.Collector;
15+
import org.apache.lucene.search.Explanation;
1516
import org.apache.lucene.search.IndexSearcher;
1617
import org.apache.lucene.search.LeafCollector;
1718
import org.apache.lucene.search.MatchAllDocsQuery;
19+
import org.apache.lucene.search.Query;
1820
import org.apache.lucene.search.Scorable;
1921
import org.apache.lucene.search.ScoreMode;
2022
import org.apache.lucene.search.Sort;
2123
import org.apache.lucene.search.SortField;
24+
import org.apache.lucene.search.TopDocs;
2225
import org.apache.lucene.search.TopFieldDocs;
2326
import org.apache.lucene.store.Directory;
2427
import org.apache.lucene.util.BytesRef;
@@ -59,6 +62,8 @@
5962
import java.util.function.BiConsumer;
6063

6164
import static java.util.Collections.emptyMap;
65+
import static org.hamcrest.Matchers.arrayWithSize;
66+
import static org.hamcrest.Matchers.closeTo;
6267
import static org.hamcrest.Matchers.containsString;
6368
import static org.hamcrest.Matchers.equalTo;
6469

@@ -146,18 +151,9 @@ public void testSort() throws IOException {
146151
ScriptDateFieldData ifd = simpleMappedFieldType().fielddataBuilder("test", mockContext()::lookup).build(null, null, null);
147152
SortField sf = ifd.sortField(null, MultiValueMode.MIN, null, false);
148153
TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 3, new Sort(sf));
149-
assertThat(
150-
reader.document(docs.scoreDocs[0].doc).getBinaryValue("_source").utf8ToString(),
151-
equalTo("{\"timestamp\": [1595432181351]}")
152-
);
153-
assertThat(
154-
reader.document(docs.scoreDocs[1].doc).getBinaryValue("_source").utf8ToString(),
155-
equalTo("{\"timestamp\": [1595432181354]}")
156-
);
157-
assertThat(
158-
reader.document(docs.scoreDocs[2].doc).getBinaryValue("_source").utf8ToString(),
159-
equalTo("{\"timestamp\": [1595432181356]}")
160-
);
154+
assertThat(readSource(reader, docs.scoreDocs[0].doc), equalTo("{\"timestamp\": [1595432181351]}"));
155+
assertThat(readSource(reader, docs.scoreDocs[1].doc), equalTo("{\"timestamp\": [1595432181354]}"));
156+
assertThat(readSource(reader, docs.scoreDocs[2].doc), equalTo("{\"timestamp\": [1595432181356]}"));
161157
}
162158
}
163159
}
@@ -192,6 +188,42 @@ public double execute(ExplanationHolder explanation) {
192188
}
193189
}
194190

191+
public void testDistanceFeatureQuery() throws IOException {
192+
try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
193+
iw.addDocuments(
194+
List.of(
195+
List.of(new StoredField("_source", new BytesRef("{\"timestamp\": [1595432181354]}"))),
196+
List.of(new StoredField("_source", new BytesRef("{\"timestamp\": [1595432181351]}"))),
197+
List.of(new StoredField("_source", new BytesRef("{\"timestamp\": [1595432181356, 1]}"))),
198+
List.of(new StoredField("_source", new BytesRef("{\"timestamp\": []}")))
199+
)
200+
);
201+
try (DirectoryReader reader = iw.getReader()) {
202+
IndexSearcher searcher = newSearcher(reader);
203+
Query query = simpleMappedFieldType().distanceFeatureQuery(1595432181354L, "1ms", 1, mockContext());
204+
TopDocs docs = searcher.search(query, 4);
205+
assertThat(docs.scoreDocs, arrayWithSize(3));
206+
assertThat(readSource(reader, docs.scoreDocs[0].doc), equalTo("{\"timestamp\": [1595432181354]}"));
207+
assertThat(docs.scoreDocs[0].score, equalTo(1.0F));
208+
assertThat(readSource(reader, docs.scoreDocs[1].doc), equalTo("{\"timestamp\": [1595432181356, 1]}"));
209+
assertThat((double) docs.scoreDocs[1].score, closeTo(.333, .001));
210+
assertThat(readSource(reader, docs.scoreDocs[2].doc), equalTo("{\"timestamp\": [1595432181351]}"));
211+
assertThat((double) docs.scoreDocs[2].score, closeTo(.250, .001));
212+
Explanation explanation = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0F)
213+
.explain(reader.leaves().get(0), docs.scoreDocs[0].doc);
214+
assertThat(explanation.toString(), containsString("1.0 = Distance score, computed as weight * pivot / (pivot"));
215+
assertThat(explanation.toString(), containsString("1.0 = weight"));
216+
assertThat(explanation.toString(), containsString("1 = pivot"));
217+
assertThat(explanation.toString(), containsString("1595432181354 = origin"));
218+
assertThat(explanation.toString(), containsString("1595432181354 = current value"));
219+
}
220+
}
221+
}
222+
223+
public void testDistanceFeatureQueryIsExpensive() throws IOException {
224+
checkExpensiveQuery((ft, ctx) -> ft.distanceFeatureQuery(randomLong(), randomAlphaOfLength(5), randomFloat(), ctx));
225+
}
226+
195227
@Override
196228
public void testExistsQuery() throws IOException {
197229
try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
@@ -409,7 +441,7 @@ private DateScriptFieldScript.Factory factory(String code) {
409441
@Override
410442
public void execute() {
411443
for (Object timestamp : (List<?>) getSource().get("timestamp")) {
412-
new DateScriptFieldScript.Millis(this).millis((Long) timestamp);
444+
new DateScriptFieldScript.Millis(this).millis(((Number) timestamp).longValue());
413445
}
414446
}
415447
};

0 commit comments

Comments
 (0)