Skip to content

Commit 539bde3

Browse files
rjernstkcm
authored and committed
Scripting: Convert domainSplit function for ML to whitelist (#34426)
This commit moves the definition of domainSplit into Java and exposes it as a Painless whitelist extension. The method also no longer needs params, and a version which ignores params is added and deprecated.
1 parent cfd4bee commit 539bde3

File tree

10 files changed

+300
-280
lines changed

10 files changed

+300
-280
lines changed

docs/reference/ml/transforms.asciidoc

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -490,10 +490,10 @@ PUT _xpack/ml/datafeeds/datafeed-test3
490490
},
491491
"script_fields":{
492492
"sub":{
493-
"script":"return domainSplit(doc['query'].value, params).get(0);"
493+
"script":"return domainSplit(doc['query'].value).get(0);"
494494
},
495495
"hrd":{
496-
"script":"return domainSplit(doc['query'].value, params).get(1);"
496+
"script":"return domainSplit(doc['query'].value).get(1);"
497497
}
498498
}
499499
}
@@ -511,10 +511,6 @@ registered domain. For example, the highest registered domain of
511511
`domainSplit()` function returns an array of two values: the first value is the
512512
subdomain; the second value is the highest registered domain.
513513

514-
NOTE: The `domainSplit()` function takes two arguments. The first argument is
515-
the string you want to split. The second argument is always `params`. This is a
516-
technical implementation detail related to how Painless operates internally.
517-
518514
The preview {dfeed} API returns the following results, which show that
519515
"www.ml.elastic.co" has been split into "elastic.co" and "www.ml":
520516

x-pack/plugin/ml/build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@ esplugin {
99
description 'Elasticsearch Expanded Pack Plugin - Machine Learning'
1010
classname 'org.elasticsearch.xpack.ml.MachineLearning'
1111
hasNativeController true
12-
extendedPlugins = ['x-pack-core']
12+
extendedPlugins = ['x-pack-core', 'lang-painless']
1313
}
14-
archivesBaseName = 'x-pack-ml'
1514

1615
configurations {
1716
nativeBundle {
@@ -42,6 +41,7 @@ compileTestJava.options.compilerArgs << "-Xlint:-deprecation,-rawtypes,-serial,-
4241
dependencies {
4342
// "org.elasticsearch.plugin:x-pack-core:${version}" doesn't work with idea because the testArtifacts are also here
4443
compileOnly project(path: xpackModule('core'), configuration: 'default')
44+
compileOnly "org.elasticsearch.plugin:elasticsearch-scripting-painless-spi:${versions.elasticsearch}"
4545
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
4646
// This should not be here
4747
testCompile project(path: xpackModule('security'), configuration: 'testArtifacts')

x-pack/plugin/ml/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import org.elasticsearch.common.settings.Settings;
1414
import org.elasticsearch.test.rest.ESRestTestCase;
1515
import org.elasticsearch.xpack.ml.MachineLearning;
16-
import org.elasticsearch.xpack.ml.utils.DomainSplitFunction;
1716
import org.joda.time.DateTime;
1817

1918
import java.util.ArrayList;
@@ -190,8 +189,7 @@ public void testIsolated() throws Exception {
190189

191190
Pattern pattern = Pattern.compile("domain_split\":\\[(.*?),(.*?)\\]");
192191

193-
Map<String, Object> params = new HashMap<>(DomainSplitFunction.params.size() + 1);
194-
params.putAll(DomainSplitFunction.params);
192+
Map<String, Object> params = new HashMap<>();
195193
for (TestConfiguration testConfig : tests) {
196194
params.put("host", testConfig.hostName);
197195
String mapAsJson = Strings.toString(jsonBuilder().map(params));
@@ -207,8 +205,8 @@ public void testIsolated() throws Exception {
207205
" \"domain_split\" : {\n" +
208206
" \"script\" : {\n" +
209207
" \"lang\": \"painless\",\n" +
210-
" \"inline\": \"" + DomainSplitFunction.function +
211-
" return domainSplit(params['host'], params); \",\n" +
208+
" \"inline\": \"" +
209+
" return domainSplit(params['host']); \",\n" +
212210
" \"params\": " + mapAsJson + "\n" +
213211
" }\n" +
214212
" }\n" +
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml;
7+
8+
import org.elasticsearch.painless.spi.PainlessExtension;
9+
import org.elasticsearch.painless.spi.Whitelist;
10+
import org.elasticsearch.painless.spi.WhitelistLoader;
11+
import org.elasticsearch.script.ScriptContext;
12+
import org.elasticsearch.script.SearchScript;
13+
14+
import java.util.Collections;
15+
import java.util.List;
16+
import java.util.Map;
17+
18+
public class MachineLearningPainlessExtension implements PainlessExtension {
19+
private static final Whitelist WHITELIST =
20+
WhitelistLoader.loadFromResourceFiles(MachineLearningPainlessExtension.class, "whitelist.txt");
21+
22+
@Override
23+
public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {
24+
return Collections.singletonMap(SearchScript.CONTEXT, Collections.singletonList(WHITELIST));
25+
}
26+
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,18 @@
1717
import org.elasticsearch.client.Client;
1818
import org.elasticsearch.common.logging.Loggers;
1919
import org.elasticsearch.common.unit.TimeValue;
20-
import org.elasticsearch.script.Script;
2120
import org.elasticsearch.search.SearchHit;
2221
import org.elasticsearch.search.fetch.StoredFieldsContext;
2322
import org.elasticsearch.search.fetch.subphase.DocValueFieldsContext;
2423
import org.elasticsearch.search.sort.SortOrder;
2524
import org.elasticsearch.xpack.core.ClientHelper;
2625
import org.elasticsearch.xpack.core.ml.datafeed.extractor.DataExtractor;
2726
import org.elasticsearch.xpack.core.ml.datafeed.extractor.ExtractorUtils;
28-
import org.elasticsearch.xpack.ml.utils.DomainSplitFunction;
2927

3028
import java.io.ByteArrayInputStream;
3129
import java.io.ByteArrayOutputStream;
3230
import java.io.IOException;
3331
import java.io.InputStream;
34-
import java.util.HashMap;
35-
import java.util.Map;
3632
import java.util.NoSuchElementException;
3733
import java.util.Objects;
3834
import java.util.Optional;
@@ -130,26 +126,10 @@ private SearchRequestBuilder buildSearchRequest(long start) {
130126
} else {
131127
searchRequestBuilder.setFetchSource(sourceFields, null);
132128
}
133-
context.scriptFields.forEach(f -> searchRequestBuilder.addScriptField(
134-
f.fieldName(), injectDomainSplit(f.script())));
129+
context.scriptFields.forEach(f -> searchRequestBuilder.addScriptField(f.fieldName(), f.script()));
135130
return searchRequestBuilder;
136131
}
137132

138-
private Script injectDomainSplit(Script script) {
139-
String code = script.getIdOrCode();
140-
if (code.contains("domainSplit(") && script.getLang().equals("painless")) {
141-
String modifiedCode = DomainSplitFunction.function + code;
142-
Map<String, Object> modifiedParams = new HashMap<>(script.getParams().size()
143-
+ DomainSplitFunction.params.size());
144-
145-
modifiedParams.putAll(script.getParams());
146-
modifiedParams.putAll(DomainSplitFunction.params);
147-
148-
return new Script(script.getType(), script.getLang(), modifiedCode, modifiedParams);
149-
}
150-
return script;
151-
}
152-
153133
private InputStream processSearchResponse(SearchResponse searchResponse) throws IOException {
154134

155135
if (searchResponse.getFailedShards() > 0 && searchHasShardFailure == false) {

0 commit comments

Comments
 (0)