Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [Enhancement] Add schema validation and placeholders to index mappings #3310

Open
wants to merge 1 commit into
base: 2.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ dependencies {
exclude group: 'com.google.guava', module: 'listenablefuture'
}
compileOnly 'com.jayway.jsonpath:json-path:2.9.0'
compileOnly("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
compileOnly("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}")
compileOnly group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0'
}

lombok {
Expand Down
18 changes: 9 additions & 9 deletions common/src/main/java/org/opensearch/ml/common/CommonValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ public class CommonValue {
public static final Set<String> stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words");

// Index mapping paths
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json";
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json";
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json";
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json";
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json";
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json";
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json";
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json";
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json";
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml_model_group.json";
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml_model.json";
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml_task.json";
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml_connector.json";
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml_config.json";
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml_controller.json";
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml_agent.json";
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml_memory_meta.json";
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml_memory_message.json";

// Calculate Versions independently of OpenSearch core version
public static final Version VERSION_2_11_0 = Version.fromString("2.11.0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

package org.opensearch.ml.common.utils;

import static org.opensearch.ml.common.utils.StringUtils.validateSchema;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import com.google.common.base.Charsets;
Expand Down Expand Up @@ -40,20 +43,99 @@ public class IndexUtils {
public static final Map<String, Object> UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1");
public static final Map<String, Object> UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all");

// Schema that validates system index mappings
public static final String MAPPING_SCHEMA_PATH = "index-mappings/schema.json";

// Placeholders to use within the json mapping files
private static final String USER_PLACEHOLDER = "USER_MAPPING_PLACEHOLDER";
private static final String CONNECTOR_PLACEHOLDER = "CONNECTOR_MAPPING_PLACEHOLDER";
public static final Map<String, String> MAPPING_PLACEHOLDERS = Map
.of(USER_PLACEHOLDER, "index-mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "index-mappings/placeholders/connector.json");

public static String getMappingFromFile(String path) throws IOException {
URL url = IndexUtils.class.getClassLoader().getResource(path);
if (url == null) {
throw new IOException("Resource not found: " + path);
}

String mapping = Resources.toString(url, Charsets.UTF_8).trim();
if (mapping.isEmpty() || !StringUtils.isJson(mapping)) {
throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path);
if (mapping.isEmpty()) {
throw new IllegalArgumentException("Empty mapping found at: " + path);
}

mapping = replacePlaceholders(mapping);
validateMapping(mapping);

return mapping;
}

public static String replacePlaceholders(String mapping) throws IOException {
if (mapping == null || mapping.isBlank()) {
throw new IllegalArgumentException("Mapping cannot be null or empty");
}

// Preload resources into memory to avoid redundant I/O
Map<String, String> loadedPlaceholders = new HashMap<>();
for (Map.Entry<String, String> placeholder : MAPPING_PLACEHOLDERS.entrySet()) {
URL url = IndexUtils.class.getClassLoader().getResource(placeholder.getValue());
if (url == null) {
throw new IOException("Resource not found: " + placeholder.getValue());
}

loadedPlaceholders.put(placeholder.getKey(), Resources.toString(url, Charsets.UTF_8));
}

StringBuilder result = new StringBuilder(mapping);
for (Map.Entry<String, String> entry : loadedPlaceholders.entrySet()) {
String placeholder = entry.getKey();
String replacement = entry.getValue();

// Replace all occurrences of the placeholder
int index;
while ((index = result.indexOf(placeholder)) != -1) {
result.replace(index, index + placeholder.length(), replacement);
}
}

return result.toString();
}

/**
* Checks if the provided mapping is a valid JSON and validates it against a schema.
*
* <p>The schema is located at <code>mappings/schema.json</code> and enforces the following validations:</p>
*
* <ul>
* <li>Mandatory fields:
* <ul>
* <li><code>_meta</code></li>
* <li><code>_meta.schema_version</code></li>
* <li><code>properties</code></li>
* </ul>
* </li>
* <li>No additional fields are allowed at the root level.</li>
* <li>No additional fields are allowed in the <code>_meta</code> object.</li>
* <li><code>properties</code> must be an object type.</li>
* <li><code>_meta</code> must be an object type.</li>
* <li><code>_meta.schema_version</code> must be an integer.</li>
* </ul>
*
* <p><strong>Note:</strong> Validation can be made stricter if a specific schema is defined for each index.</p>
*/
public static void validateMapping(String mapping) throws IOException {
if (mapping.isBlank() || !StringUtils.isJson(mapping)) {
throw new IllegalArgumentException("Invalid or non-JSON mapping found: " + mapping);
}

URL url = IndexUtils.class.getClassLoader().getResource(MAPPING_SCHEMA_PATH);
if (url == null) {
throw new IOException("Resource not found: " + MAPPING_SCHEMA_PATH);
}

String schema = Resources.toString(url, Charsets.UTF_8);
validateSchema(schema, mapping);
}

public static Integer getVersionFromMapping(String mapping) {
if (mapping == null || mapping.isBlank()) {
throw new IllegalArgumentException("Mapping cannot be null or empty");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,27 @@
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.BooleanUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.opensearch.OpenSearchParseException;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import com.jayway.jsonpath.JsonPath;
import com.networknt.schema.JsonSchema;
import com.networknt.schema.JsonSchemaFactory;
import com.networknt.schema.SpecVersion;
import com.networknt.schema.ValidationMessage;

import lombok.extern.log4j.Log4j2;

Expand All @@ -54,6 +63,8 @@ public class StringUtils {
}
public static final String TO_STRING_FUNCTION_NAME = ".toString()";

private static final ObjectMapper MAPPER = new ObjectMapper();

public static boolean isValidJsonString(String json) {
if (json == null || json.isBlank()) {
return false;
Expand Down Expand Up @@ -336,4 +347,28 @@ public static JsonObject getJsonObjectFromString(String jsonString) {
return JsonParser.parseString(jsonString).getAsJsonObject();
}

public static void validateSchema(String schemaString, String instanceString) {
try {
// parse the schema JSON as string
JsonNode schemaNode = MAPPER.readTree(schemaString);
JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode);

// JSON data to validate
JsonNode jsonNode = MAPPER.readTree(instanceString);

// Validate JSON node against the schema
Set<ValidationMessage> errors = schema.validate(jsonNode);
if (!errors.isEmpty()) {
String errorMessage = errors.stream().map(ValidationMessage::getMessage).collect(Collectors.joining(", "));

throw new OpenSearchParseException(
"Validation failed: " + errorMessage + " for instance: " + instanceString + " with schema: " + schemaString
);
}
} catch (JsonProcessingException e) {
throw new IllegalArgumentException("Invalid JSON format: " + e.getMessage(), e);
} catch (Exception e) {
throw new OpenSearchParseException("Schema validation failed: " + e.getMessage(), e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -167,77 +167,7 @@
}
}
},
"connector": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "keyword"
},
"description": {
"type": "text"
},
"protocol": {
"type": "keyword"
},
"parameters": {
"type": "flat_object"
},
"credential": {
"type": "flat_object"
},
"client_config": {
"type": "flat_object"
},
"actions": {
"type": "flat_object"
}
}
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"backend_roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"custom_attribute_names": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
"connector": CONNECTOR_MAPPING_PLACEHOLDER,
"user": USER_MAPPING_PLACEHOLDER
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,43 +44,6 @@
"remote_job": {
"type": "flat_object"
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"backend_roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"custom_attribute_names": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
"user": USER_MAPPING_PLACEHOLDER
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "keyword"
},
"description": {
"type": "text"
},
"protocol": {
"type": "keyword"
},
"parameters": {
"type": "flat_object"
},
"credential": {
"type": "flat_object"
},
"client_config": {
"type": "flat_object"
},
"actions": {
"type": "flat_object"
}
}
}
Loading
Loading