Skip to content

Commit

Permalink
[Enhancement] Fetch system index mappings from json file instead of s…
Browse files Browse the repository at this point in the history
…tring constants (opensearch-project#3153)

* feat(index mappings): fetch mappings and version from json file instead of string constants

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: changing exception being thrown

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: remove unused file

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: fix typo in comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: adding new line at the end of files

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* feat: add test cases

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: remove test code

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix(test): in main the versions were not updated appropriately

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: move mapping templates under common module

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: update comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: rename dir from mappings to index-mappings

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: add null checks

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: adding dependencies for testing

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix(test): compare json object rather than strings to avoid eol character issue

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: combine if statements into single check

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactoring: null handling + clean code

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* spotless apply

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

---------

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>
  • Loading branch information
pyek-bot authored Nov 27, 2024
1 parent f9cbf15 commit 78a304a
Show file tree
Hide file tree
Showing 22 changed files with 912 additions and 635 deletions.
1 change: 1 addition & 0 deletions common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies {
compileOnly group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1'
compileOnly group: 'org.json', name: 'json', version: '20231013'
testImplementation group: 'org.json', name: 'json', version: '20231013'
implementation('com.google.guava:guava:32.1.2-jre') {
exclude group: 'com.google.guava', module: 'failureaccess'
exclude group: 'com.google.code.findbugs', module: 'jsr305'
Expand Down
539 changes: 10 additions & 529 deletions common/src/main/java/org/opensearch/ml/common/CommonValue.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,67 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.ml.engine.indices;
package org.opensearch.ml.common;

import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH;
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX;
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING;
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_SCHEMA_VERSION;
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING_PATH;

import java.io.IOException;
import java.io.UncheckedIOException;

import org.opensearch.ml.common.utils.IndexUtils;

public enum MLIndex {
MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING, ML_MODEL_GROUP_INDEX_SCHEMA_VERSION),
MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING, ML_MODEL_INDEX_SCHEMA_VERSION),
TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING, ML_TASK_INDEX_SCHEMA_VERSION),
CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING, ML_CONNECTOR_SCHEMA_VERSION),
CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING, ML_CONFIG_INDEX_SCHEMA_VERSION),
CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING, ML_CONTROLLER_INDEX_SCHEMA_VERSION),
AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING, ML_AGENT_INDEX_SCHEMA_VERSION),
MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING, ML_MEMORY_META_INDEX_SCHEMA_VERSION),
MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING, ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION);
MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH),
MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING_PATH),
TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING_PATH),
CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING_PATH),
CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING_PATH),
CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING_PATH),
AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING_PATH),
MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING_PATH),
MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH);

private final String indexName;
// whether we use an alias for the index
private final boolean alias;
private final String mapping;
private final Integer version;

MLIndex(String name, boolean alias, String mapping, Integer version) {
MLIndex(String name, boolean alias, String mappingPath) {
this.indexName = name;
this.alias = alias;
this.mapping = mapping;
this.version = version;
this.mapping = getMapping(mappingPath);
this.version = IndexUtils.getVersionFromMapping(this.mapping);
}

private String getMapping(String mappingPath) {
if (mappingPath == null) {
throw new IllegalArgumentException("Mapping path cannot be null");
}

try {
return IndexUtils.getMappingFromFile(mappingPath);
} catch (IOException e) {
// Unchecked exception is thrown since the method is being called within a constructor
throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e);
}
}

public String getIndexName() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
package org.opensearch.ml.common.conversation;

import org.opensearch.common.settings.Setting;
import org.opensearch.ml.common.MLIndex;

/**
* Class containing a bunch of constant defining how the conversational indices are formatted
* ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings rather than constants
*/
public class ConversationalIndexConstants {
/** Version of the meta index schema */
public final static Integer META_INDEX_SCHEMA_VERSION = 2;
/** Name of the conversational metadata index */
public final static String META_INDEX_NAME = ".plugins-ml-memory-meta";
public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName();
/** Name of the metadata field for initial timestamp */
public final static String META_CREATED_TIME_FIELD = "create_time";
/** Name of the metadata field for updated timestamp */
Expand All @@ -41,38 +41,10 @@ public class ConversationalIndexConstants {
public final static String META_ADDITIONAL_INFO_FIELD = "additional_info";

/** Mappings for the conversational metadata index */
public final static String META_MAPPING = "{\n"
+ " \"_meta\": {\n"
+ " \"schema_version\": "
+ META_INDEX_SCHEMA_VERSION
+ "\n"
+ " },\n"
+ " \"properties\": {\n"
+ " \""
+ META_NAME_FIELD
+ "\": {\"type\": \"text\"},\n"
+ " \""
+ META_CREATED_TIME_FIELD
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
+ " \""
+ META_UPDATED_TIME_FIELD
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
+ " \""
+ USER_FIELD
+ "\": {\"type\": \"keyword\"},\n"
+ " \""
+ APPLICATION_TYPE_FIELD
+ "\": {\"type\": \"keyword\"},\n"
+ " \""
+ META_ADDITIONAL_INFO_FIELD
+ "\": {\"type\": \"flat_object\"}\n"
+ " }\n"
+ "}";
public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping();

/** Version of the interactions index schema */
public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1;
/** Name of the conversational interactions index */
public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message";
public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName();
/** Name of the interaction field for the conversation Id */
public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id";
/** Name of the interaction field for the human input */
Expand All @@ -92,42 +64,7 @@ public class ConversationalIndexConstants {
/** The trace number of an interaction */
public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number";
/** Mappings for the interactions index */
public final static String INTERACTIONS_MAPPINGS = "{\n"
+ " \"_meta\": {\n"
+ " \"schema_version\": "
+ INTERACTIONS_INDEX_SCHEMA_VERSION
+ "\n"
+ " },\n"
+ " \"properties\": {\n"
+ " \""
+ INTERACTIONS_CONVERSATION_ID_FIELD
+ "\": {\"type\": \"keyword\"},\n"
+ " \""
+ INTERACTIONS_CREATE_TIME_FIELD
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
+ " \""
+ INTERACTIONS_INPUT_FIELD
+ "\": {\"type\": \"text\"},\n"
+ " \""
+ INTERACTIONS_PROMPT_TEMPLATE_FIELD
+ "\": {\"type\": \"text\"},\n"
+ " \""
+ INTERACTIONS_RESPONSE_FIELD
+ "\": {\"type\": \"text\"},\n"
+ " \""
+ INTERACTIONS_ORIGIN_FIELD
+ "\": {\"type\": \"keyword\"},\n"
+ " \""
+ INTERACTIONS_ADDITIONAL_INFO_FIELD
+ "\": {\"type\": \"flat_object\"},\n"
+ " \""
+ PARENT_INTERACTIONS_ID_FIELD
+ "\": {\"type\": \"keyword\"},\n"
+ " \""
+ INTERACTIONS_TRACE_NUMBER_FIELD
+ "\": {\"type\": \"long\"}\n"
+ " }\n"
+ "}";
public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping();

/** Feature Flag setting for conversational memory */
public static final Setting<Boolean> ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,15 @@

package org.opensearch.ml.common.utils;

import java.io.IOException;
import java.net.URL;
import java.util.Map;

import com.google.common.base.Charsets;
import com.google.common.io.Resources;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;

import lombok.extern.log4j.Log4j2;

@Log4j2
Expand All @@ -32,4 +39,40 @@ public class IndexUtils {
// Note: This does not include static settings like number of shards, which can't be changed after index creation.
public static final Map<String, Object> UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1");
public static final Map<String, Object> UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all");

public static String getMappingFromFile(String path) throws IOException {
URL url = IndexUtils.class.getClassLoader().getResource(path);
if (url == null) {
throw new IOException("Resource not found: " + path);
}

String mapping = Resources.toString(url, Charsets.UTF_8).trim();
if (mapping.isEmpty() || !StringUtils.isJson(mapping)) {
throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path);
}

return mapping;
}

public static Integer getVersionFromMapping(String mapping) {
if (mapping == null || mapping.isBlank()) {
throw new IllegalArgumentException("Mapping cannot be null or empty");
}

JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping);
if (mappingJson == null || !mappingJson.has("_meta")) {
throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping);
}

JsonObject metaObject = mappingJson.getAsJsonObject("_meta");
if (metaObject == null || !metaObject.has("schema_version")) {
throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping);
}

try {
return metaObject.get("schema_version").getAsInt();
} catch (NumberFormatException | ClassCastException e) {
throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import com.jayway.jsonpath.JsonPath;
Expand Down Expand Up @@ -53,12 +54,16 @@ public class StringUtils {
}
public static final String TO_STRING_FUNCTION_NAME = ".toString()";

public static boolean isValidJsonString(String Json) {
public static boolean isValidJsonString(String json) {
if (json == null || json.isBlank()) {
return false;
}

try {
new JSONObject(Json);
new JSONObject(json);
} catch (JSONException ex) {
try {
new JSONArray(Json);
new JSONArray(json);
} catch (JSONException ex1) {
return false;
}
Expand All @@ -67,6 +72,10 @@ public static boolean isValidJsonString(String Json) {
}

public static boolean isJson(String json) {
if (json == null || json.isBlank()) {
return false;
}

try {
if (!isValidJsonString(json)) {
return false;
Expand Down Expand Up @@ -319,4 +328,12 @@ public static boolean isValidJSONPath(String input) {
}
}

public static JsonObject getJsonObjectFromString(String jsonString) {
if (jsonString == null || jsonString.isBlank()) {
throw new IllegalArgumentException("Json cannot be null or empty");
}

return JsonParser.parseString(jsonString).getAsJsonObject();
}

}
45 changes: 45 additions & 0 deletions common/src/main/resources/index-mappings/ml-agent.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"_meta": {
"schema_version": 2
},
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"type": {
"type": "keyword"
},
"description": {
"type": "text"
},
"llm": {
"type": "flat_object"
},
"tools": {
"type": "flat_object"
},
"parameters": {
"type": "flat_object"
},
"memory": {
"type": "flat_object"
},
"is_hidden": {
"type": "boolean"
},
"created_time": {
"type": "date",
"format": "strict_date_time||epoch_millis"
},
"last_updated_time": {
"type": "date",
"format": "strict_date_time||epoch_millis"
}
}
}
24 changes: 24 additions & 0 deletions common/src/main/resources/index-mappings/ml-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"_meta": {
"schema_version": 4
},
"properties": {
"master_key": {
"type": "keyword"
},
"config_type": {
"type": "keyword"
},
"ml_configuration": {
"type": "flat_object"
},
"create_time": {
"type": "date",
"format": "strict_date_time||epoch_millis"
},
"last_updated_time": {
"type": "date",
"format": "strict_date_time||epoch_millis"
}
}
}
Loading

0 comments on commit 78a304a

Please sign in to comment.