Upgrade plugin to 2.4 and refactor zip dependencies (#25)
Upgrades the plugin to 2.4. Fixes a couple of compile-time dependencies with
ml-commons. Refactors the zip dependencies for ml-commons and k-NN.

Signed-off-by: John Mazanec <jmazane@amazon.com>
jmazanec15 authored Oct 24, 2022
1 parent 094bf0b commit ed1afb6
Showing 4 changed files with 22 additions and 55 deletions.
66 changes: 17 additions & 49 deletions build.gradle
@@ -74,7 +74,7 @@ buildscript {
ext {
// as we don't have 3.0.0, 2.4.0 version for K-NN on darwin we need to keep OpenSearch version as 2.3 for now.
// Github issue: https://github.com/opensearch-project/opensearch-build/issues/2662
opensearch_version = System.getProperty("opensearch.version", "2.3.0-SNAPSHOT")
opensearch_version = System.getProperty("opensearch.version", "2.4.0-SNAPSHOT")
buildVersionQualifier = System.getProperty("build.version_qualifier", "")
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
version_tokens = opensearch_version.tokenize('-')
@@ -89,12 +89,6 @@ buildscript {
}
opensearch_group = "org.opensearch"
opensearch_no_snapshot = opensearch_build.replace("-SNAPSHOT","")
k_NN_resource_folder = "build/resources/k-NN"
ml_common_resource_folder = "build/resources/ml-commons"
//TODO: we need a better way to construct this URL as, this URL is valid for released version of K-NN, ML-Plugin.
// Github issue: https://github.com/opensearch-project/opensearch-build/issues/2662
k_NN_build_download_url = "https://aws.oss.sonatype.org/content/repositories/releases/org/opensearch/plugin/opensearch-knn/" + opensearch_no_snapshot + "/opensearch-knn-" + opensearch_no_snapshot +".zip"
ml_common_build_download_url = "https://aws.oss.sonatype.org/content/repositories/releases/org/opensearch/plugin/opensearch-ml-plugin/" + opensearch_no_snapshot + "/opensearch-ml-plugin-" + opensearch_no_snapshot +".zip"
}

repositories {
@@ -137,22 +131,21 @@ def knnJarDirectory = "$buildDir/dependencies/opensearch-knn"
dependencies {
api "org.opensearch:opensearch:${opensearch_version}"
zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}"
zipArchive group: 'org.opensearch', name:'opensearch-ml-plugin', version: "${opensearch_build}"
compileOnly fileTree(dir: knnJarDirectory, include: '*.jar')
api group: 'org.opensearch', name:'opensearch-ml-client', version: "${opensearch_build}"
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.10'
}

// From maven, we can get the k-NN plugin as a zip. In order to add the jar to the classpath, we need to unzip the
// In order to add the jar to the classpath, we need to unzip the
// k-NN zip and then copy it into a directory that we specify as a dependency.
task unzip(type: Copy) {
configurations.zipArchive.asFileTree.each {
from(zipTree(it))
}
task extractKnnJar(type: Copy) {
from(zipTree(configurations.zipArchive.find { it.name.startsWith("opensearch-knn")}))
into knnJarDirectory
}

compileJava {
dependsOn unzip
dependsOn extractKnnJar
options.compilerArgs.addAll(["-processor", 'lombok.launch.AnnotationProcessorHider$AnnotationProcessor'])
}
compileTestJava {
@@ -208,45 +201,20 @@ integTest {

testClusters.integTest {
testDistribution = "ARCHIVE"
// Install ML-Plugin on the integTest cluster nodes
plugin(provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
if (new File("$project.rootDir/$ml_common_resource_folder").exists()) {
project.delete(files("$project.rootDir/$ml_common_resource_folder"))
}
project.mkdir ml_common_resource_folder
ant.get(src: ml_common_build_download_url,
dest: ml_common_resource_folder,
httpusecaches: false)
return fileTree(ml_common_resource_folder).getSingleFile()
}
}
}
}))

// Install K-NN plugin on the integTest cluster nodes
plugin(provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
if (new File("$project.rootDir/$k_NN_resource_folder").exists()) {
project.delete(files("$project.rootDir/$k_NN_resource_folder"))
// Install K-NN/ml-commons plugins on the integTest cluster nodes
configurations.zipArchive.asFileTree.each {
plugin(provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
return it
}
project.mkdir k_NN_resource_folder
ant.get(src: k_NN_build_download_url,
dest: k_NN_resource_folder,
httpusecaches: false)
return fileTree(k_NN_resource_folder).getSingleFile()
}
}
}
}))
}))
}

// This installs our neural-search plugin into the testClusters
plugin(project.tasks.bundlePlugin.archiveFile)
@@ -22,7 +22,6 @@
import org.opensearch.ml.common.dataset.MLInputDataset;
import org.opensearch.ml.common.dataset.TextDocsInputDataSet;
import org.opensearch.ml.common.input.MLInput;
import org.opensearch.ml.common.model.MLModelTaskType;
import org.opensearch.ml.common.output.MLOutput;
import org.opensearch.ml.common.output.model.ModelResultFilter;
import org.opensearch.ml.common.output.model.ModelTensor;
@@ -67,7 +66,7 @@ public void inferenceSentence(

/**
* Abstraction to call predict function of api of MLClient with default targetResponse filters. It uses the
* custom model provided as modelId and run the {@link MLModelTaskType#TEXT_EMBEDDING}. The return will be sent
* custom model provided as modelId and run the {@link FunctionName#TEXT_EMBEDDING}. The return will be sent
* using the actionListener which will have a {@link List} of {@link List} of {@link Float} in the order of
* inputText. We are not making this function generic enough to take any function or TaskType as currently we
* need to run only TextEmbedding tasks only.
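
For orientation, here is a hedged usage sketch of the async API this Javadoc describes. It is not part of this commit; the accessor class name (MLCommonsClientAccessor), the exact parameter list, and the listener type are assumptions inferred from the visible signatures and Javadoc.

```java
import java.util.List;

import org.opensearch.action.ActionListener;

// Hypothetical caller of the ML client accessor described above.
// The class name MLCommonsClientAccessor and the method signature are assumed.
public class EmbeddingExample {

    private final MLCommonsClientAccessor mlClient;

    public EmbeddingExample(MLCommonsClientAccessor mlClient) {
        this.mlClient = mlClient;
    }

    public void embedAsync(String modelId, List<String> sentences) {
        // The listener receives one embedding (List<Float>) per input sentence, in input order.
        mlClient.inferenceSentences(modelId, sentences, ActionListener.wrap(
            vectors -> System.out.println("Received " + vectors.size() + " embeddings"),
            e -> System.err.println("Text embedding inference failed: " + e.getMessage())));
    }
}
```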
@@ -86,7 +85,7 @@ public void inferenceSentences(

/**
* Abstraction to call predict function of api of MLClient with provided targetResponse filters. It uses the
* custom model provided as modelId and run the {@link MLModelTaskType#TEXT_EMBEDDING}. The return will be sent
* custom model provided as modelId and run the {@link FunctionName#TEXT_EMBEDDING}. The return will be sent
* using the actionListener which will have a {@link List} of {@link List} of {@link Float} in the order of
* inputText. We are not making this function generic enough to take any function or TaskType as currently we
* need to run only TextEmbedding tasks only.
@@ -112,7 +111,7 @@ public void inferenceSentences(

/**
* Abstraction to call predict function of api of MLClient with provided targetResponseFilters. It uses the
* custom model provided as modelId and run the {@link MLModelTaskType#TEXT_EMBEDDING}. The return will be sent
* custom model provided as modelId and run the {@link FunctionName#TEXT_EMBEDDING}. The return will be sent
* using the actionListener which will have a {@link List} of {@link List} of {@link Float} in the order of
* inputText. We are not making this function generic enough to take any function or TaskType as currently we need
* to run only TextEmbedding tasks only. Please note this method is a blocking method, use this only when the processing
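
Under the same assumptions as the sketch above, the blocking variant described here could be called like this (again illustrative, not part of the commit):

```java
// Extends the hypothetical EmbeddingExample above; assumes a two-argument blocking overload.
public List<List<Float>> embedBlocking(String modelId, List<String> sentences) {
    // Blocks the calling thread until inference completes; use only where that is acceptable.
    return mlClient.inferenceSentences(modelId, sentences);
}
```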
@@ -136,7 +135,7 @@ public List<List<Float>> inferenceSentences(@NonNull final String modelId, @NonN
private MLInput createMLInput(final List<String> targetResponseFilters, List<String> inputText) {
final ModelResultFilter modelResultFilter = new ModelResultFilter(false, true, targetResponseFilters, null);
final MLInputDataset inputDataset = new TextDocsInputDataSet(inputText, modelResultFilter);
return new MLInput(FunctionName.TEXT_EMBEDDING, null, inputDataset, MLModelTaskType.TEXT_EMBEDDING);
return new MLInput(FunctionName.TEXT_EMBEDDING, null, inputDataset);
}

private List<List<Float>> buildVectorFromResponse(MLOutput mlOutput) {
@@ -27,8 +27,8 @@
import org.opensearch.action.ActionListener;
import org.opensearch.ml.client.MachineLearningNodeClient;
import org.opensearch.ml.common.input.MLInput;
import org.opensearch.ml.common.model.MLResultDataType;
import org.opensearch.ml.common.output.MLOutput;
import org.opensearch.ml.common.output.model.MLResultDataType;
import org.opensearch.ml.common.output.model.ModelTensor;
import org.opensearch.ml.common.output.model.ModelTensorOutput;
import org.opensearch.ml.common.output.model.ModelTensors;
Binary file modified src/test/resources/model/all-MiniLM-L6-v2.zip
Binary file not shown.
