Skip to content

Commit

Permalink
[Enhancement](java-udf) java-udf module split to sub modules
Browse files Browse the repository at this point in the history
  • Loading branch information
lexluo committed Jun 12, 2023
1 parent a02a2f4 commit 25ce674
Show file tree
Hide file tree
Showing 64 changed files with 797 additions and 219 deletions.
8 changes: 5 additions & 3 deletions be/src/util/jni-util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ Status JniUtil::Init() {
RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
if (env == NULL) return Status::InternalError("Failed to get/create JVM");
// Find JniUtil class and create a global ref.
jclass local_jni_util_cl = env->FindClass("org/apache/doris/udf/JniUtil");
jclass local_jni_util_cl = env->FindClass("org/apache/doris/common/jni/utils/JniUtil");
if (local_jni_util_cl == NULL) {
if (env->ExceptionOccurred()) env->ExceptionDescribe();
return Status::InternalError("Failed to find JniUtil class.");
Expand All @@ -283,7 +283,8 @@ Status JniUtil::Init() {
}

// Find InternalException class and create a global ref.
jclass local_internal_exc_cl = env->FindClass("org/apache/doris/udf/InternalException");
jclass local_internal_exc_cl =
env->FindClass("org/apache/doris/common/exception/InternalException");
if (local_internal_exc_cl == NULL) {
if (env->ExceptionOccurred()) env->ExceptionDescribe();
return Status::InternalError("Failed to find JniUtil class.");
Expand All @@ -299,7 +300,8 @@ Status JniUtil::Init() {
}

// Find JNINativeMethod class and create a global ref.
jclass local_jni_native_exc_cl = env->FindClass("org/apache/doris/udf/JNINativeMethod");
jclass local_jni_native_exc_cl =
env->FindClass("org/apache/doris/common/jni/utils/JNINativeMethod");
if (local_jni_native_exc_cl == nullptr) {
if (env->ExceptionOccurred()) {
env->ExceptionDescribe();
Expand Down
2 changes: 1 addition & 1 deletion be/src/util/jni_native_method.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace doris {

/**
* Java native methods for org.apache.doris.udf.JNINativeMethod.
* Java native methods for org.apache.doris.common.jni.utils.JNINativeMethod.
*/
struct JavaNativeMethods {
/**
Expand Down
10 changes: 5 additions & 5 deletions be/src/vec/exec/jni_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) {
return Status::InternalError("Failed to get/create JVM");
}
RETURN_IF_ERROR(_init_jni_scanner(env, state->batch_size()));
// Call org.apache.doris.jni.JniScanner#open
// Call org.apache.doris.common.jni.JniScanner#open
env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open);
RETURN_ERROR_IF_EXC(env);
return Status::OK();
Expand All @@ -82,15 +82,15 @@ Status JniConnector::init(
_generate_predicates(colname_to_value_range);
if (_predicates_length != 0 && _predicates != nullptr) {
int64_t predicates_address = (int64_t)_predicates.get();
// We can call org.apache.doris.jni.vec.ScanPredicate#parseScanPredicates to parse the
// We can call org.apache.doris.common.jni.vec.ScanPredicate#parseScanPredicates to parse the
// serialized predicates in java side.
_scanner_params.emplace("push_down_predicates", std::to_string(predicates_address));
}
return Status::OK();
}

Status JniConnector::get_nex_block(Block* block, size_t* read_rows, bool* eof) {
// Call org.apache.doris.jni.JniScanner#getNextBatchMeta
// Call org.apache.doris.common.jni.JniScanner#getNextBatchMeta
// return the address of meta information
JNIEnv* env = nullptr;
RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
Expand Down Expand Up @@ -123,7 +123,7 @@ Status JniConnector::close() {
JNIEnv* env = nullptr;
RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
// _fill_block may be failed and returned, we should release table in close.
// org.apache.doris.jni.JniScanner#releaseTable is idempotent
// org.apache.doris.common.jni.JniScanner#releaseTable is idempotent
env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_release_table);
env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_close);
env->DeleteGlobalRef(_jni_scanner_obj);
Expand Down Expand Up @@ -199,7 +199,7 @@ Status JniConnector::_fill_column(ColumnPtr& doris_column, DataTypePtr& data_typ
TypeIndex logical_type = remove_nullable(data_type)->get_type_id();
void* null_map_ptr = _next_meta_as_ptr();
if (null_map_ptr == nullptr) {
// org.apache.doris.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0
// org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0
return Status::InternalError("Unsupported type {} in java side", getTypeName(logical_type));
}
MutableColumnPtr data_column;
Expand Down
8 changes: 4 additions & 4 deletions be/src/vec/exec/jni_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ struct Decimal;
namespace doris::vectorized {

/**
* Connector to java jni scanner, which should extend org.apache.doris.jni.JniScanner
* Connector to java jni scanner, which should extend org.apache.doris.common.jni.JniScanner
*/
class JniConnector {
public:
/**
* The predicates that can be pushed down to java side.
* Reference to java class org.apache.doris.jni.vec.ScanPredicate
* Reference to java class org.apache.doris.common.jni.vec.ScanPredicate
*/
template <typename CppType>
struct ScanPredicate {
Expand Down Expand Up @@ -102,7 +102,7 @@ class JniConnector {
/**
* The value ranges can be stored as byte array as following format:
* number_filters(4) | length(4) | column_name | op(4) | scale(4) | num_values(4) | value_length(4) | value | ...
* The read method is implemented in org.apache.doris.jni.vec.ScanPredicate#parseScanPredicates
* The read method is implemented in org.apache.doris.common.jni.vec.ScanPredicate#parseScanPredicates
*/
int write(std::unique_ptr<char[]>& predicates, int origin_length) {
int num_filters = 0;
Expand Down Expand Up @@ -232,7 +232,7 @@ class JniConnector {
std::unique_ptr<char[]> _predicates = nullptr;

/**
* Set the address of meta information, which is returned by org.apache.doris.jni.JniScanner#getNextBatchMeta
* Set the address of meta information, which is returned by org.apache.doris.common.jni.JniScanner#getNextBatchMeta
*/
void _set_meta(long meta_addr) {
_meta_ptr = static_cast<long*>(reinterpret_cast<void*>(meta_addr));
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/scan/jni_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ MockJniReader::MockJniReader(const std::vector<SlotDescriptor*>& file_slot_descs
std::map<String, String> params = {{"mock_rows", "10240"},
{"required_fields", required_fields.str()},
{"columns_types", columns_types.str()}};
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/jni/MockJniScanner", params,
column_names);
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/common/jni/MockJniScanner",
params, column_names);
}

Status MockJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/scan/max_compute_jni_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_des
{"split_size", std::to_string(_range.size)},
{"required_fields", required_fields.str()},
{"columns_types", columns_types.str()}};
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/jni/MaxComputeJniScanner",
params, column_names);
_jni_connector = std::make_unique<JniConnector>(
"org/apache/doris/maxcompute/MaxComputeJniScanner", params, column_names);
}

Status MaxComputeJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/scan/paimon_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_d
params["length_byte"] = range.table_format_params.paimon_params.length_byte;
params["split_byte"] =
std::to_string((int64_t)range.table_format_params.paimon_params.paimon_split.data());
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/jni/PaimonJniScanner", params,
column_names);
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/paimon/PaimonJniScanner",
params, column_names);
}

Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/vjdbc_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

namespace doris {
namespace vectorized {
const char* JDBC_EXECUTOR_CLASS = "org/apache/doris/udf/JdbcExecutor";
const char* JDBC_EXECUTOR_CLASS = "org/apache/doris/jdbc/JdbcExecutor";
const char* JDBC_EXECUTOR_CTOR_SIGNATURE = "([B)V";
const char* JDBC_EXECUTOR_WRITE_SIGNATURE = "(Ljava/lang/String;)I";
const char* JDBC_EXECUTOR_STMT_WRITE_SIGNATURE = "(Ljava/util/Map;)I";
Expand Down
96 changes: 55 additions & 41 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ usage() {
echo "
Usage: $0 <options>
Optional options:
[no option] build all components
--fe build Frontend and Spark DPP application. Default ON.
--be build Backend. Default ON.
--meta-tool build Backend meta tool. Default OFF.
--broker build Broker. Default ON.
--audit build audit loader. Default ON.
--spark-dpp build Spark DPP application. Default ON.
--hive-udf build Hive UDF library for Spark Load. Default ON.
--java-udf build Java UDF. Default ON.
--clean clean and build target
--output specify the output directory
-j build Backend parallel
[no option] build all components
--fe build Frontend and Spark DPP application. Default ON.
--be build Backend. Default ON.
--meta-tool build Backend meta tool. Default OFF.
--broker build Broker. Default ON.
--audit build audit loader. Default ON.
--spark-dpp build Spark DPP application. Default ON.
--hive-udf build Hive UDF library for Spark Load. Default ON.
--be-java-extensions build Backend java extensions. Default ON.
--clean clean and build target
--output specify the output directory
-j build Backend parallel
Environment variables:
USE_AVX2 If the CPU does not support AVX2 instruction set, please set USE_AVX2=0. Default is ON.
STRIP_DEBUG_INFO If set STRIP_DEBUG_INFO=ON, the debug information in the compiled binaries will be stored separately in the 'be/lib/debug_info' directory. Default is OFF.
    DISABLE_JAVA_UDF            If set DISABLE_JAVA_UDF=ON, we will not build the binary with java-udf. Default is OFF.
    DISABLE_BE_JAVA_EXTENSIONS  If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will not build the binary with java-udf, hudi-scanner, jdbc-scanner and so on. Default is OFF.
DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will skip style check of java code in FE.
Eg.
$0 build all
Expand Down Expand Up @@ -119,7 +119,7 @@ if ! OPTS="$(getopt \
-l 'meta-tool' \
-l 'spark-dpp' \
-l 'hive-udf' \
-l 'java-udf' \
-l 'be-java-extensions' \
-l 'clean' \
-l 'coverage' \
-l 'help' \
Expand All @@ -138,7 +138,7 @@ BUILD_BROKER=0
BUILD_AUDIT=0
BUILD_META_TOOL='OFF'
BUILD_SPARK_DPP=0
BUILD_JAVA_UDF=0
BUILD_BE_JAVA_EXTENSIONS=0
BUILD_HIVE_UDF=0
CLEAN=0
HELP=0
Expand All @@ -154,7 +154,7 @@ if [[ "$#" == 1 ]]; then
BUILD_META_TOOL='OFF'
BUILD_SPARK_DPP=1
BUILD_HIVE_UDF=1
BUILD_JAVA_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
CLEAN=0
else
while true; do
Expand All @@ -163,12 +163,12 @@ else
BUILD_FE=1
BUILD_SPARK_DPP=1
BUILD_HIVE_UDF=1
BUILD_JAVA_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
shift
;;
--be)
BUILD_BE=1
BUILD_JAVA_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
shift
;;
--broker)
Expand All @@ -191,8 +191,8 @@ else
BUILD_HIVE_UDF=1
shift
;;
--java-udf)
BUILD_JAVA_UDF=1
--be-java-extensions)
BUILD_BE_JAVA_EXTENSIONS=1
shift
;;
--clean)
Expand Down Expand Up @@ -239,7 +239,7 @@ else
BUILD_META_TOOL='ON'
BUILD_SPARK_DPP=1
BUILD_HIVE_UDF=1
BUILD_JAVA_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
CLEAN=0
fi
fi
Expand Down Expand Up @@ -343,8 +343,8 @@ if [[ -z "${OUTPUT_BE_BINARY}" ]]; then
OUTPUT_BE_BINARY=${BUILD_BE}
fi

if [[ -z "${DISABLE_JAVA_UDF}" ]]; then
DISABLE_JAVA_UDF='OFF'
if [[ -z "${BUILD_BE_JAVA_EXTENSIONS}" ]]; then
BUILD_BE_JAVA_EXTENSIONS='OFF'
fi

if [[ -z "${DISABLE_JAVA_CHECK_STYLE}" ]]; then
Expand All @@ -355,7 +355,7 @@ if [[ -z "${RECORD_COMPILER_SWITCHES}" ]]; then
RECORD_COMPILER_SWITCHES='OFF'
fi

if [[ "${BUILD_JAVA_UDF}" -eq 1 && "$(uname -s)" == 'Darwin' ]]; then
if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 && "$(uname -s)" == 'Darwin' ]]; then
if [[ -z "${JAVA_HOME}" ]]; then
CAUSE='the environment variable JAVA_HOME is not set'
else
Expand All @@ -369,13 +369,13 @@ if [[ "${BUILD_JAVA_UDF}" -eq 1 && "$(uname -s)" == 'Darwin' ]]; then

if [[ -n "${CAUSE}" ]]; then
        echo -e "\033[33;1mWARNING: \033[37;1mSkip building with Java UDF due to ${CAUSE}.\033[0m"
BUILD_JAVA_UDF=0
DISABLE_JAVA_UDF_IN_CONF=1
BUILD_BE_JAVA_EXTENSIONS=0
BUILD_BE_JAVA_EXTENSIONS_IN_CONF=1
fi
fi

if [[ "${DISABLE_JAVA_UDF}" == "ON" ]]; then
BUILD_JAVA_UDF=0
if [[ "${BUILD_BE_JAVA_EXTENSIONS}" == "ON" ]]; then
BUILD_BE_JAVA_EXTENSIONS=0
fi

echo "Get params:
Expand All @@ -385,7 +385,7 @@ echo "Get params:
BUILD_AUDIT -- ${BUILD_AUDIT}
BUILD_META_TOOL -- ${BUILD_META_TOOL}
BUILD_SPARK_DPP -- ${BUILD_SPARK_DPP}
BUILD_JAVA_UDF -- ${BUILD_JAVA_UDF}
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
Expand Down Expand Up @@ -424,14 +424,19 @@ if [[ "${BUILD_SPARK_DPP}" -eq 1 ]]; then
modules+=("fe-common")
modules+=("spark-dpp")
fi
if [[ "${BUILD_JAVA_UDF}" -eq 1 ]]; then
modules+=("fe-common")
modules+=("java-udf")
fi
if [[ "${BUILD_HIVE_UDF}" -eq 1 ]]; then
modules+=("fe-common")
modules+=("hive-udf")
fi
if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 ]]; then
modules+=("fe-common")
modules+=("be-java-extensions/hudi-scanner")
modules+=("be-java-extensions/java-common")
modules+=("be-java-extensions/java-udf")
modules+=("be-java-extensions/jdbc-scanner")
modules+=("be-java-extensions/paimon-scanner")
modules+=("be-java-extensions/max-compute-scanner")
fi
FE_MODULES="$(
IFS=','
echo "${modules[*]}"
Expand Down Expand Up @@ -596,11 +601,11 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/"
fi

if [[ "${DISABLE_JAVA_UDF_IN_CONF}" -eq 1 ]]; then
if [[ "${BUILD_BE_JAVA_EXTENSIONS_IN_CONF}" -eq 1 ]]; then
        echo -e "\033[33;1mWARNING: \033[37;1mDisable Java UDF support in be.conf due to the BE was built without Java UDF.\033[0m"
cat >>"${DORIS_OUTPUT}/be/conf/be.conf" <<EOF
# Java UDF support
# Java UDF and BE-JAVA-EXTENSION support
enable_java_support = false
EOF
fi
Expand All @@ -627,10 +632,19 @@ EOF
cp -r -p "${DORIS_HOME}/be/output/lib/debug_info" "${DORIS_OUTPUT}/be/lib"/
fi

java_udf_path="${DORIS_HOME}/fe/java-udf/target/java-udf-jar-with-dependencies.jar"
if [[ -f "${java_udf_path}" ]]; then
cp "${java_udf_path}" "${DORIS_OUTPUT}/be/lib"/
fi
extensions_modules=("")
extensions_modules+=("java-udf")
extensions_modules+=("jdbc-scanner")
extensions_modules+=("hudi-scanner")
extensions_modules+=("paimon-scanner")
extensions_modules+=("max-compute-scanner")

for extensions_module in "${extensions_modules[@]}"; do
module_path="${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/${extensions_module}-jar-with-dependencies.jar"
if [[ -f "${module_path}" ]]; then
cp "${module_path}" "${DORIS_OUTPUT}/be/lib"/
fi
done

cp -r -p "${DORIS_THIRDPARTY}/installed/webroot"/* "${DORIS_OUTPUT}/be/www"/
copy_common_files "${DORIS_OUTPUT}/be/"
Expand Down Expand Up @@ -659,12 +673,12 @@ if [[ "${BUILD_AUDIT}" -eq 1 ]]; then
cd "${DORIS_HOME}"
fi

if [[ "${BUILD_JAVA_UDF}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 ]]; then
if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 ]]; then
install -d "${DORIS_OUTPUT}/be/lib"

rm -rf "${DORIS_OUTPUT}/be/lib/java-udf-jar-with-dependencies.jar"

java_udf_path="${DORIS_HOME}/fe/java-udf/target/java-udf-jar-with-dependencies.jar"
java_udf_path="${DORIS_HOME}/fe/be-java-extensions/java-udf/target/java-udf-jar-with-dependencies.jar"
if [[ -f "${java_udf_path}" ]]; then
cp "${java_udf_path}" "${DORIS_OUTPUT}/be/lib"/
fi
Expand Down
Loading

0 comments on commit 25ce674

Please sign in to comment.