Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ jobs:
echo "should_check=${{ steps.filter.outputs.cpp_changes }}" >>${GITHUB_OUTPUT}

- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: ${{ steps.filter.outputs.cpp_changes == 'true' }}
with:
name: compile_commands
Expand All @@ -129,7 +129,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}

- name: Download
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: compile_commands
path: ./be/build_Release
Expand Down
88 changes: 44 additions & 44 deletions docker/thirdparties/docker-compose/polaris/init-catalog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,30 @@ BASE_LOCATION=${CATALOG_BASE_LOCATION:-s3://warehouse/wh/}

echo "[polaris-init] Waiting for Polaris health check at http://$HOST:$PORT/q/health ..."
for i in $(seq 1 120); do
if curl -sSf "http://$HOST:8182/q/health" >/dev/null; then
break
fi
sleep 2
if curl -sSf "http://$HOST:8182/q/health" >/dev/null; then
break
fi
sleep 2
done

echo "[polaris-init] Fetching OAuth token via client_credentials ..."
# Try to obtain token using correct OAuth endpoint
TOKEN_JSON=$(curl -sS \
-X POST "http://$HOST:$PORT/api/catalog/v1/oauth/tokens" \
-H 'Content-Type: application/x-www-form-urlencoded' \
-d "grant_type=client_credentials&client_id=$USER&client_secret=$PASS&scope=PRINCIPAL_ROLE:ALL")
-X POST "http://$HOST:$PORT/api/catalog/v1/oauth/tokens" \
-H 'Content-Type: application/x-www-form-urlencoded' \
-d "grant_type=client_credentials&client_id=$USER&client_secret=$PASS&scope=PRINCIPAL_ROLE:ALL")

# Extract access_token field
TOKEN=$(printf "%s" "$TOKEN_JSON" | sed -n 's/.*"access_token"\s*:\s*"\([^"]*\)".*/\1/p')

if [ -z "$TOKEN" ]; then
echo "[polaris-init] ERROR: Failed to obtain OAuth token. Response: $TOKEN_JSON" >&2
exit 1
echo "[polaris-init] ERROR: Failed to obtain OAuth token. Response: $TOKEN_JSON" >&2
exit 1
fi

echo "[polaris-init] Creating catalog '$CATALOG' with base '$BASE_LOCATION' ..."
CREATE_PAYLOAD=$(cat <<JSON
CREATE_PAYLOAD=$(
cat <<JSON
{
"name": "$CATALOG",
"type": "INTERNAL",
Expand All @@ -71,75 +72,74 @@ JSON

# Try create; on 409 Conflict, treat as success
HTTP_CODE=$(curl -sS -o /tmp/resp.json -w "%{http_code}" \
-X POST "http://$HOST:$PORT/api/management/v1/catalogs" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d "$CREATE_PAYLOAD")
-X POST "http://$HOST:$PORT/api/management/v1/catalogs" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d "$CREATE_PAYLOAD")

if [ "$HTTP_CODE" = "201" ]; then
echo "[polaris-init] Catalog created."
echo "[polaris-init] Catalog created."
elif [ "$HTTP_CODE" = "409" ]; then
echo "[polaris-init] Catalog already exists. Skipping."
echo "[polaris-init] Catalog already exists. Skipping."
else
echo "[polaris-init] Create catalog failed (HTTP $HTTP_CODE):"
cat /tmp/resp.json || true
exit 1
echo "[polaris-init] Create catalog failed (HTTP $HTTP_CODE):"
cat /tmp/resp.json || true
exit 1
fi

echo "[polaris-init] Setting up permissions for catalog '$CATALOG' ..."

# Create a catalog admin role grants
echo "[polaris-init] Creating catalog admin role grants ..."
HTTP_CODE=$(curl -sS -o /tmp/resp.json -w "%{http_code}" \
-X PUT "http://$HOST:$PORT/api/management/v1/catalogs/$CATALOG/catalog-roles/catalog_admin/grants" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"grant":{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}}')
-X PUT "http://$HOST:$PORT/api/management/v1/catalogs/$CATALOG/catalog-roles/catalog_admin/grants" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"grant":{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}}')

if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
echo "[polaris-init] Warning: Failed to create catalog admin grants (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
echo "[polaris-init] Warning: Failed to create catalog admin grants (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
fi

# Create a data engineer role
echo "[polaris-init] Creating data engineer role ..."
HTTP_CODE=$(curl -sS -o /tmp/resp.json -w "%{http_code}" \
-X POST "http://$HOST:$PORT/api/management/v1/principal-roles" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"principalRole":{"name":"data_engineer"}}')
-X POST "http://$HOST:$PORT/api/management/v1/principal-roles" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"principalRole":{"name":"data_engineer"}}')

if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ] && [ "$HTTP_CODE" != "409" ]; then
echo "[polaris-init] Warning: Failed to create data engineer role (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
echo "[polaris-init] Warning: Failed to create data engineer role (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
fi

# Connect the roles
echo "[polaris-init] Connecting roles ..."
HTTP_CODE=$(curl -sS -o /tmp/resp.json -w "%{http_code}" \
-X PUT "http://$HOST:$PORT/api/management/v1/principal-roles/data_engineer/catalog-roles/$CATALOG" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"catalogRole":{"name":"catalog_admin"}}')
-X PUT "http://$HOST:$PORT/api/management/v1/principal-roles/data_engineer/catalog-roles/$CATALOG" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"catalogRole":{"name":"catalog_admin"}}')

if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
echo "[polaris-init] Warning: Failed to connect roles (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
echo "[polaris-init] Warning: Failed to connect roles (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
fi

# Give root the data engineer role
echo "[polaris-init] Assigning data engineer role to root ..."
HTTP_CODE=$(curl -sS -o /tmp/resp.json -w "%{http_code}" \
-X PUT "http://$HOST:$PORT/api/management/v1/principals/root/principal-roles" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"principalRole": {"name":"data_engineer"}}')
-X PUT "http://$HOST:$PORT/api/management/v1/principals/root/principal-roles" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"principalRole": {"name":"data_engineer"}}')

if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
echo "[polaris-init] Warning: Failed to assign data engineer role to root (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
echo "[polaris-init] Warning: Failed to assign data engineer role to root (HTTP $HTTP_CODE)"
cat /tmp/resp.json || true
fi

echo "[polaris-init] Permissions setup completed."
echo "[polaris-init] Done."

85 changes: 42 additions & 43 deletions docker/thirdparties/run-thirdparties-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ HELP=0
STOP=0
NEED_RESERVE_PORTS=0
export NEED_LOAD_DATA=1
export LOAD_PARALLEL=$(( $(getconf _NPROCESSORS_ONLN) / 2 ))
export LOAD_PARALLEL=$(($(getconf _NPROCESSORS_ONLN) / 2))
export IP_HOST=$(ip -4 addr show scope global | awk '/inet / {print $2}' | cut -d/ -f1 | head -n 1)

if ! OPTS="$(getopt \
Expand Down Expand Up @@ -201,7 +201,7 @@ for element in "${COMPONENTS_ARR[@]}"; do
RUN_MARIADB=1
elif [[ "${element}"x == "db2"x ]]; then
RUN_DB2=1
elif [[ "${element}"x == "oceanbase"x ]];then
elif [[ "${element}"x == "oceanbase"x ]]; then
RUN_OCEANBASE=1
elif [[ "${element}"x == "lakesoul"x ]]; then
RUN_LAKESOUL=1
Expand Down Expand Up @@ -376,7 +376,7 @@ start_hive2() {
. "${ROOT}"/docker-compose/hive/hive-2x_settings.env
envsubst <"${ROOT}"/docker-compose/hive/hive-2x.yaml.tpl >"${ROOT}"/docker-compose/hive/hive-2x.yaml
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl >"${ROOT}"/docker-compose/hive/hadoop-hive-2x.env
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive-2x.env.tpl >> "${ROOT}"/docker-compose/hive/hadoop-hive-2x.env
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive-2x.env.tpl >>"${ROOT}"/docker-compose/hive/hadoop-hive-2x.env
sudo docker compose -p ${CONTAINER_UID}hive2 -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive-2x.env down
if [[ "${STOP}" -ne 1 ]]; then
sudo docker compose -p ${CONTAINER_UID}hive2 -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive-2x.env up --build --remove-orphans -d --wait
Expand All @@ -390,7 +390,7 @@ start_hive3() {
. "${ROOT}"/docker-compose/hive/hive-3x_settings.env
envsubst <"${ROOT}"/docker-compose/hive/hive-3x.yaml.tpl >"${ROOT}"/docker-compose/hive/hive-3x.yaml
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl >"${ROOT}"/docker-compose/hive/hadoop-hive-3x.env
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive-3x.env.tpl >> "${ROOT}"/docker-compose/hive/hadoop-hive-3x.env
envsubst <"${ROOT}"/docker-compose/hive/hadoop-hive-3x.env.tpl >>"${ROOT}"/docker-compose/hive/hadoop-hive-3x.env
sudo docker compose -p ${CONTAINER_UID}hive3 -f "${ROOT}"/docker-compose/hive/hive-3x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive-3x.env down
if [[ "${STOP}" -ne 1 ]]; then
sudo docker compose -p ${CONTAINER_UID}hive3 -f "${ROOT}"/docker-compose/hive/hive-3x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive-3x.env up --build --remove-orphans -d --wait
Expand All @@ -409,28 +409,27 @@ start_iceberg() {
if [[ "${STOP}" -ne 1 ]]; then
if [[ ! -d "${ICEBERG_DIR}/data" ]]; then
echo "${ICEBERG_DIR}/data does not exist"
cd "${ICEBERG_DIR}" \
&& rm -f iceberg_data*.zip \
&& wget -P "${ROOT}"/docker-compose/iceberg https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_data_paimon_101.zip \
&& sudo unzip iceberg_data_paimon_101.zip \
&& sudo mv iceberg_data data \
&& sudo rm -rf iceberg_data_paimon_101.zip
cd "${ICEBERG_DIR}" &&
rm -f iceberg_data*.zip &&
wget -P "${ROOT}"/docker-compose/iceberg https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_data_paimon_101.zip &&
sudo unzip iceberg_data_paimon_101.zip &&
sudo mv iceberg_data data &&
sudo rm -rf iceberg_data_paimon_101.zip
cd -
else
echo "${ICEBERG_DIR}/data exist, continue !"
fi

if [[ ! -f "${ICEBERG_DIR}/data/input/jars/iceberg-aws-bundle-1.10.0.jar" ]]; then
if [[ ! -f "${ICEBERG_DIR}/data/input/jars/iceberg-aws-bundle-1.10.0.jar" ]]; then
echo "iceberg 1.10.0 jars does not exist"
cd "${ICEBERG_DIR}" \
&& rm -f iceberg_1_10_0*.jars.tar.gz\
&& wget -P "${ROOT}"/docker-compose/iceberg https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_1_10_0.jars.tar.gz \
&& sudo tar xzvf iceberg_1_10_0.jars.tar.gz -C "data/input/jars" \
&& sudo rm -rf iceberg_1_10_0.jars.tar.gz
cd "${ICEBERG_DIR}" &&
rm -f iceberg_1_10_0*.jars.tar.gz && wget -P "${ROOT}"/docker-compose/iceberg https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_1_10_0.jars.tar.gz &&
sudo tar xzvf iceberg_1_10_0.jars.tar.gz -C "data/input/jars" &&
sudo rm -rf iceberg_1_10_0.jars.tar.gz
cd -
else
else
echo "iceberg 1.10.0 jars exist, continue !"
fi
fi

sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d --wait
fi
Expand Down Expand Up @@ -505,9 +504,9 @@ start_kerberos() {
for i in {1..2}; do
. "${ROOT}"/docker-compose/kerberos/kerberos${i}_settings.env
envsubst <"${ROOT}"/docker-compose/kerberos/hadoop-hive.env.tpl >"${ROOT}"/docker-compose/kerberos/hadoop-hive-${i}.env
envsubst <"${ROOT}"/docker-compose/kerberos/conf/my.cnf.tpl > "${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/my.cnf
envsubst <"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/kdc.conf.tpl > "${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/kdc.conf
envsubst <"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/krb5.conf.tpl > "${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/krb5.conf
envsubst <"${ROOT}"/docker-compose/kerberos/conf/my.cnf.tpl >"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/my.cnf
envsubst <"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/kdc.conf.tpl >"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/kdc.conf
envsubst <"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/krb5.conf.tpl >"${ROOT}"/docker-compose/kerberos/conf/kerberos${i}/krb5.conf
done
sudo chmod a+w /etc/hosts
sudo sed -i "1i${IP_HOST} hadoop-master" /etc/hosts
Expand Down Expand Up @@ -573,12 +572,12 @@ start_iceberg_rest() {
echo "RUN_ICEBERG_REST"
# iceberg-rest with multiple cloud storage backends
ICEBERG_REST_DIR=${ROOT}/docker-compose/iceberg-rest

# generate iceberg-rest.yaml
export CONTAINER_UID=${CONTAINER_UID}
. "${ROOT}"/docker-compose/iceberg-rest/iceberg-rest_settings.env
envsubst <"${ICEBERG_REST_DIR}/docker-compose.yaml.tpl" >"${ICEBERG_REST_DIR}/docker-compose.yaml"

sudo docker compose -f "${ICEBERG_REST_DIR}/docker-compose.yaml" down
if [[ "${STOP}" -ne 1 ]]; then
# Start all three REST catalogs (S3, OSS, COS)
Expand Down Expand Up @@ -606,102 +605,102 @@ fi
declare -A pids

if [[ "${RUN_ES}" -eq 1 ]]; then
start_es > start_es.log 2>&1 &
start_es >start_es.log 2>&1 &
pids["es"]=$!
fi

if [[ "${RUN_MYSQL}" -eq 1 ]]; then
start_mysql > start_mysql.log 2>&1 &
start_mysql >start_mysql.log 2>&1 &
pids["mysql"]=$!
fi

if [[ "${RUN_PG}" -eq 1 ]]; then
start_pg > start_pg.log 2>&1 &
start_pg >start_pg.log 2>&1 &
pids["pg"]=$!
fi

if [[ "${RUN_ORACLE}" -eq 1 ]]; then
start_oracle > start_oracle.log 2>&1 &
start_oracle >start_oracle.log 2>&1 &
pids["oracle"]=$!
fi

if [[ "${RUN_DB2}" -eq 1 ]]; then
start_db2 > start_db2.log 2>&1 &
start_db2 >start_db2.log 2>&1 &
pids["db2"]=$!
fi

if [[ "${RUN_OCEANBASE}" -eq 1 ]]; then
start_oceanbase > start_oceanbase.log 2>&1 &
start_oceanbase >start_oceanbase.log 2>&1 &
pids["oceanbase"]=$!
fi

if [[ "${RUN_SQLSERVER}" -eq 1 ]]; then
start_sqlserver > start_sqlserver.log 2>&1 &
start_sqlserver >start_sqlserver.log 2>&1 &
pids["sqlserver"]=$!
fi

if [[ "${RUN_CLICKHOUSE}" -eq 1 ]]; then
start_clickhouse > start_clickhouse.log 2>&1 &
start_clickhouse >start_clickhouse.log 2>&1 &
pids["clickhouse"]=$!
fi

if [[ "${RUN_KAFKA}" -eq 1 ]]; then
start_kafka > start_kafka.log 2>&1 &
start_kafka >start_kafka.log 2>&1 &
pids["kafka"]=$!
fi

if [[ "${RUN_HIVE2}" -eq 1 ]]; then
start_hive2 > start_hive2.log 2>&1 &
start_hive2 >start_hive2.log 2>&1 &
pids["hive2"]=$!
fi

if [[ "${RUN_HIVE3}" -eq 1 ]]; then
start_hive3 > start_hive3.log 2>&1 &
start_hive3 >start_hive3.log 2>&1 &
pids["hive3"]=$!
fi

if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
start_iceberg > start_iceberg.log 2>&1 &
start_iceberg >start_iceberg.log 2>&1 &
pids["iceberg"]=$!
fi

if [[ "${RUN_ICEBERG_REST}" -eq 1 ]]; then
start_iceberg_rest > start_iceberg_rest.log 2>&1 &
start_iceberg_rest >start_iceberg_rest.log 2>&1 &
pids["iceberg-rest"]=$!
fi

if [[ "${RUN_HUDI}" -eq 1 ]]; then
start_hudi > start_hudi.log 2>&1 &
start_hudi >start_hudi.log 2>&1 &
pids["hudi"]=$!
fi

if [[ "${RUN_MARIADB}" -eq 1 ]]; then
start_mariadb > start_mariadb.log 2>&1 &
start_mariadb >start_mariadb.log 2>&1 &
pids["mariadb"]=$!
fi

if [[ "${RUN_LAKESOUL}" -eq 1 ]]; then
start_lakesoul > start_lakesoule.log 2>&1 &
start_lakesoul >start_lakesoule.log 2>&1 &
pids["lakesoul"]=$!
fi

if [[ "${RUN_MINIO}" -eq 1 ]]; then
start_minio > start_minio.log 2>&1 &
start_minio >start_minio.log 2>&1 &
pids["minio"]=$!
fi

if [[ "${RUN_POLARIS}" -eq 1 ]]; then
start_polaris > start_polaris.log 2>&1 &
start_polaris >start_polaris.log 2>&1 &
pids["polaris"]=$!
fi

if [[ "${RUN_KERBEROS}" -eq 1 ]]; then
start_kerberos > start_kerberos.log 2>&1 &
start_kerberos >start_kerberos.log 2>&1 &
pids["kerberos"]=$!
fi

if [[ "${RUN_RANGER}" -eq 1 ]]; then
start_ranger > start_ranger.log 2>&1 &
start_ranger >start_ranger.log 2>&1 &
pids["ranger"]=$!
fi
echo "waiting all dockers starting done"
Expand Down
Loading
Loading