Skip to content

Commit

Permalink
Issue 791 (#825)
Browse files Browse the repository at this point in the history
<!--
1. Title: [#<issue>] <type>(<scope>): <subject>
   Examples:
     - "[#123] feat(operator): support xxx"
     - "[#233] fix: check null before access result in xxx"
     - "[MINOR] refactor: fix typo in variable name"
     - "[MINOR] docs: fix typo in README"
     - "[#255] test: fix flaky test NameOfTheTest"
   Reference: https://www.conventionalcommits.org/en/v1.0.0/
2. If the PR is unfinished, please mark this PR as draft.
-->

### What changes were proposed in this pull request?

(Please outline the changes and how this PR fixes the issue.)

### Why are the changes needed?

(Please clarify why the changes are needed. For instance,
  1. If you propose a new API, clarify the use case for a new API.
  2. If you fix a bug, describe the bug.)

Fix: # (issue)

### Does this PR introduce _any_ user-facing change?

(Please list the user-facing changes introduced by your change,
including
  1. Change in user-facing APIs.
  2. Addition or removal of property keys.)

### How was this patch tested?

(Please test your changes, and provide instructions on how to test it:
1. If you add a feature or fix a bug, add a test to cover your changes.
2. If you fix a flaky test, repeat it many times to prove it works.)
  • Loading branch information
xunliu authored Nov 26, 2023
1 parent ad0c781 commit 80a0ad6
Show file tree
Hide file tree
Showing 11 changed files with 235 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
required: true
default: 'gravitino-ci-hive'
options:
- 'gravitino'
- 'gravitino-ci-hive'
- 'gravitino-ci-trino'
tag:
Expand Down Expand Up @@ -36,6 +37,9 @@ jobs:
elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-trino" ]; then
echo "image_type=trino" >> $GITHUB_ENV
echo "image_name=datastrato/gravitino-ci-trino" >> $GITHUB_ENV
elif [ "${{ github.event.inputs.image }}" == "gravitino" ]; then
echo "image_type=gravitino" >> $GITHUB_ENV
echo "image_name=datastrato/gravitino" >> $GITHUB_ENV
fi
- uses: actions/checkout@v3
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ out/**
distribution
server/src/main/resources/project.properties

dev/docker/hive/packages
dev/docker/*/packages
dev/docker/gravitino/package
docs/build

dev/docker/tools/docker-connector
Expand Down
4 changes: 3 additions & 1 deletion dev/docker/build-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ script_dir="$(dirname "${BASH_SOURCE-$0}")"
script_dir="$(cd "${script_dir}">/dev/null; pwd)"

# Build docker image for multi-arch
USAGE="-e Usage: ./build-docker.sh --platform [all|linux/amd64|linux/arm64] --type [hive|trino] --image {image_name} --tag {tag_name} --latest"
USAGE="-e Usage: ./build-docker.sh --platform [all|linux/amd64|linux/arm64] --type [hive|trino|gravitino] --image {image_name} --tag {tag_name} --latest"

# Get platform type
if [[ "$1" == "--platform" ]]; then
Expand Down Expand Up @@ -67,6 +67,8 @@ if [[ "${component_type}" == "hive" ]]; then
build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME}"
elif [ "${component_type}" == "trino" ]; then
true # Placeholder, do nothing
elif [ "${component_type}" == "gravitino" ]; then
. ${script_dir}/gravitino/gravitino-dependency.sh
else
echo "ERROR : ${component_type} is not a valid component type"
echo ${USAGE}
Expand Down
14 changes: 14 additions & 0 deletions dev/docker/gravitino/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#
# Image that runs the Gravitino server from a pre-built distribution on a
# JDK 8 base image.
FROM openjdk:8-jdk-buster
LABEL maintainer="support@datastrato.com"

WORKDIR /root/gravitino

# packages/gravitino is staged inside the build context by
# gravitino-dependency.sh before the image is built.
COPY /packages/gravitino /root/gravitino

# Port the playground uses to reach Gravitino (http://localhost:8090).
EXPOSE 8090

# gravitino-dependency.sh appends `tail -f /dev/null` to the staged
# gravitino.sh, so this entrypoint keeps running after the start command.
ENTRYPOINT ["/bin/bash", "/root/gravitino/bin/gravitino.sh", "start"]
23 changes: 23 additions & 0 deletions dev/docker/gravitino/gravitino-dependency.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#
# Stage a Gravitino distribution for the Docker image build.
#
# Sourced by build-docker.sh when the component type is "gravitino". Copies
# <repo>/distribution/package to ./packages/gravitino next to this script,
# building the distribution first when it does not exist yet.
set -ex
gravitino_dir="$(dirname "${BASH_SOURCE-$0}")"
gravitino_dir="$(cd "${gravitino_dir}">/dev/null; pwd)"
gravitino_home="$(cd "${gravitino_dir}/../../..">/dev/null; pwd)"

# Build the distribution only when no previous build output exists.
if [[ ! -d "${gravitino_home}/distribution/package/" ]]; then
  # Run the Gradle wrapper as a command with separate arguments. Sourcing the
  # single quoted string "<path>/gradlew compileDistribution -x test" looks for
  # a file with that literal name and always fails. The subshell keeps the
  # caller's working directory untouched.
  (cd "${gravitino_home}" && ./gradlew compileDistribution -x test)
fi

# packages/ is git-ignored, so it may not exist yet; cp -r needs the parent.
mkdir -p "${gravitino_dir}/packages"
rm -rf "${gravitino_dir}/packages/gravitino"
cp -r "${gravitino_home}/distribution/package" "${gravitino_dir}/packages/gravitino"

# Append a blocking command to the staged gravitino.sh so that the script,
# which is the image ENTRYPOINT, does not return and end the container.
cat <<EOF >> "${gravitino_dir}/packages/gravitino/bin/gravitino.sh"
# persist the container
tail -f /dev/null
EOF
4 changes: 4 additions & 0 deletions dev/docker/hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ RUN chown -R datastrato:hadoop /home/datastrato
# removed install packages
RUN rm -rf /tmp/packages

################################################################################
HEALTHCHECK --interval=30s --timeout=30s --retries=10 \
CMD /tmp/check-status.sh || exit 1

################################################################################
# expose port
EXPOSE 22 3306 8088 9000 9083 10000 10002 50070 50075 50010
Expand Down
31 changes: 31 additions & 0 deletions dev/docker/playground/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<!--
Copyright 2023 Datastrato.
This software is licensed under the Apache License version 2.
-->
# Playground
This is a complete Gravitino runtime environment with `Hive`, `Hdfs`, `Trino`, and `Gravitino` Server. Just execute the `./launch-playground.sh` script.
It will automatically start the `gravitino-ci-hive`, `gravitino-ci-trino`, and `Gravitino` Docker containers on the local host.
Depending on your network, the startup may take 3-5 minutes.
Once the playground environment has started, you can open http://localhost:8090 to access the Gravitino Web UI.
And use a Trino client (such as Datagrip) to test Gravitino by connecting to the Trino Docker container via `jdbc:trino://127.0.0.1:8080`.
You can test in Trino using the following SQL:

```shell
CREATE SCHEMA "playground_metalake.playground_hive".db1
WITH (location = 'hdfs://hive:9000/user/hive/warehouse/db1.db');

show create schema "playground_metalake.playground_hive".db1;

create table "playground_metalake.playground_hive".db1.table_001
(
name varchar,
salary varchar
)
WITH (
format = 'TEXTFILE'
);

insert into "playground_metalake.playground_hive".db1.table_001 (name, salary) values ('sam', '11');

select * from "playground_metalake.playground_hive".db1.table_001;
```
74 changes: 74 additions & 0 deletions dev/docker/playground/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Playground stack: Hive, Gravitino and Trino containers. The depends_on
# conditions below start them in that order, each waiting for the previous
# service to report healthy.
version: '3.3'
services:
# Hive container; considered healthy once /tmp/check-status.sh succeeds.
hive:
image: datastrato/gravitino-ci-hive:0.1.6
ports:
- "3306:3306"
- "8088:8088"
- "9000:9000"
- "9083:9083"
- "10000:10000"
- "10002:10002"
- "50070:50070"
- "50010:50010"
container_name: playground-hive
environment:
- HADOOP_USER_NAME=root
healthcheck:
test: ["CMD", "/tmp/check-status.sh"]
interval: 10s
timeout: 30s
retries: 10
deploy:
resources:
limits:
cpus: "1"
memory: 1G

# Gravitino server; started only after hive is healthy.
gravitino:
# NOTE(review): the tag is spelled `SNAPSHORT` - confirm it matches the
# published image tag before correcting it.
image: datastrato/gravitino:0.3.0-SNAPSHORT
ports:
- "8090:8090"
# NOTE(review): `gravition` looks like a typo of `gravitino` - confirm nothing
# depends on this container name before renaming.
container_name: playground-gravition
depends_on:
hive :
condition: service_healthy
# Mount the local healthcheck scripts so the healthcheck below can run them.
volumes:
- ./healthcheck:/tmp/healthcheck
deploy:
resources:
limits:
cpus: "0.5"
memory: 500M
# gravitino-healthcheck.sh also issues the requests that create the playground
# metalake and catalog, then verifies both are listed.
healthcheck:
test: ["CMD", "/tmp/healthcheck/gravitino-healthcheck.sh"]
interval: 10s
timeout: 30s
retries: 10

# Trino; started only after both hive and gravitino are healthy.
trino:
image: datastrato/gravitino-ci-trino:0.1.0
ports:
- "8080:8080"
container_name: playground-trino
environment:
- HADOOP_USER_NAME=root
# The conf and connector directories under ./packages are prepared by
# launch-playground.sh before `docker-compose up` runs.
volumes:
- ./packages/trino/conf:/etc/trino:ro
- ./packages/gravitino-trino-connector:/usr/lib/trino/plugin/gravitino:ro
- ./healthcheck:/tmp/healthcheck
depends_on:
hive :
condition: service_healthy
gravitino :
condition: service_healthy
deploy:
resources:
limits:
cpus: "1"
memory: 1G
# The Trino healthcheck is currently disabled; kept here for reference.
# healthcheck:
# test: ["CMD", "/tmp/healthcheck/trino-healthcheck.sh"]
# interval: 30s
# timeout: 30s
# retries: 5
28 changes: 28 additions & 0 deletions dev/docker/playground/healthcheck/gravitino-healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#
# Healthcheck for the playground Gravitino container.
#
# Sends the create requests for the playground metalake and its Hive catalog,
# then confirms both names appear in the server's listings. Exits 1 when
# either name is missing, 0 otherwise.
set -ex

#######################################
# Fetch a listing URL and require that a name appears in the response.
# Arguments: $1 - label used in the message ("Matalake" / "Catalog")
#            $2 - name to look for in the response body
#            $3 - URL to GET
# Outputs:   a success or failure message on stdout
# Returns:   exits the script with status 1 when the name is not found
#######################################
verify_listed() {
  local label="$1"
  local name="$2"
  local url="$3"
  local listing

  listing=$(curl -X GET -H "Content-Type: application/json" "${url}")
  if ! grep -q "${name}" <<<"${listing}"; then
    echo "${label} ${name} create failed"
    exit 1
  fi
  echo "${label} ${name} successfully created"
}

# Create the metalake, then the Hive catalog inside it.
curl -X POST -H "Content-Type: application/json" -d '{"name":"playground_metalake","comment":"comment","properties":{}}' http://127.0.0.1:8090/api/metalakes

curl -X POST -H "Content-Type: application/json" -d '{"name":"playground_hive","type":"RELATIONAL", "provider":"hive", "comment":"comment","properties":{"metastore.uris":"thrift://hive:9083"}}' http://127.0.0.1:8090/api/metalakes/playground_metalake/catalogs

verify_listed "Matalake" "playground_metalake" http://127.0.0.1:8090/api/metalakes
verify_listed "Catalog" "playground_hive" http://127.0.0.1:8090/api/metalakes/playground_metalake/catalogs

exit 0
19 changes: 19 additions & 0 deletions dev/docker/playground/healthcheck/trino-healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#
# Healthcheck for the playground Trino container: succeeds only when Trino
# lists the Gravitino-backed catalog "playground_metalake.playground_hive".
set -ex

# Ask Trino for the concrete catalog name. The previous pattern '%s.%s' was an
# unfilled format string, and a stray bare SQL line ahead of this command was
# executed as a shell command, aborting the script under `set -e`.
# `_` is a single-character wildcard in LIKE, so the fixed-string grep below
# performs the exact name check.
response=$(trino --execute "SHOW CATALOGS LIKE 'playground_metalake.playground_hive'")
if echo "$response" | grep -qF "playground_metalake.playground_hive"; then
  echo "Catalog playground_hive successfully created"
else
  echo "Catalog playground_hive create failed"
  exit 1
fi

exit 0
33 changes: 33 additions & 0 deletions dev/docker/playground/launch-playground.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#
# Launch the Gravitino playground.
#
# Stages the Trino connector and Trino configuration under ./packages next to
# this script, renders the catalog property files from their templates, and
# starts the containers with docker-compose.
set -ex

playground_dir="$(dirname "${BASH_SOURCE-$0}")"
playground_dir="$(cd "${playground_dir}">/dev/null; pwd)"
gravitino_home="$(cd "${playground_dir}/../../..">/dev/null; pwd)"

mkdir -p "${playground_dir}/packages/trino"

# Build the Trino connector only when no previous build output exists.
if [[ ! -d "${gravitino_home}/distribution/gravitino-trino-connector" ]]; then
  # Run the Gradle wrapper as a command with separate arguments. Sourcing the
  # single quoted string "<path>/gradlew assembleTrinoConnector -x test" looks
  # for a file with that literal name and always fails.
  (cd "${gravitino_home}" && ./gradlew assembleTrinoConnector -x test)
fi
rm -rf "${playground_dir}/packages/gravitino-trino-connector"
cp -r "${gravitino_home}/distribution/gravitino-trino-connector" "${playground_dir}/packages/gravitino-trino-connector"

rm -rf "${playground_dir}/packages/trino/conf"
cp -r -p "${gravitino_home}/dev/docker/trino/conf" "${playground_dir}/packages/trino/conf"

catalog_dir="${playground_dir}/packages/trino/conf/catalog"

# gravitino.uri = http://GRAVITINO_HOST_IP:GRAVITINO_HOST_PORT
# gravitino.metalake = GRAVITINO_METALAKE_NAME
# Both substitutions run in one pass from the template. This avoids
# `sed -i ''`, which only works with BSD sed and fails with GNU sed.
sed \
  -e 's/GRAVITINO_HOST_IP:GRAVITINO_HOST_PORT/gravitino:8090/g' \
  -e 's/GRAVITINO_METALAKE_NAME/playground_metalake/g' \
  "${catalog_dir}/gravitino.properties.template" > "${catalog_dir}/gravitino.properties"

# hive.metastore.uri = thrift://HIVE_HOST_IP:9083
sed 's/HIVE_HOST_IP/hive/g' "${catalog_dir}/hive.properties.template" > "${catalog_dir}/hive.properties"

# docker-compose looks for docker-compose.yaml from the working directory, so
# switch to the playground directory to make the script location-independent.
cd "${playground_dir}"
docker-compose up

0 comments on commit 80a0ad6

Please sign in to comment.