diff --git a/.gitignore b/.gitignore index 775ba55c13..588206a441 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ target .idea/ .vscode .dir-locals.el +integration_tests/dist_query/dist-query-testing +integration_tests/dist_query/tsbs +integration_tests/dist_query/output diff --git a/integration_tests/Makefile b/integration_tests/Makefile index ebb0d69793..3c9b96974b 100644 --- a/integration_tests/Makefile +++ b/integration_tests/Makefile @@ -8,14 +8,14 @@ CERESMETA_DATA_DIR = /tmp/ceresmeta export CERESDB_TEST_CASE_PATH ?= $(ROOT)/cases/env export CERESDB_TEST_BINARY ?= $(ROOT)/../target/$(MODE)/ceresdb-test -# environment variables for standalone +# Environment variables for standalone export CERESDB_SERVER_GRPC_ENDPOINT ?= 127.0.0.1:8831 export CERESDB_SERVER_HTTP_ENDPOINT ?= 127.0.0.1:5440 export CERESDB_BINARY_PATH ?= $(ROOT)/../target/$(MODE)/ceresdb-server export CERESDB_STDOUT_FILE ?= /tmp/ceresdb-stdout.log export CERESDB_CONFIG_FILE ?= $(ROOT)/../docs/minimal.toml -# environment variables for cluster +# Environment variables for cluster export CERESMETA_BINARY_PATH ?= $(ROOT)/ceresmeta/ceresmeta export CERESMETA_CONFIG_PATH ?= $(ROOT)/config/ceresmeta.toml export CERESMETA_STDOUT_FILE ?= /tmp/ceresmeta-stdout.log @@ -28,6 +28,10 @@ export RUST_BACKTRACE=1 # Whether update related repos export UPDATE_REPOS_TO_LATEST ?= true +# Used in the dist query test: sometimes (e.g. when debugging) we don't want to rebuild the binaries and data, +# in which case we can set it to true.
"""Compare the actual query results of a test run with the expected ones.

Usage:
    python3 diff.py --expected <expected file> --actual <actual file>

The process exits with status 0 when both files are identical and with
status 1 otherwise.
"""
import argparse
import difflib
import sys


def get_args():
    """Parse the command line.

    Returns:
        dict: the parsed arguments; the keys ``expected`` and ``actual`` hold
        the two file paths given on the command line.
    """
    parser = argparse.ArgumentParser(description='cmd args')
    # Both paths are mandatory: there is nothing to compare without them, so
    # let argparse report a usage error instead of failing later in open().
    parser.add_argument('--expected', '-e', type=str, required=True,
                        help='expected queries result file')
    parser.add_argument('--actual', '-a', type=str, required=True,
                        help='actual queries result file')
    return vars(parser.parse_args())


def main():
    """Diff the expected and the actual results and report the outcome.

    Every line of the context diff is printed, followed by the number of diff
    lines. When the files differ, the expected file is overwritten with the
    actual content and the process is terminated with exit status 1.

    Raises:
        SystemExit: with code 1 when the two files differ.
        OSError: when one of the files cannot be read or written.
    """
    args = get_args()

    # Load queries results.
    f_expected_path = args['expected']
    f_actual_path = args['actual']

    with open(f_expected_path, "r") as f_expected:
        expecteds = f_expected.readlines()
    with open(f_actual_path, "r") as f_actual:
        actuals = f_actual.readlines()

    # Diff them.
    diff_num = 0
    for diff in difflib.context_diff(expecteds, actuals):
        diff_num += 1
        # Diff lines normally carry their own newline already; strip it so the
        # output is not double-spaced.
        print(diff.rstrip("\n"))
    print(diff_num)

    # If diff exists, write the actual to expected, so that `git diff` can be
    # used to inspect the differences in detail.
    if diff_num != 0:
        with open(f_expected_path, "w") as f:
            f.writelines(actuals)
        print("Test failed...")
        # Exit explicitly instead of `assert False`: assertions are removed
        # under `python -O`, which would let a failed run report success.
        sys.exit(1)

    print("Test passed...")


if __name__ == '__main__':
    main()
#!/usr/bin/env bash

# Distributed query integration test.
#
# Steps:
#   1. Init (only when DIST_QUERY_TEST_NO_INIT is 'false'): fetch the tsbs and
#      dist-query-testing repositories and build the tsbs binaries.
#   2. Drop the `cpu` table if it exists and load ${DATA_FILE} into CeresDB.
#   3. Run the queries of ${CASE_QUERY} and store the responses.
#   4. Compare the responses with the expected results through diff.py.
#
# Settings that can be overridden through environment variables:
#   - OUTPUT_DIR
#   - RESULT_FILE
#   - CERESDB_ADDR
#   - CERESDB_HTTP_ADDR
#   - WRITE_WORKER_NUM
#   - WRITE_BATCH_SIZE
#   - DATA_FILE
#   - HOST_NUM
#   - QUERY_EXPECTED_RESULTS_FILE
#   - UPDATE_REPOS_TO_LATEST
#   - DIST_QUERY_TEST_NO_INIT

# Get current dir
CURR_DIR=$(pwd)
export CURR_DIR

# Related components
TSBS_REPO_PATH=${CURR_DIR}/tsbs
DATA_REPO_PATH=${CURR_DIR}/dist-query-testing
# Case contexts
CASE_DIR=tsbs-cpu-only
CASE_DATASOURCE=data.out
CASE_QUERY=single-groupby-5-8-1-queries.gz
CASE_QUERY_RESULT=queries.result

# The init steps below only run when this is exactly 'false'. Default it here
# so that running the script directly (without the Makefile exporting it) does
# not silently skip cloning and building.
DIST_QUERY_TEST_NO_INIT=${DIST_QUERY_TEST_NO_INIT:-false}

# Test params
export OUTPUT_DIR=${OUTPUT_DIR:-${CURR_DIR}/output}
# DEFAULT_RESULT_FILE is not defined anywhere in this script, so defaulting to
# it yielded an empty value unless it happened to be set in the environment;
# default to a file below the output dir instead.
export RESULT_FILE=${RESULT_FILE:-${OUTPUT_DIR}/result.md}
export CERESDB_ADDR=${CERESDB_ADDR:-127.0.0.1:8831}
export CERESDB_HTTP_ADDR=${CERESDB_HTTP_ADDR:-127.0.0.1:5440}
export WRITE_WORKER_NUM=${WRITE_WORKER_NUM:-36}
export WRITE_BATCH_SIZE=${WRITE_BATCH_SIZE:-500}
## Where generated data stored
export DATA_FILE=${DATA_FILE:-${DATA_REPO_PATH}/${CASE_DIR}/${CASE_DATASOURCE}}
## How many values in host tag
export HOST_NUM=${HOST_NUM:-10000}
## Where the queries to run are stored
export BULK_DATA_DIR=${DATA_REPO_PATH}/${CASE_DIR}
## Query types; presumably consumed by `generate_queries.sh`, which this
## script does not invoke - TODO confirm it is still needed.
export QUERY_TYPES="\
single-groupby-1-1-1 \
single-groupby-1-8-1 \
single-groupby-5-1-1 \
single-groupby-5-8-1"
## Where query results stored. Placed below OUTPUT_DIR because that is the
## only directory this script creates.
## NOTE(review): 'reuslt' looks like a typo of 'result'; the name is only used
## through this variable, so it is kept unchanged here.
export QUERY_RESULTS_FILE=${OUTPUT_DIR}/queries.reuslt.tmp
export QUERY_EXPECTED_RESULTS_FILE=${QUERY_EXPECTED_RESULTS_FILE:-${DATA_REPO_PATH}/${CASE_DIR}/${CASE_QUERY_RESULT}}

set -x

mkdir -p "${OUTPUT_DIR}"

# Prepare components
## Tsbs
if [[ -d ${TSBS_REPO_PATH} ]] && [[ ${UPDATE_REPOS_TO_LATEST} == 'true' ]] && [[ ${DIST_QUERY_TEST_NO_INIT} == 'false' ]]; then
    echo "Remove old tsbs..."
    rm -rf "${TSBS_REPO_PATH}"
fi

if [[ ! -d ${TSBS_REPO_PATH} ]] && [[ ${DIST_QUERY_TEST_NO_INIT} == 'false' ]]; then
    echo "Pull tsbs repo..."
    git clone -b support-partitioned-table --depth 1 --single-branch https://github.com/Rachelint/tsbs.git "${TSBS_REPO_PATH}"
fi

## Data
if [[ -d ${DATA_REPO_PATH} ]] && [[ ${UPDATE_REPOS_TO_LATEST} == 'true' ]] && [[ ${DIST_QUERY_TEST_NO_INIT} == 'false' ]]; then
    echo "Remove old dist query testing..."
    rm -rf "${DATA_REPO_PATH}"
fi

echo "${DATA_REPO_PATH}"
if [[ ! -d ${DATA_REPO_PATH} ]] && [[ ${DIST_QUERY_TEST_NO_INIT} == 'false' ]]; then
    echo "Pull dist query testing repo..."
    git clone -b main --depth 1 --single-branch https://github.com/CeresDB/dist-query-testing.git "${DATA_REPO_PATH}"
fi

## Build tsbs bins
if [[ ${DIST_QUERY_TEST_NO_INIT} == 'false' ]]; then
    # NOTE(review): the working directory stays inside the tsbs repo for the
    # rest of the script in this mode; everything below uses absolute paths.
    cd "${TSBS_REPO_PATH}"
    go build ./cmd/tsbs_generate_data
    go build ./cmd/tsbs_load_ceresdb
    go build ./cmd/tsbs_generate_queries
    go build ./cmd/tsbs_run_queries_ceresdb
fi

# Clean old table if exist
curl -XPOST "${CERESDB_HTTP_ADDR}/sql" -d 'DROP TABLE IF EXISTS `cpu`'

# Write data to ceresdb
"${TSBS_REPO_PATH}/tsbs_load_ceresdb" \
    --ceresdb-addr="${CERESDB_ADDR}" \
    --file "${DATA_FILE}" \
    --batch-size "${WRITE_BATCH_SIZE}" \
    --workers "${WRITE_WORKER_NUM}" \
    --access-mode proxy \
    --partition-keys hostname \
    --update-mode APPEND \
    | tee "${OUTPUT_DIR}/${CASE_DIR}-${CASE_DATASOURCE}.log"

# Run queries against ceresdb
# TODO: support more kinds of queries besides 5-8-1.
gunzip -c "${BULK_DATA_DIR}/${CASE_QUERY}" \
    | "${TSBS_REPO_PATH}/tsbs_run_queries_ceresdb" \
        --ceresdb-addr="${CERESDB_ADDR}" \
        --print-responses true \
        --access-mode proxy \
        --responses-file "${QUERY_RESULTS_FILE}" \
    | tee "${OUTPUT_DIR}/${CASE_DIR}-${CASE_QUERY}.log"

# Diff the results
python3 "${CURR_DIR}/diff.py" --expected "${QUERY_EXPECTED_RESULTS_FILE}" --actual "${QUERY_RESULTS_FILE}"