
fix tests
xudong963 committed Aug 12, 2024
1 parent 578da08 commit 1f71cb9
Showing 5 changed files with 35 additions and 36 deletions.
@@ -70,7 +70,6 @@ impl CollectStatisticsOptimizer {
.await?;

let sample_filter = scan.sample_filter(&table_stats)?;
dbg!(&sample_filter);

let mut column_stats = HashMap::new();
let mut histograms = HashMap::new();
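
This hunk drops a leftover dbg!(&sample_filter) call from CollectStatisticsOptimizer. For context, a minimal standalone sketch (plain Rust, not this repository's code, with an illustrative filter value) of what dbg! does: it writes the file, line, and Debug representation of its argument to stderr and hands the value back unchanged, which is handy while iterating locally but noisy once committed.

fn main() {
    let sample_filter: Option<&str> = Some("Score > 10");
    // dbg! prints something like
    //   [src/main.rs:6:18] &sample_filter = Some("Score > 10")
    // to stderr, then returns the value unchanged.
    let filter = dbg!(&sample_filter);
    assert_eq!(filter, &Some("Score > 10"));
}
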
@@ -3,12 +3,12 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT

## Create table
cat <<EOF | $BENDSQL_CLIENT_CONNECT
CREATE TABLE sample
CREATE TABLE sample_table
(
Id INT NOT NULL,
City VARCHAR NOT NULL,
@@ -24,27 +24,27 @@ echo "CREATE STAGE s1 FILE_FORMAT = (TYPE = CSV)" | $BENDSQL_CLIENT_CONNECT
echo "list @s1" | $BENDSQL_CLIENT_CONNECT | awk '{print $1}'

## Insert with stage use http API
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample (Id, City, Score) values", "stage_attachment": {"location": "@s1/sample.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error'
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample_table (Id, City, Score) values", "stage_attachment": {"location": "@s1/sample.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error'

## list stage has a metacache, so we just use the aws client to ensure the data are purged
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 ls s3://testbucket/admin/stage/internal/s1/sample.csv

echo "select * from sample" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT


# use placeholder (?, ?, ?)
echo "truncate table sample" | $BENDSQL_CLIENT_CONNECT
echo "truncate table sample_table" | $BENDSQL_CLIENT_CONNECT
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample.csv s3://testbucket/admin/stage/internal/s1/sample1.csv >/dev/null
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample (Id, City, Score) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error'
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample_table (Id, City, Score) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error'
echo "select * from sample" | $BENDSQL_CLIENT_CONNECT

# use placeholder (?, ?, 1+1)
echo "truncate table sample" | $BENDSQL_CLIENT_CONNECT
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_2_columns.csv s3://testbucket/admin/stage/internal/s1/sample2.csv >/dev/null

curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample (Id, City, Score) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error'
echo "select * from sample" | $BENDSQL_CLIENT_CONNECT
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample_table (Id, City, Score) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error'
echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT
#
### Drop table.
echo "drop table sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT
@@ -3,12 +3,12 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT

## Create table
cat <<EOF | $BENDSQL_CLIENT_CONNECT
CREATE TABLE sample
CREATE TABLE sample_table
(
Id INT NOT NULL,
City VARCHAR NOT NULL,
@@ -24,34 +24,34 @@ echo "CREATE STAGE s1 FILE_FORMAT = (TYPE = CSV)" | $BENDSQL_CLIENT_CONNECT
echo "list @s1" | $BENDSQL_CLIENT_CONNECT | awk '{print $1}'

## Insert with stage use http API
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) VALUES", "stage_attachment": {"location": "@s1/sample.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .stats.write_progress.bytes, .error'
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) VALUES", "stage_attachment": {"location": "@s1/sample.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .stats.write_progress.bytes, .error'

## list stage has a metacache, so we just use the aws client to ensure the data are purged
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 ls s3://testbucket/admin/stage/internal/s1/sample.csv

echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT


# use placeholder (?, ?, ?)
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample.csv s3://testbucket/admin/stage/internal/s1/sample1.csv >/dev/null
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT

# use placeholder (?, ?, 1+1)
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_2_columns.csv s3://testbucket/admin/stage/internal/s1/sample2.csv >/dev/null

curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT

aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_replace.csv s3://testbucket/admin/stage/internal/s1/sample3.csv >/dev/null
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample3.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample3.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error'
echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT

# duplicate value would show error and would not take effect
aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_duplicate.csv s3://testbucket/admin/stage/internal/s1/sample4.csv >/dev/null
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample4.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.error'
echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample4.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.error'
echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT

### Drop table.
echo "drop table sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT
@@ -3,12 +3,12 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT

## Create table
cat <<EOF | $BENDSQL_CLIENT_CONNECT
CREATE TABLE sample
CREATE TABLE sample_table
(
Id INT NOT NULL,
City VARCHAR NOT NULL,
@@ -28,7 +28,7 @@ curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" \
--header 'Content-Type: application/json' \
--header 'X-DATABEND-DEDUPLICATE-LABEL: insert1' \
-d '{
"sql": "insert into sample (Id, City, Score) values",
"sql": "insert into sample_table (Id, City, Score) values",
"stage_attachment": {
"location": "@s1/sample.csv"
},
@@ -37,14 +37,14 @@ curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" \
}
}' | jq -r '.stats.scan_progress.bytes, .stats.write_progress.bytes, .error'

echo "select * from sample" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT

## Insert again with the same deduplicate_label will have no effect
curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" \
--header 'Content-Type: application/json' \
--header 'X-DATABEND-DEDUPLICATE-LABEL: insert1' \
-d '{
"sql": "insert into sample (Id, City, Score) values",
"sql": "insert into sample_table (Id, City, Score) values",
"stage_attachment": {
"location": "@s1/sample.csv"
},
@@ -54,8 +54,8 @@ curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" \
}' | jq -r '.stats.scan_progress.bytes, .stats.write_progress.bytes, .error'


echo "select * from sample" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT

### Drop table.
echo "drop table sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT
echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT
@@ -3,11 +3,11 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT
echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT

## Create table
cat <<EOF | $BENDSQL_CLIENT_CONNECT
CREATE TABLE sample
CREATE TABLE sample_table
(
Id INT,
City2 VARCHAR AS (reverse(City)) STORED,
@@ -18,15 +18,15 @@ CREATE TABLE sample
EOF

copy_from_test_csv=(
"copy into sample from 'fs://${TESTS_DATA_DIR}/csv/sample.csv' FILE_FORMAT = (field_delimiter = ',' record_delimiter = '\n' type = CSV) ON_ERROR = ABORT"
"copy into sample_table from 'fs://${TESTS_DATA_DIR}/csv/sample.csv' FILE_FORMAT = (field_delimiter = ',' record_delimiter = '\n' type = CSV) ON_ERROR = ABORT"
)

echo "---test csv field with computed columns"
for i in "${copy_from_test_csv[@]}"; do
echo "$i" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample" | $BENDSQL_CLIENT_CONNECT
echo "truncate table sample" | $BENDSQL_CLIENT_CONNECT
echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT
echo "truncate table sample_table" | $BENDSQL_CLIENT_CONNECT
done

## Drop table
echo "drop table if exists sample;" | $BENDSQL_CLIENT_CONNECT
echo "drop table if exists sample_table;" | $BENDSQL_CLIENT_CONNECT
