Skip to content
This repository has been archived by the owner on Jan 22, 2025. It is now read-only.

Commit

Permalink
different stopping condition
Browse files: browse the repository at this point in the history
  • Loading branch information
AshwinSekar committed Nov 4, 2021
1 parent 00b23d2 commit 06113f5
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 16 deletions.
2 changes: 1 addition & 1 deletion system-test/automation_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ function get_validator_confirmation_time {
curl -G "${INFLUX_HOST}/query?u=ro&p=topsecret" \
--data-urlencode "db=${TESTNET_TAG}" \
--data-urlencode "q=$q_mean_confirmation" |
python3 "${REPO_ROOT}"/system-test/testnet-automation-json-parser.py |
python3 "${REPO_ROOT}"/system-test/testnet-automation-json-parser.py --empty_error |
cut -d' ' -f2)
}

Expand Down
8 changes: 4 additions & 4 deletions system-test/partition-testcases/gce-partition-recovery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ steps:
- command: "system-test/testnet-automation.sh"
label: "Partition recovery on GCE"
env:
UPLOAD_RESULTS_TO_SLACK: "false"
UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "gce"
ENABLE_GPU: "false"
NUMBER_OF_VALIDATOR_NODES: 4
NUMBER_OF_VALIDATOR_NODES: 9
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16"
NUMBER_OF_CLIENT_NODES: 1
ADDITIONAL_FLAGS: "--dedicated"
Expand All @@ -15,8 +15,8 @@ steps:
TEST_TYPE: "script"
WARMUP_SLOTS_BEFORE_TEST: 400
PRE_PARTITION_DURATION: 120
PARTITION_DURATION: 600
PARTITION_INCREMENT: 120
PARTITION_DURATION: 360
PARTITION_INCREMENT: 60
NETEM_CONFIG_FILE: "system-test/netem-configs/complete-loss-two-partitions"
CUSTOM_SCRIPT: "system-test/partition-testcases/measure-partition-recovery.sh"
agents:
Expand Down
28 changes: 20 additions & 8 deletions system-test/partition-testcases/measure-partition-recovery.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,40 @@ target=$mean_confirmation_ms

while true; do
execution_step "Applying partition config $NETEM_CONFIG_FILE for $PARTITION_DURATION seconds"
echo "Partitioning for $PARTITION_DURATION seconds" >> "$RESULT_FILE"
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE" -n $num_online_nodes
sleep "$PARTITION_DURATION"

execution_step "Resolving partition"
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE" --netem-cmd cleanup -n $num_online_nodes

get_validator_confirmation_time 10
time=0
echo "Validator confirmation is $mean_confirmation_ms ms immediately after resolving the partition"
SECONDS=0

while [[ $mean_confirmation_ms == "expected" || $mean_confirmation_ms -gt $target ]]; do
sleep 1
time=$(( time + 1 ))
# This happens when we haven't confirmed anything recently so the query returns an empty string
while [[ -z $mean_confirmation_ms ]]; do
sleep 5
get_validator_confirmation_time 10
if [[ $SECONDS -gt $PARTITION_DURATION ]]; then
echo " No confirmations seen after $SECONDS seconds" >> "$RESULT_FILE"
exit 0
fi
done
echo " Validator confirmation is $mean_confirmation_ms ms $SECONDS seconds after resolving the partition" >> "$RESULT_FILE"

last=""
while [[ -z $mean_confirmation_ms || $mean_confirmation_ms -gt $target ]]; do
sleep 5

if [[ $time -gt $PARTITION_DURATION ]]; then
echo "Partition Duration: $PARTITION_DURATION: Unable to make progress after $time seconds. Confirmation time did not fall below pre partition confirmation time" >> "$RESULT_FILE"
if [[ -n $mean_confirmation_ms && -n $last && $mean_confirmation_ms -gt $(echo "$last * 1.2" | bc) || $SECONDS -gt $PARTITION_DURATION ]]; then
echo " Unable to make progress after $SECONDS seconds. Last confirmation time was $mean_confirmation_ms ms" >> "$RESULT_FILE"
exit 0
fi
last=$mean_confirmation_ms
get_validator_confirmation_time 10
done

echo "Partition Duration: $PARTITION: $time seconds for validator confirmation to fall to $mean_confirmation_ms ms" >> "$RESULT_FILE"
echo " Recovered in $SECONDS seconds: validator confirmation to fall to $mean_confirmation_ms ms" >> "$RESULT_FILE"

PARTITION_DURATION=$(( PARTITION_DURATION + PARTITION_INCREMENT ))
done
10 changes: 7 additions & 3 deletions system-test/testnet-automation-json-parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#!/usr/bin/env python3
import sys, json
import sys, json, argparse

parser = argparse.ArgumentParser()
parser.add_argument("--empty_error", action="store_true", help="If present, do not print error message")
args = parser.parse_args()

data=json.load(sys.stdin)

if 'results' in data:
for result in data['results']:
if 'series' in result:
print(result['series'][0]['columns'][1] + ': ' + str(result['series'][0]['values'][0][1]))
else:
elif not args.empty_error:
print("An expected result from CURL request is missing")
else:
elif not args.empty_error:
print("No results returned from CURL request")

0 comments on commit 06113f5

Please sign in to comment.