Merge pull request #2091 from zerotier/dev-validator-fix-dump-at-abort

Separate test and summary step in validator script
This commit is contained in:
Joseph Henry 2023-08-10 15:38:27 -07:00 committed by GitHub
commit 27a9cc6254
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 96 deletions

View File

@ -3,26 +3,35 @@
# This test script joins Earth and pokes some stuff
TEST_NETWORK=8056c2e21c000001
RUN_LENGTH=60
RUN_LENGTH=20
TEST_FINISHED=false
ZTO_VER=$(git describe --tags $(git rev-list --tags --max-count=1))
ZTO_COMMIT=$(git rev-parse HEAD)
ZTO_COMMIT_SHORT=$(git rev-parse --short HEAD)
TEST_DIR_PREFIX="$ZTO_VER-$ZTO_COMMIT_SHORT-test-results"
EXIT_TEST_FAILED=0
TEST_OK=0
TEST_FAIL=1
echo "Performing test on: $ZTO_VER-$ZTO_COMMIT_SHORT"
TEST_FILEPATH_PREFIX="$TEST_DIR_PREFIX/$ZTO_COMMIT_SHORT"
mkdir $TEST_DIR_PREFIX
# How long we will wait for ZT to come online before considering it a failure
MAX_WAIT_SECS=60
################################################################################
# Multi-node connectivity and performance test #
################################################################################
main() {
echo -e "\nRunning test for $RUN_LENGTH seconds"
test() {
echo -e "\nPerforming pre-flight checks"
check_exit_on_invalid_identity
echo -e "\nRunning test for $RUN_LENGTH seconds"
NS1="ip netns exec ns1"
NS2="ip netns exec ns2"
@ -75,11 +84,11 @@ main() {
# Memory Leak Check #
################################################################################
FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
export FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
echo -e "\nStarting a ZeroTier instance in each namespace..."
time_test_start=$(date +%s)
export time_test_start=$(date +%s)
# Spam the CLI as ZeroTier is starting
spam_cli 100
@ -127,7 +136,6 @@ main() {
################################################################################
echo "Waiting for ZeroTier to come online before attempting test..."
MAX_WAIT_SECS="${MAX_WAIT_SECS:-120}"
node1_online=false
node2_online=false
both_instances_online=false
@ -139,13 +147,13 @@ main() {
node2_online="$($ZT2 -j info | jq '.online' 2>/dev/null)"
echo "Checking for online status: try #$s, node1:$node1_online, node2:$node2_online"
if [[ "$node1_online" == "true" ]]; then
time_zt_node1_online=$(date +%s)
export time_zt_node1_online=$(date +%s)
fi
if [[ "$node2_online" == "true" ]]; then
time_zt_node2_online=$(date +%s)
export time_zt_node2_online=$(date +%s)
fi
if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]; then
both_instances_online=true
export both_instances_online=true
break
fi
sleep 1
@ -172,10 +180,7 @@ main() {
$ZT2 status
if [[ "$both_instances_online" != "true" ]]; then
echo "One or more instances of ZeroTier failed to come online. Aborting test."
collect_zt_dump_files
collect_zt_log_files
exit 0
exit_test_and_generate_report $TEST_FAIL "one or more nodes failed to come online"
fi
echo -e "\nJoining networks"
@ -199,18 +204,14 @@ main() {
$NS1 ping -c 16 $node2_ip4 >$PING12_FILENAME
$NS2 ping -c 16 $node1_ip4 >$PING21_FILENAME
# Parse ping statistics
ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
ping_loss_percent_1_to_2=$(cat $PING12_FILENAME |
grep "packet loss" | awk '{print $6}' | sed 's/%//')
ping_loss_percent_2_to_1=$(cat $PING21_FILENAME |
grep "packet loss" | awk '{print $6}' | sed 's/%//')
# Normalize loss value
ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
export ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
export ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
################################################################################
# CLI Check #
@ -261,11 +262,9 @@ main() {
# TODO: Validate JSON
################################################################################
# Performance Test #
################################################################################
# Performance Test
FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
export FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
echo -e "\nBeginning performance test:"
@ -281,15 +280,7 @@ main() {
cat $FILENAME_PERF_JSON
################################################################################
# Collect ZeroTier dump files #
################################################################################
collect_zt_dump_files
################################################################################
# Let ZeroTier idle long enough for various timers #
################################################################################
# Let ZeroTier idle long enough for various timers
echo -e "\nIdling ZeroTier for $RUN_LENGTH seconds..."
sleep $RUN_LENGTH
@ -301,26 +292,45 @@ main() {
sleep 5
################################################################################
# Stop test #
################################################################################
# Stop test
echo -e "\nStopping memory check..."
sudo pkill -15 -f valgrind
sleep 10
time_test_end=$(date +%s)
export time_test_end=$(date +%s)
################################################################################
# Copy ZeroTier stdout/stderr logs #
################################################################################
collect_zt_log_files
exit_test_and_generate_report $TEST_OK "completed test"
}
################################################################################
# Generate report #
################################################################################
exit_test_and_generate_report() {
echo "Exiting test with reason: $2 ($1)"
# Collect ZeroTier dump files
echo -e "\nCollecting ZeroTier dump files"
node1_id=$($ZT1 -j status | jq -r .address)
node2_id=$($ZT2 -j status | jq -r .address)
$ZT1 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
$ZT2 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
# Copy ZeroTier stdout/stderr logs
cp node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
cp node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
# Generate report
cat $FILENAME_MEMORY_LOG
DEFINITELY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
@ -328,9 +338,7 @@ main() {
POSSIBLY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
$FILENAME_MEMORY_LOG | grep "possibly" | awk '{print $1;}')
################################################################################
# Generate coverage report artifact and summary #
################################################################################
# Generate coverage report artifact and summary
FILENAME_COVERAGE_JSON="$TEST_FILEPATH_PREFIX-coverage.json"
FILENAME_COVERAGE_HTML="$TEST_FILEPATH_PREFIX-coverage.html"
@ -350,22 +358,20 @@ main() {
COVERAGE_LINE_TOTAL="${COVERAGE_LINE_TOTAL:-0}"
COVERAGE_LINE_PERCENT="${COVERAGE_LINE_PERCENT:-0}"
################################################################################
# Default values #
################################################################################
# Default values
DEFINITELY_LOST="${DEFINITELY_LOST:-0}"
POSSIBLY_LOST="${POSSIBLY_LOST:-0}"
ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
################################################################################
# Summarize and emit json for trend reporting #
################################################################################
# Summarize and emit json for trend reporting
FILENAME_SUMMARY="$TEST_FILEPATH_PREFIX-summary.json"
time_length_test=$((time_test_end - time_test_start))
time_length_zt_node1_online=$((time_zt_node1_online - time_zt_start))
time_length_zt_node2_online=$((time_zt_node2_online - time_zt_start))
time_to_node1_online=$((time_zt_node1_online - time_zt_start))
time_to_node2_online=$((time_zt_node2_online - time_zt_start))
#time_length_zt_join=$((time_zt_join_end-time_zt_join_start))
#time_length_zt_leave=$((time_zt_leave_end-time_zt_leave_start))
#time_length_zt_can_still_ping=$((time_zt_can_still_ping-time_zt_leave_start))
@ -377,25 +383,20 @@ main() {
"commit":"$ZTO_COMMIT",
"arch_m":"$(uname -m)",
"arch_a":"$(uname -a)",
"binary_size":"$(stat -c %s zerotier-one)",
"time_length_test":$time_length_test,
"time_length_zt_node1_online":$time_length_zt_node1_online,
"time_length_zt_node2_online":$time_length_zt_node2_online,
"time_to_node1_online":$time_to_node1_online,
"time_to_node2_online":$time_to_node2_online,
"num_possible_bytes_lost": $POSSIBLY_LOST,
"num_definite_bytes_lost": $DEFINITELY_LOST,
"num_incorrect_settings": $POSSIBLY_LOST,
"num_bad_formattings": $POSSIBLY_LOST,
"percent_coverage_branches": $POSSIBLY_LOST,
"coverage_lines_covered": $COVERAGE_LINE_COVERED,
"coverage_lines_total": $COVERAGE_LINE_TOTAL,
"coverage_lines_percent": $COVERAGE_LINE_PERCENT,
"ping_loss_percent_1_to_2": $ping_loss_percent_1_to_2,
"ping_loss_percent_2_to_1": $ping_loss_percent_2_to_1,
"mean_latency_ping_random": $POSSIBLY_LOST,
"mean_latency_ping_netns": $POSSIBLY_LOST,
"mean_pdv_random": $POSSIBLY_LOST,
"mean_pdv_netns": $POSSIBLY_LOST,
"mean_perf_netns": $POSSIBLY_LOST,
"exit_test_failed": $EXIT_TEST_FAILED
"test_exit_code": $1,
"test_exit_reason":"$2"
}
EOF
)
@ -436,6 +437,10 @@ spam_cli() {
done
}
################################################################################
# Check for proper exit on load of invalid identity #
################################################################################
check_exit_on_invalid_identity() {
echo "Checking ZeroTier exits on invalid identity..."
mkdir -p $(pwd)/exit_test
@ -447,35 +452,14 @@ check_exit_on_invalid_identity() {
$ZT1 &
my_pid=$!
echo "Waiting 5 secons"
echo "Waiting 5 seconds"
sleep 5
# check if process is running
kill -0 $my_pid
if [ $? -eq 0 ]; then
EXIT_TEST_FAILED=1
echo "Exit test FAILED: Process still running after being fed an invalid identity"
else
echo "Exit test PASSED"
exit_test_and_generate_report $TEST_FAIL "Exit test FAILED: Process still running after being fed an invalid identity"
fi
}
collect_zt_dump_files() {
echo -e "\nCollecting ZeroTier dump files"
node1_id=$($ZT1 -j status | jq -r .address)
node2_id=$($ZT2 -j status | jq -r .address)
$ZT1 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
$ZT2 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
}
collect_zt_log_files() {
cp node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
cp node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
}
main "$@"
test "$@"

View File

@ -5,6 +5,8 @@
################################################################################
DEFINITELY_LOST=$(cat *test-results/*summary.json | jq .num_definite_bytes_lost)
# The validator's summary JSON stores these fields as "test_exit_code" and
# "test_exit_reason". Reading any other key makes jq print "null", which the
# numeric -gt check on EXIT_CODE treats as 0, so a failed test would go unnoticed.
EXIT_CODE=$(cat *test-results/*summary.json | jq .test_exit_code)
# -r emits the reason as plain text instead of a JSON-quoted string.
EXIT_REASON=$(cat *test-results/*summary.json | jq -r .test_exit_reason)
cat *test-results/*summary.json
@ -14,8 +16,9 @@ if [[ "$DEFINITELY_LOST" -gt 0 ]]; then
exit 1
fi
EXIT_TEST_FAILED=$(cat *test-results/*summary.json | jq .exit_test_failed)
# Catch-all for other non-zero exit codes
if [[ "$EXIT_TEST_FAILED" -gt 0 ]]; then
if [[ "$EXIT_CODE" -gt 0 ]]; then
echo "Test failed: $EXIT_REASON"
exit 1
fi

View File

@ -40,8 +40,8 @@ jobs:
run: |
sudo apt install -y valgrind xmlstarlet gcovr iperf3 tree
make one ZT_COVERAGE=1 ZT_TRACE=1
sudo chmod +x ./.github/workflows/validate-1m-linux.sh
sudo ./.github/workflows/validate-1m-linux.sh
sudo chmod +x ./.github/workflows/validate-linux.sh
sudo ./.github/workflows/validate-linux.sh
- name: Archive test results
uses: actions/upload-artifact@v3
@ -51,6 +51,6 @@ jobs:
- name: final-report
run: |
sudo chmod +x ./.github/workflows/report.sh
sudo ./.github/workflows/report.sh
sudo chmod +x ./.github/workflows/validate-report.sh
sudo ./.github/workflows/validate-report.sh