From a2b62a6495e54aaa3983d0e4afe75612e053c7a7 Mon Sep 17 00:00:00 2001 From: Anton Belodedenko <2033996+ab77@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:23:36 -0700 Subject: [PATCH 1/3] fail early on EC2 instance termination (i.e. spot capacity) change-type: patch --- .github/workflows/tests.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6bd0eaa..52725d5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -469,6 +469,9 @@ jobs: echo "::warning::Still working..." sleep "$(( (RANDOM % 5) + 5 ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done echo "key_id=${GITHUB_SHA}" >> "${GITHUB_OUTPUT}" @@ -505,6 +508,9 @@ jobs: echo "::warning::Still working..." sleep "$(( (RANDOM % 30) + 30 ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done # wait for Docker healthchecks @@ -516,6 +522,9 @@ jobs: echo "::warning::Still working..." sleep "$(( (RANDOM % 30) + 30 ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done # (TBC) https://www.balena.io/docs/reference/supervisor/docker-compose/ @@ -547,11 +556,15 @@ jobs: while with_backoff ssh-uuid -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ ${{ steps.register-test-device.outputs.balena_device_uuid }}.balena \ 'balena ps -q | xargs balena inspect \ - | jq -r ".[] | select(.State.Health.Status!=null).Name + \":\" + .State.Health.Status"' \ + | jq -r ".[] + | select(.State.Health.Status!=null).Name + \":\" + .State.Health.Status"' \ | grep -E ':starting|:unhealthy'; do echo "::warning::Still working..." sleep "$(( (RANDOM % 30) + 30 ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done - name: SUT&DUT (balena) From 65cc3e6ecce0cfeba59f0090f1f816bf76e47b5a Mon Sep 17 00:00:00 2001 From: Anton Belodedenko <2033996+ab77@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:35:13 -0700 Subject: [PATCH 2/3] need to wait for compose too.. --- .github/workflows/tests.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 52725d5..3c9fc50 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -602,6 +602,9 @@ jobs: echo "::warning::Still working..." sleep "$(( ( RANDOM % ${{ env.RETRY }} ) + ${{ env.RETRY }} ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done # .. once the service exits with status == exited, it is assumed to be finished @@ -892,6 +895,9 @@ jobs: echo '::info::waiting for composition...' with_backoff docker compose ls sleep $(((RANDOM%5) + 5))s + + aws ec2 wait instance-running --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} done echo '::info::composition started' @@ -901,6 +907,9 @@ jobs: echo "::info::waiting for ${service}..." with_backoff docker compose ps sleep $(((RANDOM%5) + 5))s + + aws ec2 wait instance-running --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} done echo "::info::${service} started" done @@ -914,6 +923,9 @@ jobs: with_backoff docker compose logs --follow --timestamps sut echo '::info::still running...' sleep $(((RANDOM%1) + 1))s + + aws ec2 wait instance-running --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.ubuntu-sut.outputs.instance_id }} done aws ssm wait command-executed --command-id "${cid}" --instance-id "${iid}" From 51cee3df4fc899e1d4bb095d074c4fd6654c54c2 Mon Sep 17 00:00:00 2001 From: Anton Belodedenko <2033996+ab77@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:08:16 -0700 Subject: [PATCH 3/3] check here too --- .github/workflows/tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3c9fc50..5decbb4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -619,6 +619,9 @@ jobs: --compressed | jq -r '.[].services.sut.status')" sleep "$(( ( RANDOM % ${{ env.RETRY }} ) + ${{ env.RETRY }} ))s" + + aws ec2 wait instance-running --instance-ids ${{ steps.balena-sut.outputs.instance_id }} + aws ec2 wait instance-status-ok --instance-ids ${{ steps.balena-sut.outputs.instance_id }} done # .. check its exit code