Improve test framework: fix job control bugs

The non-deterministic bug of "job %N terminated without result" is now fixed. The cause was that Bash job numbers are allocated as max(current job numbers) + 1, so if the most recent job exited was not harvested before the next job was started, then the job number would get re-used, deleting the un-harvested job's result file. SIGINT (ctrl-C interrupt) handling has been improved for the -j (parallel) and non-parallel cases. Test case execution in the non-parallel case is faster, by fixing a bug that waited for a one-second timeout before starting each test job.
2025-02-21 09:51:50 +00:00 · 2013-02-07 17:06:30 +10:30 · 2013-02-07 17:06:30 +10:30 · 819e9bb8b2
commit 819e9bb8b2
parent e01b2cacd8
1 changed files with 131 additions and 105 deletions
--- a/testframework.sh
+++ b/testframework.sh
@ -125,7 +125,6 @@ declare -a _tfw_included_tests=()
 declare -a _tfw_test_names=()
 declare -a _tfw_test_sourcefiles=()
 declare -a _tfw_job_pgids=()
 declare -a _tfw_running_jobs=()
 # The rest of this file is parsed for extended glob patterns.
 _tfw_shopt _tfw_orig_shopt -s extglob
@ -211,7 +210,7 @@ runTests() {
      return 0
   fi
   # Create base temporary working directory.
-   trap '_tfw_status=$?; _tfw_killtests; rm -rf "$_tfw_tmpmain"; exit $_tfw_status' EXIT SIGHUP SIGINT SIGTERM
+   trap '_tfw_status=$?; _tfw_killtests 2>/dev/null; rm -rf "$_tfw_tmpmain"; trap - EXIT; exit $_tfw_status' EXIT SIGHUP SIGINT SIGTERM
   rm -rf "$_tfw_tmpmain"
   mkdir -p "$_tfw_tmpmain" || return $?
   # Create an empty results directory.
@ -229,20 +228,22 @@ runTests() {
   _tfw_failcount=0
   _tfw_errorcount=0
   _tfw_fatalcount=0
   _tfw_running_jobs=()
   _tfw_job_pgids=()
   _tfw_job_pgids[0]=
   _tfw_test_number_watermark=0
   local testNumber
   local testPosition=0
   for ((testNumber = 1; testNumber <= ${#_tfw_test_names[*]}; ++testNumber)); do
      local testSourceFile="${_tfw_test_sourcefiles[$(($testNumber - 1))]}"
      local testName="${_tfw_test_names[$(($testNumber - 1))]}"
      local scriptName="${testSourceFile#$_tfw_script_dir/}"
      _tfw_filter_predicate "$testNumber" "$testName" "${filters[@]}" || continue
      let ++testPosition
      let ++_tfw_testcount
      # Wait for any existing child process to finish.
-      while [ $_tfw_njobs -ne 0 -a ${#_tfw_running_jobs[*]} -ge $_tfw_njobs ]; do
+      while [ $_tfw_njobs -ne 0 -a $(_tfw_count_running_jobs) -ge $_tfw_njobs ]; do
-         _tfw_harvest_processes
+         _tfw_wait_job_finish
         _tfw_harvest_jobs
      done
      [ $_tfw_fatalcount -ne 0 ] && break
      $_tfw_stop_on_error && [ $_tfw_errorcount -ne 0 ] && break
@ -254,7 +255,6 @@ runTests() {
      fi
      echo "$testPosition $testNumber $testName" NONE "$testSourceFile" >"$_tfw_results_dir/$testNumber"
      (
         _tfw_test_name="$testName"
         # The directory where this test's log.txt and other artifacts are
         # deposited.
         _tfw_logdir_test="$_tfw_logdir_script/$testNumber.$testName"
@ -270,13 +270,12 @@ runTests() {
         # The path name must be kept short because it is used to construct
         # named socket paths, which have a limited length.
         _tfw_tmp=$_tfw_tmpdir/_tfw-$_tfw_unique
-         trap '_tfw_status=$?; rm -rf "$_tfw_tmp"; exit $_tfw_status' EXIT SIGHUP SIGINT SIGTERM
+         trap '_tfw_status=$?; rm -rf "$_tfw_tmp"; trap - EXIT; exit $_tfw_status' EXIT SIGHUP SIGINT SIGTERM
         mkdir $_tfw_tmp || _tfw_fatalexit
         local start_time=$(_tfw_timestamp)
         local finish_time=unknown
         (
            _tfw_result=FATAL
            mkdir $_tfw_tmp || _tfw_fatalexit
            exec <&- 5>&1 5>&2 6>$_tfw_tmp/log.stdout 1>&6 2>$_tfw_tmp/log.stderr 7>$_tfw_tmp/log.xtrace
            _tfw_stdout=5
            _tfw_stderr=5
            _tfw_log_fd=6
@ -286,7 +285,7 @@ runTests() {
               source "$testSourceFile"
               unset _tfw_running
            fi
-            declare -f test_$_tfw_test_name >/dev/null || _tfw_fatal "test_$_tfw_test_name not defined"
+            declare -f test_$testName >/dev/null || _tfw_fatal "test_$testName not defined"
            export TFWLOG=$_tfw_logdir_test
            export TFWUNIQUE=$_tfw_unique
            export TFWVAR=$_tfw_tmp/var
@ -304,17 +303,17 @@ runTests() {
            fi
            _tfw_phase=setup
            _tfw_result=ERROR
-            trap '_tfw_finalise; _tfw_teardown; _tfw_exit' EXIT SIGHUP SIGINT SIGTERM
+            trap "_tfw_finalise $testName; _tfw_teardown $testName; trap - EXIT; _tfw_exit" EXIT SIGHUP SIGINT SIGTERM
-            _tfw_setup
+            _tfw_setup $testName
            _tfw_result=FAIL
            _tfw_phase=testcase
-            tfw_log "# CALL test_$_tfw_test_name()"
+            tfw_log "# CALL test_$testName()"
            $_tfw_trace && set -x
-            test_$_tfw_test_name
+            test_$testName
            set +x
            _tfw_result=PASS
            _tfw_fatalexit
-         )
+         ) <&- 5>&1 5>&2 6>$_tfw_tmp/log.stdout 1>&6 2>$_tfw_tmp/log.stderr 7>$_tfw_tmp/log.xtrace
         local stat=$?
         finish_time=$(_tfw_timestamp)
         local result=FATAL
@ -325,8 +324,7 @@ runTests() {
         esac
         echo "$testPosition $testNumber $testName $result $testSourceFile" >"$_tfw_results_dir/$testNumber"
         {
-            echo "Name:     $testName"
+            echo "Name:     $testName ($scriptName)"
            echo "Suite:    $_tfw_script_name"
            echo "Result:   $result"
            echo "Started:  $start_time"
            echo "Finished: $finish_time"
@ -345,14 +343,17 @@ runTests() {
         mv "$_tfw_logdir_test" "$_tfw_logdir_test.$result"
         exit 0
      ) </dev/null &
-      local job=$(jobs %% | $SED -n -e '1s/^\[\([0-9]\{1,\}\)\].*/\1/p')
+      local job=$(jobs %% 2>/dev/null | $SED -n -e '1s/^\[\([0-9]\{1,\}\)\].*/\1/p')
-      _tfw_running_jobs+=($job)
+      if [ -n "${_tfw_job_pgids[$job]}" ]; then
-      _tfw_job_pgids[$job]=$(jobs -p %%)
+         _tfw_harvest_job $job
      fi
      _tfw_job_pgids[$job]=$(jobs -p %$job 2>/dev/null)
      ln -f -s "$_tfw_results_dir/$testNumber" "$_tfw_results_dir/job-$job"
   done
   # Wait for all child processes to finish.
-   while [ ${#_tfw_running_jobs[*]} -ne 0 ]; do
+   while _tfw_any_running_jobs; do
-      _tfw_harvest_processes
+      _tfw_wait_job_finish
      _tfw_harvest_jobs
   done
   # Clean up working directory.
   rm -rf "$_tfw_tmpmain"
@ -365,86 +366,108 @@ runTests() {
 _tfw_killtests() {
   if [ $_tfw_njobs -eq 1 ]; then
-      echo -n " killing..."
+      echo " killing..."
   else
-      echo -n -e "\r\rKilling tests...\r"
+      echo -n -e "\r\rKilling tests..."
   fi
   trap '' SIGHUP SIGINT SIGTERM
-   local job
+   local pgid
-   for job in ${_tfw_running_jobs[*]}; do
+   for pgid in $(jobs -p); do
-      kill -TERM %$job 2>/dev/null
+      kill -TERM -$pgid
   done
-   while [ ${#_tfw_running_jobs[*]} -ne 0 ]; do
+   wait
-      _tfw_harvest_processes
+}
 _tfw_count_running_jobs() {
   local count=0
   local job
   for ((job = 1; job < ${#_tfw_job_pgids[*]}; ++job)); do
      [ -n "${_tfw_job_pgids[$job]}" ] && let count=count+1
   done
   echo $count
 }
 _tfw_any_running_jobs() {
   local job
   for ((job = 1; job < ${#_tfw_job_pgids[*]}; ++job)); do
      [ -n "${_tfw_job_pgids[$job]}" ] && return 0
   done
   return 1
 }
 _tfw_wait_job_finish() {
   if [ $_tfw_njobs -eq 1 ]; then
      wait >/dev/null 2>/dev/null
   else
      # This is the only way known to get the effect of a 'wait' builtin that
      # will return when _any_ child dies (or after a one-second timeout).
      set -m
      sleep 1 &
      local spid=$!
      trap "kill -TERM $spid 2>/dev/null" SIGCHLD
      wait $spid >/dev/null 2>/dev/null
      trap - SIGCHLD
   fi
 }
 _tfw_harvest_jobs() {
   local job
   for ((job = 1; job < ${#_tfw_job_pgids[*]}; ++job)); do
      if [ -n "${_tfw_job_pgids[$job]}" ]; then
         jobs %$job >/dev/null 2>/dev/null || _tfw_harvest_job $job
      fi
   done
 }
-_tfw_harvest_processes() {
+_tfw_harvest_job() {
-   # <incantation>
+   local job="$1"
-   # This is the only way known to get the effect of a 'wait' builtin that will
+   # Kill any residual processes from the test case.
-   # return when _any_ child dies or after a one-second timeout.
+   local pgid=${_tfw_job_pgids[$job]}
-   trap 'kill -TERM $spid 2>/dev/null' SIGCHLD
+   [ -n "$pgid" ] && kill -TERM -$pgid 2>/dev/null
-   sleep 1 &
+   _tfw_job_pgids[$job]=
-   spid=$!
+   # Report the test script outcome.
-   set -m
+   if [ -s "$_tfw_results_dir/job-$job" ]; then
-   wait $spid >/dev/null 2>/dev/null
+      local testPosition
-   trap - SIGCHLD
+      local testNumber
-   # </incantation>
+      local testName
-   local -a surviving_jobs=()
+      local result
-   local job
+      local testSourceFile
-   for job in ${_tfw_running_jobs[*]}; do
+      _tfw_unpack_words "$(<"$_tfw_results_dir/job-$job")" testPosition testNumber testName result testSourceFile
-      if jobs %$job >/dev/null 2>/dev/null; then
+      case "$result" in
-         surviving_jobs+=($job)
+      ERROR)
-         continue
+         let _tfw_errorcount=_tfw_errorcount+1
-      fi
+         ;;
-      # Kill any residual processes from the test case.
+      PASS)
-      local pgid=${_tfw_job_pgids[$job]}
+         let _tfw_passcount=_tfw_passcount+1
-      [ -n "$pgid" ] && kill -TERM -$pgid 2>/dev/null
+         ;;
-      # Report the test script outcome.
+      FAIL)
-      if [ -s "$_tfw_results_dir/job-$job" ]; then
+         let _tfw_failcount=_tfw_failcount+1
-         local testPosition
+         ;;
-         local testNumber
+      *)
-         local testName
+         result=FATAL
-         local result
+         let _tfw_fatalcount=_tfw_fatalcount+1
-         local testSourceFile
+         ;;
-         _tfw_unpack_words "$(<"$_tfw_results_dir/job-$job")" testPosition testNumber testName result testSourceFile
+      esac
-         case "$result" in
+      local lines
-         ERROR)
+      if ! $_tfw_verbose && [ $_tfw_njobs -eq 1 ]; then
-            let _tfw_errorcount=_tfw_errorcount+1
+         _tfw_echo_progress $testPosition $testNumber "$testSourceFile" $testName $result
-            ;;
+         echo
-         PASS)
+      elif ! $_tfw_verbose && lines=$($_tfw_tput lines); then
-            let _tfw_passcount=_tfw_passcount+1
+         local travel=$(($_tfw_test_number_watermark - $testPosition + 1))
-            ;;
+         if [ $travel -gt 0 -a $travel -lt $lines ] && $_tfw_tput cuu $travel ; then
         FAIL)
            let _tfw_failcount=_tfw_failcount+1
            ;;
         *)
            result=FATAL
            let _tfw_fatalcount=_tfw_fatalcount+1
            ;;
         esac
         local lines
         if ! $_tfw_verbose && [ $_tfw_njobs -eq 1 ]; then
            _tfw_echo_progress $testPosition $testNumber "$testSourceFile" $testName $result
            echo
         elif ! $_tfw_verbose && lines=$($_tfw_tput lines); then
            local travel=$(($_tfw_test_number_watermark - $testPosition + 1))
            if [ $travel -gt 0 -a $travel -lt $lines ] && $_tfw_tput cuu $travel ; then
               _tfw_echo_progress $testPosition $testNumber "$testSourceFile" $testName $result
               echo
               travel=$(($_tfw_test_number_watermark - $testPosition))
               [ $travel -gt 0 ] && $_tfw_tput cud $travel
            fi
         else
            _tfw_echo_progress $testPosition $testNumber "$testSourceFile" $testName $result
            echo
            travel=$(($_tfw_test_number_watermark - $testPosition))
            [ $travel -gt 0 ] && $_tfw_tput cud $travel
         fi
      else
-         _tfw_echoerr "${BASH_SOURCE[1]}: job %$job terminated without result"
+         _tfw_echo_progress $testPosition $testNumber "$testSourceFile" $testName $result
         echo
      fi
-      rm -f "$_tfw_results_dir/job-$job"
+   else
-   done
+      _tfw_echoerr "${BASH_SOURCE[1]}: job %$job terminated without result"
-   _tfw_running_jobs=(${surviving_jobs[*]})
+   fi
   rm -f "$_tfw_results_dir/job-$job"
 }
 _tfw_echo_progress() {
@ -894,18 +917,19 @@ _tfw_timestamp() {
 }
 _tfw_setup() {
   local testName="$1"
   tfw_log '# SETUP'
-   case `type -t setup_$_tfw_test_name` in
+   case `type -t setup_$testName` in
   function)
-      tfw_log "# call setup_$_tfw_test_name()"
+      tfw_log "# call setup_$testName()"
      $_tfw_trace && set -x
-      setup_$_tfw_test_name $_tfw_test_name
+      setup_$testName $testName
      set +x
      ;;
   *)
-      tfw_log "# call setup($_tfw_test_name)"
+      tfw_log "# call setup($testName)"
      $_tfw_trace && set -x
-      setup $_tfw_test_name
+      setup $testName
      set +x
      ;;
   esac
@ -913,19 +937,20 @@ _tfw_setup() {
 }
 _tfw_finalise() {
   local testName="$1"
   _tfw_phase=finalise
   tfw_log '# FINALISE'
-   case `type -t finally_$_tfw_test_name` in
+   case `type -t finally_$testName` in
   function)
-      tfw_log "# CALL finally_$_tfw_test_name()"
+      tfw_log "# CALL finally_$testName()"
      $_tfw_trace && set -x
-      finally_$_tfw_test_name
+      finally_$testName
      set +x
      ;;
   *)
-      tfw_log "# CALL finally($_tfw_test_name)"
+      tfw_log "# CALL finally($testName)"
      $_tfw_trace && set -x
-      finally $_tfw_test_name
+      finally $testName
      set +x
      ;;
   esac
@ -933,19 +958,20 @@ _tfw_finalise() {
 }
 _tfw_teardown() {
   local testName="$1"
   _tfw_phase=teardown
   tfw_log '# TEARDOWN'
-   case `type -t teardown_$_tfw_test_name` in
+   case `type -t teardown_$testName` in
   function)
-      tfw_log "# CALL teardown_$_tfw_test_name()"
+      tfw_log "# CALL teardown_$testName()"
      $_tfw_trace && set -x
-      teardown_$_tfw_test_name
+      teardown_$testName
      set +x
      ;;
   *)
-      tfw_log "# CALL teardown($_tfw_test_name)"
+      tfw_log "# CALL teardown($testName)"
      $_tfw_trace && set -x
-      teardown $_tfw_test_name
+      teardown $testName
      set +x
      ;;
   esac