Robustify YAML config file validation, enhance unit tests

* Fixed run path/too/deep duplication logic * Add more unit testing to cover edge cases in the YAML file * Unify type checking to reduce code duplication * Empty sections like run: are still ignored but now produce an error to remind the user to clean them up. * Make get_sims() work with labels=None * Test SIMs within deep directory structures to exercise pathing logic * Add test for invalid sim path: syntax Refs #1159
2025-04-14 14:36:46 +00:00 · 2021-06-10 15:22:03 -05:00 · 2021-06-10 15:22:03 -05:00 · 0be5f72e35
commit 0be5f72e35
parent 68cf81736c
6 changed files with 272 additions and 75 deletions
--- a/share/trick/trickops/TrickWorkflow.py
+++ b/share/trick/trickops/TrickWorkflow.py
@ -185,7 +185,7 @@ class TrickWorkflow(WorkflowCommon):
                return sim
        return None

-    def get_sims(self, labels):
+    def get_sims(self, labels=None):
        """
        Get a list of Sim() instances by label or labels listed in self.config_file

@ -194,8 +194,9 @@ class TrickWorkflow(WorkflowCommon):

        Parameters
        ----------
-        labels : str or list
-            label or labels that each sim must have to be returned by this function
+        labels : str or list or None
+            label or labels that each sim must have to be returned by this functionr.
+            If None, return all sims

        Returns
        -------
@ -208,6 +209,9 @@ class TrickWorkflow(WorkflowCommon):
        TypeError
            If labels is not a str or list
        """
+        # If no labels given, just return the full list
+        if not labels:
+            return self.sims
        sims_found = []
        ls = []
        if type(labels) == str:
@ -242,7 +246,7 @@ class TrickWorkflow(WorkflowCommon):
                y = yaml.load(file)
            return y
        except Exception as e:
-            tprint("Unable to parse config file: %s\nError: %s" % (config_file,e), 'DARK_RED')
+            tprint("Unable to parse config file: %s\nERROR: %s" % (config_file,e), 'DARK_RED')
            return None

    def _validate_config(self):
@ -293,13 +297,30 @@ class TrickWorkflow(WorkflowCommon):
        RuntimeError
            If self.config has insufficient content
        """
-        # All error messages should trigger self.config_errors to be True
+        # All error messages in config validation will trigger self.config_errors to be True
        def cprint(msg, color):
            self.config_errors = True
            tprint(msg, color)
+
+        # Utility method for ensuring a variable is the expected type. Returns True if type
+        # matches expected type, False otherwise. If fatal=True, raise RuntimeError.
+        def type_expected(var, expected_type, fatal=False, extra_msg=''):
+            if type(var) != expected_type:
+                prepend = "FATAL: " if fatal else "ERROR: "
+                msg =(prepend + "Entry resembling %s expected to be %s, but got %s instead. Please"
+                      " look for errors in %s. " %
+                      ( str(var), expected_type, type(var), self.config_file) + extra_msg)
+                cprint(msg, 'DARK_RED')
+                if fatal:
+                    raise RuntimeError(msg)
+                else:
+                    return False
+            return True
+
        c = copy.deepcopy(self.config)  # Make a copy for extra saftey
        if not c:  # If entire config is empty
-            msg = "ERROR Config file %s is empty. Cannot continue." % (self.config_file)
+            msg =("ERROR: Config file %s could not be loaded. Make sure file exists and is valid YAML syntax."
+                  " Cannot continue." % (self.config_file))
            cprint(msg, 'DARK_RED')
            self._cleanup()
            raise RuntimeError(msg)
@ -316,12 +337,14 @@ class TrickWorkflow(WorkflowCommon):
            if 'parallel_safety' not in c['globals'] or not c['globals']['parallel_safety']:
                self.parallel_safety = 'loose'
            elif  c['globals']['parallel_safety'] != 'loose' and c['globals']['parallel_safety'] != 'strict':
-                cprint( "ERROR parallel_safety value of %s in config file %s is unsupported. Defaulting to"
+                cprint( "ERROR: parallel_safety value of %s in config file %s is unsupported. Defaulting to"
                  " 'loose' and continuing..." % (c['globals']['parallel_safety'], self.config_file),
                  'DARK_RED')
                self.parallel_safety = 'loose'
            else:
                self.parallel_safety = c['globals']['parallel_safety']
+        if not type_expected(c, expected_type=dict, fatal=True, extra_msg='Cannot continue.'):
+          pass  # Unreachable, type_expected will raise
        c.pop('globals', None) # Remove to iterate on the rest of the non-global content
        all_sim_paths = [] # Keep a list of all paths for uniqueness check
        # Check sub-parameters of SIM entries
@ -334,11 +357,9 @@ class TrickWorkflow(WorkflowCommon):
          # If the structure doesn't start with SIM, ignore it and move on
          if not s.lower().startswith('sim'):
            continue
-          if type(c[s]) is not dict:
-            cprint("ERROR: SIM entry %s is not a dict!. SIM definitions must start with SIM, end"
-                   " with :, and contain the path: <path/to/SIM> key-value pair. Continuing but"
-                   " ignoring this entry from %s."
-                    % (s, self.config_file), 'DARK_RED')
+          # If what's stored under SIM_..: is not itself a dict
+          if not type_expected(c[s], expected_type=dict, extra_msg="SIM definitions must start with SIM, end"
+                   " with :, and contain the path: <path/to/SIM> key-value pair. Skipping over %s." % c[s]):
            self.config.pop(s)
            continue
          # If optional entries missing, or None, set defaults
@ -361,8 +382,14 @@ class TrickWorkflow(WorkflowCommon):
                % (s, self.config_file), 'DARK_RED')
            self.config.pop(s)
            continue
-          if 'labels' not in c[s] or not c[s]['labels']:
-              self.config[s]['labels'] = []
+          # Ensure labels is a list of strings
+          if ('labels' not in c[s] or not c[s]['labels'] or not type_expected(
+            c[s]['labels'], expected_type=list, extra_msg='Ignoring labels.')):
+            self.config[s]['labels'] = []
+          else:
+            sanitized_labels =( [l for l in c[s]['labels'] if type_expected(
+              l, expected_type=str, extra_msg='Ignoring label "%s".' % l) ] )
+            self.config[s]['labels'] = sanitized_labels
          # Create internal object to be populated with runs, valgrind runs, etc
          thisSim = TrickWorkflow.Sim(name=s, sim_dir=self.config[s]['path'],
            description=self.config[s]['description'], labels=self.config[s]['labels'],
@ -371,19 +398,16 @@ class TrickWorkflow(WorkflowCommon):
          all_sim_paths.append(c[s]['path'])
          # RUN sanity checks
          if 'runs' in c[s]:         # If it's there...
-            if not c[s]['runs']:     # but None, remove it
+            if not type_expected(c[s]['runs'], expected_type=dict,
+              extra_msg='Skipping entire run: section in %s' % c[s]):
              self.config[s].pop('runs')
-            elif type( c[s]['runs']) is not dict:     # but None, remove it
-              cprint("ERROR: Run %s is not a dict! Make sure the 'RUN...:' ends with a : in config file %s."
-                   " Continuing but skipping this run: section..."
-                   % (c[s]['runs'], self.config_file), 'DARK_RED')
-              self.config[s].pop('runs')
-              continue
            else:                  # If it's there and a valid list, check paths
              all_run_paths = []   # Keep a list of all paths for uniqueness check
              for r in c[s]['runs']:
-                just_RUN = r.split(' ')[0]
-                just_RUN_dir = r.split('/')[0]
+                if not type_expected(r, expected_type=str, extra_msg='Skipping this run.'):
+                  continue
+                just_RUN = r.split()[0]
+                just_RUN_dir = os.path.dirname(just_RUN)
                if not os.path.exists(os.path.join(self.project_top_level, c[s]['path'], just_RUN)):
                  cprint("ERROR: %s's 'run' path %s not found. Continuing but skipping this run "
                    "from %s." % (s, just_RUN, self.config_file), 'DARK_RED')
@ -406,8 +430,12 @@ class TrickWorkflow(WorkflowCommon):
                  c[s]['runs'][r] = dict(self.config[s]['runs'][r])
                elif 'returns' not in c[s]['runs'][r]:
                  self.config[s]['runs'][r]['returns'] = 0
-                elif (type(c[s]['runs'][r]['returns']) != int or
-                  c[s]['runs'][r]['returns'] < 0 or c[s]['runs'][r]['returns'] > 255):
+
+                elif (not type_expected(c[s]['runs'][r]['returns'], expected_type=int,
+                  extra_msg='Continuing but ignoring %s %s "returns:" value "%s"' %
+                  (s, r, c[s]['runs'][r]['returns']))):
+                  self.config[s]['runs'][r]['returns'] = 0 # Default to zero
+                elif (c[s]['runs'][r]['returns'] < 0 or c[s]['runs'][r]['returns'] > 255):
                  cprint("ERROR: %s's run '%s' has invalid 'returns' value (must be 0-255). "
                    "Continuing but assuming this run is expected to return 0 in %s." % (s, r, self.config_file),
                    'DARK_RED')
@ -424,6 +452,9 @@ class TrickWorkflow(WorkflowCommon):
                  pass # Deliberately leave open for workflows to extend how comparisons are defined
                else:  # If it's a list, make sure it fits the 'path vs. path' format
                  for cmp in c[s]['runs'][r]['compare']:
+                    if not type_expected(cmp, expected_type=str,
+                      extra_msg='Continuing but ignoring comparison %s' % cmp):
+                      continue
                    if ' vs. ' not in cmp:
                      cprint("ERROR: %s's run %s comparison '%s' does not match expected pattern. Must be of "
                        "form: 'path/to/log1 vs. path/to/log2'. Continuing but ignoring this comparison in %s."
@ -443,12 +474,8 @@ class TrickWorkflow(WorkflowCommon):
                thisSim.add_run(thisRun)
          # SIM's valgrind RUN path checks
          if 'valgrind' in c[s]:        # If it's there...
-            if not c[s]['valgrind']:  # but None, remove it
-              self.config[s].pop('valgrind')
-            elif type( c[s]['valgrind']) is not dict:     # If it's the wrong type
-              cprint("ERROR: Valgrind entry %s is not a dict! Make sure 'valgrind:' ends with a : in "
-                   "config file %s. Continuing but skipping this valgrind: section..."
-                   % (c[s]['valgrind'], self.config_file), 'DARK_RED')
+            if not type_expected(c[s]['valgrind'], expected_type=dict,
+              extra_msg='Skipping entire valgrind: section in %s' % c[s]):
              self.config[s].pop('valgrind')
            else:                     # If it's there and a valid dict
              if self.this_os == 'darwin':
@ -460,13 +487,14 @@ class TrickWorkflow(WorkflowCommon):
                if 'flags' not in c[s]['valgrind']:
                  self.config[s]['valgrind']['flags'] = ''
                if 'runs' in  c[s]['valgrind']:       # If it's there...
-                  if not c[s]['valgrind']['runs']:  # but None, remove entire valgrind section
-                    cprint("ERROR: %s's valgrind section has no 'run' paths. Continuing but skipping "
-                           "this valgrind section from %s." % (s, self.config_file), 'DARK_RED')
+                  if not type_expected(c[s]['valgrind']['runs'], expected_type=list,
+                     extra_msg='Skipping this valgrind runs: section for %s' % c[s]):
                    self.config[s].pop('valgrind')
                  else:
                    for r in c[s]['valgrind']['runs']:  # If it's there and a valid list, check paths
-                      just_RUN = r.split(' ')[0]
+                      if not type_expected(r, expected_type=str, extra_msg='Skipping this valgrind run.'):
+                        continue
+                      just_RUN = r.split()[0]
                      if not os.path.exists(os.path.join(self.project_top_level, c[s]['path'], just_RUN)):
                        cprint("ERROR: %s's valgrind 'run' path %s not found. Continuing but skipping "
                               "this run from %s." % (s, just_RUN, self.config_file), 'DARK_RED')
@ -873,6 +901,17 @@ class TrickWorkflow(WorkflowCommon):
            """
            return self.runs

+        def get_valgrind_runs(self):
+            """
+            Get all Run() instances associated with this sim
+
+            >>> s = TrickWorkflow.Sim(name='alloc', sim_dir=os.path.join(this_trick, 'test/SIM_alloc_test'))
+            >>> s.get_valgrind_runs()  # This sim has no runs
+            []
+
+            """
+            return self.valgrind_runs
+
        def add_run( self, run):
            """
            Append a new Run() instance to the internal run lists. Appends to valgrind
--- a/share/trick/trickops/tests/errors.yml
+++ b/share/trick/trickops/tests/errors.yml
@ -1,33 +0,0 @@
-# YAML file with many errors for unit testing
-# Global configuration parameters
-
-globals:
-  parallel_safety: unsupported_value
-
-extension_example:
-  should: be ignored by this framework
-
-# This sim exists, but has duplicate run entries which is an error
-SIM_ball_L1:
-    path: trick_sims/Ball/SIM_ball_L1
-    size: 6000
-    runs:
-        RUN_test/input.py:
-        RUN_test/input.py:
-
-# This sim exists, but it's RUN does not
-SIM_alloc_test:
-    path: test/SIM_alloc_test
-    runs:
-        RUN_buddy/input.py:
-
-# This one is has duplicate non-unique path which is an error
-SIM_L1_ball:
-    path: trick_sims/Ball/SIM_ball_L1
-
-# This sim doesn't exist
-SIM_foobar:
-    path: test/SIM_foobar
-    runs:
-        RUN_hi/input.py:
-
--- a/share/trick/trickops/tests/errors_fatal1.yml
+++ b/share/trick/trickops/tests/errors_fatal1.yml
@ -0,0 +1,7 @@
+# YAML file with fatal error used for unit testing
+# The runs: section has invalid YAML syntax
+SIM_alloc_test:
+    path: test/SIM_alloc_test
+    runs:
+       - RUN_test/input.py:
+       RUN_test/input.py:
--- a/share/trick/trickops/tests/errors_fatal2.yml
+++ b/share/trick/trickops/tests/errors_fatal2.yml
@ -0,0 +1,4 @@
+# YAML file with fatal error used for unit testing
+# Doesn't conform to expected dict format
+SIM_alloc_test
+    path - test/SIM_alloc_test
--- a/share/trick/trickops/tests/errors_nonfatal.yml
+++ b/share/trick/trickops/tests/errors_nonfatal.yml
@ -0,0 +1,79 @@
+# YAML file with many errors for unit testing
+# Global configuration parameters
+
+globals:
+  parallel_safety: unsupported_value
+
+extension_example:
+  should: be ignored by this framework
+
+# This sim exists, but has duplicate run entries which is an error
+SIM_ball_L1:
+    path: trick_sims/Ball/SIM_ball_L1
+    size: 6000
+    runs:
+        RUN_test/input.py:
+        RUN_test/input.py:
+    valgrind:
+      runs:
+        RUN_test/input.py:    # Wrong type, dict
+        - RUN_test/input.py:  # Should be list of str not list of dict
+
+# This sim exists, but its valgrind section is empty and its runs have problems
+SIM_alloc_test:
+    path: test/SIM_alloc_test
+    valgrind:
+    runs:
+        RUN_buddy/input.py:          # Doesn't exist
+        RUN_test/input.py:           # Does exist
+            extension: foobar        # Should be retained in self.config but not used
+            compare:
+                - RUN_test/log.trk:  # Should be list of str not list of dict
+                - share/trick/trickops/tests/testdata/log_a.csv vs. share/trick/trickops/tests/baselinedata/log_a.csv  # OK
+
+# This sim exists but has problems with types of internal variables
+SIM_events:
+    path: test/SIM_events
+    labels:
+      - fine1
+      - broke1:     # Should be list of str not list of dict
+      - broke2:     # Should be list of str not list of dict
+      - fine2
+    runs:
+      - RUN_test/input.py:      # List of dicts, should be ignored
+      - RUN_test/unit_test.py:  # List of dicts, should be ignored
+    valgrind:  # Empty so should be ignored
+      runs:
+
+# Sim exists, but runs have bad return values
+SIM_threads:
+    path: test/SIM_threads
+    runs:
+        RUN_test/sched.py:
+          returns:        # Empty so should print error and be ignored
+        RUN_test/amf.py:
+          returns: 270    # Invalid range
+        RUN_test/async.py:
+          returns: hello   # Invalid type
+        RUN_test/unit_test.py:
+          returns: 7       # Valid type and range
+
+# Missing the required path: so should be ignored
+SIM_demo_inputfile:
+
+# This sim has duplicate non-unique path which is an error
+SIM_L1_ball:
+    path: trick_sims/Ball/SIM_ball_L1
+
+# This sim doesn't exist
+SIM_foobar:
+    path: test/SIM_foobar
+    runs:
+        RUN_hi/input.py:
+
+# Sim exists but path is bad yaml syntax (no space)
+SIM_parachute:
+    path:trick_sims/SIM_parachute
+
+# Should be ignored
+999:
--- a/share/trick/trickops/tests/ut_TrickWorkflow.py
+++ b/share/trick/trickops/tests/ut_TrickWorkflow.py
@ -1,4 +1,4 @@
-import os, sys
+import os, sys, shutil
 import unittest
 import pdb
 from testconfig import this_trick, tests_dir
@ -17,8 +17,9 @@ class TrickWorkflowTestCase(unittest.TestCase):
          quiet=True)

    def tearDown(self):
-        self.instance._cleanup()  # Remove the log file this instance creates
-        del self.instance
+        if self.instance:
+            self.instance._cleanup()  # Remove the log file this instance creates
+            del self.instance
        self.instance = None

    def setUpWithEmptyConfig(self):
@ -26,10 +27,10 @@ class TrickWorkflowTestCase(unittest.TestCase):
          trick_dir=this_trick, config_file=os.path.join(tests_dir,"empty.yml"),
          quiet=True)

-    def setUpWithErrorConfig(self):
+    def setUpWithErrorConfig(self, file):
        self.tearDown()  # Cleanup the instance we get by default
        self.instance = TrickWorkflow(project_top_level=this_trick, log_dir='/tmp/',
-          trick_dir=this_trick, config_file=os.path.join(tests_dir,"errors.yml"),
+          trick_dir=this_trick, config_file=os.path.join(tests_dir,file),
          quiet=True)

    def test_init_nominal(self):
@ -47,11 +48,34 @@ class TrickWorkflowTestCase(unittest.TestCase):
        with self.assertRaises(RuntimeError):
            self.setUpWithEmptyConfig()

+    def test_init_bad_yaml_so_raises(self):
+        with self.assertRaises(RuntimeError):
+            self.setUpWithErrorConfig("errors_fatal1.yml")
+        with self.assertRaises(RuntimeError):
+            self.setUpWithErrorConfig("errors_fatal2.yml")
+
    def test_init_errors_but_no_raise(self):
-        self.setUpWithErrorConfig()
+        self.setUpWithErrorConfig("errors_nonfatal.yml")
        self.assertTrue(self.instance.config_errors)
        self.assertEqual(self.instance.parallel_safety , 'loose')
-        self.assertEqual(len(self.instance.sims), 2)
+        self.assertEqual(len(self.instance.sims), 4)
+        self.assertEqual(len(self.instance.get_sim('SIM_ball_L1').get_runs()), 1)
+        self.assertEqual(len(self.instance.get_sim('SIM_ball_L1').get_valgrind_runs()), 0)
+        self.assertEqual(len(self.instance.get_sim('SIM_alloc_test').get_runs()), 1)
+        self.assertEqual(len(self.instance.get_sim('SIM_alloc_test').get_valgrind_runs()), 0)
+        self.assertEqual(self.instance.config['SIM_alloc_test']['runs']['RUN_test/input.py']['extension'], 'foobar')
+        self.assertEqual(len(self.instance.get_sim('SIM_events').get_runs()), 0)
+        self.assertTrue('fine1' in self.instance.get_sim('SIM_events').labels)
+        self.assertTrue('fine2' in self.instance.get_sim('SIM_events').labels)
+        self.assertEqual(self.instance.get_sim('SIM_threads').get_run('RUN_test/sched.py').returns, 0)
+        self.assertEqual(self.instance.get_sim('SIM_threads').get_run('RUN_test/amf.py').returns, 0)
+        self.assertEqual(self.instance.get_sim('SIM_threads').get_run('RUN_test/async.py').returns, 0)
+        self.assertEqual(self.instance.get_sim('SIM_threads').get_run('RUN_test/unit_test.py').returns, 7)
+        self.assertTrue(self.instance.get_sim('SIM_demo_inputfile') is None)
+        self.assertTrue(self.instance.get_sim('SIM_L1_ball') is None)
+        self.assertTrue(self.instance.get_sim('SIM_foobar') is None)
+        self.assertTrue(self.instance.get_sim('SIM_parachute') is None)
+
        self.assertTrue(self.instance.config['extension_example'])
        self.instance.report()

@ -232,3 +256,80 @@ class TrickWorkflowTestCase(unittest.TestCase):
        baseline_data = 'share/trick/trickops/tests/baselinedata/log_a.csv'
        r.add_comparison(test_data, baseline_data)
        self.assertEqual(r.compare(), 1)
+
+    def setup_deep_directory_structure(self, runs, parallel_safety='loose'):
+        '''
+        Build a temporary dir structure to test deep pathing
+        runs is a list of runs relative to SIM_fake directory
+        returns: str/path of config file for this deep setup
+        '''
+        deep_root = os.path.join(tests_dir, 'deep/')
+        SIM_root = os.path.join(deep_root, 'SIM_fake/')
+        for run in runs:
+          just_RUN_rel = os.path.dirname(run)
+          just_RUN_root = os.path.join(SIM_root, just_RUN_rel)
+          SIM_root_rel =  os.path.relpath(SIM_root, this_trick)
+          os.makedirs(just_RUN_root, exist_ok=True)
+          Path(os.path.join(SIM_root,run)).touch()
+        yml_content=textwrap.dedent("""
+        globals:
+            parallel_safety: """ + parallel_safety + """
+        SIM_fake:
+            path: """ +  SIM_root_rel + """
+            runs:
+        """)
+        for run in runs:
+            yml_content += "        " + run + ":\n"
+
+        config_file = os.path.join(deep_root, "config.yml")
+        f = open(config_file, "w")
+        f.write(yml_content)
+        f.close()
+        return(config_file)
+    def teardown_deep_directory_structure(self):
+        # Clean up the dirs we created
+        deep_root = os.path.join(tests_dir, 'deep/')
+        shutil.rmtree(deep_root)
+
+    def test_deep_directory_structure_nominal(self):
+        '''
+        Set up a deep directory structure for sims and runs which will exercise
+        some of the path logic in _validate_config(). Nominal with no errors
+        '''
+        self.tearDown()  # Cleanup the instance we get by default, don't need it
+        config_file = self.setup_deep_directory_structure(runs=['SET_fake/RUN_fake/input.py'])
+        self.instance = TrickWorkflow(project_top_level=this_trick, log_dir='/tmp/',
+          trick_dir=this_trick, config_file=config_file, quiet=True)
+        self.assertEqual(len(self.instance.get_sims()), 1)
+        self.assertEqual(len(self.instance.get_sim('SIM_fake').get_runs()), 1)
+        self.teardown_deep_directory_structure()
+
+    def test_deep_directory_structure_loose(self):
+        '''
+        Set up a deep directory structure for sims and runs which will exercise
+        some of the path logic in _validate_config(). Duplicate runs with
+        parallel safety = loose, should not error.
+        '''
+        self.tearDown()  # Cleanup the instance we get by default, don't need it
+        config_file = self.setup_deep_directory_structure(runs=['SET_fake/RUN_fake/input.py',
+          'SET_fake/RUN_fake/test.py'])
+        self.instance = TrickWorkflow(project_top_level=this_trick, log_dir='/tmp/',
+          trick_dir=this_trick, config_file=config_file, quiet=True)
+        self.assertEqual(len(self.instance.get_sims()), 1)
+        self.assertEqual(len(self.instance.get_sim('SIM_fake').get_runs()), 2)
+        self.teardown_deep_directory_structure()
+
+    def test_deep_directory_structure_strict_error(self):
+        '''
+        Set up a deep directory structure for sims and runs which will exercise
+        some of the path logic in _validate_config(). Duplicate runs with
+        parallel safety = strict, should non-fatal error.
+        '''
+        self.tearDown()  # Cleanup the instance we get by default, don't need it
+        config_file = self.setup_deep_directory_structure(runs=['SET_fake/RUN_fake/input.py',
+          'SET_fake/RUN_fake/test.py'], parallel_safety='strict')
+        self.instance = TrickWorkflow(project_top_level=this_trick, log_dir='/tmp/',
+          trick_dir=this_trick, config_file=config_file, quiet=True)
+        self.assertEqual(len(self.instance.get_sims()), 1)
+        self.assertEqual(len(self.instance.get_sim('SIM_fake').get_runs()), 1)
+        self.teardown_deep_directory_structure()