LLNL · crkrenn · May 17, 2020 · Aug 21, 2020 · Aug 21, 2020 · Oct 13, 2020
diff --git a/TODO b/TODO
@@ -0,0 +1,8 @@
+datastructures/core/executiongraph.py
+
+extend setup workspace
+
+in utils, setup replacement dictionary
+add unique run
+use recursive render in Linker.link
+need to make one run-000x for each maestro run...
diff --git a/maestrowf/datastructures/core/executiongraph.py b/maestrowf/datastructures/core/executiongraph.py
@@ -232,6 +232,16 @@ def name(self):
         """
         return self.step.real_name
 
+    @property
+    def step_label(self):
+        """
+        Get the label of the step represented by the record instance.
+        The label is the step's name without any parameter substitutions.
+
+        :returns: The step_label of the StudyStep contained within the record.
+        """
+        return self.step.step_label
+
     @property
     def walltime(self):
         """
@@ -318,6 +328,7 @@ def __init__(self, submission_attempts=1, submission_throttle=0,
         # Member variables for execution.
         self._adapter = None
         self._description = OrderedDict()
+        self.linker = None
 
         # Generate tempdir (if specfied)
         if use_tmp:
@@ -516,7 +527,6 @@ def generate_scripts(self):
                 continue
 
             # Record generates its own script.
-            record.setup_workspace()
             record.generate_script(adapter, self._tmp_dir)
 
     def _execute_record(self, record, adapter, restart=False):
@@ -546,6 +556,8 @@ def _execute_record(self, record, adapter, restart=False):
             # Generate the script for execution on the fly.
             record.setup_workspace()    # Generate the workspace.
             record.generate_script(adapter, self._tmp_dir)
+            if self.linker:
+                self.linker.link(record)
 
         if self.dry_run:
             record.mark_end(State.DRYRUN)

diff --git a/maestrowf/datastructures/core/study.py b/maestrowf/datastructures/core/study.py
@@ -67,7 +67,8 @@ class StudyStep:
 
     def __init__(self):
         """Object that represents a single workflow step."""
-        self._name = ""
+        self.name = ""
+        self.step_label = ""
         self.description = ""
         self.nickname = ""
         self.run = {
@@ -368,7 +369,7 @@ def add_step(self, step):
         self.add_node(step.real_name, step)
         LOGGER.info(
             "Adding step '%s' to study '%s'...", step.name, self.name)
-        # Apply the environment to the incoming step.
+        # Apply the study's environment to the incoming step.
         step.__dict__ = \
             apply_function(step.__dict__, self.environment.apply_environment)
 
@@ -421,7 +422,7 @@ def setup_environment(self):
 
     def configure_study(self, submission_attempts=1, restart_limit=1,
                         throttle=0, use_tmp=False, hash_ws=False,
-                        dry_run=False):
+                        dry_run=False, linker=None):
         """
         Perform initial configuration of a study. \
 
@@ -438,6 +439,7 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
         ExecutionGraph dumps its information into a temporary directory. \
         :param dry_run: Boolean value that toggles dry run to just generate \
         study workspaces and scripts without execution or status checking. \
+        :param linker: Linker object.
         :returns: True if the Study is successfully setup, False otherwise. \
         """
 
@@ -447,6 +449,10 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
         self._use_tmp = use_tmp
         self._hash_ws = hash_ws
         self._dry_run = dry_run
+        self.linker = linker
+        make_links_flag = False
+        if linker:
+            make_links_flag = linker.make_links_flag
 
         LOGGER.info(
             "\n------------------------------------------\n"
@@ -456,10 +462,11 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
             "Use temporary directory =   %s\n"
             "Hash workspaces =           %s\n"
             "Dry run enabled =           %s\n"
+            "Make links enabled =        %s\n"
             "Output path =               %s\n"
             "------------------------------------------",
             submission_attempts, restart_limit, throttle,
-            use_tmp, hash_ws, dry_run, self._out_path
+            use_tmp, hash_ws, dry_run, make_links_flag, self._out_path
         )
 
         self.is_configured = True
@@ -679,6 +686,7 @@ def _stage(self, dag):
 
                     modified, step_exp = node.apply_parameters(combo)
                     step_exp.name = combo_str
+                    step_exp.step_label = step
                     step_exp.nickname = nickname
 
                     # Substitute workspaces into the combination.
@@ -806,6 +814,7 @@ def _stage_linear(self, dag):
                 r_cmd = r_cmd.replace(workspace_var, ws)
             node.run["cmd"] = cmd
             node.run["restart"] = r_cmd
+            node.step_label = step    # Unparameterized step name.
 
             # Add the step
             dag.add_step(step, node, ws, rlimit)
@@ -874,6 +883,7 @@ def stage(self):
             use_tmp=self._use_tmp, dry_run=self._dry_run)
         dag.add_description(**self.description)
         dag.log_description()
+        dag.linker = self.linker
 
         # Because we're working within a Study class whose steps have already
         # been verified to not contain a cycle, we can override the check for

diff --git a/maestrowf/maestro.py b/maestrowf/maestro.py
@@ -29,23 +29,25 @@
 
 """A script for launching a YAML study specification."""
 from argparse import ArgumentParser, ArgumentError, RawTextHelpFormatter
-import jsonschema
+
 import logging
 import os
 import shutil
-import six
 import sys
-import tabulate
 import time
 
+import tabulate
+import six
+import jsonschema
+
 from maestrowf import __version__
 from maestrowf.conductor import Conductor
 from maestrowf.specification import YAMLSpecification
 from maestrowf.datastructures.core import Study
 from maestrowf.datastructures.environment import Variable
 from maestrowf.utils import \
     create_parentdir, create_dictionary, LoggerUtility, make_safe_path, \
-    start_process
+    start_process, Linker
 
 
 # Program Globals
@@ -167,6 +169,7 @@ def run_study(args):
 
     # Set up the output directory.
     out_dir = environment.remove("OUTPUT_PATH")
+    out_name = ""
     if args.out:
         # If out is specified in the args, ignore OUTPUT_PATH.
         output_path = os.path.abspath(args.out)
@@ -263,11 +266,18 @@ def run_study(args):
         raise ArgumentError(_msg)
 
     # Set up the study workspace and configure it for execution.
+    linker = Linker(
+        make_links_flag=args.make_links,
+        link_directory=args.link_directory,
+        link_template=args.link_template,
+        output_name=out_name,
+        output_root=out_dir
+        )
     study.setup_workspace()
     study.configure_study(
         throttle=args.throttle, submission_attempts=args.attempts,
         restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws,
-        dry_run=args.dry)
+        dry_run=args.dry, linker=linker)
     study.setup_environment()
 
     if args.dry:
@@ -346,43 +356,74 @@ def setup_argparser():
     cancel.set_defaults(func=cancel_study)
 
     # subparser for a run subcommand
-    run = subparsers.add_parser('run',
-                                help="Launch a study based on a specification")
+    # need manual line breaks to allow formatted template documentation.
+    run = subparsers.add_parser(
+        'run',
+        help="Launch a study based on a specification",
+        formatter_class=RawTextHelpFormatter)
+
     run.add_argument("-a", "--attempts", type=int, default=1,
-                     help="Maximum number of submission attempts before a "
+                     help="Maximum number of submission attempts before a\n"
                      "step is marked as failed. [Default: %(default)d]")
     run.add_argument("-r", "--rlimit", type=int, default=1,
-                     help="Maximum number of restarts allowed when steps. "
-                     "specify a restart command (0 denotes no limit). "
+                     help="Maximum number of restarts allowed when steps \n"
+                     "specify a restart command (0 denotes no limit). \n"
                      "[Default: %(default)d]")
     run.add_argument("-t", "--throttle", type=int, default=0,
-                     help="Maximum number of inflight jobs allowed to execute "
-                     "simultaneously (0 denotes not throttling). "
+                     help="Maximum number of inflight jobs allowed to \n"
+                     "execute simultaneously (0 denotes not throttling). "
                      "[Default: %(default)d]")
     run.add_argument("-s", "--sleeptime", type=int, default=60,
-                     help="Amount of time (in seconds) for the manager to "
+                     help="Amount of time (in seconds) for the manager to \n"
                      "wait between job status checks. [Default: %(default)d]")
     run.add_argument("--dry", action="store_true", default=False,
-                     help="Generate the directory structure and scripts for a "
-                     "study but do not launch it. [Default: %(default)s]")
+                     help="Generate the directory structure and scripts for \n"
+                     "a study but do not launch it. [Default: %(default)s]")
     run.add_argument("-p", "--pgen", type=str,
-                     help="Path to a Python code file containing a function "
-                     "that returns a custom filled ParameterGenerator "
+                     help="Path to a Python code file containing a function \n"
+                     "that returns a custom filled ParameterGenerator \n"
                      "instance.")
     run.add_argument("--pargs", type=str, action="append", default=[],
-                     help="A string that represents a single argument to pass "
-                     "a custom parameter generation function. Reuse '--parg' "
-                     "to pass multiple arguments. [Use with '--pgen']")
+                     help="A string that represents a single argument to \n"
+                     "pass a custom parameter generation function. Reuse \n"
+                     "'--pargs' for multiple arguments. [Use with '--pgen']")
     run.add_argument("-o", "--out", type=str,
-                     help="Output path to place study in. [NOTE: overrides "
+                     help="Output path to place study in. [NOTE: overrides \n"
                      "OUTPUT_PATH in the specified specification]")
     run.add_argument("-fg", action="store_true", default=False,
-                     help="Runs the backend conductor in the foreground "
+                     help="Runs the backend conductor in the foreground \n"
                      "instead of using nohup. [Default: %(default)s]")
     run.add_argument("--hashws", action="store_true", default=False,
-                     help="Enable hashing of subdirectories in parameterized "
-                     "studies (NOTE: breaks commands that use parameter labels"
-                     " to search directories). [Default: %(default)s]")
+                     help="Enable hashing of subdirectories in \n"
+                     "parameterized studies (NOTE: breaks commands that use \n"
+                     "parameter labels to search directories). \n"
+                     " [Default: %(default)s]")
+    run.add_argument("--make-links", action="store_true", default=False,
+                     help="Automatically make customizable, human-readable \n"
+                     "links to run directories. [Default: %(default)s]")
+    run.add_argument(
+        "--link-directory",
+        type=str,
+        default="{{output_root}}/links",
+        help="Jinja template for path where links to run directories \n"
+        "are made. [Default: %(default)s]")
+
+    run.add_argument(
+        "--link-template",
+        type=str,
+        default=(
+            "{{link_directory}}/{{date}}/run-{{INDEX}}/{{instance}}/{{step}}"),
+        help="Jinja template for links to run directories\n"
+        "[Default: %(default)s]\n \n"
+        "Currently supported Jinja variables:\n"
+        "{{output_root}} - Parent directory for this maestro study\n"
+        "{{link_directory}} - Link directory for this maestro study\n"
+        "{{date}} - Human-readable date (e.g. '2020_07_28')\n"
+        "{{instance}} - Maestro label for a set of parameters\n"
+        "               (e.g. 'X1.5.X2.5.X3.20')\n"
+        "               [maximum length: 255 characters]\n"
+        "{{step}} - Maestro label for a given step (e.g. 'run')\n"
+        "{{INDEX}} - Unique number for each maestro execution (e.g. '0001')")
 
     prompt_opts = run.add_mutually_exclusive_group()
     prompt_opts.add_argument(
@@ -395,12 +436,12 @@ def setup_argparser():
     # The only required positional argument for 'run' is a specification path.
     run.add_argument(
         "specification", type=str,
-        help="The path to a Study YAML specification that will be loaded and "
-        "executed.")
+        help="The path to a Study YAML specification that will be loaded \n"
+        "and executed.")
     run.add_argument(
         "--usetmp", action="store_true", default=False,
-        help="Make use of a temporary directory for dumping scripts and other "
-        "Maestro related files.")
+        help="Make use of a temporary directory for dumping scripts and \n"
+        "other Maestro related files.")
     run.set_defaults(func=run_study)
 
     # subparser for a status subcommand