Skip to content

Commit

Permalink
Merge pull request #93 from scailfin/feature/yaml-include
Browse files Browse the repository at this point in the history
Feature/yaml include
  • Loading branch information
heikomuller authored Jun 22, 2021
2 parents f4aa528 + da5de97 commit ad3501f
Show file tree
Hide file tree
Showing 15 changed files with 151 additions and 23 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,9 @@
* Support different storage volumes for different types of workers.
* Add support for workflow steps that are implemented as Jupyter Notebooks (\#79).
* Include flowapp to run workflow templates using streamlit GUI.


### 0.9.1 - 2021-06-22

* Add option to resolve references to external files when reading Yaml files.
* Support inclusion and import of files in instructions markdown text.
19 changes: 1 addition & 18 deletions flowserv/client/cli/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from flowserv.client.api import service
from flowserv.client.cli.table import ResultTable
from flowserv.model.parameter.base import PARA_INT, PARA_STRING
from flowserv.model.workflow.manifest import read_instructions

import flowserv.view.files as flbls
import flowserv.view.run as rlbls
Expand Down Expand Up @@ -319,21 +320,3 @@ def cli_workflow_download():
cli_benchmark.add_command(list_workflows, name='list')
cli_benchmark.add_command(show_ranking, name='ranking')
cli_benchmark.add_command(get_workflow, name='show')


# -- Helper Methods -----------------------------------------------------------

def read_instructions(filename):
"""Read instruction text from a given file. If the filename is None the
result will be None as well.
Returns
-------
string
"""
# Read instructions from file if given
instruction_text = None
if filename is not None:
with open(filename, 'r') as f:
instruction_text = f.read().strip()
return instruction_text
47 changes: 45 additions & 2 deletions flowserv/model/workflow/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""Helper functions to read workflow manifest files."""

import os
import re

from typing import List, Tuple

Expand Down Expand Up @@ -36,6 +37,9 @@
# Flowserv manifest file names..
MANIFEST_FILES = ['flowserv{}'.format(suffix) for suffix in DEFAULT_SUFFIXES]

"""Regular expression for file includes in markdown."""
REGEX_INCLUDE = r'\{\{(.*?)\}\}'


class WorkflowManifest(object):
"""The workflow manifest contains the workflow specification, the workflow
Expand Down Expand Up @@ -190,8 +194,7 @@ def load(
manifest_value=doc.get('instructions'),
user_argument=instructions
)
with open(filename, 'r') as f:
doc['instructions'] = f.read().strip()
doc['instructions'] = read_instructions(filename)
# Get the workflow specification file.
filename = getfile(
basedir=basedir,
Expand Down Expand Up @@ -256,6 +259,46 @@ def getfile(basedir, manifest_value, user_argument):
return os.path.join(basedir, manifest_value)


def read_instructions(filename: str) -> str:
"""Read instruction text from a given file. If the filename is None the
result will be None as well.
Returns
-------
string
"""
# Read instructions from file if given.
instruction_text = None
if filename is not None:
with open(filename, 'r') as f:
instruction_text = f.read().strip()
parent = os.path.dirname(os.path.abspath(filename))

# Replace function for file imports.
def replace_include(match):
"""Function to replace references to files in markdown text.
All file names should be relative to the path of the main document that
imports the file.
Parameters
----------
match: re.MatchObject
Regular expression match object.
Returns
-------
string
"""
ref = match.group()
# Strip expression of import reference syntax.
expr = ref[2:-2]
# Read file and return content.
return read_instructions(os.path.join(parent, expr))

return re.sub(REGEX_INCLUDE, replace_include, instruction_text)


def unique_name(name, existing_names):
"""Ensure that the workflow name in the project metadata is not empty, not
longer than 512 character, and unique.
Expand Down
5 changes: 5 additions & 0 deletions flowserv/util/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import shutil
import yaml

from yamlinclude import YamlIncludeConstructor
from typing import Dict, IO, List, Optional, Union


Expand Down Expand Up @@ -151,6 +152,10 @@ def read_object(filename: str, format: Optional[str] = None) -> Dict:
else:
format = FORMAT_YAML
if format.upper() == FORMAT_YAML:
YamlIncludeConstructor.add_to_loader_class(
loader_class=yaml.FullLoader,
base_dir=os.path.dirname(os.path.abspath(filename))
)
with open(filename, 'r') as f:
return yaml.load(f.read(), Loader=yaml.FullLoader)
elif format.upper() == FORMAT_JSON:
Expand Down
2 changes: 1 addition & 1 deletion flowserv/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
# terms of the MIT License; see LICENSE file for more details.

"""Information about the current version of the flowServ package."""
__version__ = '0.9.0'
__version__ = '0.9.1'
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ requests
SQLAlchemy>=1.3.18
papermill
Click
pyyaml-include
boto3
google-cloud-storage
docker
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
'pyyaml>=5.1',
'requests',
'SQLAlchemy>=1.3.18',
'Click'
'Click',
'pyyaml-include'
]
aws_requires = ['boto3']
docker_requires = ['docker']
Expand Down
1 change: 1 addition & 0 deletions tests/.files/benchmark/include-test/include1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Include me
1 change: 1 addition & 0 deletions tests/.files/benchmark/include-test/include2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Include me, too
6 changes: 6 additions & 0 deletions tests/.files/benchmark/include-test/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# These are the main instructions.

We also include:

* {{include1.md}}, and
* {{include2.md}}.
4 changes: 4 additions & 0 deletions tests/.files/benchmark/include-test/parameters/para1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: names
label: 'Input file'
dtype: file
target: data/names.txt
4 changes: 4 additions & 0 deletions tests/.files/benchmark/include-test/parameters/para2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: sleeptime
label: 'Sleep time (s)'
dtype: int
defaultValue: 10
30 changes: 30 additions & 0 deletions tests/.files/benchmark/include-test/template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
workflow:
version: 0.3.0
inputs:
files:
- $[[code]]
- code/analyze.py
- data/sequences.txt
parameters:
codefile: $[[code]]
inputfile: data/sequences.txt
outputfile: results/predictions.txt
workflow:
type: serial
specification:
steps:
- environment: 'python:3.7'
commands:
- ${python} "${codefile}"
--inputfile "${inputfile}"
--outputfile "${outputfile}"
- ${python} code/analyze.py
--inputfile "${outputfile}"
--outputfile results/eval.json
outputs:
files:
- results/predictions.txt
- results/eval.json
parameters:
- !include parameters/para1.yaml
- !include parameters/para2.yaml
4 changes: 3 additions & 1 deletion tests/controller/serial/engine/test_postproc_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_postproc_workflow(tmpdir):
user_id = create_user(api)
# Create four groups and run the workflow with a slightly different input
# file.
prev_postproc = None
for i in range(4):
with service(user_id=user_id) as api:
group_id = create_group(api, workflow_id)
Expand Down Expand Up @@ -108,13 +109,14 @@ def test_postproc_workflow(tmpdir):
assert 'postproc' in wh
serialize.validate_workflow_handle(wh)
attmpts = 0
while wh['postproc']['state'] in st.ACTIVE_STATES:
while wh['postproc']['state'] in st.ACTIVE_STATES or wh['postproc']['id'] == prev_postproc:
time.sleep(1)
with service() as api:
wh = api.workflows().get_workflow(workflow_id=workflow_id)
attmpts += 1
if attmpts > 60:
raise RuntimeError('max. attempts reached')
prev_postproc = wh['postproc']['id']
serialize.validate_workflow_handle(wh)
with service() as api:
ranking = api.workflows().get_ranking(workflow_id=workflow_id)
Expand Down
41 changes: 41 additions & 0 deletions tests/model/template/test_read_include.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# This file is part of the Reproducible and Reusable Data Analysis Workflow
# Server (flowServ).
#
# Copyright (C) 2019-2021 NYU.
#
# flowServ is free software; you can redistribute it and/or modify it under the
# terms of the MIT License; see LICENSE file for more details.

"""Unit tests for reading a workflow templates that contains references to
additional files using !include (pyyaml-include) and {{}} (in instructions
markdown).
"""

import os

from flowserv.model.workflow.manifest import read_instructions

import flowserv.util as util


DIR = os.path.dirname(os.path.realpath(__file__))
BENCHMARK_DIR = os.path.join(DIR, '../../.files/benchmark/include-test')
INSTRUCTIONS_FILE = os.path.join(BENCHMARK_DIR, 'instructions.md')
TEMPLATE_FILE = os.path.join(BENCHMARK_DIR, 'template.yaml')


def test_read_instructions_with_include():
"""Test reading a template that includes other files."""
text = read_instructions(filename=INSTRUCTIONS_FILE)
assert '# These are the main instructions.' in text
assert '* Include me, and' in text
assert '* Include me, too.' in text


def test_read_template_with_include():
"""Test reading a template that includes other files."""
doc = util.read_object(filename=TEMPLATE_FILE)
assert doc['parameters'] == [
{'name': 'names', 'label': 'Input file', 'dtype': 'file', 'target': 'data/names.txt'},
{'name': 'sleeptime', 'label': 'Sleep time (s)', 'dtype': 'int', 'defaultValue': 10}
]

0 comments on commit ad3501f

Please sign in to comment.