Skip to content

Commit

Permalink
Param validation (#95)
Browse files Browse the repository at this point in the history
* Add parameter validation

* Add schema files

* Update changelog

* Fix linting and comment

* Add .gitattributes
  • Loading branch information
yashpatel6 authored Jul 12, 2022
1 parent c14f5fb commit 5f2cf1c
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.config linguist-language=groovy
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [Unreleased]
### Added
- IndelRealignment compression parameter
- Param validation
### Changed
- Parse CSV inputs using modularized `csv_parser`
- Delete merged but un-deduplicated BAMs earlier for more efficient disk usage
Expand Down
94 changes: 94 additions & 0 deletions config/custom_schema_types.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
* This custom schema namespace implements custom types for checking input BAMs for call-gSNP.
*
* Each `check_*_namespace` / `check_bam_list` closure takes `(Map options, String name, Map properties)`:
* `options[name]` is the value being validated and `properties` is the schema entry describing it
* (its `elements` key holds the schema for nested values). Validation failures are reported by
* throwing an Exception.
*
* NOTE(review): `schema.validate_parameter` is not defined in this file; it is presumably provided by
* the external schema.config that loads these custom types - confirm against that file.
*/
custom_schema_types {
    // Keys accepted by default in `check_input_type_keys` (used for 'InputNamespace' values)
    allowed_input_types = [
        'BAM'
    ]
    // Keys accepted for 'InputBAMNamespace' values
    allowed_bam_types = [
        'normal',
        'tumour'
    ]

    /**
    * Check that input types are in allowed list
    *
    * @param given   keys that were provided
    * @param name    name of the parameter being checked, used in the error message
    * @param choices permitted keys; defaults to `allowed_input_types`
    * @throws Exception on the first key in `given` that is not in `choices`
    */
    check_input_type_keys = { List given, String name, List choices=custom_schema_types.allowed_input_types ->
        for (elem in given) {
            if (!(elem in choices)) {
                throw new Exception("Invalid parameter ${name}. Valid types: ${choices}.")
            }
        }
    }

    /**
    * Check if given input is a Namespace
    *
    * @param val  value to check
    * @param name name of the parameter being checked, used in the error message
    * @throws Exception if `val` is not a Map
    */
    check_if_namespace = { val, String name ->
        // `in` with a class on the right-hand side is an instance check
        if (!(val in Map)) {
            throw new Exception("${name} should be a Namespace, not ${val.getClass()}.")
        }
    }

    /**
    * Check if given input is a list
    *
    * @param val  value to check; both List and Set instances are accepted
    * @param name name of the parameter being checked, used in the error message
    * @throws Exception if `val` is neither a List nor a Set
    */
    check_if_list = { val, String name ->
        if (!(val in List || val in Set)) {
            throw new Exception("${name} should be a List, not ${val.getClass()}.")
        }
    }

    /**
    * Check that input is namespace of expected types
    *
    * Verifies that `options[name]` is a Map whose keys are all in `allowed_input_types`,
    * then validates each entry against the matching schema in `properties.elements`.
    *
    * @param options    map containing the value to validate under key `name`
    * @param name       key of the value to validate
    * @param properties schema entry for `name`; `properties.elements` is indexed by entry key
    */
    check_input_namespace = { Map options, String name, Map properties ->
        // Check parameters keys
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        custom_schema_types.check_input_type_keys(given_keys, name)

        options[name].each { entry ->
            // Wrap the entry in a single-key map so it can be passed in the (options, name, properties) form
            def entry_as_map = [:]
            entry_as_map[entry.key] = entry.value
            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
        }
    }

    /**
    * Check namespace BAM
    *
    * Verifies that `options[name]` is a non-empty Map whose keys are all in `allowed_bam_types`,
    * then validates each entry against the matching schema in `properties.elements`.
    *
    * @param options    map containing the value to validate under key `name`
    * @param name       key of the value to validate
    * @param properties schema entry for `name`; `properties.elements` is indexed by entry key
    * @throws Exception if the namespace has no keys
    */
    check_bam_namespace = { Map options, String name, Map properties ->
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        if (given_keys.size() <= 0) {
            throw new Exception("No inputs provided! Please provide inputs in the CSV or YAML.")
        }
        custom_schema_types.check_input_type_keys(given_keys, name, custom_schema_types.allowed_bam_types)

        options[name].each { entry ->
            // Wrap the entry in a single-key map so it can be passed in the (options, name, properties) form
            def entry_as_map = [:]
            entry_as_map[entry.key] = entry.value
            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
        }
    }

    /**
    * Check if proper BAM entry list
    *
    * Verifies that `options[name]` is a List (or Set) and that every item is a Map; each item is
    * then validated against every element schema in `properties.elements`.
    *
    * @param options    map containing the value to validate under key `name`
    * @param name       key of the value to validate
    * @param properties schema entry for `name`; `properties.elements` maps field name to field schema
    */
    check_bam_list = { Map options, String name, Map properties ->
        custom_schema_types.check_if_list(options[name], name)
        for (item in options[name]) {
            custom_schema_types.check_if_namespace(item, name)
            properties.elements.each { key, val ->
                schema.validate_parameter(item, key, val)
            }
        }
    }

    // Custom type name -> closure that validates a value of that type
    types = [
        'InputNamespace': custom_schema_types.check_input_namespace,
        'InputBAMNamespace': custom_schema_types.check_bam_namespace,
        'BAMEntryList': custom_schema_types.check_bam_list
    ]
}
11 changes: 6 additions & 5 deletions config/methods.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import nextflow.util.SysHelper
includeConfig "${projectDir}/external/nextflow-config/config/csv/csv_parser.config"
includeConfig "${projectDir}/external/nextflow-config/config/schema/schema.config"

class log_output_dir {
static def check_permissions(path) {
Expand Down Expand Up @@ -245,7 +246,7 @@ methods {
}
}

parse_input = {
convert_to_yaml_input = {
if (params.containsKey('input')) {
// YAML was used so set modes accordingly
params.is_NT_paired = params.input.BAM.containsKey('normal') && params.input.BAM.containsKey('tumour')
Expand All @@ -257,7 +258,6 @@ methods {
}
params.single_sample_type = all_input_keys[0]
}
methods.format_input_from_yaml()
} else if (params.containsKey('input_csv')) {
// Parse CSV header line and determine modes
def reader = new BufferedReader(new FileReader(params.input_csv))
Expand All @@ -278,8 +278,6 @@ methods {
methods.set_ids_from_csv(raw_csv_input)
// Format the CSV input to match input YAML format
methods.format_csv_input(raw_csv_input)
// Call YAML input formatter to generate matching input for pipeline
methods.format_input_from_yaml()
} else {
throw new Exception("Neither YAML nor CSV inputs found! Please run pipeline with inputs.")
}
Expand All @@ -288,7 +286,10 @@ methods {
// Set up env, timeline, trace, and report above.
setup = {
methods.set_env()
methods.parse_input()
methods.convert_to_yaml_input()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
schema.validate()
methods.format_input_from_yaml()
methods.set_log_output_dir()
methods.set_output_dir()
log_output_dir.check_permissions(params.log_output_dir)
Expand Down
132 changes: 132 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
---
patient_id:
type: 'String'
required: true
help: 'Patient ID'
sample_id:
type: 'String'
required: true
help: 'Sample ID'
output_dir:
type: 'Path'
mode: 'w'
required: true
help: 'Absolute path to output directory'
save_intermediate_files:
type: 'Bool'
required: true
default: false
help: 'Whether to save intermediate files'
is_emit_original_quals:
type: 'Bool'
required: true
default: true
help: 'Whether to emit original quality scores after recalibration'
input_csv:
type: 'Path'
mode: 'r'
required: false
help: 'Absolute path to input CSV containing sample information'
is_DOC_run:
type: 'Bool'
required: true
default: false
help: 'Whether to run the DepthOfCoverage process, which is very time-consuming for large BAMs'
intervals:
type: 'String'
allow_empty: true
required: true
help: 'Target intervals to process for DNA panel/targeted sequencing samples; leave empty for WGS'
scatter_count:
type: 'Integer'
required: true
default: 50
help: 'How many intervals to divide the genome into for parallelization'
split_intervals_extra_args:
type: 'String'
allow_empty: true
required: false
help: 'Extra arguments for interval splitting'
gatk_ir_compression:
type: 'Integer'
choices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
default: 0
required: false
reference_fasta:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to reference genome fasta'
bundle_mills_and_1000g_gold_standard_indels_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to Mills and 1000g gold standard INDELs VCF'
bundle_known_indels_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to known INDELs VCF'
bundle_v0_dbsnp138_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to v0 dbSNP 138 VCF'
bundle_hapmap_3p3_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to HapMap 3p3 VCF'
bundle_omni_1000g_2p5_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to OMNI 1000g 2p5 VCF'
bundle_phase1_1000g_snps_high_conf_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to phase 1 1000g high confidence SNPs VCF'
bundle_contest_hapmap_3p3_vcf_gz:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to ContEst HapMap 3p3 VCF'
input:
type: 'InputNamespace'
required: true
help: 'Input samples'
elements:
BAM:
type: 'InputBAMNamespace'
required: true
help: 'Input BAMs for calling'
elements:
normal:
type: 'BAMEntryList'
required: false
help: 'Input normal BAMs'
elements:
id:
type: 'String'
required: true
help: 'Identifier for sample'
path:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to BAM file'
tumour:
type: 'BAMEntryList'
required: false
help: 'Input tumour BAMs'
elements:
id:
type: 'String'
required: true
help: 'Identifier for sample'
path:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to BAM file'

0 comments on commit 5f2cf1c

Please sign in to comment.