diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 1553513fd7..adac4d3e88 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -160,10 +160,10 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file - Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix - Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix - Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix - Array[File?] multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix + File? multimappers_EM_matrix = Optimus.multimappers_EM_matrix + File? multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix + File? multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix + File? multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix File? gex_aligner_metrics = Optimus.aligner_metrics File? library_metrics = Optimus.library_metrics File? mtx_files = Optimus.mtx_files diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index a975931245..1947babb3d 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -166,23 +166,10 @@ workflow Optimus { ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } - call FastqProcessing.FastqProcessing as SplitFastq { - input: - i1_fastq = i1_fastq, - r1_fastq = r1_fastq, - r2_fastq = r2_fastq, - whitelist = whitelist, - chemistry = tenx_chemistry_version, - sample_id = input_id, - read_struct = read_struct, - warp_tools_docker_path = docker_prefix + warp_tools_docker - } - - scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { - call StarAlign.STARsoloFastq as STARsoloFastq { + call StarAlign.STARsoloFastq as STARsoloFastq { input: - r1_fastq = [SplitFastq.fastq_R1_output_array[idx]], - r2_fastq = [SplitFastq.fastq_R2_output_array[idx]], + r1_fastq = r1_fastq, + r2_fastq = r2_fastq, star_strand_mode = star_strand_mode, white_list = whitelist, tar_star_reference = tar_star_reference, @@ -193,18 +180,11 @@ workflow Optimus { soloMultiMappers = soloMultiMappers, samtools_star_docker_path = docker_prefix + samtools_star, is_slidetags = is_slidetags - } } - call Merge.MergeSortBamFiles as MergeBam { - input: - bam_inputs = STARsoloFastq.bam_output, - output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate", - picard_cloud_docker_path = docker_prefix + picard_cloud_docker - } - call Metrics.CalculateGeneMetrics as GeneMetrics { + + call Metrics.CalculateGeneMetrics as GeneMetrics { input: - bam_input = MergeBam.output_bam, + bam_input = STARsoloFastq.bam_output, mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, @@ -213,7 +193,7 @@ workflow Optimus { call Metrics.CalculateCellMetrics as CellMetrics { input: - bam_input = MergeBam.output_bam, + bam_input = STARsoloFastq.bam_output, mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, @@ -222,13 +202,13 @@ workflow Optimus { call StarAlign.MergeStarOutput as MergeStarOutputs { input: - barcodes = STARsoloFastq.barcodes, - features = STARsoloFastq.features, - matrix = STARsoloFastq.matrix, - cell_reads = STARsoloFastq.cell_reads, - summary = STARsoloFastq.summary, - align_features = STARsoloFastq.align_features, - umipercell = STARsoloFastq.umipercell, + barcodes = [STARsoloFastq.barcodes], + features = [STARsoloFastq.features], + matrix = [STARsoloFastq.matrix], + cell_reads = [STARsoloFastq.cell_reads], + summary = [STARsoloFastq.summary], + align_features = [STARsoloFastq.align_features], + umipercell = [STARsoloFastq.umipercell], input_id = input_id, counting_mode = counting_mode, star_merge_docker_path = docker_prefix + star_merge_docker, @@ -272,10 +252,10 @@ workflow Optimus { if (count_exons && counting_mode=="sn_rna") { call StarAlign.MergeStarOutput as MergeStarOutputsExons { input: - barcodes = STARsoloFastq.barcodes_sn_rna, - features = STARsoloFastq.features_sn_rna, - matrix = STARsoloFastq.matrix_sn_rna, - cell_reads = STARsoloFastq.cell_reads_sn_rna, + barcodes = [STARsoloFastq.barcodes_sn_rna], + features = [STARsoloFastq.features_sn_rna], + matrix = [STARsoloFastq.matrix_sn_rna], + cell_reads = [STARsoloFastq.cell_reads_sn_rna], input_id = input_id, counting_mode = "sc_rna", summary = STARsoloFastq.summary_sn_rna, @@ -346,12 +326,11 @@ workflow Optimus { File final_h5ad_output = select_first([OptimusH5adGenerationWithExons.h5ad_output, OptimusH5adGeneration.h5ad_output]) File final_library_metrics = select_first([OptimusH5adGenerationWithExons.library_metrics, OptimusH5adGeneration.library_metrics]) - output { # version of this pipeline String pipeline_version_out = pipeline_version File genomic_reference_version = ReferenceCheck.genomic_ref_version - File bam = MergeBam.output_bam + File bam = STARsoloFastq.bam_output File matrix = MergeStarOutputs.sparse_counts File matrix_row_index = MergeStarOutputs.row_index File matrix_col_index = MergeStarOutputs.col_index @@ -359,15 +338,14 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out + File? multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix + File? multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix + File? multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix + File? multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix + # File? library_metrics = MergeStarOutputs.library_metrics File library_metrics = final_library_metrics File? mtx_files = MergeStarOutputs.mtx_files - File? filtered_mtx_files = MergeStarOutputs.filtered_mtx_files - - Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix - Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix - Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix - Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix - + File? filtered_mtx_files = MergeStarOutputs.filtered_mtx_files # h5ad File h5ad_output_file = final_h5ad_output diff --git a/pipelines/skylab/optimus/example_inputs/human_v2_example.json b/pipelines/skylab/optimus/example_inputs/human_v2_example.json index 0b0da39f58..e40f2a0c51 100644 --- a/pipelines/skylab/optimus/example_inputs/human_v2_example.json +++ b/pipelines/skylab/optimus/example_inputs/human_v2_example.json @@ -15,5 +15,9 @@ "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/star_2.7.9a_primary_gencode_human_v27.tar", "Optimus.input_id": "pbmc4k_human", "Optimus.chemistry": "tenX_v2", - "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf" + "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf", + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa", + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64" } diff --git a/pipelines/skylab/optimus/example_inputs/human_v3_example.json b/pipelines/skylab/optimus/example_inputs/human_v3_example.json index 6a0e8edf98..15fc11cf8b 100644 --- a/pipelines/skylab/optimus/example_inputs/human_v3_example.json +++ b/pipelines/skylab/optimus/example_inputs/human_v3_example.json @@ -15,5 +15,9 @@ "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/star_2.7.9a_primary_gencode_human_v27.tar", "Optimus.input_id": "pbmc_human_v3", "Optimus.chemistry": "tenX_v3", - "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf" + "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf", + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa", + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64" } diff --git a/pipelines/skylab/optimus/example_inputs/mouse_v2_example.json b/pipelines/skylab/optimus/example_inputs/mouse_v2_example.json index 8efad7a498..d284509c2b 100644 --- a/pipelines/skylab/optimus/example_inputs/mouse_v2_example.json +++ b/pipelines/skylab/optimus/example_inputs/mouse_v2_example.json @@ -27,5 +27,9 @@ "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/mm10/v0/star/star_2.7.9a_primary_gencode_mouse_vM21.tar", "Optimus.input_id": "neurons2k_mouse", "Optimus.chemistry": "tenX_v2", - "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/gencode.vM21.primary_assembly.annotation.gtf" + "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/gencode.vM21.primary_assembly.annotation.gtf", + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/mm10/v0/GRCm38.primary_assembly.genome.fa", + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64" } diff --git a/pipelines/skylab/optimus/example_inputs/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/example_inputs/mouse_v2_snRNA_example.json index e3b905f62d..c3139f4391 100644 --- a/pipelines/skylab/optimus/example_inputs/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/example_inputs/mouse_v2_snRNA_example.json @@ -25,5 +25,8 @@ "Optimus.chemistry": "tenX_v2", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/gencode.vM21.primary_assembly.annotation.gtf", "Optimus.counting_mode": "sn_rna", - "Optimus.count_exons": true + "Optimus.count_exons": true, + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 36c0b5d3bd..745de5b4ef 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -16,6 +16,10 @@ "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Optimus.star_strand_mode": "Forward", + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa", + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64", "Optimus.cloud_provider": "gcp", "Optimus.gex_nhash_id":"example_1234" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index d999f69fa9..7e234f4fbe 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -27,6 +27,10 @@ "Optimus.input_id": "neurons2k_mouse", "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz", + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64", "Optimus.cloud_provider": "gcp", "Optimus.gex_nhash_id":"example_1234", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf" diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index a68235cfbf..eb2d80b36c 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -26,6 +26,9 @@ "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.counting_mode": "sn_rna", "Optimus.count_exons": true, + "Optimus.STARsoloFastq.cpu_platform":"Intel Cascade Lake", + "Optimus.STARsoloFastq.cpu":"16", + "Optimus.STARsoloFastq.mem_size":"64", "Optimus.cloud_provider": "gcp", "Optimus.gex_nhash_id":"example_1234" } diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 530eee652b..22e98618cc 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -103,7 +103,7 @@ task FastqProcessing { fi fastqprocess \ - --bam-size 30.0 \ + --num-output-files 1 \ --sample-id "~{sample_id}" \ $FASTQS \ --white-list "~{whitelist}" \ diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index b2a07a4d0a..354e3230fb 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -227,18 +227,14 @@ task STARsoloFastq { # runtime values String samtools_star_docker_path - Int machine_mem_mb = 64000 - Int cpu = 8 - # by default request non preemptible machine to make sure the slow star alignment step completes - Int preemptible = 3 + String cpu_platform = "Intel Ice Lake" + Int machine_mem_mb = 512000 + Int mem_size = 512 + Int cpu = 128 + Int disk = 2000 + # by default request non preemptible machine to make sure the slow star alignment step completes + Int preemptible = 1 - # if slide_tags true set disk to 1000 otherwise dynamic allocation based on input size - # dynamic allocation multiplies input size by 2.2 to account for output bam file + 20% overhead, add size of reference. - Boolean is_slidetags - Int disk = if is_slidetags then 1000 else - ceil(size(tar_star_reference, "Gi") * 3) + - ceil(size(r1_fastq, "Gi") * 20) + - ceil(size(r2_fastq, "Gi") * 20) } meta { @@ -340,9 +336,9 @@ task STARsoloFastq { # validate the bam with samtools quickcheck samtools quickcheck -v Aligned.sortedByCoord.out.bam - echo "UMI LEN " $UMILen + # why is this here? touch barcodes_sn_rna.tsv touch features_sn_rna.tsv touch matrix_sn_rna.mtx @@ -351,7 +347,6 @@ task STARsoloFastq { touch Summary_sn_rna.csv touch UMIperCellSorted_sn_rna.txt - if [[ "~{counting_mode}" == "sc_rna" ]] then SoloDirectory="Solo.out/Gene/raw" @@ -424,12 +419,12 @@ task STARsoloFastq { >>> runtime { - docker: samtools_star_docker_path - memory: "~{machine_mem_mb} MiB" - disks: "local-disk ~{disk} HDD" + memory: "~{mem_size} GiB" + disks: "local-disk ~{disk} SSD" disk: disk + " GB" # TES cpu: cpu preemptible: preemptible + cpuPlatform: cpu_platform } output {