galaxyproject · hangjiaz · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/tools/pgxRpi/.shed.yml b/tools/pgxRpi/.shed.yml
@@ -0,0 +1,10 @@
+name: pgxrpi
+owner: iuc
+description: pgxLoader and pgxFreqplot functions from pgxRpi 1.2.0
+homepage_url: https://github.com/progenetix/pgxRpi
+long_description: |
+    pgxRpi is an R wrapper package for the Progenetix REST API that leverages the capabilities of the Beacon v2 specification. It also provides functions to enhance the visualisation of the retrieved genomic variation data.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/pgxRpi
+type: unrestricted
+categories:
+- Variant Analysis
diff --git a/tools/pgxRpi/pgxFreqplot.xml b/tools/pgxRpi/pgxFreqplot.xml
@@ -0,0 +1,80 @@
+<tool id="pgx_freqplot" name="pgxRpi pgxFreqplot" version="0.1.0+galaxy0" profile="21.05">
+    <requirements>
+        <requirement type="package" version="4.4.2">r-base</requirement>
+        <!-- pgxRpi release 1.2.0, the release named in the tool shed description -->
+        <requirement type="package" version="1.2.0">bioconductor-pgxrpi</requirement>
+        <requirement type="package" version="1.58.0">bioconductor-genomicranges</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$run_pgxFreqplot'
+    ]]></command>
+    <configfiles>
+        <configfile name='run_pgxFreqplot'><![CDATA[
+            ## Setup R error handling to go to stderr
+            options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+
+            library(pgxRpi)
+            library(GenomicRanges)
+
+            ## The input starts with one "#group_id=...;label=...;sample_count=..." line per
+            ## group, followed by a tab-separated table with a header row.
+            data <- readLines("$freqdata")
+            ## Anchor on the line start so a "#" inside a data row is never taken for metadata.
+            metacolumn <- grep("^#", data)
+            if (length(metacolumn) == 0) {
+                stop("No metadata lines starting with '#' were found in the CNV frequency data")
+            }
+            ## Turn every metadata line into a one-row data frame (filter, label, sample_count).
+            ## Only the leading key of each field is stripped, so the value itself stays intact.
+            metadata <- lapply(data[metacolumn], function(indmeta) {
+                fields <- unlist(strsplit(indmeta, split = ";"))
+                data.frame(
+                    filter = sub("^#group_id=", "", fields[1]),
+                    label = sub("^label=", "", fields[2]),
+                    sample_count = sub("^sample_count=", "", fields[3])
+                )
+            })
+            metadata <- do.call(rbind, metadata)
+            ## Skip the metadata lines; the first remaining line holds the column names.
+            data <- read.csv("$freqdata", sep = "\t", skip = max(metacolumn), header = TRUE)
+            freq <- makeGRangesListFromDataFrame(data, split.field = 'group_name', keep.extra.columns = TRUE)
+            ## NOTE(review): assumes the metadata rows are in the same order as the groups in freq
+            S4Vectors::mcols(freq) <- metadata
+
+            ## Convert a comma-separated text parameter into a vector.
+            ##   param      - single string taken from the tool form
+            ##   if_numeric - when TRUE the entries are converted with as.numeric()
+            ## Returns NULL when no value was supplied, otherwise the trimmed entries.
+            process_input_parameter <- function(param, if_numeric = FALSE) {
+                entries <- trimws(unlist(strsplit(param, split = ',')))
+                ## Drop blanks left by whitespace-only input or stray commas ("1,,3", "1,").
+                entries <- entries[nzchar(entries)]
+                if (length(entries) == 0) {
+                    return(NULL)
+                }
+                if (if_numeric) {
+                    entries <- as.numeric(entries)
+                }
+                entries
+            }
+
+
+            ## Optional selections from the tool form, parsed with process_input_parameter
+            selected_chrom <- process_input_parameter('$chrom', if_numeric = TRUE)
+            selected_filters <- process_input_parameter('$filters')
+            plot_layout <- process_input_parameter('$layout', if_numeric = TRUE)
+
+            ## Draw the frequency plot into the PDF output dataset
+            pdf("$pgxplot", width = $plotwidth, height = $plotheight)
+            pgxFreqplot(
+                freq,
+                filters = selected_filters,
+                chrom = selected_chrom,
+                layout = plot_layout,
+                assembly = "$assembly"
+            )
+            dev.off()
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param type='data' name='freqdata' format='txt' label='CNV frequency data' help='CNV frequency data returned by pgxLoader'/>
+        <param name='chrom' type='text' optional='true' label='Chromosomes to plot' help='Use commas to separate multiple chromosomes (e.g. 1,3,5). If not specified, the plot will cover the entire genome.' />
+        <param name='layout' type='text' optional='true' label='Number of rows and columns in plot' help='Use commas to separate rows and columns (e.g. 3,1). Only used in plot by chromosome.'/>
+        <param name='filters' type='text' optional='true' label='Filter to plot' help='Only support one filter.' />
+        <param name='assembly' type='select' label='Genome assembly version'>
+            <option value="hg38" selected="true">hg38</option>
+            <option value="hg19">hg19</option>
+        </param>
+        <param name='plotwidth' type='integer' value='8' label='Width of the plot in inches.' />
+        <param name='plotheight' type='integer' value='4' label='Height of the plot in inches.' />
+    </inputs>
+    <outputs>
+        <data name='pgxplot' format='pdf' label='CNV frequency plot'/>
+    </outputs>
+    <tests>
+        <test>
+            <param name='freqdata' value='cnv-frequency.txt'/>
+            <param name="filters" value="NCIT:C3058"/>
+            <output name="pgxplot" file="cnv-frequency.pdf" />
+        </test>
+    </tests>
+    <help><![CDATA[
+       This function plots the CNV frequency data retrieved with the pgxRpi pgxLoader tool.
+    ]]></help>
+    <citations>
+        <citation type='doi'>10.18129/B9.bioc.pgxRpi</citation>
+    </citations>
+</tool>
diff --git a/tools/pgxRpi/pgxLoader.xml b/tools/pgxRpi/pgxLoader.xml
@@ -0,0 +1,103 @@
+<tool id="pgx_loader" name="pgxRpi pgxLoader" version="0.1.0+galaxy0" profile="21.05">
+    <requirements>
+        <requirement type="package" version="4.4.2">r-base</requirement>
+        <!-- pgxRpi release 1.2.0, the release the embedded script is written against -->
+        <requirement type="package" version="1.2.0">bioconductor-pgxrpi</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$run_pgxLoader'
+    ]]></command>
+    <configfiles>
+        <configfile name='run_pgxLoader'><![CDATA[
+            ## Setup R error handling to go to stderr
+            options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+
+            library('pgxRpi')
+
+            ## The variant data format is only forwarded for genomic variations; for every
+            ## other data type, or when "Default" is kept, NULL is passed to pgxLoader.
+            real_output <- '$advanced_settings.output'
+
+            ## Both operands are single strings, so the scalar short-circuit operator applies.
+            if ('$type' != "g_variants" || real_output == "Default") {
+                real_output <- NULL
+            }
+
+            ## Convert a comma-separated text parameter into a character vector.
+            ##   param - single string taken from the tool form
+            ## Returns NULL when no value was supplied, otherwise the trimmed entries.
+            process_input_parameter <- function(param) {
+                entries <- trimws(unlist(strsplit(param, split = ',')))
+                ## Drop blanks left by whitespace-only input or stray commas ("a,,b", "a,").
+                entries <- entries[nzchar(entries)]
+                if (length(entries) == 0) {
+                    return(NULL)
+                }
+                entries
+            }
+
+            biosample_ids <- process_input_parameter('$biosample_id')
+            individual_ids <- process_input_parameter('$individual_id')
+            filters <- process_input_parameter('$filters')
+            dataset <- process_input_parameter('$advanced_settings.dataset')
+
+            ## Optional integer fields are substituted as text. An emptied field falls back
+            ## to 0, the default declared for both limit and skip in the tool form.
+            ## NOTE(review): assumes an emptied optional integer renders as '' - confirm
+            as_count <- function(value) {
+                if (value == "") 0 else as.numeric(value)
+            }
+
+            ## adapt to old version of this package (1.2.0)
+            if ('$type' == "filtering_terms") {
+                result <- data.frame(filters = pgxFilter())
+            } else {
+                result <- pgxLoader(
+                    type = '$type',
+                    output = real_output,
+                    biosample_id = biosample_ids,
+                    individual_id = individual_ids,
+                    filters = filters,
+                    ## the checkbox value arrives as text (presumably 'true'/'false');
+                    ## hand pgxLoader a real logical instead of a string
+                    codematches = as.logical('$codematches'),
+                    skip = as_count('$advanced_settings.skip'),
+                    limit = as_count('$advanced_settings.limit'),
+                    dataset = dataset,
+                    domain = '$advanced_settings.domain',
+                    entry_point = '$advanced_settings.entry_point'
+                )
+            }
+
+            ## TRUE once a metadata header has been written ahead of the table
+            append <- FALSE
+            requested_type <- '$type'
+
+            ## adapt to tsv format
+            if (requested_type == "cnv_frequency") {
+                ## one "#group_id=...;label=...;sample_count=..." line per group goes first
+                metadata <- S4Vectors::mcols(result)
+                metadata_lines <- paste0(
+                    "#group_id=", metadata[['filter']],
+                    ";label=", metadata[['label']],
+                    ";sample_count=", metadata[['sample_count']]
+                )
+                write.table(metadata_lines, file = '$pgxdata', quote = FALSE, col.names = FALSE, row.names = FALSE)
+                append <- TRUE
+            } else if (requested_type == "cnv_fraction") {
+                ## combine the genome, chromosome and arm level tables side by side,
+                ## led by an analysis_id column taken from the row names
+                result <- cbind(
+                    analysis_id = rownames(result[["genome_cnv_frac"]]),
+                    result[["genome_cnv_frac"]],
+                    result[["chr_cnv_frac"]],
+                    result[["arm_cnv_frac"]]
+                )
+            }
+
+            ## write.table warns when column names are appended to an existing file; that
+            ## is intended here because the table follows the metadata lines
+            suppressWarnings(
+                write.table(result, file = '$pgxdata', append = append, quote = FALSE, col.names = TRUE, row.names = FALSE, sep = '\t')
+            )
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name='type' type='select' label='Output data type'>
+            <option value="individuals">individuals</option>
+            <option value="biosamples">biosamples</option>
+            <option value="analyses">analyses</option>
+            <option value="g_variants">genomic variations</option>
+            <option value="cnv_frequency">precomputed CNV frequency data from Progenetix</option>
+            <option value="cnv_fraction">CNV fraction per sample based on Progenetix data</option>
+            <option value="sample_count">count of samples in Progenetix</option>
+            <option value="filtering_terms">All available filters in Progenetix</option>
+        </param>
+        <param name='biosample_id' type='text' optional='true' label='Identifiers of biosamples' help='Use commas to separate multiple IDs (e.g. pgxbs-m3io46hq,pgxbs-m3io41c2). If the output data type is "genomic variations", only this search condition is supported.' />
+        <param name='individual_id' type='text' optional='true' label='Identifiers of individuals' help='Use commas to separate multiple IDs (e.g. pgxind-m3io3pzi,pgxind-m3io3mi3).'/>
+        <param name='filters' type='text' optional='true' label='Filters' help='Use commas to combine filters (e.g. NCIT:C3512,PMID:37084736) using AND logic when the output data type is "individuals", "biosamples", "analyses"; OR logic when the output data type is "precomputed CNV frequency", "count of samples". Do not use multiple filters when the output data type is "CNV fraction".' />
+        <param name='codematches' type='boolean' checked='false' label='Whether to exclude samples from child concepts of the specified filters' />
+        <section name="advanced_settings" title="Advanced settings" expanded="false">
+            <param name='output' type='select' value="Default" optional="true" label='Variant data format'>
+                <option value="Default" selected="true">Default</option>
+                <option value="pgxseg">pgxseg</option>
+                <option value="seg">seg</option>
+            </param>
+            <param name='limit' type='integer' value='0' optional='true' label='Integer to specify the number of returned profiles' />  
+            <param name='skip' type='integer' value='0' optional='true' label='Integer specifying the number of profiles to skip' />
+            <param name='dataset' type='text' optional='true' label='Dataset to query from the Beacon response' help='Use commas to separate multiple datasets to enter' />
+            <param name='domain' type='text' value="http://progenetix.org" optional='true' label='Domain of the query data resource' />
+            <param name='entry_point' type='text' value="beacon" optional='true' label='Entry point of the Beacon v2 API' />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="pgxdata" format="txt" label="${tool.name} for $type data" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="type" value="biosamples"/>
+            <param name="biosample_id" value="pgxbs-kftvki7h"/>
+            <output name="pgxdata" file="biosamples.txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        pgxRpi is an R wrapper package for Progenetix REST API that leverages the capabilities of Beacon v2 specification.
+    ]]></help>
+    <citations>
+        <citation type='doi'>10.18129/B9.bioc.pgxRpi</citation>
+    </citations>
+</tool>
diff --git a/tools/pgxRpi/test-data/biosamples.txt b/tools/pgxRpi/test-data/biosamples.txt
@@ -0,0 +1,2 @@
+biosample_id	individual_id	biosample_status_id	biosample_status_label	sample_origin_type_id	sample_origin_type_label	histological_diagnosis_id	histological_diagnosis_label	sampled_tissue_id	sampled_tissue_label	pathological_stage_id	pathological_stage_label	tnm_id	tnm_label	tumor_grade_id	tumor_grade_label	age_iso	notes	icdo_morphology_id	icdo_morphology_label	icdo_topography_id	icdo_topography_label	pubmed_id	cellosaurus_id	cbioportal_id	tcga_project_id	analysis_info_experiment_id	analysis_info_series_id	analysis_info_platform_id	cohort_ids	biosample_legacy_id	geoprov_city	geoprov_country	geoprov_iso_alpha3	geoprov_long_latitude	geoprov_long_longitude	updated
+pgxbs-kftvki7h	pgxind-kftx6ltd	EFO:0009656	neoplastic sample	OBI:0001479	specimen from organism	NCIT:C3512	Lung Adenocarcinoma	UBERON:0002048	lung	NCIT:C27976	Stage Ib	NCIT:C48706,NCIT:C48714,NCIT:C48724	N1 Stage Finding,N3 Stage Finding,T2 Stage Finding	NA	NA	P56Y	adenocarcinoma [lung]	pgx:icdom-81403	Adenocarcinoma, NOS	pgx:icdot-C34.9	Lung, NOS	PMID:19607727	NA	NA	NA	geo:GSM417055	geo:GSE16597	geo:GPL8690	pgx:cohort-arraymap,pgx:cohort-2021progenetix,pgx:cohort-carriocordo2021heterogeneity	NA	New York City	United States of America	USA	40.71	-74.01	2020-09-10 17:46:45.105000
diff --git a/tools/pgxRpi/test-data/cnv-frequency.pdf b/tools/pgxRpi/test-data/cnv-frequency.pdf
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		biosample_id individual_id biosample_status_id biosample_status_label sample_origin_type_id sample_origin_type_label histological_diagnosis_id histological_diagnosis_label sampled_tissue_id sampled_tissue_label pathological_stage_id pathological_stage_label tnm_id tnm_label tumor_grade_id tumor_grade_label age_iso notes icdo_morphology_id icdo_morphology_label icdo_topography_id icdo_topography_label pubmed_id cellosaurus_id cbioportal_id tcga_project_id analysis_info_experiment_id analysis_info_series_id analysis_info_platform_id cohort_ids biosample_legacy_id geoprov_city geoprov_country geoprov_iso_alpha3 geoprov_long_latitude geoprov_long_longitude updated
		pgxbs-kftvki7h pgxind-kftx6ltd EFO:0009656 neoplastic sample OBI:0001479 specimen from organism NCIT:C3512 Lung Adenocarcinoma UBERON:0002048 lung NCIT:C27976 Stage Ib NCIT:C48706,NCIT:C48714,NCIT:C48724 N1 Stage Finding,N3 Stage Finding,T2 Stage Finding NA NA P56Y adenocarcinoma [lung] pgx:icdom-81403 Adenocarcinoma, NOS pgx:icdot-C34.9 Lung, NOS PMID:19607727 NA NA NA geo:GSM417055 geo:GSE16597 geo:GPL8690 pgx:cohort-arraymap,pgx:cohort-2021progenetix,pgx:cohort-carriocordo2021heterogeneity NA New York City United States of America USA 40.71 -74.01 2020-09-10 17:46:45.105000