From 9ad4662f3331de4493c09c93a93d52c0e152d1f9 Mon Sep 17 00:00:00 2001 From: Selina Wu Date: Tue, 17 Sep 2024 18:04:33 -0700 Subject: [PATCH 01/12] add functions and plotting script for density plots and clustered SNVs histogram --- R/calculate.density.each.clone.R | 16 ++++ R/plot.clone.densityplot.R | 102 ++++++++++++++++++++++++++ R/plot.clones.densityplot.histogram.R | 49 +++++++++++++ R/plot.snv.histogram.R | 79 ++++++++++++++++++++ 4 files changed, 246 insertions(+) create mode 100644 R/calculate.density.each.clone.R create mode 100644 R/plot.clone.densityplot.R create mode 100644 R/plot.clones.densityplot.histogram.R create mode 100644 R/plot.snv.histogram.R diff --git a/R/calculate.density.each.clone.R b/R/calculate.density.each.clone.R new file mode 100644 index 0000000..17b94a4 --- /dev/null +++ b/R/calculate.density.each.clone.R @@ -0,0 +1,16 @@ +calculate.density.each.clone <- function(cluster.df, cloneID) { + # Skip clusters with only one SNV + if (nrow(cluster.df) <= 1) { + return(NULL); + } + density <- density( + x = cluster.df$subclonal.fraction, + bw = 'nrd', + adjust = 1.2, + na.rm = TRUE + ); + density.df <- as.data.frame(density[c('x','y')]); + density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y; + density.df$clone.id <- cloneID; + return(density.df) + } diff --git a/R/plot.clone.densityplot.R b/R/plot.clone.densityplot.R new file mode 100644 index 0000000..add7c75 --- /dev/null +++ b/R/plot.clone.densityplot.R @@ -0,0 +1,102 @@ +plot.clone.densityplot <- function( + sample.df, + sampleID, + src.tool, + output.dir + ) { + # Create table of densities for plotting each cluster + density.list <- list(); + for (clone in unique(sample.df$cluster.label)) { + clone.df <- calculate.density.each.clone( + cluster.df = sample.df[sample.df$cluster.label == clone, ], + cloneID = clone + ); + density.list[[clone]] <- clone.df; + } + density.df <- do.call(rbind, density.list); + + # Calculate average CCF per cluster + cluster.meanCCFs <- unique(sample.df$location); + + # Get plot legend + clone.IDs <- unique(density.df$clone.id); + colors.qual <- BoutrosLab.plotting.general::default.colours(12); + cluster.colours <- setNames(colors.qual[1:length(clone.IDs)], clone.IDs); + cluster.legend <- BoutrosLab.plotting.general::legend.grob( + list( + legend = list( + title = 'Clone', + labels = names(cluster.colours), + colours = c(cluster.colours), + border = 'black' + ) + ), + size = 1, + title.cex = 1, + label.cex = 1 + ); + + # Set y-axis max limit and y-increments + ymax <- ceiling(max(density.df$count) / 5) * 5; + yseq <- if (ymax > 1000) { + 100; + } else if (ymax > 100) { + 20; + } else if (ymax > 20) { + 10; + } else { + 5; + } + + save.plt <- file.path( + output.dir, + paste(sampleID, src.tool, 'clone_densityplot.png', sep = '_') + ); + return( + BoutrosLab.plotting.general::create.scatterplot( + filename = save.plt, + formula = count ~ x, + data = density.df, + groups = density.df$clone.id, + xlab.label = 'CCF', + ylab.label = 'SNV Count', + xlab.top.label = paste('Total SNVs Clustered:', nrow(sample.df)), + xlab.top.cex = 1.5, + xlab.top.x = 0.5, + xlab.top.y = 1, + xlab.cex = 1.5, + ylab.cex = 1.5, + xaxis.cex = 1.2, + yaxis.cex = 1.2, + xlimits = c(0, 1.5), + ylimits = c(0, ymax), + xat = seq(0, 1.5, 0.25), + yat = seq(0, ymax, yseq), + xaxis.tck = c(1, 0), + yaxis.tck = c(1, 0), + col = cluster.colours, + type = 'l', + lwd = 3, + abline.v = cluster.meanCCFs, + abline.lwd = 0.5, + abline.lty = 'longdash', + # add.text = TRUE, + text.labels = lapply(cluster.meanCCFs, round, 2), + text.x = cluster.meanCCFs, + text.y = ymax - 1, + text.fontface = 'plain', + text.cex = 1, + legend = list( + right = list( + fun = cluster.legend + ) + ), + top.padding = 2, + right.padding = 1, + left.padding = 1, + height = 6, + width = 6.5, + resolution = 800 + ) + ); + } diff --git a/R/plot.clones.densityplot.histogram.R b/R/plot.clones.densityplot.histogram.R new file mode 100644 index 0000000..df25f68 --- /dev/null +++ b/R/plot.clones.densityplot.histogram.R @@ -0,0 +1,49 @@ +# Description: Plot density curves of clones from SRC tool results and histograms of SNVs per clone + +### PREAMBLE ####################################################################################### +library(BoutrosLab.utilities); +library(BoutrosLab.plotting.general); + +# Source functions +dir.functions <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/functions'; +files.functions <- list.files(dir.functions, full.names = TRUE); +lapply(files.functions, source); + +dir.dpclust.parsed <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/call-SRC/DPClust_results_parsed'; +dir.plot.output <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/plot'; + + +### LOAD DATA ###################################################################################### +# Read in clustered SNVs data +files.dpclust <- list.files(dir.dpclust.parsed, pattern = 'DPClust_data.tsv', full.names = TRUE); +names(files.dpclust) <- regmatches( + files.dpclust, + regexpr('STGHKGFH\\d{6}-\\S{4}-\\S{3}-\\w|TCGA-\\S{2}-\\S{4}-\\S{3}', files.dpclust) + ); +list.dpclust.dfs <- lapply(files.dpclust, read.table, sep = '\t', header = TRUE); + + +### PLOTTING ####################################################################################### +# Plot histograms +for (i in 1:length(list.dpclust.dfs)) { + dpclust.df <- list.dpclust.dfs[[i]]; + sampleID <- names(list.dpclust.dfs)[i]; + plot.snv.histogram( + dpclust.df, + sampleID, + 'DPClust', + dir.plot.output + ); + } + +# Plot density plots +for (i in 1:length(list.dpclust.dfs)) { + dpclust.df <- list.dpclust.dfs[[i]]; + sampleID <- names(list.dpclust.dfs)[i]; + plot.clone.densityplot( + dpclust.df, + sampleID, + 'DPClust', + dir.plot.output + ); + } diff --git a/R/plot.snv.histogram.R b/R/plot.snv.histogram.R new file mode 100644 index 0000000..3fb3dae --- /dev/null +++ b/R/plot.snv.histogram.R @@ -0,0 +1,79 @@ +# How to automate n.breaks? + # Use 100 if > 1000 SNVs, 50 if > 500 SNVs, 25 if > 100 SNVs, 10 if > 50 SNVs? +# How to automate ymax? + +plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) { + num.snv <- nrow(dpclust.df); + + # Automate number of breaks + # # Freedman-Diaconis Rule method + # iqr <- IQR(dpclust.df$subclonal.fraction); + # binwidth <- 2 * iqr / (num.snv^(1 / 3)); + # n.breaks <- ceiling((max(dpclust.df$subclonal.fraction) - min(dpclust.df$subclonal.fraction)) / binwidth); + + # Piecewise method: + n.breaks <- if (num.snv > 1000) { + 100; + } else if (num.snv > 500) { + 50; + } else if (num.snv > 100) { + 25; + } else if (num.snv > 40) { + 20; + } else { + 10; + } + + # Set y-axis max limit + hist.data <- hist(dpclust.df$subclonal.fraction, breaks = n.breaks, plot = FALSE); + ymax <- ceiling(max(hist.data$counts) / 10) * 10; + # Set y-axis increments + yseq <- if (ymax > 1000) { + 100; + } else if (ymax > 200) { + 50; + } else if (ymax > 40) { + 20; + } else { + 10; + } + + save.plt <- file.path( + output.dir, + paste(sampleID, src.tool, 'clustered_SNVs_histogram.png', sep = '_') + ); + + return( + BoutrosLab.plotting.general::create.histogram( + filename = save.plt, + x = dpclust.df$subclonal.fraction, + xlab.label = 'CCF', + ylab.label = 'SNV Count', + xlab.top.label = paste('Total SNVs Clustered:', num.snv), + xlab.top.cex = 1.5, + xlab.top.x = 0.5, + xlab.top.y = 1, + xlab.cex = 1.5, + ylab.cex = 1.5, + xaxis.cex = 1.2, + yaxis.cex = 1.2, + xlimits = c(0, 1.5), + ylimits = c(0, ymax), + xat = seq(0, 1.5, 0.25), + yat = seq(0, ymax, yseq), + xaxis.tck = c(1, 0), + yaxis.tck = c(1, 0), + breaks = n.breaks, + type = 'count', + col = 'gray80', + border.col = 'black', + lwd = 0.1, + top.padding = 1, + right.padding = 1, + left.padding = 1, + height = 6, + width = 6, + resolution = 800 + ) + ); + } From c655197e5db3172944c9e15cb42a78737bef8383 Mon Sep 17 00:00:00 2001 From: Selina Wu Date: Fri, 18 Oct 2024 09:22:26 -0700 Subject: [PATCH 02/12] update column names to reflect more standardized names --- R/calculate.density.each.clone.R | 3 ++- R/plot.clone.densityplot.R | 13 +++++++------ R/plot.snv.histogram.R | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/R/calculate.density.each.clone.R b/R/calculate.density.each.clone.R index 17b94a4..dbb1d04 100644 --- a/R/calculate.density.each.clone.R +++ b/R/calculate.density.each.clone.R @@ -1,10 +1,11 @@ +## Function: calculate.density.each.clone --------------------------------------------------------- calculate.density.each.clone <- function(cluster.df, cloneID) { # Skip clusters with only one SNV if (nrow(cluster.df) <= 1) { return(NULL); } density <- density( - x = cluster.df$subclonal.fraction, + x = cluster.df$CCF, bw = 'nrd', adjust = 1.2, na.rm = TRUE diff --git a/R/plot.clone.densityplot.R b/R/plot.clone.densityplot.R index add7c75..3b1cfb2 100644 --- a/R/plot.clone.densityplot.R +++ b/R/plot.clone.densityplot.R @@ -1,14 +1,15 @@ +## Function: plot.clone.densityplot ---------------------------------------------------------------- plot.clone.densityplot <- function( - sample.df, + dpclust.df, sampleID, src.tool, output.dir ) { # Create table of densities for plotting each cluster density.list <- list(); - for (clone in unique(sample.df$cluster.label)) { + for (clone in unique(dpclust.df$clone.id)) { clone.df <- calculate.density.each.clone( - cluster.df = sample.df[sample.df$cluster.label == clone, ], + cluster.df = dpclust.df[dpclust.df$clone.id == clone, ], cloneID = clone ); density.list[[clone]] <- clone.df; @@ -16,7 +17,7 @@ plot.clone.densityplot <- function( density.df <- do.call(rbind, density.list); # Calculate average CCF per cluster - cluster.meanCCFs <- unique(sample.df$location); + cluster.meanCCFs <- unique(dpclust.df$location); # Get plot legend clone.IDs <- unique(density.df$clone.id); @@ -26,7 +27,7 @@ plot.clone.densityplot <- function( list( legend = list( title = 'Clone', - labels = names(cluster.colours), + labels = as.character(names(cluster.colours)), colours = c(cluster.colours), border = 'black' ) @@ -60,7 +61,7 @@ plot.clone.densityplot <- function( groups = density.df$clone.id, xlab.label = 'CCF', ylab.label = 'SNV Count', - xlab.top.label = paste('Total SNVs Clustered:', nrow(sample.df)), + xlab.top.label = paste('Total SNVs Clustered:', nrow(dpclust.df)), xlab.top.cex = 1.5, xlab.top.x = 0.5, xlab.top.y = 1, diff --git a/R/plot.snv.histogram.R b/R/plot.snv.histogram.R index 3fb3dae..50f53ea 100644 --- a/R/plot.snv.histogram.R +++ b/R/plot.snv.histogram.R @@ -1,3 +1,4 @@ +## Function: plot.snv.histogram -------------------------------------------------------------------- # How to automate n.breaks? # Use 100 if > 1000 SNVs, 50 if > 500 SNVs, 25 if > 100 SNVs, 10 if > 50 SNVs? # How to automate ymax? @@ -25,7 +26,7 @@ plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) { } # Set y-axis max limit - hist.data <- hist(dpclust.df$subclonal.fraction, breaks = n.breaks, plot = FALSE); + hist.data <- hist(dpclust.df$CCF, breaks = n.breaks, plot = FALSE); ymax <- ceiling(max(hist.data$counts) / 10) * 10; # Set y-axis increments yseq <- if (ymax > 1000) { @@ -46,7 +47,7 @@ plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) { return( BoutrosLab.plotting.general::create.histogram( filename = save.plt, - x = dpclust.df$subclonal.fraction, + x = dpclust.df$CCF, xlab.label = 'CCF', ylab.label = 'SNV Count', xlab.top.label = paste('Total SNVs Clustered:', num.snv), From acc41d662c92002e9b622a2aa9f19e032a2d9e5a Mon Sep 17 00:00:00 2001 From: whelena Date: Fri, 22 Nov 2024 15:48:54 -0800 Subject: [PATCH 03/12] generalize density calculation function --- R/create.clone.genome.distribution.densityplot.R | 15 ++++++++++----- R/create.clone.genome.distribution.plot.R | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/R/create.clone.genome.distribution.densityplot.R b/R/create.clone.genome.distribution.densityplot.R index e45b7d3..d6d673c 100644 --- a/R/create.clone.genome.distribution.densityplot.R +++ b/R/create.clone.genome.distribution.densityplot.R @@ -25,11 +25,16 @@ create.clone.genome.distribution.densityplot <- function( )); } -calculate.density.and.scale <- function(cluster.df) { - density <- density(x = cluster.df$genome.pos, bw = 'nrd', adjust = 0.05, na.rm = TRUE); - density.df <- as.data.frame(density[c('x','y')]); - density.df$clone.id <- unique(cluster.df$clone.id); - density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y; +calculate.density.and.scale <- function( + x, + value = 'genome.pos', + group = 'clone.id' + ) { + + density <- density(x = x[[value]], bw = 'nrd', adjust = 0.05, na.rm = TRUE); + density.df <- as.data.frame(density[c('x', 'y')]); + density.df$clone.id <- unique(x[[group]]); + density.df$count <- nrow(x) / sum(density.df$y) * density.df$y; return(density.df) } diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R index 2360d86..14479b7 100644 --- a/R/create.clone.genome.distribution.plot.R +++ b/R/create.clone.genome.distribution.plot.R @@ -95,7 +95,7 @@ create.clone.genome.distribution.plot.per.sample <- function( next; } density.list[[k]] <- calculate.density.and.scale( - cluster.df = sample.df[sample.df$clone.id == k, ] + x = sample.df[sample.df$clone.id == k, ] ); } density.df <- do.call(rbind, density.list); From f8d6f03e9be901c5ea36739123472d76abdc0a50 Mon Sep 17 00:00:00 2001 From: whelena Date: Sat, 23 Nov 2024 17:51:49 -0800 Subject: [PATCH 04/12] generalize density calculation --- R/calculate.density.R | 19 ++++ R/calculate.density.each.clone.R | 17 --- ...te.clone.genome.distribution.densityplot.R | 16 +-- R/create.clone.genome.distribution.plot.R | 5 +- R/plot.clone.densityplot.R | 103 ------------------ R/plot.clones.densityplot.histogram.R | 49 --------- R/plot.snv.histogram.R | 80 -------------- 7 files changed, 23 insertions(+), 266 deletions(-) create mode 100644 R/calculate.density.R delete mode 100644 R/calculate.density.each.clone.R delete mode 100644 R/plot.clone.densityplot.R delete mode 100644 R/plot.clones.densityplot.histogram.R delete mode 100644 R/plot.snv.histogram.R diff --git a/R/calculate.density.R b/R/calculate.density.R new file mode 100644 index 0000000..185393c --- /dev/null +++ b/R/calculate.density.R @@ -0,0 +1,19 @@ +calculate.density <- function( + x, + value = 'genome.pos', + group = 'clone.id', + scale = TRUE, + ... + ) { + + if (nrow(x) <= 1) { + return(NULL); + } + density <- density(x = x[[value]], bw = 'nrd', na.rm = TRUE, ...); + density.df <- as.data.frame(density[c('x', 'y')]); + density.df$clone.id <- unique(x[[group]]); + if (scale) { + density.df$y <- nrow(x) / sum(density.df$y) * density.df$y; + } + return(density.df) + } \ No newline at end of file diff --git a/R/calculate.density.each.clone.R b/R/calculate.density.each.clone.R deleted file mode 100644 index dbb1d04..0000000 --- a/R/calculate.density.each.clone.R +++ /dev/null @@ -1,17 +0,0 @@ -## Function: calculate.density.each.clone --------------------------------------------------------- -calculate.density.each.clone <- function(cluster.df, cloneID) { - # Skip clusters with only one SNV - if (nrow(cluster.df) <= 1) { - return(NULL); - } - density <- density( - x = cluster.df$CCF, - bw = 'nrd', - adjust = 1.2, - na.rm = TRUE - ); - density.df <- as.data.frame(density[c('x','y')]); - density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y; - density.df$clone.id <- cloneID; - return(density.df) - } diff --git a/R/create.clone.genome.distribution.densityplot.R b/R/create.clone.genome.distribution.densityplot.R index c301fdd..3bf4c3f 100644 --- a/R/create.clone.genome.distribution.densityplot.R +++ b/R/create.clone.genome.distribution.densityplot.R @@ -8,7 +8,7 @@ create.clone.genome.distribution.densityplot <- function( return(BoutrosLab.plotting.general::create.scatterplot( filename = save.plt, - formula = count ~ x, + formula = y ~ x, data = density.df, groups = density.df$clone.id, xlab.label = 'Chromosome', @@ -24,17 +24,3 @@ create.clone.genome.distribution.densityplot <- function( ... )); } - -calculate.density.and.scale <- function( - x, - value = 'genome.pos', - group = 'clone.id' - ) { - - density <- density(x = x[[value]], bw = 'nrd', adjust = 0.05, na.rm = TRUE); - density.df <- as.data.frame(density[c('x', 'y')]); - density.df$clone.id <- unique(x[[group]]); - density.df$count <- nrow(x) / sum(density.df$y) * density.df$y; - - return(density.df) - } diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R index e12596a..daa19f4 100644 --- a/R/create.clone.genome.distribution.plot.R +++ b/R/create.clone.genome.distribution.plot.R @@ -104,8 +104,9 @@ create.clone.genome.distribution.plot.per.sample <- function( warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV')); next; } - density.list[[k]] <- calculate.density.and.scale( - x = sample.df[sample.df$clone.id == k, ] + density.list[[k]] <- calculate.density( + x = sample.df[sample.df$clone.id == k, ], + adjust = 0.05 ); } density.df <- do.call(rbind, density.list); diff --git a/R/plot.clone.densityplot.R b/R/plot.clone.densityplot.R deleted file mode 100644 index 3b1cfb2..0000000 --- a/R/plot.clone.densityplot.R +++ /dev/null @@ -1,103 +0,0 @@ -## Function: plot.clone.densityplot ---------------------------------------------------------------- -plot.clone.densityplot <- function( - dpclust.df, - sampleID, - src.tool, - output.dir - ) { - # Create table of densities for plotting each cluster - density.list <- list(); - for (clone in unique(dpclust.df$clone.id)) { - clone.df <- calculate.density.each.clone( - cluster.df = dpclust.df[dpclust.df$clone.id == clone, ], - cloneID = clone - ); - density.list[[clone]] <- clone.df; - } - density.df <- do.call(rbind, density.list); - - # Calculate average CCF per cluster - cluster.meanCCFs <- unique(dpclust.df$location); - - # Get plot legend - clone.IDs <- unique(density.df$clone.id); - colors.qual <- BoutrosLab.plotting.general::default.colours(12); - cluster.colours <- setNames(colors.qual[1:length(clone.IDs)], clone.IDs); - cluster.legend <- BoutrosLab.plotting.general::legend.grob( - list( - legend = list( - title = 'Clone', - labels = as.character(names(cluster.colours)), - colours = c(cluster.colours), - border = 'black' - ) - ), - size = 1, - title.cex = 1, - label.cex = 1 - ); - - # Set y-axis max limit and y-increments - ymax <- ceiling(max(density.df$count) / 5) * 5; - yseq <- if (ymax > 1000) { - 100; - } else if (ymax > 100) { - 20; - } else if (ymax > 20) { - 10; - } else { - 5; - } - - save.plt <- file.path( - output.dir, - paste(sampleID, src.tool, 'clone_densityplot.png', sep = '_') - ); - return( - BoutrosLab.plotting.general::create.scatterplot( - filename = save.plt, - formula = count ~ x, - data = density.df, - groups = density.df$clone.id, - xlab.label = 'CCF', - ylab.label = 'SNV Count', - xlab.top.label = paste('Total SNVs Clustered:', nrow(dpclust.df)), - xlab.top.cex = 1.5, - xlab.top.x = 0.5, - xlab.top.y = 1, - xlab.cex = 1.5, - ylab.cex = 1.5, - xaxis.cex = 1.2, - yaxis.cex = 1.2, - xlimits = c(0, 1.5), - ylimits = c(0, ymax), - xat = seq(0, 1.5, 0.25), - yat = seq(0, ymax, yseq), - xaxis.tck = c(1, 0), - yaxis.tck = c(1, 0), - col = cluster.colours, - type = 'l', - lwd = 3, - abline.v = cluster.meanCCFs, - abline.lwd = 0.5, - abline.lty = 'longdash', - # add.text = TRUE, - text.labels = lapply(cluster.meanCCFs, round, 2), - text.x = cluster.meanCCFs, - text.y = ymax - 1, - text.fontface = 'plain', - text.cex = 1, - legend = list( - right = list( - fun = cluster.legend - ) - ), - top.padding = 2, - right.padding = 1, - left.padding = 1, - height = 6, - width = 6.5, - resolution = 800 - ) - ); - } diff --git a/R/plot.clones.densityplot.histogram.R b/R/plot.clones.densityplot.histogram.R deleted file mode 100644 index df25f68..0000000 --- a/R/plot.clones.densityplot.histogram.R +++ /dev/null @@ -1,49 +0,0 @@ -# Description: Plot density curves of clones from SRC tool results and histograms of SNVs per clone - -### PREAMBLE ####################################################################################### -library(BoutrosLab.utilities); -library(BoutrosLab.plotting.general); - -# Source functions -dir.functions <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/functions'; -files.functions <- list.files(dir.functions, full.names = TRUE); -lapply(files.functions, source); - -dir.dpclust.parsed <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/call-SRC/DPClust_results_parsed'; -dir.plot.output <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/plot'; - - -### LOAD DATA ###################################################################################### -# Read in clustered SNVs data -files.dpclust <- list.files(dir.dpclust.parsed, pattern = 'DPClust_data.tsv', full.names = TRUE); -names(files.dpclust) <- regmatches( - files.dpclust, - regexpr('STGHKGFH\\d{6}-\\S{4}-\\S{3}-\\w|TCGA-\\S{2}-\\S{4}-\\S{3}', files.dpclust) - ); -list.dpclust.dfs <- lapply(files.dpclust, read.table, sep = '\t', header = TRUE); - - -### PLOTTING ####################################################################################### -# Plot histograms -for (i in 1:length(list.dpclust.dfs)) { - dpclust.df <- list.dpclust.dfs[[i]]; - sampleID <- names(list.dpclust.dfs)[i]; - plot.snv.histogram( - dpclust.df, - sampleID, - 'DPClust', - dir.plot.output - ); - } - -# Plot density plots -for (i in 1:length(list.dpclust.dfs)) { - dpclust.df <- list.dpclust.dfs[[i]]; - sampleID <- names(list.dpclust.dfs)[i]; - plot.clone.densityplot( - dpclust.df, - sampleID, - 'DPClust', - dir.plot.output - ); - } diff --git a/R/plot.snv.histogram.R b/R/plot.snv.histogram.R deleted file mode 100644 index 50f53ea..0000000 --- a/R/plot.snv.histogram.R +++ /dev/null @@ -1,80 +0,0 @@ -## Function: plot.snv.histogram -------------------------------------------------------------------- -# How to automate n.breaks? - # Use 100 if > 1000 SNVs, 50 if > 500 SNVs, 25 if > 100 SNVs, 10 if > 50 SNVs? -# How to automate ymax? - -plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) { - num.snv <- nrow(dpclust.df); - - # Automate number of breaks - # # Freedman-Diaconis Rule method - # iqr <- IQR(dpclust.df$subclonal.fraction); - # binwidth <- 2 * iqr / (num.snv^(1 / 3)); - # n.breaks <- ceiling((max(dpclust.df$subclonal.fraction) - min(dpclust.df$subclonal.fraction)) / binwidth); - - # Piecewise method: - n.breaks <- if (num.snv > 1000) { - 100; - } else if (num.snv > 500) { - 50; - } else if (num.snv > 100) { - 25; - } else if (num.snv > 40) { - 20; - } else { - 10; - } - - # Set y-axis max limit - hist.data <- hist(dpclust.df$CCF, breaks = n.breaks, plot = FALSE); - ymax <- ceiling(max(hist.data$counts) / 10) * 10; - # Set y-axis increments - yseq <- if (ymax > 1000) { - 100; - } else if (ymax > 200) { - 50; - } else if (ymax > 40) { - 20; - } else { - 10; - } - - save.plt <- file.path( - output.dir, - paste(sampleID, src.tool, 'clustered_SNVs_histogram.png', sep = '_') - ); - - return( - BoutrosLab.plotting.general::create.histogram( - filename = save.plt, - x = dpclust.df$CCF, - xlab.label = 'CCF', - ylab.label = 'SNV Count', - xlab.top.label = paste('Total SNVs Clustered:', num.snv), - xlab.top.cex = 1.5, - xlab.top.x = 0.5, - xlab.top.y = 1, - xlab.cex = 1.5, - ylab.cex = 1.5, - xaxis.cex = 1.2, - yaxis.cex = 1.2, - xlimits = c(0, 1.5), - ylimits = c(0, ymax), - xat = seq(0, 1.5, 0.25), - yat = seq(0, ymax, yseq), - xaxis.tck = c(1, 0), - yaxis.tck = c(1, 0), - breaks = n.breaks, - type = 'count', - col = 'gray80', - border.col = 'black', - lwd = 0.1, - top.padding = 1, - right.padding = 1, - left.padding = 1, - height = 6, - width = 6, - resolution = 800 - ) - ); - } From 144ddb7733b66b869faf23095e7e724d5b159d3a Mon Sep 17 00:00:00 2001 From: whelena Date: Sat, 23 Nov 2024 17:52:06 -0800 Subject: [PATCH 05/12] add function to create ccf densityplot --- R/create.ccf.densityplot.R | 115 +++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 R/create.ccf.densityplot.R diff --git a/R/create.ccf.densityplot.R b/R/create.ccf.densityplot.R new file mode 100644 index 0000000..c98c404 --- /dev/null +++ b/R/create.ccf.densityplot.R @@ -0,0 +1,115 @@ +create.ccf.densityplot <- function( + x, + filename = NULL, + clone.colours = NULL, + breaks = 100, + xlab.label = 'CCF', + ylab.label = 'SNV Density', + xlimits = c(0, 1.5), + xat = seq(0, 1.5, 0.25), + legend.size = 3, + legend.title.cex = 1.2, + legend.label.cex = 1, + legend.x = 0.8, + legend.y = 0.9, + height = 6, + width = 10, + size.units = 'in', + resolution = 1000, + ... + ) { + + if (is.null(clone.colours)) { + clone.colours <- get.colours(x$clone.id, return.names = TRUE); + } + + # calculate mean CCF and nsnv per cluster ----------------------------------------------------- + mean.ccf <- aggregate(CCF ~ clone.id, data = x, FUN = mean); + nsnv <- aggregate(SNV.id ~ clone.id, data = x, FUN = length); + + # calculate densities for each cluster -------------------------------------------------------- + density.list <- list(); + for (k in unique(x$clone.id)) { + density.list[[k]] <- calculate.density( + x = x[x$clone.id == k, ], + value = 'CCF', + adjust = 1, + scale = FALSE + ); + } + density.df <- do.call(rbind, density.list); + density.df$y <- density.df$y * (nsnv$SNV.id[match(density.df$clone.id, nsnv$clone.id)] / nrow(x)); + + + # get plot legend ----------------------------------------------------------------------------- + legend.label <- sapply(names(clone.colours), function(k) { + nsnv <- nsnv[nsnv$clone.id == k, ]$SNV.id; + return(paste0(k, ' (', nsnv, ')')); + }); + clone.legend <- BoutrosLab.plotting.general::legend.grob( + list( + legend = list( + title = 'Clone (SNVs)', + labels = legend.label[names(clone.colours)], + colours = c(clone.colours), + border = 'black' + ) + ), + size = legend.size, + title.just = 'left', + title.cex = legend.title.cex, + label.cex = legend.label.cex + ); + + ymax <- ceiling(max(density.df$y, na.rm = TRUE)); + + hist <- BoutrosLab.plotting.general::create.histogram( + x = x$CCF, + type = 'density', + col = 'gray90', + border.col = 'gray30', + lwd = 0.1, + xlab.label = xlab.label, + ylab.label = ylab.label, + xlimits = xlimits, + xat = xat, + ylimits = c(-0.05, 1.05) * ymax, + legend = list(inside = list( + fun = clone.legend, + x = legend.x, + y = legend.y + )), + ... + ); + + scatter <- BoutrosLab.plotting.general::create.scatterplot( + formula = y ~ x, + data = density.df, + groups = density.df$clone.id, + type = 'l', + lwd = 3, + col = clone.colours, + xlimits = xlimits, + ylimits = c(-0.05, 1.05) * ymax, + abline.v = mean.ccf$CCF, + abline.lwd = 0.5, + abline.lty = 'longdash', + abline.col = 'gray50', + add.text = TRUE, + text.labels = lapply(mean.ccf$CCF, round, 2), + text.x = mean.ccf$CCF, + text.y = ymax, + text.fontface = 'bold', + text.cex = legend.title.cex + ); + + combn.plt <- hist + scatter; + return(BoutrosLab.plotting.general::write.plot( + trellis.object = combn.plt, + filename = filename, + height = height, + width = width, + size.units = size.units, + resolution = resolution + )); + } \ No newline at end of file From ee0d3c656f258fca5f55923633e7d4e4f86dc3af Mon Sep 17 00:00:00 2001 From: whelena Date: Sat, 23 Nov 2024 17:59:00 -0800 Subject: [PATCH 06/12] add documentation --- man/create.ccf.densityplot.Rd | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 man/create.ccf.densityplot.Rd diff --git a/man/create.ccf.densityplot.Rd b/man/create.ccf.densityplot.Rd new file mode 100644 index 0000000..499dc7d --- /dev/null +++ b/man/create.ccf.densityplot.Rd @@ -0,0 +1,51 @@ +\name{create.ccf.densityplot} +\alias{create.ccf.densityplot} +\title{CCF Density Plot} +\description{ +Creates a density plot of cancer cell fraction (CCF) distribution across a sample. +} +\usage{ +create.ccf.densityplot( + x, + filename = NULL, + clone.colours = NULL, + breaks = 100, + xlab.label = 'CCF', + ylab.label = 'SNV Density', + xlimits = c(0, 1.5), + xat = seq(0, 1.5, 0.25), + legend.size = 3, + legend.title.cex = 1.2, + legend.label.cex = 1, + legend.x = 0.8, + legend.y = 0.9, + height = 6, + width = 10, + size.units = 'in', + resolution = 1000, + ... + ); +} +\arguments{ + \item{x}{A data-frame with the following column names: 'SNV.id', 'clone.id', 'CCF'.} + \item{filename}{Filename for tiff output, or if NULL returns the trellis object itself. Defaults to \code{NULL}.} + \item{clone.colours}{Named list to provide a colour scheme for the clone ID covariate bar. If NULL, colours will be randomly generated. Defaults to \code{NULL}.} + \item{breaks}{Number of breaks for the histogram. Defaults to 100.} + \item{xlab.label}{Defaults to \dQuote{CCF}.} + \item{ylab.label}{Defaults to \dQuote{SNV Density}.} + \item{x.limits}{Limits for the x-axis. Defaults to \code{c(0, 1.5)}.} + \item{xat}{Positions for the x-axis labels. Defaults to \code{seq(0, 1.5, 0.25)}.} + \item{legend.size}{Width of the legend boxes in 'character' units. Defaults to 3} + \item{legend.title.cex}{Size of titles in the legends. Defaults to 1.2} + \item{legend.label.cex}{Size of text labels in the legends. Defaults to 1} + \item{legend.x}{x position of the legend. Defaults to 0.8} + \item{legend.y}{y position of the legend. Defaults to 0.9} + \item{height}{Height of the plot. Defaults to 6} + \item{width}{Width of the plot. Defaults to 10} + \item{size.units}{Units for the height and width. Defaults to \dQuote{in}.} + \item{resolution}{Resolution of the plot. Defaults to 1000} + \item{...}{Pass through argument. See BoutrosLab.plotting.general::create.histogram() for further details.} +} +\value{A `grob` object of the heatmap.} +\author{Helena Winata} +\seealso{\code{\link[BoutrosLab.plotting.general]{create.histogram}}, \code{\link[BoutrosLab.plotting.general]{create.scatterplot}}} From 682fe511938b7ec88bd29ec13b67808f0b5a9db4 Mon Sep 17 00:00:00 2001 From: whelena Date: Sat, 23 Nov 2024 17:59:11 -0800 Subject: [PATCH 07/12] update NEWS.md and NAMESPACE --- NAMESPACE | 1 + NEWS.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index c5603df..2b0526e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,3 +15,4 @@ export(create.ccf.heatmap) export(create.cluster.heatmap) export(create.ccf.summary.heatmap) export(create.clone.genome.distribution.plot) +export(create.ccf.densityplot) \ No newline at end of file diff --git a/NEWS.md b/NEWS.md index d2bb81e..d567b39 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,7 +11,8 @@ * Add parameters to specify polygon shape and width. * Add option to use scale bars instead of y-axes. * Wrapper function for `SRCgrob` to automatically save plots to file -* * Add option to annotate the CCF summary heatmap with the cell values. +* Add option to annotate the CCF summary heatmap with the cell values. +* Function to generate single-sample density plot ## Update * Fixed angle calculation bug where child angles do not follow From 687562950f0e9b2e12dfee0e7afa8f41629d9a8b Mon Sep 17 00:00:00 2001 From: whelena Date: Mon, 2 Dec 2024 13:27:51 -0800 Subject: [PATCH 08/12] add option to set genome.dist legend outside of plot --- R/create.clone.genome.distribution.plot.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R index daa19f4..0efd0ca 100644 --- a/R/create.clone.genome.distribution.plot.R +++ b/R/create.clone.genome.distribution.plot.R @@ -169,6 +169,15 @@ create.clone.genome.distribution.plot.per.sample <- function( height.scatter <- 0.5 * length(unique(sample.df$clone.id)); total.height <- height.scatter + 5; + if (legend.x > 1) { + cluster.legend <- list(right = list(fun = cluster.legend)); + } else { + cluster.legend <- list(inside = list( + fun = cluster.legend, + x = legend.x, + y = legend.y + )); + } return(BoutrosLab.plotting.general::create.multipanelplot( filename = save.plt, plot.objects = list( @@ -178,11 +187,7 @@ create.clone.genome.distribution.plot.per.sample <- function( layout.width = 1, layout.height = 2, plot.objects.heights = c(height.scatter, 5) / total.height, - legend = list(inside = list( - fun = cluster.legend, - x = legend.x, - y = legend.y - )), + legend = cluster.legend, height = total.height, width = width, ... From 51585dfc1e2640512f5e716018a63d02ffb2b790 Mon Sep 17 00:00:00 2001 From: whelena Date: Thu, 20 Feb 2025 14:52:33 -0800 Subject: [PATCH 09/12] minor changes --- R/create.cluster.heatmap.R | 3 ++- R/create.phylogenetic.tree.R | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/R/create.cluster.heatmap.R b/R/create.cluster.heatmap.R index 131b81d..7e83564 100644 --- a/R/create.cluster.heatmap.R +++ b/R/create.cluster.heatmap.R @@ -15,6 +15,7 @@ create.cluster.heatmap <- function( xaxis.fontface = 'bold', y.spacing = 1, colour.scheme = c('white', 'blue'), + plot.objects.heights = c(1, 0.2), ... ) { @@ -94,7 +95,7 @@ create.cluster.heatmap <- function( plot.objects = list(hm, cov), layout.width = 1, layout.height = 2, - plot.objects.heights = c(1, 0.2), + plot.objects.heights = plot.objects.heights, legend = list(right = list( fun = legend.clone )), diff --git a/R/create.phylogenetic.tree.R b/R/create.phylogenetic.tree.R index 3606d25..e4b6154 100644 --- a/R/create.phylogenetic.tree.R +++ b/R/create.phylogenetic.tree.R @@ -12,6 +12,9 @@ create.phylogenetic.tree <- function( tree <- as.data.frame(tree); if ('node.id' %in% colnames(tree)) { rownames(tree) <- tree$node.id; + if (!'label' %in% colnames(tree)) { + tree$label <- tree$node.id; + } } plt <- SRCGrob( From 1fa392789cc091648669dd83c21710677d9b4d37 Mon Sep 17 00:00:00 2001 From: whelena Date: Thu, 20 Feb 2025 15:53:59 -0800 Subject: [PATCH 10/12] fix warnings --- man/create.ccf.densityplot.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/create.ccf.densityplot.Rd b/man/create.ccf.densityplot.Rd index 499dc7d..30ff75d 100644 --- a/man/create.ccf.densityplot.Rd +++ b/man/create.ccf.densityplot.Rd @@ -33,7 +33,7 @@ create.ccf.densityplot( \item{breaks}{Number of breaks for the histogram. Defaults to 100.} \item{xlab.label}{Defaults to \dQuote{CCF}.} \item{ylab.label}{Defaults to \dQuote{SNV Density}.} - \item{x.limits}{Limits for the x-axis. Defaults to \code{c(0, 1.5)}.} + \item{xlimits}{Limits for the x-axis. Defaults to \code{c(0, 1.5)}.} \item{xat}{Positions for the x-axis labels. Defaults to \code{seq(0, 1.5, 0.25)}.} \item{legend.size}{Width of the legend boxes in 'character' units. Defaults to 3} \item{legend.title.cex}{Size of titles in the legends. Defaults to 1.2} From 7273ea709e94e8abff481acde9beaa635827d459 Mon Sep 17 00:00:00 2001 From: whelena Date: Thu, 20 Feb 2025 16:06:57 -0800 Subject: [PATCH 11/12] fix CICD and R CMD check stuff --- R/calculate.density.R | 2 +- R/create.ccf.densityplot.R | 2 +- man/create.cluster.heatmap.Rd | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/calculate.density.R b/R/calculate.density.R index 185393c..191bae7 100644 --- a/R/calculate.density.R +++ b/R/calculate.density.R @@ -16,4 +16,4 @@ calculate.density <- function( density.df$y <- nrow(x) / sum(density.df$y) * density.df$y; } return(density.df) - } \ No newline at end of file + } diff --git a/R/create.ccf.densityplot.R b/R/create.ccf.densityplot.R index c98c404..70e2105 100644 --- a/R/create.ccf.densityplot.R +++ b/R/create.ccf.densityplot.R @@ -112,4 +112,4 @@ create.ccf.densityplot <- function( size.units = size.units, resolution = resolution )); - } \ No newline at end of file + } diff --git a/man/create.cluster.heatmap.Rd b/man/create.cluster.heatmap.Rd index 1b89877..d9cf340 100644 --- a/man/create.cluster.heatmap.Rd +++ b/man/create.cluster.heatmap.Rd @@ -22,6 +22,7 @@ create.cluster.heatmap( xaxis.fontface = 'bold', y.spacing = 1, colour.scheme = c('white', 'blue'), + plot.objects.heights = c(1, 0.2), ... ); } @@ -42,6 +43,7 @@ create.cluster.heatmap( \item{xaxis.fontface}{Defaults to \dQuote{bold}.} \item{y.spacing}{Spacing between heatmap and clone covariate bar. Defaults to 1} \item{colour.scheme}{Colour scheme for the heatmap. Defaults to \code{c('white', 'blue')}.} + \item{plot.objects.heights}{Object heights. Defaults to \code{c(1, 0.2)}.} \item{...}{Pass through argument. See BoutrosLab.plotting.general::create.heatmap() for further details.} } \value{A `grob` object of the heatmap.} From b5732294a5ee74807aee63d08a572e481353abe8 Mon Sep 17 00:00:00 2001 From: whelena Date: Mon, 10 Mar 2025 14:31:27 -0700 Subject: [PATCH 12/12] address comments --- R/calculate.density.R | 2 +- R/create.ccf.densityplot.R | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/R/calculate.density.R b/R/calculate.density.R index 191bae7..bf7a12c 100644 --- a/R/calculate.density.R +++ b/R/calculate.density.R @@ -15,5 +15,5 @@ calculate.density <- function( if (scale) { density.df$y <- nrow(x) / sum(density.df$y) * density.df$y; } - return(density.df) + return(density.df); } diff --git a/R/create.ccf.densityplot.R b/R/create.ccf.densityplot.R index 70e2105..c804fc1 100644 --- a/R/create.ccf.densityplot.R +++ b/R/create.ccf.densityplot.R @@ -23,11 +23,9 @@ create.ccf.densityplot <- function( clone.colours <- get.colours(x$clone.id, return.names = TRUE); } - # calculate mean CCF and nsnv per cluster ----------------------------------------------------- mean.ccf <- aggregate(CCF ~ clone.id, data = x, FUN = mean); nsnv <- aggregate(SNV.id ~ clone.id, data = x, FUN = length); - # calculate densities for each cluster -------------------------------------------------------- density.list <- list(); for (k in unique(x$clone.id)) { density.list[[k]] <- calculate.density( @@ -40,8 +38,6 @@ create.ccf.densityplot <- function( density.df <- do.call(rbind, density.list); density.df$y <- density.df$y * (nsnv$SNV.id[match(density.df$clone.id, nsnv$clone.id)] / nrow(x)); - - # get plot legend ----------------------------------------------------------------------------- legend.label <- sapply(names(clone.colours), function(k) { nsnv <- nsnv[nsnv$clone.id == k, ]$SNV.id; return(paste0(k, ' (', nsnv, ')'));