Skip to content

Commit d8dc27d

Browse files
committed
accept df
1 parent 6bef9e7 commit d8dc27d

6 files changed

+182
-65
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ importFrom(GenomicRanges,makeGRangesListFromDataFrame)
8787
importFrom(Matrix,colSums)
8888
importFrom(S4Vectors,metadata)
8989
importFrom(SummarizedExperiment,SummarizedExperiment)
90+
importFrom(SummarizedExperiment,as.data.frame)
9091
importFrom(SummarizedExperiment,assays)
9192
importFrom(SummarizedExperiment,colData)
9293
importFrom(SummarizedExperiment,rowData)

R/functions_SE.R

+33-9
Original file line numberDiff line numberDiff line change
@@ -1446,6 +1446,33 @@ univariable_differential_tissue_composition_SE = function(
14461446
unnest(surv_test, keep_empty = TRUE)
14471447
}
14481448

1449+
#' Resolve complete confounders of non-interest in a sample-annotation data frame
#'
#' Applies `resolve_complete_confounders_of_non_interest_pair_df()` to every
#' pair of the selected factor columns, in the order produced by `combn()`.
#'
#' @param df A data frame of sample annotations.
#' @param ... Columns of `df` (tidyselect syntax) holding the factors of
#'   non-interest to examine.
#'
#' @return `df` after all pairwise resolutions have been applied. When fewer
#'   than two columns are selected there is no pair to examine and `df` is
#'   returned unchanged.
#'
#' @noRd
.resolve_complete_confounders_of_non_interest_df <- function(df, ...){

  # Translate the tidyselect specification into plain column names
  factors_of_NON_interest <-
    df |>
    as_tibble(rownames = ".sample") |>
    select(...) |>
    suppressWarnings() |>
    colnames()

  # combn() stops with "n < m" when given fewer than two elements,
  # and with a single factor there is nothing that can be confounded
  if (length(factors_of_NON_interest) < 2) {
    return(df)
  }

  # 2 x n_pairs character matrix: one column per pair of factors
  factor_pairs <- combn(factors_of_NON_interest, 2)

  # Each resolution works on the output of the previous one, so the pairs
  # have to be processed sequentially
  for (i in seq_len(ncol(factor_pairs))) {
    df <-
      df |>
      resolve_complete_confounders_of_non_interest_pair_df(
        !!as.symbol(factor_pairs[1, i]),
        !!as.symbol(factor_pairs[2, i])
      )
  }

  df
}
14491476

14501477
#' Resolve Complete Confounders of Non-Interest
14511478
#'
@@ -1475,13 +1502,13 @@ univariable_differential_tissue_composition_SE = function(
14751502
#' # df is a data frame of sample annotations
14761503
#' resolve_complete_confounders_of_non_interest_pair_df(df, .factor_1 = factor1, .factor_2 = factor2)
14771504
#' @noRd
1478-
resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, .factor_2){
1505+
resolve_complete_confounders_of_non_interest_pair_df <- function(df, .factor_1, .factor_2){
14791506

14801507
.factor_1 <- enquo(.factor_1)
14811508
.factor_2 <- enquo(.factor_2)
14821509

14831510
cd =
1484-
colData(se) |>
1511+
df |>
14851512
as_tibble() |>
14861513
rowid_to_column() |>
14871514
distinct(rowid, !!.factor_1, !!.factor_2) |>
@@ -1516,15 +1543,12 @@ resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1,
15161543
cd = cd |>
15171544
mutate(!!.factor_2 := if_else(n1 + n2 < 3, dummy_factor_2, !!.factor_2))
15181545
}
1519-
1520-
colData(se)[,c(quo_name(.factor_1), quo_name(.factor_2))] =
1546+
1547+
df[,c(quo_name(.factor_1), quo_name(.factor_2))] =
15211548
cd |>
15221549
unnest(se_data) |>
15231550
arrange(rowid) |>
1524-
select(!!.factor_1, !!.factor_2) |>
1525-
DataFrame()
1526-
1527-
se
1551+
select(!!.factor_1, !!.factor_2)
15281552

1553+
df
15291554
}
1530-

R/methods.R

+58-12
Original file line numberDiff line numberDiff line change
@@ -4914,24 +4914,70 @@ as_matrix <- function(tbl,
49144914

49154915
#' Resolve Complete Confounders of Non-Interest
49164916
#'
4917-
#' This generic function processes a SummarizedExperiment object to handle confounders
4918-
#' that are not of interest in the analysis. It dynamically handles combinations
4919-
#' of provided factors, adjusting the data by nesting and summarizing over these factors.
4917+
#' This function identifies and resolves complete confounders among specified factors of non-interest within a `SummarizedExperiment` object.
4918+
#' Complete confounders occur when the levels of one factor are entirely predictable based on the levels of another factor.
4919+
#' Such relationships can interfere with downstream analyses by introducing redundancy or collinearity.
49204920
#'
4921+
#' The function systematically examines pairs of specified factors and determines whether they are completely confounded.
4922+
#' If a pair of factors is found to be confounded, one of the factors is adjusted or removed to resolve the issue.
4923+
#' The adjusted `SummarizedExperiment` object is returned, preserving all assays and metadata except the resolved factors.
49214924
#'
4922-
#' @param se A SummarizedExperiment object that contains the data to be processed.
4923-
#' @param ... Arbitrary number of factor variables represented as symbols or quosures
4924-
#' to be considered for resolving confounders. These factors are processed
4925-
#' in combinations of two.
4925+
#' @param se A `SummarizedExperiment` object. This object contains assay data, row data (e.g., gene annotations), and column data (e.g., sample annotations).
4926+
#' @param ... Factors of non-interest (column names from `colData(se)`) to examine for complete confounders.
49264927
#'
4927-
#' @rdname resolve_complete_confounders_of_non_interest-methods
4928+
#' @details
4929+
#' Complete confounders of non-interest can create dependencies between variables that may bias statistical models or violate their assumptions.
4930+
#' This function systematically addresses this by:
4931+
#' 1. Identifying pairs of factors in the specified columns that are fully confounded.
4932+
#' 2. Resolving confounding by adjusting or removing one of the factors from the `colData` slot.
49284933
#'
4929-
#' @return A modified SummarizedExperiment object with confounders resolved.
4934+
#' The resolution strategy depends on the analysis context and can be modified in the helper function
4935+
#' `resolve_complete_confounders_of_non_interest_pair_df()`. By default, the helper keeps both factor columns and rewrites the confounded values of one of them with a dummy level.
4936+
#'
4937+
#' @return
4938+
#' A `SummarizedExperiment` object with resolved confounders. The object retains its structure, including assays and metadata,
4939+
#' but the column data (`colData`) is updated to reflect the resolved factors.
49304940
#'
49314941
#' @examples
4932-
#' # Not run:
4933-
#' # se is a SummarizedExperiment object
4934-
#' # resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3)
4942+
#' # Load necessary libraries
4943+
#' library(SummarizedExperiment)
4944+
#' library(dplyr)
4945+
#'
4946+
#' # Sample annotations
4947+
#' sample_annotations <- data.frame(
4948+
#' sample_id = paste0("Sample", seq(1, 9)),
4949+
#' factor_of_interest = c(rep("treated", 4), rep("untreated", 5)),
4950+
#' A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"),
4951+
#' B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"),
4952+
#' C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"),
4953+
#' stringsAsFactors = FALSE
4954+
#' )
4955+
#'
4956+
#' # Simulated assay data
4957+
#' assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9)
4958+
#'
4959+
#' # Row data (e.g., gene annotations)
4960+
#' row_data <- data.frame(gene_id = paste0("Gene", seq_len(100)))
4961+
#'
4962+
#' # Create SummarizedExperiment object
4963+
#' se <- SummarizedExperiment(
4964+
#' assays = list(counts = assay_data),
4965+
#' rowData = row_data,
4966+
#' colData = DataFrame(sample_annotations)
4967+
#' )
4968+
#'
4969+
#' # Apply the function to resolve confounders
4970+
#' se_resolved <- resolve_complete_confounders_of_non_interest(se, A, B, C)
4971+
#'
4972+
#' # View the updated column data
4973+
#' colData(se_resolved)
4974+
#'
4975+
#' @seealso
4976+
#' \code{\link[SummarizedExperiment]{SummarizedExperiment}} for creating and handling `SummarizedExperiment` objects.
4977+
#'
4978+
#' @importFrom dplyr select
4979+
#' @importFrom rlang set_names
4980+
#' @importFrom tibble as_tibble
49354981
#' @export
49364982
setGeneric("resolve_complete_confounders_of_non_interest", function(se, ...) {
49374983
standardGeneric("resolve_complete_confounders_of_non_interest")

R/methods_SE.R

+8-24
Original file line numberDiff line numberDiff line change
@@ -2816,33 +2816,17 @@ setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcr
28162816
#' @importFrom dplyr select
28172817
#' @importFrom rlang set_names
28182818
#' @importFrom tibble as_tibble
2819+
#' @importFrom SummarizedExperiment as.data.frame
28192820
.resolve_complete_confounders_of_non_interest <- function(se, ...){

  # Resolve complete confounders among the factors of non-interest given in
  # `...` (columns of `colData(se)`) and return `se` with its colData updated.
  # Assays and row data are not touched.
  #
  # The pairwise resolution itself is delegated to the data-frame helper, so
  # the sample annotations are converted to a plain data.frame and back.
  # `optional = TRUE` and `check.names = FALSE` stop the two conversions from
  # running make.names() on the column names, which would otherwise silently
  # rename non-syntactic colData columns (including unrelated ones).
  colData(se) <-
    colData(se) |>
    as.data.frame(optional = TRUE) |>
    .resolve_complete_confounders_of_non_interest_df(...) |>
    DataFrame(check.names = FALSE)

  se
}
28472831

28482832
#' resolve_complete_confounders_of_non_interest

man/resolve_complete_confounders_of_non_interest-methods.Rd

+8-20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/resolve_complete_confounders_of_non_interest.Rd

+74
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)