library("pheatmap")
library("DESeq2")
library("ggplot2")
library("corrplot")
library("RColorBrewer")
library("gplots")
library("vsn")
library("genefilter")
library("EnhancedVolcano")
metadata
: tabla con nombres de muestras y sus condiciones (p. ej., control o tratamiento).counts
: matriz de conteos crudos con genes como filas y muestras como columnas.
- Importar datos
metadata <- read.csv("metadata.csv", row.names = 1)
counts <- read.csv("counts.csv", row.names = 1)
- Verificar integridad de los datos
head(metadata)
head(counts)
stopifnot(all(colnames(counts) == metadata$SampleID))
- Crear objeto DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = metadata,
design = ~Condition)
- Filtrar genes con pocos conteos
keep <- rowSums(counts(dds)) > 10
dds <- dds[keep, ]
2)Normalización
dds <- estimateSizeFactors(dds)
normalized_counts <- counts(dds, normalized = TRUE)
write.table(normalized_counts, file = "normalized_counts.txt", sep = "\t", quote = FALSE, col.names = NA)
dds <- DESeq(dds)
res <- results(dds)
write.table(res, file = "differential_expression_results.txt", sep = "\t", quote = FALSE, col.names = NA)
res.05 <- results(dds, alpha = 0.05)
summary(res.05)
plotDispEsts(dds, main = "Dispersion plot") ### Gráfico de dispersión
plotMA(res, main = "MA Plot") ### MA-plot
plotMA(res.05, main = "MA Plot (p-adj < 0.05)")
resLFC <- lfcShrink(dds, coef = "Condition_Treatment_vs_Control", type = "apeglm")
plotMA(resLFC, ylim = c(-2, 2), main = "MA Plot with LFC Shrinkage")
vsd <- vst(dds, blind = FALSE)
plotPCA(vsd, intgroup = c("Condition"))
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(metadata$Condition, metadata$SampleID, sep = "-")
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists, col = colors)
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:50]
pheatmap(assay(vsd)[select, ], cluster_rows = TRUE, scale = "row", show_rownames = FALSE)
EnhancedVolcano(resLFC,
x = "log2FoldChange",
y = "padj",
lab = rownames(resLFC),
xlim = c(-5, 5),
ylim = c(0, -log10(1e-6)),
pCutoff = 0.05,
FCcutoff = 2)
pdf("heatmap_genes.pdf")
pheatmap(assay(vsd)[select, ], cluster_rows = TRUE, scale = "row", show_rownames = FALSE)
dev.off()
pdf("pca_plot.pdf")
plotPCA(vsd, intgroup = c("Condition"))
dev.off()