Nothing
#Code for benchmarking package against WGCNA
#@author Annice
#Load relevant libraries
library(WGCNA)
library(ggplot2)
library(pheatmap)
library(SurprisalAnalysis)
library(data.table)
library(bapred)
library(tidyverse)
library(GEOquery)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
#Step I: Apply Surprisal Analysis
#Read data for surprisal analysis
expr_df <- read.csv("https://zenodo.org/records/17064878/files/helper_T_cell_0_test.csv?download=1")
sa <- surprisal_analysis(expr_df)
holds <- sa[[1]]
alph <- sa[[2]]
#Step II: Apply WGCNA
#substeps:
#'
#'Consider the entire data,
#'Extract differentially expressed genes through GeoR
#'Perform batch adjustment
#'Separate out the particular sample and run WGCNA on it
#'
#Batch adjustment is not necessary for Surprisal analysis since we log normalize the data
#AND if batch effects happen we would have a constraint related to the batch
#For WGCNA, however, it is necessary
#Load data first
##Set paths
gse_id <- "GSE200250"
#out_dir <- "C:/Users/annic/Downloads/test_data"
safe_out_dir <- function(out_dir = NULL) {
if (is.null(out_dir)) {
out_dir <- file.path(tempdir(), "SurprisalBenchmark")
}
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
out_dir
}
out_dir <- safe_out_dir(NULL)
save_gsm <- FALSE
annot_path <- "https://zenodo.org/records/17064878/files/MouseWG-6_V2_0_R3_11278593_A.csv?download=1"
deg_path <- "https://zenodo.org/records/17064878/files/GSE200250.top.table.csv?download=1"
#Download GSE files
gse_obj <- getGEO(gse_id, destdir = out_dir)
gse_obj <- if (is.list(gse_obj)) gse_obj[[1]] else gse_obj
test.data <- pData(gse_obj)
gsm_ids <- test.data$geo_accession
gsm_tbls <- lapply(gsm_ids, function(id) GEOquery::Table(getGEO(id)))
names(gsm_tbls) <- gsm_ids
if (isTRUE(save_gsm)) {
invisible(mapply(function(tb, id) {
write.csv(tb, file.path(out_dir, sprintf("%s.csv", id)), row.names = FALSE)
}, gsm_tbls, gsm_ids))
}
annotation <- read.csv(annot_path, stringsAsFactors = FALSE)
probe_ids_last <- gsm_tbls[[length(gsm_tbls)]]$ID_REF
gene_by_probe <- annotation$ILMN_Gene[ match(probe_ids_last, annotation$Probe_Id) ]
tx_by_probe <- annotation$Transcript[ match(probe_ids_last, annotation$Probe_Id) ]
gene_by_probe[is.na(gene_by_probe)] <- tx_by_probe[is.na(gene_by_probe)]
keep_idx <- !duplicated(gene_by_probe)
my.rows <- gene_by_probe[keep_idx]
sample_titles <- test.data$title
df_mat <- matrix(NA_real_, nrow = length(my.rows), ncol = length(sample_titles),
dimnames = list(my.rows, sample_titles))
for (i in seq_along(gsm_ids)) {
tb <- gsm_tbls[[i]]
g <- annotation$ILMN_Gene[ match(tb$ID_REF, annotation$Probe_Id) ]
t <- annotation$Transcript[ match(tb$ID_REF, annotation$Probe_Id) ]
g[is.na(g)] <- t[is.na(g)]
dt <- data.table(gene = g, value = tb$VALUE)
agg <- dt[, .(meanmean = mean(value, na.rm = TRUE)), by = gene]
df_mat[ , i] <- agg$meanmean[ match(my.rows, agg$gene) ]
}
df <- as.data.frame(df_mat)
time.points <- vapply(sample_titles, function(s) strsplit(s, "_")[[1]][2], character(1))
ref.deg <- read.csv(deg_path)
ref.deg <- dplyr::filter(ref.deg, P.Value < 0.1)
df <- df[ rownames(df) %in% toupper(ref.deg$Gene.symbol), , drop = FALSE]
t.df <- t(data.frame(time = time.points))
colnames(t.df) <- colnames(df)
df <- rbind(df, t.df)
th.labels <- colnames(df)
#batch adjustment
th.combatted <- combatba( t(data.matrix(df[-nrow(df), , drop = FALSE])),
as.factor(time.points) )
adjusted.th.data <- th.combatted$xadj
#Typical WGCNA steps
soft_power_grid <- c(1:10, seq(12, 50, by = 2))
allowWGCNAThreads()
#IMPORTANT - Separate out only the sample we are testing - Th0 replicate 1.
filt.test.dat <- adjusted.th.data[1:9,]
R.power.table <- pickSoftThreshold(filt.test.dat,
powerVector = soft_power_grid,
networkType = "signed")[[2]]
colors <- colorRampPalette(c("#0B2447", "#00809D"))(nrow(R.power.table))
p.n.1 <- ggplot(R.power.table, aes(Power, SFT.R.sq, label = Power)) +
geom_point(color = colors, stroke = 1.5, shape=8) +
geom_text(nudge_y = 0.1) +
geom_hline(yintercept = 0.8, color = "#0B2447") +
labs(x = "Power", y = "Scale Free Topology Model Fit, signed R^2", tag = "A") +
theme_minimal() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18))
p.n.2 <- ggplot(R.power.table, aes(Power, mean.k., label = Power)) +
geom_point(color = colors, stroke = 1.5, shape = 8) +
geom_text(nudge_y = 0.1) +
labs(x = "Power", y = "Mean Connectivity", tag = "B") +
theme_minimal() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18))
p.n.1 + p.n.2
soft.thresh <- 12
bwmod <- blockwiseModules(filt.test.dat, maxBlockSize = 4780,
TOMType = "signed", power=soft.thresh,
mergeCutHeight = 0.75, numericLabels = FALSE,
RandomSeed = 1234, verbose = 3)
mod.eigengenes <- bwmod$MEs
table(bwmod$colors)
plotDendroAndColors(bwmod$dendrograms[[1]], cbind(bwmod$unmergedColors[bwmod$blockGenes[[1]]],
bwmod$colors[bwmod$blockGenes[[1]]]), c("unmerged", "merged"),
dendroLabels = FALSE,
addGuide = TRUE,
hang=0.03, guideHang=0.05)
module.gene.mapping <- as.data.frame(bwmod$colors)
module.gene.mapping %>% filter(`bwmod$colors`=="blue") %>% rownames()->blue.genes
module.gene.mapping %>% filter(`bwmod$colors`=="turquoise") %>% rownames()->turquoise.genes
module.gene.mapping %>% filter(`bwmod$colors`=="grey") %>% rownames()->grey.genes
module.gene.mapping %>% filter(`bwmod$colors`=="yellow") %>% rownames()->yellow.genes
module.gene.mapping %>% filter(`bwmod$colors`=="brown") %>% rownames()->brown.genes
module.gene.mapping %>% filter(`bwmod$colors`=="green") %>% rownames()->green.genes
entrez_ids <- mapIds(org.Mm.eg.db, keys=str_to_title(blue.genes), column="ENTREZID", keytype="SYMBOL")
GO_results <- enrichGO(gene = entrez_ids[!is.na(entrez_ids)], OrgDb = "org.Mm.eg.db", keyType="ENTREZID", ont = "BP")
fit <- plot(barplot(GO_results, showCategory = 15))
head(GO_results@result, 15)->Go.top.15
ggplot(Go.top.15, aes(x=Description, y=Count, fill=p.adjust))+geom_bar(stat="identity")+scale_fill_gradient(low = "#790915", high = "#062c5c")+theme_minimal()+
theme(
# Remove panel border
panel.border=element_blank(),
#plot.border = element_blank(),
# Remove panel grid lines
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Add axis line
axis.line = element_line(colour = "black"),
#axis.title.x = element_blank(),
axis.title.y = element_blank(),
#axis.text = element_blank(),
#legend.position = "none",
plot.title = element_text(hjust = 0.5, size=20),
#axis.text = element_text(size = 15),
text = element_text(size=18)
) +coord_flip()
percentile_GO <- 0.95 #change based on your preference
lambda_no <- 3 #change based on your preference
GO_analysis_surprisal_analysis(alph, percentile_GO, lambda_no, key_type = "SYMBOL", flip = FALSE, species.db.str = "org.Mm.eg.db", top_GO_terms=15)
ggplot(Go.top.15, aes(x=Description, y=Count, fill=p.adjust))+geom_bar(stat="identity")+scale_fill_gradient(low = "#790915", high = "#062c5c")+theme_minimal()+
theme(
# Remove panel border
panel.border=element_blank(),
#plot.border = element_blank(),
# Remove panel grid lines
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Add axis line
axis.line = element_line(colour = "black"),
#axis.title.x = element_blank(),
axis.title.y = element_blank(),
#axis.text = element_blank(),
#legend.position = "none",
plot.title = element_text(hjust = 0.5, size=20),
#axis.text = element_text(size = 15),
text = element_text(size=18)
) +coord_flip()
library(pheatmap)
library(dplyr)
module_colors <- bwmod$colors
module_sets <- split(names(module_colors), module_colors)
get_sa_set <- function(G_mat, lambda_idx = 2, top_frac = 0.05, combine_top_bottom = TRUE) {
g <- G_mat[, lambda_idx]
if (combine_top_bottom) {
k <- max(1, ceiling(top_frac * length(g)))
names(sort(abs(g), decreasing = TRUE))[seq_len(k)]
} else {
k <- max(1, ceiling(top_frac * length(g)))
list(top = names(sort(g, decreasing = TRUE))[seq_len(k)],
bot = names(sort(g, decreasing = FALSE))[seq_len(k)])
}
}
sa_sets <- list(
`SA top30% (lambda_1)` = get_sa_set(alph, lambda_idx = 2, top_frac = 0.1),
`SA top30% (lambda_2)` = get_sa_set(alph, lambda_idx = 3, top_frac = 0.1),
`SA top30% (lambda_3)` = get_sa_set(alph, lambda_idx = 4, top_frac = 0.1)
)
universe <- Reduce(intersect, list(rownames(alph), names(module_colors)))
sa_sets_u <- lapply(sa_sets, function(s) intersect(s, universe))
module_sets_u <- lapply(module_sets, function(s) intersect(s, universe))
jaccard <- function(a, b) {
u <- union(a, b); if (length(u) == 0) return(NA_real_)
length(intersect(a, b)) / length(u)
}
Jac <- sapply(module_sets_u, function(mod) vapply(sa_sets_u, jaccard, b = mod, numeric(1)))
pheatmap(Jac,
main = "Jaccard similarity: Surprisal sets vs WGCNA modules",
color = colorRampPalette(c("#f7fbff", "#6baed6", "#08306b"))(100),
display_numbers = TRUE, number_format = "%.2f", border_color = NA)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.