Nothing
library(MASS)
# x: list of numbers
# y: list of numbers
# robust: boolean. indicate if using robust correlation
# qtl: numeric between (0,1), only meaningful when robust is turned on. indicate the fraction of the data that is considered to be "good portion" and will participate in the correlation calculation
# check the correlation between x and y after excluding outliers defined by the qtl
# note that if in IQR is 0 in either x or y, OR x and y are completely colinear, robust correlation will be NA and thus filled by 0
safe_cor = function(x, y, robust = TRUE, qtl = 0.95){
res = tryCatch({
if(robust){
cov.rob(cbind(x,y), cor = TRUE, quantile.used = round(qtl*length(x)) ,method = "mve")$cor[1,2]
}else{
cor(x,y)
}
}, error = function(cond){
0
})
# in regular correlation if all constant, then it is going to be NA
if (is.na(res)){
return(0)
}else{
return(res)
}
}
# Z.true is an array of k by m by n, the true tensor
# Z.hat is an array of k by m by n, the estimated tensor
# eval.feature.source is a matrix of logical values that is number of features by number of sources. only calculate correlation on feature-source that has TRUE in this matrix
# robust: boolean. indicate if using robust correlation
# qtl: numeric between (0,1), only meaningful when robust is turned on. indicate the fraction of the data that is considered to be "good portion" and will participate in the correlation calculation
# return a m by k correltion matrix, each column is a different source and each row is a different feature,
# each entry is a correlation score, indicating that feautre, that source's tensor estimated across samples
# those entry with FALSE in the eval.feature.source is going to be set to NA
calc_Z_corrs <- function(Z.true, Z.hat, eval.feature.source = NULL, robust = TRUE, qtl = 0.95){
m = dim(Z.true)[2]
k = dim(Z.true)[1]
Z.corrs = matrix(NA, m, k)
rownames(Z.corrs) = dimnames(Z.true)[[2]]
colnames(Z.corrs) = dimnames(Z.true)[[1]]
# if eval.feature.source is not present, return all results
if(is.null(eval.feature.source)){
eval.feature.source = matrix(TRUE, m, k)
}
for (h in 1:k){
for (j in 1:m){
if(eval.feature.source[j,h]){
Z.corrs[j,h] <- safe_cor(Z.true[h,j,], Z.hat[h,j,], robust, qtl)
}else{
Z.corrs[j,h] <- NA
}
}
}
return(Z.corrs)
}
library("ggplot2")
library("ggpubr")
library("hexbin")
plot_qq_compare <- function(pvals_mat1, pvals_mat2, labels, ggarrange.nrow = 1, ggarrange.ncol = 1,
alpha = 0.05, text.size = 10,
xlab = "pval1", ylab = "pval2", title = ""){
pvals1 = list()
pvals2 = list()
for (l in 1:ncol(pvals_mat1)){
pvals1[[l]] = pvals_mat1[,l]
pvals2[[l]] = pvals_mat2[,l]
}
qqplots <- lapply(1:length(pvals1), function(p){
df <- data.frame(pvals.1 = -log10(pvals1[[p]]),
pvals.2 = -log10(pvals2[[p]]));
qqplot <- ggplot(df, aes(x = pvals.1, y = pvals.2)) +
stat_binhex(geom = "point", bins=1000, size=1, alpha = alpha) +
geom_abline() +
coord_cartesian(xlim = c(min(df), max(df)),
ylim = c(min(df), max(df))) +
theme_bw() +
guides(fill="none") +
ggtitle(paste0(labels[p],
" (r = ",
round(safe_cor(df$pvals.1, df$pvals.2, robust = T, qtl = 0.95),2),
")")) +
xlab(parse(text = paste0("\'",xlab,"\'~", expression(-log[10](P))))) +
ylab(parse(text = paste0("\'",ylab,"\'~", expression(-log[10](P))))) +
theme(plot.title = element_text(hjust = 0.5, size = text.size, face="bold"))+
theme(axis.title.x = element_text(size = text.size)) +
theme(axis.text.x = element_text(size = text.size * 0.75)) +
theme(axis.title.y = element_text(size = text.size)) +
theme(axis.text.y = element_text(size = text.size * 0.75))
return(qqplot)
})
g = egg::ggarrange(plots = qqplots, ncol = ggarrange.ncol, nrow = ggarrange.nrow)
g = annotate_figure(g,top = text_grob(title,
color = "black", face = "bold", size = text.size + 5))
return(g)
}
plot_qq<- function(pvals_mat, labels, ggarrange.nrow = 1, ggarrange.ncol = 1,
alpha = 0.5, text.size = 10,
title = ""){
pvals = list()
for (l in 1:ncol(pvals_mat)){
pvals[[l]] = pvals_mat[,l]
}
significance_th <- list(alpha/length(pvals[[1]]))
if(length(pvals)-1) significance_th[[2]] <- alpha/(length(pvals)*length(pvals[[1]]))
qqplots <- lapply(1:length(pvals), function(p){
df <- data.frame(pvals.obs = -log10(sort(pvals[[p]])),
pvals.exp = -log10(sort((1:length(pvals[[p]]))/length(pvals[[p]]))));
qqplot <- ggplot(df, aes(x = pvals.exp, y = pvals.obs)) +
stat_binhex(geom = "point", bins=1000, size=1, alpha = alpha) +
geom_abline() +
coord_cartesian(#xlim = c(min(df), max(-log10(df$pvals.exp))),
ylim = c(min(df), max(max(df), -log10(significance_th[[2]])))
#ylim = c(min(df), -log10(significance_th[[2]]))
) +
theme_bw() +
guides(fill="none") +
ggtitle(labels[p]) +
xlab(expression(Expected~-log[10](P))) +
ylab(expression(Observed~-log[10](P))) +
theme(plot.title = element_text(hjust = 0.5, size = text.size, face="bold"))+
theme(axis.title.x = element_text(size = text.size)) +
theme(axis.text.x = element_text(size = text.size * 0.75)) +
theme(axis.title.y = element_text(size = text.size)) +
theme(axis.text.y = element_text(size = text.size * 0.75))
#qqplot = qqplot + geom_hline(yintercept=-log10(significance_th[[1]]), linetype="dashed", color = "red", size=1 ) +
# geom_hline(yintercept=-log10(significance_th[[2]]), linetype="dashed", color = "red", size=0.5)
# only show the celltpye cpg wide
qqplot = qqplot + geom_hline(yintercept=-log10(significance_th[[2]]), linetype="dashed", color = "red", size=1)
return(qqplot)
})
g = egg::ggarrange(plots = qqplots, ncol = ggarrange.ncol, nrow = ggarrange.nrow)
g = annotate_figure(g,top = text_grob(title,
color = "black", face = "bold", size = text.size + 5))
return(g)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.