#' Plotting a scatterplot
#'
#' @param expr_dt_path A file path of the expression table file
#' @param meta_dt_path A file path of the metadata file
#' @param output_dir A directory of the output file(s)
#' @import ggplot2
#' @import data.table
#' @importFrom dplyr %>%
#' @importFrom ggthemes theme_few
#' @importFrom edgeR DGEList
#' @importFrom edgeR filterByExpr
#' @importFrom edgeR calcNormFactors
#' @importFrom limma voom
#' @importFrom limma lmFit
#' @importFrom limma eBayes
#' @importFrom limma topTable
#' @export
plot_corr <- function(expr_dt_path,meta_dt_path,output_dir){
expr_dt <- read.csv(expr_dt_path)
meta_dt <- read.csv(meta_dt_path)
ref_snrcorr_dir <- file.path(system.file(package = "ProtQC"), "data/ref_snrcorr.rds")
snrcorr <- readRDS(ref_snrcorr_dir)
ref_dt_dir <- file.path(system.file(package = "ProtQC"), "data/example_ref_dt.rds")
ref_dt <- readRDS(ref_dt_dir)
group <- factor(meta_dt$sample)
expr_matrix <- as.matrix(data.frame(expr_dt[,2:ncol(expr_dt)],row.names = expr_dt$rowname))
expr_matrix[is.na(expr_matrix)] <- 0
result_final <- c()
sample_pairs <- combn(levels(group),2)
for(i in 1:ncol(sample_pairs)){
v1 <- expr_matrix[,grep(sample_pairs[1,i],group)]
v2 <- expr_matrix[,grep(sample_pairs[2,i],group)]
dge <- DGEList(counts = cbind(v1,v2))
sample_pair <- factor(rep(1:2,c(ncol(v1),ncol(v2))))
design <- model.matrix(~sample_pair)
keep <- filterByExpr(dge, design)
dge <- dge[keep,,keep.lib.sizes=FALSE]
dge <- calcNormFactors(dge)
v <- voom(dge, design, plot=F)
fit <- lmFit(v, design)
fit <- eBayes(fit)
result <- topTable(fit, coef=ncol(design), sort.by = 'logFC', number = Inf)
result$gene = rownames(result)
result$sample_pair1 = sample_pairs[1,i]
result$sample_pair2 = sample_pairs[2,i]
result$sample_pair = paste(sample_pairs[2,i],sample_pairs[1,i],sep = '.')
result_final <-rbind(result_final,result)
}
test_dt <- as.data.table(result_final)
sample_pairs <- unique(test_dt$sample_pair)
df_cor <- c()
for(i in 1:length(sample_pairs)){
df <- test_dt[test_dt$sample_pair %in% sample_pairs[i],]
df_perpair <- df[df$adj.P.Val<0.05,c(7,1)]
df_ref <- ref_dt[ref_dt$sample_pair %in% sample_pairs[i],]
df_ref_perpair <- df_ref[df_ref$adj.P.Val<0.05,c(7,1)]
df_test_perpair <- merge(df_perpair,df_ref_perpair,by.x = 'gene',by.y = 'gene')
REC_value <- cor(df_test_perpair$logFC.x,df_test_perpair$logFC.y)
df_cor <- rbind(df_cor,data.frame(sample_pair = sample_pairs[i],REC = REC_value))
}
sample_pair <- df_cor[order(df_cor$REC)[3],1]
cor_value <- df_cor[order(df_cor$REC)[3],2]
cor_value <- round(cor_value,3)
cor_value_rank_length <- length(rank(c(cor_value,snrcorr$COR)))
cor_value_rank <- c(cor_value_rank_length-rank(c(cor_value,snrcorr$COR))[1]+1)
if(cor_value_rank/cor_value_rank_length<=0.25){cor_value_performance <- "high"
}else if(cor_value_rank/cor_value_rank_length<=0.5){cor_value_performance <- "mid-high"
}else if(cor_value_rank/cor_value_rank_length<=0.75){cor_value_performance <- "mid-low"
}else cor_value_performance <- "low"
historical_mean <- round(mean(snrcorr$COR),3)
historical_sd <- round(sd(snrcorr$COR),3)
output_cor_value <- data.table(
"Quality Metrics" = c("Correlation with Reference Datasets (COR)"),
"Value" = c(cor_value),
"Historical Value (mean ± SD)" = paste(historical_mean,' ± ',historical_sd,sep = ''),
"Rank" = c(paste(as.character(cor_value_rank),'/',cor_value_rank_length,sep = '')),
"Performance" = cor_value_performance
)
test_dt <- data.frame(test_dt)
df <- test_dt[test_dt$sample_pair %in% sample_pair,]
df_perpair <- df[df$adj.P.Val<0.05,c(7,1)]
ref_dt <- data.frame(ref_dt)
df_ref <- ref_dt[ref_dt$sample_pair %in% sample_pair,]
df_ref_perpair <- df_ref[df_ref$adj.P.Val<0.05,c(7,1)]
df_test_perpair <- merge(df_perpair,df_ref_perpair,by.x = 'gene',by.y = 'gene')
cor(df_test_perpair$logFC.x,df_test_perpair$logFC.y)
p <- ggplot(df_test_perpair,aes(x=logFC.x, y=logFC.y))+
geom_point(color='steelblue4', size=2.5, alpha=.1)+
scale_fill_brewer(palette = 'Blues')+
theme_few()+
theme(axis.text = element_text(family="Arial",size=16,face="plain",color = "black"),
axis.title = element_text(family="Arial",size=16,face='plain',color = "black"),
legend.text = element_text(family="Arial",size=16,color = "black"),
plot.subtitle = element_text(family="Arial",face='plain',color = "black",hjust=0.5,size=16),
plot.title = element_text(family="Arial",face='plain',color = "black",hjust=0.5,size=16))+
labs(x='Test Dataset',
y='Reference Datasets',
title=paste("Correlation = ",cor_value,sep=""),
subtitle = paste("(N = ",nrow(df_test_perpair),')',sep=""))+
coord_fixed(
xlim = c(-max(df_test_perpair$logFC.y),max(df_test_perpair$logFC.y)),
ylim = c(-max(df_test_perpair$logFC.y),max(df_test_perpair$logFC.y)))
output_dir_final1 <- file.path(output_dir,'corr_plot.png')
output_dir_final2 <- file.path(output_dir,'deps_table.tsv')
output_dir_final3 <- file.path(output_dir,'corr_table.tsv')
ggsave(output_dir_final1,p,height = 5.5,width = 5.5)
write.table(test_dt,output_dir_final2,sep = '\t',row.names = F)
write.table(df_test_perpair,output_dir_final3,sep = '\t',row.names = F)
return(output_cor_value)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.