#'@title Generate data.frame for the precision recall curve.
#'@param result_lst a list generated by the function \code{get_predictive_values}.
#'@param gt_gr a GRanges for the ground truce of positive methylation sites, recommended to be all single based in width.
#'@param N number of points returned for each curve, default = 60.
#'@param group_name a character for the name of the group returned, default = "prc".
#'
#'@examples
#' whistle_predict <- readRDS("predictedResults.rds")
#' whistle_gr <- readRDS("exon_DRACH.rds")
#' whistle_gr$prob <- whistle_predict
#' result_lst <- readRDS("hESC_C_noGC.rds")
#' gt_gr <- whistle_gr[whistle_gr$prob > 0.5]
#' plot_df <- PRC_df(result_lst = result_lst, gt_gr = gt_gr, N = 100, group_name = "no GC")
#'
#'@import magrittr SummarizedExperiment GenomicRanges
#'@export
PRC_df <- function(result_lst, gt_gr, N = 60, group_name = "prc") {
pc_gr <- rowRanges( result_lst$dds )
gt_gr <- subsetByOverlaps(gt_gr, pc_gr)
pc_gr <- resize(pc_gr,201,fix = "center")
#####################################################
# precision-recall for deseq2 p values #
#####################################################
pred_pvalue <- result_lst$DESeq2Results$pvalue
pred_pvalue[is.na(pred_pvalue)] <- 1
pred_pvalue <- -log10(pred_pvalue)
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
pred_pvalue[pred_pvalue == Inf] <- max(pred_pvalue[pred_pvalue != Inf])
cutoff <- quantile(pred_pvalue, seq(0,1,length.out = N), na.rm = TRUE)
for(i in 1:N){
pred_P <- pc_gr[which(pred_pvalue >= cutoff[i])]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
vec_pcc <- c(vec_pcc, 1)
vec_recall <- c(vec_recall, 0)
#plot(vec_recall,vec_pcc)
plot_df <- data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "deseq_pvalue",
group = group_name )
#####################################################
# precision-recall for deseq2 log2FC #
#####################################################
pred_log2FC <- result_lst$DESeq2Results$log2FoldChange
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
cutoff <- quantile(pred_log2FC, seq(0,1,length.out = N), na.rm = TRUE)
for(i in 1:N){
pred_P <- pc_gr[which(pred_log2FC >= cutoff[i])]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
vec_pcc <- c(vec_pcc, 1)
vec_recall <- c(vec_recall, 0)
#plot(vec_recall,vec_pcc)
plot_df <- rbind(plot_df, data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "deseq_lfc",
group = group_name ))
#####################################################
# precision-recall for c-test fdr #
#####################################################
pred_cfdr <- result_lst$cons_pvalue_M
pred_cfdr[is.na(pred_cfdr)] <- 1
pred_cfdr <- -log10(pred_cfdr)
pred_cfdr[pred_cfdr == Inf] <- max(pred_cfdr[pred_cfdr != Inf])
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
cutoff <- quantile(pred_cfdr, seq(0,1,length.out = N), na.rm = TRUE)
for(i in 1:N){
pred_P <- pc_gr[rowSums( cbind( pred_cfdr > cutoff[i] ) ) >= qbinom(0.95,ncol(cbind(pred_cfdr)),0.8)]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
vec_pcc <- c(vec_pcc, 1)
vec_recall <- c(vec_recall, 0)
#plot(vec_recall,vec_pcc)
#plot_df <- plot_df[plot_df$value != "c-test_fdr",]
plot_df <- rbind(plot_df, data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "c-test_pvalue",
group = group_name ))
###########################################################
# precision-recall for random positives : ) #
###########################################################
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
n_pos <- round(exp(seq(1,log(length(pc_gr)),length.out = N)),0)
for(i in 1:N){
pred_P <- pc_gr[sample.int(length(pc_gr),n_pos[i])]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
#plot_df <- plot_df[plot_df$value != "random",]
plot_df <- rbind(plot_df, data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "random",
group = group_name ))
#####################################################
# precision-recall for c-test log2FC #
#####################################################
pred_clfc <- result_lst$cons_log2FC_M
pred_clfc[is.na(pred_clfc)] <- 1
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
cutoff <- quantile(pred_clfc, seq(0,1,length.out = N), na.rm = TRUE)
for(i in 1:N){
pred_P <- pc_gr[rowSums( cbind( pred_clfc > cutoff[i] ) ) >= qbinom(0.95,ncol(cbind(pred_clfc)),0.8)]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
vec_pcc <- c(vec_pcc, 1)
vec_recall <- c(vec_recall, 0)
#plot(vec_recall,vec_pcc)
#plot_df <- plot_df[plot_df$value != "c-test_fdr",]
plot_df <- rbind(plot_df, data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "c-test_lfc",
group = group_name ))
######################################################################
# precision-recall for a combination of several metrics #
######################################################################
indx_ctest_lfc <- rowSums( cbind(result_lst$cons_log2FC_M > 1) ) >= qbinom(0.95, ncol(cbind(result_lst$cons_log2FC_M)), 0.8)
indx_ctest_pvalue <- rowSums( cbind( pred_cfdr > -log10(0.05) ) ) >= qbinom(0.95, ncol(cbind(pred_cfdr)), 0.8)
pval_subset <- result_lst$DESeq2Results$pvalue[indx_ctest_lfc&indx_ctest_pvalue]
pval_subset <- -log10(pval_subset)
pval_subset[pval_subset == Inf] <- max(pval_subset[pval_subset != Inf])
vec_pcc <- rep(NA,N)
vec_recall <- rep(NA,N)
cutoff <- quantile(pval_subset, seq(0,1,length.out = N), na.rm = TRUE)
indx_sub <- indx_ctest_lfc & indx_ctest_pvalue
for(i in 1:N){
pred_P <- pc_gr[which(pred_pvalue >= cutoff[i] & indx_sub)]
vec_pcc[i] <- mean(pred_P %over% gt_gr)
vec_recall[i] <- mean(gt_gr %over% pred_P)
}
vec_pcc <- c(vec_pcc, 1)
vec_recall <- c(vec_recall, 0)
#plot(vec_recall,vec_pcc)
#plot_df <- plot_df[plot_df$value != "combinded_cutoff",]
plot_df <- rbind(plot_df, data.frame(recall = vec_recall,
pcc = vec_pcc,
value = "combinded_cutoff",
group = group_name ) )
plot_df[is.na(plot_df)] = 1
return(plot_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.