R/estLODR.R
In erccdashboard: Assess Differential Gene Expression Experiments with ERCC Controls

Documented in estLODR

#' Estimate Limit of Detection of Ratios (LODR)
#'
#' @param exDat    list, contains input data and stores analysis results
#' @param kind      "ERCC" or "Sim"
#' @param prob      probability, ranging from 0 - 1, default is 0.9
#' 
#' @details
#' This is the function to estimate a limit of detection of ratios (LODR) for a
#' a chosen probability and threshold p-value for the fold changes in the ERCC
#' control ratio mixtures.
#' 
#' @examples
#' data(SEQC.Example)
#'  
#' exDat <- initDat(datType="array", isNorm=FALSE, 
#'                  exTable=UHRR.HBRR.arrayDat,
#'                  filenameRoot="testRun", sample1Name="UHRR",
#'                  sample2Name="HBRR", erccmix="RatioPair", 
#'                  erccdilution = 1, spikeVol = 50, 
#'                  totalRNAmass = 2.5*10^(3), choseFDR=0.01)
#'                  
#' exDat <- est_r_m(exDat)
#'                   
#' exDat <- dynRangePlot(exDat)
#'
#' exDat <- geneExprTest(exDat)
#' 
#' exDat <- estLODR(exDat, kind = "ERCC", prob = 0.9)
#' 
#' exDat$Figures$lodrERCCPlot
#'
#' @export
#' 
estLODR <- function(exDat,kind = "ERCC", prob=0.9){
    ##############
    #### LODR ####
    ##############
    ### Select fitting parameters
    # pval.cutoff default is .001
    # probability cutoff is .9
    # kind can be ERCC or Sim
    sampleInfo <- exDat$sampleInfo
    erccInfo <- exDat$erccInfo
    plotInfo <- exDat$plotInfo
    
    pval.cutoff<- exDat$Results$p.thresh 
    filenameRoot<-sampleInfo$filenameRoot
    FCcode <- erccInfo$FCcode
    #FCcode = data.frame(Ratio = c("4:1","1:1","1.5:1","2:1"),FC=c(4,1,.667,.5))
    legendLabels = erccInfo$legendLabels
    #legendLabels = FCcode$Ratio
    
    colScale <- plotInfo$colScale
    fillScale <- plotInfo$fillScale
    
    ### Read in results for LODR ####
    ### Modify this file path to work with other ERCC data
    ### Note that you must format your provided data in the same manner as the 
    ### provided example
    ### i.e. the column headings of your selected datamust match those of the 
    ### provided examples
    
    if(kind == "ERCC"){
        erccPval <- file.exists(paste(filenameRoot,"ERCC","Pvals.csv"))
        allPval <- file.exists(paste0(filenameRoot, ".All.Pvals.csv"))
        if((erccPval == TRUE)&(allPval==TRUE)){
            pval.res = read.csv(file=paste(filenameRoot,"ERCC","Pvals.csv"),
                                header=TRUE)  
        }
        if(allPval == TRUE){
            pval.res = read.csv(file = paste0(filenameRoot, ".All.Pvals.csv"),
                                header = TRUE)
            pval.res <- pval.res[grep("ERCC-00",pval.res$Feature),]
        }else{
            cat(paste0("\n",filenameRoot," ERCC Pvals.csv file is missing."))
            cat("\nExiting LODR estimation...\n")
            return(exDat)
        }
    }
    if (kind == "Sim"){
        if(file.exists(paste(filenameRoot,"Sim Pvals.csv"))){
            pval.res<-read.csv(paste(filenameRoot,kind,"Pvals.csv"),header=TRUE)  
        }else{
            cat(paste0("\n",filenameRoot," Sim Pvals.csv is missing.",
                       "\nExiting LODR estimation...\n"))
            return(exDat)
        }  
    }
    
    
    names(pval.res)[1]= "Feature"
    
    #Check number of data points, throw warning if too few
    oneSet <- pval.res[which(pval.res$Fold == 1),]
    sizeP <- dim(oneSet)[1]
    if(!(sizeP > 5)) stop("Not enough data points for LODR estimation")
    
    cat(paste("\nEstimating",kind,"LODR\n"))
    #### Function used to estimate LODR and its uncertainty
    LODR<-function(pval,mn,cutoff,prob){
        cutoff<-log10(cutoff)
        fit<-locfit(log10(pval)~lp(log10(mn)),maxk=300)
        X<-preplot(fit,band="pred",newdata=log10(mn))
        
        ### See plot of fit
        #plot(fit,band="pred",get.data=TRUE,xlim=range(log10(mn)))
        
        find.mn<-function(mn,fit,cutoff,prob){
            X<-preplot(fit,newdata=mn,band="pred")
            (X$fit+qnorm(prob)*X$se.fit-cutoff)^2
        }
        
        rng.mn<-range(log10(mn))
        
        ### Search in sections to get first crossing 
        segmented.search<-function(fit){
            X<-preplot(fit,newdata=min(log10(mn)),band="pred")
            if((X$fit+qnorm(prob)*X$se.fit)<cutoff){
                t.lodr<-list(minimum=min(log10(mn)),objective=0)
            } else{
                ppp<-.2
                t.lodr<-optimize(find.mn,c(rng.mn[1],sum(rng.mn*c(1-ppp,ppp))),
                                 fit=fit,cutoff=cutoff,prob=prob)
                while(t.lodr$objective>.0001&ppp<=1){
                    t.lodr<-optimize(find.mn,c(sum(rng.mn*c(1-ppp+.2,ppp-.2)),
                                               sum(rng.mn*c(1-ppp,ppp))),
                                     fit=fit,cutoff=cutoff,prob=prob)
                    ppp<-ppp+.2
                }
            }
            t.lodr
        }    
        
        lodr<-segmented.search(fit)
        
        ### Bootstrap to estimate uncertainty
        lodr.boot<-NULL
        for(ii in 1:500){
            y.boot<-X$fit+sample(residuals(fit),length(mn))
            #if(ii %in%c(20*(1:5))){points(log10(mn),y.boot,col=j); j<-j+1}
            fit.boot<-locfit(y.boot~lp(log10(mn)),maxk=300)
            lodr.boot<-c(lodr.boot,segmented.search(fit.boot)$minimum)
            if (ii %% 100 == 0){
                cat ("...")
            }
        }
        return(c(lodr$objective,lodr$minimum,quantile(lodr.boot,c(.05,.95))))
    }
    
    
    #### Apply to loaded data ####
    lodr.resPlot<-NULL; set.seed(1)
    lodr.resLess<-NULL; set.seed(1)
    
    #lodr.res<-NULL; set.seed(1)
    lineDat <-NULL
    
    pval.res$Ratio = NA
    FCcode$Ratio <- as.factor(FCcode$Ratio)
    for (i in 1:nlevels(FCcode$Ratio)){
        pval.res$Ratio[which(pval.res$Fold == 
                                 FCcode$FC[i])]=as.character(FCcode$Ratio[i])
    }
    
    #pval.res$Ratio <- as.factor(pval.res$Ratio)
    pval.res$Ratio <- factor(pval.res$Ratio, 
                             levels=as.character(FCcode$Ratio))
    for(i in 1:nlevels(FCcode$Ratio)){ # Analyze fold change groups separately
        
        grp<-which(pval.res$Ratio==FCcode$Ratio[i])  ### identify group members
        
        x<-pval.res$MnSignal[grp][pval.res$Pval[grp]!=0] 
        y<-pval.res$Pval[grp][pval.res$Pval[grp]!=0]
        
        fit<-locfit(log10(y)~lp(log10(x)),maxk=300)
        #x.new<-log10(pval.res$MnSignal[grp])
        x.new<-seq(min(log10(x)),max(log10(x)),length.out=100)
        X<-preplot(fit,band="pred",newdata=x.new)
        
        x.new<-10^x.new
        fitLine = 10^(X$fit)
        fitUpper = 10^(X$fit+qnorm(prob)*X$se.fit)
        fitLower = 10^(X$fit-qnorm(prob)*X$se.fit)
        fitData = data.frame(x.new,fitLine,fitUpper,fitLower,
                             Ratio = FCcode$Ratio[i])
        lineDat = rbind(lineDat,fitData)
        
        # ### Estimate LODR
        #lodr.res<-rbind(lodr.res,c(abs(log2(FCcode$FC[i])),LODR(y,x,
        # cutoff=pval.cutoff,prob=prob)))
        
        ### Estimate LODR
        if(FCcode$FC[i]!=1){
            t.res<-LODR(y,x,cutoff=pval.cutoff,prob=prob)
            t.res[-1]<-signif(10^t.res[-1],2)
            if(t.res[1]>.01){
                t.res[2]<-Inf
                t.res[3:4]<-NA
            }
            t.resLess <- t.res
            t.resLess[-1][t.resLess[-1] == 
                              signif(min(x),2)]<-paste("<",
                                                       signif(min(x),2),sep="")
            t.res[-1][t.res[-1]==signif(min(x),2)] <- Inf
        
        if(FCcode$FC[i]==1){
            t.res<-rep(NA,4)
            t.resLess<-rep(NA,4)
        } 
        lodr.resPlot<-rbind(lodr.resPlot,c(round(abs(log2(FCcode$FC[i])),3),
                                           t.res))
        lodr.resLess <- rbind(lodr.resLess,c(round(abs(log2(FCcode$FC[i])),3),
                                             t.resLess))
        }
    }
    pval.res = subset(pval.res,pval.res$Pval != 0)
    
    colnames(lodr.resLess)[1:3]<-c("|log2(Fold)|","MinError","Estimate")
    
    colnames(lodr.resPlot)[1:3]<-c("Ratio","MinError","Estimate")
    colnames(lodr.resLess)[1:3]<-c("Ratio","MinError","Estimate")
    
    lodr.resPlot <- as.data.frame(lodr.resPlot)
    lodr.resLess <- as.data.frame(lodr.resLess)
   
    lodr.resPlot$Ratio <- as.character(legendLabels[-which(FCcode$FC == 1)])
    lodr.resLess$Ratio <- as.character(legendLabels[-which(FCcode$FC == 1)]) 
    annoTable <- lodr.resLess[-c(2)]
   
    colnames(annoTable) <- c("Ratio",expression("LODR Estimate"), 
                             expression("90% CI Lower Bound"), 
                             expression("90% CI Upper Bound"))
    #### fix this need to get rid of the "1:1" hard coding...
    #annoTable <- annoTable[-which(annoTable$Ratio == "1:1"),]
    ####
    cat("\n")
    print(annoTable, quote = FALSE, row.names = FALSE)
    
    arrowDat = data.frame(Ratio = lodr.resPlot$Ratio, 
                          x = lodr.resPlot[,3], y = pval.cutoff, 
                          xend = lodr.resPlot[,3], yend = 0)
    arrowDat$x[grep('<',lodr.resLess[,3])] <- Inf
    
    #arrowDat = arrowDat[-which(arrowDat$FC == "1"),]
   
    arrowDat = arrowDat[which(is.finite(arrowDat$x)),]
    
    if(dim(arrowDat)[1] == 0){
        cat(paste("\nWarning! Estimated distribution of p-values does not",
                  "cross threshold p-value,\n",
                  "may be due to insufficient data quantity\n",
                  "Consider adjusting FDR choice.\n"))
        #break
    }
    if(exDat$sampleInfo$datType == "array"){
        xlabDE <- xlab("Average Fluorescence Intensity")
        xrange <- c(min(pval.res$MnSignal),max(pval.res$MnSignal))
        legendPos <- theme(legend.justification=c(1,0), legend.position=c(1,0))
    }
    if(exDat$sampleInfo$datType == "count"){
        xlabDE <- xlab("Average Counts")
        xrange <- c(1,max(pval.res$MnSignal))
        legendPos <- theme(legend.justification=c(0,0), legend.position=c(0,0))
    }
    
    ## create inset table
#    ttLODR <- ttheme_default(core=list(bg_fun = rectGrob(gp=gpar(fill=NA))))
    
    my_table <- tableGrob(d=annoTable,rows=NULL)
#     my_table <- tableGrob(d=annoTable,
#                           show.rownames=FALSE,
#                           gpar.coretext =gpar(fontsize=14),
#                           gpar.coltext=gpar(fontsize=14),
#                           gpar.rowtext=gpar(fontsize=14),
#                           gpar.corefill = gpar(fill = "grey85", col = "white"), 
#                           gpar.rowfill = gpar(fill = "grey80", col = "white"),
#                           gpar.colfill = gpar(fill = "grey80", col = "white"))
    
    # Appease R CMD Check
    MnSignal <- Pval <- Ratio <- xend <- yend <- NULL
    
    if(dim(arrowDat)[1]!=0){
        LODRplot <- ggplot(pval.res, aes(x=MnSignal,
                                         y=Pval,colour=Ratio)) + 
            geom_point(size = 6) + 
            scale_x_log10(limits = xrange) + 
            scale_y_log10(breaks = c(1e-12,1e-10,1e-8,1e-5,1e-4,
                                     1e-3,1e-2,1e-1,1e0))+
            geom_ribbon(data = lineDat, aes(x = x.new, 
                                            y = fitLine, 
                                            ymin=fitLower, 
                                            ymax=fitUpper,
                                            fill = Ratio), alpha = 0.3,
                        colour=NA,show.legend=FALSE) + 
            geom_line(data = lineDat,aes(x = x.new,
                                         y=fitLine, 
                                         colour = Ratio),show.legend = FALSE) + 
            colScale + fillScale + xlabDE + ylab("DE Test P-values") + 
            geom_hline(yintercept = pval.cutoff, linetype = 2, size = 2 ) + 
            geom_segment(data = arrowDat, aes(x = x,
                                              y = y, 
                                              xend = xend,
                                              yend = yend, 
                                              colour = Ratio), 
                         lineend = "round", arrow=arrow(length=unit(0.5,"cm")), 
                         size = 2, alpha = 0.6) + theme_bw()+ legendPos  
    }else{
        LODRplot <- ggplot(pval.res, aes(x=MnSignal, y=Pval, colour=Ratio)) + 
            geom_point(size = 6) + 
            scale_x_log10(limits = xrange) + 
            scale_y_log10(breaks = c(1e-12,1e-10,1e-8,1e-5,1e-4,
                                     1e-3,1e-2,1e-1,1e0)) +
            geom_ribbon(data = lineDat, aes(x = x.new, 
                                            y = fitLine, 
                                            ymin=fitLower, 
                                            ymax=fitUpper,
                                            fill = Ratio), alpha = 0.3,
                        colour= NA,show.legend=FALSE) + 
            geom_line(data = lineDat,aes(x = x.new,
                                         y=fitLine, 
                                         colour = Ratio),show.legend = FALSE) + 
            colScale + fillScale + xlabDE + ylab("DE Test P-values") + 
            geom_hline(yintercept = pval.cutoff, linetype = 2, size = 2 ) +
            theme_bw()+ legendPos
    }

    annotLODRplot <- grid.arrange(arrangeGrob(grobs = list(LODRplot, my_table), 
                                               ncol = 1, heights = c(2,0.5)))
    dev.off()
    
    nam <- paste0("lodr",kind,"Plot")
    exDat$Figures$plotLODR <- annotLODRplot
    
    names(exDat$Figures)[which(names(exDat$Figures) == "plotLODR")] <- nam
    
    nam <- paste("lodr.res",kind,sep = ".")
    exDat$Results$lodr.res <- lodr.resLess
    names(exDat$Results)[which(names(exDat$Results) == "lodr.res")] <- nam
    
    return(exDat)
}
Any scripts or data that you put into this service are public.
erccdashboard documentation built on Nov. 8, 2020, 5:50 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
erccdashboard
Assess Differential Gene Expression Experiments with ERCC Controls

R/estLODR.R
In erccdashboard: Assess Differential Gene Expression Experiments with ERCC Controls

Defines functions estLODR

Documented in estLODR

Try the erccdashboard package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

erccdashboard Assess Differential Gene Expression Experiments with ERCC Controls

R/estLODR.R In erccdashboard: Assess Differential Gene Expression Experiments with ERCC Controls

Defines functions estLODR

Documented in estLODR

Try the erccdashboard package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

erccdashboard
Assess Differential Gene Expression Experiments with ERCC Controls

R/estLODR.R
In erccdashboard: Assess Differential Gene Expression Experiments with ERCC Controls