R/dropout.R

Defines functions Plot4sUDropoutRank Plot4sUDropoutRankDeferAll Plot4sUDropoutDeferAll Plot4sUDropoutAll Plot4sUDropoutRankAll Estimate4sUDropoutPercentageForSample Estimate4sUDropoutPercentage Correct4sUDropoutHLSpline Correct4sUDropoutHLFactor ComputeSummaryStatistics Make4sUDropoutTable Findno4sUPairs

Documented in ComputeSummaryStatistics Correct4sUDropoutHLFactor Correct4sUDropoutHLSpline Estimate4sUDropoutPercentage Estimate4sUDropoutPercentageForSample Findno4sUPairs Plot4sUDropoutAll Plot4sUDropoutDeferAll Plot4sUDropoutRank Plot4sUDropoutRankAll Plot4sUDropoutRankDeferAll

#' Find equivalent no4sU samples for 4sU samples
#'
#' Identify all no4sU samples in the same condition, and return everything as a list to be used in
#' \link{Plot4sUDropout}, \link{Plot4sUDropoutRank}, \link{Plot4sUDropoutAll}, \link{Plot4sUDropoutRankAll}
#'
#' @param data a grandR object
#' @param paired.replicates pair replicates, i.e. only no4sU.A is found for 4sU.A
#' @param discard.no4sU do not report references for no4sU samples
#'
#' @return a named list containing, for each 4sU sample, a vector of equivalent no4sU samples
#' @export
#'
#' @seealso \link{Plot4sUDropout}, \link{Plot4sUDropoutRank}, \link{Plot4sUDropoutAll}, \link{Plot4sUDropoutRankAll}
#'
#' @examples
#' sars <- ReadGRAND(system.file("extdata", "sars.tsv.gz", package = "grandR"),
#'                   design=c("Condition",Design$dur.4sU,Design$Replicate))
#' Findno4sUPairs(sars)
#'
#' @concept dropout
Findno4sUPairs=function(data, paired.replicates=FALSE,discard.no4sU=TRUE) {
  # R CMD check guard for non-standard evaluation
  no4sU <- NULL

  grp=NULL
  if (paired.replicates && Design$Replicate %in% names(Coldata(data))) grp=c(grp,Design$Replicate)
  if (Design$Condition %in% names(Coldata(data))) grp=c(grp,Design$Condition)

  pairs=FindReferences(data,reference=no4sU,group = grp, as.list=TRUE)

  pairs=pairs[as.character(data$coldata$Name[!data$coldata$no4sU])]
  if (any(sapply(pairs,function(a) length(a)==0))) warning("There were samples without corresponding no4sU sample!")
  if (discard.no4sU) pairs=pairs[sapply(pairs,function(a) length(a)>0)]
  if (length(pairs)==0) stop("No no4sU pairs found!")
  pairs
}

Make4sUDropoutTable=function(data,w4sU,no4sU=Findno4sUPairs(data)[[w4sU]],transform=rank,ntr=w4sU,LFC.fun=lfc::PsiLFC,slot="count",rm.all.zero=TRUE,correction=1,...) {
  w=rowMeans(GetTable(data,type=slot,columns=w4sU))
  col=no4sU
  n=if (is.numeric(no4sU)) no4sU else rowMeans(GetTable(data,type=slot,columns=col))
  ntr=if (is.numeric(ntr)) ntr else apply(GetTable(data,"ntr",columns=ntr),1,mean,rm.na=TRUE)
  use=!is.na(w+n+ntr)
  w=w[use]
  n=n[use]
  ntr=ntr[use]

  f1=1/correction-1
  nw=w+f1*ntr*w
  ntr=(ntr*w+f1*ntr*w)/nw
  w=nw

  phl=transform(ntr,...)

  df=data.frame(`4sU`=w,`no4sU`=n,ntr=ntr,lfc=LFC.fun(w,n),covar=phl,check.names = FALSE)

  if (rm.all.zero) {
    df=df[df$`4sU`>0,]
  }

  df
}

#' Compute summary statistics
#'
#' Summary statistics are computed for all samples (or cells).
#'
#' @param data a grandR object
#' @param pairs a no4sU pairs list as generated by \link{Findno4sUPairs}
#' @param coldata if TRUE, add the coldata table
#' @param do.bootstrap if TRUE, also report standard errors of the 4sU dropout estimated via bootstrapping
#' @param seed the seed for the random number generator for bootstrapping
#'
#' @return a table of summary statistics including:
#' \itemize{
#'   \item{Mean LFC: the mean absolute log2 fold change of each sample vs the corresponding 4sU naive sample}
#'   \item{4sU dropout: the estimated 4sU dropout percentage}
#'   \item{p.conv: The 4sU incorporation frequency estimated by GRAND-SLAM}
#'   \item{Frction labelled: the global NTR}
#' }
#'
#' @concept dropout
#'
#' @export
#'
ComputeSummaryStatistics=function(data,pairs=Findno4sUPairs(data),coldata=FALSE,do.bootstrap=FALSE,seed=1337) {
  re=data.frame(Name=colnames(data))
  re$`Corresponding no4sU`=sapply(re$Name,function(n) paste(pairs[[n]],collapse=","))

  re$`Mean LFC`=NA
  for (n in names(pairs)) {
    re$`Mean LFC`[re$Name==n]=mean(abs(lfc::PsiLFC(rowSums(GetTable(data,type="count",columns=n)),rowSums(GetTable(data,type="count",columns=pairs[[n]])))))
  }

  l=Estimate4sUDropoutPercentage(data,pairs=pairs,bootstrap=FALSE)
  re$`4sU dropout`=l[colnames(data)]
  if (do.bootstrap) {
    l=apply(psapply(1:100,function(i) Estimate4sUDropoutPercentage(data,pairs=pairs,bootstrap=TRUE),seed=seed),1,sd)
    re$`4sU dropout.SE`=l[colnames(data)]
  }

  if (data$metadata$`GRAND-SLAM version`==3) {
    tab=GetTableQC(data,"model.parameters")
    tab=tab[tab$Label==tab$Label[1] & tab$Estimator==tab$Estimator[1],]
    for (sub in unique(tab$Subread)) {
      m=setNames(tab$`Binom p.conv`[tab$Subread==sub],tab$Condition[tab$Subread==sub])
      re[[paste("p.conv",sub)]]=m[colnames(data)]
    }
  } else {
    tab=GetTableQC(data,"mismatches",stop.if.not.exist=FALSE)
    if (!is.null(tab)) {
      strand=GetTableQC(data,"strandness",stop.if.not.exist=FALSE)$V1
      if (strand=="Antisense") {
        tab=tab[(tab$Orientation=="First" & tab$Genomic=="A" & tab$Read=="G")|(tab$Orientation=="Second" & tab$Genomic=="T" & tab$Read=="C"),]
      } else if (strand=="Sense") {
        tab=tab[(tab$Orientation=="First" & tab$Genomic=="T" & tab$Read=="C")|(tab$Orientation=="Second" & tab$Genomic=="A" & tab$Read=="G"),]
      } else {
        tab=tab[(tab$Genomic=="T" & tab$Read=="C")|(tab$Genomic=="A" & tab$Read=="G"),]
      }
      tab=tab[tab$Category=="Exonic",]
      tab=plyr::ddply(tab,c("Condition"),function(s) data.frame(Coverage=sum(s$Coverage),Mismatches=sum(s$Mismatches)))
      m=setNames(tab$Mismatches/tab$Coverage,tab$Condition)
      re$`raw conversions`=m[colnames(data)]
    }

        tab=GetTableQC(data,"rates",stop.if.not.exist=FALSE)
    if (!is.null(tab)) {
      m=unlist(tab[tab$Rate=="single_new",-1])
      re$`p.conv single`=m[colnames(data)]
      re$`p.conv single`[Coldata(data,"no4sU")]=NA
      m=unlist(tab[tab$Rate=="double_new",-1])
      if (any(m!=0)) {
        re$`p.conv double`=m[colnames(data)]
        re$`p.conv double`[Coldata(data,"no4sU")]=NA
      }
    }

  }

  re$`Fraction labeled`=colSums(GetTable(data,type='new.count',gene.info = FALSE,ntr.na = FALSE))/colSums(GetTable(data,type='count',gene.info = FALSE,ntr.na = FALSE))
  if (coldata) re=cbind(Coldata(data),re[,-1])
  re
}


#' Correct for 4sU dropout
#'
#' For several potential reasons, a sample specific percentage of reads from labelled RNA might be lost.
#' This can be corrected for by increasing the amount of labelled RNA (see details).
#'
#' @param data a grandR object
#' @param pairs a no4sU pairs list as generated by \link{Findno4sUPairs}
#' @param factors the 4sU dropout percentages
#' @param spline.df the degrees of freedom to be used for smoothing splines
#' @param ... further arguments to be passed to or from other methods.
#'
#' @details The factor based correction approach requires estimates of the 4sU dropout percentage d. Labelled RNA is multiplied by 1/(1-d),
#' and total count and NTRs are adapted accordingly. alpha and beta are also adapted such that their sum is maintained, but the mean of the
#' corresponding beta function is the new NTR. All other slots are treated to be expression estimates (and are adapted accordingly).
#'
#' @details The spline approach uses quantile regression to fit a smoothing spline to the 4sU dropout rank plot, which is then used to correct labelled RNA.
#'
#' @return a new grandR object that is corrected for 4sU dropout
#'
#' @seealso \link{Estimate4sUDropoutPercentage},\link{ComputeSummaryStatistics}
#'
#' @name correctdropout
#' @concept dropout
NULL
#> NULL


#' @rdname correctdropout
#' @export
Correct4sUDropoutHLFactor=function(data,pairs=Findno4sUPairs(data),factors=Estimate4sUDropoutPercentage(data,pairs=pairs,...),...) {
  n=if(is.matrix(factors)) ncol(factors) else length(factors)
  for (i in 1:n) {
    f=factors[i]/(1-factors[i])

    count=data$data$count[,names(pairs)[i]]
    ntr=data$data$ntr[,names(pairs)[i]]
    a=data$data$alpha[,names(pairs)[i]]
    b=data$data$beta[,names(pairs)[i]]
    data$data$count[,names(pairs)[i]] = count+f*count*ntr
    # assume all other tables are expression tables!
    for (n in setdiff(names(data$data),c("count","ntr","alpha","beta"))) data$data[[n]][,names(pairs)[i]] = data$data[[n]][,names(pairs)[i]]+f*data$data[[n]][,names(pairs)[i]]*ntr

    data$data$ntr[,names(pairs)[i]] = ifelse(ntr==0,0,(ntr*count+f*ntr*count)/(count+f*count*ntr))
    ntr=data$data$ntr[,names(pairs)[i]]
    data$data$alpha[,names(pairs)[i]]=ntr*(a+b-2)+1
    data$data$beta[,names(pairs)[i]]=a+b-1-ntr*(a+b-2)
  }
  data
}

#' @rdname correctdropout
#' @export
Correct4sUDropoutHLSpline=function(data,pairs=Findno4sUPairs(data),spline.df=15) {
  checkPackages("quantreg")

  set.seed(42)
  for (i in 1:length(pairs)) {
    df=Make4sUDropoutTable(data=data,w4sU=names(pairs)[i],no4sU=pairs[[i]],transform=rank,ties='random',rm.all.zero=FALSE)

    X <- model.matrix(lfc ~ splines::bs(covar, df=spline.df),data=df)
    fit <- quantreg::rq(lfc ~ splines::bs(covar, df=spline.df), data=df)
    lfc.fit <- X %*% fit$coef

    df$f=2^(-lfc.fit)
    df$f=df$f/min(df$f) # scale such that we have an increase for each gene!

    count=data$data$count[,names(pairs)[i]]
    ntr=data$data$ntr[,names(pairs)[i]]
    a=data$data$alpha[,names(pairs)[i]]
    b=data$data$beta[,names(pairs)[i]]

    #f1=(count*df$f-count)/(count*ntr)
    data$data$count[,names(pairs)[i]] = count*df$f #==count+f1*count*ntr
    for (n in setdiff(names(data$data),c("count","ntr","alpha","beta"))) data$data[[n]][,names(pairs)[i]] = data$data[[n]][,names(pairs)[i]]*df$f
    data$data$ntr[,names(pairs)[i]] = (ntr+df$f-1) / df$f #==(ntr*count+f1*ntr*count)/(count+f1*count*ntr)
    ntr=data$data$ntr[,names(pairs)[i]]
    data$data$alpha[,names(pairs)[i]]=ntr*(a+b-2)+1
    data$data$beta[,names(pairs)[i]]=a+b-1-ntr*(a+b-2)
  }
  data
}

#' Estimate 4sU dropout percentages
#'
#' For several potential reasons, a sample specific percentage of reads from labelled RNA might be lost.
#' This percentage can be estimated from data of this sample and an equivalent 4sU naive control (see details).
#'
#' @param data a grandR object
#' @param pairs a no4sU pairs list as generated by \link{Findno4sUPairs}
#' @param w4sU the name of a 4sU sample
#' @param no4sU the name(s) of equivalent no4sU sample(s)
#' @param ntr the name of a sample to take NTRs from (usually equal to w4sU)
#' @param LFC.fun function to compute log fold change (default: \link[lfc]{PsiLFC}, other viable option: \link[lfc]{NormLFC})
#' @param type one of "spearman","quantreg","linear" or "lowess" (see details)
#' @param bootstrap if TRUE, perform a single bootstrap sample  (by drawing genes with replacement)
#' @param ... further arguments to be passed to or from other methods.
#'
#' @details The percentage of 4sU dropout is estimated by numerical optimization of the factor f
#' that has to be multiplied with the NTR to mitigate the effect of 4sU dropout. The exact objective function depends on the type parameter:
#' \itemize{
#'   \item{spearman: f is estimated such that the spearman correlation coefficient of the log2 fold change 4sU/no4sU vs the ntr rank is 0}
#'   \item{quantreg: f is estimated such that the slope of a median regression with the the ntr rank as independent variable and the log2 fold change 4sU/no4sU as dependent variable is 0}
#'   \item{linear: f is estimated such that the slope of a linear regression with the the ntr rank as independent variable and the log2 fold change 4sU/no4sU as dependent variable is 0}
#'   \item{lowess: f is estimated by minimizing the sum-of-squares of the residuals from a lowess regression with the the ntr rank as independent variable and the log2 fold change 4sU/no4sU as dependent variable is 0}
#' }
#' Once f is computed the percentage of 4sU dropout is f/(f+1).
#'
#' @return the percentage of 4sU dropout for a single sample (Estimate4sUDropoutPercentageForSample) or all samples (Estimate4sUDropoutPercentage)
#'
#' @seealso \link{Correct4sUDropoutHLFactor},\link{ComputeSummaryStatistics}
#'
#' @name dropoutpercent
#' @concept dropout
NULL
#> NULL


#' @rdname dropoutpercent
#' @export
Estimate4sUDropoutPercentage=function(data,pairs=Findno4sUPairs(data),...) {
  sapply(names(pairs),function(n) Estimate4sUDropoutPercentageForSample(data,n,pairs[[n]],...))
}

#' @rdname dropoutpercent
#' @export
Estimate4sUDropoutPercentageForSample = function(data,w4sU,no4sU,ntr=w4sU,LFC.fun=lfc::PsiLFC, type=c("spearman","quantreg","linear","lowess"),bootstrap=FALSE) {
  df=Make4sUDropoutTable(data=data,w4sU=w4sU,no4sU=no4sU,transform=rank,ntr=ntr,LFC.fun=LFC.fun)

  if (bootstrap) df = df[sample.int(nrow(df),nrow(df),replace=TRUE),]

  if (type[1]=="spearman") {
    obj=function(f1) {
      df=data.frame(lfc = LFC.fun(df$`4sU`+df$`4sU`*df$ntr*f1, df$`no4sU`),covar=(df$`4sU`*df$ntr+df$`4sU`*df$ntr*f1)/(df$`4sU`+df$`4sU`*df$ntr*f1))
      cor(df$lfc,df$covar,method="spearman",use='c')
    }
    l=obj(0)
    r=obj(19)
    if (l*r>=0) {
      if (abs(l)<abs(r)) return(0)
      return(1)
    }
    f1=uniroot(obj,c(0,19))$root
  }
  else if (type[1]=="linear") {
    obj=function(f1) lm(lfc~covar,data=data.frame(lfc = LFC.fun(df$`4sU`+df$`4sU`*df$ntr*f1, df$`no4sU`), covar=(df$`4sU`*df$ntr+df$`4sU`*df$ntr*f1)/(df$`4sU`+df$`4sU`*df$ntr*f1)))$coeff[2]
    l=obj(0)
    r=obj(19)
    if (l*r>=0) {
      if (abs(l)<abs(r)) return(0)
      return(1)
    }
    f1=uniroot(obj,c(0,19))$root
  }
  else if (type[1]=="lowess") {
    obj=function(f1) sum(loess(lfc~covar,data=data.frame(lfc = LFC.fun(df$`4sU`+df$`4sU`*df$ntr*f1, df$`no4sU`), covar=(df$`4sU`*df$ntr+df$`4sU`*df$ntr*f1)/(df$`4sU`+df$`4sU`*df$ntr*f1)))$y^2)
    f1=optimize(obj,c(0,19))$minimum
  }
  else if (type[1]=="quantreg") {
    checkPackages("quantreg")
    obj=function(f1) quantreg::rq(lfc~covar,data=data.frame(lfc = LFC.fun(df$`4sU`+df$`4sU`*df$ntr*f1, df$`no4sU`), covar=(df$`4sU`*df$ntr+df$`4sU`*df$ntr*f1)/(df$`4sU`+df$`4sU`*df$ntr*f1)))$coeff[2]
    l=obj(0)
    r=obj(19)
    if (l*r>=0) {
      if (abs(l)<abs(r)) return(0)
      return(1)
    }
    f1=uniroot(obj,c(0,19))$root
  }

  f1/(f1+1)

}

#' Perform 4sU dropout tests
#'
#' Testing for RNA dropout of a 4sU sample is performed by comparing half-lives or NTR ranks against
#' the log2 fold change of the 4sU sample vs equivalent no4sU samples.
#'
#' @param data a grandR object
#' @param pairs a no4sU pairs list as generated by \link{Findno4sUPairs}
#' @param w4sU the name of a 4sU sample
#' @param no4sU the name(s) of equivalent no4sU sample(s)
#' @param ntr the name of a sample to take NTRs from (usually equal to w4sU)
#' @param ylim y axis limits
#' @param LFC.fun function to compute log fold change (default: \link[lfc]{PsiLFC}, other viable option: \link[lfc]{NormLFC})
#' @param slot the slot of the grandR object to take the data from; for \link[lfc]{PsiLFC}, this really should be "count"!
#' @param hl.quantile the half-life quantile to cut the plot
#' @param correction correction factor
#' @param label.corr add statistics as subtitle
#' @param return.corr instead of only the ggplot object, return a list with slots plot (what is normally returned) and label (the correlation statistics)
#' @param boxplot.bins how many boxplots for \code{Plot4sUDropoutRank}
#' @param title the main title for the plot
#' @param size the point size
#' @param hl if NULL, compute half-lives from the ntr column; otherwise, must be a vector containing half-lives
#' @param invert.ranks if TRUE, left to right on the plot is largest NTR to smallest NTR
#' @param color.by.ntr if true, compute the density colors along the ntr axis instead of globally
#' @param ... further arguments to be passed to or from other methods.
#'
#' @details The deferred versions are useful to be used in conjunction with \link{ServeGrandR} plot.static. Their implementation
#' make sure that they are lightweight, i.e. when saving the returned function to an Rdata file, the grandR object is not stored.
#'
#' @return either a ggplot object, a list of ggplot objects, or a list of deferred functions for plotting
#'
#' @seealso \link{Findno4sUPairs},\link{Defer}
#'
#' @name dropout
#' @concept dropout
NULL
#> NULL

#' @rdname dropout
#' @export
Plot4sUDropoutRankAll=function(data,pairs=Findno4sUPairs(data),...) {
  setNames(lapply(names(pairs),function(n) Plot4sUDropoutRank(data,n,pairs[[n]],...)),names(pairs))
}
#' @rdname dropout
#' @export
Plot4sUDropoutAll=function(data,pairs=Findno4sUPairs(data),...) {
  setNames(lapply(names(pairs),function(n) Plot4sUDropout(data,n,pairs[[n]],...)),names(pairs))
}

#' @rdname dropout
#' @export
Plot4sUDropoutDeferAll=function(data,pairs=NULL,...) {
  if (is.null(pairs)) pairs=Findno4sUPairs(data)
  rm(data)
  setNames(lapply(names(pairs),function(n) Defer(Plot4sUDropout,w4sU=n,no4sU=pairs[[n]],add=ggtitle(n),...)),names(pairs))
}
#' @rdname dropout
#' @export
Plot4sUDropoutRankDeferAll=function(data,pairs=NULL,...) {
  if (is.null(pairs)) pairs=Findno4sUPairs(data)
  rm(data)
  setNames(lapply(names(pairs),function(n) Defer(Plot4sUDropoutRank,w4sU=n,no4sU=pairs[[n]],add=ggtitle(n),...)),names(pairs))
}

#' @rdname dropout
#' @export
Plot4sUDropoutRank=function(data,w4sU,no4sU=Findno4sUPairs(data)[[w4sU]],ntr=w4sU,ylim=NULL,LFC.fun=lfc::PsiLFC,slot="count",correction=1,label.corr=TRUE,return.corr=FALSE,boxplot.bins=10,title=w4sU,size=1.5, invert.ranks=FALSE) {
  # R CMD check guard for non-standard evaluation
  covar <- lfc <- NULL

  t.fun=if (invert.ranks) function(v) length(v)-rank(v) else function(v) rank(v)
  df=Make4sUDropoutTable(data=data,w4sU=w4sU,no4sU=no4sU,transform=t.fun,ntr=ntr,LFC.fun=LFC.fun,slot=slot,correction=correction)
  if (is.null(ylim)) {
    d=max(abs(quantile(df$lfc,c(0.01,0.99))))*1.5
    ylim=c(-d,d)
  }
  rho=round(cor(df$covar,df$lfc,method="spearman"),digits = 2)
  p=cor.test(df$covar,df$lfc,method="spearman")$p.value
  p=bquote.pval(p)
  df$lfc=ifelse(df$lfc<ylim[1],-Inf,df$lfc)
  df$lfc=ifelse(df$lfc>ylim[2],+Inf,df$lfc)

  pointslayer=ggplot2::geom_point(alpha=1,size=size)
  if (!checkPackages("ggrastr",error = FALSE,warn = FALSE)) {
    singleMessage("Install the ggrastr package to get rasterized dropout plots!")
  } else {
    pointslayer = ggrastr::rasterize(pointslayer)
  }


  re=ggplot(df,aes(covar,lfc,color=density2d(covar, lfc, n = 100,margin = 'x')))+
    cowplot::theme_cowplot()+
    scale_color_viridis_c(name = "Density",guide='none')+
    pointslayer+
    geom_hline(yintercept=0)+
    #geom_smooth(method="loess",formula=y~x)+
    xlab("NTR rank")+ylab("log FC 4sU/no4sU")+
    coord_cartesian(ylim=ylim)

  lab=bquote(rho == .(rho) ~ "," ~ p ~ .(p))

  if (!is.na(boxplot.bins) && boxplot.bins>1) {
    bin=max(df$covar)/boxplot.bins
    df$cat=floor(pmax(df$covar-1,0)/bin)*bin+bin/2
    pp=kruskal.test(lfc~cat,data=df)$p.value
    pp=bquote.pval(pp)
    re=re+
      geom_boxplot(data=df,mapping=aes(x=cat,color=NULL,group=factor(cat)),color="black",fill=NA,outlier.shape = NA,size=1)
    lab=bquote(rho == .(rho) * "," ~ .(p) * ", Kruskall-Wallis" ~ .(pp))
  }

  re=re+ggtitle(title,subtitle = if (label.corr) lab)

  if (return.corr) list(plot=re,label=lab,df=df) else re
}

#' @rdname dropout
#' @export
Plot4sUDropout=function(data,w4sU,no4sU=Findno4sUPairs(data)[[w4sU]],ntr=w4sU,ylim=NULL,LFC.fun=lfc::PsiLFC,slot="count",hl.quantile=0.8,hl=NULL,correction=1,label.corr=FALSE,return.corr=FALSE,title=w4sU,size=1.5,color.by.ntr=FALSE) {
  # R CMD check guard for non-standard evaluation
  covar <- lfc <- NULL

  time=if(Design$dur.4sU %in% names(data$coldata)) data$coldata[if (is.numeric(ntr)) w4sU else ntr,Design$dur.4sU] else 1
  df=Make4sUDropoutTable(data=data,w4sU=w4sU,no4sU=no4sU,transform=function(x) comp.hl(x,time=time),ntr=ntr,LFC.fun=LFC.fun,slot=slot,correction=correction)
  if (is.null(hl)) hl=quantile(df$covar[is.finite(df$covar)],hl.quantile)
  if (sum(df$covar<hl & df$ntr<1 & df$ntr>0)<nrow(df)) warning(sprintf("Removed %d genes!",nrow(df)-sum(df$covar<hl & df$ntr<1 & df$ntr>0)))
  df=df[df$covar<hl & df$ntr<1 & df$ntr>0,]
  if (is.null(ylim)) {
    d=max(abs(quantile(df$lfc[is.finite(df$lfc)],c(0.01,0.99))))*1.5
    ylim=c(-d,d)
  }

  pointslayer=ggplot2::geom_point(alpha=1,size=size)
  if (!checkPackages("ggrastr",error = FALSE,warn = FALSE)) {
    singleMessage("Install the ggrastr package to get rasterized dropout plots!")
  } else {
    pointslayer = ggrastr::rasterize(pointslayer)
  }
  re=ggplot(df,aes(covar,lfc,color=density2d(covar, lfc, n = 100,margin = if (color.by.ntr) 'x' else 'n')))+
    cowplot::theme_cowplot()+
    scale_color_viridis_c(name = "Density",guide="none")+
    pointslayer+
    geom_hline(yintercept=0)+
    geom_smooth(method="loess",color='red')+
    xlab("RNA half-life [h]")+ylab("log FC 4sU/no4sU")+
    coord_cartesian(ylim=ylim)



  re=re+ggtitle(title,subtitle = if (label.corr) {
    rho=round(cor(df$covar,df$lfc,method="spearman"),digits = 2)
    p=cor.test(df$covar,df$lfc,method="spearman")$p.value
    p=bquote.pval(p)
    lab=bquote(rho == .(rho) ~ "," ~ .(p))
    lab
    })

  if (return.corr) list(plot=re,label=lab,df=df) else re
}

Try the grandR package in your browser

Any scripts or data that you put into this service are public.

grandR documentation built on April 4, 2025, 2:27 a.m.