R/utils.R

Defines functions bquote.pval Summarize smartrbind read.tsv

Documented in Summarize

read.tsv=function(t,verbose=FALSE,stringsAsFactors=FALSE,...) {

  readit=function(file,...)
    if (requireNamespace("data.table",quietly = TRUE) && !grepl("\\.gz$",file)) {
      as.data.frame(data.table::fread(file = file,stringsAsFactors=stringsAsFactors,check.names=FALSE,data.table = FALSE,...))
    } else {
      utils::read.delim(file = file,stringsAsFactors=stringsAsFactors,check.names=FALSE,...)
    }

  if (suppressWarnings(requireNamespace("RCurl",quietly = TRUE)) && RCurl::url.exists(t)) {
    fn=gsub(".*/","",gsub("\\?.*","",t))
    fn1 = gsub("\\..*","",fn)
    ext=substr(fn,nchar(fn1)+1,nchar(fn))
    file <- tempfile(pattern = fn1,fileext = ext)
    if (verbose) cat(sprintf("Downloading file to %s...\n",file))
    utils::download.file(t, file, quiet=!verbose)
    if (verbose) cat("Reading file...\n")
    t=readit(file,...)
    if (verbose) cat("Deleting temporary file...\n")
    unlink(file)
  } else {
    if (verbose) cat("Reading file...\n")
    t=readit(t,...)
  }
  if (stringsAsFactors==TRUE) t=as.data.frame(lapply(t,function(c) if (is.character(c)) factor(c,levels=unique(c)) else c),check.names=FALSE)
  t
}

smartrbind=function(a,b) {
  # rbind data frames paying attention to columns and factor levels
  cd=NULL
  for (common in intersect(names(a),names(b))) {
    if(is.factor(a[[common]])) {
      r = c(as.character(a[[common]]),as.character(b[[common]]))
      oll=levels(b[[common]])
      if (is.null(oll)) oll = unique(b[[common]])
      r=factor(r,levels=union(levels(a[[common]]),oll))
    } else {
      r = c(a[[common]],b[[common]])
    }
    df=setNames(data.frame(r),common)
    cd=if (is.null(cd)) df else cbind(cd,df)
  }
  for (re.only in setdiff(names(a),names(b))) {
    r=c(a[[re.only]],rep(NA,nrow(b)))
    df=setNames(data.frame(r),re.only)
    cd=if (is.null(cd)) df else cbind(cd,df)
  }
  for (add.only in setdiff(names(b),names(a))) {
    r=c(rep(NA,nrow(a)),b[[add.only]])
    df=setNames(data.frame(r),add.only)
    cd=if (is.null(cd)) df else cbind(cd,df)
  }
  rownames(cd)=make.unique(c(rownames(a),rownames(b)))
  cd
}

#' Summarize a data matrix
#'
#' Helper function to work in conjunction with \link{GetMatrix} or similar to obtain a summarized matrix.
#'
#' @param mat the matrix to summarize
#' @param summarize.mat the matrix defining how to summarize (see details)
#'
#' @details
#' The summarize.mat can be obtained via \link{GetSummarizeMatrix}. If there are missing (NA) values in the matrix, they are imputed from the rest (average)
#'
#' @return the summarized matrix
#' @export
#'
#' @concept data
Summarize=function(mat,summarize.mat) {
  summarize.mat=summarize.mat[,colSums(summarize.mat!=0)>0,drop=FALSE]

  apply(summarize.mat,2,function(cc) {
    h=mat[,cc!=0,drop=FALSE]
    cc=cc[cc!=0]
    apply(h,1,function(v) { if (all(is.na(v))) NA else {v[is.na(v)] = mean(v,na.rm = TRUE); sum(v*cc)}})
  })
}



confint_nls=function (object, parm, level = 0.95, ...)
{
  cc <- coef(object)
  if (missing(parm))
    parm <- seq_along(cc)
  levs <- c((1 - level)/2, 0.5 + level/2)
  dfval <- (length(object$fvec) - length(object$par))
  tdist <- qt(levs[2], dfval)
  m1 <- outer(sqrt(diag(vcov(object))), c(-1, 1)) * tdist
  m2 <- sweep(m1, cc, MARGIN = 1, "+")
  colnames(m2) <- paste(100 * levs, "%", sep = "")
  m2[parm, ]
}

bquote.pval = function(p, digits = 2) {
  if (p<2.2E-16) bquote("p"~"<"~2.2 %*% 10^-16) else {
    s = sprintf("%.2g",p)
    if (grepl("e",s)) {
      s = gsub("e-0+","e-",s)
      v=as.numeric(strsplit(s,"e")[[1]])
      bquote("p"~"="~ .(v[1]) %*% 10^.(v[2]))
    } else bquote("p"~"="~ .(s))
  }
}

equal = function(a,b) length(a)==length(b) && all(a==b)

#' Defer calling a function
#'
#' This generates a function with one mandatory parameter (and additional optional parameters)
#' that, when called, (i) also receives the parameters given when calling \code{Defer}, and (ii)
#' after calling it each element of the \code{add} list is appended by \code{+}. When no optional parameters
#' are given, the result is cached.
#'
#' @param FUN the function to be deferred
#' @param ... additional parameters to be used when the deferred function is called
#' @param add list containing additional elements to be added \code{+} to the result of the deferred function
#' @param cache use caching mechanism
#' @param width.height a vector containing the desired width and height (not checked!)
#'
#' @return a function that can be called
#' @export
#'
#' @details The following expressions are very similar: \code{f <- function(d) Heavy.function(d)} and \code{f <- Defer(Heavy.function)}. In
#' both cases, you get a function \code{f} that you can call for some \code{d}, which in turn calls \code{Heavy.function}. The only
#' difference is that in the second case, the result is cached: \code{Heavy.function} is called only once when first calling \code{f},
#' if \code{f} is called a second time, the previous result is returned. This makes sense if the parameter \code{d} is constant (like a grandR object)
#' and if \code{Heavy.function} is deterministic.
#'
#' @details If additional parameters are provided to \code{f}, caching is disabled. If any of these additional parameters has the same name as the parameters
#' given to \code{Defer()}, the parameters given to \code{Defer()} are overwritten. Be careful if \code{Heavy.function} is not deterministic (see examples).
#'
#' @details Use case scenario: You want to produce a heatmap from a grandR object to be used as \code{plot.static} in the shiny web interface.
#' \code{\link{PlotHeatmap}} takes some time, and the resulting object is pretty large in memory. Saving the heatmap object to disk is very
#' inefficient (the Rdata file will be huge, especially with many heatmaps). Deferring the call without caching also is bad, because whenever
#' the user clicks onto the heatmap, it is regenerated.
#'
#' @examples
#' Heavy.function <- function(data) rnorm(5,mean=data)
#' f1=Defer(Heavy.function)
#' f2=function(d) Heavy.function(d)
#' f2(4)
#' f2(4) # these are not equal, as rnorm is called twice
#' f1(4)
#' f1(4) # these are equal, as the result of rnorm is cached
#'
#' @concept helper
Defer=function(FUN,...,add=NULL, cache=TRUE,width.height=NULL) {
  param=list(...)
  value=NULL
  re=function(data,...) {
    pp=list(...)
    if (length(pp)>0) {
      re=do.call(FUN,c(list(data),utils::modifyList(param,pp)))
      if (!is.null(add)) for (e in if (methods::is(add,"gg")) list(add) else add) re=re+e
      return(re)
    }

    if (is.null(value)) {
      value<<-do.call(FUN,c(list(data),param))
      if (!is.null(add)) for (e in if (methods::is(add,"gg")) list(add) else add) value<<-value+e
    }
    value
  }
  if (!is.null(width.height)) {
    attr(re,"width")=width.height[1]
    attr(re,"height")=width.height[2]
  }
  re
}

try.call.ignore.unused=function(FUN,...) {
    tryCatch({
      FUN(...)
    }, error = function(e) {
      l=list(...)
      keep = !sapply(names(l),function(n) n!="" && grepl(paste0(n," = "),e$message))
      l=l[keep]
      do.call(FUN,l)
    })
}


#' Convert a structure into a vector
#'
#' The structure is supposed to be a list. Flattening is done by extracting the given fields (\code{return.fields})
#' and applying the additional function (\code{return.extra}). This is mainly to be used within \code{sapply} and similar.
#'
#' @param d the data structure
#' @param return.fields which fields should be extracted directly (may be NULL)
#' @param return.extra apply a function returning a flat list or vector (may be NULL)
#'
#'
#' @return the data flattened into a vector
#' @export
#'
#' @examples
#' sars <- ReadGRAND(system.file("extdata", "sars.tsv.gz", package = "grandR"),
#'                   design=c("Condition",Design$dur.4sU,Design$Replicate))
#' sars <- Normalize(sars)
#' fit <- FitKineticsGeneLeastSquares(sars,"SRSF6")$Mock
#' print(fit)
#' kinetics2vector(fit)
#'
#' @concept helper
structure2vector=function(d,return.fields=NULL,return.extra=NULL) {
  r=list()
  if (!is.null(return.fields)) r=c(r,d[return.fields])
  if (!is.null(return.extra)) r=c(r,return.extra(d))
  unlist(r)
}
#' @describeIn structure2vector Convert the output of the FitKinetics methods into a vector
#' @param condition if the original grandR object had \code{\link{Condition}} set, which condition to extract (NULL otherwise)
#' @export
kinetics2vector=function(d,condition=NULL,return.fields=c("Synthesis","Half-life"),return.extra=NULL) structure2vector(if (is.null(condition)) d else d[[condition]],return.fields=return.fields,return.extra=return.extra)

my.precision=function (x)
{
  x <- unique(x)
  x <- x[is.finite(x)]
  if (length(x) <= 1) {
    return(1)
  }
  smallest_diff <- min(diff(sort(x)))
  if (smallest_diff < sqrt(.Machine$double.eps)) {
    1
  }
  else {
    pmin(10^(floor(log10(smallest_diff))), 1)
  }
}

sd.from.hessian=function(H) {
  m=-H
  keep=rep(T,nrow(m))
  while(sum(keep>0) && rcond(m[keep,keep,drop=FALSE])<.Machine$double.eps) {
    s=apply(abs(m),1,sum)
    keep=keep&s>min(s[keep])
  }
  d=rep(0,nrow(m))
  d[keep]=sqrt(diag(solve(m[keep,keep])))
  d
}


cnt=function(m) {
  m=as.matrix(m)
  mode(m) <- "integer"
  m
}

#' Estimate dispersion parameters for a count matrix using DESeq2
#'
#' @param ss the count matrix
#'
#' @return a vector of dispersion parameters (to be used as size=1/dispersion for Xnbinom functions)
#' @export
#'
#' @concept helper
estimate.dispersion=function(ss) {
  checkPackages("DESeq2")

  dds=DESeq2::DESeqDataSetFromMatrix(countData = cnt(ss),colData=data.frame(rep(1,ncol(ss))),design = ~1)
  dds=DESeq2::estimateSizeFactors(dds)
  dds=DESeq2::estimateDispersions(dds,quiet=TRUE)
  disp=DESeq2::dispersions(dds)
  disp[is.na(disp)]=0.1 # ss=0 0 0 ...
  setNames(disp,rownames(ss))
}

GetField=function(name,field,sep=".") sapply(strsplit(as.character(name),sep,fixed=TRUE),function(v) v[field])


#' Parallel (s/l)apply
#'
#' Depending on whether \link{SetParallel} has been called, execute in parallel or not.
#'
#' @param ... forwarded to lapply or parallel::mclapply
#' @param seed Seed for the random number generator
#' @param enforce if TRUE, do it parallelized no matter what IsParallel() says, if FALSE do it non-parallelized no matter what IsParallel() says
#'
#' @details If the code uses random number specify the seed to make it deterministic
#'
#' @return a vector (psapply) or list (plapply)
#' @export
#'
#' @concept helper
psapply=function(...,seed=NULL, enforce = NA) {simplify2array(plapply(...,seed=seed,enforce=enforce))}
#' @rdname psapply
#' @export
plapply=function(...,seed=NULL, enforce = NA) {

  parallel = if (is.na(enforce)) IsParallel() else enforce

  if (!parallel) return(lapply(...))

  if (!is.null(seed)) {
    rng=RNGkind()[1]
    RNGkind("L'Ecuyer-CMRG") # make it deterministic!
    set.seed(seed)
  }

  re=if (parallel) parallel::mclapply(...,mc.cores=opt$nworkers) else lapply(...)

  if (!is.null(seed)) {
   RNGkind(rng)
  }

  re
}


opt <- new.env()
#opt$lapply=function(...) lapply(...)
#opt$sapply=function(...) simplify2array(opt$lapply(...))
opt$nworkers=0
opt$singlemsg=list()

singleMessage=function(msg) {
  if (is.null(opt$singlemsg[[msg]])) {
    cat(sprintf("%s\n (This message is only shown once per session)\n",msg))
    opt$singlemsg[[msg]] = 1
  }
}


#' Set up parallel execution
#'
#' Set the number of cores for parallel execution.
#'
#' @param cores number of cores
#'
#' @details Whenever \link{psapply} or \link{plapply} are used, they are executed in parallel.
#'
#' @return No return value, called for side effects
#' @export
#'
#' @concept helper
SetParallel=function(cores=max(1,parallel::detectCores()-2)) {
  opt$nworkers=cores
  if (cores>1) {
    if (.Platform$OS.type!="unix") {
      warning("Parallelism is not supported under windows. Will use a single thread!")
      opt$nworkers=0
    } else {
      #opt$lapply<-function(...) parallel::mclapply(...,mc.cores=cores)
    }
  } else {
    #opt$lapply<-function(...) lapply(...)
  }
}

#' Checks for parallel execution
#'
#' @return whether or not parallelism is activated
#' @export
#' @concept helper
IsParallel=function() {opt$nworkers>1}


checkPackages=function(pp,error=TRUE,warn=TRUE) {
  missing = c();
  for (p in pp) {
    if (!suppressWarnings(requireNamespace(p,quietly = TRUE))) {
      missing = c(missing,p)
    }
  }
  if (length(missing)>0) {
    msg = sprintf("For this function, you need additional packages. Missing package%s: %s. Please install them e.g. using \n BiocManager::install(c('%s'))\n",
                  if (length(missing)>1) "s" else "",paste(missing,collapse=","),paste(missing,collapse="','"))
    if (error) stop(msg)
    if (warn) warning(msg)
    return(FALSE)
  }
  return(TRUE)
}

Try the grandR package in your browser

Any scripts or data that you put into this service are public.

grandR documentation built on April 4, 2025, 2:27 a.m.