R/fileutilities.R

Defines functions ct.buildSE ct.resultCheck ct.inputCheck dir.writable initOutDir

Documented in ct.buildSE ct.inputCheck ct.resultCheck dir.writable initOutDir

## -----------------------------------------------------------------------------
## File Utilities 


##' Initializes the output directory
##'
##' If outdir is NULL, then no directory is checked/created. This also implies
##' that creating plots is not possible.
##'
##' @param outdir character vector pointing to a directory to check/create
##' @return TRUE if the output directory was created, otherwise FALSE (it might
##' already exist).
##' @keywords internal
##' @author Steve Lianoglou, Russell Bainer
initOutDir <- function(outdir) {
  if (is.null(outdir)) {
    return(FALSE)
  }
  if (!is.character(outdir) && length(outdir) != 1) {
    stop("character required for `outdir`")
  }
  outdir.created <- FALSE
  if (dir.exists(outdir)) {
    if (!dir.writable(outdir)) {
      stop("Can't write to output directory: ", outdir)
    }
  } else {
    pdir <- dirname(outdir)
    if (!dir.exists(pdir)) {
      stop("Path to outdir does not exist: ", pdir)
    }
    if (!dir.writable(pdir)) {
      stop("Can't create output directory in: ", pdir)
    }
    dir.create(outdir)
    outdir.created <- TRUE
  }
  outdir.created
}


##' Checks that the directory provided is writable by the current user
##'
##' This works by testing to put a temporary file into an already existing
##' directory
##'
##' @param path The path to a directory to check.
##'
##' @return \code{logical}, \code{TRUE} if \code{path} is writable by the
##' current user, otherise \code{FALSE}
##' @keywords internal
##' @author Steve Lianoglou
dir.writable <- function(path) {
  if (!dir.exists(path)) {
    stop("The directory provided does not exist: ", path)
  }
  tmp.fn <- tempfile(tmpdir=path, fileext='.test.tmp')
  on.exit({
    if (file.exists(tmp.fn)) unlink(tmp.fn)
  })
  
  tryCatch({
    suppressWarnings(writeLines('test', tmp.fn))
    TRUE
  }, error=function(e) FALSE)
}


##' @title Check compatibility of a sample key with a supplied object
##' @description For many gCrisprTools functions, a sample key must be provided that specifies 
##' sample mapping to experimental groups and specifies which of these contains control samples. 
##' This function checks whether the specified sample key is of the proper format and has 
##' properties consistent matching the specified object. 
##' @param sampleKey A named factor, where the \code{levels} indicate the experimental replicate 
##' groups and the \code{names} match the \code{colnames} of the expression matrix contained in \code{object}. 
##' The first \code{level} should correspond to the control samples, but obviously there is no 
##' way to algorithmically control this. 
##' @param object An \code{ExpressionSet}, \code{EList}, or matrix.  
##' @return A logical indicating whether the objects are compatible.
##' @import limma
##' @author Russell Bainer
##' @examples data('es')
##' library(limma)
##' library(Biobase)
##' 
##' #Build the sample key
##' sk <- relevel(as.factor(pData(es)$TREATMENT_NAME), "ControlReference")
##' names(sk) <- row.names(pData(es))
##' ct.inputCheck(sk, es)
##' @export
ct.inputCheck <- function(sampleKey, object){
  
  #Check input formats
  if(!any(is(object, "ExpressionSet"), is(object, "EList"), is(object, "matrix"))){
    stop(paste(deparse(substitute(object)), "is not an ExpressionSet, Elist, or matrix. Class is: ", class(object)))
  }
  
  if(!(is.factor(sampleKey))){stop(paste(deparse(substitute(sampleKey)), "is not an ordered factor."))}
  if(is.null(names(sampleKey))){stop(paste(deparse(substitute(sampleKey)), "must have a names attribute, specifying the sample assignments in", deparse(substitute(object)), "."))}
  
  #Check to see if the names match properly
  if(methods::is(object, "EList")){
    dat <- object$E
  }else if(methods::is(object, "ExpressionSet")){
    dat <- exprs(object)
  }else{
    dat <- object
  }
  
  if(!setequal(colnames(dat), names(sampleKey))){stop(paste("The names of", deparse(substitute(sampleKey)), "must exactly match the colnames of the data contained in", deparse(substitute(object)), "."))}
  
  return(TRUE)  
}  


##' @title Determine whether a supplied object contains the results of a Pooled Screen
##' @description Many gCrisprTools functions operate on a \code{data.frame} of results
##' generated by a CRISPR screen. This function takes in a supplied object and returns 
##' a logical indicating whether the object can be treated as one of these data.frames 
##' for the purposes of downstream analyses. This is largely used internally, but can 
##' be useful if a user needs to build a result object for some reason.  
##' @param summaryDF A \code{data.frame}, usually returned by \code{ct.generateResults}. 
##' if you need to generate one of these by hand for some reason, see the example 
##' \code{resultsDF} object loaded in the example below. 
##' @return A logical indicating whether the object is of the appropriate format.
##' @author Russell Bainer
##' @examples data('resultsDF')
##' ct.resultCheck(resultsDF)
##' @export
ct.resultCheck <- function(summaryDF){
  
  #Check input formats
  if(!is.data.frame(summaryDF)){
    summaryDF <- as.data.frame(summaryDF, stringsAsFactors = FALSE)
    message('The supplied screen results are not a dataframe.')
    return(FALSE)
    }
   
  expectedNames <- c("geneID","geneSymbol","gRNA Log2 Fold Change","gRNA Depletion P",
                     "gRNA Depletion Q","gRNA Enrichment P","gRNA Enrichment Q", "Target-level Enrichment P",
                     "Target-level Enrichment Q", "Target-level Depletion P",  "Target-level Depletion Q",
                     "Median log2 Fold Change", "Rho_enrich", "Rho_deplete")
  
  if(!all(expectedNames %in% names(summaryDF))){
    missing <- setdiff(expectedNames, names(summaryDF))
    warning('The supplied result object seems to have some incorrect columns. I was expecting: ')
    print(missing)
    stop('Please supply a summaryDF object generated from ct.generateResults() in the gCrisprTools package.')
    return(FALSE)
  } 
  
  if(!setequal(vapply(summaryDF, class, character(1)), rep(c('character', 'numeric'), times = c(4,12)))){
    stop('Some of the columns in the supplied result object seem to be of the wrong type. Please supply a summaryDF object generated from ct.generateResults() in the gCrisprTools package.')
    return(FALSE) 
  } 
  
  return(TRUE)
}
  
##' @title Package Screen Data into a `SummarizedExperiment` Object 
##' @description Convenience function to package major components of a screen into a `SummarizedExperiment` container
##' for downstream visualization and analysis. All arguments are optional except for `es`. 
##' @param es An `ExpressionSet` of screen data. Required. 
##' @param sampleKey a gCrisprTools `sampleKey` object, to be added to the `colData`. 
##' @param ann Annotation object to be packaged into the `rowData`
##' @param vm A `voom`-derived normalized object
##' @param fit a `MArrayLM` object containing the contrast information and model results
##' @param summaryList A named list of \code{data.frame}s, returned by \code{ct.generateResults}. 
##' if you need to generate one of these by hand for some reason, see the example 
##' \code{resultsDF} object loaded in the example below. 
##' @return A `SummarizedExperiment` object. 
##' @importFrom SummarizedExperiment SummarizedExperiment
##' @author Russell Bainer
##' @examples 
##' data('ann', 'es', 'fit', 'resultsDF')
##' ct.buildSE(es, ann = ann, fit = 'fit', summaryList = list('resA' = resultsDF, 'resB' = resultsDF))
##' 
##' @export
ct.buildSE <- function(es, 
                       sampleKey = NULL, 
                       ann = NULL, 
                       vm = NULL,
                       fit = NULL, 
                       summaryList = NULL){

  stopifnot(methods::is(es, 'ExpressionSet'))
  
  asy <- list('counts' = exprs(es))
  met <- list()
  rd <- fData(es)
  cd <- pData(es)
  
  if(!is.null(sampleKey)){
    cd$sampleKey <- sampleKey[row.names(cd)]
 }
  
  if(!is.null(vm)){
    stopifnot(setequal(colnames(vm), colnames(es)), 
              is(vm, 'EList'), 
              setequal(row.names(vm), row.names(es)))
      asy['voom'] <- vm$E
      asy['weights'] <- vm$weights
      met['design'] <- vm$design
      
      newCols <- (ncol(cd) + 1):(ncol(cd) + ncol(vm$targets))
      cd <- cbind(cd, vm$targets[row.names(cd),])
      colnames(cd)[newCols] <- colnames(vm$targets) 
    }

  if(!is.null(fit)){
    met['fit'] <- fit
  }
  
  if(!is.null(summaryList)){
    if((!is(summaryList, 'list')) | (is.null(names(summaryList)))){
      stop('When supplied, results dataframes must be provided as a named list.')
      }
    invisible(lapply(summaryList, ct.resultCheck))
    met$results <- summaryList
  }
  
  if(!is.null(ann)){
    ann <- ct.prepareAnnotation(ann, es)
    rd <- cbind(rd, ann[row.names(rd),])
  }
 
  se <- SummarizedExperiment::SummarizedExperiment(assays = asy, 
                                                   rowData = rd, 
                                                   colData = cd, 
                                                   metadata = met)

  return(se)
}

Try the gCrisprTools package in your browser

Any scripts or data that you put into this service are public.

gCrisprTools documentation built on Nov. 8, 2020, 8:17 p.m.