R/class_dmDSprecision.R
In DRIMSeq: Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

#' @include class_dmDSdata.R
NULL

################################################################################
### dmDSprecision class
################################################################################

#' dmDSprecision object
#' 
#' dmDSprecision extends the \code{\linkS4class{dmDSdata}} by adding the 
#' precision estimates of the Dirichlet-multinomial distribution used to model 
#' the feature (e.g., transcript, exon, exonic bin) counts for each gene in the 
#' differential usage analysis. Result of calling the \code{\link{dmPrecision}}
#' function.
#' 
#' @details Normally, in the differential analysis based on RNA-seq data, such 
#'   as, for example, differential gene expression, dispersion (of 
#'   negative-binomial model) is estimated. Here, we estimate precision of the 
#'   Dirichlet-multinomial model as it is more convenient computationally. To 
#'   obtain dispersion estimates, one can use a formula: dispersion = 1 / (1 + 
#'   precision).
#'   
#' @return
#' 
#' \itemize{ \item \code{mean_expression(x)}: Get a data frame with mean gene
#' expression. \item \code{common_precision(x), common_precision(x) <- value}:
#' Get or set common precision. \code{value} must be numeric of length 1. \item
#' \code{genewise_precision(x), genewise_precision(x) <- value}: Get a data
#' frame with gene-wise precision or set new gene-wise precision. \code{value}
#' must be a data frame with "gene_id" and "genewise_precision" columns. }
#' 
#' @param x,object dmDSprecision object.
#' @param value Values that replace current attributes.
#' @param ... Other parameters that can be defined by methods using this 
#'   generic.
#'   
#' @slot mean_expression Numeric vector of mean gene expression.
#' @slot common_precision Numeric value of estimated common precision.
#' @slot genewise_precision Numeric vector of estimated gene-wise precisions.
#' @slot design_precision Numeric matrix of the design used to estimate 
#'   precision.
#'   
#' @examples 
#' # --------------------------------------------------------------------------
#' # Create dmDSdata object 
#' # --------------------------------------------------------------------------
#' ## Get kallisto transcript counts from the 'PasillaTranscriptExpr' package
#' 
#' library(PasillaTranscriptExpr)
#' \donttest{
#' data_dir  <- system.file("extdata", package = "PasillaTranscriptExpr")
#' 
#' ## Load metadata
#' pasilla_metadata <- read.table(file.path(data_dir, "metadata.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Load counts
#' pasilla_counts <- read.table(file.path(data_dir, "counts.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Create a pasilla_samples data frame
#' pasilla_samples <- data.frame(sample_id = pasilla_metadata$SampleName, 
#'   group = pasilla_metadata$condition)
#' levels(pasilla_samples$group)
#' 
#' ## Create a dmDSdata object
#' d <- dmDSdata(counts = pasilla_counts, samples = pasilla_samples)
#' 
#' ## Use a subset of genes, which is defined in the following file
#' gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
#' 
#' d <- d[names(d) %in% gene_id_subset, ]
#' 
#' # --------------------------------------------------------------------------
#' # Differential transcript usage analysis - simple two group comparison 
#' # --------------------------------------------------------------------------
#' 
#' ## Filtering
#' ## Check what is the minimal number of replicates per condition 
#' table(samples(d)$group)
#' 
#' d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3,
#'   min_gene_expr = 10, min_feature_expr = 10)
#' 
#' plotData(d)
#' 
#' ## Create the design matrix
#' design_full <- model.matrix(~ group, data = samples(d))
#' 
#' ## To make the analysis reproducible
#' set.seed(123)
#' ## Calculate precision
#' d <- dmPrecision(d, design = design_full)
#' 
#' plotPrecision(d)
#' 
#' head(mean_expression(d))
#' common_precision(d)
#' head(genewise_precision(d))
#' }
#' @author Malgorzata Nowicka
#' @seealso \code{\linkS4class{dmDSdata}}, \code{\linkS4class{dmDSfit}}, 
#'   \code{\linkS4class{dmDStest}}
setClass("dmDSprecision", 
  contains = "dmDSdata",
  representation(mean_expression = "numeric", 
    common_precision = "numeric",
    genewise_precision = "numeric",
    design_precision = "matrix"))


# -----------------------------------------------------------------------------

setValidity("dmDSprecision", function(object){
  ## Has to return TRUE when valid object!
  out <- TRUE
  
  if(length(object@mean_expression) > 0){
    if(length(object@mean_expression) == length(object@counts)){
      if(all(names(object@mean_expression) == names(object@counts)))
        out <- TRUE
      else
        return(paste0("Different names of 'counts' and 'mean_expression'"))
    }
    else 
      return(paste0("Unequal length of 'counts' and 'mean_expression'"))
  }
  
  if(length(object@genewise_precision) > 0){
    if(length(object@genewise_precision) == length(object@counts)){
      if(all(names(object@genewise_precision) == names(object@counts)))
        out <- TRUE
      else
        return(paste0("Different names of 'counts' and 'genewise_precision'"))
    }
    else 
      return(paste0("Unequal length of 'counts' and 'genewise_precision'"))
  }
  
  if(length(object@common_precision) > 0){
    if(length(object@common_precision) == 1)
      out <- TRUE
    else
      return(paste0("'common_precision' must be a vector of length 1'"))
  }
  
  if(nrow(object@design_precision) == ncol(object@counts)){
    out <- TRUE
  }else{
    return(paste0("Number of rows in the design matrix must be equal 
          to the number of columns in counts"))
  }
  
  return(out)
  
})

################################################################################
### accessing methods
################################################################################

#' @rdname dmDSprecision-class
#' @param type Character indicating which design matrix should be returned.
#'   Possible values \code{"precision"}, \code{"full_model"} or
#'   \code{"null_model"}.
#' @export
setMethod("design", "dmDSprecision", function(object, type = "precision"){
  
  stopifnot(type %in% c("precision", "full_model", "null_model"))
  
  if(type == "precision")
    object@design_precision
  else
    NULL
  
})


#' @rdname dmDSprecision-class
#' @export
setGeneric("mean_expression", function(x, ...) 
  standardGeneric("mean_expression"))

#' @rdname dmDSprecision-class
#' @export
setMethod("mean_expression", "dmDSprecision", function(x){
  
  data.frame(gene_id = names(x@mean_expression), 
    mean_expression = x@mean_expression, 
    stringsAsFactors = FALSE, row.names = NULL)
  
})


#' @rdname dmDSprecision-class
#' @export
setGeneric("common_precision", function(x, ...) 
  standardGeneric("common_precision"))

#' @rdname dmDSprecision-class
#' @export
setMethod("common_precision", "dmDSprecision", function(x) 
  x@common_precision )


#' @rdname dmDSprecision-class
#' @export
setGeneric("common_precision<-", function(x, value) 
  standardGeneric("common_precision<-"))

#' @rdname dmDSprecision-class
#' @export
setMethod("common_precision<-", "dmDSprecision", function(x, value){
  ### value must be a numeric of length 1

  names(value) <- NULL
  
  return(new("dmDSprecision", mean_expression = x@mean_expression, 
    common_precision = value, genewise_precision = x@genewise_precision, 
    design_precision = x@design_precision,
    counts = x@counts, samples = x@samples))
  
})

#' @rdname dmDSprecision-class
#' @export
setGeneric("genewise_precision", function(x, ...) 
  standardGeneric("genewise_precision"))

#' @rdname dmDSprecision-class
#' @export
setMethod("genewise_precision", "dmDSprecision", function(x){
  
  data.frame(gene_id = names(x@genewise_precision), 
    genewise_precision = x@genewise_precision, stringsAsFactors = FALSE, 
    row.names = NULL)
  
})


#' @rdname dmDSprecision-class
#' @export
setGeneric("genewise_precision<-", function(x, value) 
  standardGeneric("genewise_precision<-"))


#' @rdname dmDSprecision-class
#' @export
setMethod("genewise_precision<-", "dmDSprecision", function(x, value){
  # value must be a data frame with gene_id and genewise_precision
  
  stopifnot(all(c("gene_id", "genewise_precision") %in% colnames(value)))
  stopifnot(all(names(x@counts) %in% value[,"gene_id"]))
  order <- match(names(x@counts), value[,"gene_id"])
  
  return(new("dmDSprecision", mean_expression = x@mean_expression, 
    common_precision = x@common_precision, 
    genewise_precision = value[order, "genewise_precision"],
    design_precision = x@design_precision,  
    counts = x@counts, samples = x@samples))
  
})



# -----------------------------------------------------------------------------


setMethod("show", "dmDSprecision", function(object){
  
  callNextMethod(object)
  
  cat("  design()\n")
  cat("  mean_expression(), common_precision(), genewise_precision()\n")

})


################################################################################
### dmPrecision
################################################################################

#' Estimate the precision parameter in the Dirichlet-multinomial model
#' 
#' Maximum likelihood estimates of the precision parameter in the 
#' Dirichlet-multinomial model used for the differential exon/transcript usage
#' or QTL analysis.
#' 
#' @details Normally, in the differential analysis based on RNA-seq data, such 
#'   as, for example, differential gene expression, dispersion (of 
#'   negative-binomial model) is estimated. Here, we estimate precision of the 
#'   Dirichlet-multinomial model as it is more convenient computationally. To 
#'   obtain dispersion estimates, one can use a formula: dispersion = 1 / (1 + 
#'   precision).
#'   
#' @param x \code{\linkS4class{dmDSdata}} or \code{\linkS4class{dmSQTLdata}} 
#'   object.
#' @param ... Other parameters that can be defined by methods using this 
#'   generic.
#' @export
setGeneric("dmPrecision", function(x, ...) standardGeneric("dmPrecision"))


# -----------------------------------------------------------------------------


#' @details Parameters that are used in the precision (dispersion = 1 / (1 + 
#'   precision)) estimation start with prefix \code{prec_}. Those that are used 
#'   for the proportion estimation in each group when the shortcut fitting 
#'   \code{one_way = TRUE} can be used start with \code{prop_}, and those that 
#'   are used in the regression framework start with \code{coef_}.
#'   
#'   There are two optimization methods implemented within dmPrecision: 
#'   \code{"optimize"} for the common precision and \code{"grid"} for the 
#'   gene-wise precision.
#'   
#'   Only part of the precision parameters in dmPrecision have an influence on
#'   a given optimization method. Here is a list of such active parameters:
#'   
#'   \code{"optimize"}:
#'   
#'   \itemize{ \item \code{prec_interval}: Passed as \code{interval}. \item 
#'   \code{prec_tol}: The accuracy defined as \code{tol}. }
#'   
#'   \code{"grid"}, which uses the grid approach from 
#'   \code{\link[edgeR]{estimateDisp}} in \code{\link{edgeR}}:
#'   
#'   \itemize{ \item \code{prec_init}, \code{prec_grid_length}, 
#'   \code{prec_grid_range}: Parameters used to construct the search grid 
#'   \code{prec_init * 2^seq(from = prec_grid_range[1]}, \code{to = 
#'   prec_grid_range[2]}, \code{length = prec_grid_length)}. \item 
#'   \code{prec_moderation}: Dipsersion shrinkage is available only with 
#'   \code{"grid"} method. \item \code{prec_prior_df}: Used only when precision
#'   shrinkage is activated. Moderated likelihood is equal to \code{loglik + 
#'   prec_prior_df * moderation}. Higher \code{prec_prior_df}, more shrinkage 
#'   toward common or trended precision is applied. \item \code{prec_span}: 
#'   Used only when precision moderation toward trend is activated. }
#'   
#' @param design Numeric matrix defining the model that should be used when 
#'   estimating precision. Normally this should be a full model design used 
#'   also in \code{\link{dmFit}}.
#' @param mean_expression Logical. Whether to estimate the mean expression of 
#'   genes.
#' @param common_precision Logical. Whether to estimate the common precision.
#' @param genewise_precision Logical. Whether to estimate the gene-wise 
#'   precision.
#' @param prec_adjust Logical. Whether to use the Cox-Reid adjusted or 
#'   non-adjusted profile likelihood.
#' @param one_way Logical. Should the shortcut fitting be used when the design 
#'   corresponds to multiple group comparison. This is a similar approach as in 
#'   \code{\link{edgeR}}. If \code{TRUE} (the default), then proportions are 
#'   fitted per group and regression coefficients are recalculated from those 
#'   fits.
#' @param prec_subset Value from 0 to 1 defining the percentage of genes used in
#'   common precision estimation. The default is 0.1, which uses 10% of 
#'   randomly selected genes to speed up the precision estimation process. Use 
#'   \code{set.seed} function to make the analysis reproducible. See Examples.
#' @param prec_interval Numeric vector of length 2 defining the interval of 
#'   possible values for the common precision.
#' @param prec_tol The desired accuracy when estimating common precision.
#' @param prec_init Initial precision. If \code{common_precision} is 
#'   \code{TRUE}, then \code{prec_init} is overwritten by common precision 
#'   estimate.
#' @param prec_grid_length Length of the search grid.
#' @param prec_grid_range Vector giving the limits of grid interval.
#' @param prec_moderation Precision moderation method. One can choose to shrink
#'   the precision estimates toward the common precision (\code{"common"}) or 
#'   toward the (precision versus mean expression) trend (\code{"trended"})
#' @param prec_prior_df Degree of moderation (shrinkage) in case when it can not
#'   be calculated automaticaly (number of genes on the upper boundary of grid 
#'   is smaller than 10). By default it is equal to 0.
#' @param prec_span Value from 0 to 1 defining the percentage of genes used in 
#'   smoothing sliding window when calculating the precision versus mean 
#'   expression trend.
#' @param prop_mode Optimization method used to estimate proportions. Possible 
#'   value \code{"constrOptim"}.
#' @param prop_tol The desired accuracy when estimating proportions.
#' @param coef_mode Optimization method used to estimate regression 
#'   coefficients. Possible value \code{"optim"}.
#' @param coef_tol The desired accuracy when estimating regression coefficients.
#' @param verbose Numeric. Definie the level of progress messages displayed. 0 -
#'   no messages, 1 - main messages, 2 - message for every gene fitting.
#' @param add_uniform Whether to add a small fractional count to zeros,
#' (adding a uniform random variable between 0 and 0.1).
#' This option allows for the fitting of genewise precision and coefficients
#' for genes with two features having all zero for one group, or the last
#' feature having all zero for one group.
#' @param BPPARAM Parallelization method used by 
#'   \code{\link[BiocParallel]{bplapply}}.
#'   
#' @return Returns a \code{\linkS4class{dmDSprecision}} or 
#'   \code{\linkS4class{dmSQTLprecision}} object.
#' @examples 
#' # --------------------------------------------------------------------------
#' # Create dmDSdata object 
#' # --------------------------------------------------------------------------
#' ## Get kallisto transcript counts from the 'PasillaTranscriptExpr' package
#' 
#' library(PasillaTranscriptExpr)
#' \donttest{
#' data_dir  <- system.file("extdata", package = "PasillaTranscriptExpr")
#' 
#' ## Load metadata
#' pasilla_metadata <- read.table(file.path(data_dir, "metadata.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Load counts
#' pasilla_counts <- read.table(file.path(data_dir, "counts.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Create a pasilla_samples data frame
#' pasilla_samples <- data.frame(sample_id = pasilla_metadata$SampleName, 
#'   group = pasilla_metadata$condition)
#' levels(pasilla_samples$group)
#' 
#' ## Create a dmDSdata object
#' d <- dmDSdata(counts = pasilla_counts, samples = pasilla_samples)
#' 
#' ## Use a subset of genes, which is defined in the following file
#' gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
#' 
#' d <- d[names(d) %in% gene_id_subset, ]
#' 
#' # --------------------------------------------------------------------------
#' # Differential transcript usage analysis - simple two group comparison 
#' # --------------------------------------------------------------------------
#' 
#' ## Filtering
#' ## Check what is the minimal number of replicates per condition 
#' table(samples(d)$group)
#' 
#' d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3,
#'   min_gene_expr = 10, min_feature_expr = 10)
#' 
#' plotData(d)
#' 
#' ## Create the design matrix
#' design_full <- model.matrix(~ group, data = samples(d))
#' 
#' ## To make the analysis reproducible
#' set.seed(123)
#' ## Calculate precision
#' d <- dmPrecision(d, design = design_full)
#' 
#' plotPrecision(d)
#' 
#' head(mean_expression(d))
#' common_precision(d)
#' head(genewise_precision(d))
#' }
#' @seealso \code{\link{plotPrecision}} \code{\link[edgeR]{estimateDisp}}
#' @author Malgorzata Nowicka
#' @references McCarthy, DJ, Chen, Y, Smyth, GK (2012). Differential expression
#' analysis of multifactor RNA-Seq experiments with respect to biological
#' variation. Nucleic Acids Research 40, 4288-4297.
#' @rdname dmPrecision
#' @importFrom limma nonEstimable
#' @importFrom stats runif
#' @export
setMethod("dmPrecision", "dmDSdata", function(x, design, 
  mean_expression = TRUE, common_precision = TRUE, genewise_precision = TRUE,
  prec_adjust = TRUE, prec_subset = 0.1, 
  prec_interval = c(0, 1e+3), prec_tol = 1e+01, 
  prec_init = 100, prec_grid_length = 21, prec_grid_range = c(-10, 10), 
  prec_moderation = "trended", prec_prior_df = 0, prec_span = 0.1, 
  one_way = TRUE, 
  prop_mode = "constrOptim", prop_tol = 1e-12, 
  coef_mode = "optim", coef_tol = 1e-12,
  verbose = 0,
  add_uniform = FALSE,
  BPPARAM = BiocParallel::SerialParam()){
  
  # Check design as in edgeR
  design <- as.matrix(design)
  stopifnot(nrow(design) == ncol(x@counts))
  
  ne <- limma::nonEstimable(design)
  if(!is.null(ne)) 
    stop(paste("Design matrix not of full rank. 
      The following coefficients not estimable:\n", paste(ne, collapse = " ")))
  
  # Check other parameters
  stopifnot(is.logical(mean_expression))
  stopifnot(is.logical(common_precision))
  stopifnot(is.logical(genewise_precision))
  stopifnot(is.logical(prec_adjust))
  stopifnot(length(prec_subset) == 1)
  stopifnot(is.numeric(prec_subset) && prec_subset > 0 && prec_subset <= 1)
  stopifnot(length(prec_interval) == 2)
  stopifnot(prec_interval[1] < prec_interval[2])
  stopifnot(length(prec_tol) == 1)
  stopifnot(is.numeric(prec_tol) && prec_tol > 0)
  stopifnot(length(prec_init) == 1)
  stopifnot(is.numeric(prec_init))
  stopifnot(prec_grid_length > 2)
  stopifnot(length(prec_grid_range) == 2)
  stopifnot(prec_grid_range[1] < prec_grid_range[2])
  stopifnot(length(prec_moderation) == 1)
  stopifnot(prec_moderation %in% c("none", "common", "trended"))
  stopifnot(length(prec_prior_df) == 1)
  stopifnot(is.numeric(prec_prior_df) && prec_prior_df >= 0)
  stopifnot(length(prec_span) == 1)
  stopifnot(is.numeric(prec_span) && prec_span > 0 && prec_span < 1)
  
  stopifnot(is.logical(one_way))
  stopifnot(length(prop_mode) == 1)
  stopifnot(prop_mode %in% c("constrOptim"))
  stopifnot(length(prop_tol) == 1)
  stopifnot(is.numeric(prop_tol) && prop_tol > 0)
  
  stopifnot(length(coef_mode) == 1)
  stopifnot(coef_mode %in% c("optim", "nlminb", "nlm"))
  stopifnot(length(coef_tol) == 1)
  stopifnot(is.numeric(coef_tol) && coef_tol > 0)
  
  stopifnot(verbose %in% 0:2)
  
  if(mean_expression || (genewise_precision && 
      prec_moderation == "trended")){
    mean_expression <- dm_estimateMeanExpression(counts = x@counts, 
      verbose = verbose)
  }else{
    mean_expression <- numeric()
  }
  
  if(common_precision){
    
    if(prec_subset < 1){
      
      message(paste0("! Using a subset of ", prec_subset, 
        " genes to estimate common precision !\n"))
      
      genes2keep <- sample(1:length(x@counts), 
        max(round(prec_subset * length(x@counts)), 1), replace = FALSE)
      
    }else{
      genes2keep <- 1:length(x@counts)
    }
    
    common_precision <- dmDS_estimateCommonPrecision(
      counts = x@counts[genes2keep, ], 
      design = design, prec_adjust = prec_adjust, 
      prec_interval = prec_interval, prec_tol = prec_tol,
      one_way = one_way,
      prop_mode = prop_mode, prop_tol = prop_tol, 
      coef_mode = coef_mode, coef_tol = coef_tol,
      verbose = verbose, BPPARAM = BPPARAM)
    
  }else{
    common_precision <- numeric()
  }
  
  if(genewise_precision){
    
    if(length(common_precision) == 1){
      message("! Using common_precision = ", round(common_precision, 4), 
        " as prec_init !\n")
      prec_init <- common_precision
    }

    # add random small fractional counts to zeros
    counts <- if (add_uniform) addUniform(x@counts) else x@counts

    genewise_precision <- dmDS_estimateTagwisePrecision(counts = counts, 
      design = design, mean_expression = mean_expression, 
      prec_adjust = prec_adjust, prec_init = prec_init, 
      prec_grid_length = prec_grid_length, prec_grid_range = prec_grid_range, 
      prec_moderation = prec_moderation, 
      prec_prior_df = prec_prior_df, prec_span = prec_span, 
      one_way = one_way,
      prop_mode = prop_mode, prop_tol = prop_tol, 
      coef_mode = coef_mode, coef_tol = coef_tol,
      verbose = verbose, BPPARAM = BPPARAM)
    
  }else{
    genewise_precision <- numeric()
  }
  
  return(new("dmDSprecision", mean_expression = mean_expression, 
    common_precision = common_precision, 
    genewise_precision = genewise_precision, 
    design_precision = design,
    counts = x@counts, samples = x@samples))
  
})


################################################################################
### plotPrecision
################################################################################

#' Precision versus mean expression plot
#' 
#' @return Normally in the differential analysis based on RNA-seq data, such 
#'   plot has dispersion parameter plotted on the y-axis. Here, the y-axis 
#'   represents precision since in the Dirichlet-multinomial model this is the 
#'   parameter that is directly estimated. It is important to keep in mind that
#'   the precision parameter (gamma0) is inverse proportional to dispersion 
#'   (theta): theta = 1 / (1 + gamma0). In RNA-seq data, we can typically
#'   observe a trend where the dispersion decreases (here, precision increases)
#'   for genes with higher mean expression.
#'   
#' @param x \code{\linkS4class{dmDSprecision}} or 
#'   \code{\linkS4class{dmSQTLprecision}} object.
#' @param ... Other parameters that can be defined by methods using this 
#'   generic.
#' @export
setGeneric("plotPrecision", function(x, ...) standardGeneric("plotPrecision"))


# -----------------------------------------------------------------------------


#' @inheritParams plotData
#' @examples 
#' # --------------------------------------------------------------------------
#' # Create dmDSdata object 
#' # --------------------------------------------------------------------------
#' ## Get kallisto transcript counts from the 'PasillaTranscriptExpr' package
#' 
#' library(PasillaTranscriptExpr)
#' \donttest{
#' data_dir  <- system.file("extdata", package = "PasillaTranscriptExpr")
#' 
#' ## Load metadata
#' pasilla_metadata <- read.table(file.path(data_dir, "metadata.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Load counts
#' pasilla_counts <- read.table(file.path(data_dir, "counts.txt"), 
#' header = TRUE, as.is = TRUE)
#' 
#' ## Create a pasilla_samples data frame
#' pasilla_samples <- data.frame(sample_id = pasilla_metadata$SampleName, 
#'   group = pasilla_metadata$condition)
#' levels(pasilla_samples$group)
#' 
#' ## Create a dmDSdata object
#' d <- dmDSdata(counts = pasilla_counts, samples = pasilla_samples)
#' 
#' ## Use a subset of genes, which is defined in the following file
#' gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
#' 
#' d <- d[names(d) %in% gene_id_subset, ]
#' 
#' # --------------------------------------------------------------------------
#' # Differential transcript usage analysis - simple two group comparison 
#' # --------------------------------------------------------------------------
#' 
#' ## Filtering
#' ## Check what is the minimal number of replicates per condition 
#' table(samples(d)$group)
#' 
#' d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3,
#'   min_gene_expr = 10, min_feature_expr = 10)
#' 
#' plotData(d)
#' 
#' ## Create the design matrix
#' design_full <- model.matrix(~ group, data = samples(d))
#' 
#' ## To make the analysis reproducible
#' set.seed(123)
#' ## Calculate precision
#' d <- dmPrecision(d, design = design_full)
#' 
#' plotPrecision(d)
#' 
#' head(mean_expression(d))
#' common_precision(d)
#' head(genewise_precision(d))
#' }
#' @author Malgorzata Nowicka
#' @seealso \code{\link{plotData}}, \code{\link{plotProportions}},
#'   \code{\link{plotPValues}}
#'   
#' @rdname plotPrecision
#' @export
setMethod("plotPrecision", "dmDSprecision", function(x){
  
  if(!length(x@genewise_precision) == length(x@counts))
    stop("Genewise precision must be estimated for each gene!")
  if(!length(x@genewise_precision) == length(x@mean_expression))
    stop("Mean expression must be estimated for each gene!")
  
  if(length(x@common_precision) == 0){
    common_precision <- NULL
  }else{
    common_precision <- x@common_precision
  }
  
  ggp <- dm_plotPrecision(genewise_precision = x@genewise_precision, 
    mean_expression = x@mean_expression, nr_features = elementNROWS(x@counts), 
    common_precision = common_precision)
  
  return(ggp)  
  
})

# function to add small fraction of counts to zeros
addUniform <- function(counts, uniform_max=0.1) {
  counts_new <- lapply(seq_along(counts), function(g) {
    expr <- counts[[g]]
    zeros <- expr == 0
    expr[zeros] <- runif(sum(zeros), 0, uniform_max)
    expr
  })
  names(counts_new) <- names(counts)
  MatrixList(counts_new)
}
Any scripts or data that you put into this service are public.
DRIMSeq documentation built on Nov. 8, 2020, 8:25 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DRIMSeq
Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

R/class_dmDSprecision.R
In DRIMSeq: Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

Defines functions addUniform

Try the DRIMSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DRIMSeq Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

R/class_dmDSprecision.R In DRIMSeq: Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

Defines functions addUniform

Try the DRIMSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DRIMSeq
Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq

R/class_dmDSprecision.R
In DRIMSeq: Differential transcript usage and tuQTL analyses with Dirichlet-multinomial model in RNA-seq