R/LRTests.R

Defines functions LRTests

Documented in LRTests

#' @include class_RegspliceData.R class_RegspliceResults.R
NULL




#' Calculate likelihood ratio tests.
#' 
#' Calculate likelihood ratio tests between fitted models and null models.
#' 
#' The regularized (lasso) fitted models contain an optimal subset of exon:condition 
#' interaction terms for each gene, and the "full" fitted models contain all 
#' exon:condition interaction terms. The null models contain zero interaction terms, so 
#' they are nested within the fitted models.
#' 
#' The likelihood ratio (LR) tests compare the fitted models against the nested null 
#' models.
#' 
#' If the regularized (lasso) model contains at least one exon:condition interaction 
#' term, the LR test compares the lasso model against the null model. However, if the 
#' lasso model contains zero interaction terms, then the lasso and null models are 
#' identical, so the LR test cannot be calculated. The \code{when_null_selected} argument
#' lets the user choose what to do in these cases: either set p-values equal to 1 
#' (\code{when_null_selected = "ones"}); or calculate a LR test using the "full" model 
#' containing all exon:condition interaction terms (\code{when_null_selected = "full"}), 
#' which reduces power due to the larger number of terms, but allows the evidence for 
#' differential exon usage among these genes to be distinguished. You can also return 
#' \code{NA}s for these genes (\code{when_null_selected = "NA"}).
#' 
#' The default option is \code{when_null_selected = "ones"}. This simply calls all these 
#' genes non-significant, which in most cases is sufficient since we are more interested 
#' in genes with strong evidence for differential exon usage. However, if it is important
#' to rank the low-evidence genes in your data set, use the \code{when_null_selected = 
#' "full"} option.
#' 
#' If \code{when_null_selected = "ones"} or \code{when_null_selected = "NA"}, the "full" 
#' fitted models are not required.
#' 
#' Previous step: Fit models with \code{\link{fitRegMultiple}}, 
#' \code{\link{fitNullMultiple}}, and \code{\link{fitFullMultiple}}.
#' Next step: Generate summary table of results with \code{\link{summaryTable}}.
#' 
#' 
#' @param rs_results \code{\linkS4class{RegspliceResults}} object containing results 
#'   generated by \code{\link{fitRegMultiple}}, \code{\link{fitNullMultiple}}, and 
#'   \code{\link{fitFullMultiple}}. If \code{when_null_selected = "ones" or "NA"}, the 
#'   "full" models are not required. See \code{\linkS4class{RegspliceResults}} for 
#'   details.
#' @param when_null_selected Which option to use for genes where the lasso model selects 
#'   zero interaction terms, i.e. identical to the null model. Options are \code{"ones"},
#'   \code{"full"}, and \code{"NA"}. Default is \code{"ones"}. See below for details.
#' 
#' 
#' @return Returns a \code{\linkS4class{RegspliceResults}} object containing results of 
#'   the LR tests. The results consist of the following entries for each gene:
#' \itemize{
#' \item p_vals: raw p-values
#' \item p_adj: multiple testing adjusted p-values (Benjamini-Hochberg false discovery 
#' rates, FDR)
#' \item LR_stats: likelihood ratio test statistics
#' \item df_tests: degrees of freedom of likelihood ratio tests
#' }
#' 
#' @seealso \code{\link{RegspliceResults}} \code{\link{initializeResults}} 
#'   \code{\link{fitRegMultiple}} \code{\link{fitNullMultiple}} 
#'   \code{\link{fitFullMultiple}} \code{\link{summaryTable}}
#' 
#' @importFrom stats pchisq p.adjust
#' 
#' @export
#' 
#' @examples
#' file_counts <- system.file("extdata/vignette_counts.txt", package = "regsplice")
#' data <- read.table(file_counts, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#' head(data)
#' 
#' counts <- data[, 2:7]
#' tbl_exons <- table(sapply(strsplit(data$exon, ":"), function(s) s[[1]]))
#' gene_IDs <- names(tbl_exons)
#' n_exons <- unname(tbl_exons)
#' condition <- rep(c("untreated", "treated"), each = 3)
#' 
#' rs_data <- RegspliceData(counts, gene_IDs, n_exons, condition)
#' 
#' rs_data <- filterZeros(rs_data)
#' rs_data <- filterLowCounts(rs_data)
#' rs_data <- runNormalization(rs_data)
#' rs_data <- runVoom(rs_data)
#' 
#' rs_results <- initializeResults(rs_data)
#' 
#' rs_results <- fitRegMultiple(rs_results, rs_data)
#' rs_results <- fitNullMultiple(rs_results, rs_data)
#' rs_results <- fitFullMultiple(rs_results, rs_data)
#' 
#' rs_results <- LRTests(rs_results)
#' 
LRTests <- function(rs_results, when_null_selected = c("ones", "full", "NA")) {
  
  when_null_selected <- match.arg(when_null_selected)
  
  if (is.null(rs_results@fit_full_dev) & when_null_selected == "full") {
    stop("fitted 'full' models must be provided if when_null_selected = 'full'")
  }
  
  LR_stats <- abs(rs_results@fit_reg_dev - rs_results@fit_null_dev)
  df_tests <- abs(rs_results@fit_reg_df - rs_results@fit_null_df)
  
  # genes where lasso selected zero interaction terms (equivalent to null model); 
  # or NAs (where glmnet did not complete)
  ix_remove <- df_tests == 0 | is.na(df_tests)
  
  p_vals_keep <- stats::pchisq(LR_stats[!ix_remove], df_tests[!ix_remove], lower.tail = FALSE)
  
  p_vals <- p_adj <- rep(NA, length(rs_results@fit_reg_dev))
  
  if (when_null_selected == "ones") {
    p_vals[!ix_remove] <- p_vals_keep
    p_vals[ix_remove] <- 1
    
    # multiple testing adjustment for number of calculated p-values (i.e. non-ones)
    p_adj[!ix_remove] <- stats::p.adjust(p_vals_keep, method = "fdr")
    p_adj[ix_remove] <- 1
    
    LR_stats[ix_remove] <- NA
    df_tests[ix_remove] <- NA
    
  } else if (when_null_selected == "full") {
    LR_stats_full <- abs(rs_results@fit_full_dev - rs_results@fit_null_dev)
    df_tests_full <- abs(rs_results@fit_full_df - rs_results@fit_null_df)
    
    p_vals_full <- stats::pchisq(LR_stats_full, df_tests_full, lower.tail = FALSE)
    
    p_vals[!ix_remove] <- p_vals_keep
    p_vals[ix_remove] <- p_vals_full[ix_remove]
    
    # multiple testing adjustment for number of calculated p-values (i.e. all genes)
    p_adj <- stats::p.adjust(p_vals, method = "fdr")
    
    LR_stats[ix_remove] <- LR_stats_full[ix_remove]
    df_tests[ix_remove] <- df_tests_full[ix_remove]
    
  } else if (when_null_selected == "NA") {
    p_vals[!ix_remove] <- p_vals_keep
    
    # multiple testing adjustment for number of calculated p-values (i.e. non-NAs)
    p_adj[!ix_remove] <- stats::p.adjust(p_vals_keep, method = "fdr")
    
    LR_stats[ix_remove] <- NA
    df_tests[ix_remove] <- NA
  }
  
  rs_results@p_vals <- p_vals
  rs_results@p_adj <- p_adj
  rs_results@LR_stats <- LR_stats
  rs_results@df_tests <- df_tests
  
  rs_results
}
lmweber/regsplice documentation built on March 25, 2020, 2:07 p.m.