## de_edger.r: Standardize inputs/outputs for differential expression analysis
## with EdgeR. The caveats and ideas here are basically identical to de_deseq.r
#' Set up a model matrix and set of contrasts to do pairwise comparisons using EdgeR.
#'
#' This function performs the set of possible pairwise comparisons using EdgeR.
#'
#' Like the other _pairwise() functions, this attempts to perform all pairwise
#' contrasts in the provided data set. The details are of course slightly
#' different when using EdgeR. Thus, this uses the function
#' choose_binom_dataset() to try to ensure that the incoming data is appropriate
#' for EdgeR (if one normalized the data, it will attempt to revert to raw
#' counts, for example). It continues on to extract the conditions and batches
#' in the data, choose an appropriate experimental model, and run the EdgeR
#' analyses as described in the manual. It defaults to using an experimental
#' batch factor, but will accept a string like 'sva' instead, in which case it
#' will use sva to estimate the surrogates, and append them to the experimental
#' design. The edger_method parameter may be used to apply different EdgeR code
#' paths as outlined in the manual. If you want to play with non-standard data,
#' the force argument will round the data and shoe-horn it into EdgeR.
#'
#' @param input Dataframe/vector or expt class containing data, normalization
#' state, etc.
#' @param conditions Factor of conditions in the experiment.
#' @param batches Factor of batches in the experiment.
#' @param model_cond Include condition in the experimental model?
#' @param model_batch Include batch in the model? In most cases this is a good
#' thing(tm).
#' @param model_intercept Use an intercept containing model?
#' @param alt_model Alternate experimental model to use?
#' @param extra_contrasts Add some extra contrasts to add to the list of
#' pairwise contrasts. This can be pretty neat, lets say one has conditions
#' A,B,C,D,E and wants to do (C/B)/A and (E/D)/A or (E/D)/(C/B) then use this
#' with a string like: "c_vs_b_ctrla = (C-B)-A, e_vs_d_ctrla = (E-D)-A,
#' de_vs_cb = (E-D)-(C-B),"
#' @param annot_df Annotation information to the data tables?
#' @param force Force edgeR to accept inputs which it should not have to deal with.
#' @param keepers Ask for a specific set of contrasts instead of all.
#' @param edger_method I found a couple/few ways of doing edger in the manual,
#' choose with this.
#' @param ... The elipsis parameter is fed to write_edger() at the end.
#' @return List including the following information:
#' contrasts = The string representation of the contrasts performed.
#' lrt = A list of the results from calling glmLRT(), one for each contrast.
#' contrast_list = The list of each call to makeContrasts()
#' I do this to avoid running into the limit on # of contrasts addressable by topTags()
#' all_tables = a list of tables for the contrasts performed.
#' @seealso [edgeR] [deseq_pairwise()] [ebseq_pairwise()] [limma_pairwise()] [basic_pairwise()]
#' DOI:10.12688/f1000research.8987.2
#' @examples
#' \dontrun{
#' expt <- create_expt(metadata = "metadata.xlsx", gene_info = annotations)
#' pretend <- edger_pairwise(expt, model_batch = "sva")
#' }
#' @export
edger_pairwise <- function(input = NULL, conditions = NULL,
batches = NULL, model_cond = TRUE,
model_batch = TRUE, model_intercept = FALSE,
alt_model = NULL, extra_contrasts = NULL,
annot_df = NULL, force = FALSE, keepers = NULL,
edger_method = "long", ...) {
arglist <- list(...)
edger_test <- "lrt"
if (!is.null(arglist[["edger_test"]])) {
edger_test <- arglist[["edger_test"]]
}
message("Starting edgeR pairwise comparisons.")
input <- sanitize_expt(input)
input_data <- choose_binom_dataset(input, force = force)
design <- pData(input)
conditions <- design[["condition"]]
conditions_table <- table(conditions)
batches <- design[["batch"]]
batches_table <- table(batches)
data <- input_data[["data"]]
conditions <- as.factor(conditions)
batches <- as.factor(batches)
model_choice <- choose_model(input, conditions = conditions,
batches = batches,
model_batch = model_batch,
model_cond = model_cond,
model_intercept = model_intercept,
alt_model = alt_model,
...)
##model_choice <- choose_model(input, conditions, batches,
## model_batch = model_batch,
## model_cond = model_cond,
## model_intercept = model_intercept,
## alt_model = alt_model)
model_including <- model_choice[["including"]]
if (class(model_choice[["model_batch"]])[1] == "matrix") {
model_batch <- model_choice[["model_batch"]]
}
model_data <- model_choice[["chosen_model"]]
model_string <- model_choice[["chosen_string"]]
## I have a strong sense that the most recent version of edgeR changed its
## dispersion estimate code. Here is a note from the user's guide, which may
## have been there previously and I merely did not notice: To estimate common
## dispersion, trended dispersions and tagwise dispersions in one run
## y <- estimateDisp(y, design)
## raw <- edgeR::DGEList(counts = data, group = conditions)
## norm <- edgeR::calcNormFactors(raw)
norm <- import_edger(data, conditions, tximport = input[["tximport"]][["raw"]])
message("EdgeR step 1/9: Importing and normalizing data.")
final_norm <- NULL
if (edger_method == "short") {
message("EdgeR steps 2 through 6/9: All in one!")
final_norm <- edgeR::estimateDisp(norm, design = model_data)
} else {
state <- TRUE
message("EdgeR step 2/9: Estimating the common dispersion.")
disp_norm <- try(edgeR::estimateCommonDisp(norm))
if (class(disp_norm)[1] == "try-error") {
warning("estimateCommonDisp() failed. Trying again with estimateDisp().")
state <- FALSE
}
if (isTRUE(state)) {
message("EdgeR step 3/9: Estimating dispersion across genes.")
tagdisp_norm <- try(edgeR::estimateTagwiseDisp(disp_norm))
if (class(tagdisp_norm)[1] == "try-error") {
warning("estimateTagwiseDisp() failed. Trying again with estimateDisp().")
state <- FALSE
}
}
if (isTRUE(state)) {
message("EdgeR step 4/9: Estimating GLM Common dispersion.")
glm_norm <- try(edgeR::estimateGLMCommonDisp(tagdisp_norm, model_data))
if (class(glm_norm)[1] == "try-error") {
warning("estimateGLMCommonDisp() failed. Trying again with estimateDisp().")
state <- FALSE
}
}
if (isTRUE(state)) {
message("EdgeR step 5/9: Estimating GLM Trended dispersion.")
glm_trended <- try(edgeR::estimateGLMTrendedDisp(glm_norm, model_data))
if (class(glm_trended)[1] == "try-error") {
warning("estimateGLMTrendedDisp() failed. Trying again with estimateDisp().")
state <- FALSE
}
}
if (isTRUE(state)) {
message("EdgeR step 6/9: Estimating GLM Tagged dispersion.")
final_norm <- try(edgeR::estimateGLMTagwiseDisp(glm_trended, model_data))
if (class(final_norm)[1] == "try-error") {
warning("estimateGLMTagwiseDisp() failed. Trying again with estimateDisp().")
state <- FALSE
}
}
## If we had a failure along the way, redo using estimateDisp()
if (!isTRUE(state)) {
warning("There was a failure when doing the estimations.")
message("There was a failure when doing the estimations, using estimateDisp().")
final_norm <- edgeR::estimateDisp(norm, design = model_data, robust = TRUE)
}
}
cond_fit <- NULL
if (edger_test == "lrt") {
message("EdgeR step 7/9: Running glmFit, ",
"switch to glmQLFit by changing the argument 'edger_test'.")
cond_fit <- edgeR::glmFit(final_norm, design = model_data, robust = TRUE)
} else {
message("EdgeR step 7/9: Running glmQLFit, ",
"switch to glmFit by changing the argument 'edger_test'.")
cond_fit <- edgeR::glmQLFit(final_norm, design = model_data, robust = TRUE)
}
message("EdgeR step 8/9: Making pairwise contrasts.")
apc <- make_pairwise_contrasts(model_data, conditions,
extra_contrasts = extra_contrasts,
do_identities = FALSE, keepers = keepers, ...)
contrast_string <- apc[["contrast_string"]]
## This section is convoluted because glmLRT only seems to take up to 7
## contrasts at a time. As a result, I iterate through the set of possible
## contrasts one at a time and ask for each separately.
contrast_list <- list()
result_list <- list()
lrt_list <- list()
sc <- vector("list", length(apc[["names"]]))
end <- length(apc[["names"]])
if (isTRUE(verbose)) {
bar <- utils::txtProgressBar(style = 3)
}
for (con in seq_along(apc[["names"]])) {
name <- apc[["names"]][[con]]
if (isTRUE(verbose)) {
pct_done <- con / length(apc[["names"]])
utils::setTxtProgressBar(bar, pct_done)
}
sc[[name]] <- gsub(pattern = ",", replacement = "", apc[["all_pairwise"]][[con]])
tt <- parse(text = sc[[name]])
## ctr_string <- paste0("tt = mymakeContrasts(", tt, ", levels = model_data)")
ctr_string <- glue("tt = mymakeContrasts({tt}, levels = model_data)")
eval(parse(text = ctr_string))
contrast_list[[name]] <- tt
lrt_list[[name]] <- NULL
tt <- sm(requireNamespace("edgeR"))
tt <- sm(try(attachNamespace("edgeR"), silent = TRUE))
if (edger_test == "lrt") {
lrt_list[[name]] <- edgeR::glmLRT(cond_fit, contrast = contrast_list[[name]])
} else {
lrt_list[[name]] <- edgeR::glmQLFTest(cond_fit, contrast = contrast_list[[name]])
}
res <- edgeR::topTags(lrt_list[[name]], n = nrow(data), sort.by = "logFC")
res <- as.data.frame(res)
res[["logFC"]] <- signif(x = as.numeric(res[["logFC"]]), digits = 4)
res[["logCPM"]] <- signif(x = as.numeric(res[["logCPM"]]), digits = 4)
if (!is.null(res[["LR"]])) {
res[["LR"]] <- signif(x = as.numeric(res[["LR"]]), digits = 4)
} else if (!is.null(res[["F"]])) {
res[["F"]] <- signif(x = as.numeric(res[["F"]]), digits = 4)
}
res[["PValue"]] <- signif(x = as.numeric(res[["PValue"]]), digits = 4)
res[["FDR"]] <- signif(x = as.numeric(res[["FDR"]]), digits = 4)
result_list[[name]] <- res
} ## End for loop
if (isTRUE(verbose)) {
close(bar)
}
dispersions <- sm(try(edgeR::plotBCV(y = final_norm), silent = TRUE))
dispersion_plot <- NULL
if (class(dispersions)[1] != "try-error") {
dispersion_plot <- grDevices::recordPlot()
}
retlist <- list(
"all_tables" = result_list,
"batches" = batches,
"batches_table" = batches_table,
"conditions" = conditions,
"conditions_table" = conditions_table,
"contrast_list" = contrast_list,
"contrasts" = apc,
"contrast_string" = contrast_string,
"contrasts_performed" = apc[["names"]],
"dispersion_plot" = dispersion_plot,
"input_data" = input,
"lrt" = lrt_list,
"method" = "edger",
"model" = model_data,
"model_string" = model_string)
class(retlist) <- c("edger_pairwise", "list")
if (!is.null(arglist[["edger_excel"]])) {
retlist[["edger_excel"]] <- write_edger(retlist, excel = arglist[["edger_excel"]])
}
class(retlist) <- c("edger_pairwise", "list")
return(retlist)
}
#' Import tximport information into edgeR.
#'
#' This was taken from the tximport manual with minor modifications.
#'
#' @param data data to be coerced into edgeR.
#' @param conditions Set of conditions used to make the DGEList.
#' @param tximport Tell this if the data is actually coming from tximport.
#' @return Hopefully valid DGEList for edgeR.
#' @seealso [import_deseq()]
import_edger <- function(data, conditions, tximport = NULL) {
if (is.null(tximport)) {
raw <- edgeR::DGEList(counts = data, group = conditions)
norm <- edgeR::calcNormFactors(raw)
} else {
raw <- tximport[["counts"]]
norm_mat <- tximport[["length"]]
norm_mat <- norm_mat / exp(rowMeans(log(norm_mat)))
offset <- log(edgeR::calcNormFactors(raw / norm_mat)) + log(colSums(raw / norm_mat))
norm <- edgeR::DGEList(raw)
norm$offset <- t(t(log(norm_mat)) + offset)
}
return(norm)
}
#' Writes out the results of a edger search using write_de_table()
#'
#' Looking to provide a single interface for writing tables from edger and friends.
#'
#' Tested in test_26edger.R
#'
#' @param data Output from deseq_pairwise()
#' @param ... Options for writing the xlsx file.
#' @seealso [write_de_Table()]
#' @examples
#' \dontrun{
#' finished_comparison <- edger_pairwise(expressionset)
#' data_list <- write_edger(finished_comparison, excel = "edger_result.xlsx")
#' }
#' @export
write_edger <- function(data, ...) {
result <- write_de_table(data, type = "edger", ...)
return(result)
}
## EOF
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.