#' Load a MAGMA .gcov.out file
#'
#' Convenience function which just does a little formatting to make
#' it easier to use.
#'
#' @param path Path to a .gcov.out file (if \code{EnrichmentMode=='Linear'}).
#' or .sets.out (if EnrichmentMode=='Top 10\%').
#' @param genesOutCOND [Optional] If the analysis controlled for another GWAS,
#' then this is included as a column. Otherwise the column is \code{NA}.
#' @param EnrichmentMode [Optional] Either 'Linear' or 'Top 10\%'
#' (Default: \code{'Linear'}).
#' @param ControlForCT [Optional] May be an internal argument.
#' We suggest ignoring or take a look at the code to figure it out.
#' @param keep_all_rows Keep all rows, without filtering
#' such that there is only one row per celltype.
#' @param verbose Print messages.
#' @inheritParams calculate_celltype_associations
#' @inheritParams calculate_celltype_enrichment_limma
#' @returns Contents of the .gcov.out file.
#'
#' @export
#' @importFrom utils read.table
#' @examples
#' path <- system.file(
#' "extdata","ieu-a-298.tsv.gz.35UP.10DOWN.level1.MainRun.gsa.out",
#' package = "MAGMA.Celltyping")
#' ctd <- ewceData::ctd()
#' annotLevel <- 1
#' EnrichmentMode <- "Linear"
#' res <- load_magma_results_file(path = path,
#' ctd = ctd,
#' annotLevel = annotLevel,
#' EnrichmentMode = EnrichmentMode)
load_magma_results_file <- function(path,
ctd,
annotLevel,
EnrichmentMode,
genesOutCOND = NA,
analysis_name = NULL,
ControlForCT = "BASELINE",
keep_all_rows = FALSE,
verbose = TRUE) {
# devoptera::args2vars(load_magma_results_file)
requireNamespace("utils")
requireNamespace("dplyr")
VARIABLE <- NGENES <- TYPE <- NULL;
messager("Reading enrichment results file into R.",v=verbose)
force(path)
force(ctd)
force(annotLevel)
genesOutCOND <- genesOutCOND[1]
path <- get_actual_path(path)
#### Check EnrichmentMode has correct values ####
check_enrichment_mode(EnrichmentMode = EnrichmentMode)
#### Check the file has appropriate ending given EnrichmentMode ####
if (EnrichmentMode == "Linear" &&
length(grep(".gsa.out$|.gsa.out.txt$", path)) == 0) {
stop("If EnrichmentMode=='Linear' then path must end in '.gsa.out'")
}
{
res <- utils::read.table(path, stringsAsFactors = FALSE, header = FALSE)
colnames(res) <- as.character(res[1, ])
res <- res[-1, ]
}
# Check if some of the variables are ZSTAT
# (if so, this indicates that another GWAS is being controlled for)
isConditionedOnGWAS <- (sum(grepl("ZSTAT", colnames(res))) > 0 ) |
sum(grepl("^ZSTAT", res$VARIABLE))>0
# The VARIABLE column in MAGMA output is limited by 30 characters.
# If so, use the FULL_NAME column.
if (!is.null(res$FULL_NAME)) {
res$VARIABLE <- res$FULL_NAME
}
#### Do some error checking ####
ctd_celltypes <- colnames(ctd[[annotLevel]]$specificity)
res_celltypes <- unique(
grep("^ZSTAT",res$VARIABLE, value = TRUE, invert = TRUE)
)
numCTinCTD <- length(ctd_celltypes)
numCTinRes <- length(res_celltypes)
if (ControlForCT[1] == "BASELINE" &&
isFALSE(isConditionedOnGWAS)) {
if (numCTinCTD != numCTinRes) {
if (abs(numCTinCTD - numCTinRes) > as.integer(numCTinRes * .5)) {
stop(sprintf(
">50% of celltypes missing. %s celltypes
in ctd but %s in results file.
Did you provide the correct annotLevel?",
numCTinCTD, numCTinRes
))
} else {
messager(numCTinCTD,
"celltypes in ctd but",numCTinRes,"in results file.",
"Some celltypes may have been dropped due if the variance",
"being too low (i.e. set contains only one gene used in analysis)",
"OR duplicate celltype names (after ignoring case).")
messager(
"<50% of celltypes missing.",
"Attemping to fix by removing missing cell-types:\n",
paste(" - ",
base::setdiff(
ctd_celltypes,
res_celltypes
),
collapse = "\n"
)
)
res <- subset(res,
VARIABLE %in% ctd_celltypes |
startsWith(VARIABLE,"ZSTAT"))
}
}
}
# In the new version of MAGMA, when you run conditional analyses,
# each model has a number, and the covariates also get p-values...
# ---- The information contained is actually quite useful....
# but for now just drop it
if(isFALSE(keep_all_rows)){
if(EnrichmentMode=="Linear"){
res <- subset(res, TYPE=="COVAR" & (VARIABLE %in% res_celltypes))
} else if (EnrichmentMode=="Top 10%"){
res <- subset(res, TYPE=="SET" & (VARIABLE %in% res_celltypes))
}
}
res$BETA <- as.numeric(res$BETA)
res$BETA_STD <- as.numeric(res$BETA_STD)
res$SE <- as.numeric(res$SE)
res$P <- as.numeric(res$P)
res$log10p <- log(res$P, 10)
res$level <- annotLevel
res$Method <- "MAGMA"
res$EnrichmentMode <- EnrichmentMode
res$GCOV_FILE <- basename(path)
res$CONTROL <- paste(ControlForCT, collapse = ",")
res$CONTROL_label <- paste(ControlForCT, collapse = ",")
res$genesOutCOND <- paste(genesOutCOND, collapse = " | ")
res$analysis_name <- analysis_name
res <- res |>
dplyr::rename(Celltype = VARIABLE,
OBS_GENES = NGENES)
res$SET <- NULL
return(res)
}
load.magma.results.file <- function(...) {
.Deprecated("load_magma_results_file")
load_magma_results_file(...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.