#' Adjust MAGMA Z-statistic from \emph{.genes.out} files
#'
#' Used when you want to directly analyse the gene-level
#' Z-scores for a given GWAS while correcting
#' for known confounding variables such as:
#' \itemize{
#' \item{\code{NSNPS} : }{Number of SNPs}
#' \item{\code{NPARAM} : }{Number of parameters?}
#' \item{\code{GENELEN} : }{Gene length}
#' \item{\code{log***} : }{The logged version of each of the above variables,
#' using the default \link[base]{log} function.}
#' }
#' @param magma_GenesOut_file A MAGMA \emph{.genes.out} file
#' generated by \link[MAGMA.Celltyping]{map_snps_to_genes}.
#' @param ... Additional arguments passed for \link[EWCE]{standardise_ctd}.
#' @inheritParams calculate_celltype_associations
#' @inheritParams prepare_quantile_groups
#' @inheritParams EWCE::standardise_ctd
#' @inheritDotParams EWCE::standardise_ctd
#' @inheritParams stats::p.adjust
#'
#' @examples
#' myGenesOut <- MAGMA.Celltyping::import_magma_files(
#' ids = c("ieu-a-298"),
#' file_types = ".genes.out",
#' return_dir = FALSE)
#' ctd <- ewceData::ctd()
#'
#' magmaGenesOut <- MAGMA.Celltyping::adjust_zstat_in_genesOut(
#' ctd = ctd,
#' magma_GenesOut_file = myGenesOut,
#' ctd_species = "mouse"
#' )
#' @export
#' @importFrom stats lm p.adjust
#' @importFrom utils read.table
adjust_zstat_in_genesOut <- function(magma_GenesOut_file,
ctd = NULL,
ctd_species = infer_ctd_species(ctd),
prepare_ctd = TRUE,
method = "bonferroni",
verbose = TRUE,
...) {
#### Prepare CTD ####
if(!is.null(ctd)){
if((ctd_species!="human") && (isFALSE(prepare_ctd))) {
messager("ctd_species must be 'human'.",
"Converting to human genes by setting prepare_ctd=TRUE",
v=verbose)
prepare_ctd <- TRUE
}
#### Convert orthologs to human gene symbols ####
if (isTRUE(prepare_ctd)) {
ctd <- EWCE::standardise_ctd(ctd = ctd,
dataset = "NULL",
input_species = ctd_species,
output_species = "human",
verbose = verbose,
...)
}
## The exact genes present in each CTD level may vary sightly.
## Using first level as a proxy.
allGenes <- unname(rownames(ctd[[1]]$specificity))
} else{
allGenes <- NULL
}
#### Load the MAGMA data ####
magma <- read_magma_genes_out(
path = magma_GenesOut_file,
verbose = verbose)
magma <- magma[order(magma$P), ]
#### Filter MAGMA results ####
#### Remove nan genes (don't have HGNC conversions) ####
nan_genes <- magma$hgnc_symbol=="nan"
if(sum(nan_genes)>0){
messager(formatC(sum(nan_genes),big.mark = ","),
"genes without HGNC gene symbols were dropped.",
v=verbose)
magma <- magma[!nan_genes, ]
}
#### Remove genes not in CTD
if(!is.null(allGenes)){
dropped_genes <- sum(!magma$hgnc_symbol %in% allGenes, na.rm = TRUE)
messager(formatC(dropped_genes,big.mark = ","),
"genes that are absent from the ctd were dropped.",
v=verbose)
magma <- magma[magma$hgnc_symbol %in% allGenes, ]
}
#### Check for duplicated genes ####
dup_genes <- duplicated(magma$hgnc_symbol)
if(sum(dup_genes)>0){
messager(formatC(sum(dup_genes),big.mark = ","),
"genes with duplicated HGNC gene symbols were dropped.",
v=verbose)
magma <- magma[!dup_genes, ]
}
#### Apply multiple testing correction ####
magma$Q <- stats::p.adjust(magma$P, method = method)
magma$logNSNPS <- log(magma$NSNPS)
magma$logNPARAM <- log(magma$NPARAM)
#### Remove MHC region ####
magma <- magma[!(magma$CHR == 6 &
magma$START >= 25000000 &
magma$STOP <= 34000000), ] # DROP MHC: chr6, 25-34 mb
magma$GENELEN <- abs(magma$STOP - magma$START)
magma$logGENELEN <- log(magma$GENELEN)
# Regress out effects of NSNPS and NPARAM (see 'boxplots_by_decile.r'
# and the section on downsampling for info)
#--- NSNPS only really has major effects (i.e. zscore+2) when
# a gene has ~10000 SNPS
messager("Computing adjusted Z-statistic.",v=verbose)
mod <- stats::lm(ZSTAT ~
NSNPS + logNSNPS +
NPARAM + logNPARAM +
GENELEN + logGENELEN,
data = magma)
magma$ADJ_ZSTAT <- magma$ZSTAT - (
magma$NSNPS * mod$coefficients[2] +
magma$logNSNPS * mod$coefficients[3] +
magma$NPARAM * mod$coefficients[4] +
magma$logNPARAM * mod$coefficients[5] +
magma$GENELEN * mod$coefficients[6] +
magma$logGENELEN * mod$coefficients[7])
# magma = arrange(magma,desc(magma$ADJ_ZSTAT))
# magma = magma[order(magma$ADJ_ZSTAT,decreasing=TRUE),]
return(magma)
}
adjust.zstat.in.genesOut <- function(...) {
.Deprecated(adjust_zstat_in_genesOut)
adjust_zstat_in_genesOut(...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.