R/adjust_zstat_in_genesOut.r

Defines functions adjust.zstat.in.genesOut adjust_zstat_in_genesOut

Documented in adjust_zstat_in_genesOut

#' Adjust MAGMA Z-statistic from \emph{.genes.out} files
#'
#' Used when you want to directly analyse the gene-level
#'  Z-scores for a given GWAS while correcting 
#'  for known confounding variables such as: 
#' \itemize{
#' \item{\code{NSNPS} : }{Number of SNPs}
#' \item{\code{NPARAM} : }{Number of parameters?}
#' \item{\code{GENELEN} : }{Gene length}
#' \item{\code{log***} : }{The logged version of each of the above variables, 
#' using the default \link[base]{log} function.}
#' }
#' @param magma_GenesOut_file A MAGMA \emph{.genes.out} file 
#' generated by \link[MAGMA.Celltyping]{map_snps_to_genes}.   
#' @param ... Additional arguments passed for \link[EWCE]{standardise_ctd}. 
#' @inheritParams calculate_celltype_associations
#' @inheritParams prepare_quantile_groups
#' @inheritParams EWCE::standardise_ctd
#' @inheritDotParams EWCE::standardise_ctd
#' @inheritParams stats::p.adjust
#'
#' @examples
#' myGenesOut <- MAGMA.Celltyping::import_magma_files(
#'     ids = c("ieu-a-298"),
#'     file_types = ".genes.out",
#'     return_dir = FALSE)
#' ctd <- ewceData::ctd()
#' 
#' magmaGenesOut <- MAGMA.Celltyping::adjust_zstat_in_genesOut(
#'     ctd = ctd,
#'     magma_GenesOut_file = myGenesOut,
#'     ctd_species = "mouse"
#' )
#' @export
#' @importFrom stats lm p.adjust 
#' @importFrom utils read.table 
adjust_zstat_in_genesOut <- function(magma_GenesOut_file,
                                     ctd = NULL,
                                     ctd_species = infer_ctd_species(ctd),
                                     prepare_ctd = TRUE, 
                                     method = "bonferroni",
                                     verbose = TRUE,
                                     ...) {
    #### Prepare CTD ####
    if(!is.null(ctd)){ 
        if((ctd_species!="human") && (isFALSE(prepare_ctd))) {
            messager("ctd_species must be 'human'.",
                    "Converting to human genes by setting prepare_ctd=TRUE",
                    v=verbose)
            prepare_ctd <- TRUE
        }
        #### Convert orthologs to human gene symbols ####
        if (isTRUE(prepare_ctd)) { 
            ctd <- EWCE::standardise_ctd(ctd = ctd,
                                         dataset = "NULL",
                                         input_species = ctd_species, 
                                         output_species = "human",
                                         verbose = verbose,
                                         ...)
        } 
        ## The exact genes present in each CTD level may vary sightly.
        ## Using first level as a proxy. 
        allGenes <- unname(rownames(ctd[[1]]$specificity))
    }  else{
        allGenes <- NULL
    }
    #### Load the MAGMA data ####
    magma <- read_magma_genes_out(
        path = magma_GenesOut_file, 
        verbose = verbose) 
    magma <- magma[order(magma$P), ]
    #### Filter MAGMA results ####
    #### Remove nan genes (don't have HGNC conversions) ####
    nan_genes <- magma$hgnc_symbol=="nan"
    if(sum(nan_genes)>0){
        messager(formatC(sum(nan_genes),big.mark = ","),
                 "genes without HGNC gene symbols were dropped.",
                 v=verbose)
        magma <- magma[!nan_genes, ]
    } 
    #### Remove genes not in CTD 
    if(!is.null(allGenes)){
        dropped_genes <- sum(!magma$hgnc_symbol %in% allGenes, na.rm = TRUE)
        messager(formatC(dropped_genes,big.mark = ","),
                 "genes that are absent from the ctd were dropped.",
                 v=verbose)
        magma <- magma[magma$hgnc_symbol %in% allGenes, ]
    }   
    #### Check for duplicated genes ####
    dup_genes <- duplicated(magma$hgnc_symbol)
    if(sum(dup_genes)>0){
        messager(formatC(sum(dup_genes),big.mark = ","),
                 "genes with duplicated HGNC gene symbols were dropped.",
                 v=verbose)
        magma <- magma[!dup_genes, ]
    } 
    #### Apply multiple testing correction ####
    magma$Q <- stats::p.adjust(magma$P, method = method)
    magma$logNSNPS <- log(magma$NSNPS)
    magma$logNPARAM <- log(magma$NPARAM)
    #### Remove MHC region ####
    magma <- magma[!(magma$CHR == 6 &
                         magma$START >= 25000000 &
                         magma$STOP <= 34000000), ] # DROP MHC: chr6, 25-34 mb
    magma$GENELEN <- abs(magma$STOP - magma$START)
    magma$logGENELEN <- log(magma$GENELEN)

    # Regress out effects of NSNPS and NPARAM (see 'boxplots_by_decile.r'
    # and the section on downsampling for info)
    #--- NSNPS only really has major effects (i.e. zscore+2) when
    # a gene has ~10000 SNPS
    messager("Computing adjusted Z-statistic.",v=verbose)
    mod <- stats::lm(ZSTAT ~ 
                         NSNPS + logNSNPS + 
                         NPARAM + logNPARAM + 
                         GENELEN + logGENELEN,
                     data = magma)
    magma$ADJ_ZSTAT <- magma$ZSTAT - (
        magma$NSNPS * mod$coefficients[2] +
        magma$logNSNPS * mod$coefficients[3] +
        magma$NPARAM * mod$coefficients[4] + 
        magma$logNPARAM * mod$coefficients[5] +
        magma$GENELEN * mod$coefficients[6] + 
        magma$logGENELEN * mod$coefficients[7])
    # magma = arrange(magma,desc(magma$ADJ_ZSTAT))
    # magma = magma[order(magma$ADJ_ZSTAT,decreasing=TRUE),]
    return(magma)
}


adjust.zstat.in.genesOut <- function(...) {
    .Deprecated(adjust_zstat_in_genesOut)
    adjust_zstat_in_genesOut(...)
}
NathanSkene/MAGMA_Celltyping documentation built on Oct. 11, 2024, 7:19 a.m.