R/map_specificity_to_entrez.r

Defines functions map_specificity_to_entrez

Documented in map_specificity_to_entrez

#' Map specificity to Entrez gene IDs
#'
#' Convenience function used in \link[MAGMA.Celltyping]{create_gene_covar_file}.
#' Takes one matrix from a given level of a CellTypeDataset (CTD),
#' converts non-human genes to human orthologs, and attaches an Entrez ID
#' to each gene symbol that can be mapped.
#'
#' @param return_ctd If \code{TRUE}, return the full \code{ctd} with the
#'  mapped table stored as \code{ctd[[annotLevel]]$quantDat2}.
#'  If \code{FALSE} (default), return only the mapped table.
#' @param use_saved Use a saved version of the gene mapping table
#'  (\code{MAGMA.Celltyping::hgnc2entrez_orthogene}) instead of
#'  querying with \link[orthogene]{map_genes}.
#' @param use_matrix Which CTD matrix to use as input.
#' @inheritParams calculate_celltype_associations
#'
#' @returns A \code{data.frame} in which the first column is 'entrez',
#' followed by one column per cell type holding the values of
#' \code{use_matrix} (specificity deciles by default).
#' Genes without an Entrez mapping, and duplicated symbols / Entrez IDs
#' (first occurrence kept), are dropped.
#'
#' @keywords internal
#' @importFrom dplyr rename
#' @importFrom orthogene convert_orthologs map_genes
map_specificity_to_entrez <- function(ctd,
                                      annotLevel,
                                      ctd_species,
                                      use_matrix = "specificity_quantiles",
                                      return_ctd = FALSE,
                                      use_saved = TRUE,
                                      verbose = TRUE) {
    #### Avoid confusing checks ####
    # Placeholders for the non-standard-evaluation column names
    # used in dplyr::rename() below.
    input <- target <- NULL;
    #### Check if matrix exists ####
    if(!use_matrix %in% names(ctd[[annotLevel]])){
        stopper(use_matrix,"is not a matrix in the provided ctd.",
                "Try running ctd through prepare_quantile_groups() first.")
    }
    # Because sumstats use entrez genes & ctd uses
    # gene symbols, match entrez-->symbols
    messager("Mapping gene symbols in",use_matrix,"matrix",
             "to entrez IDs.",v=verbose)
    #### Extract gene list ####
    mat <- ctd[[annotLevel]][[use_matrix]]
    #### Find 1:1 orthologs ####
    # Skip this step if the genes are already human
    if (ctd_species != "human") {
        output_species <- "human"
        mat <- orthogene::convert_orthologs(
            gene_df = mat,
            gene_input = "rownames",
            gene_output = "rownames",
            input_species = ctd_species,
            output_species = output_species,
            method = "homologene",
            verbose = verbose
        )
        # From here on the row names are human gene symbols.
        ctd_species <- output_species
    }
    #### Map gene symbols to Entrez IDs ####
    # Queries the respective species
    if(use_saved){
        gene_map <- MAGMA.Celltyping::hgnc2entrez_orthogene
    } else {
        gene_map <- orthogene::map_genes(
            # Query the gene symbols actually present in the matrix
            # (previously a NULL placeholder was passed here).
            genes = rownames(mat),
            species = ctd_species,
            target = "ENTREZGENE_ACC",
            drop_na = TRUE,
            # only return 1 Entrez ID per gene symbol
            mthreshold = 1,
            verbose = verbose
        )
        # Harmonise column names with the saved mapping table.
        gene_map <- gene_map |> dplyr::rename(hgnc_symbol = input,
                                               entrez = target)
    }
    #### Subset gene_map ####
    gene_map <- gene_map[gene_map$hgnc_symbol %in% rownames(mat),]
    gene_map <- gene_map[!duplicated(gene_map$hgnc_symbol),]
    #### Subset matrix ####
    # drop = FALSE keeps the genes x cell-types layout even when only
    # one gene (or one cell type) remains after subsetting.
    quantDat2 <- data.frame(
        entrez = gene_map$entrez,
        as.matrix(mat[gene_map$hgnc_symbol, , drop = FALSE]),
        check.rows = FALSE,
        check.names = FALSE
    )
    # Several symbols can share one Entrez ID; keep the first occurrence.
    quantDat2 <- quantDat2[!duplicated(quantDat2$entrez), ]
    #### Return results ####
    if (return_ctd) {
        ctd[[annotLevel]]$quantDat2 <- quantDat2
        return(ctd)
    } else {
        return(quantDat2)
    }
}
NathanSkene/MAGMA_Celltyping documentation built on Aug. 21, 2023, 8:55 a.m.