R/convert.R

Defines functions convertMSigDB

Documented in convertMSigDB

#' Convert the gene ids of an MSigDB collection to mouse ortholog ids
#'
#' This function takes two files: a gene set collection downloaded from the
#' MSigDB (a .gmt file) and an orthology table downloaded from the HCOP
#' database (a tab-delimited .txt file).
#'
#' The ids of every gene set are matched against the \code{human_entrez_gene}
#' column of the HCOP table and replaced by the corresponding values of the
#' \code{mouse_<idType>_gene} column. Cells that are missing or contain
#' \code{'-'} are dropped and duplicated ids are removed. Gene set names are
#' kept unchanged.
#'
#' @param msigdbPath path to the MSigDB file (.gmt)
#'
#' @param hcopPath path to the HCOP annotation file (tab-delimited text)
#'
#' @param idType type of the returned mouse ids: 'entrez' or 'ensembl'
#'
#' @return a \code{GSEABase::GeneSetCollection} with one gene set per gene set
#'   of the input file, holding the converted ids
#'
convertMSigDB <- function(msigdbPath, hcopPath, idType = 'ensembl'){

  # fail early, with a readable message, on an unsupported id type
  idType <- match.arg(idType, choices = c('ensembl', 'entrez'))
  mouseCol <- paste0('mouse_', idType, '_gene')

  # get msigdb data: named list of character vectors, one per gene set
  msigdb <- GSEABase::getGmt(msigdbPath)
  msigdb <- GSEABase::geneIds(msigdb)

  # get hcop data
  # every column is read as character: type guessing could turn an id column
  # into numbers, which loses the '-' cells and breaks matching against the
  # character ids of the gene sets
  hcop <- readr::read_delim(
    hcopPath,
    delim = '\t',
    col_types = readr::cols(.default = readr::col_character())
  )

  missingCols <- setdiff(c('human_entrez_gene', mouseCol), names(hcop))
  if (length(missingCols) > 0L) {
    stop(
      'The HCOP file lacks the required column(s): ',
      paste(missingCols, collapse = ', '),
      call. = FALSE
    )
  }

  # keep only the two columns that are needed and drop unusable rows once,
  # instead of filtering the whole table again for every gene set
  humanID <- hcop[['human_entrez_gene']]
  mouseID <- hcop[[mouseCol]]
  usable <- !is.na(mouseID) & mouseID != '-' # drop empty and '-' cells
  humanID <- humanID[usable]
  mouseID <- mouseID[usable]

  # progress bar; txtProgressBar() needs max > min, so skip it on empty input
  nSets <- length(msigdb)
  pb <- NULL
  if (nSets > 0L) {
    pb <- utils::txtProgressBar(min = 0, max = nSets, style = 3)
    on.exit(close(pb), add = TRUE)
  }

  res <- lapply(
    seq_len(nSets),
    function(i){

      # mouse ids of all orthologs of the genes in this set, in table order
      altID <- unique(mouseID[humanID %in% msigdb[[i]]])

      geneSet <- GSEABase::GeneSet(altID, setName = names(msigdb)[[i]])

      utils::setTxtProgressBar(pb, i) # progress bar

      geneSet
    }
  )
  names(res) <- names(msigdb)

  GSEABase::GeneSetCollection(res)

}
TamasKiss26/MSigDBOrthology documentation built on May 26, 2020, 1:25 a.m.