R/gene_sets.R

Defines functions update_geneset_symbols

Documented in update_geneset_symbols

#' Update Geneset Symbols
#'
#' Look up a gene set by name, map each of its gene aliases through
#' `mapAlias2Symbol()`, and return the pathway/symbol pairs as one long table.
#'
#' @param geneset_name name of a gene set found in HIPCMatrix package data;
#'   the object is retrieved with `get()`.
#' @param gene_alias_map mapping of gene symbol to alias, handed unchanged to
#'   `mapAlias2Symbol()`.
#'
#' @return A `data.table` with columns `pathway` and `SYMBOL`; rows whose
#'   symbol is `NA` are dropped and duplicate rows are removed.
#'
#' @export
update_geneset_symbols <- function(geneset_name,
                                   gene_alias_map = hgncAlias2Symbol) {
  # Wrap every gene set element in a data.table and stack them, tagging each
  # row with the element it came from.
  modules <- get(geneset_name)
  per_module <- lapply(modules, data.table)
  symbol_dt <- rbindlist(per_module, idcol = "module")
  setnames(symbol_dt, "V1", "ALIAS")
  setnames(symbol_dt, "module", "pathway")

  # Swap the alias column for the mapped symbol.
  symbol_dt[, SYMBOL := mapAlias2Symbol(ALIAS, gene_alias_map)]
  symbol_dt[, ALIAS := NULL]

  # Discard aliases that did not map, then collapse repeated pairs.
  has_symbol <- !is.na(symbol_dt$SYMBOL)
  unique(symbol_dt[has_symbol])
}


#' Original HIPC BTM list
#'
#' A dataset containing the geneSet pathways and geneIds for
#' a BTM file with the original date as October 08, 2013 generated
#' by the HIPC collaborators and stored in the HIPC google drive.
#'
#' @format A list of lists with character vector elements
#' \describe{
#'   \item{names}{geneSet pathway names}
#'   \item{elements}{character vector of gene symbols}
#'   ...
#' }
"orig_btm_list"

#' Updated HIPC BTM list
#'
#' A dataset containing the geneSet pathways and geneIds for
#' a BTM file that is updated using the most recent version of
#' the HGNC database when package data is re-built.
#'
#' @format A list of lists with character vector elements
#' \describe{
#'   \item{names}{geneSet pathway names}
#'   \item{elements}{character vector of gene symbols}
#'   ...
#' }
"updated_btm_list"

#' Updated HIPC BTM data frame
#'
#' A dataset containing the geneSet pathways and geneIds for
#' a BTM file with the original date as October 08, 2013 generated
#' by the HIPC collaborators and stored in the HIPC google drive.
#'
#' @format A dataframe with pathways and geneIds
#' \describe{
#'   \item{pathway}{geneSet pathway names}
#'   \item{SYMBOL}{updated gene symbols}
#'   ...
#' }
"updated_btm_df"

#' @name msigdb_immunologic_signatures
#' @title Immunologic signatures
#'
#' @description
#' Gene sets that represent cell states and perturbations within the immune
#' system. The signatures were generated by manual curation of published studies
#' in human and mouse immunology. For each study, pairwise comparisons of
#' relevant classes were made and genes ranked by mutual information. Gene sets
#' correspond to top or bottom genes (FDR < 0.25 or maximum of 200 genes) for
#' each comparison. This resource is generated as part of the Human Immunology
#' Project Consortium (HIPC; http://www.immuneprofiling.org/). NOTE: Gene symbols
#' are updated whenever updateDataWithLatestHGNCMap.Rmd is run. The original is
#' kept as orig_msigdb.
#'
#' @usage data(msigdb_immunologic_signatures)
#' @format A \code{list} of \code{character} of length 1910, with one element
#' per module.
#'
#' @references http://www.broadinstitute.org/gsea/msigdb/genesets.jsp?collection=C7
"msigdb_immunologic_signatures"

#' @name emory_blood_transcript_modules
#' @title Blood transcriptome modules
#' @description NOTE: Gene symbols are updated whenever
#' updateDataWithLatestHGNCMap.Rmd is run. The original is kept as orig_emory.
#'
#' @usage data(emory_blood_transcript_modules)
#' @format A \code{list} of \code{character} of length 346, with one element per
#' module
#'
#' @references
#'  http://www.immuneprofiling.org/meni/meni-paper/btm-landing.gsp
#'  http://www.nature.com/ni/journal/v15/n2/full/ni.2789.html
"emory_blood_transcript_modules"

#' @name chaussabel_modules
#' @title Modules from Chaussabel (2008)
#'
#' @description
#' Repertoire of co-clustering genes. NOTE: Gene symbols are updated whenever
#' updateDataWithLatestHGNCMap.Rmd is run. The original is kept as
#' orig_chaussabel.
#'
#' @usage data(chaussabel_modules)
#' @format A \code{list} of \code{character} of length 260, with one element per
#' module.
#'
#' @references
#'  http://www.biir.net/public_wikis/module_annotation/G2_Trial_8_Modules
#'  http://www.nature.com/nri/journal/v14/n4/full/nri3642.html
"chaussabel_modules"

#' @name orig_msigdb
#' @title Immunologic signatures
#'
#' @description
#' Gene sets that represent cell states and perturbations within the immune
#' system. The signatures were generated by manual curation of published studies
#' in human and mouse immunology. For each study, pairwise comparisons of
#' relevant classes were made and genes ranked by mutual information. Gene sets
#' correspond to top or bottom genes (FDR < 0.25 or maximum of 200 genes) for
#' each comparison. This resource is generated as part of the Human Immunology
#' Project Consortium (HIPC; http://www.immuneprofiling.org/).
#'
#' @usage data(orig_msigdb)
#' @format A \code{list} of \code{character} of length 1910, with one element
#' per module.
#'
#' @references http://www.broadinstitute.org/gsea/msigdb/genesets.jsp?collection=C7
"orig_msigdb"

#' @name orig_emory
#' @title Blood transcriptome modules
#'
#' @usage data(orig_emory)
#' @format A \code{list} of \code{character} of length 346, with one element per
#' module
#'
#' @references
#'  http://www.immuneprofiling.org/meni/meni-paper/btm-landing.gsp
#'  http://www.nature.com/ni/journal/v15/n2/full/ni.2789.html
"orig_emory"

#' @name orig_chaussabel
#' @title Modules from Chaussabel (2008)
#'
#' @description
#' Repertoire of co-clustering genes.
#'
#' @usage data(orig_chaussabel)
#' @format A \code{list} of \code{character} of length 260, with one element per
#' module.
#'
#' @references
#'  http://www.biir.net/public_wikis/module_annotation/G2_Trial_8_Modules
#'  http://www.nature.com/nri/journal/v14/n4/full/nri3642.html
"orig_chaussabel"
RGLab/HIPCMatrix documentation built on Jan. 29, 2023, 5:13 a.m.