##' ORA analysis for Pathway Commons
##'
##' This function performs over-representation analysis using Pathway Commons
##' @title enrichPC
##' @param gene a vector of genes (either hgnc symbols or uniprot IDs)
## @param source Data source of Pathway Commons, e.g., 'reactome', 'kegg', 'pathbank', 'netpath', 'panther', etc.
## @param keyType specify the type of input 'gene' (one of 'hgnc' or 'uniprot')
##' @param ... additional parameters, see also the parameters supported by the enricher() function
##' @return A \code{enrichResult} instance
##' @export
enrichPC <- function(gene, ...) {
# keyType <- match.arg(keyType, c("hgnc", "uniprot"))
# source <- match.arg(source, get_pc_source())
# pcdata <- get_pc_data(source, keyType, output = 'gson')
pcdata <- get_pc_data(output = 'gson')
res <- enricher(gene, gson = pcdata, ...)
if (is.null(res)) return(res)
res@ontology <- pcdata@gsname
res@organism <- pcdata@species
# res@keytype <- keyType
return(res)
}
##' GSEA analysis for Pathway Commons
##'
##' This function performs GSEA using Pathway Commons
##' @title gsePC
##' @param geneList a ranked gene list
## @param source Data source of Pathway Commons, e.g., 'reactome', 'kegg', 'pathbank', 'netpath', 'panther', etc.
## @param keyType specify the type of input 'gene' (one of 'hgnc' or 'uniprot')
##' @param ... additional parameters, see also the parameters supported by the GSEA() function
##' @importFrom rlang check_installed
##' @return A \code{gseaResult} instance
##' @export
gsePC <- function(geneList, ...) {
# keyType <- match.arg(keyType, c("hgnc", "uniprot"))
# source <- match.arg(source, get_pc_source())
# pcdata <- get_pc_data(source, keyType, output = 'gson')
pcdata <- get_pc_data(output = 'gson')
res <- GSEA(geneList, gson = pcdata, ...)
if (is.null(res)) return(res)
res@ontology <- pcdata@gsname
res@organism <- pcdata@species
# res@keytype <- keyType
return(res)
}
prepare_pc_data <- function() {
pc2gene <- get_pc_data(output = 'data.frame')
##TERM2GENE
pcid2gene <- pc2gene[, c("id", "gene")]
##TERM2NAME
pcid2name <- unique(pc2gene[, c("id", "name")])
list(PCID2GENE = pcid2gene,
PCID2NAME = pcid2name)
}
get_pc_gmtfile <- function() {
# pcurl <- 'https://www.pathwaycommons.org/archives/PC2/v12/'
pc2 <- 'https://download.baderlab.org/PathwayCommons/PC2/'
con <- readLines(pc2)
pattern <- ".*>v(\\d+)/</a>.*"
latest_version <- con[grep(pattern, con)] |>
sub(pattern, "\\1", x = _) |>
as.numeric() |>
max()
pcurl <- sprintf('%sv%s/', pc2, latest_version)
x <- readLines(pcurl)
y <- x[grep('\\.gmt.gz',x)]
# sub(".*(PathwayCommons.*\\.gmt.gz).*", "\\1", y)
file <- sub(".*>(.*\\.gmt\\.gz)</a>.*", "\\1", y)
sprintf("%s%s", pcurl, file)
}
### list supported data sources of Pathway Commons
# get_pc_source <- function() {
# gmtfile <- get_pc_gmtfile()
# source <- unique(sub("PathwayCommons\\d+\\.([_A-Za-z]+)\\.([_A-Za-z]+)\\.gmt.gz", "\\1", gmtfile))
#
# return(source)
# }
read.gmt.pc_internal <- function(gmtfile) {
# x <- readLines(gmtfile)
check_installed('readr', 'for `read.gmt.pc_internal()`, which is an internal function.')
x <- yread(gmtfile, readr::read_lines)
y <- strsplit(x, "\t")
id <- vapply(y, `[`, 1, FUN.VALUE = character(1))
pcid <- sub(".*/", "", id)
url <- sub(pcid[1], "", id[1]) # can be used to restored the url for web browse.
nn <- vapply(y, `[`, 2, FUN.VALUE = character(1))
names(y) <- sprintf("id: %s; %s", pcid, nn)
y <- lapply(y, "[", -c(1:2))
ont2gene <- stack(y)
ont2gene <- ont2gene[, c("ind", "values")]
colnames(ont2gene) <- c("term", "gene")
return(ont2gene)
# res <- list(ont2gene = ont2gene, pcid = pcid, url = url)
# return(res)
}
##' Parse gmt file from Pathway Common
##'
##' This function parse gmt file downloaded from Pathway common
##' @title read.gmt.pc
##' @param gmtfile A gmt file
##' @param output one of 'data.frame' or 'GSON'
##' @return A data.frame or A GSON object depends on the value of 'output'
##' @importFrom rlang .data
##' @importFrom tidyr separate
##' @export
read.gmt.pc <- function(gmtfile, output = "data.frame") {
output <- match.arg(output, c("data.frame", "gson", "GSON"))
pcdata <- read.gmt.pc_internal(gmtfile)
x <- tidyr::separate(pcdata, .data$term, c("id", "name","datasource","organism","idtype"), "; ")
x <- lapply(x, function(col) sub("\\w+:\\s*", "", col)) |> as.data.frame()
if (output == "data.frame") {
return(x)
}
gsid2gene <- data.frame(gsid=x$id, gene=x$gene)
gsid2name <- unique(data.frame(gsid=x$id, name=x$name))
organism <- taxID2name(x$organism[1])
gson(gsid2gene = gsid2gene,
gsid2name = gsid2name,
gsname = "Pathway Commons",
species = organism)
}
get_pc_data <- function(output = "data.frame") {
url <- get_pc_gmtfile()
# gmtfile <- gmtfile[grepl(source, gmtfile) & grepl(keyType, gmtfile)]
read.gmt.pc(url, output = output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.