##' Build annotation database
##' @name buildAnnot
##' @rdname buildAnnot-methods
##' @title make annotation database
##' @details Dispatches on \code{anntype} to the matching internal builder
##' (\code{.makeGOdata}, \code{.makeKOdata}, \code{.makeROdata} or
##' \code{.makeKOMdata}) and wraps the table it returns in an \code{Annot}
##' object. An unsupported \code{anntype} raises an error.
##' @param species species for the annotation
##' @param keytype the gene Id type to extract
##' @param anntype annotation type: one of "GO", "KEGG", "Reactome" or "KEGGM"
##' @param builtin use default database (TRUE or FALSE); only passed on when
##' \code{anntype = "KEGG"}
##' @param OP BP, CC, MF default use all; only passed on when
##' \code{anntype = "GO"}
##' @return an \code{Annot} object with slots \code{species}, \code{anntype},
##' \code{keytype} and \code{annot}, returned invisibly
##' @examples
##' \dontrun{
##' annot<-buildAnnot(species = "human", keytype = "SYMBOL",
##' anntype = "KEGG")
##' }
##' @export
##' @author Kai Guo
buildAnnot <- function(species = "human", keytype = "SYMBOL", anntype = "GO",
                       builtin = TRUE, OP = NULL) {
  if (!is.character(anntype) || length(anntype) != 1L || is.na(anntype)) {
    stop("'anntype' must be a single character string", call. = FALSE)
  }
  # switch() evaluates only the matching arm; the unnamed last arm is the
  # fallback, so an unknown type fails here instead of leaving `annot` unset.
  annot <- switch(anntype,
    GO = .makeGOdata(species = species, keytype = keytype, OP = OP),
    KEGG = .makeKOdata(species = species, keytype = keytype,
                       builtin = builtin),
    Reactome = .makeROdata(species = species, keytype = keytype),
    KEGGM = .makeKOMdata(species = species, keytype = keytype),
    stop("unsupported anntype '", anntype,
         "': expected one of GO, KEGG, Reactome, KEGGM", call. = FALSE)
  )
  result <- new("Annot",
    species = species,
    anntype = anntype,
    keytype = keytype,
    annot = annot
  )
  # The original ended on an assignment, i.e. returned invisibly; keep that.
  invisible(result)
}
#' Build GO annotation
#' @importFrom AnnotationDbi keys
#' @importFrom dplyr distinct_
#' @param species you can check the support species by using showData()
#' @param keytype the gene ID type
#' @param OP One or more of "BP", "MF", and "CC" subontologies; \code{NULL}
#' (the default) or "ALL" keeps all three.
#' @return a data.frame with columns \code{GeneID}, \code{GOALL} and
#' \code{Annot}
#' @author Kai Guo
.makeGOdata <- function(species = "human", keytype = "SYMBOL", OP = NULL) {
  dbname <- .getdbname(species)
  .load_pkg(dbname)
  # Turn the database name into the database object itself.
  # NOTE(review): relies on .load_pkg() making an object of that name visible
  # from here -- confirm before replacing eval(parse()) with a direct lookup.
  dbname <- eval(parse(text = dbname))
  GO_FILE <- AnnotationDbi::select(dbname,
                                   keys = keys(dbname, keytype = keytype),
                                   keytype = keytype,
                                   columns = c("GOALL", "ONTOLOGYALL"))
  colnames(GO_FILE)[1] <- "GeneID"
  # Keep one row per gene / GO term / ontology. distinct() on the three-column
  # subset replaces the deprecated distinct_() call with the same result.
  GO_FILE <- dplyr::distinct(GO_FILE[, c("GeneID", "GOALL", "ONTOLOGYALL")])
  annot <- .getann("GO")
  # Look up the description of each GO term by row name.
  GO_FILE$Annot <- annot[GO_FILE$GOALL, "annotation"]
  if (!is.null(OP) && !identical(OP, "ALL")) {
    # %in% never yields NA, so rows with a missing ontology are dropped
    # instead of turning into all-NA rows as they did with `==`.
    GO_FILE <- GO_FILE[GO_FILE$ONTOLOGYALL %in% OP, , drop = FALSE]
  }
  GO_FILE[, c("GeneID", "GOALL", "Annot")]
}
#' Build KEGG annotation
#' @importFrom AnnotationDbi keys
#' @importFrom KEGGREST keggLink
#' @param species you can check the support species by using showData()
#' @param keytype the gene ID type
#' @param builtin use default database(TRUE or FALSE); when not TRUE the
#' gene-to-pathway links are fetched with \code{keggLink}
#' @return a data.frame whose first column is \code{GeneID}, followed by the
#' pathway ID column and an \code{Annot} column with the pathway description
#' @author Kai Guo
.makeKOdata <- function(species = "human", keytype = "SYMBOL", builtin = TRUE) {
  dbname <- .getdbname(species = species)
  if (isTRUE(builtin)) {
    .load_pkg(dbname)
    # Turn the database name into the database object itself.
    # NOTE(review): relies on .load_pkg() making that object visible here.
    dbname <- eval(parse(text = dbname))
    KO_FILE <- AnnotationDbi::select(dbname,
                                     keys = keys(dbname, keytype = keytype),
                                     keytype = keytype, columns = "PATH")
    KO_FILE <- na.omit(KO_FILE)
  } else {
    spe <- .getspeices(species)
    tmp <- keggLink("pathway", spe)
    # Values are presumably "path:<org><5-digit id>". Strip the prefix by
    # pattern rather than by fixed position so that 4-letter organism codes
    # are handled as well as 3-letter ones.
    tmp <- sub("^path:[[:alpha:]]+", "", tmp)
    # Names are "<org>:<gene id>"; keep only the gene id.
    names(tmp) <- sub(".*:", "", names(tmp))
    tmp <- .vec_to_df(tmp, name = c(keytype, "PATH"))
    if (keytype != "ENTREZID") {
      # The ids are converted from ENTREZID below, so KEGG gene ids are
      # treated as Entrez ids here; unmapped genes are dropped.
      tmp[, 1] <- idconvert(species, keys = tmp[, 1],
                            fkeytype = "ENTREZID", tkeytype = keytype)
      tmp <- na.omit(tmp)
    }
    KO_FILE <- tmp
  }
  annot <- .getann("KEGG")
  KO_FILE[, 1] <- as.vector(KO_FILE[, 1])
  KO_FILE[, 2] <- as.vector(KO_FILE[, 2])
  # Look up the description of each pathway by row name.
  KO_FILE$Annot <- annot[KO_FILE[, 2], "annotation"]
  colnames(KO_FILE)[1] <- "GeneID"
  KO_FILE
}
##' Download database from Msigdb and prepare for enrichment analysis
##' @name buildMSIGDB
##' @importFrom msigdbr msigdbr
##' @importFrom dplyr filter
##' @importFrom dplyr select
##' @importFrom rlang sym
##' @importFrom magrittr %>%
##' @param species the species for query
##' @param keytype the gene ID type; "SYMBOL" and "ENTREZID" are read directly
##' from the msigdbr table, any other type is converted from ENTREZID
##' @param anntype annotation type of gene set (HALLMARK, CGP, CP, KEGG,
##' REACTOME, BIOCARTA, MIR, TFT, CGN, CM, GO, BP, CC, MF)
##' @return an \code{Annot} object whose \code{annot} slot holds the gene set
##' table
##' @examples
##' \dontrun{
##' hsamsi<-buildMSIGDB(species = "human", keytype = "SYMBOL", anntype = "GO")
##' }
##' @export
##' @author Kai Guo
buildMSIGDB <- function(species = "human", keytype = "SYMBOL",
                        anntype = "GO") {
  # anntype -> c(gs_cat value, gs_subcat value); an NA sub-category means the
  # whole category is kept.
  # NOTE(review): CGP, CP, MIR, TFT, CGN and CM never had a sub-category
  # filter, so e.g. "CGP" and "CP" both return all of C2 -- confirm intent.
  collections <- list(
    HALLMARK = c("H", NA),
    CGP      = c("C2", NA),
    CP       = c("C2", NA),
    KEGG     = c("C2", "CP:KEGG"),
    REACTOME = c("C2", "CP:REACTOME"),
    BIOCARTA = c("C2", "CP:BIOCARTA"),
    MIR      = c("C3", NA),
    TFT      = c("C3", NA),
    CGN      = c("C4", NA),
    CM       = c("C4", NA),
    GO       = c("C5", NA),
    BP       = c("C5", "GO:BP"),
    CC       = c("C5", "GO:CC"),
    MF       = c("C5", "GO:MF")
  )
  if (!is.character(anntype) || length(anntype) != 1L || is.na(anntype) ||
      !anntype %in% names(collections)) {
    stop("unsupported anntype: expected one of ",
         paste(names(collections), collapse = ", "), call. = FALSE)
  }
  category <- collections[[anntype]][1]
  subcategory <- collections[[anntype]][2]
  # HALLMARK has always been stored with an empty anntype label; preserved.
  # NOTE(review): looks unintended -- confirm nothing depends on "".
  label <- if (anntype == "HALLMARK") "" else anntype

  mspe <- .getmsig(species)
  if (is.null(mspe)) {
    stop("can't find support species!", call. = FALSE)
  }

  # Any key type other than SYMBOL is read as Entrez ids; types other than
  # SYMBOL/ENTREZID are converted from those ids at the end.
  key <- if (keytype == "SYMBOL") "gene_symbol" else "entrez_gene"
  needs_conversion <- !keytype %in% c("SYMBOL", "ENTREZID")

  cat("Downloading msigdb datasets ...\n")
  res <- msigdbr(species = mspe)
  # %in% is NA-safe, matching subset()'s dropping of NA comparisons.
  res <- res[res$gs_cat %in% category, , drop = FALSE]
  if (!is.na(subcategory)) {
    res <- res[res$gs_subcat %in% subcategory, , drop = FALSE]
  }

  if (anntype %in% c("GO", "BP", "CC", "MF")) {
    res <- as.data.frame(res[, c(key, "gs_exact_source", "gs_name")])
    colnames(res) <- c("GeneID", "GOALL", "Annot")
    # Drop everything up to and including the first underscore.
    res$Annot <- sub('.*@', '', sub('_', '@', res$Annot))
  } else if (anntype == "KEGG") {
    res <- as.data.frame(res[, c(key, "gs_exact_source", "gs_name")])
    colnames(res) <- c("GeneID", "PATH", "Annot")
    # Keep characters 4-8 of the source id (presumably the numeric pathway id
    # after a 3-letter organism prefix -- TODO confirm).
    res$PATH <- substr(res$PATH, 4, 8)
  } else {
    res <- as.data.frame(res[, c(key, "gs_name")])
    colnames(res) <- c("GeneID", "Term")
    res$Term <- sub('.*@', '', sub('_', '@', res$Term))
    res$Annot <- res[, 2]
  }

  if (needs_conversion) {
    res[, 1] <- idconvert(species, keys = res[, 1], fkeytype = "ENTREZID",
                          tkeytype = keytype)
    res <- na.omit(res)
  }

  new("Annot",
      species = species,
      anntype = label,
      keytype = keytype,
      annot = res)
}
#' Build Reactome annotation data
#'
#' Collects the Reactome pathways whose names match the species label, links
#' them to their gene ids and returns one row per gene / pathway pair.
#' @importFrom AnnotationDbi as.list
#' @importFrom dplyr left_join
#' @param species you can check the supported species by using showAvailableRO
#' @param keytype key type export
#' @return a data.frame with columns \code{GeneID} and \code{Annot}
#' @author Kai Guo
.makeROdata <- function(species = "human", keytype = "SYMBOL") {
  ro_name <- .getrodbname(species = species)
  .load_pkg("reactome.db")
  # Only the part before the first underscore is used to match pathway names.
  species_label <- sapply(strsplit(ro_name, "_"), '[[', 1)

  # Pathway name -> pathway id, restricted to the requested species.
  name_to_id <- as.list(reactomePATHNAME2ID)
  name_to_id <- name_to_id[grep(species_label, names(name_to_id))]

  # Pathway id -> gene ids, one row per pair.
  pathway_ids <- unique(as.vector(unlist(name_to_id)))
  id_to_gene <- lapply(as.list(reactomePATHID2EXTID)[pathway_ids], unique)
  gene_tab <- data.frame(
    "GeneID" = unlist(id_to_gene),
    "Term" = rep(names(id_to_gene), times = lengths(id_to_gene)),
    row.names = NULL
  )

  # Pathway id -> pathway name, one row per pair.
  name_to_id <- lapply(name_to_id, unique)
  name_tab <- data.frame(
    "Term" = unlist(name_to_id),
    "Annot" = rep(names(name_to_id), times = lengths(name_to_id)),
    row.names = NULL
  )
  # Strip the leading "<species>: " part of the pathway name.
  name_tab$Annot <- sub('.*@ ', '', sub(':', '@', name_tab$Annot))

  joined <- left_join(gene_tab, name_tab, by = "Term")
  if (keytype != "ENTREZID") {
    # Ids are converted from ENTREZID; genes that fail to map are dropped.
    joined$GeneID <- idconvert(species = species, keys = joined$GeneID,
                               fkeytype = "ENTREZID", tkeytype = keytype)
    joined <- na.omit(joined)
  }
  # The pathway id column (Term) is not part of the result.
  joined[, c("GeneID", "Annot")]
}
###
#' make KEGG module annotation data function
#' @importFrom AnnotationDbi keys
#' @importFrom KEGGREST keggLink
#' @param species you can check the support species by using showData()
#' @param keytype the gene ID type
#' @return a data.frame with columns \code{GeneID}, \code{Module} and
#' \code{Annot}
#' @author Kai Guo
.makeKOMdata <- function(species = "human", keytype = "ENTREZID") {
  spe <- .getspeices(species)
  tmp <- keggLink("module", spe)
  # Values are presumably "md:<org>_<module id>". Strip the prefix by pattern
  # rather than by fixed position so that 4-letter organism codes are handled
  # as well as 3-letter ones.
  tmp <- sub("^md:[^_]*_", "", tmp)
  # Names are "<org>:<gene id>"; keep only the gene id.
  names(tmp) <- sub('.*:', '', names(tmp))
  tmp <- .vec_to_df(tmp, name = c(keytype, "Module"))
  if (keytype != "ENTREZID") {
    # The ids are converted from ENTREZID below, so KEGG gene ids are treated
    # as Entrez ids here; unmapped genes are dropped.
    tmp[, 1] <- idconvert(species, keys = tmp[, 1],
                          fkeytype = "ENTREZID", tkeytype = keytype)
    tmp <- na.omit(tmp)
  }
  MO_FILE <- tmp
  annot <- .get_kgm.data()
  MO_FILE[, 1] <- as.vector(MO_FILE[, 1])
  MO_FILE[, 2] <- as.vector(MO_FILE[, 2])
  # Look up the description of each module by row name.
  MO_FILE$Annot <- annot[MO_FILE[, 2], "annotation"]
  colnames(MO_FILE)[1] <- "GeneID"
  MO_FILE
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.