readMappings <- function(file, sep = "\t", IDsep = ",") {
a <- read.delim(file = file, header = FALSE,
quote = "", sep = sep, colClasses = "character")
## a bit of preprocesing to get it in a nicer form
map <- a[, 2]
names(map) <- gsub(" ", "", a[, 1]) ## trim the spaces
## split the IDs
return(lapply(map, function(x) gsub(" ", "", strsplit(x, split = IDsep)[[1]])))
}
gene2GO <- readMappings(file = "hs-ensembl2go.map")
set.seed(length(gene2GO))
index <- sample(length(gene2GO), 100)
str(head(gene2GO[index]))
## at this point we have the gene to GOs mapping and we can start building a topGOdata object.
## We need to generate a random list of interesting genes
## get the list of all gene ID's
geneNames <- names(gene2GO[index])
## select (or define) the list of interesting genes
myInterestedGenes <- sample(geneNames, 20)
str(geneNames)
str(myInterestedGenes)
## make a indicator vector showing which genes are interesting
geneList <- factor(as.integer(geneNames %in% myInterestedGenes))
names(geneList) <- geneNames
str(geneList)
library(topGO)
## build the topGOdata class
## there are three annotation functions available:
## 1. annFUN.db -- used for bioconductor affy chips
## 2. annFUN.gene2GO -- used when you have mappings from each gene to GOs
## 3. annFUN.GO2genes -- used when you have mappings from each GO to genes
##
help("annFUN")
GOdata <- new("topGOdata",
ontology = "BP",
allGenes = geneList,
annot = annFUN.gene2GO, ## the new annotation function
gene2GO = gene2GO[index]) ## the GO to gene ID's dataset
## write the gene2GO[index] into a file to use as an example in the vignette
a <- lapply(gene2GO[index], function(x) paste(x, collapse = ", "))
mat <- cbind(names(a), unlist(a, use.names = FALSE))
write.table(mat, file = "geneid2go.map", quote = FALSE, sep = "\t",
row.names = FALSE, col.names = FALSE)
fn <- system.file("examples/geneid2go.map", package = "topGO")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.