# Install Package: Ctrl + Shift + B
# devtools::document()
# for setNet
.findConX <- function(geneId, df1, sets){
if(geneId %in% rownames(df1)){
#for 1 target gene: find all genes that are in a set where this gene is as well
temp <- df1[geneId,] #isolate the gene's row
temp <- names(temp[temp==T]) #isolate the colnames that are TRUE for this row
temp <- mapply(function(x,y) if(x %in% temp){y}, x=names(sets), y=sets ) #for these sets, isolates all their genes
temp <- as.vector(unlist(temp)) #unlist them into one big vector
temp <- temp[temp %in% rownames(df1) & !temp %in% geneId] #keep only those genes that are in the df1 (genes of interest) but not the target gene itself
c(rbind(rep(geneId,length(temp)), temp)) # vector for i graph usage: for each gene, write the target gene and then the gene; repeat for every found gene
}
}
# for setNetGSEA: get igraph compatible vector listing set connections
.findConY <- function(geneId, df1){
temp <- df1[geneId,] #isolate the gene's row
temp <- names(temp[temp==T]) #isolate the colnames that are TRUE for this row
temp <- lapply(temp, function(x) do.call(rbind, lapply(temp, function(y) sort(c(x,y))))) #for each set, get a combination with each other set
temp <- do.call(rbind, temp) #combine lists into table
temp <- data.frame(x=temp[,1], y=temp[,2], stringsAsFactors = F) #new dataframe for better handling
temp <- subset(temp, !x==y & !duplicated(paste(x,y))) #filter
c(rbind(temp$x, temp$y))
}
#' Create an igraph from genes of interest and a list of gene sets
#'
#' Imports:
#' igraph,
#' data.table,
#' AnnotationDbi,
#' org.Hs.eg.db,
#' org.Mm.eg.db
#'
#' @param EntrezIds vector of characters or numbers, set of genes of interest
#' @param sets list of character vectors (ENTREZIDs). This is the setlist used to connect the genes of interest.
#' @param species character, either "human" or "mouse", must reflect the species of 'EntrezIds' and 'sets'
#' @param spread number, factor to expand the plot
#' @return prints a plot and outputs a list: 1) a table, summarizing the hits of the genes of interest against the sets, 2) a character vector that can directly be used as input for the igraph function 'graph', 3) igraph object, the output of the igraph function 'graph', 4) a layout that can be used when ploting the output from the igraph function 'graph', 5) a vector of numbers for the edge widths in the plot
#' @details it is complicated
#' @examples
#' EntrezIds <- c(246329,84314,10170)
#' sets <- qusage::read.gmt("path/to/file.gmt")
#' net <- setNet(myGenes, sets, "human")
#' @export
setNet <- function(EntrezIds, sets, species, edgefactor=1){
#genes of interest
EntrezIds <- as.character(EntrezIds)
#all genes from all sets
allGenes <- unique(unlist(sets))
#dataframe where every gene from allGenes is a row and every column is a set. cells indicate if gene is in set or not (boolean)
df <- lapply(sets, function(x) allGenes %in% x)
df <- do.call(cbind, df)
rownames(df) <- allGenes
#filter the dataframe to only include genes of interest
df <- subset(df, rownames(df) %in% EntrezIds)
#get an igraph compatibel vector listing gene connections
cons0 <- lapply(EntrezIds, function(x) .findConX(x, df, sets))
cons0 <- unlist(cons0)
thick <- data.table::data.table(a=cons0[c(T,F)], b=cons0[c(F,T)], num=1)
thick[,ab := apply(thick, 1, function(x) paste(sort(c(x[1], x[2])), collapse="_"))]
thick[,width := cumsum(num), by=list(ab)]
thick <- thick[order(thick$ab, thick$width, decreasing=T),]
thick <- thick[!duplicated(thick$ab, ),]
width <- ((thick$width)^2)/4*edgefactor
#important: cons is translated from EntrezIds to Symbols, but EntrezIds remain as the names (important for the seekNetStyle() function)
cons <- c(rbind(thick$a,thick$b))
if(species %in% "human"){
cons <- AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db,keys=cons,column="SYMBOL",keytype="ENTREZID",multiVals="first")
}else if(species %in% "mouse"){
cons <- AnnotationDbi::mapIds(org.Mm.eg.db::org.Mm.eg.db,keys=cons,column="SYMBOL",keytype="ENTREZID",multiVals="first")
}else{stop("No valid species selected. Select either 'human' or 'mouse'.")}
icons <- igraph::graph(cons, directed=F)
#icons <- igraph::simplify(icons, remove.multiple=T, remove.loops=T)
layout <- igraph::layout_with_fr(icons)
layout <- igraph::norm_coords(layout, ymin=-1, ymax=1, xmin=-1, xmax=1)
hit_sets <- lapply(sets, function(x) c(sum(EntrezIds %in% x), length(x), sum(EntrezIds %in% x)/length(x)*100))
hit_sets <- do.call(rbind, hit_sets)
colnames(hit_sets) <- c("hits","set_size","hit_percentage")
hit_sets <- subset(as.data.frame(hit_sets), hits>0)
hit_sets$set <- rownames(hit_sets)
hit_sets <- hit_sets[order(hit_sets$hit_percentage, decreasing=T),]
hit_sets <- hit_sets[order(hit_sets$hits, decreasing=T),]
print(plot(icons, layout=layout, rescale=F,
vertex.size=7,vertex.color="lightblue", vertex.frame.color="blue",
vertex.label.color="black", vertex.label.size=2,
edge.width=width, edge.color="gray"))
list(hit_sets=hit_sets, cons=cons, icons=icons, layout=layout, width=width)
}
#' Enhancing an igraph originating from the seekNet function
#'
#' #' Imports:
#' igraph
#'
#' @param net outout from the setNet() function
#' @param set vector of characters (ENTREZIDs). Genes contained in this set will be colored.
#' @param spread number, factor to expand the plot
#' @param vertexSize number, regulates the size of the nodes/vertexes of the plot
#' @param markGroups list of character vectors (EntrezIds), gene groups that should be highlighted
#' @return prints a plot and outputs a list: 1) a vector of characters with colors that can be added to the igraph object, 2) a character vector that can directly be used as input for the igraph function 'graph', 3) igraph object, the output of the igraph function 'graph', 4) a layout that can be used when ploting the output from the igraph function 'graph', 5) a vector of numbers for the edge widths in the plot
#' @details the variable net has to be a direct output from the function seekNet()
#' @examples
#' myGenes <- c("KRT83","CD80","SLC14A1")
#' sets <- qusage::read.gmt("path/to/file.gmt")
#' net <- setNet(myGenes, sets, "human")
#' net1 <- setNetStyle(net, sets[[1]])
#' @export
setNetStyle <- function(net, set, spread=1, vertexSize=7, labelSize=2, markGroups=NA){
cons <- net[["cons"]]
icons <- net[["icons"]]
layout <- net[["layout"]]
width <- net[["width"]]
colors <- ifelse(unique(names(cons)) %in% set, "firebrick1", "lightblue")
bordercolors <- ifelse(unique(names(cons)) %in% set, "red", "blue")
pies <- lapply(igraph::degree(icons, mode="all"), function(x) c(length(unique(cons))-x, x))
marks <- lapply(markGroups, function(x) seq(length(unique(cons)))[unique(names(cons)) %in% x ])
piecolors <- lapply(colors, function(x) c("white", x))
igraph::V(icons)$color <- colors
igraph::V(icons)$vertex.frame.color <- bordercolors
igraph::V(icons)$pie.color <- piecolors
if(!is.na(markGroups)){
print(plot(icons, layout=layout*spread, rescale=F,
vertex.size=vertexSize, vertex.distance=100,
vertex.label.color="black", vertex.label.size=2,
edge.width=width, edge.color="gray", vertex.shape="pie", vertex.label.dist=0.9, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies,
mark.groups=marks, mark.col=rainbow(length(marks), alpha = 0.1), mark.border=NA))
}else{
print(plot(icons, layout=layout*spread, rescale=F,
vertex.size=vertexSize, vertex.distance=100,
vertex.label.color="black", vertex.label.size=labelSize,
edge.width=width, edge.color="gray", vertex.shape="pie", vertex.label.dist=0.9, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies
))
}
list(colors=colors, cons=cons, icons=icons, layout=layout, width=width)
}
#' Broken!!! should record interactive graph. Graph is interactive, but the recording does not work properly
#'
#' #' Imports:
#' igraph
#'
#' @param net outout from the serNet() or setNetStyle() function
#' @param wait number, second to unti lthe graph coordinates are saved
setNetInteract <- function(net, wait=10){
net <- net
icons <- net[["icons"]]
layout <- net[["layout"]]
tkid <- igraph::tkplot(icons, layout=layout)
for(i in seq(wait,1,-1)){
print(i)
Sys.sleep(1)
}
net[["layout"]] <- igraph::tkplot.getcoords(tkid)
net
}
#' Creating an igraph from genes of interest and a list of gene sets
#'
#' Imports:
#' igraph,
#' data.table,
#'
#' @param gsea gsea object from function seekGSEA() from the seeqR package
#' @param spread number, factor to expand the plot
#' @param vertexSize number, regulates the size of the nodes/vertexes of the plot
#' @param vertexShape character, either "pie" (default) or "none", circle", “square”, “csquare”, “rectangle” “crectangle”, “vrectangle”, “raster”, or “sphere”
#' @return prints a plot and outputs a list
#' @details it is complicated
#' @examples
#' gsea <- seekGSEA(...)
#' setNetGSEA(gsea)
#' @export
setNetGSEA <- function(gsea, edgefactor=1, spread=1, vertexSize=1, vertexDistance=100, normalizevertexsize=T, vertexShape="pie", labelSize=2, labelDistance=1.7, pcutoff=0.05){
#filter GSEA
gsea <- subset(gsea, pval<=pcutoff)
rownames(gsea) <- gsea$GO_ID
#sets of interest
sets <- lapply(gsea[,"hit_genes"], function(x) unlist(strsplit(x, ",")))
names(sets) <- gsea[,"GO_ID"]
#all genes from sets
allGenes <- unique(unlist(sets))
#dataframe where every gene from allGenes is a row and every column is a set. cells indicate if gene is in set or not (boolean)
df <- lapply(sets, function(x) allGenes %in% x)
df <- do.call(cbind, df)
rownames(df) <- allGenes
#filter out genes that are only present in one set
df <- subset(df, rowSums(df)>1)
cons0 <- lapply(rownames(df), function(x) .findConY(x, df))
cons0 <- unlist(cons0)
thick <- data.table::data.table(a=cons0[c(T,F)], b=cons0[c(F,T)], num=1)
thick[,ab := paste(a, b, sep="_")]
thick[,width := cumsum(num), by=list(ab)]
thick <- thick[order(thick$b, thick$width, decreasing=T),]
thick <- thick[!duplicated(thick$ab, ),]
width <- thick$width*edgefactor
cons <- c(rbind(thick$a,thick$b))
temp <- cons
cons <- as.character(gsea[cons,"GO_name"])
names(cons) <- temp
icons <- igraph::graph(cons, directed=F)
layout <- igraph::layout_with_fr(icons)
layout <- igraph::norm_coords(layout, ymin=-1, ymax=1, xmin=-1, xmax=1)
pies <- gsea[unique(names(cons)) , c("set_size","hit_size")]
pies <- data.frame(rest=pies$set_size-pies$hit_size, hits=pies$hit_size)
pies <- as.list(as.data.frame(t(pies)))
if(normalizevertexsize){
sizes <- gsea[unique(names(cons)) , c("set_size")]
sizes <- log2(sizes/max(sizes, na.rm=T))
}else{sizes=1}
colors <- ifelse(gsea[unique(names(cons)) , "NES"] >0, "firebrick1", "lightblue")
bordercolors <- ifelse(gsea[unique(names(cons)) , "NES"] >0, "red", "blue")
piecolors <- lapply(colors, function(x) c("white", x))
igraph::V(icons)$color <- colors
igraph::V(icons)$vertex.frame.color <- bordercolors
igraph::V(icons)$pie.color <- piecolors
# plot(icons, layout=layout, rescale=F, edge.width=width, vertex.shape="pie", vertex.pie=pies,
# vertex.label.color="black", vertex.label.dist=1.7, vertex.label.font=2, vertex.label.degree=5)
print(plot(icons, layout=layout*spread, rescale=F,
vertex.size=vertexSize*sizes, vertex.distance=vertexDistance,
vertex.label.color="black", vertex.label.size=labelSize,
edge.width=width, edge.color="gray", vertex.shape=vertexShape,
vertex.label.dist=labelDistance, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies
))
list(cons=cons, icons=icons, layout=layout, width=width)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.