R/setNet.R

Defines functions setNetGSEA setNetInteract setNetStyle setNet .findConY .findConX

Documented in setNet setNetGSEA setNetInteract setNetStyle

# Install Package: Ctrl + Shift + B
# devtools::document()

# for setNet
.findConX <- function(geneId, df1, sets){
  if(geneId %in% rownames(df1)){
    #for 1 target gene: find all genes that are in a set where this gene is as well
    temp <- df1[geneId,] #isolate the gene's row
    temp <- names(temp[temp==T]) #isolate the colnames that are TRUE for this row
    temp <- mapply(function(x,y) if(x %in% temp){y}, x=names(sets), y=sets ) #for these sets, isolates all their genes
    temp <- as.vector(unlist(temp)) #unlist them into one big vector
    temp <- temp[temp %in% rownames(df1) & !temp %in% geneId] #keep only those genes that are in the df1 (genes of interest) but not the target gene itself
    c(rbind(rep(geneId,length(temp)), temp)) # vector for i graph usage: for each gene, write the target gene and then the gene; repeat for every found gene
  }
}

# for setNetGSEA: get igraph compatible vector listing set connections
.findConY <- function(geneId, df1){
  temp <- df1[geneId,] #isolate the gene's row
  temp <- names(temp[temp==T]) #isolate the colnames that are TRUE for this row
  temp <- lapply(temp, function(x) do.call(rbind, lapply(temp, function(y) sort(c(x,y))))) #for each set, get a combination with each other set
  temp <- do.call(rbind, temp) #combine lists into table
  temp <- data.frame(x=temp[,1], y=temp[,2], stringsAsFactors = F) #new dataframe for better handling
  temp <- subset(temp, !x==y & !duplicated(paste(x,y))) #filter
  c(rbind(temp$x, temp$y))
}

#' Create an igraph from genes of interest and a list of gene sets
#'
#' Imports:
#' igraph,
#' data.table,
#' AnnotationDbi,
#' org.Hs.eg.db,
#' org.Mm.eg.db
#'
#' @param EntrezIds vector of characters or numbers, set of genes of interest
#' @param sets list of character vectors (ENTREZIDs). This is the setlist used to connect the genes of interest.
#' @param species character, either "human" or "mouse", must reflect the species of 'EntrezIds' and 'sets'
#' @param spread number, factor to expand the plot
#' @return prints a plot and outputs a list: 1) a table, summarizing the hits of the genes of interest against the sets, 2) a character vector that can directly be used as input for the igraph function 'graph', 3) igraph object, the output of the igraph function 'graph', 4) a layout that can be used when ploting the output from the igraph function 'graph', 5) a vector of numbers for the edge widths in the plot
#' @details it is complicated
#' @examples
#' EntrezIds <- c(246329,84314,10170)
#' sets <- qusage::read.gmt("path/to/file.gmt")
#' net <- setNet(myGenes, sets, "human")
#' @export
setNet <- function(EntrezIds, sets, species, edgefactor=1){
  #genes of interest
  EntrezIds <- as.character(EntrezIds)

  #all genes from all sets
  allGenes <- unique(unlist(sets))

  #dataframe where every gene from allGenes is a row and every column is a set. cells indicate if gene is in set or not (boolean)
  df <- lapply(sets, function(x) allGenes %in% x)
  df <- do.call(cbind, df)
  rownames(df) <- allGenes
  #filter the dataframe to only include genes of interest
  df <- subset(df, rownames(df) %in% EntrezIds)

  #get an igraph compatibel vector listing gene connections
  cons0 <- lapply(EntrezIds, function(x) .findConX(x, df, sets))
  cons0 <- unlist(cons0)

  thick <- data.table::data.table(a=cons0[c(T,F)], b=cons0[c(F,T)], num=1)
  thick[,ab := apply(thick, 1, function(x) paste(sort(c(x[1], x[2])), collapse="_"))]
  thick[,width := cumsum(num), by=list(ab)]
  thick <- thick[order(thick$ab, thick$width, decreasing=T),]
  thick <- thick[!duplicated(thick$ab, ),]
  width <- ((thick$width)^2)/4*edgefactor

  #important: cons is translated from EntrezIds to Symbols, but EntrezIds remain as the names (important for the seekNetStyle() function)
  cons <- c(rbind(thick$a,thick$b))
  if(species %in% "human"){
    cons <- AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db,keys=cons,column="SYMBOL",keytype="ENTREZID",multiVals="first")
  }else if(species %in% "mouse"){
    cons <- AnnotationDbi::mapIds(org.Mm.eg.db::org.Mm.eg.db,keys=cons,column="SYMBOL",keytype="ENTREZID",multiVals="first")
  }else{stop("No valid species selected. Select either 'human' or 'mouse'.")}

  icons <- igraph::graph(cons, directed=F)
  #icons <- igraph::simplify(icons, remove.multiple=T, remove.loops=T)
  layout <- igraph::layout_with_fr(icons)
  layout <- igraph::norm_coords(layout, ymin=-1, ymax=1, xmin=-1, xmax=1)

  hit_sets <- lapply(sets, function(x) c(sum(EntrezIds %in% x), length(x), sum(EntrezIds %in% x)/length(x)*100))
  hit_sets <- do.call(rbind, hit_sets)
  colnames(hit_sets) <- c("hits","set_size","hit_percentage")
  hit_sets <- subset(as.data.frame(hit_sets), hits>0)
  hit_sets$set <- rownames(hit_sets)
  hit_sets <- hit_sets[order(hit_sets$hit_percentage, decreasing=T),]
  hit_sets <- hit_sets[order(hit_sets$hits, decreasing=T),]

  print(plot(icons, layout=layout, rescale=F,
             vertex.size=7,vertex.color="lightblue", vertex.frame.color="blue",
             vertex.label.color="black", vertex.label.size=2,
             edge.width=width, edge.color="gray"))
  list(hit_sets=hit_sets, cons=cons, icons=icons, layout=layout, width=width)
}

#' Enhancing an igraph originating from the seekNet function
#'
#' #' Imports:
#' igraph
#'
#' @param net outout from the setNet() function
#' @param set vector of characters (ENTREZIDs). Genes contained in this set will be colored.
#' @param spread number, factor to expand the plot
#' @param vertexSize number, regulates the size of the nodes/vertexes of the plot
#' @param markGroups list of character vectors (EntrezIds), gene groups that should be highlighted
#' @return prints a plot and outputs a list: 1) a vector of characters with colors that can be added to the igraph object, 2) a character vector that can directly be used as input for the igraph function 'graph', 3) igraph object, the output of the igraph function 'graph', 4) a layout that can be used when ploting the output from the igraph function 'graph', 5) a vector of numbers for the edge widths in the plot
#' @details the variable net has to be a direct output from the function seekNet()
#' @examples
#' myGenes <- c("KRT83","CD80","SLC14A1")
#' sets <- qusage::read.gmt("path/to/file.gmt")
#' net <- setNet(myGenes, sets, "human")
#' net1 <- setNetStyle(net, sets[[1]])
#' @export
setNetStyle <- function(net, set, spread=1, vertexSize=7, labelSize=2, markGroups=NA){
  cons <- net[["cons"]]
  icons <- net[["icons"]]
  layout <- net[["layout"]]
  width <- net[["width"]]
  colors <- ifelse(unique(names(cons)) %in% set, "firebrick1", "lightblue")
  bordercolors <- ifelse(unique(names(cons)) %in% set, "red", "blue")
  pies <- lapply(igraph::degree(icons, mode="all"), function(x) c(length(unique(cons))-x, x))
  marks <- lapply(markGroups, function(x) seq(length(unique(cons)))[unique(names(cons)) %in% x ])
  piecolors <- lapply(colors, function(x) c("white", x))
  igraph::V(icons)$color <-  colors
  igraph::V(icons)$vertex.frame.color <-  bordercolors
  igraph::V(icons)$pie.color <- piecolors
  if(!is.na(markGroups)){
    print(plot(icons, layout=layout*spread, rescale=F,
               vertex.size=vertexSize, vertex.distance=100,
               vertex.label.color="black", vertex.label.size=2,
               edge.width=width, edge.color="gray", vertex.shape="pie", vertex.label.dist=0.9, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies,
               mark.groups=marks, mark.col=rainbow(length(marks), alpha = 0.1), mark.border=NA))
  }else{
    print(plot(icons, layout=layout*spread, rescale=F,
               vertex.size=vertexSize, vertex.distance=100,
               vertex.label.color="black", vertex.label.size=labelSize,
               edge.width=width, edge.color="gray", vertex.shape="pie", vertex.label.dist=0.9, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies
               ))
  }

  list(colors=colors, cons=cons, icons=icons, layout=layout, width=width)
}


#' Broken!!! should record interactive graph. Graph is interactive, but the recording does not work properly
#'
#' #' Imports:
#' igraph
#'
#' @param net outout from the serNet() or setNetStyle() function
#' @param wait number, second to unti lthe graph coordinates are saved
setNetInteract <- function(net, wait=10){
  net <- net
  icons <- net[["icons"]]
  layout <- net[["layout"]]
  tkid <- igraph::tkplot(icons, layout=layout)
  for(i in seq(wait,1,-1)){
    print(i)
    Sys.sleep(1)
  }
  net[["layout"]] <- igraph::tkplot.getcoords(tkid)
  net
}


#' Creating an igraph from genes of interest and a list of gene sets
#'
#' Imports:
#' igraph,
#' data.table,
#'
#' @param gsea gsea object from function seekGSEA() from the seeqR package
#' @param spread number, factor to expand the plot
#' @param vertexSize number, regulates the size of the nodes/vertexes of the plot
#' @param vertexShape character, either "pie" (default) or "none", circle", “square”, “csquare”, “rectangle” “crectangle”, “vrectangle”, “raster”, or “sphere”
#' @return prints a plot and outputs a list
#' @details it is complicated
#' @examples
#' gsea <- seekGSEA(...)
#' setNetGSEA(gsea)
#' @export
setNetGSEA <- function(gsea, edgefactor=1, spread=1, vertexSize=1, vertexDistance=100, normalizevertexsize=T, vertexShape="pie", labelSize=2, labelDistance=1.7, pcutoff=0.05){
  #filter GSEA
  gsea <- subset(gsea, pval<=pcutoff)
  rownames(gsea) <- gsea$GO_ID

  #sets of interest
  sets <- lapply(gsea[,"hit_genes"], function(x) unlist(strsplit(x, ",")))
  names(sets) <- gsea[,"GO_ID"]

  #all genes from sets
  allGenes <- unique(unlist(sets))

  #dataframe where every gene from allGenes is a row and every column is a set. cells indicate if gene is in set or not (boolean)
  df <- lapply(sets, function(x) allGenes %in% x)
  df <- do.call(cbind, df)
  rownames(df) <- allGenes
  #filter out genes that are only present in one set
  df <- subset(df, rowSums(df)>1)

  cons0 <- lapply(rownames(df), function(x) .findConY(x, df))
  cons0 <- unlist(cons0)

  thick <- data.table::data.table(a=cons0[c(T,F)], b=cons0[c(F,T)], num=1)
  thick[,ab := paste(a, b, sep="_")]
  thick[,width := cumsum(num), by=list(ab)]
  thick <- thick[order(thick$b, thick$width, decreasing=T),]
  thick <- thick[!duplicated(thick$ab, ),]
  width <- thick$width*edgefactor

  cons <- c(rbind(thick$a,thick$b))
  temp <- cons
  cons <- as.character(gsea[cons,"GO_name"])
  names(cons) <- temp

  icons <- igraph::graph(cons, directed=F)
  layout <- igraph::layout_with_fr(icons)
  layout <- igraph::norm_coords(layout, ymin=-1, ymax=1, xmin=-1, xmax=1)
  pies <- gsea[unique(names(cons)) , c("set_size","hit_size")]
  pies <- data.frame(rest=pies$set_size-pies$hit_size, hits=pies$hit_size)
  pies <- as.list(as.data.frame(t(pies)))
  if(normalizevertexsize){
    sizes <- gsea[unique(names(cons)) , c("set_size")]
    sizes <- log2(sizes/max(sizes, na.rm=T))
  }else{sizes=1}


  colors <- ifelse(gsea[unique(names(cons)) , "NES"] >0, "firebrick1", "lightblue")
  bordercolors <- ifelse(gsea[unique(names(cons)) , "NES"] >0, "red", "blue")
  piecolors <- lapply(colors, function(x) c("white", x))

  igraph::V(icons)$color <-  colors
  igraph::V(icons)$vertex.frame.color <-  bordercolors
  igraph::V(icons)$pie.color <- piecolors

  # plot(icons, layout=layout, rescale=F, edge.width=width, vertex.shape="pie", vertex.pie=pies,
  #      vertex.label.color="black", vertex.label.dist=1.7, vertex.label.font=2, vertex.label.degree=5)

  print(plot(icons, layout=layout*spread, rescale=F,
             vertex.size=vertexSize*sizes, vertex.distance=vertexDistance,
             vertex.label.color="black", vertex.label.size=labelSize,
             edge.width=width, edge.color="gray", vertex.shape=vertexShape,
             vertex.label.dist=labelDistance, vertex.label.font=2, vertex.label.degree=5, vertex.pie=pies
  ))
  list(cons=cons, icons=icons, layout=layout, width=width)
}
Solatar/setR documentation built on Dec. 5, 2020, 10:50 p.m.