R/internal.R

Defines functions GOChord bezier check_chord IPPIlist_path_net IPPIpath_net select_class SelectedSample delete.NULLs Uniform

Documented in GOChord select_class SelectedSample

Uniform<-function(pathwayfrom){
  mapped_genes<-pathwayfrom
  xx <- unique(unlist(mapped_genes))
  top3 <- matrix(0, length(xx), length(pathwayfrom))
  rownames(top3) <- xx
  colnames(top3)<- names(pathwayfrom)
  for (j in  1:length(xx)){
    for (k in  1:length(pathwayfrom)){
      if (length(intersect(xx[[j]],pathwayfrom[[k]])!=0)){
        
        top3[j,k]<-xx[j]
      }
    }
  }
  top3[top3 == 0] <- " "
return(top3)
  }














delete.NULLs  <-  function(xlist){   # delele null/empty entries in a list
  xlist[unlist(lapply(xlist, nrow) != 0)]
}



#' @title Select the class of TCGA data
#' @description select two labels from ID barcode
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal)
#' @export
#' @return a gene expression matrix of the samples with specified label
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_fil,typesample="tumour")[,2]
SelectedSample <- function(Dataset,typesample){
  if( typesample =="tumour"){
    Dataset <- Dataset[,which( as.numeric(substr(colnames(Dataset), 14, 15)) == 01) ]
  }
  
  if( typesample =="normal"){
    Dataset <- Dataset[,which( as.numeric(substr(colnames(Dataset), 14, 15)) >= 10) ]
  }
  
  return(Dataset)
  
}


#' @title Select the class of TCGA data
#' @description select best performance
#' @param performance_matrix  list of AUC value
#' @param cutoff cut-off for AUC value
#' @return a gene expression matrix with only pairwise pathway with a particular cut-off
select_class<-function(performance_matrix,cutoff){
  tmp_ordered <- as.data.frame(performance_matrix[order(performance_matrix[,1],decreasing=TRUE),])

  er<-tmp_ordered[tmp_ordered[,1]>cutoff,]
  #ase<-tmp_ordered[tmp_ordered$pathway>cutoff,]
  #rownames(er)<-rownames(tmp_ordered)
  #er[,2]<-tmp_ordered$pathway
  #lipid_metabolism<-er[1:length(ase),]
  return(er)
}




IPPIpath_net<-function(pathway,data){
  lista_int<-list()
  for (k in 1:ncol(pathway)){
    print(colnames(pathway)[k])
    currentPathway_genes<-pathway[,k]
    colnames(data) <- c("gene_symbolA", "gene_symbolB")
    i <- sapply(data, is.factor)
    data[i] <- lapply(data[i], as.character)
    ver<-unlist(data)
    n<-unique(ver)
    s<-intersect(n,currentPathway_genes)
    g <- graph.data.frame(data,directed=FALSE)
    g2 <- induced.subgraph(graph=g,vids=s)
    aaa<-get.data.frame(g2)
    colnames(aaa)[1] <- 'V1'
    colnames(aaa)[2] <- 'V2'
    lista_int[[k]]<-aaa
    names(lista_int)[k]<-colnames(pathway)[k] 
  }
  return(lista_int)
}





IPPIlist_path_net<-function(lista_net,pathway){
  v=list()
  bn=list()
  for (j in 1:length(lista_net)){
    cf<-lista_net[[j]]
    i <- sapply(cf, is.factor) 
    cf[i] <- lapply(cf[i], as.character)
    colnames(cf) <- c("m_shar_pro", "m2_shar_pro")
    m<-c(cf$m_shar_pro)
    m2<-c(cf$m2_shar_pro)
    s<-c(m,m2)
    fr<- unique(s)
    n<-as.data.frame(fr)
    if(length(n)==0){
      v[[j]]<-NULL
      
    }
    if(length(n)!=0){
      i <- sapply(n, is.factor) 
      n[i] <- lapply(n[i], as.character)
      #for (k in  1:ncol(pathway)){
      if (length(intersect(n$fr,pathway[,j]))==nrow(n)){
        print(paste("List of genes interacting in the same pathway:",colnames(pathway)[j]))
        aa<-intersect(n$fr,pathway[,j])
        v[[j]]<-aa
        names(v)[j]<-colnames(pathway)[j]
      }
    }}
  return(v)}





check_chord <- function(mat, limit){
  
  if(all(colSums(mat) >= limit[2]) & all(rowSums(mat) >= limit[1])) return(mat)
  
  tmp <- mat[(rowSums(mat) >= limit[1]),]
  mat <- tmp[,(colSums(tmp) >= limit[2])]
  
  mat <- check_chord(mat, limit)
  return(mat)
}

bezier <- function(data, process.col){
  x <- c()
  y <- c()
  Id <- c()
  sequ <- seq(0, 1, by = 0.01)
  N <- dim(data)[1]
  sN <- seq(1, N, by = 2)
  if (process.col[1] == '') col_rain <- grDevices::rainbow(N) else col_rain <- process.col
  for (n in sN){
    xval <- c(); xval2 <- c(); yval <- c(); yval2 <- c()
    for (t in sequ){
      xva <- (1 - t) * (1 - t) * data$x.start[n] + t * t * data$x.end[n]
      xval <- c(xval, xva)
      xva2 <- (1 - t) * (1 - t) * data$x.start[n + 1] + t * t * data$x.end[n + 1]
      xval2 <- c(xval2, xva2)
      yva <- (1 - t) * (1 - t) * data$y.start[n] + t * t * data$y.end[n]  
      yval <- c(yval, yva)
      yva2 <- (1 - t) * (1 - t) * data$y.start[n + 1] + t * t * data$y.end[n + 1]
      yval2 <- c(yval2, yva2)			
    }
    x <- c(x, xval, rev(xval2))
    y <- c(y, yval, rev(yval2))
    Id <- c(Id, rep(n, 2 * length(sequ)))
  }
  df <- data.frame(lx = x, ly = y, ID = Id)
  return(df)
}


#' @name GOChord
#' @title Displays the relationship between genes and terms.
#' @description The GOChord function generates a circularly composited overview 
#'   of selected/specific genes and their assigned processes or terms. More 
#'   generally, it joins genes and processes via ribbons in an intersection-like
#'   graph. 
#' @param data The matrix represents the binary relation (1= is related to, 0= 
#'   is not related to) between a set of genes (rows) and processes (columns); a
#'   column for the logFC of the genes is optional
#' @param title The title (on top) of the plot
#' @param space The space between the chord segments of the plot
#' @param gene.order A character vector defining the order of the displayed gene
#'   labels
#' @param gene.size The size of the gene labels
#' @param gene.space The space between the gene labels and the segement of the 
#'   logFC
#' @param nlfc Defines the number of logFC columns (default=1)
#' @param lfc.col The fill color for the logFC specified in the following form: 
#'   c(color for low values, color for the mid point, color for the high values)
#' @param lfc.min Specifies the minimium value of the logFC scale (default = -3)
#' @param lfc.max Specifies the maximum value of the logFC scale (default = 3)
#' @param ribbon.col The background color of the ribbons
#' @param border.size Defines the size of the ribbon borders
#' @param process.label The size of the legend entries
#' @param limit A vector with two cutoff values (default= c(0,0)). 
#' @import ggplot2
#' @import grDevices
GOChord <- function(data, title, space, gene.order, gene.size, gene.space, nlfc = 1, lfc.col, lfc.min, lfc.max, ribbon.col, border.size, process.label, limit){
  y <- id <- xpro <- ypro <- xgen <- ygen <- lx <- ly <- ID <- logFC <- NULL
  Ncol <- dim(data)[2]
  
  if (missing(title)) title <- ''
  if (missing(space)) space = 0
  if (missing(gene.order)) gene.order <- 'none'
  if (missing(gene.size)) gene.size <- 3
  if (missing(gene.space)) gene.space <- 0.2
  if (missing(lfc.col)) lfc.col <- c('brown1', 'azure', 'cornflowerblue')
  if (missing(lfc.min)) lfc.min <- -10
  if (missing(lfc.max)) lfc.max <- 10
  if (missing(border.size)) border.size <- 0.5
  if (missing (process.label)) process.label <- 11
  if (missing(limit)) limit <- c(0, 0)
  
  if (gene.order == 'logFC') data <- data[order(data[, Ncol], decreasing = T), ]
  if (gene.order == 'alphabetical') data <- data[order(rownames(data)), ]
  if (sum(!is.na(match(colnames(data), 'logFC'))) > 0){
    if (nlfc == 1){
      cdata <- check_chord(data[, 1:(Ncol - 1)], limit)
      lfc <- sapply(rownames(cdata), function(x) data[match(x,rownames(data)), Ncol])
    }else{
      cdata <- check_chord(data[, 1:(Ncol - nlfc)], limit)
      lfc <- sapply(rownames(cdata), function(x) data[, (Ncol - nlfc + 1)])
    }
  }else{
    cdata <- check_chord(data, limit)
    lfc <- 0
  }
  if (missing(ribbon.col)) colRib <- grDevices::rainbow(dim(cdata)[2]) else colRib <- ribbon.col
  nrib <- colSums(cdata)
  ngen <- rowSums(cdata)
  Ncol <- dim(cdata)[2]
  Nrow <- dim(cdata)[1]
  colRibb <- c()
  for (b in 1:length(nrib)) colRibb <- c(colRibb, rep(colRib[b], 202 * nrib[b]))
  r1 <- 1; r2 <- r1 + 0.1
  xmax <- c(); x <- 0
  for (r in 1:length(nrib)){
    perc <- nrib[r] / sum(nrib)
    xmax <- c(xmax, (pi * perc) - space)
    if (length(x) <= Ncol - 1) x <- c(x, x[r] + pi * perc)
  }
  xp <- c(); yp <- c()
  l <- 50
  for (s in 1:Ncol){
    xh <- seq(x[s], x[s] + xmax[s], length = l)
    xp <- c(xp, r1 * sin(x[s]), r1 * sin(xh), r1 * sin(x[s] + xmax[s]), r2 * sin(x[s] + xmax[s]), r2 * sin(rev(xh)), r2 * sin(x[s]))
    yp <- c(yp, r1 * cos(x[s]), r1 * cos(xh), r1 * cos(x[s] + xmax[s]), r2 * cos(x[s] + xmax[s]), r2 * cos(rev(xh)), r2 * cos(x[s]))
  }
  df_process <- data.frame(x = xp, y = yp, id = rep(c(1:Ncol), each = 4 + 2 * l))
  xp <- c(); yp <- c(); logs <- NULL
  x2 <- seq(0 - space, -pi - (-pi / Nrow) - space, length = Nrow)
  xmax2 <- rep(-pi / Nrow + space, length = Nrow)
  for (s in 1:Nrow){
    xh <- seq(x2[s], x2[s] + xmax2[s], length = l)
    if (nlfc <= 1){
      xp <- c(xp, (r1 + 0.05) * sin(x2[s]), (r1 + 0.05) * sin(xh), (r1 + 0.05) * sin(x2[s] + xmax2[s]), r2 * sin(x2[s] + xmax2[s]), r2 * sin(rev(xh)), r2 * sin(x2[s]))
      yp <- c(yp, (r1 + 0.05) * cos(x2[s]), (r1 + 0.05) * cos(xh), (r1 + 0.05) * cos(x2[s] + xmax2[s]), r2 * cos(x2[s] + xmax2[s]), r2 * cos(rev(xh)), r2 * cos(x2[s]))
    }else{
      tmp <- seq(r1, r2, length = nlfc + 1)
      for (t in 1:nlfc){
        logs <- c(logs, data[s, (dim(data)[2] + 1 - t)])
        xp <- c(xp, (tmp[t]) * sin(x2[s]), (tmp[t]) * sin(xh), (tmp[t]) * sin(x2[s] + xmax2[s]), tmp[t + 1] * sin(x2[s] + xmax2[s]), tmp[t + 1] * sin(rev(xh)), tmp[t + 1] * sin(x2[s]))
        yp <- c(yp, (tmp[t]) * cos(x2[s]), (tmp[t]) * cos(xh), (tmp[t]) * cos(x2[s] + xmax2[s]), tmp[t + 1] * cos(x2[s] + xmax2[s]), tmp[t + 1] * cos(rev(xh)), tmp[t + 1] * cos(x2[s]))
      }}}
  if(lfc[1] != 0){
    if (nlfc == 1){
      df_genes <- data.frame(x = xp, y = yp, id = rep(c(1:Nrow), each = 4 + 2 * l), logFC = rep(lfc, each = 4 + 2 * l))
    }else{
      df_genes <- data.frame(x = xp, y = yp, id = rep(c(1:(nlfc*Nrow)), each = 4 + 2 * l), logFC = rep(logs, each = 4 + 2 * l))  
    }
  }else{
    df_genes <- data.frame(x = xp, y = yp, id = rep(c(1:Nrow), each = 4 + 2 * l))
  }
  aseq <- seq(0, 180, length = length(x2)); angle <- c()
  for (o in aseq) if((o + 270) <= 360) angle <- c(angle, o + 270) else angle <- c(angle, o - 90)
  df_texg <- data.frame(xgen = (r1 + gene.space) * sin(x2 + xmax2/2),ygen = (r1 + gene.space) * cos(x2 + xmax2 / 2),labels = rownames(cdata), angle = angle)
  df_texp <- data.frame(xpro = (r1 + 0.15) * sin(x + xmax / 2),ypro = (r1 + 0.15) * cos(x + xmax / 2), labels = colnames(cdata), stringsAsFactors = FALSE)
  cols <- rep(colRib, each = 4 + 2 * l)
  x.end <- c(); y.end <- c(); processID <- c()
  for (gs in 1:length(x2)){
    val <- seq(x2[gs], x2[gs] + xmax2[gs], length = ngen[gs] + 1)
    pros <- which((cdata[gs, ] != 0) == T)
    for (v in 1:(length(val) - 1)){
      x.end <- c(x.end, sin(val[v]), sin(val[v + 1]))
      y.end <- c(y.end, cos(val[v]), cos(val[v + 1]))
      processID <- c(processID, rep(pros[v], 2))
    }
  }
  df_bezier <- data.frame(x.end = x.end, y.end = y.end, processID = processID)
  df_bezier <- df_bezier[order(df_bezier$processID,-df_bezier$y.end),]
  x.start <- c(); y.start <- c()
  for (rs in 1:length(x)){
    val<-seq(x[rs], x[rs] + xmax[rs], length = nrib[rs] + 1)
    for (v in 1:(length(val) - 1)){
      x.start <- c(x.start, sin(val[v]), sin(val[v + 1]))
      y.start <- c(y.start, cos(val[v]), cos(val[v + 1]))
    }
  }	
  df_bezier$x.start <- x.start
  df_bezier$y.start <- y.start
  df_path <- bezier(df_bezier, colRib)
  if(length(df_genes$logFC) != 0){
    tmp <- sapply(df_genes$logFC, function(x) ifelse(x > lfc.max, lfc.max, x))
    logFC <- sapply(tmp, function(x) ifelse(x < lfc.min, lfc.min, x))
    df_genes$logFC <- logFC
  }
  
  g<- ggplot() +
    geom_polygon(data = df_process, aes(x, y, group=id), fill='gray70', inherit.aes = F,color='black') +
    geom_polygon(data = df_process, aes(x, y, group=id), fill=cols, inherit.aes = F,alpha=0.6,color='black') +	
    geom_point(aes(x = xpro, y = ypro, size = factor(labels, levels = labels), shape = NA), data = df_texp) +
    guides(size = guide_legend("Pathway", ncol = 4, byrow = T, override.aes = list(shape = 22, fill = unique(cols), size = 8))) +
    theme(legend.text = element_text(size = process.label)) +
    geom_text(aes(xgen, ygen, label = labels, angle = angle), data = df_texg, size = gene.size) +
    geom_polygon(aes(x = lx, y = ly, group = ID), data = df_path, fill = colRibb, color = 'black', size = border.size, inherit.aes = F) +		
    labs(title = title) + theme(axis.line = element_blank(), axis.text.x = element_blank(),
                                axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(),
                                axis.title.y = element_blank(), panel.background = element_blank(), panel.border = element_blank(),
                                panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.background = element_blank())
  
  
  if (nlfc >= 1){
    g + geom_polygon(data = df_genes, aes(x, y, group = id, fill = logFC), inherit.aes = F, color = 'black') +
      scale_fill_gradient2('score', space = 'Lab', low = lfc.col[3], mid = lfc.col[2], high = lfc.col[1], guide = guide_colorbar(title.position = "top", title.hjust = 0.5), 
                           breaks = c(min(df_genes$logFC), max(df_genes$logFC)), labels = c(round(min(df_genes$logFC)), round(max(df_genes$logFC)))) +
      theme(legend.position = 'bottom', legend.background = element_rect(fill = 'transparent'), legend.box = 'horizontal', legend.direction = 'horizontal')
  }else{
    g + geom_polygon(data = df_genes, aes(x, y, group = id), fill = 'gray50', inherit.aes = F, color = 'black')+
      theme(legend.position = 'bottom', legend.background = element_rect(fill = 'transparent'), legend.box = 'horizontal', legend.direction = 'horizontal')
  }
}

Try the StarBioTrek package in your browser

Any scripts or data that you put into this service are public.

StarBioTrek documentation built on Nov. 8, 2020, 8:02 p.m.