R/Analysis.R

Defines functions cont.type display.clusters extract.clusters cluster.network buildBlast compute.matrix.Blast computeIC compute.matrix.Degree compute.matrix.FC compute.matrix.Distance compute.matrix.DSD compute.matrix read.network.edges splitgene splitBIOGRID splitDIP splituniprot splitrefseq read.network.tab read.network read.matrix.col3 read.matrix.RData read.matrix.table read.matrix

Documented in buildBlast cluster.network computeIC compute.matrix compute.matrix.Blast compute.matrix.Degree compute.matrix.Distance compute.matrix.DSD compute.matrix.FC cont.type display.clusters extract.clusters read.matrix read.matrix.col3 read.matrix.RData read.matrix.table read.network read.network.edges read.network.tab splitBIOGRID splitDIP splitgene splitrefseq splituniprot

#'Read Matrix
#'
#'Read a matrix in several formats
#'@param fileName directory of the matrix or data.frame (for mode col3)
#'@param mode table, col3, RData
#'@param sym True if the matrix must be symmetric.In modes table and RData,
#'if the matrix is not symmetric the value of M[i,j] will be (M[i,j]+M[j,i])/2.
#'In mode in 3col the value of M[i,j] will be the last that appears in the file
#'@param def the default value in mode 3col
#'@return matrix
read.matrix <- function(fileName,mode = "table", sym = "TRUE", def = 0) {
    switch(mode,
           table = return(read.matrix.table(fileName,sym)),
           col3 = return(read.matrix.col3(fileName,def = def)),
           RData = return(read.matrix.RData(fileName,sym)),
           stop("Enter a valid mode"))
  }

#'Intern
#'@keywords internal
read.matrix.table <- function(fileName,sym = "FALSE") {
  mat <- as.matrix(read.table(fileName))
  if (sym) {
    return((mat + t(mat)) / 2)
  }
  return(mat)
}

#'Intern
#'@keywords internal
read.matrix.RData <- function(fileName,sym = "FALSE") {
  load(fileName)
  mat <- get(ls()[ls() != "fileName"])
  if (sym) {
    return((mat + t(mat)) / 2)
  }
  return(mat)
}

#'Intern
#'@keywords internal
read.matrix.col3 <- function(fileName,def = 0) {
  if (class(fileName) == "character") {
    MMM <- read.table(fileName)
  }
  else{
    MMM <- fileName
  }
  colnames(MMM) <- c("V1","V2","V3")
  mat2 <- with(MMM, sparseMatrix(
    i = as.numeric(V1), j = as.numeric(V2), x = V3,dimnames = list(levels(V1), levels(V2))
  ))
  return(mat2)
}


#'Read Network
#'
#'Read network from a file
#'@param fileName directory of network or a data.frame
#'@param mode tab : a file from DIP or BIOGRID of type tab or mitab;
#'edges: a list of edges with attributes (not necessary)
#'@param db database to select proteins of mode tab (uniprot,DIP,refseq,BIOGRID, gene)
#'@param cols which columns must be selected with mode edges, you can use "all" to
#'select all columns
#'@param sep separator between columns in edge mode
#'@param int.type boolean to decide if must be select the interaction type in tab mode.
read.network <- function(fileName,mode = "tab", db = "uniprot", cols = "all",
                         sep = " ", int.type = TRUE) {
  switch(mode,
         tab = simplify(return(
           read.network.tab(fileName,db = db,int.type = int.type)
         )),
         edges = simplify(return(
           read.network.edges(fileName,cols,sep = sep)
         )),
         stop("No implemented yet"))
}

#' Network tab
#' @keywords internal
read.network.tab <- function(fileName, db = "uniprot", int.type = TRUE) {
  if (class(fileName) == "character") {
    tab <- read.table(
      fileName,sep = "\t",row.names = NULL,stringsAsFactors = FALSE
    )
  }
  else{
    tab <- fileName
  }
  tab2 <- tab[,c(1,2)]
  get.int.db <- function(i) {
    switch(
      db,
      refseq = {
        x = splitrefseq(tab2[i,1])
      },
      uniprot = {
        x = splituniprot(tab2[i,1])
      },
      DIP = {
        x = splitDIP(tab2[i,1])
      },
      BIOGRID = {
        x = splitBIOGRID(tab2[i,1])
      },
      gene = {
        x = splitgene(tab2[i,1])
      },
      stop("Enter valid database")
    )
    switch(
      db,
      refseq = {
        y = splitrefseq(tab2[i,2])
      },
      uniprot = {
        y = splituniprot(tab2[i,2])
      },
      DIP = {
        y = splitDIP(tab2[i,2])
      },
      BIOGRID = {
        y = splitBIOGRID(tab2[i,2])
      },
      gene = {
        y = splitgene(tab2[i,2])
      },
      stop("Enter valid database")
    )

    return(list(x,y))
  }
  tab3 <- list()
  type <- c()
  for (i in 1:dim(tab2)[1]) {
    x <- get.int.db(i)
    if (!is.na(x[[1]]) && !is.na(x[[2]])) {
      tab3[[i]] <- x[[1]]
      tab3[[i + dim(tab2)[1]]] <- x[[2]]
      type[i] <- tab[i,12]
    }
    else{
      tab3[[i]] <- NA
      tab3[[i + dim(tab2)[1]]] <- NA

    }
  }

  tab3 <- matrix(unlist(tab3)[!is.na(tab3)],ncol = 2,byrow = FALSE)

  Net <- graph.data.frame(tab3,directed = FALSE)
  if (int.type) {
    type <- type[!is.na(type)]
    Net <-  set.edge.attribute(Net,name = "Interaction type",value = type)
  }
  return(Net)
}

#' split refseq
#' @keywords internal
splitrefseq <- function(x) {
  x1 <- strsplit(x,split = "refseq:",fixed = TRUE)[[1]]
  if (length(x1) == 2) {
    return(strsplit(x1[2],split = "|",fixed = TRUE)[[1]][1])
  }
  return(NA)
}

#' split uniprot
#' @keywords internal
splituniprot <- function(x) {
  x1 <- strsplit(x,split = "uniprotkb:",fixed = TRUE)[[1]]
  if (length(x1) == 2) {
    return(x1[2])
  }
  return(NA)
}

#' split DIP
#' @keywords internal
splitDIP <- function(x) {
  x1 <- strsplit(x,split = "DIP-",fixed = TRUE)[[1]]
  if (length(x1) == 2) {
    return(paste("DIP-",strsplit(x1[2],split = "|",fixed = TRUE)[[1]][1],sep = ""))

  }
  return(NA)
}

#' split BIOGRID
#' @keywords internal
splitBIOGRID <- function(x) {
  x1 <- strsplit(x,split = "BIOGRID:",fixed = TRUE)[[1]]
  if (length(x1) == 2) {
    return(x1[2])
  }
  return(NA)
}

#' split gene
#' @keywords internal
splitgene <- function(x) {
  x1 <- strsplit(x,split = "locuslink:",fixed = TRUE)[[1]]
  if (length(x1) == 2) {
    return(strsplit(x1[2],split = "|",fixed = TRUE)[[1]][1])

  }
  return(NA)
}

#' Network edges
#' @keywords internal
read.network.edges <- function(fileName,cols,sep) {
  if (class(fileName) == "character") {
    tab <- read.table(
      fileName,sep = sep,header = TRUE,stringsAsFactors = FALSE
    )

  }
  else{
    tab <- fileName
  }
  if (cols[1] == "all") {
    return(graph.data.frame(tab,directed = FALSE))
  }
  return(graph.data.frame(tab[,cols],directed = FALSE))
}


#' Compute Matrix
#'
#' Compute several matrices
#' @param net1 an igraph object
#' @param net2 an igraph object for modes \code{BLAST} and \code{FC} if Net2
#' is not NULL, computes the matrix between nodes in Net1 and Net2.
#' @param type what matrice do you want to compute (BLAST, DSD, Distance,
#' FC, Degree)
#' @param mode for type \code{BLAST} : pident or bitscore.
#' For \code{DSD} and \code{Distance} distance, similarity or similarity by
#' components.
#' For \code{FC} the list of ontologies
#' @param byComp for \code{DSD} and \code{Distance} if the similarity or
#' distance must be normalized by components
#' @param database for \code{BLAST} and \code{FC} the database which
#' proteins belongs.
#' @param database2 for \code{BLAST} and \code{FC} the database which
#' proteins belongs.
#' @param normalized if the matrix will be normalized
#' @return The matrix
compute.matrix <- function(net1,net2 = NULL, type = "Distance", mode = "Similarity",
                          database = NULL, database2 = NULL, byComp = TRUE,
                          normalized = TRUE) {
  switch(
    type,
    BLAST = return(
      compute.matrix.Blast(net1,net2,mode,database,database2,normalized)
    ),
    DSD = return(compute.matrix.DSD(net1,mode,byComp,normalized)),
    Distance = return(compute.matrix.Distance(net1,mode,byComp,normalized)),
    FC = return(compute.matrix.FC(net1,net2,mode)),
    Degree = return(compute.matrix.Degree(net1,net2)),
    stop("Enter a valid type")
  )
}

#'DSD
#'@keywords internal
compute.matrix.DSD <- function(net,mode = "Similarity", byComp = TRUE, normalized = TRUE) {
  path <-
    paste(system.file(package = "AligNet"),"DSDmain.py", sep = "/")
  n <- length(V(net))
  prots <- V(net)$name
  DSD <- matrix(Inf,nrow = n,ncol = n)
  dimnames(DSD) <- list(prots,prots)
  cc <- decompose.graph(net)
  for (net in cc) {
    tmp <- tempfile()
    tmp2 <- tempfile()
    write.table(
      get.edgelist(net),quote = FALSE,file = tmp,row.names = FALSE,col.names = FALSE
    )
    command <- paste("python",path,"-m 1 -o",tmp2,tmp)
    response <- system(command, intern = T)
    table <- as.matrix(read.table(paste(tmp2,"DSD1",sep = ".")))
    diam <- max(table) + 1
    if (byComp) {
      if (mode == "Similarity") {
        DSD[rownames(table),colnames(table)] <- (max(table) + 1 - table) / (max(table) +
                                                                             1)
      }
      else{
        DSD[rownames(table),colnames(table)] <- table / max(table)
      }
    }
    else{
      DSD[rownames(table),colnames(table)] <- table
    }
  }
  mmm <- max(DSD[DSD < Inf])
  if (!byComp) {
    if (mode == "Similarity") {
      DSD <- (mmm + 1 - DSD) / (mmm + 1)
    }
    else{
      if (normalized) {
        DSD <- DSD / mmm
      }
    }
  }
  if (mode == "Similarity") {
    DSD[DSD == Inf] <- 0
  }
  DSD[DSD == -Inf] <- 0
  return(DSD)

}

#'Distance
#'@keywords internal
compute.matrix.Distance <- function(net, mode = "Similarity",
                                    byComp = TRUE, normalized = TRUE) {
  n <- length(V(net))
  prots <- V(net)$name
  if (!byComp) {
    dist <- shortest.paths(net)
    mmm <- max(dist[dist < Inf])
    if (mode == "Similarity") {
      dist <- (mmm + 1 - dist) / (mmm + 1)
      dist[dist == - nf] <- 0
      return(dist)
    }
    if (normalized) {
      dist <- dist / mmm
    }
    return(dist)
  }
  if (mode == "Similarity") {
    dist <- matrix(0,nrow = n,ncol = n)
  }
  else{
    dist <- matrix(Inf,nrow = n,ncol = n)
  }
  dimnames(dist) <- list(prots,prots)
  cc <- decompose.graph(net)
  for (nnn in cc) {
    dist2 <- shortest.paths(nnn)
    mmm2 <- max(dist2[dist2 < Inf])
    if (mode == "Similarity") {
      dist2 <- (mmm2 + 1 - dist2) / (mmm2 + 1)
      dist2[dist2 == -Inf] <- 0
      dist2[dist2 == Inf] <- 0
      dist[V(nnn)$name,V(nnn)$name] <- dist2
    }
    if (normalized) {
      dist2 <- dist2 / max(dist2[dist2 < Inf])
    }
    dist[V(nnn)$name,V(nnn)$name] <- dist2
  }
  return(dist)
}

#'FC
#'@keywords internal
compute.matrix.FC <- function(net1,net2 = NULL, gos) {
  onenet <- FALSE
  if (is.null(net2)) {
    net2 <- net1
    onenet <- TRUE
  }
  prots1 <- V(net1)$name
  prots2 <- V(net2)$name
  if (onenet)
    FSim <- diag(1, nrow = length(prots1),ncol = length(prots2))
  else{
    FSim <- matrix(0, nrow = length(prots1),ncol = length(prots2))
  }
  dimnames(FSim) <- list(prots1,prots2)
  if (onenet) {
    for (i in 1:(length(prots1) - 1)) {
      for (j in (i + 1):length(prots1)) {
        fc <- length(intersect(gos[[prots1[i]]],
                               gos[[prots1[j]]])) / length(union(gos[[prots1[i]]],
                                                                 gos[[prots1[j]]]))
        FSim[i,j] <- fc
        FSim[j,i] <- fc
      }
    }

  }
  else{
    for (i in 1:length(prots1)) {
      for (j in 1:length(prots2)) {
        fc <- length(intersect(gos[[prots1[i]]],
                               gos[[prots2[j]]])) / length(union(gos[[prots1[i]]],
                                                                 gos[[prots2[j]]]))
        FSim[i,j] <- fc
      }
    }
  }
  return(FSim)
}

#'Degree
#'@keywords internal
compute.matrix.Degree <- function(net1,net2 = NULL) {
  if (is.null(net2)) {
    net2 <- net1
  }
  deg <- degree(net1)
  deg2 <- degree(net2)
  return(matrix(unlist(lapply(deg, function(i)
    lapply(deg2, function(j)
      abs(i - j)))),nrow = length(deg), byrow = TRUE))

}

#'IC
#'@keywords internal
computeIC <- function(net1,net2) {
  degree1 <- degree(net1)
  degree2 <- degree(net2)
  neigh1 <- neighborhood(net1,order = 1)
  neigh2 <- neighborhood(net2,order = 1)
  lens1 <- unlist(lapply(neigh1,length))
  lens2 <- unlist(lapply(neigh2,length))
  sums1 <- unlist(lapply(neigh1,function(i)
    sum(1 / lens1[i])))
  sums2 <- unlist(lapply(neigh2,function(i)
    sum(1 / lens2[i])))
  mat <- matrix(0,nrow = vcount(net1),ncol = vcount(net2))
  maxneig <- max(degree1,degree2)
  for (i in 1:vcount(net1)) {
    for (j in 1:vcount(net2)) {
      mat[i,j] <- min(sums1[i],sums2[i]) / maxneig
    }
  }
  dimnames(mat) <- list(V(net1)$name,V(net2)$name)
  return(mat)
}

#'No funciona
#'@keywords internal
compute.matrix.Blast <- function(Net1,Net2,mode,database,database2,normalized) {
  tmp <- tempfile()
  buildBlast(Net1,Net2,mode,database,database2,tmp)
  BlastM <- read.matrix.table(tmp)
  if (normalized) {
    return(BlastM / max(BlastM))
  }
  return(BlastM)
}

#'No funciona
#'@keywords internal
buildBlast <- function(Net1,Net2,mode,database,database2,tmp) {
  path <-
    paste(system.file(package = "AligNet"),"Blast.jar", sep = "/")
  tmp1 <- tempfile()
  write(V(Net1)$name,file = tmp1)
  tmp2 <- tempfile()
  if (is.null(Net2)) {
    tmp2 <- tmp1
  }
  else{
    write(V(Net2)$name,file = tmp2)
  }
  if (is.null(database2)) {
    database <- database2
  }
  command <- paste(
    "java",path,"-prot1",tmp1,"-prot2",tmp2,"-db1",
    database,"-db2",database2,"-m",mode,"-outfmt mat -o",tmp
  )
  response <- system(command, intern = T)

}

#'Cluster Network
#'
#'Compute the cluster matrix from the similarity matrix sigma, where all the nodes have more
#'similarity than lambda and the size of cluster is less than k.
#'
#'@param sigma a similarity matrix
#'@param lambda the similarity threshold
#'@param k the size threshold
#'@return The cluster matrix, where M[i,j] is TRUE if node i belongs to the
#'cluster of node j and FALSE otherwise
cluster.network <- function(sigma, lambda = 0, k = dim(sigma)[1]) {
  if (is.data.frame(sigma)) {
    sigma <- read.matrix.col3(sigma,def = 0)

  }
  n <- dim(sigma)[1]
  clustmatrix <- matrix(c(as.matrix(sigma) > lambda),nrow = n,byrow = TRUE)
  sums <- apply(clustmatrix,2,sum)
  ind <- which(sums > k)
  if (length(ind) > 0) {
    lapply(ind,function(i)
      clustmatrix[sort(as.matrix(sigma)[i,],index.return = TRUE)$ix[1:(n - k)],i] <<- FALSE)
  }
  dimnames(clustmatrix) <- dimnames(sigma)
  clustmatrix <- as(clustmatrix,"sparseMatrix")
  return(clustmatrix)

}


#'Extract clusters
#'
#'Compute the subnetworks of \code{Net} from a cluster matrix
#'@param Net an igraph object
#'@param ClustMat a cluster matrix (output of \code{cluster.matrix})
#'@return Clusters in igraph format
extract.clusters <- function(Net, ClustMat) {
  prots <- rownames(ClustMat)
  clusts <- lapply(1:dim(ClustMat)[2], function(i)
    induced.subgraph(Net,prots[ClustMat[,i] == 1]))
  names(clusts) = prots
  return(clusts)

}

#'Display Clusters
#'
#'Given a cluster matrix, see \code{cluster.network} and a network
#'display the cluster matrix with the following colors for the position (i,j):
#'
#'- Yellow if the protein and protein i doesn't belongs to cluster of protein j and
#' proteins don't interact in the network
#'
#'-  Black if the protein and protein i belongs to cluster of protein j but
#' proteins don't interact in the network
#'
#'- Red if the protein and protein i doesn't belongs to cluster of protein j but
#' proteins interact in the network
#'
#'-  Green if the protein and protein i belongs to cluster of protein j and
#' proteins interact in the network
#'
#'@param clust a matrix which is the output1 of \code{cluster.network}
#'@param cols a list of 4 colors if you want to change the default colors
#'@param zoom an integer to define the size of plot or NA, to plot all clusters
#'@param type 0 = "yellow", 1 = "black", 2="red", 3 = "green"
#'@param col color to use with zoom
#'@param Net an igraph object
#'@param ... Additional plotting parameters
display.clusters <- function(clust, Net,zoom = NA, type = 1,
                             cols = c("yellow","black","red","green"),
                             col = cols[type + 1], ...) {
    clust <- as.matrix(clust)
    if (is.na(zoom)) {
      mmm <-   clust + 2 * as.matrix(get.adjacency(Net))
      dimnames(mmm) <- list(1:dim(mmm)[1],1:dim(mmm)[2])
      # par(oma=c(1,1,1,1))
      image2D(
        mmm,
        col = cols,
        axes = F,
        xlab = NA,
        ylab = NA,
        colkey = FALSE,
        ...
      )
      axis(
        1,at = seq(0,1,by = 1 / (dim(clust)[2] - 1)),
        labels = colnames(clust),las = 2,
        pos = -1 / (2 * (dim(clust)[1] - 1))
      )
      axis(
        2,at = seq(0,1,by = 1 / (dim(clust)[1] - 1)),
        labels = rownames(clust),las = 2,pos = -1 / (2 * (dim(clust)[2] - 1))
      )
      legend(
        x = 0.2,y = 1.4,legend = 0:3,fill = cols,horiz = TRUE,bty = "n",xpd = TRUE )

    }
    else{
      mat <- clust + 2 * as.matrix(get.adjacency(Net))
      k <- floor(length(V(Net)) / zoom)
      conts <- unlist(lapply(1:zoom,function(i)
        lapply(1:zoom, function(j)
          cont.type(mat,(i - 1) * k + 1,i * k,(j - 1) * k + 1,j * k,type))))
      mat2 <- matrix(conts,nrow = zoom, byrow = TRUE)
      print("Min")
      print(min(mat2))
      print("Max")
      print(max(mat2))
      image2D(
        mat2, col = ramp.col(col = c("white",col),n = k * k),
        axes = F,
        xlab = NA,
        ylab = NA,
        ...
      )

    }

  }

#' Cont type
#' @keywords internal
cont.type <- function(mat,init1, fin1,init2,fin2,type) {
  return(length(which(mat[init1:fin1,init2:fin2] == type)))
}
RicUIB/AligNet documentation built on Nov. 18, 2017, 8:54 a.m.