R/get_adj_matrix.R

Defines functions get_adj_matrix

Documented in get_adj_matrix

#' Generate adjacency matrix
#'
#' Generate an adjacency matrix representing protein-to-protein connections,
#' based on shared peptides. It is generated by cross product of the incidence
#' matrix of peptide-to-protein mappings.
#'
#' When the incidence matrix has more than 10000 rows, the cross product is
#' converted to a base \code{matrix} in up to five column chunks instead of in
#' a single call. The chunks are balanced and never empty, so the conversion
#' works for any number of proteins (including fewer than five).
#' @param incM a \code{logical} \code{matrix} containing the incidence matrix
#' with its column and row names (respectively, protein and peptide
#' identifiers) and 0 or 1 values indicating whether or not the peptide maps
#' on the corresponding protein.
#' @return a \code{numeric} \code{matrix} containing the adjacency matrix,
#' with value >0 or 0 indicating whether or not two proteins are identified by
#' shared peptide(s). The diagonal is always 0 and both row and column names
#' are the column names of \code{incM}.
#' @examples
#' # Read the tab-delimited file containing the proteome incidence matrix
#' incM_filename <- system.file( "extdata"
#'                              , "incM_example"
#'                              , package = "net4pg"
#'                              , mustWork = TRUE)
#' rownames_filename <- system.file( "extdata"
#'                                   , "peptideIDs_incM_example"
#'                                   , package = "net4pg"
#'                                   , mustWork = TRUE)
#' colnames_filename <- system.file( "extdata"
#'                                  , "proteinIDs_incM_example"
#'                                  , package = "net4pg"
#'                                  , mustWork = TRUE)
#' incM <- read_inc_matrix(incM_filename=incM_filename
#'                  , colnames_filename=colnames_filename
#'                  , rownames_filename=rownames_filename)
#' # Only retain proteins with at least one shared peptide and all peptides
#' # mapping on such proteins.
#' incM_reduced <- reduce_inc_matrix(incM)
#' # Generate adjacency matrix describing protein-to-protein mappings
#' adjM <- get_adj_matrix(incM_reduced)
#'
#' @author Laura Fancello
#'
#' @export
#'
get_adj_matrix <- function(incM) {

  # Sanity Checks  ----------------------------------------------------------
  # Check input arguments
  if (missing(incM) || is.null(incM)) {
    stop("argument 'incM' is missing, with no default")
  }
  if (!is.matrix(incM)) {
    stop("argument 'incM' is not a matrix object")
  }

  # Calculate adjacency matrix  ---------------------------------------------
  protein_ids <- colnames(incM)
  ## Entry [i, j] of the cross product counts the peptides shared by proteins
  ## i and j.
  adjM <- Matrix::crossprod(incM)

  ## For incidence matrices with more than 10000 rows, convert the cross
  ## product into a base matrix chunk by chunk (at most 5 column chunks).
  n_cols <- ncol(adjM)
  n_chunks <- min(5L, n_cols) # never more chunks than columns
  if (nrow(incM) > 10000 && n_chunks > 1L) {
    ## Map every column index to a chunk id in 1..n_chunks using integer
    ## arithmetic. Because n_chunks <= n_cols every chunk is non-empty and
    ## the chunks cover each column exactly once, in order.
    chunk_of_col <- ((seq_len(n_cols) - 1L) * n_chunks) %/% n_cols + 1L
    col_groups <- unname(split(seq_len(n_cols), chunk_of_col))
    dense_chunks <- lapply(col_groups, function(cols) {
      ## drop = FALSE keeps single-column chunks two-dimensional
      as.matrix(adjM[, cols, drop = FALSE])
    })
    adjM <- do.call(cbind, dense_chunks)
  } else {
    adjM <- as.matrix(adjM)
  }

  ## Replace all values on diagonal (representing a protein's connection with
  ## itself) by 0. This allows to have a sparser matrix and speed up the code.
  diag(adjM) <- 0

  ## Put back colnames and rownames
  dimnames(adjM) <- list(protein_ids, protein_ids)

  adjM
}
laurafancello/CCs4prot documentation built on July 1, 2022, 1:34 p.m.