SeuratForMayer2018: Seurat : R Toolkit for Single Cell Genomics

#' @include seurat.R
NULL
#' SNN Graph Construction
#'
#' Constructs a Shared Nearest Neighbor (SNN) Graph for a given dataset. We
#' first determine the k-nearest neighbors of each cell (defined by k.param *
#' k.scale). We use this knn graph to construct the SNN graph by calculating the
#' neighborhood overlap (Jaccard distance) between every cell and its k.param *
#' k.scale nearest neighbors (defining the neighborhood for each cell as the
#' k.param nearest neighbors).
#'
#' @param object Seurat object
#' @param genes.use A vector of gene names to use in construction of SNN graph
#' if building directly based on expression data rather than a dimensionally
#' reduced representation (i.e. PCs).
#' @param reduction.type Name of dimensional reduction technique to use in
#' construction of SNN graph. (e.g. "pca", "ica")
#' @param dims.use A vector of the dimensions to use in construction of the SNN
#' graph (e.g. To use the first 10 PCs, pass 1:10)
#' @param k.param Defines k for the k-nearest neighbor algorithm
#' @param k.scale Granularity option for k.param
#' @param plot.SNN Plot the SNN graph
#' @param prune.SNN Sets the cutoff for acceptable Jaccard distances when
#' computing the neighborhood overlap for the SNN construction. Any edges with
#' values less than or equal to this will be set to 0 and removed from the SNN
#' graph. Essentially sets the strigency of pruning (0 --- no pruning, 1 ---
#' prune everything).
#' @param print.output Whether or not to print output to the console
#' @param distance.matrix Build SNN from distance matrix (experimental)
#' @param force.recalc Force recalculation of SNN.
#' @importFrom FNN get.knn
#' @importFrom igraph plot.igraph graph.adjlist graph.adjacency E
#' @importFrom Matrix sparseMatrix
#' @return Returns the object with object@@snn filled
#' @export
BuildSNN <- function(
  object,
  genes.use = NULL,
  reduction.type = "pca",
  dims.use = NULL,
  k.param = 10,
  k.scale = 10,
  plot.SNN = FALSE,
  prune.SNN = 1/15,
  print.output = TRUE,
  distance.matrix = NULL,
  force.recalc = FALSE
) {
  if (! is.null(x = distance.matrix)) {
    data.use <- distance.matrix
  } else if (is.null(x = genes.use) && is.null(x = dims.use)) {
    genes.use <- object@var.genes
    data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
  } else if (! is.null(x = dims.use)) {
    data.use <- GetCellEmbeddings(object, reduction.type = reduction.type,
                                    dims.use = dims.use)
  } else if (!is.null(genes.use) && is.null(dims.use)) {
    data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
  } else {
      stop("Data error!")
  }
  parameters.to.store <- as.list(environment(), all = TRUE)[names(formals("BuildSNN"))]
  if (CalcInfoExists(object, "BuildSNN")){
    parameters.to.store$object <- NULL
    old.parameters <- GetAllCalcParam(object, "BuildSNN")
    old.parameters$time <- NULL
    if(all(old.parameters %in% parameters.to.store)){
      warning("Build parameters exactly match those of already computed and stored SNN. To force recalculation, set force.recalc to TRUE.")
      return(object)
    }
  }
  object <- SetCalcParams(object = object,
                          calculation = "BuildSNN",
                          ... = parameters.to.store)
  n.cells <- nrow(x = data.use)
  if (n.cells < k.param) {
    warning("k.param set larger than number of cells. Setting k.param to number of cells - 1.")
    k.param <- n.cells - 1
  }
  # find the k-nearest neighbors for each single cell
  if (is.null(x = distance.matrix)) {
    my.knn <- get.knn(
      data <- as.matrix(x = data.use),
      k = min(k.scale * k.param, n.cells - 1)
    )
    nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)])
    nn.large <- my.knn$nn.index
  } else {
    warning("Building SNN based on a provided distance matrix")
    n <- nrow(x = distance.matrix)
    k.for.nn <- k.param * k.scale
    knn.mat <- matrix(data = 0, ncol = k.for.nn, nrow = n)
    knd.mat <- knn.mat
    for (i in 1:n){
      knn.mat[i, ] <- order(data.use[i, ])[1:k.for.nn]
      knd.mat[i, ] <- data.use[i, knn.mat[i, ]]
    }
    nn.large <- knn.mat
    nn.ranked <- knn.mat[, 2:k.param]
  }
  w <- CalcSNNSparse(
    cell.names = object@cell.names,
    k.param = k.param,
    nn.large = nn.large,
    nn.ranked = nn.ranked,
    prune.SNN = prune.SNN,
    print.output = print.output
  )
  object@snn <- w
  if (plot.SNN) {
    if (length(x = object@dr$tsne@cell.embeddings) < 1) {
      warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
    } else {
      net <- graph.adjacency(
        adjmatrix = w,
        mode = "undirected",
        weighted = TRUE,
        diag = FALSE
      )
      plot.igraph(
        x = net,
        layout = as.matrix(x = object@dr$tsne@cell.embeddings),
        edge.width = E(graph = net)$weight,
        vertex.label = NA,
        vertex.size = 0
      )
    }
  }
  return(object)
}

# Function to convert the knn graph into the snn graph. Stored in a sparse
# representation.

# @param cell.names    A vector of cell names which will correspond to the row/
#                      column names of the SNN
# @param k.param       Defines nearest neighborhood when computing NN graph
# @param nn.large      Full KNN graph (computed with get.knn with k set to
#                      k.param * k.scale)
# @param nn.ranked     Subset of full KNN graph that only contains the first
#                      k.param nearest neighbors. Used to define Jaccard
#                      distances between any two cells
# @param prune.snn     Sets the cutoff for acceptable Jaccard distances when
#                      computing the neighborhood overlap for the SNN
#                      construction. Any edges with values less than or equal to
#                      this will be set to 0 and removed from the SNN graph.
#                      Essentially sets the strigency of pruning (0 --- no
#                      pruning, 1 --- prune everything).
# @param print.output  Whether or not to print output to the console
# @return              Returns an adjacency matrix representation of the SNN
#                      graph
#
#' @importFrom utils txtProgressBar setTxtProgressBar
#
CalcSNNSparse <- function(
  cell.names,
  k.param,
  nn.large,
  nn.ranked,
  prune.SNN,
  print.output
) {
  n.cells <- length(cell.names)
  counter <- 1
  idx1 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
  idx2 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
  edge.weight <- vector(mode = "double", length = n.cells ^ 2 / k.param)
  id <- 1
  # fill out the adjacency matrix w with edge weights only between your target
  # cell and its k.scale*k.param-nearest neighbors
  # speed things up (don't have to calculate all pairwise distances)
  # define the edge weights with Jaccard distance
  if (print.output) {
    print("Constructing SNN")
    pb <- txtProgressBar(min = 0, max = n.cells, style = 3)
  }
  for (i in 1:n.cells) {
    for (j in 1:ncol(x = nn.large)) {
      s <- intersect(x = nn.ranked[i, ], y = nn.ranked[nn.large[i, j], ])
      u <- union(nn.ranked[i, ], nn.ranked[nn.large[i, j], ])
      e <- length(x = s) / length(x = u)
      if (e > prune.SNN) {
        idx1[id] <- i
        idx2[id] <- nn.large[i, j]
        edge.weight[id] <- e
        id <- id + 1
      }
    }
    if (print.output) {
      setTxtProgressBar(pb = pb, value = i)
    }
  }
  if (print.output) {
    close(con = pb)
  }
  idx1 <- idx1[! is.na(x = idx1) & idx1 != 0]
  idx2 <- idx2[! is.na(x = idx2) & idx2 != 0]
  edge.weight <- edge.weight[! is.na(x = edge.weight) & edge.weight != 0]
  w <- sparseMatrix(
    i = idx1,
    j = idx2,
    x = edge.weight,
    dims = c(n.cells, n.cells)
  )
  diag(x = w) <- 1
  rownames(x = w) <- cell.names
  colnames(x = w) <- cell.names
  return(w)
}

# This function calculates the pairwise connectivity of clusters.

# @param object  Seurat object containing the snn graph and cluster assignments
# @return        matrix with all pairwise connectivities calculated

CalcConnectivity <- function(object) {
  SNN <- object@snn
  cluster.names <- unique(x = object@ident)
  num.clusters <- length(x = cluster.names)
  connectivity <- matrix(data = 0, nrow = num.clusters, ncol = num.clusters)
  rownames(x = connectivity) <- cluster.names
  colnames(x = connectivity) <- cluster.names
  n <- 1
  for (i in cluster.names) {
    for (j in cluster.names[-(1:n)]) {
      subSNN <- SNN[
        match(x = WhichCells(object = object, ident = i), colnames(x = SNN)),
        match(x = WhichCells(object = object, ident = j), rownames(x = SNN))
      ]
      if (is.object(x = subSNN)) {
        connectivity[i, j] <- sum(subSNN) / (nrow(x = subSNN) * ncol(x = subSNN))
      } else {
        connectivity[i, j] <- mean(x = subSNN)
      }
    }
    n <- n + 1
  }
  return(connectivity)
}

mayer-lab/SeuratForMayer2018 documentation built on May 25, 2019, 9:34 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mayer-lab/SeuratForMayer2018
Seurat : R Toolkit for Single Cell Genomics

R/snn.R
In mayer-lab/SeuratForMayer2018: Seurat : R Toolkit for Single Cell Genomics

R Package Documentation

Browse R Packages

We want your feedback!

mayer-lab/SeuratForMayer2018 Seurat : R Toolkit for Single Cell Genomics

R/snn.R In mayer-lab/SeuratForMayer2018: Seurat : R Toolkit for Single Cell Genomics

R Package Documentation

Browse R Packages

We want your feedback!

mayer-lab/SeuratForMayer2018
Seurat : R Toolkit for Single Cell Genomics

R/snn.R
In mayer-lab/SeuratForMayer2018: Seurat : R Toolkit for Single Cell Genomics