R/view-composers.R

Defines functions remove_views add_paraview get_weight sample_nystrom_row add_juxtaview get_neighbors add_views create_view create_initial_view

Documented in add_juxtaview add_paraview add_views create_initial_view create_view remove_views

# mistyR view composition functions
# Copyleft (ɔ) 2020 Jovan Tanevski <jovan.tanevski@uni-heidelberg.de>

#' \strong{Start here:} create a basic view composition with an intraview
#'
#' This function is the first one to be called when building a mistyR workflow,
#' starting from view composition. The initial view describes the intraview of
#' the sample.
#'
#' @param data A \code{data.frame} or a \code{tibble} containing expression
#'     information for all markers of interest (in named columns) for each
#'     spatial unit (in rows).
#' @param unique.id A \code{character} vector. Identifier of the current sample.
#'     If not provided (\code{unique.id = NULL}) then an id is automatically
#'     generated by calculating the md5 hash of \code{table}.
#'
#' @return An initial mistyR view composition containing an \emph{intraview} list
#'     item named described with abbreviation "intra" and \emph{data} as provided
#'     in \code{data} and a \emph{misty.uniqueid} list item containing the
#'     provided or automatically calculated \code{unique.id}. A cache folder for
#'     the sample will be automatically created in the working directory as a
#'     subfolder of \file{.misty.temp/} with the same name as \code{unique.id}.
#'
#' @family view composition functions
#'
#' @examples
#' # Create an intrinsic view from the first sample in the dataset synthetic.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#'
#' create_initial_view(expr)
#' @export
create_initial_view <- function(data, unique.id = NULL) {
  init.list <- list(intraview = list(abbrev = "intra", data = data))

  misty.uniqueid <- ifelse(is.null(unique.id),
    digest::digest(data, "md5"),
    unique.id
  )

  view <- append(init.list, list(misty.uniqueid = misty.uniqueid))

  return(view)
}

# make a misty.view class?

#' Create a custom view
#'
#' Create a custom view from a \code{data.frame} or a \code{tibble}.
#'
#' Creating a custom view does not add it to the current view composition.
#'
#' @param name Name of the view. A character vector.
#' @param data A \code{data.frame} or a \code{tibble} with named variables in
#'     columns and rows for each spatial unit ordered as in the intraview.
#' @param abbrev Abbreviated name. A character vector.
#'
#' @return A new mistyR view. A list with a single \code{name}d item described by
#'     the provided \code{abbrev}iation and \emph{data} containing the provided
#'     \code{data}.
#'
#' @seealso \code{\link{add_views}()} for adding created views
#'     to a view composition.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view from the mean expression of the 10 nearest neighbors of
#' # each cell.
#'
#' library(dplyr)
#' library(purrr)
#' library(distances)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # find the 10 nearest neighbors
#' neighbors <- nearest_neighbor_search(distances(as.matrix(pos)), k = 11)[-1, ]
#'
#' # calculate the mean expression of the nearest neighbors for all markers
#' # for each cell in expr
#' nnexpr <- seq_len(nrow(expr)) %>%
#'   map_dfr(~ expr %>%
#'     slice(neighbors[, .x]) %>%
#'     colMeans())
#'
#' create_view("nearest", nnexpr, "nn")
#' @export
create_view <- function(name, data, abbrev = name) {
  new.list <- list(list(abbrev = abbrev, data = data))
  names(new.list)[1] <- name
  return(new.list)
}

#' Add custom views to the current view composition
#'
#' Add one or more custom views to the current view composition.
#'
#' @param current.views the current view composition.
#' @param new.views a view or a list of views created with
#'     \code{\link{create_view}()} or otherwise.
#'
#' @return A mistyR view composition containing an union of views from
#'     \code{current.views} and \code{new.views}.
#'
#' @seealso \code{\link{create_initial_view}()} for
#'     starting a view composition, with an intraview,
#'     \code{\link{create_view}()} for creating a custom view.
#'
#' @family view composition functions
#'
#' @examples
#'
#' # create random views
#' view1 <- data.frame(marker1 = rnorm(100, 10, 2), marker2 = rnorm(100, 15, 3))
#' view2 <- data.frame(marker1 = rnorm(100, 10, 5), marker2 = rnorm(100, 15, 5))
#'
#' misty.views <- create_initial_view(view1)
#'
#' new.view <- create_view("dummyname", view2, "dname")
#' add_views(misty.views, new.view)
#'
#' misty.views %>% add_views(create_view("dummyname", view2, "dname"))
#' @export
add_views <- function(current.views, new.views) {
  assertthat::assert_that(length(current.views) >= 1,
    !is.null(current.views[["intraview"]]),
    !is.null(current.views[["misty.uniqueid"]]),
    msg = "Intraview is missing."
  )

  assertthat::assert_that(is.list(new.views),
    msg = "The new views are not in a list or vector."
  )

  assertthat::assert_that(length(new.views %>%
    unlist(recursive = FALSE)) %% 2 == 0,
  msg = "The new view is malformed. Consider using create_view()."
  )

  assertthat::assert_that(!any(names(new.views) %in% names(current.views)),
    msg = "The list of new views contains a duplicate view name."
  )

  view.abbrev <- current.views %>%
    rlist::list.remove(c("misty.uniqueid")) %>%
    purrr::map_chr(~ .x[["abbrev"]])

  new.view.abbrev <- new.views %>% purrr::map_chr(~ .x[["abbrev"]])

  assertthat::assert_that(!any(new.view.abbrev %in% view.abbrev),
    msg = "The list of new views contains a duplicate abbreviation."
  )

  new.views %>% purrr::walk(function(new.view) {
    # check for naming of each element, abbreviation and existance of a table
    assertthat::assert_that(is.list(new.view),
      !is.null(new.view[["abbrev"]]),
      is.character(new.view[["abbrev"]]),
      !is.null(new.view[["data"]]),
      (is.data.frame(new.view[["data"]]) | tibble::is_tibble(new.view[["data"]])),
      msg = "The new view is malformed. Consider using create_view()."
    )

    # check for row compatibility
    assertthat::assert_that(nrow(current.views[["intraview"]][["data"]]) ==
      nrow(new.view[["data"]]),
    msg = "The new view should have the same number of rows as the intraview."
    )
  })

  # update
  return(append(current.views, new.views))
}




#' Get neighbors from Delauney triangulation
#'
#' Helper function for \code{\link{add_juxtaview}()}.
#'
#' @param ddobj deldir object.
#' @param id cell/spatial unit id.
#'
#' @return a vector of neighbors of \code{id}.
#' @noRd
get_neighbors <- function(ddobj, id) {
  dplyr::union(
    ddobj$delsgs$ind1[which(ddobj$delsgs$ind2 == id)],
    ddobj$delsgs$ind2[which(ddobj$delsgs$ind1 == id)]
  )
}


#' Generate and add a juxtaview to the current view composition
#'
#' The juxtaview captures the expression of all markers within the immediate
#' neighborhood of a spatial unit.
#'
#' The neighborhood of each spatial unit is estimated by constructing a graph by
#' 2D Delaunay triangulation following by removal of edges with length larger than
#' \code{neighbor.thr}. For each spatial unit the juxtaview contains the sum of
#' expressions across its estimated neighbors for each marker.
#'
#' @param current.views the current view composition.
#' @param positions a \code{data.frame}, \code{tibble} or a \code{matrix}
#'     with named coordinates in columns and rows for each spatial unit ordered
#'     as in the intraview.
#' @param neighbor.thr a threshold value used to indicate the largest distance
#'     between two spatial units that can be considered as neighboring.
#' @param prefix a prefix to add to the column names.
#' @param cached a \code{logical} indicating whether to cache the calculated view
#'     after the first calculation and to reuse a previously cached view if it
#'     already exists for this sample.
#' @param verbose a \code{logical} controlling the verbosity of the output of the
#'     function during execution.
#'
#' @return A mistyR view composition with added juxtaview.
#'
#' @seealso \code{\link{create_initial_view}()} for
#'     starting a view composition with an intraview only.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view composition of an intraview and a juxtaview.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>% add_juxtaview(pos, neighbor.thr = 1.5)
#'
#' # preview
#' str(misty.views[["juxtaview.1.5"]])
#' @export
add_juxtaview <- function(current.views, positions, neighbor.thr = 15,
                          prefix = "", cached = FALSE, verbose = TRUE) {
  expr <- current.views[["intraview"]][["data"]]

  cache.location <- R.utils::getAbsolutePath(paste0(
    ".misty.temp", .Platform$file.sep,
    current.views[["misty.uniqueid"]]
  ))

  juxta.cache.file <- paste0(
    cache.location, .Platform$file.sep,
    "juxta.view.", neighbor.thr, ".rds"
  )

  if (cached && !dir.exists(cache.location)) {
    dir.create(cache.location, recursive = TRUE, showWarnings = TRUE)
  }

  if (cached & file.exists(juxta.cache.file)) {
    juxta.view <- readr::read_rds(juxta.cache.file)
  } else {
    if (verbose) message("\nComputing triangulation")
    delaunay <- deldir::deldir(as.data.frame(positions))

    if (verbose) message("\nGenerating juxtaview")
    juxta.view <- seq(nrow(expr)) %>% furrr::future_map_dfr(function(cid) {
      alln <- get_neighbors(delaunay, cid)
      # suboptimal placement of dists, but makes conflict if out of scope
      # probably due to lazy evaluations
      dists <- distances::distances(as.data.frame(positions))
      actualn <- alln[which(dists[alln, cid] <= neighbor.thr)]
      data.frame(t(colSums(expr[actualn, ])))
    }, .progress = verbose)

    if (cached) readr::write_rds(juxta.view, juxta.cache.file)
  }

  return(current.views %>% add_views(create_view(
    paste0("juxtaview.", neighbor.thr),
    juxta.view %>% dplyr::rename_with(~paste0(prefix, .x)),
    paste0("juxta.", neighbor.thr)
  )))
}



#' Sample a Nystrom approximated row
#'
#' Helper function for \code{\link{add_paraview}()}.
#'
#' Kumar et al. 2012. Sampling methods for the Nystrom method.
#' Journal of Machine Learning Research 13(1):981-1006
#' \url{https://www.jmlr.org/papers/volume13/kumar12a/kumar12a.pdf}
#'
#' @param K.approx a list containing elemnts \code{s} - indexes of sampled
#'     columns, \code{C} - sample of columns \code{s} of the original matrix, and
#'    \code{W.plus} - pseudo inverse of W (sample of rows \code{s} from \code{C}).
#' @param k row to be approximated.
#'
#' @return approximated values of row k.
#' @noRd
sample_nystrom_row <- function(K.approx, k) {
  cw <- seq(ncol(K.approx$W.plus)) %>%
    purrr::map_dbl(~ K.approx$C[k, ] %*% K.approx$W.plus[, .x])

  cwct <- seq(nrow(K.approx$C)) %>%
    purrr::map_dbl(~ cw %*% t(K.approx$C)[, .x])

  cwct
}


#' Get weights for paraview
#'
#' Helper function for \code{\link{add_paraview}()}.
#'
#' @param family the family of distances.
#' @param distances  the distances vecor/matrix object to calculate weights from.
#' @param parameter parameter of the family function.
#'
#' @return calculated weights with the same shape as \code{distances}.
#'
#' @noRd
get_weight <- function(family = c(
                         "gaussian", "exponential",
                         "linear", "constant"
                       ),
                       distances, parameter, zoi) {
  expr.family <- match.arg(family)

  distances[distances < zoi] <- Inf
  dim.orig <- dim(distances)

  switch(expr.family,
    "gaussian" = {
      exp(-distances^2 / parameter^2)
    },
    "exponential" = {
      exp(-distances / parameter)
    },
    "linear" = {
      weights <- pmax(0, 1 - distances / parameter)
      dim(weights) <- dim.orig
      weights
    },
    "constant" = {
      weights <- as.numeric(!is.infinite(distances))
      dim(weights) <- dim.orig
      weights
    }
  )
}


#' Generate and add a paraview to the current view composition
#'
#' The paraview captures the expression of all markers in  the broader
#' tissue structure.
#'
#' The paraview is generated by weighted sum of the expression of all spatial
#' units for each marker. The weights for each spatial unit \var{i} are dependent
#' on the \code{family} which can be one of "gaussian", "exponential", "linear"
#' or "constant".
#'
#' If "gaussian" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the radial basis function
#' \deqn{w_{ij} = e^{-\frac{d_{ij}^2}{l^2}}}{w(i,j) = exp(-d(i,j)^2/l^2)}
#'
#' The parameter \code{l} here denotes the "effective" radius of influence.
#'
#' If "exponential" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the exponential function
#' \deqn{w_{ij} = e^{-\frac{d_{ij}}{l}}}{w(i,j) = exp(-d(i,j)/l)}
#'
#' The parameter \code{l} here denotes signaling length. For more information
#' consult Oyler-Yaniv et. al. Immunity 46(4) 2017.
#'
#' If "linear" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the linear function
#' \deqn{w_{ij} = 1- d(i,j)/l}{w(i,j) = 1- d(i,j)/l}
#'
#' The parameter \code{l} here denotes the intersect of the linear function. For
#' distances larger than \code{l} the weight is equal to 0.
#'
#' If "constant" the weights are always 1. The parameter \code{l} here denotes
#' the number of nearest neighbors to take into account if \code{nn} is not
#' defined.
#'
#' Since the generation of the paraview requires the calculation of pairwise distances
#' of all spatial units it can take a significant amount of computation time. The
#' parameters \code{approx} and \code{nn} can be set to speed up the calculation
#' by approximation. The approximation can be achieved by using the Nyström
#' low-rank approximation method or by limiting the calculation of the paraview
#' to a number of nearest neighbors around each spatial unit.
#'
#' If the value of \code{approx} is between 0 and 1 it will be interpreted as
#' fraction of the number of spatial units. Discrete values above 1 will be
#' interpreted as the size of the approximation block. The number of nearest
#' neighbors \code{nn} around each spatial unit are determined using a fast
#' nearest neighbor search.
#'
#' If both \code{approx} and \code{nn} have non-null values, \code{nn}
#' has priority and an approximation based on fast nearest neighbor
#' search will be used to generate the paraview.
#'
#' @inheritParams add_juxtaview
#' @param l effective radius of influence of expression in the broader tissue
#' structure.
#' @param zoi spatial units with distance smaller than the zone of indifference
#' will not be taken into account when generating the paraview.
#' @param family the family f functions used to generate weights. (see Details)
#' @param approx rank of the Nyström approximation matrix. (see Details)
#' @param nn the number of spatial units to be used for approximating the
#' paraview using a fast nearest neighbor search. (see Details)
#'
#'
#' @return A mistyR view composition with added paraview with parameter \code{l}.
#'
#' @seealso \code{\link{create_initial_view}()} for
#'     starting a view composition with an intraview only.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view composition of an intraview and a paraview with radius 10.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>% add_paraview(pos, l = 10)
#'
#' # preview
#' str(misty.views[["paraview.10"]])
#' @export
add_paraview <- function(current.views, positions, l, zoi = 0,
                         family = c(
                           "gaussian", "exponential",
                           "linear", "constant"
                         ),
                         approx = 1, nn = NULL, prefix = "",
                         cached = FALSE, verbose = TRUE) {
  dists <- distances::distances(as.data.frame(positions))
  expr <- current.views[["intraview"]][["data"]]

  cache.location <- R.utils::getAbsolutePath(paste0(
    ".misty.temp", .Platform$file.sep,
    current.views[["misty.uniqueid"]]
  ))

  para.cache.file <- paste0(
    cache.location, .Platform$file.sep,
    "para.view.", l, ".rds"
  )

  if (cached && !dir.exists(cache.location)) {
    dir.create(cache.location, recursive = TRUE, showWarnings = TRUE)
  }

  if (match.arg(family) == "constant" & is.numeric(l) & is.null(nn)) {
    nn <- round(l)
  }

  if (cached & file.exists(para.cache.file)) {
    para.view <- readr::read_rds(para.cache.file)
  } else {
    if (is.null(nn)) {
      if (approx == 1) {
        if (verbose) message("\nGenerating paraview")
        para.view <- seq(nrow(expr)) %>%
          furrr::future_map_dfr(~ data.frame(t(colSums(expr[-.x, ] *
            get_weight(family, dists[, .x][-.x], l, zoi)))),
          .options = furrr::furrr_options(packages = "distances"),
          .progress = verbose
          )
      } else {
        if (approx < 1) approx <- base::round(approx * ncol(dists))

        if (verbose) {
          message("\nApproximating RBF matrix using the Nystrom method")
        }

        assertthat::assert_that(requireNamespace("MASS", quietly = TRUE),
          msg = "The package MASS is required to approximate the paraview using
          the Nystrom method."
        )

        # single Nystrom approximation expert, given RBF with parameter l
        s <- sort(sample.int(n = ncol(dists), size = approx))
        C <- get_weight(family, dists[, s], l, zoi)
        # pseudo inverse of W
        W.plus <- MASS::ginv(C[s, ])
        # return Nystrom list
        K.approx <- list(s = s, C = C, W.plus = W.plus)
        para.view <- seq(nrow(expr)) %>%
          furrr::future_map_dfr(~ data.frame(t(colSums(expr[-.x, ] *
            sample_nystrom_row(K.approx, .x)[-.x]))),
          .progress = verbose
          )
      }
    } else {
      if (verbose) {
        message(
          "\nGenerating paraview using ", nn,
          " nearest neighbors per unit"
        )
      }
      para.view <- seq(nrow(expr)) %>%
        furrr::future_map_dfr(function(rowid) {
          knn <- distances::nearest_neighbor_search(dists, nn + 1,
            query_indices = rowid
          )[-1, 1]
          data.frame(t(colSums(expr[knn, ] *
            get_weight(family, dists[knn, rowid], l, zoi))))
        },
        .options = furrr::furrr_options(packages = "distances"),
        .progress = verbose
        )
    }
    if (cached) readr::write_rds(para.view, para.cache.file)
  }
  
  return(current.views %>% add_views(create_view(
    paste0("paraview.", l),
    para.view %>% dplyr::rename_with(~paste0(prefix, .x)),
    paste0("para.", l)
  )))
}


#' Remove views from the current view composition
#'
#' Remove one or more views from the view composition.
#'
#' The intraview and the unique id cannot be removed with this function.
#'
#' @param current.views the current view composition.
#' @param view.names the names of one or more views to be removed.
#'
#' @return A mistyR view composition with \code{view.names} views removed.
#'
#' @family view composition functions
#'
#' @examples
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>%
#'   add_juxtaview(pos, neighbor.thr = 1.5) %>%
#'   add_paraview(pos, l = 10)
#'
#' # preview
#' str(misty.views)
#'
#' # remove juxtaview and preview
#' misty.views %>%
#'   remove_views("juxtaview.1.5") %>%
#'   str()
#'
#' # remove juxtaview and paraview and preview
#' misty.views %>%
#'   remove_views(c("juxtaview.1.5", "paraview.10")) %>%
#'   str()
#' @export
remove_views <- function(current.views, view.names) {
  to.match <- !(view.names %in% c("intraview", "misty.uniqueid"))
  view.indexes <- match(view.names[to.match], names(current.views))
  if (length(view.indexes) > 0) {
    current.views %>% rlist::list.remove(view.indexes)
  } else {
    current.views
  }
}
saezlab/misty documentation built on March 25, 2024, 4:11 p.m.