R/tidy-data.R

Defines functions check_dimension set_list_name matrixs_to_df

Documented in matrixs_to_df

#' Tidy matrix to data frame
#' @description This function quickly converts a set of matrices into a data frame.
#' @param x a list of matrix-like object.
#' @param row.names NULL (default) or a string vector for setting row names of matrix.
#' @param col.names NULL (default) or a string vector for setting col names of matrix.
#' @param row.order NULL (default), a numeric vector, "hclust" or "dendrogram" object
#'     for reordering the rows of matrix.
#' @param col.order NULL (default), a numeric vector, "hclust" or "dendrogram" object
#'     for reordering the columns of matrix.
#' @param cluster.type "none" (default), "all", "row" or "col" for reordering matrix.
#' @param cluster.method a character string with the name of agglomeration method. Defaults
#'    to "complete" for numeric matrix, "osa" for ohter type matrix.
#' @param tree.type "none" (default), "all", "row" or "col" for adding cluster tree.
#' @param row.tree.position position of tree for rows, NULL (defualt), "right"  or "left".
#' @param col.tree.position NULL (defualt), "top" or "bottom",  position of tree for columns.
#' @param row.tree.max.height max height of tree for rows, NULL or postive value.
#' @param col.tree.max.height max height of tree for columns, NULL or postive value.
#' @details If there are more than one matrix, cluster only according to the first matrix.
#' @return a data frame.
#' @importFrom utils modifyList
#' @examples
#' matrixs_to_df(x = mtcars)
#' m1 <- matrix(rnorm(100), nrow = 20)
#' m2 <- matrix(rnorm(100), nrow = 20)
#' matrixs_to_df(list(m1 = m1, m2 = m2))
#' @export
matrixs_to_df <- function(x = list(),
                          row.names = NULL,
                          col.names = NULL,
                          row.order = NULL,
                          col.order = NULL,
                          cluster.type = "none",
                          cluster.method = NULL,
                          tree.type = "none",
                          row.tree.position = NULL,
                          col.tree.position = NULL,
                          row.tree.max.height = NULL,
                          col.tree.max.height = NULL
                          ) {
  cluster.type <- match.arg(cluster.type, c("none", "all", "row", "col"))
  tree.type <- match.arg(tree.type, c("none", "all", "row", "col"))
  x <- set_list_name(x)
  if(length(x) == 0) {
    return(new_data_frame())
  }
  name <- names(x)
  first <- x[[1]]
  x <- lapply(x, function(.x) {
    if(!is.matrix(.x)) as.matrix(.x) else .x
  })
  if(length(x) > 1) {
    lapply(x, function(.x) {
      check_dimension(first, .x)
    })
  }
  row.names <- row.names %||% rownames(first) %||% paste0("row", 1:nrow(first))
  col.names <- col.names %||% colnames(first) %||% paste0("col", 1:ncol(first))
  if(length(row.names) != nrow(first))
    stop("'row.names' must have same length as rows of matrix.", call. = FALSE)
  if(length(col.names) != ncol(first))
    stop("'col.names' must have same length as columns of matrix.", call. = FALSE)

  ## check tree.type
  if(tree.type != "none") {
    if(!is.null(row.order) && !inherits(row.order, "hclust")) {
      if(tree.type %in% c("all", "row")) {
        warning("'row.order' not a 'hclust' object.", call. = FALSE)
        tree.type <- switch (tree.type,
                             all = "col",
                             col = "col",
                             "none")
      }
    }
    if(!is.null(col.order) && !inherits(col.order, "hclust")) {
      if(tree.type %in% c("all", "col")) {
        warning("'col.order' not a 'hclust' object.", call. = FALSE)
        tree.type <- switch (tree.type,
                             all = "row",
                             row = "row",
                             "none")
      }
    }
    if(is.null(row.order)) {
      if(tree.type %in% c("all", "row") && !cluster.type %in% c("all", "row")) {
        warning("'tree.type' and 'cluster.type' don't match.", call. = FALSE)
        tree.type <- if(tree.type == "all") "col" else "none"
      }
    }
    if(is.null(col.order)) {
      if(tree.type %in% c("all", "col") && !cluster.type %in% c("all", "col")) {
        warning("'tree.type' and 'cluster.type' don't match.", call. = FALSE)
        tree.type <- if(tree.type == "all") "row" else "none"
      }
    }
  }

  ## check row.order, col.order and cluster.type
  all_not_null <- !is.null(row.order) && !is.null(col.order)
  all_null <- is.null(row.order) && is.null(col.order)
  only_row_null <- is.null(row.order) && !is.null(col.order)
  only_col_null <- !is.null(row.order) && is.null(col.order)

  if(all_not_null) {
    cluster.type <- "none"
  }
  if(only_col_null) {
    cluster.type <- switch (cluster.type,
      none = "none",
      row = "none",
      "col")
    }
  if(only_row_null) {
      cluster.type <- switch (cluster.type,
                              none = "none",
                              col = "none",
                              "row")
  }

  ## handle cluster and tree
  ord <- matrix_reorder(first, cluster.method)
  if(tree.type == "none") {
    row.hc <- col.hc <- NULL
  } else {
    row.hc <- row.order %||% attr(ord, "hclust")$row.hc
    col.hc <- col.order %||% attr(ord, "hclust")$col.hc
    if(tree.type == "row") col.hc <- NULL
    if(tree.type == "col") row.hc <- NULL
  }
  row.tree.data <- col.tree.data <- NULL

  if(!is.null(row.hc)) {
    row.tree.position <- match.arg(row.tree.position %||% "right", c("left", "right"))
    row.tree.data <- tidy_tree_data(row.hc,
                                    row.tree.position,
                                    row.tree.max.height %||% ncol(first) * 0.18)
    if(is.null(row.tree.position) || row.tree.position == "right")
      row.tree.data$x <- row.tree.data$x + ncol(first) + 0.5
  }
  if(!is.null(col.hc)) {
    col.tree.position <- match.arg(col.tree.position %||% "top", c("top", "bottom"))
    col.tree.data <- tidy_tree_data(col.hc,
                                    col.tree.position,
                                    col.tree.max.height %||% nrow(first) * 0.18)
    if(is.null(col.tree.position) || col.tree.position == "top")
      col.tree.data$y <- col.tree.data$y + nrow(first) + 0.5
  }

  if(!is.null(row.order) && !is.numeric(row.order)) {
    row.order <- extract_order(row.order)
  }
  if(!is.null(col.order) && !is.numeric(col.order)) {
    col.order <- extract_order(col.order)
  }

  if(cluster.type != "none") {
    row.order <- switch(cluster.type,
                        all = ord$row.ord,
                        row = ord$row.ord,
                        row.order)
    col.order <- switch(cluster.type,
                        all = ord$col.ord,
                        col = ord$col.ord,
                        col.order)
  }

  if(!is.null(row.order))
    row.names <- row.names[row.order]
  if(!is.null(col.order)) {
    col.names <- col.names[col.order]
  }

  if(!is.null(row.order) || !is.null(col.order)) {
    x <- lapply(x, function(.x) {
      if(is.null(row.order)) row.order <- 1:nrow(first)
      if(is.null(col.order)) col.order <- 1:ncol(first)
      .x[row.order, col.order]
    })
  }

  id <- list(.row.names = rep(row.names, ncol(first)),
             .col.names = rep(col.names, each = nrow(first)),
             .row.id = rep(nrow(first):1, ncol(first)),
             .col.id = rep(1:ncol(first), each = nrow(first))
  )
  out <- modifyList(id, setNames(lapply(x, as.vector), name))
  idx <- intersect(c(".row.names", ".col.names", name, ".row.id", ".col.id"),
                   names(out))
  structure(.Data = new_data_frame(out[idx]),
            .row.names = row.names,
            .col.names = col.names,
            .treedata = list(row.tree.data = row.tree.data,
                             col.tree.data = col.tree.data),
            class = c("matrix_tbl", "tbl_df", "tbl", "data.frame"))
}

#' @noRd
set_list_name <- function(x) {
  if(is.matrix(x) || is.data.frame(x))
    x <- list(x)
  if(!is.list(x)) {
    stop("'x' needs a list object.", call. = FALSE)
  }
  name <- names(x)
  if(is.null(name)) {
    name <- paste0("m", 1:length(x))
  }
  name <- make.unique(name, "")
  names(x) <- name
  x
}
#' @noRd
check_dimension <- function(x, y) {
  x_nm <- as.character(match.call()[["x"]])
  y_nm <- as.character(match.call()[["y"]])
  if(any(dim(x) != dim(y))) {
    msg <- paste0(" Dimension error: ", y_nm, " must have same dimension as ", x_nm)
    stop(msg, call. = FALSE)
  }
}
houyunhuang/ggtriangle documentation built on May 11, 2020, 2:02 p.m.