R/misc.R

Defines functions workIn setSavedir groupStat groupStatI list2graph get_attr split_matrix list2dt setrownames setcolnames

Documented in get_attr groupStat groupStatI list2dt list2graph setcolnames setrownames setSavedir split_matrix workIn

#' Rename Column Names of a Data Frame or Matrix
#'
#' This function renames the column names of a data frame or matrix to the
#' specified names.
#'
#' @param object A data frame or matrix whose column names will be renamed.
#' @param nm A character vector containing the new names for the columns.
#'
#' @return A data frame or matrix with the new column names.
#' @export
setcolnames <- function(object, nm) {
  if (length(nm) != ncol(object)) {
    stop("Length of 'nm' must equal the number of columns of 'object'")
  }
  colnames(object) <- nm
  object
}

#' Rename Row Names of a Data Frame or Matrix
#'
#' This function renames the row names of a data frame or matrix to the
#' specified names.
#'
#' @param object A data frame or matrix whose row names will be renamed.
#' @param nm A character vector containing the new names for the rows.
#'
#' @return A data frame or matrix with the new row names.
#' @export
setrownames <- function(object, nm) {
  if (length(nm) != nrow(object)) {
    stop("Length of 'nm' must equal the number of rows of 'object'")
  }
  rownames(object) <- nm
  object
}

#' Convert a List with Vector Values to a Long Data.table
#'
#' This function converts a named list with vector values in each element to a
#' long data.table. The list is first flattened into a single vector, and then
#' the data.table is created with two columns: one for the name of the original
#' list element and another for the value.
#'
#' @param x A named list where each element contains a vector of values.
#'
#' @return A long data.table with two columns: 'name' and 'value'.
#' @importFrom data.table data.table
#' @export
#' @examples
#' library(easybio)
#' list2dt(list(a = c(1, 1), b = c(2, 2)))
list2dt <- function(x) {
  data.table(name = rep(names(x), sapply(x, length)), value = unlist(x))
}


#' Split a Matrix into Smaller Sub-matrices by Column or Row
#'
#' This function splits a matrix into multiple smaller matrices by column or row.
#' It is useful for processing large matrices in chunks, such as when performing
#' analysis on a single computer with limited memory.
#'
#' @param matrix A numeric or logical matrix to be split.
#' @param chunk_size The number of columns or rows to include in each smaller matrix.
#' @param column  Divided by column(default is `TRUE`)
#'
#' @return A list of smaller matrices, each with `chunk_size` columns or rows.
#' @export
#' @examples
#' library(easybio)
#' split_matrix(mtcars, chunk_size = 2)
#' split_matrix(mtcars, chunk_size = 5, column = FALSE)
split_matrix <- function(matrix, chunk_size, column = TRUE) {
  n <- ifelse(column, ncol(matrix), nrow(matrix))
  chunk_number <- ifelse(n %% chunk_size == 0,
    n / chunk_size - 1,
    floor(n / chunk_size)
  )
  message(sprintf("matrix was divided to %d chunks", chunk_number + 1))
  start_end <- lapply(0:chunk_number, function(x) {
    c(1, chunk_size) + (chunk_size * x)
  })
  start_end[[chunk_number + 1]][[2]] <- n
  matrix_divided <- lapply(start_end, function(x) {
    if (column) {
      matrix[, x[[1]]:x[[2]], drop = FALSE]
    } else {
      matrix[x[[1]]:x[[2]], , drop = FALSE]
    }
  })

  matrix_divided
}

#' Retrieve Attributes from an R Object
#'
#' This function extracts a specified attribute from an R object.
#'
#' @param x An R object that has attributes.
#' @param attr_name The name of the attribute to retrieve.
#'
#' @return The value of the attribute with the given name.
#' @export
get_attr <- function(x, attr_name) {
  attributes(x)[[attr_name]]
}


#' Convert a Named List into a Graph Based on Overlap
#'
#' This function creates a graph from a named list, where the edges are determined
#' by the overlap between the elements of the list. Each node in the graph represents
#' an element of the list, and the weight of the edge between two nodes is the number
#' of overlapping elements between the two corresponding lists.
#'
#' @param nodes A named list where each element is a vector.
#'
#' @return A data.table representing the graph, with columns for the node names
#'   (`node_1` and `node_2`) and the weight of the edge (`interWeight`).
#' @import data.table
#' @export
list2graph <- function(nodes) {
  comb2 <- combn(names(nodes), m = 2, simplify = FALSE)
  inter <- lapply(comb2, \(x) length(intersect(nodes[[x[[1]]]], nodes[[x[[2]]]])))

  data.table(
    node1 = sapply(comb2, \(x) x[[1]]),
    node2 = sapply(comb2, \(x) x[[2]]),
    interWeight = as.integer(inter)
  )
}


#' Perform Summary Analysis by Group Using an column Index
#'
#' This function applies a specified function to each group defined by an column index,
#' and returns a summary of the results. It is useful for summarizing data by
#' group when the groups are defined by an  column index.
#'
#' @param f A function that takes a single argument and returns a summary of the data.
#' @param x A data frame or matrix containing the data to be summarized.
#' @param idx A list of indices or group names that define the column groups.
#'
#' @return A list containing the summary statistics for each group.
#' @export
#' @examples
#' library(easybio)
#' groupStatI(f = \(x) x + 1, x = mtcars, idx = list(c(1, 10), 2))
groupStatI <- function(f, x, idx) {
  sapply(idx, \(.x) force(f)(x[.x]), simplify = FALSE)
}

#' Perform Summary Analysis by Group Using Regular Expressions
#'
#' This function applies a specified function to each group defined by a regular expression
#' pattern applied to the names of a data object. It is useful for summarizing data when
#' groups are defined by a pattern in the names rather than a specific column or index.
#'
#' @param f A function that takes a single argument and returns a summary of the data.
#' @param x A data frame or matrix containing the data to be summarized.
#' @param xname A character vector containing the names of the variables in `x`.
#' @param patterns A list of regular expressions that define the groups.
#'
#' @return A list containing the summary statistics for each group.
#' @export
#' @examples
#' library(easybio)
#' groupStat(f = \(x) x + 1, x = mtcars, patterns = list("mp", "t"))
groupStat <- function(f, x, xname = colnames(x), patterns) {
  idx <- lapply(patterns, \(.x) which(xname %like% .x))
  groupStatI(f, x, idx)
}


#' Set a Directory for Saving Files
#'
#' This function sets a directory path for saving files, creating the directory if it
#' does not already exist. The directory path is created with the given arguments, which
#' are passed directly to `file.path()`.
#'
#' @param ... Arguments to be passed to `file.path()` to construct the directory path.
#'
#' @return The path to the newly created or existing directory.
#' @export
setSavedir <- function(...) {
  savedir <- file.path(...)
  if (!dir.exists(savedir)) dir.create(savedir, recursive = TRUE)

  return(savedir)
}

#' Perform Operations in a Specified Directory and Return to the Original Directory
#'
#' This function allows you to perform operations in a specified directory and then
#' return to the original directory. It is useful when you need to work with files or
#' directories that are located in a specific location, but you want to return to the
#' original working directory after the operation is complete.
#'
#' @param dir The directory path in which to operate. If the directory does not exist,
#'   it will be created recursively.
#' @param expr An R expression to be evaluated within the specified directory.
#'
#' @return The result of evaluating the expression within the specified directory.
#' @export
workIn <- function(dir, expr) {
  oldwd <- getwd()
  on.exit(setwd(oldwd))
  if (!dir.exists(dir)) dir.create(dir, recursive = TRUE)
  setwd(dir)

  res <- eval(substitute(expr))
  res
}

Try the easybio package in your browser

Any scripts or data that you put into this service are public.

easybio documentation built on April 12, 2025, 1:59 a.m.