R/util.R

Defines functions check_matrix read_csc_mtx read_tenx_metrics convert_char_na h5_list_transpose h5_list_cell_metadata get_list_path set_list_path stm

Documented in check_matrix convert_char_na get_list_path h5_list_cell_metadata h5_list_transpose read_csc_mtx read_tenx_metrics set_list_path stm

#' Write a stderr message with a leading date/time stamp
#'
#' @param x a character object with the message to display
#'
#' @return no return
#' @export
stm <- function(x) {
  assertthat::assert_that(class(x) == "character")
  assertthat::assert_that(length(x) == 1)
  
  write(paste0("[",Sys.time(),"] ",x), stderr())
}

#' Set a list value using path-style targeting
#'
#' @param l a list object
#' @param target a character object specifying the "path" to the target.
#' @param value an object or value to insert at the target location
#'
#' @return a list object
#' @export
#'
#' @examples
#'
#' l <- list(forest = list(country = "USA",
#'                         maple = list(height = 100)))
#'
#' l <- set_list_path(l,
#'                    target = "/forest/spruce/height",
#'                    value = 30)
#'
#' l <- set_list_path(l,
#'                    target = "/forest/maple/diameter",
#'                    value = 6)
#'
#' l <- set_list_path(l,
#'                    target = "/valley/country",
#'                    value = "Canada")
#'
set_list_path <- function(l,
                          target,
                          value) {
  if(target == "/") {
    l <- list(value)
  } else {
    target <- sub("^/","",target)
    if(grepl("/",target)) {
      parent_target <- sub("/.+","",target)
      nest_target <- sub("^[^/]+", "", target)
      l[[parent_target]] <- set_list_path(l[[parent_target]],
                                          target = nest_target,
                                          value)
    } else {
      if(length(l) == 0) {
        l <- list(value)
        names(l) <- target
      } else {
        if(target %in% names(l)) {
          l[[target]] <- value
        } else {
          new_l <- list(value)
          names(new_l) <- target
          l <- c(l, new_l)
        }
        
      }
      
      
    }
  }
  
  l
}

#' Retrieve an object from a list using path-style targeting
#'
#' @param l a list object
#' @param target  a character object specifying the "path" to the target.
#'
#' @return a list object
#' @export
#'
#' @examples
#'
#' l <- list(forest = list(country = "USA",
#'                         maple = list(height = 100)))
#'
#' maple_height <- get_list_path(l,
#'                               target = "/forest/maple/height")
#'
#' forest_list <- get_list_path(l,
#'                              target = "/forest")
#'
#' forest_country <- get_list_path(l,
#'                                 target = "/forest/country")
#'
get_list_path <- function(l,
                          target) {
  if(target == "/") {
    return(l)
  } else {
    target <- sub("^/","",target)
    if(grepl("/",target)) {
      parent_target <- sub("/.+","",target)
      nest_target <- sub("^[^/]+", "", target)
      l <- get_list_path(l[[parent_target]],
                         target = nest_target)
    } else {
      return(l[[target]])
    }
  }
  
  l
}

#' Extract a data.frame of cell metadata from an h5_list object
#'
#' @param h5_list an h5_list object
#'
#' @return a data.frame containing barcodes and all metadata stored in h5_list$matrix$observations.
#' @export
#'
h5_list_cell_metadata <- function(h5_list) {
  
  assertthat::assert_that(class(h5_list) == "list")
  assertthat::assert_that("matrix" %in% names(h5_list))
  
  meta <- data.frame(barcodes = h5_list$matrix$barcodes,
                     stringsAsFactors = FALSE)
  
  if("observations" %in% names(h5_list$matrix)) {
    meta <- cbind(meta,
                  as.data.frame(h5_list$matrix$observations,
                                stringsAsFactors = FALSE))
  }
  
  meta
}


#' Transpose an h5_list object
#'
#' This will transpose sparse matrices stored in an h5_list object, and retain the association of observations with columns and features with rows.
#'
#' @param h5_list an h5_list object generated by running h5dump() on a .h5 file.
#' @param sparse_matrices A character vector specifying which objects in the h5_list are sparse matrices which should be transposed.
#'
#' @return a modified h5_list object
#' @export
#'
h5_list_transpose <- function(h5_list,
                              sparse_matrices = "matrix") {
  
  assertthat::assert_that(class(h5_list) == "list")
  assertthat::assert_that(sum(sparse_matrices %in% names(h5_list)) == length(h5_list))
  
  for(mat in sparse_matrices) {
    use_obs <- "observations" %in% names(h5_list[[mat]])
    use_feat <- "features" %in% names(h5_list[[mat]])
    
    if(use_obs) {
      obs <- h5_list[[mat]]$observations
    }
    if(use_feat) {
      feat <- h5_list[[mat]]$features
    }
    h5_list <- BarMixer::h5_list_convert_to_dgCMatrix(h5_list,
                                                      target = mat)
    
    sparse_mat <- paste0(mat, "_dgCMatrix")
    h5_list[[sparse_mat]] <- Matrix::t(h5_list[[sparse_mat]])
    
    if(use_obs) {
      h5_list[[mat]]$features <- obs
    }
    if(use_feat) {
      h5_list[[mat]]$observations <- feat[names(feat) != "id"]
    }
    
    h5_list <- BarMixer::h5_list_convert_from_dgCMatrix(h5_list,
                                                        target = mat)
    
  }
  
  h5_list
}

#' Convert "NA" character entries to actual NAs
#'
#' @param x a character vector
#'
#' @return a character vector with NAs
#' @export
#'
convert_char_na <- function(x) {
  assertthat::assert_that(class(x) == "character")
  x[x == "NA"] <- NA
  x
}

#' Read and correct formatting of a 10x metrics_summary.csv file
#'
#' @param metrics_csv path to a metrics_summary.csv file generated by cellranger
#'
#' @return a data.frame of cellranger run metrics
#' @export
read_tenx_metrics <- function(metrics_csv) {
  
  metrics <- read.csv(metrics_csv)
  names(metrics) <- tolower(gsub("\\.","_",names(metrics)))
  metrics <- lapply(metrics,
                    function(x) {
                      gsub("[,%]","",x)
                    })
  metrics <- lapply(metrics,
                    as.numeric)
  
  as.data.frame(metrics)
  
}


#' Read a CITE-seq-Count .mtx directory as a standard R matrix
#'
#' @param csc_dir a directory containing matrix.mtx, barcodes.tsv, and features.tsv. gzipped versions will also work.
#'
#' @return a matrix
#' @export
read_csc_mtx <- function(csc_dir) {
  mtx_file <- list.files(csc_dir, pattern = "matrix.mtx")
  bc_file <- list.files(csc_dir, pattern = "barcodes.tsv")
  feat_file <- list.files(csc_dir, pattern = "features.tsv")
  
  mat <- Matrix::readMM(file.path(csc_dir,mtx_file))
  mat <- as(mat, "matrix")
  rownames(mat) <- data.table::fread(file.path(csc_dir, feat_file), header = FALSE)[[1]]
  colnames(mat) <- data.table::fread(file.path(csc_dir, bc_file), header = FALSE)[[1]]
  
  mat
}

#' Simple function to check for matrix or dgCMatrix classes for assertions
#'
#' @param x an object to check for matrix or dgCMatrix classes
#'
#' @return a logical value
#'
check_matrix <- function(x) {
  res <- FALSE
  x_classes <- class(x)
  if("matrix" %in% x_classes) {
    res <- TRUE
  }
  if("dgCMatrix" %in% x_classes) {
    res <- TRUE
  }
  res
}
AllenInstitute/BarMixer documentation built on Dec. 17, 2021, 8:42 a.m.