R/misc.R

Defines functions write_tome_sifter_stats values_to_colors varibow flip_table collapse_along

Documented in collapse_along flip_table values_to_colors varibow write_tome_sifter_stats

#' Cumulatively collapse along a vector
#'
#' @param x The character vector to collapse
#' @param collapse The character to use for collapsing
#'
#' @examples
#'
#' x <- c("","data","exon","i")
#' collapse_along(x)
#'
collapse_along <- function(x,
                           collapse = "/") {

  out <- vector(length = length(x))

   for(i in 1:length(x)) {
    out[i] <- paste(x[1:i], collapse = collapse)
  }

  out
}


#' Transpose a gene x sample data.frame without losing a gene_name or sample_name column
#'
#' @param df The data.frame to transpose
#' @param gene_col The column used for gene names. Default = "gene_name".
#' @param sample_col The column used for sample names. Default = "sample_name".
#'
flip_table <- function(df,
                       gene_col = "gene_name",
                       sample_col = "sample_name") {

  if(gene_col %in% names(df)) {
    genes <- unlist(df[,gene_col])
    df_t <- t(df[,names(df) != gene_col])
    samples <- rownames(df_t)
    df_out <- cbind(samples, as.data.frame(df_t))
    names(df_out) <- c(sample_col,genes)
    rownames(df_out) <- NULL

    df_out

  } else if(sample_col %in% names(df)) {

    samples <- unlist(df[,sample_col])
    df_t <- t(df[,names(df) != sample_col])
    genes <- rownames(df_t)
    df_out <- cbind(genes, as.data.frame(df_t))
    names(df_out) <- c(gene_col, samples)
    rownames(df_out) <- NULL

    df_out

  } else {
    print(paste("No column named",gene_col,"or",sample_col,"found."))
  }

}

#' Generate a rainbow palette with variation in saturation and value
#'
#' @param n_colors The number of colors to generate
#'
varibow <- function(n_colors) {
  sats <- rep_len(c(0.55,0.7,0.85,1),length.out = n_colors)
  vals <- rep_len(c(1,0.8,0.6),length.out = n_colors)
  cols = sub("FF$","",grDevices::rainbow(n_colors, s = sats, v = vals))
  cols = stringr::str_pad(cols, width=7, side="right", pad="0")
    
    return(cols)
}

#' Convert values to colors along a color ramp
#'
#' @param x a numeric vector to be converted to colors
#' @param min_val a number that's used to set the low end of the color scale (default = 0)
#' @param max_val a number that's used to set the high end of the color scale. If NULL (default),
#' use the highest value in x
#' @param colorset a set of colors to interpolate between using colorRampPalette
#' (default = c("darkblue","dodgerblue","gray80","orangered","red"))
#' @param missing_color a color to use for missing (NA) values.
#' @return a character vector of hex color values generated by colorRampPalette. Color values will
#' remain in the same order as x.
values_to_colors <- function(x,
                             min_val = NULL,
                             max_val = NULL,
                             colorset = c("darkblue","dodgerblue","gray80","orange","orangered"),
                             missing_color = "black") {

  heat_colors <- grDevices::colorRampPalette(colorset)(1001)

  if(is.null(max_val)) {
    max_val <- max(x, na.rm = T)
  } else {
    x[x > max_val] <- max_val
  }
  if (is.null(min_val)) {
    min_val <- min(x, na.rm = T)
  } else {
    x[x < min_val] <- min_val
  }

  if(sum(x == min_val, na.rm = TRUE) == length(x)) {
    colors <- rep(heat_colors[1],length(x))
  } else {
    if(length(x) > 1) {
      if(var(x, na.rm = TRUE) == 0) {
        colors <- rep(heat_colors[500], length(x))
      } else {
        heat_positions <- unlist(round((x - min_val) / (max_val - min_val) * 1000 + 1, 0))

        colors <- heat_colors[heat_positions]
      }
    } else {
      colors <- heat_colors[500]
    }
  }

  if(!is.null(missing_color)) {
    colors[is.na(colors)] <- grDevices::rgb(t(grDevices::col2rgb(missing_color)/255))
  }

  colors
}




#' Caculate default stats for sifter and then write to tome
#'
#' In this case, the target tome will need to have exon and intron data matrices, as well as annotations with the base "cluster".
#'
#' @param tome Path to the target tome file.1
#' @param overwrite Whether or not to overwrite existing annotations. Default is NULL, which will use the global settings defined with set_scrattch.io_global_overwrite().
#'
write_tome_sifter_stats <- function(tome,
                                    overwrite = NULL) {


  ## Read in the relevant data from tome
  genes     <- read_tome_gene_names(tome)
  samples   <- read_tome_sample_names(tome)
  anno      <- read_tome_anno(tome)
  exons     <- read_tome_dgCMatrix(tome, "data/t_exon")
  introns   <- read_tome_dgCMatrix(tome, "data/t_intron")
  countsIE  <- exons+introns
  log2cpmIE <- logCPM(countsIE)

  ## Labels for all clusters used in the statistics
  all_clusters <- unique(anno$cluster_id)
  all_clusters <- all_clusters[order(all_clusters)]
  allClust     <- paste0("cluster_",all_clusters)

  ## Generate the count statistics
  count_gt0 <- matrix(0, ncol = length(all_clusters), nrow = nrow(log2cpmIE))
  count_gt1 <- sums <- medianmat <- count_gt0

  for(i in 1:length(all_clusters)) {
    cluster         <- all_clusters[i]
    cluster_samples <- which(anno$cluster_id == cluster)
    cluster_data    <- log2cpmIE[,cluster_samples]
    cluster_counts  <- countsIE[,cluster_samples]
    count_gt0[,i]   <- Matrix::rowSums(cluster_counts > 0)
    count_gt1[,i]   <- Matrix::rowSums(cluster_counts > 1)
    sums[,i]        <- Matrix::rowSums(cluster_counts)
    medianmat[,i]   <- apply(cluster_data,1,median)
  }
  colnames(count_gt0) <- colnames(count_gt1) <- colnames(sums) <-
    colnames(medianmat) <- allClust

  count_gt0 <- cbind(gene = genes, as.data.frame(count_gt0))
  count_gt1 <- cbind(gene = genes, as.data.frame(count_gt1))
  sums      <- cbind(gene = genes, as.data.frame(sums))
  medianmat <- cbind(gene = genes, as.data.frame(medianmat))

  count_n <- anno %>%
    arrange(cluster_id) %>%
    group_by(cluster_id) %>%
    summarise(n_cells = n())

  ## Write the count statistics
  try(write_tome_stats(stats = count_gt0, stats_name = "count_gt0", tome = tome, overwrite = overwrite))
  try(write_tome_stats(stats = count_gt1, stats_name = "count_gt1", tome = tome, overwrite = overwrite))
  try(write_tome_stats(stats = count_n, stats_name = "count_n", tome = tome, overwrite = overwrite))
  try(write_tome_stats(stats = sums, stats_name = "sums", tome = tome, overwrite = overwrite))
  try(write_tome_stats(stats = medianmat, stats_name = "medians", tome = tome, overwrite = overwrite))


}
AllenInstitute/scrattch.io documentation built on Nov. 17, 2021, 10:06 a.m.