#' Cumulatively collapse along a vector
#'
#' @param x The character vector to collapse
#' @param collapse The character to use for collapsing
#'
#' @examples
#'
#' x <- c("","data","exon","i")
#' collapse_along(x)
#'
collapse_along <- function(x,
collapse = "/") {
out <- vector(length = length(x))
for(i in 1:length(x)) {
out[i] <- paste(x[1:i], collapse = collapse)
}
out
}
#' Transpose a gene x sample data.frame without losing a gene_name or sample_name column
#'
#' @param df The data.frame to transpose
#' @param gene_col The column used for gene names. Default = "gene_name".
#' @param sample_col The column used for sample names. Default = "sample_name".
#'
flip_table <- function(df,
gene_col = "gene_name",
sample_col = "sample_name") {
if(gene_col %in% names(df)) {
genes <- unlist(df[,gene_col])
df_t <- t(df[,names(df) != gene_col])
samples <- rownames(df_t)
df_out <- cbind(samples, as.data.frame(df_t))
names(df_out) <- c(sample_col,genes)
rownames(df_out) <- NULL
df_out
} else if(sample_col %in% names(df)) {
samples <- unlist(df[,sample_col])
df_t <- t(df[,names(df) != sample_col])
genes <- rownames(df_t)
df_out <- cbind(genes, as.data.frame(df_t))
names(df_out) <- c(gene_col, samples)
rownames(df_out) <- NULL
df_out
} else {
print(paste("No column named",gene_col,"or",sample_col,"found."))
}
}
#' Generate a rainbow palette with variation in saturation and value
#'
#' @param n_colors The number of colors to generate
#'
varibow <- function(n_colors) {
sats <- rep_len(c(0.55,0.7,0.85,1),length.out = n_colors)
vals <- rep_len(c(1,0.8,0.6),length.out = n_colors)
cols = sub("FF$","",grDevices::rainbow(n_colors, s = sats, v = vals))
cols = stringr::str_pad(cols, width=7, side="right", pad="0")
return(cols)
}
#' Convert values to colors along a color ramp
#'
#' @param x a numeric vector to be converted to colors
#' @param min_val a number that's used to set the low end of the color scale (default = 0)
#' @param max_val a number that's used to set the high end of the color scale. If NULL (default),
#' use the highest value in x
#' @param colorset a set of colors to interpolate between using colorRampPalette
#' (default = c("darkblue","dodgerblue","gray80","orangered","red"))
#' @param missing_color a color to use for missing (NA) values.
#' @return a character vector of hex color values generated by colorRampPalette. Color values will
#' remain in the same order as x.
values_to_colors <- function(x,
min_val = NULL,
max_val = NULL,
colorset = c("darkblue","dodgerblue","gray80","orange","orangered"),
missing_color = "black") {
heat_colors <- grDevices::colorRampPalette(colorset)(1001)
if(is.null(max_val)) {
max_val <- max(x, na.rm = T)
} else {
x[x > max_val] <- max_val
}
if (is.null(min_val)) {
min_val <- min(x, na.rm = T)
} else {
x[x < min_val] <- min_val
}
if(sum(x == min_val, na.rm = TRUE) == length(x)) {
colors <- rep(heat_colors[1],length(x))
} else {
if(length(x) > 1) {
if(var(x, na.rm = TRUE) == 0) {
colors <- rep(heat_colors[500], length(x))
} else {
heat_positions <- unlist(round((x - min_val) / (max_val - min_val) * 1000 + 1, 0))
colors <- heat_colors[heat_positions]
}
} else {
colors <- heat_colors[500]
}
}
if(!is.null(missing_color)) {
colors[is.na(colors)] <- grDevices::rgb(t(grDevices::col2rgb(missing_color)/255))
}
colors
}
#' Caculate default stats for sifter and then write to tome
#'
#' In this case, the target tome will need to have exon and intron data matrices, as well as annotations with the base "cluster".
#'
#' @param tome Path to the target tome file.1
#' @param overwrite Whether or not to overwrite existing annotations. Default is NULL, which will use the global settings defined with set_scrattch.io_global_overwrite().
#'
write_tome_sifter_stats <- function(tome,
overwrite = NULL) {
## Read in the relevant data from tome
genes <- read_tome_gene_names(tome)
samples <- read_tome_sample_names(tome)
anno <- read_tome_anno(tome)
exons <- read_tome_dgCMatrix(tome, "data/t_exon")
introns <- read_tome_dgCMatrix(tome, "data/t_intron")
countsIE <- exons+introns
log2cpmIE <- logCPM(countsIE)
## Labels for all clusters used in the statistics
all_clusters <- unique(anno$cluster_id)
all_clusters <- all_clusters[order(all_clusters)]
allClust <- paste0("cluster_",all_clusters)
## Generate the count statistics
count_gt0 <- matrix(0, ncol = length(all_clusters), nrow = nrow(log2cpmIE))
count_gt1 <- sums <- medianmat <- count_gt0
for(i in 1:length(all_clusters)) {
cluster <- all_clusters[i]
cluster_samples <- which(anno$cluster_id == cluster)
cluster_data <- log2cpmIE[,cluster_samples]
cluster_counts <- countsIE[,cluster_samples]
count_gt0[,i] <- Matrix::rowSums(cluster_counts > 0)
count_gt1[,i] <- Matrix::rowSums(cluster_counts > 1)
sums[,i] <- Matrix::rowSums(cluster_counts)
medianmat[,i] <- apply(cluster_data,1,median)
}
colnames(count_gt0) <- colnames(count_gt1) <- colnames(sums) <-
colnames(medianmat) <- allClust
count_gt0 <- cbind(gene = genes, as.data.frame(count_gt0))
count_gt1 <- cbind(gene = genes, as.data.frame(count_gt1))
sums <- cbind(gene = genes, as.data.frame(sums))
medianmat <- cbind(gene = genes, as.data.frame(medianmat))
count_n <- anno %>%
arrange(cluster_id) %>%
group_by(cluster_id) %>%
summarise(n_cells = n())
## Write the count statistics
try(write_tome_stats(stats = count_gt0, stats_name = "count_gt0", tome = tome, overwrite = overwrite))
try(write_tome_stats(stats = count_gt1, stats_name = "count_gt1", tome = tome, overwrite = overwrite))
try(write_tome_stats(stats = count_n, stats_name = "count_n", tome = tome, overwrite = overwrite))
try(write_tome_stats(stats = sums, stats_name = "sums", tome = tome, overwrite = overwrite))
try(write_tome_stats(stats = medianmat, stats_name = "medians", tome = tome, overwrite = overwrite))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.