#' Return the scaled transcript counts for a set of genes.
#'
#' @param sleuth_obj An existing Sleuth object as generated by sleuth_prep() and fit by sleuth_fit()
#' @param genes A vector containing gene names as strings that should be returned.
#'
#' @return Filtered scaled transcript counts for a specific set of genes.
#' @export
#' @examples
#' # Given a Sleuth object, run the Kruskal-Wallis test of the est_counts factor between isoform groups for all samples.
#' filtered_scaled_transcript_counts(so_nominal, "ABH1", "sex", iqf = 0.25)
filtered_scaled_transcript_counts <- function(sleuth_obj, genes) {
# Obtain est_counts for all genes from the sleuth_obj
# if sleuth object gene agg is false:
if (sleuth_obj$gene_mode == FALSE){
scaled_trancript_counts <- sleuth_obj$obs_norm_filt %>% mutate(est_counts = est_counts * sleuth_obj$est_counts_sf)
# Get all the transcripts for the specific gene
gene_transcript_df <- sleuth_obj$target_mapping[sleuth_obj$target_mapping$ext_gene %in% genes, ]
}
else {
# Obtain est_counts for all genes from the sleuth_obj
scaled_trancript_counts <- sleuth_obj$obs_norm_filt %>% mutate(est_counts = scaled_reads_per_base * sleuth_obj$est_counts_sf)
# Get all the transcripts for the specific gene
gene_transcript_df <- sleuth_obj$target_mapping[sleuth_obj$target_mapping$ext_gene %in% genes, ]
gene_transcript_df <- gene_transcript_df[-1]
colnames(gene_transcript_df) <- c("target_id", "ext_gene")
}
# Filter all the transcript est_counts based on the gene_transcript_df
scaled_trancript_counts <- scaled_trancript_counts[scaled_trancript_counts$target_id %in% gene_transcript_df$target_id, ]
scaled_trancript_counts <- right_join(gene_transcript_df, scaled_trancript_counts, by = "target_id")
# Confirm that the transcripts based on the selected gene do exist in the sleuth_obj
if (any(is.na(scaled_trancript_counts))) {
return("Check to confirm that the gene list is correct.")
}
return(scaled_trancript_counts)
}
#' Run a Kruskal-Wallis One-Way ANOVA on non-parametric data at the isoform level
#' for a selected gene. The quantile of data the test is run upon is calculated
#' from the the est_counts of all the data and the resulting value is used to
#' filter based upon the mean of the est_counts for each individual transcript.
#' For example, for a quantile probability of 0.25, the function will determine
#' the quantile probability for 0.25 and assume this value to be 50. The
#' function will then determine the mean est_counts for each transcript of the
#' gene and if the mean is greater than the quantile calculated value of 50, it
#' will be included when running the test.
#'
#' @param sleuth_obj An existing Sleuth object as generated by sleuth_prep() and fit by sleuth_fit()
#' @param gene The name of a gene as a string to asses the isoform expression level between samples
#' @param grouping A column name in the sample to covariate data frame provided when running sleuth_prep / sleuth_interpret
#' @param iqf A numeric value between 0 and 1 (inclusive) to denote the quantile of data to run the test upon.
#' @param threshold A numeric value for the p-value cutoff when running the resulting pairwise comparison.
#'
#' @return Results of a kruskal-wallis test at the isoform level
#' @export
#' @examples
#' # Given a Sleuth object, run the Kruskal-Wallis test of the est_counts factor between isoform groups for all samples.
#' sleuth_kruskal_wallis(so_nominal, "ABH1", "sex", iqf = 0.25)
sleuth_kruskal_wallis <- function(sleuth_obj, gene, grouping, iqf = 0, threshold = 0.05) {
if (length(gene) >= 2) {
return("Ensure the gene parameter contains only a single gene.")
}
# Retrieve the scaled transcript counts for the gene passed to the function
scaled_trancript_counts <- filtered_scaled_transcript_counts(sleuth_obj = sleuth_obj, genes = gene)
# Merge the sample to covariate data with the filtered transcripts
s2c_df <- so_dropped_nominal$sample_to_covariates
scaled_trancript_counts <- left_join(s2c_df, scaled_trancript_counts)
# Paste together the target id and s2c group to create a new var called measure factor
# Confirm that the user-selected grouping is present in the s2c data frame
if (grouping %in% colnames(s2c_df)) {
scaled_trancript_counts$sample <- paste(scaled_trancript_counts$sample, grouping, sep = "_")
} else {
return("Confirm that grouping is present in the sample to covariates.")
}
# Create a named list with the format (transcript name, mean est_counts) for each transcript
transcript_count_mean <- list()
for (transcript in unique(scaled_trancript_counts$target_id)) {
count_mean <- mean(scaled_trancript_counts[scaled_trancript_counts$target_id == transcript, "est_counts"])
transcript_count_mean[[transcript]] <- c(transcript_count_mean[[transcript]], count_mean)
}
# Calculate the quantile value based on all the est_counts for every transcript for the selected gene with a probability of the user selected iqf parameter
quant_val <- quantile(scaled_trancript_counts$est_counts, probs = iqf)
# Remove all the transcripts that do not have an average est_count that is greater than the quant_val
scaled_trancript_counts <- scaled_trancript_counts[scaled_trancript_counts$target_id %in% c(names(transcript_count_mean[transcript_count_mean > quant_val])), ]
# Calculate the quantile based upon the user-selected iqf that the means that must be calculated must exceed
assign("filtered_scaled_trancript_counts", scaled_trancript_counts, envir = .GlobalEnv)
kw_stat <- kruskal.test(est_counts ~ sample, data = scaled_trancript_counts)
print(kw_stat)
# Run the wilcox pairwise test
sleuth_wilcox_pairwise(filtered_scaled_trancript_counts, grouping, cutoff = threshold)
}
#' Run a Wilcox pairwise comparison for a group across est_counts
#'
#' @param data An existing Sleuth object as generated by sleuth_prep() and fit by sleuth_fit()
#' @param grouping A column name in the sample to covariate dataframe provided when running sleuth_prep / sleuth_interpret
#' @param cutoff The p-value cutoff to be used for the letter based output
#'
#' @return Results of a pairwise Wilcox test to identify the significantly different groups given as letters, where different letters indicate significance
#' @examples
#' # Given a dataframe that contains est_counts for a particular gene and grouping information, run the Wilcox pairwise comparison of the est_counts factor between the selected group.
#' sleuth_kruskal_wallis(scaled_transcript_counts, "sex")
sleuth_wilcox_pairwise <- function(data, grouping, cutoff = 0.05) {
pw_w_test <- pairwise.wilcox.test(data$est_counts, data[, grouping], p.adjust.method = "BH")
pw_w_test$p.value[which(is.nan(pw_w_test$p.value))] <- 1
pw_w_test_letter <- multcompLetters(fullPTable(pw_w_test$p.value), compare = "<", threshold = cutoff, Letters = letters, reverse = FALSE)
pw_w_test_letter$Letters
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.