#' @export
# Takes a tx2gene, filepath for quant.sf files, sample prefix (e.g. "HomoSapiens"), and list of sample names (e.g. "Male_1", "12345", "sample_A")
# Returns a tibble of count data for all subjects, and a tibble of TPM data for all subjects
getQuantData <- function(tx2gene_path, salmon_path, species_prefix, sample_names) {
# Normal installation
require(BiocManager)
require(tidyverse)
require(stringr)
# BiocManager installation
require(tximport)
require(GenomicFeatures) # Need mysql-client installed on system
# Reading in the tx2gene object
tx2gene <- as_tibble(read.delim(tx2gene_path, header = TRUE))
# Looping through the quant.sf output files from salmon, creating tximport files that are better structured
file_names <- dir(salmon_path, pattern =".sf")
tximport_list <- vector("list", length = length(file_names))
for (i in 1:length(file_names)) {
tximport_list[[i]] <- tximport(paste(salmon_path, file_names[i], sep = ""), type = "salmon", txOut = TRUE, countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM")
}
# Generating names for columns in the tables of quantification data
col_names <- vector("list", length = length(sample_names) + 1)
col_names[1] <- "Gene.name"
for (i in seq_along(sample_names)) {
col_names[i+1] <- paste(species_prefix, sample_names[i], sep = "_")
}
# Creating a list of the tibble objects with count data for individual subjects
counts_sep = vector("list", length = length(tximport_list))
for (i in 1:length(tximport_list)) {
rownames <- row.names(tximport_list[[i]][['counts']])
counts_sep[[i]] <- as_tibble(tximport_list[[i]][['counts']]) %>%
add_column(Gene.name = rownames) %>%
dplyr::select(Gene.name, V1)
}
# Merging all individual subject count data into a single dataframe
all_counts <- counts_sep[[1]]
for (i in c(2:length(counts_sep))) {
all_counts <- full_join(all_counts,
counts_sep[[i]],
by = "Gene.name")
}
colnames(all_counts) <- col_names
# Creating a list of the tibble objects with TPM data for individual subjects
tpm_sep = vector("list", length = length(tximport_list))
for (i in 1:length(tximport_list)) {
rownames <- row.names(tximport_list[[i]][['counts']])
tpm_sep[[i]] <- as_tibble(tximport_list[[i]][['counts']]) %>%
add_column(Gene.name = rownames) %>%
dplyr::select(Gene.name, V1)
}
# Merging all individual subject TPM data into a single dataframe
all_tpm <- tpm_sep[[1]]
for (i in c(2:length(tpm_sep))) {
all_tpm <- full_join(all_tpm,
tpm_sep[[i]],
by = "Gene.name")
}
colnames(all_tpm) <- col_names
data_list <- list()
data_list$TPM <- all_tpm
data_list$counts <- all_counts
return(data_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.