#' @export
addXenLaeData <- function() {
library(BiocManager) # Comp bio superpackage
library(tidyverse) # Data analysis/graphing
library(stringr)
library(tximport)
library(GenomicFeatures) # Need mysql-client installed on system
# Xenopus laevis data - made .csv files with python script (found in Thesis git repo) to make tx2gene file from the output of StringTie's merge function
# First files added were M_9 and F_1
# Using a tx2gene file generated using python for "summarizeToGene" function
xenLae_tx2gene <- as_tibble(read.delim("data-raw/xenLae_txt2gene.csv",header=TRUE))
# Individual files for each read
xenLae_M_3_tximport <- tximport(c("data-raw/xenLae_M_3_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_M_4_tximport <- tximport(c("data-raw/xenLae_M_4_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_M_5_tximport <- tximport(c("data-raw/xenLae_M_5_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_M_6_tximport <- tximport(c("data-raw/xenLae_M_6_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_M_9_tximport <- tximport(c("data-raw/xenLae_M_9_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_F_1_tximport <- tximport(c("data-raw/xenLae_F_1_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_F_2_tximport <- tximport(c("data-raw/xenLae_F_2_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_F_4_tximport <- tximport(c("data-raw/xenLae_F_4_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_F_5_tximport <- tximport(c("data-raw/xenLae_F_5_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
xenLae_F_6_tximport <- tximport(c("data-raw/xenLae_F_6_quant.sf"), type = "salmon",txOut=TRUE,countsFromAbundance = "lengthScaledTPM") %>%
summarizeToGene(tx2gene=xenLae_tx2gene, countsFromAbundance="lengthScaledTPM")
# Formatting Xenopus laevis data
# List of all above files
all_tximport <- list(xenLae_M_3_tximport, xenLae_M_4_tximport, xenLae_M_5_tximport, xenLae_M_6_tximport, xenLae_M_9_tximport,
xenLae_F_1_tximport, xenLae_F_2_tximport, xenLae_F_4_tximport, xenLae_F_5_tximport, xenLae_F_6_tximport)
col_names <- c("Gene.name",
"XenLae_Male_Brain_M2_3", "XenLae_Male_Brain_M2_4", "XenLae_Male_Brain_M2_5", "XenLae_Male_Brain_M2_6", "XenLae_Male_Brain_M2_9",
"XenLae_Female_Brain_F2_1", "XenLae_Female_Brain_F2_2", "XenLae_Female_Brain_F2_4", "XenLae_Female_Brain_F2_5", "XenLae_Female_Brain_F2_6")
# Converts each tximport object (a double type) to a tibble/dataframe, converting row names to a column (gene_name), then adds each to a list
counts_list = list()
for (i in seq_along(all_tximport)) {
rownames <- row.names(all_tximport[[i]][['counts']])
counts_list[[i]] <- as_tibble(all_tximport[[i]][['counts']]) %>%
add_column(Gene.name = rownames) %>%
dplyr::select(Gene.name, V1)
}
# Iterates through the list of data tibbles, merging into a single set
xenLae_counts <- counts_list[[1]]
for (i in c(2:length(counts_list))) {
xenLae_counts <- full_join(xenLae_counts,
counts_list[[i]],
by = "Gene.name"
)
}
# Renames the columns to their respective sample names
colnames(xenLae_counts) <- col_names
# Converting TPM data to tibbles
tpm_list = list()
for (i in seq_along(all_tximport)) {
rownames <- row.names(all_tximport[[i]][['abundance']])
tpm_list[[i]] <- as_tibble(all_tximport[[i]][['abundance']]) %>%
add_column(Gene.name = rownames) %>%
dplyr::select(Gene.name, V1)
}
# Merging TPM datasets
xenLae_tpm <- tpm_list[[1]]
for (i in c(2:length(tpm_list))) {
xenLae_tpm <- full_join(xenLae_tpm,
tpm_list[[i]],
by = "Gene.name"
)
}
# Naming columns
colnames(xenLae_tpm) <- col_names
# Creating a table of gene names and their corresponding gene IDs for L, S, and non-L/S genes
xenLae_orth <- as_tibble(read.delim("data-raw/xenLae_orth_table.csv",header=TRUE,na.strings=c("","NA")))
xenLae_orth <- dplyr::select(xenLae_orth, Gene.name = gene_name, Ref.ID = ref_ID)
# Creating a table of just L and S genes, matched when both chromosomal copies are present
xenLae_LS <- as_tibble(read.delim("data-raw/xenLae_LS_paralogs.csv",header=TRUE,na.strings=c("","NA")))
# Creating a table that takes L/S genes and only keeps the ones that have both L and S versions present
xenLae_paralogs <- xenLae_LS %>% filter(!is.na(xenLae_LS$L_chrom) & !is.na(xenLae_LS$S_chrom))
return(list(xenLae_tpm, xenLae_counts, xenLae_orth, xenLae_LS, xenLae_paralogs))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.