#' @export
# Adding new species data to the full set for Naqvi et al. analysis
addSpecies <- function(old_metadata, old_tpm, old_counts, old_orthologs, spec_orth, prefix, spec_tpm, spec_counts, tissues, sexes) {
library(tidyverse)
library(tidyselect)
library(rlang)
source("R/makeOrthologTable.R")
all_orthologs <- makeOrthologTable(old_orthologs, spec_orth, prefix)
# All genes are matched to standardized human gene names
ref_genes_tpm = refGeneNamesTPM
ref_genes_counts = refGeneNamesCounts
# Creating the column name for the new species, standardized to Naqvi et al. naming conventions
new_column <- paste(prefix, ".gene.stable.ID", sep = "")
# Creating tpm dataframe for new species
spec_tpm <- merge(spec_orth, spec_tpm, by = "Gene.name")
spec_tpm$rowname <- spec_tpm$Ref.ID
spec_tpm <- column_to_rownames(spec_tpm)
spec_tpm <- spec_tpm %>% dplyr::select(-c(Gene.name, Ref.ID))
# Creating counts dataframe for new species
spec_counts <- merge(spec_orth, spec_counts, by = "Gene.name")
spec_counts$rowname <- spec_counts$Ref.ID
spec_counts <- column_to_rownames(spec_counts)
spec_counts <- spec_counts %>% dplyr::select(-c(Gene.name, Ref.ID))
# Making metadata dataframe
label <- colnames(spec_tpm)
Species <- c(prefix)
Tissue <- tissues
Sex <- sexes
species_metadata <- cbind(label, Species, Tissue, Sex)
all_metadata <- rbind(old_metadata, species_metadata)
# Adding new species TPM data to TPM data matrix
genes_names_to_use_TPM <- all_orthologs %>%
filter(Gene.name %in% ref_genes_tpm)
spec_tpm <- spec_tpm %>%
mutate(Gene.ID = rownames(spec_tpm))
spec_tpm_to_use <- spec_tpm %>%
semi_join(genes_names_to_use_TPM, by = c("Gene.ID" = new_column)) %>%
left_join(all_orthologs[, c("Gene.name", new_column)], by = c("Gene.ID" = new_column)) %>% # Adds
rename("Gene.ID" = new_column)
all_tpm <- old_tpm %>%
as.data.frame() %>%
mutate(Gene.name = rownames(old_tpm)) %>%
right_join(spec_tpm_to_use, by = c("Gene.name" = "Gene.name")) %>% # only use genes from genes_names_to_use_TPM
column_to_rownames("Gene.name")
# Adding new species counts data to counts data matrix
genes_names_to_use_counts <- all_orthologs %>%
filter(Gene.name %in% ref_genes_counts)
spec_counts <- spec_counts %>%
mutate(Gene.ID = rownames(spec_counts))
spec_counts_to_use <- spec_counts %>%
semi_join(genes_names_to_use_counts, by = c("Gene.ID" = new_column)) %>%
left_join(all_orthologs[, c("Gene.name", new_column)], by = c("Gene.ID" = new_column)) %>% # Adds
rename("Gene.ID" = new_column)
all_counts <- old_counts %>%
as.data.frame() %>%
mutate(Gene.name = rownames(old_counts)) %>%
right_join(spec_counts_to_use, by = c("Gene.name" = "Gene.name")) %>% # only use genes from genes_names_to_use_counts
column_to_rownames("Gene.name")
# Creating final list of objects to return
to_return <- list()
to_return$metadata <- as.data.frame(all_metadata)
to_return$TPM <- as.matrix(all_tpm)
to_return$counts <- as.matrix(all_counts)
to_return$orthologs <- as.data.frame(all_orthologs)
return(to_return)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.