## code to prepare `DATASET` dataset goes here
library(tidyverse)
# Naqvi ortholog table
Naqvi_orth <- read.table("data-raw/one2oneorth_emblids.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t", fill = TRUE)
# Naqvi metadata table
human_metadata <- read.table("data-raw/human.metadata.txt", header = TRUE, stringsAsFactors = FALSE)
nonhuman_metadata <- read.table("data-raw/sample.metadata.txt", header = TRUE, stringsAsFactors = FALSE, sep="\t")
nonhuman_metadata <- subset(nonhuman_metadata, Tissue != "Testis")
nonhuman_metadata$comb_label <- paste(nonhuman_metadata$Species,
nonhuman_metadata$Sex,
nonhuman_metadata$Tissue,
nonhuman_metadata$label,
sep = "_")
Naqvi_metadata <- data.frame(rbind(as.matrix(human_metadata[,c("RUN", "Species", "Tissue", "Sex")]),
as.matrix(nonhuman_metadata[,c("comb_label", "Species", "Tissue", "Sex")])))
colnames(Naqvi_metadata) <- c("label", "Species", "Tissue", "Sex")
Naqvi_metadata$label <- as.character(Naqvi_metadata$label)
Naqvi_metadata$Species <- as.character(Naqvi_metadata$Species)
Naqvi_metadata$Tissue <- as.character(Naqvi_metadata$Tissue)
Naqvi_metadata$Sex <- as.character(Naqvi_metadata$Sex)
# Naqvi count/TPM data
human_counts <- as.matrix(read.table(gzfile("data-raw/gtex.filt.salmon.tximport.adj.counts.txt.gz"), header = TRUE, row.names = 1, sep = "\t")) # Over 100MB, go to https://zenodo.org/record/2658829#.XkrmgJNKhBx
human_tpm <- as.matrix(read.table(gzfile("data-raw/gtex.filt.salmon.tximport.unadj.tpm.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
cyno_counts <- as.matrix(read.table(gzfile("data-raw/cyno.salmon.tximport.counts.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
cyno_tpm <- as.matrix(read.table(gzfile("data-raw/cyno.salmon.tximport.tpm.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
mouse_counts <- as.matrix(read.table(gzfile("data-raw/mouse.salmon.tximport.counts.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
mouse_tpm <- as.matrix(read.table(gzfile("data-raw/mouse.salmon.tximport.tpm.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
rat_counts <- as.matrix(read.table(gzfile("data-raw/rat.salmon.tximport.counts.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
rat_tpm <- as.matrix(read.table(gzfile("data-raw/rat.salmon.tximport.tpm.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
dog_counts <- as.matrix(read.table(gzfile("data-raw/dog.salmon.tximport.counts.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
dog_tpm <- as.matrix(read.table(gzfile("data-raw/dog.salmon.tximport.tpm.txt.gz"), header = TRUE, row.names = 1, sep = "\t"))
Naqvi_tpm <- cbind(human_tpm[subset(Naqvi_orth, Gene.name %in% rownames(human_tpm))$Gene.name, ],
cyno_tpm[subset(Naqvi_orth,Gene.name %in% rownames(human_tpm))$Crab.eating.macaque.gene.stable.ID,],
mouse_tpm[subset(Naqvi_orth,Gene.name %in% rownames(human_tpm))$Mouse.gene.stable.ID,],
rat_tpm[subset(Naqvi_orth,Gene.name %in% rownames(human_tpm))$Rat.gene.stable.ID,],
dog_tpm[subset(Naqvi_orth,Gene.name %in% rownames(human_tpm))$Dog.gene.stable.ID,])
Naqvi_counts <- cbind(human_counts[subset(Naqvi_orth,Gene.name %in% rownames(human_counts))$Gene.name,],
cyno_counts[subset(Naqvi_orth,Gene.name %in% rownames(human_counts))$Crab.eating.macaque.gene.stable.ID,],
mouse_counts[subset(Naqvi_orth,Gene.name %in% rownames(human_counts))$Mouse.gene.stable.ID,],
rat_counts[subset(Naqvi_orth,Gene.name %in% rownames(human_counts))$Rat.gene.stable.ID,],
dog_counts[subset(Naqvi_orth,Gene.name %in% rownames(human_counts))$Dog.gene.stable.ID,])
# My Xenopus laevis data
source("R/getQuantData.R")
prefix <- "XenLae"
sample_names <- c("Male_Brain_M2_3", "Male_Brain_M2_4", "Male_Brain_M2_5", "Male_Brain_M2_6", "Male_Brain_M2_9",
"Female_Brain_F2_1", "Female_Brain_F2_2", "Female_Brain_F2_4", "Female_Brain_F2_5", "Female_Brain_F2_6")
xenLae_data <- getQuantData("data-raw/xenLae_txt2gene.csv", "data-raw/", prefix, sample_names)
xenLae_counts <- xenLae_data$counts
xenLae_tpm <- xenLae_data$TPM
xenLae_orth <- as_tibble(read_delim("data-raw/xenLae_orth_table.csv", "\t", col_names = TRUE, na = c("", "NA")))
xenLae_orth <- dplyr::select(xenLae_orth, Gene.name = gene_name, Ref.ID = ref_ID)
xenLae_LS <- as_tibble(read_delim("data-raw/xenLae_LS_paralogs.csv", "\t", col_names = TRUE, na = c("", "NA")))
xenLae_paralogs <- xenLae_LS %>% filter(!is.na(xenLae_LS$L_chrom) & !is.na(xenLae_LS$S_chrom))
# Combining my data with Naqvi data
source("R/makeOrthologTable.R")
source("R/addSpecies.R")
tissues <- c("Brain")
sexes <- c("Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female")
refGeneNamesTPM <- rownames(human_tpm)
refGeneNamesCounts <- rownames(human_counts)
final_data <- addSpecies(Naqvi_metadata, Naqvi_tpm, Naqvi_counts,
Naqvi_orth, xenLae_orth,
prefix, xenLae_tpm, xenLae_counts, tissues, sexes)
doakMetadata <- final_data$metadata
doakTPM <- final_data$TPM
doakCounts <- final_data$counts
doakOrthologs <- final_data$orthologs
source("R/getConservedSexBiasedGenes.R")
conservedGenes <- getConservedSexBiasedGenes(doakMetadata, doakTPM, doakCounts)
usethis::use_data(doakMetadata, doakOrthologs, doakTPM, doakCounts, refGeneNamesTPM, refGeneNamesCounts, conservedGenes, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.