#' Description of function
#' @param metadata a table of metadata
#' @param tpmData a table of TPM data, rows are genes and columns are samples
#' @param countsData a table of count data, rows are genes and columns are samples
#' @export
getConservedSexBiasedGenes <- function(metadata, tpmData, countsData) {
require(RcppGSL)
require(tidyverse)
require(edgeR)
require(limma)
require(qvalue)
tpmData[, 1:ncol(tpmData)] <- sapply(tpmData[, 1:ncol(tpmData)], as.character)
tpmData[, 1:ncol(tpmData)] <- sapply(tpmData[, 1:ncol(tpmData)], as.double)
countsData[, 1:ncol(countsData)] <- sapply(countsData[, 1:ncol(countsData)], as.character)
countsData[, 1:ncol(countsData)] <- sapply(countsData[, 1:ncol(countsData)], as.double)
# List of tissues to analyze
source("R/getTissues.R")
tissues <- getTissues(metadata)
# Remove samples from TPM/counts datasets that aren't in metadata
#tpmData <- tpmData %>%
# t() %>%
# as.data.frame() %>%
# rownames_to_column(var = "sampleLabel") %>%
# semi_join(metadata, by = c("sampleLabel" = "label")) %>%
# column_to_rownames(var = "sampleLabel") %>%
# t() %>%
# as.matrix()
#countsData <- countsData %>%
# t() %>%
# as.data.frame() %>%
# rownames_to_column(var = "sampleLabel") %>%
# semi_join(metadata, by = c("sampleLabel" = "label")) %>%
# column_to_rownames(var = "sampleLabel") %>%
# t() %>%
# as.matrix()
# Median tpm of replicates for each tissue-species-sex combination (e.g. median of all female-mouse-heart samples)
# A list of lists of matrices of median values (species_mf_med --> tissues --> sexes --> median values by gene-species pair)
species_mf_med <- list()
for (tissue in tissues) {
spec5_tissue_tpm_temp <- tpmData[, subset(metadata, Tissue == tissue)$label] # TPM data for the samples with data for the tissue in question
tmm <- DGEList(counts = spec5_tissue_tpm_temp) # edgeR: Creates a DGEList object (stores gene counts and relevant sample info)
tmm <- calcNormFactors(tmm, doWeighting = FALSE) # edgeR: Calculates normalization factors ("weighted trimmed mean of M-values (to the reference)") to scale library sizes -- NOT weighted here
tmm_cpm <- cpm(tmm) # edgeR: Computes Counts Per Million using normalized library sizes
print(tissue)
# Creating an empty matrix for each tissue-sex pair
# List --> (tissue) list --> (sex) matrix --> rows = genes, columns = species
species_mf_med[[tissue]] <- list()
species_mf_med[[tissue]][["Male"]] <- matrix(nrow = nrow(countsData),
ncol = length(unique(metadata$Species)),
dimnames = list(rownames(countsData),
unique(metadata$Species)))
species_mf_med[[tissue]][["Female"]] <- matrix(nrow = nrow(countsData),
ncol = length(unique(metadata$Species)),
dimnames = list(rownames(countsData),
unique(metadata$Species)))
# Filling in median values calculated across replicates for each tissue-sex-species combination
# Uses Counts Per Million object
for (species in unique(metadata$Species)){
print(species)
species_mf_med[[tissue]][["Male"]][, species] <- apply(tmm_cpm[, subset(metadata, (Species == species) & (Tissue == tissue) & (Sex == "Male"))$label, drop=FALSE],
1, # rowwise
median)
species_mf_med[[tissue]][["Female"]][, species] <- apply(tmm_cpm[, subset(metadata, (Species == species) & (Tissue == tissue) & (Sex == "Female"))$label, drop=FALSE],
1, # rowwise
median)
}
}
source("R/getSexBiasStats.R")
toptab_specsep_list <- list() #
toptab_spec5_list <- list() #
for (tissue in tissues) {
print(tissue)
weightopt <- 'y'
pcaopt <- 'n'
spec5_meta <- subset(metadata, Tissue == tissue)
rownames(spec5_meta) <- spec5_meta$label
spec5_tissue_tpm_temp <- tpmData[, rownames(spec5_meta)]
spec5_tissue_counts_temp <- countsData[, rownames(spec5_meta)]
keep <- (rowSums(species_mf_med[[tissue]][["Male"]] > 1) > 3) |
(rowSums(species_mf_med[[tissue]][["Female"]] > 1) > 3)
if (tissue == "Thyroid") {keep <- keep[-5825]} # By Naqvi; unclear on reasoning
retlist <- getSexBiasStats(spec5_tissue_counts_temp[keep, ],
spec5_meta,
'voom_random',
outlierWeight = 'y')
toptab_spec5_list[[tissue]] <- retlist[[1]]
retlist <- getSexBiasStats(spec5_tissue_counts_temp[keep, ],
spec5_meta,
'fisher_voom',
outlierWeight = 'n')
toptab_specsep_list[[tissue]] <- retlist[[1]]
}
spec5_pval_mat <- matrix(nrow = nrow(countsData),
ncol = length(tissues),
dimnames = list(rownames(countsData),
unlist(tissues)))
spec5_coef_mat <- spec5_pval_mat
for (tissue in tissues){
spec5_pval_mat[rownames(toptab_spec5_list[[tissue]]), tissue] <- toptab_spec5_list[[tissue]][, "pval"]
spec5_coef_mat[rownames(toptab_spec5_list[[tissue]]), tissue] <- toptab_spec5_list[[tissue]][, "coef"]
}
spec5_pval_mat <- as.matrix(spec5_pval_mat[which(apply(!is.na(spec5_pval_mat), 1, sum) > 0), ])
colnames(spec5_pval_mat) <- unlist(tissues)
spec5_coef_mat <- as.matrix(spec5_coef_mat[which(apply(!is.na(spec5_coef_mat), 1, sum) > 0), ])
colnames(spec5_coef_mat) <- unlist(tissues)
q <- qvalue(c(spec5_pval_mat)) # Gives the proportion of false positives in the set of p-values
# Matrix of q-values, rows = genes, cols = tissues
spec5_qval_mat <- matrix(q$qvalues,
nrow = nrow(spec5_pval_mat),
ncol = ncol(spec5_pval_mat),
dimnames = list(rownames(spec5_pval_mat),
colnames(spec5_pval_mat)))
# Matrix filled with 0 integer, rows = genes, cols = tissues
# Will be filled with coefficients representing sex bias degree/direction
bias_matrix_tissep_spec5 <- matrix(0,
nrow = nrow(spec5_coef_mat),
ncol = ncol(spec5_coef_mat),
dimnames = list(rownames(spec5_coef_mat),
colnames(spec5_coef_mat)))
# For each gene-tissue pair, sets values = 1 if FDR < 0.1, otherwise kept at 0
for (tissue in tissues){
fdr <- 0.1 # The maximum allowed (non-inclusive) false discovery rate
adj <- spec5_qval_mat[, tissue] # Subsets the matrix of q-values by the current tissue
bias_matrix_tissep_spec5[names(adj[which(adj < fdr)]), tissue] <- 1
}
# Modifying matrix of coefficients: -1 is female-biased, 1 is male-biased
bias_matrix_tissep_spec5 <- bias_matrix_tissep_spec5 * (spec5_coef_mat / abs(spec5_coef_mat))
# require same direction of bias > 1.05-fold in 4/5 species
lfc_cut <- log2(1)
bias_matrix_tissep_spec5_nospecint <- bias_matrix_tissep_spec5
for (tissue in tissues){
common_rownames <- intersect(rownames(spec5_coef_mat),
rownames(toptab_specsep_list[[tissue]][["coef"]]))
diff_rownames <- unique(c(setdiff(rownames(toptab_spec5_list[[tissue]]),
rownames(toptab_specsep_list[[tissue]][["coef"]])),
setdiff(rownames(toptab_specsep_list[[tissue]][["coef"]]),
rownames(toptab_spec5_list[[tissue]]))))
tissue_specdiffsign <- common_rownames[which((apply(toptab_specsep_list[[tissue]][["coef"]][common_rownames, ] *
spec5_coef_mat[common_rownames, tissue] / abs(spec5_coef_mat[common_rownames, tissue]) < log2(1.05), 1, sum) # rowwise
%in% c(2:ncol(toptab_specsep_list[[tissue]][["coef"]]))) |
(toptab_specsep_list[[tissue]][["coef"]][common_rownames, "Human"] * spec5_coef_mat[common_rownames, tissue] < 0) |
(abs(toptab_specsep_list[[tissue]][["coef"]][common_rownames, "Human"]) < log2(1.05)) |
(apply(toptab_specsep_list[[tissue]][["pval"]][common_rownames, 1:ncol(toptab_specsep_list[[tissue]][["pval"]]) - 1] > 1, 1, sum) # rowwise
> 1))]
tissue_specdiffsign <- unique(c(tissue_specdiffsign, diff_rownames))
bias_matrix_tissep_spec5_nospecint[intersect(rownames(bias_matrix_tissep_spec5_nospecint), tissue_specdiffsign), tissue] <- 0
}
bias_matrix_tissep_spec5_nospecint[is.na(bias_matrix_tissep_spec5_nospecint)] <- 0
bias_matrix_tissep_spec5_nospecint <- bias_matrix_tissep_spec5_nospecint[which(apply(abs(bias_matrix_tissep_spec5_nospecint), 1, sum) > 0), ]
bias_matrix_tissep_spec5_nospecint <- as.matrix(bias_matrix_tissep_spec5_nospecint)
colnames(bias_matrix_tissep_spec5_nospecint) <- colnames(bias_matrix_tissep_spec5)
bias_matrix_tissep_spec5_nospecint_coef <- bias_matrix_tissep_spec5_nospecint * abs(spec5_coef_mat[rownames(bias_matrix_tissep_spec5_nospecint),
colnames(bias_matrix_tissep_spec5_nospecint)])
bias_matrix_tissep_spec5_nospecint_coef[which(is.na(bias_matrix_tissep_spec5_nospecint_coef))] <- 0
conserved_genes <- list()
conserved_genes$binary <- as.data.frame(bias_matrix_tissep_spec5_nospecint)
conserved_genes$coef <- as.data.frame(bias_matrix_tissep_spec5_nospecint_coef)
return(conserved_genes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.