#' @export
getSexBiasStats <- function(data, meta, edger_limma, outlierWeight = "n", permute_sex = "n", permute_compmat = "n", permute_cormax = 0.35, gene_chr = "n") {
if (edger_limma == "voom_random") {
# Permute sexes to check if expression values by sex are significantly different
if (permute_sex == "y") {
for (species in setdiff(unique(meta$Species), "Human")) {
meta[which(meta$Species == species), "Sex"] = sample(meta[which(meta$Species == species), "Sex"],
size = length(meta[which(meta$Species == species), "Sex"]))
}
}
lng <- DGEList(counts = data)
lng <- calcNormFactors(lng, method = c("TMM"))
tmmScaleFactors <- lng$samples$lib.size * lng$samples$norm.factors
lng_counts_tmm <- t(t(lng$counts) / tmmScaleFactors) * mean(tmmScaleFactors)
meta$Count <- table(meta$Species)[meta$Species]
sampw <- rep(1, times = ncol(data))
if (outlierWeight == "y") {
v1 <- voomWithQualityWeights(lng,
model.matrix(~ Species + Sex, meta),
var.design = model.matrix(~ Species, meta),
save.plot = TRUE)
}
if (outlierWeight == "n") {
v1 <- voom(lng,
model.matrix(~ Species + Sex, meta),
save.plot=TRUE)
}
corfit <- duplicateCorrelation(v1, model.matrix(~ Sex, meta), block = meta$Species)
fit <- lmFit(v1, model.matrix(~ Sex, meta), block = meta$Species, correlation = corfit$consensus.correlation)
fit <- eBayes(fit, robust = FALSE)
toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
if (outlierWeight == "y") {
v2 <- voomWithQualityWeights(lng,
model.matrix(~ Species + Sex + Species:Sex, meta),
var.design = model.matrix(~ Species, meta),
save.plot=TRUE)
}
if (outlierWeight == "n"){
v2 <- voom(lng,
model.matrix(~ Species + Sex + Species:Sex, meta),
save.plot=TRUE)
}
fit_int <- lmFit(v2, model.matrix(~ Species + Sex + Species:Sex, meta))
fit_int <- eBayes(fit_int, robust = FALSE)
# contrasts: macaque, dog, human, mouse, rat, xenLae (a zero for each and then a set of 1/-1, positive if true and negative if false for that species)
toptab_specsexint <- topTable(fit_int, coef = grep(":", colnames(fit_int$coefficients)), number = Inf, sort.by = "F")
fit_int <- lmFit(v2, model.matrix(~ Species + Species:Sex, meta))
fit_human <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1, 1,-1,-1,-1))) # see comment above re: "contrasts"
toptab_humansexspec <- topTable(fit_human, number = Inf, sort.by = "P")
fit_primate <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0, 1,-1, 1,-1,-1,-1)))
toptab_primatesexspec <- topTable(fit_primate, number = Inf, sort.by = "P")
fit_rodent <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1,-1, 1, 1,-1)))
toptab_rodentsexspec <- topTable(fit_rodent, number = Inf, sort.by = "P")
fit_dog <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1, 1,-1,-1,-1,-1)))
toptab_dogsexspec <- topTable(fit_dog, number = Inf, sort.by = "P")
fit_xenLae <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1,-1,-1,-1, 1)))
toptab_xenLaesexspec <- topTable(fit_xenLae, number = Inf, sort.by = "P")
coef_primate_se <- sqrt(fit_primate$s2.post) * fit_primate$stdev.unscaled
coef_human_se <- sqrt(fit_human$s2.post) * fit_human$stdev.unscaled
coef_rodent_se <- sqrt(fit_rodent$s2.post) * fit_rodent$stdev.unscaled # Is this supposed to have fit_primate? Changed to sqrt(fit_rodent$s2.post)
coef_dog_se <- sqrt(fit_dog$s2.post) * fit_dog$stdev.unscaled # Is this supposed to have fit_human? Changed to sqrt(fit_dog$s2.post)
coef_xenLae_se <- sqrt(fit_xenLae$s2.post) * fit_xenLae$stdev.unscaled
coef_se <- sqrt(fit$s2.post) * fit$stdev.unscaled
v1_var <- v1$voom.xy$y[rownames(fit$coefficients)]
v2_var <- v2$voom.xy$y[rownames(fit$coefficients)]
toptab2 <- cbind(fit$coefficients[, "SexMale"],
coef_se[, "SexMale"],
sqrt(fit$s2.post),
toptab[rownames(fit$coefficients), "P.Value"],
v1_var,
v2_var,
toptab_specsexint[rownames(fit$coefficients), "P.Value"],
sqrt(fit_human$s2.post), toptab_humansexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_human_se[, 1],
sqrt(fit_primate$s2.post), toptab_primatesexspec[rownames(fit$coefficients), c("logFC", "P.Value")], coef_primate_se[, 1],
sqrt(fit_rodent$s2.post), toptab_rodentsexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_rodent_se[, 1],
sqrt(fit_dog$s2.post), toptab_dogsexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_dog_se[, 1],
sqrt(fit_xenLae$s2.post), toptab_xenLaesexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_xenLae_se[, 1])
rownames(toptab2) <- rownames(fit$coefficients)
colnames(toptab2) <- c("coef",
"coef_se",
"resid_sd",
"pval",
"pval_specsexint",
"voom1_var",
"voom2_var",
"resid_sd_humansexint", "coef_humansexint", "pval_humansexint", "coef_humansexint_se",
"resid_sd_primatesexint","coef_primatesexint","pval_primatesexint","coef_primatesexint_se",
"resid_sd_rodentsexint", "coef_rodentsexint", "pval_rodentsexint", "coef_rodentsexint_se",
"resid_sd_dogsexint", "coef_dogsexint", "pval_dogsexint", "coef_dogsexint_se",
"resid_sd_xenLaesexint", "coef_xenLaesexint", "pval_xenLaesexint", "coef_xenLaesexint_se")
return(list(toptab2, v1$E, v1$sample.weights))
}
# perform testing separately in each species then combine by fisher's method
if (edger_limma == "fisher_voom") {
require(metap)
require(CombinePValue)
source("R/ebm.R")
fisherp <- function(pvec) {return(sumlog(pvec)$p)}
embp <- function(pvec, data_matrix) {return(empiricalBrownsMethod(data_matrix, pvec))}
spec_pval_mat <- matrix(nrow = nrow(data),
ncol = length(unique(meta$Species)),
dimnames = list(rownames(data),
unique(meta$Species)))
spec_coef_mat <- spec_pval_mat
spec_avexpr_mat <- spec_pval_mat
spec_coef_se_mat <- spec_pval_mat
spec_resid_sd_mat <- spec_pval_mat
fullmat <- DGEList(counts = data)
fullmat <- calcNormFactors(fullmat, method = c("TMM"))
tmmScaleFactors <- fullmat$samples$lib.size * fullmat$samples$norm.factors
fullmat_tmm <- t(t(fullmat$counts) / tmmScaleFactors) * mean(tmmScaleFactors)
for (species in colnames(spec_pval_mat)) {
print(species)
meta_specsub <- subset(meta, Species == species)
tmm <- DGEList(counts = data[, rownames(meta_specsub)])
tmm <- calcNormFactors(tmm)
if (species == "Human" & "Thyroid" %in% meta$Tissue) {outlierWeight = "n"}
if (outlierWeight == "y") {v1 = voomWithQualityWeights(tmm,
model.matrix(~ Sex, meta_specsub))}
if (outlierWeight == "n") {v1 = voom(tmm,
model.matrix(~ Sex, meta_specsub))}
data[, rownames(meta_specsub)] = v1$E
fit <- eBayes(lmFit(v1, model.matrix(~ Sex, meta_specsub)))
toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
cor_noperm = 1.0
if (species == "Human") {cor_noperm = permute_cormax}
if (permute_sex == "y") {
while (abs(cor_noperm) > permute_cormax) {
meta_specsub$Sex <- sample(meta_specsub$Sex)
fit <- eBayes(lmFit(v1, model.matrix(~ Sex, meta_specsub)))
toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
merge_perm_noperm <- merge(toptab,
data.frame(permute_compmat[["coef"]][, species, drop=FALSE]),
by = 0)
merge_perm_noperm <- subset(merge_perm_noperm, !(Row.names %in% names(gene_chr[which(gene_chr %in% c("chrX", "chrY"))])))
cor_noperm <- cor(merge_perm_noperm[, "logFC"], merge_perm_noperm[, species])
print(cor_noperm)
}
}
spec_pval_mat[rownames(toptab), species] <- toptab$P.Value
spec_coef_mat[rownames(toptab), species] <- toptab$logFC
spec_avexpr_mat[rownames(toptab), species] <- toptab$AveExpr
coef_se <- sqrt(fit$s2.post) * fit$stdev.unscaled
spec_coef_se_mat[rownames(coef_se), species] <- coef_se[, "SexMale"]
spec_resid_sd_mat[rownames(coef_se), species] <- sqrt(fit$s2.post)
}
cat("Combining p-values...\n")
avexpr_covar <- calculateCovariances(t(spec_avexpr_mat[rownames(toptab), ]))
comb_pvals_emb <- apply(spec_pval_mat, 1, combinePValues, covar_matrix = avexpr_covar)
comb_pvals_min <- apply(spec_pval_mat, 1, min)
pvals_mat <- cbind(spec_pval_mat, comb_pvals_emb)
colnames(pvals_mat) <- c(colnames(spec_pval_mat), "P.Value")
de_retlist <- list(spec_coef_mat, spec_coef_se_mat, spec_resid_sd_mat, pvals_mat)
names(de_retlist) = c("coef", "coef_se", "resid_sd", "pval")
return(list(de_retlist, data))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.