R/getSexBiasStats.R

Defines functions getSexBiasStats

#' @export

getSexBiasStats <- function(data, meta, edger_limma, outlierWeight = "n", permute_sex = "n", permute_compmat = "n", permute_cormax = 0.35, gene_chr = "n") {
  
  if (edger_limma == "voom_random") {
    # Permute sexes to check if expression values by sex are significantly different
    if (permute_sex == "y") {
      for (species in setdiff(unique(meta$Species), "Human")) {
        meta[which(meta$Species == species), "Sex"] = sample(meta[which(meta$Species == species), "Sex"],
                                                             size = length(meta[which(meta$Species == species), "Sex"]))
      }
    }
    lng <- DGEList(counts = data)
    lng <- calcNormFactors(lng, method = c("TMM"))
    tmmScaleFactors <- lng$samples$lib.size * lng$samples$norm.factors
    lng_counts_tmm <- t(t(lng$counts) / tmmScaleFactors) * mean(tmmScaleFactors)
    meta$Count <- table(meta$Species)[meta$Species]
    sampw <- rep(1, times = ncol(data))
    
    if (outlierWeight == "y") {
      v1 <- voomWithQualityWeights(lng,
                                   model.matrix(~ Species + Sex, meta),
                                   var.design = model.matrix(~ Species, meta),
                                   save.plot = TRUE)
    }
    if (outlierWeight == "n") {
      v1 <- voom(lng,
                 model.matrix(~ Species + Sex, meta),
                 save.plot=TRUE)
    }
    corfit <- duplicateCorrelation(v1, model.matrix(~ Sex, meta), block = meta$Species)
    
    fit <- lmFit(v1, model.matrix(~ Sex, meta), block = meta$Species, correlation = corfit$consensus.correlation)
    fit <- eBayes(fit, robust = FALSE)
    toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
    
    if (outlierWeight == "y") {
      v2 <- voomWithQualityWeights(lng,
                                   model.matrix(~ Species + Sex + Species:Sex, meta),
                                   var.design = model.matrix(~ Species, meta),
                                   save.plot=TRUE)
    }
    if (outlierWeight == "n"){
      v2 <- voom(lng,
                 model.matrix(~ Species + Sex + Species:Sex, meta),
                 save.plot=TRUE)
    }
    
    fit_int <- lmFit(v2, model.matrix(~ Species + Sex + Species:Sex, meta))
    fit_int <- eBayes(fit_int, robust = FALSE)
    
    # contrasts: macaque, dog, human, mouse, rat, xenLae (a zero for each and then a set of 1/-1, positive if true and negative if false for that species)
    toptab_specsexint     <- topTable(fit_int, coef = grep(":", colnames(fit_int$coefficients)), number = Inf, sort.by = "F")
    fit_int               <- lmFit(v2, model.matrix(~ Species + Species:Sex, meta))
    
    fit_human             <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1, 1,-1,-1,-1)))    # see comment above re: "contrasts"
    toptab_humansexspec   <- topTable(fit_human, number = Inf, sort.by = "P")
    
    fit_primate           <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0, 1,-1, 1,-1,-1,-1)))
    toptab_primatesexspec <- topTable(fit_primate, number = Inf, sort.by = "P")
    
    fit_rodent            <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1,-1, 1, 1,-1)))
    toptab_rodentsexspec  <- topTable(fit_rodent, number = Inf, sort.by = "P")
    
    fit_dog               <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1, 1,-1,-1,-1,-1)))
    toptab_dogsexspec     <- topTable(fit_dog, number = Inf, sort.by = "P")
    
    fit_xenLae            <- eBayes(contrasts.fit(fit_int, c(0,0,0,0,0,0,-1,-1,-1,-1,-1, 1)))
    toptab_xenLaesexspec  <- topTable(fit_xenLae, number = Inf, sort.by = "P")
    
    coef_primate_se <- sqrt(fit_primate$s2.post) * fit_primate$stdev.unscaled
    coef_human_se   <- sqrt(fit_human$s2.post)   * fit_human$stdev.unscaled
    coef_rodent_se  <- sqrt(fit_rodent$s2.post)  * fit_rodent$stdev.unscaled      # Is this supposed to have fit_primate? Changed to sqrt(fit_rodent$s2.post)
    coef_dog_se     <- sqrt(fit_dog$s2.post)     * fit_dog$stdev.unscaled         # Is this supposed to have fit_human?   Changed to sqrt(fit_dog$s2.post)
    coef_xenLae_se  <- sqrt(fit_xenLae$s2.post)  * fit_xenLae$stdev.unscaled
    
    coef_se         <- sqrt(fit$s2.post)         * fit$stdev.unscaled
    
    v1_var  <- v1$voom.xy$y[rownames(fit$coefficients)]
    v2_var  <- v2$voom.xy$y[rownames(fit$coefficients)]
    toptab2 <- cbind(fit$coefficients[, "SexMale"],
                     coef_se[, "SexMale"],
                     sqrt(fit$s2.post),
                     toptab[rownames(fit$coefficients), "P.Value"],
                     v1_var,
                     v2_var,
                     toptab_specsexint[rownames(fit$coefficients), "P.Value"],
                     sqrt(fit_human$s2.post),   toptab_humansexspec[  rownames(fit$coefficients), c("logFC", "P.Value")], coef_human_se[,   1],
                     sqrt(fit_primate$s2.post), toptab_primatesexspec[rownames(fit$coefficients), c("logFC", "P.Value")], coef_primate_se[, 1],
                     sqrt(fit_rodent$s2.post),  toptab_rodentsexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_rodent_se[,  1],
                     sqrt(fit_dog$s2.post),     toptab_dogsexspec[    rownames(fit$coefficients), c("logFC", "P.Value")], coef_dog_se[,     1],
                     sqrt(fit_xenLae$s2.post),  toptab_xenLaesexspec[ rownames(fit$coefficients), c("logFC", "P.Value")], coef_xenLae_se[,  1])
    rownames(toptab2) <- rownames(fit$coefficients)
    colnames(toptab2) <- c("coef",
                           "coef_se",
                           "resid_sd",
                           "pval",
                           "pval_specsexint",
                           "voom1_var",
                           "voom2_var",
                           "resid_sd_humansexint",  "coef_humansexint",  "pval_humansexint",  "coef_humansexint_se",
                           "resid_sd_primatesexint","coef_primatesexint","pval_primatesexint","coef_primatesexint_se",
                           "resid_sd_rodentsexint", "coef_rodentsexint", "pval_rodentsexint", "coef_rodentsexint_se",
                           "resid_sd_dogsexint",    "coef_dogsexint",    "pval_dogsexint",    "coef_dogsexint_se",
                           "resid_sd_xenLaesexint", "coef_xenLaesexint", "pval_xenLaesexint", "coef_xenLaesexint_se")
    
    return(list(toptab2, v1$E, v1$sample.weights))
  }
  
  # perform testing separately in each species then combine by fisher's method
  if (edger_limma == "fisher_voom") {
    require(metap)
    require(CombinePValue)
    source("R/ebm.R")
    
    fisherp <- function(pvec)              {return(sumlog(pvec)$p)}
    embp    <- function(pvec, data_matrix) {return(empiricalBrownsMethod(data_matrix, pvec))}
    
    spec_pval_mat     <- matrix(nrow = nrow(data), 
                                ncol = length(unique(meta$Species)), 
                                dimnames = list(rownames(data), 
                                                unique(meta$Species)))
    spec_coef_mat     <- spec_pval_mat
    spec_avexpr_mat   <- spec_pval_mat
    spec_coef_se_mat  <- spec_pval_mat
    spec_resid_sd_mat <- spec_pval_mat
    
    fullmat <- DGEList(counts = data)
    fullmat <- calcNormFactors(fullmat, method = c("TMM"))
    tmmScaleFactors <- fullmat$samples$lib.size * fullmat$samples$norm.factors
    fullmat_tmm <- t(t(fullmat$counts) / tmmScaleFactors) * mean(tmmScaleFactors)
    
    for (species in colnames(spec_pval_mat)) {
      print(species)
      meta_specsub <- subset(meta, Species == species)
      tmm <- DGEList(counts = data[, rownames(meta_specsub)])
      tmm <- calcNormFactors(tmm)
      if (species == "Human" & "Thyroid" %in% meta$Tissue) {outlierWeight = "n"}
      if (outlierWeight == "y") {v1 = voomWithQualityWeights(tmm,
                                                             model.matrix(~ Sex, meta_specsub))}
      if (outlierWeight == "n") {v1 = voom(tmm,
                                           model.matrix(~ Sex, meta_specsub))}
      data[, rownames(meta_specsub)] = v1$E
      fit <- eBayes(lmFit(v1, model.matrix(~ Sex, meta_specsub)))
      toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
      cor_noperm = 1.0
      if (species == "Human") {cor_noperm = permute_cormax}
      if (permute_sex == "y") {
        while (abs(cor_noperm) > permute_cormax) {
          meta_specsub$Sex <- sample(meta_specsub$Sex)
          fit <- eBayes(lmFit(v1, model.matrix(~ Sex, meta_specsub)))
          toptab <- topTable(fit, coef = "SexMale", number = Inf, sort.by = "P")
          merge_perm_noperm  <- merge(toptab, 
                                      data.frame(permute_compmat[["coef"]][, species, drop=FALSE]), 
                                      by = 0)
          merge_perm_noperm <- subset(merge_perm_noperm, !(Row.names %in% names(gene_chr[which(gene_chr %in% c("chrX", "chrY"))])))
          cor_noperm <- cor(merge_perm_noperm[, "logFC"], merge_perm_noperm[, species])
          print(cor_noperm)
        }
      }
      spec_pval_mat[rownames(toptab),      species] <- toptab$P.Value
      spec_coef_mat[rownames(toptab),      species] <- toptab$logFC
      spec_avexpr_mat[rownames(toptab),    species] <- toptab$AveExpr
      coef_se <- sqrt(fit$s2.post) * fit$stdev.unscaled
      spec_coef_se_mat[rownames(coef_se),  species] <- coef_se[, "SexMale"]
      spec_resid_sd_mat[rownames(coef_se), species] <- sqrt(fit$s2.post)
    }
    cat("Combining p-values...\n")
    avexpr_covar <- calculateCovariances(t(spec_avexpr_mat[rownames(toptab), ]))
    comb_pvals_emb <- apply(spec_pval_mat, 1, combinePValues, covar_matrix = avexpr_covar)
    comb_pvals_min <- apply(spec_pval_mat, 1, min)
    pvals_mat <- cbind(spec_pval_mat, comb_pvals_emb)
    colnames(pvals_mat) <- c(colnames(spec_pval_mat), "P.Value")
    de_retlist <- list(spec_coef_mat, spec_coef_se_mat, spec_resid_sd_mat, pvals_mat)
    names(de_retlist) = c("coef", "coef_se", "resid_sd", "pval")
    return(list(de_retlist, data))
  }
}
maddydoak/DoakThesis2020 documentation built on March 5, 2020, 12:53 a.m.