Nothing
#' Effect size combination meta analysis.
#'
#' Performs effect-size meta-analyses across all studies and seperately for
#' each tissue source.
#'
#'
#' Builds on \code{\link[GeneMeta]{zScores}} function from GeneMeta by allowing for genes
#' that were not measured in all studies. This implementation also uses moderated unbiased
#' effect sizes calculated by \code{\link[metaMA]{effectsize}} from metaMA and determines
#' false discovery rates using \code{\link[fdrtool]{fdrtool}}.
#'
#' @param diff_exprs Previous result of \code{\link{diff_expr}}, which can
#' be reloaded using \code{\link{load_diff}}.
#' @param cutoff Minimum fraction of contrasts that must have measured each gene.
#' Between 0 and 1.
#' @param by_source Should seperate meta-analyses be performed for each tissue
#' source added with \code{\link{add_sources}}?
#'
#' @return A list of named lists, one for each tissue source. Each list contains
#' two named data.frames. The first, \code{filt}, has all the columns below for genes
#' present in cutoff or more fraction of contrasts. The second, \code{raw}, has only
#' \code{dprime} and \code{vardprime} columns, but for all genes (NAs for genes
#' not measured by a given contrast).
#' \item{dprime}{Unbiased effect sizes (one column per contrast).}
#' \item{vardprime}{Variances of unbiased effect sizes (one column per contrast).}
#' \item{mu}{Overall mean effect sizes.}
#' \item{var}{Variances of overall mean effect sizes.}
#' \item{z}{Overall z score = \code{mu / sqrt(var)}.}
#' \item{fdr}{False discovery rates calculated from column \code{z} using fdrtool.}
#' \item{pval}{p-values calculated from column \code{z} using fdrtool.}
#'
#' @export
#'
#' @examples
#'
#' library(lydata)
#'
#' # location of data
#' data_dir <- system.file("extdata", package = "lydata")
#'
#' # gather GSE names
#' gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689")
#'
#' # load previous analysis
#' anals <- load_diff(gse_names, data_dir)
#'
#' # add tissue sources to perform seperate meta-analyses for each source (optional)
#' # anals <- add_sources(anals, data_dir)
#'
#' # perform meta-analysis
#' es <- es_meta(anals, by_source = TRUE)
es_meta <- function(diff_exprs, cutoff = 0.3, by_source = FALSE) {
# used for analysis per diff_exprs
es_meta_src <- function(diff_exprs, cutoff) {
# get dp and vardp
es <- get_es(diff_exprs, cutoff)
# if just one contrast, use its values
if (ncol(es$filt) == 2) {
es$filt$mu <- es$filt[[1]]
es$filt$var <- es$filt[[2]]
es$filt$fdr <- diff_exprs[[1]]$top_tables[[1]]$adj.P.Val
return(es)
}
# only proceed with genes present in 2+ studies
df <- es$filt
row_nas <- rowSums(is.na(df))
can_ma <- row_nas < ncol(df) / 2
df <- df[can_ma, ]
dp <- df[, seq(1, ncol(df), 2)]
var <- df[, seq(2, ncol(df), 2)]
# get Cochran Q statistic
Q <- f.Q(dp, var)
# get tau (between study variance)
tau <- tau2.DL(Q,
num.studies = apply(var, 1, function(x) sum(!is.na(x))),
my.weights = 1 / var)
# add tau to vardp then calculate mean effect sizes and variance
var <- var + tau
df$mu <- mu.tau2(dp, var)
df$var <- var.tau2(var)
# get z-score and fdr
df$z <- df$mu/sqrt(df$var)
fdr <- fdrtool::fdrtool(df$z, plot = FALSE, verbose = FALSE)
df$pval <- fdr$pval
df$fdr <- fdr$qval
es$filt <- df[order(df$fdr), ]
return(es)
}
if (by_source) {
# check for sources
null_sources <- sapply(diff_exprs, function(anal) is.null(anal$sources))
if (any(null_sources)) {
message("Sources missing from diff_exprs (to add, use add_sources).\nContinuing with by_source = FALSE.")
es <- list(all = es_meta_src(diff_exprs, cutoff))
} else {
anals_src <- list(all = diff_exprs)
anals_src <- c(anals_src, setup_src(diff_exprs, "top_tables"))
es <- lapply(anals_src, es_meta_src, cutoff)
}
} else {
es <- list(all = es_meta_src(diff_exprs, cutoff))
}
return(es)
}
# used by es_meta and path_meta to group top/padog tables by source
setup_src <- function(anals, ttype = c("top_tables", "padog_tables")) {
anals_srcs <- list()
# contrast sources
con_src <- unlist(sapply(unname(anals), `[[`, 'sources'))
# get added pairs
added_prs <- lapply(anals, function(anal) anal$pair)
added_prs <- unique(unlist(added_prs, recursive = FALSE, use.names = FALSE))
# get non-pair sources
is_prs <- con_src %in% unlist(added_prs)
added_src <- sapply(unique(con_src[!is_prs]), list, USE.NAMES = FALSE)
# get anals by source
for (src in c(added_prs, added_src)) {
src_name <- paste(src, collapse = ', ')
anals_srcs[[src_name]] <- lapply(anals, function(anal) {
is_src <- con_src[names(anal[[ttype]])] %in% src
if (any(is_src)) {
anal[[ttype]] <- anal[[ttype]][is_src]
anal
}
else NULL
})
}
# remove NULL entries
anals_srcs <- lapply(anals_srcs, function(anals_src) anals_src[!sapply(anals_src, is.null)])
return(anals_srcs)
}
# Get dprimes and vardprimes values for each contrast.
#
# @inheritParams es_meta
# @return data.frame with dprime and vardprime values.
get_es <- function(diff_exprs, cutoff = 0.3) {
# add dprimes and vardprimes to top tables
diff_exprs <- add_es(diff_exprs)
# get top tables
es <- lapply(diff_exprs, function(study) study$top_tables)
nm <- unlist(lapply(es, names))
nm <- paste(c('dprime', 'vardprime'), rep(nm, each=2), sep='.')
es <- unlist(es, recursive = FALSE, use.names = FALSE)
# get desired top table columns
es <- lapply(es, function(top) {
top$SYMBOL <- row.names(top)
top[, c("SYMBOL", "dprime", "vardprime")]
})
# merge dataframes
es <- merge_dataframes(es)
names(es) <- nm
# only keep genes where more than cutoff fraction of studies have data
filt <- apply(es, 1, function(x) sum(!is.na(x))) >= (ncol(es) * cutoff)
return(list(filt = es[filt, ], raw = es))
}
# Add metaMA effectsize values to top tables.
#
# Used internally by \code{setup_combo_data} and \code{\link[crossmeta]{es_meta}}
# to add moderated unbiased standardised effect sizes (dprimes) to top tables
# from differential expression analysis.
#
# @param diff_exprs Result from call to \code{\link[crossmeta]{diff_expr}}.
# @param cols Columns from \code{\link[metaMA]{effectsize}} result to add to
# top tables.
#
# @export
# @seealso \link[crossmeta]{diff_expr}, \link[crossmeta]{es_meta}.
#
# @return diff_exprs with specified columns added to top_tables for each contrast.
#
# @examples
# library(crossmeta)
# library(lydata)
#
# # location of raw data
# data_dir <- system.file("extdata", package = "lydata")
#
# # load previous analysis for eset
# anal <- load_diff("GSE9601", data_dir)
#
# # add dprime and vardprime to top tables
# anal <- add_es(anal)
add_es <- function(diff_exprs, cols = c("dprime", "vardprime")) {
for (i in seq_along(diff_exprs)) {
# get study degrees of freedom and group classes
study <- diff_exprs[[i]]
df <- study$ebayes_sv$df.residual + study$ebayes_sv$df.prior
classes <- study$pdata$group
for (con in names(study$top_tables)) {
# get group names for contrast
groups <- gsub("GSE.+?_", "", con)
groups <- strsplit(groups, "-")[[1]]
# get sample sizes for groups
ni <- sum(classes == groups[2], na.rm = TRUE)
nj <- sum(classes == groups[1], na.rm = TRUE)
# bind effect size values with top table
tt <- study$top_tables[[con]]
es <- metaMA::effectsize(tt$t, ((ni * nj)/(ni + nj)), df)[, cols, drop = FALSE]
tt <- cbind(tt, es)
# store results
study$top_tables[[con]] <- tt
}
diff_exprs[[i]] <- study
}
return(diff_exprs)
}
# Merge a list of data.frames.
#
# @param ls List of data.frames.
# @param key Column to merge data.frames on.
#
# @return A merged data.frame with \code{key} set to row names.
merge_dataframes <- function(ls, keys = "SYMBOL") {
# ensure non 'by' names are not duplicated
ls = Map(function(x, i)
stats::setNames(x, ifelse(names(x) %in% keys,
names(x),
sprintf('%s.%d', names(x), i))),
ls, seq_along(ls))
# merge list
res <- Reduce(function(...) merge(..., by = keys, all = TRUE), ls)
# format result
row.names(res) <- res[, keys[1]]
res[, keys[1]] <- NULL
return(res)
}
# Modifed f.Q from GeneMeta (allows NAs)
#
# Compute Cochran's Q statistic. Allows genes that were not measured in all
# studies.
#
# @param dadj Dataframe of unbiased effect sizes (dprimes) for each contrast.
# @param varadj Dataframe of variances for unbiased effect sizes (vardprimes)
# for each contrast.
#
# @return A vector of length equal to the number of rows of dadj with the Q
# statistics.
f.Q <- function (dadj, varadj) {
w <- 1/varadj
tmp1 <- w * dadj
mu <- rowSums(tmp1, na.rm = TRUE)/rowSums(w, na.rm = TRUE)
Q <- rowSums(w * (dadj - mu)^2, na.rm = TRUE)
}
# Modifed tau2.DL from GeneMeta (allows NAs)
# tau2.DL is an estimation of tau in a random effects model (REM) using
# Cochran's Q statistic. Allows genes that were not measured in all studies.
#
# @param Q A vector of Cochran's Q statistics.
# @param num.studies A vector specifying the number of experiments in which each
# gene was measured.
# @param my.weights A matrix with one column for each experiment containing the
# variances of the effects that should be combined.
#
# @return A vector of tau values.
tau2.DL <- function (Q, num.studies, my.weights) {
tmp1 <- rowSums(my.weights, na.rm = TRUE)
tmp2 <- rowSums(my.weights^2, na.rm = TRUE)
value <- cbind((Q - (num.studies - 1))/(tmp1 - (tmp2/tmp1)), 0)
apply(value, 1, max)
}
# Modifed mu.tau2 from GeneMeta (allows NAs)
#
# Estimate overall mean effect sizes. Allows genes that were not measured in all
# studies.
#
# @param my.d A matrix, with one column for each experiment, containing the
# effects that should be combined.
# @param my.vars.new A matrix, with one column for each experiment, containing
# the variances of the effects that should be combined.
#
# @return A vector with the estimates of the overall mean effect sizes.
mu.tau2 <- function (my.d, my.vars.new) {
w <- 1/my.vars.new
tmp1 <- w * my.d
mu <- rowSums(tmp1, na.rm = TRUE)/rowSums(w, na.rm = TRUE)
}
# Modifed var.tau2 from GeneMeta (allows NAs)
#
# Estimate variances of overall mean effect sizes. Allows genes that were not
# measured in all studies.
#
# @inheritParams mu.tau2
#
# @return
var.tau2 <- function (my.vars.new) {
w <- 1/my.vars.new
my.var <- 1/rowSums(w, na.rm = TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.