#' Determine which associations are significant
#'
#' Determines which associations are significant using emperical
#' bayes and false discovery rate adjustment.
#' @param scores Either a vector or a symmetric matrix of association scores.
#' @param threshold The threshold for false discovery rate, used to determine
#' significance; scores with ikelihood ratios above this threshold are
#' set to zero. If NULL, all scores are preserved.
#' @param transformation (option) Function applied to scores before
#' estimating the null distribution. If provided, the function should take
#' a vector as input and return a vector for output.
#' @param robust Should robust estimates of mean and variance be used?
#' @param gene_names (optional) Vector of gene names.
#' @param include_likelihood Should the matrix of likelihood ratios be provided
#' in the output?
#' @param show_plot If true, a histogram of scores with estimated density
#' and null distribution is plotted.
#' @param ignore_zeroes If true, zeroes in scores input will be ignored.
#' @return A list containing: `scores`, the scores with non-significant values
#' set to zero; `mu_f0`, the estimate of mu for the null distribution; `sigma_f0`,
#' the estimate of sigma for the null distribution; `f`, the empirical density
#' estimate; and `likelihood`, (optional) the likelihood ratios (fdr rates)
#' for each score.
#' @export
#' @note The robust estimators used are median and MAD. If the majority of scores
#' are zero, the robust estimate for sigma might be zero; in this case the
#' usual estimator is used instead.
fdr <- function(scores,
threshold = 0.05,
transformation = NULL,
robust = TRUE,
include_likelihood = FALSE,
show_plot = TRUE,
ignore_zeroes = TRUE) {
# Determine whether scores is a vector or matrix.
if(is.matrix(scores)) {
if(!isSymmetric(unname(scores), tol = 10^-10)) stop("scores is not symmetric.")
score_names <- colnames(scores) # Keep column names for scores.
scores <- scores[lower.tri(scores)] # Turn scores into vector.
matrix_provided <- TRUE
} else if(is.vector(scores)) {
matrix_provided <- FALSE
} else {
stop("scores is neither a vector nor symmetric matrix.")
}
# Check that scores are not all zero (or have zero variability).
if(all(scores == 0) || sd(scores) == 0) {
warning("All scores have the same value. Returning scores unchanged.")
if(matrix_provided) {
# Put scores back into matrix form.
scores <- vector_to_matrix(scores, diag_val = 0)
colnames(scores) <- score_names
}
fdr_return_list <- list(scores = scores,
mu_f0 = NA,
sigma_f0 = NA,
f = NA,
transformation = transformation)
return(fdr_return_list)
}
index_include <- 1:length(scores)
if(ignore_zeroes) {
index_include <- which(scores != 0)
}
# Apply transformation, if one is provided.
if(!is.null(transformation)) {
scores[index_include] <- transformation(scores[index_include])
if(!is.vector(scores)) stop("transformation did not return a vector as output.")
}
# Estimate paramters of null distribution (assumed to be Gaussian).
if(robust) {
mu_f0 <- median(scores[index_include]) # Use median to account for any skewness.
sigma_f0 <- 1.4826 * median(abs(scores[index_include] - median(scores[index_include]))) # Use 1.4826 * MAD.
}
if(!robust || (robust && sigma_f0 == 0)){
mu_f0 <- mean(scores[index_include])
sigma_f0 <- sd(scores[index_include])
}
# Emperical Bayes FDR. Save likelihood ratios.
f <- density(scores[index_include],
kernel = "gaussian")
likelihood <- rep(1, length(scores))
likelihood[index_include] <- dnorm(scores[index_include], mu_f0, sigma_f0) /
approx(f, xout = scores[index_include])$y
# Generate plot of requested.
if(show_plot) {
hist(scores[index_include], freq = FALSE)
par(xpd = FALSE) # Keep lines within plot
curve(approx(f, xout = x)$y, col = "blue", add = TRUE)
curve(dnorm(x, mu_f0, sigma_f0), col = "orange", add = TRUE)
if(!is.null(threshold)) {
lower <- scores[(likelihood < threshold) & (scores < mu_f0)]
if(length(lower) > 0) abline(v = max(lower), col = "red")
upper <- scores[(likelihood < threshold) & (scores > mu_f0)]
if(length(upper) > 0) abline(v = min(upper), col = "red")
}
}
# Apply thresholding if provided.
if(!is.null(threshold)) {
scores[likelihood > threshold] <- 0
} else {
# If no threshold is provided, return likelihood ratios.
include_likelihood <- TRUE
}
if(matrix_provided) {
# Put scores back into matrix form.
scores <- vector_to_matrix(scores, diag_val = 0)
colnames(scores) <- score_names
# If likelihoods are requested, put them into matrix form.
if(include_likelihood) {
likelihood <- vector_to_matrix(likelihood, diag_val = Inf)
colnames(likelihood) <- score_names
}
} else {
if(include_likelihood) {
names(likelihood) <- names(scores) # Pair score names with likelihoods.
}
}
fdr_return_list <- list(scores = scores,
mu_f0 = mu_f0,
sigma_f0 = sigma_f0,
f = f,
transformation = transformation)
if(include_likelihood) {
fdr_return_list <- c(fdr_return_list,
list(likelihood = likelihood))
}
return(fdr_return_list)
}
#' Turn a vector into a matrix
#'
#' The input vector is assumed to be the lower.tri of a symmetric matrix. This
#' function recreates the original matrix from the vector.
#' @param x the vector containing lower.tri entries of a symmetric matrix.
#' @param diag_val value to put along the diagonal.
#' @return A symmetric matrix.
#' @export
vector_to_matrix <- function(x, diag_val = 0) {
if(!is.vector(x)) stop("x should be a vector.")
p <- sqrt(1 + 8 * length(x)) / 2 + 0.5 # Dimension of original matrix.
if((p %% 1) != 0) stop("x is not the lower.tri entries of a square matrix.")
y <- matrix(0, p, p) # Setup the output matrix
y[lower.tri(y)] <- x # Add x to the lower.tri
y <- y + t(y) # Add x to the upper.tri (maintaining symmetry).
diag(y) <- diag_val
return(y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.