# Author: Lerato E. Magosi
# R version: 3.1.0 (2014-04-10)
# Platform: x86_64-apple-darwin10.8.0 (64-bit)
# Date: 20Jul2018
# Acknowledgements: Wolfgang Viechtbauer
# Goal: Generate forestplots that show SPRE statistics to highlight overly influential
# outlier studies with the potential to inflate average genetic effects in genetic
# association meta-analyses.
# Required libraries ---------------------------
# metafor # for performing fixed and random-effects meta-analysis
# utils # needed for the following functions: str, head, tail
# dplyr # for sorting dataframes
# plotrix # for adding lines of predetermined length to plots
# colorspace # colorspace, RColorBrewer and colorRamps are for loading colour palettes
# RColorBrewer
# colorRamps
# Calling globalVariables on the following variables to address
# the note: "no visible binding for global variable" generated by "R CMD check"
utils::globalVariables(c("snp", "study", "str"))
# Function: generate_spre_forestplot
#
#
# parameters:
#
#
# beta_in (numeric) vector of effect-size estimates,
# se_in (numeric) vector of standard errors genomically corrected at study-level,
# study_names_in (character) vector of study names,
# variant_names_in (character) vector of variant names,
# tau2_method (character) method to estimate heterogeneity: either "DL" or "REML",
# spres_in (numeric) vector of SPRE statistics,
# spre_colour_palette (character) vector specifying the colour palette for observed study effects,
# set_studyNOs_as_studyIDs (boolean) specifying whether study numbers should be used as study IDs,
# set_study_field_width (character) vector of format strings akin to the fmt character vector in the sprintf function,
# set_cex (character) scalar and symbol expansion factor indicating scaling for text and symbols,
# set_xlim (numeric) vector of length 2 indicating the horizontal limits of the plot region,
# set_ylim (numeric) vector of length 2 indicating the y-axis limits of the plot,
# set_at (numeric) vector indicating position of the x-axis tick marks and corresponding labels,
# adjust_labels (numeric) scalar value that tweaks label positions,
# save_plot (boolean) specifying that the forestplot should be saved as a tiff file
#
#
# returns: a list containing:
#
#
# number_variants (numeric),
# number_studies (numeric),
# fixed_effect_results (list),
# random_effects_results (list),
# spre_forestplot_dataset (dataframe)
#
#
# ------------------------------------------------------------------------------------
generate_spre_forestplot <- function(beta_in, se_in, study_names_in, variant_names_in,
spres_in, spre_colour_palette = c("mono_colour", "black"),
set_studyNOs_as_studyIDs = FALSE, set_study_field_width = "%02.0f",
set_cex = 0.66, set_xlim, set_ylim, set_at, tau2_method = "DL",
adjust_labels = 1, save_plot = FALSE,
verbose_output = FALSE, ...) {
if (missing(set_xlim)) { set_xlim <- NULL }
if (missing(set_at)) { set_at <- NULL }
if (missing(set_ylim)) { set_ylim <- NULL }
# Assemble dataset
beta <- base::as.numeric(beta_in)
se <- base::as.numeric(se_in)
study_names <- base::factor(study_names_in)
variant_names <- base::factor(variant_names_in)
usta <- base::as.numeric(spres_in)
m <- base::data.frame(beta, se, variant_names, study_names, usta)
# View dataset structure
if (verbose_output) { base::writeLines("\nShowing dataset structure\u003A \n"); utils::str(m) }
# View first six lines of dataset
if (verbose_output) { base::writeLines("\nShowing first six lines of dataset\u003A \n"); base::print(utils::head(m)) }
# ---------------------------
# Assign study numbers
m$study <- m$study_names
base::levels(m$study) <- base::seq_along(base::levels(m$study))
# Calculate no. of studies
nstudies <- base::nlevels(m$study_names)
# Assign snp/variant numbers
m$snp <- m$variant_names
base::levels(m$snp) <- base::seq_along(base::levels(m$snp))
# Calculate no. of variants
nsnps <- base::nlevels(m$variant_names)
# Print numbers of studies and snps
if (verbose_output) {
base::writeLines("\n")
base::print(base::paste("Summary\u003A This analysis is based on ", nsnps, " SNP(s) and ", nstudies, " studies"))
base::print("***************** end of part 1\u003A assign snp and study numbers ****************")
base::writeLines("")
}
# Sort data.frame by snp then betas
m_sortby_betas <- dplyr::arrange(m, snp, beta)
if (verbose_output) { base::writeLines("Showing first six lines of dataset sorted by snp then by beta\u003A \n"); base::print(utils::head(m_sortby_betas)) }
# -----------------------------------
# Split dataset by snp to obtain mini-datasets for each snp, then call compute_fe and compute_re on each snp
list_snp_minidatasets <- base::split(m_sortby_betas, as.factor(m_sortby_betas$variant_names))
if (verbose_output) { base::writeLines("\nSplitting dataset by snp to obtain mini-datasets for each snp\u003A \n"); utils::str(list_snp_minidatasets) }
# Define function to compute inverse-variance weighted fixed-effect model
compute_fe <- function(dframe_current_snp) {
# Perform fixed-effect meta-analysis with the outlier studies present if any:
if (set_studyNOs_as_studyIDs) {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), sprintf(set_study_field_width, dframe_current_snp[, "study"]), sep = " "), method = "FE")
} else {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), dframe_current_snp[, "study_names"], sep = " "), method = "FE")
}
# ----------
# store results to return in a list
output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
output
}
metafor_results_fe <- base::lapply(list_snp_minidatasets, compute_fe)
if (verbose_output) { base::writeLines("\nFixed-effect meta-analysis results\u003A \n"); base::print(metafor_results_fe) }
# Expect to get warnings on variants where some studies have NAs and that is ok.
# warnings()
# -----------------------------------
# Define function to compute inverse-variance weighted random effects model
compute_re <- function(dframe_current_snp) {
# Perform random effects meta-analyses with the outlier studies present if any:
# Run random effects meta-analysis (inverse variance weighted least sq regression) on current snp
if (tau2_method == "DL") {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method)
}
else {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method, control=list(stepadj=0.5, maxiter=10000))
}
# ----------
# store results to return in a list
output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
output
}
metafor_results_re <- base::lapply(list_snp_minidatasets, compute_re)
if (verbose_output) { base::writeLines("\nRandom-effects meta-analysis results\u003A \n"); base::print(metafor_results_re) }
# Expect to get warnings on variants where some studies have NAs and that is ok.
# warnings()
if (verbose_output) base::print("***************** end of part 2: Conduct fixed and random-effects meta-analyses ****************")
# -----------------------------------
# Generating forest plots
generate_forestplot_outlier_present <- function(item_no) {
# Display structure of current mini-dataset
if (verbose_output) { base::writeLines("\nDisplaying dataset structure for current snp\u003A \n"); utils::str(list_snp_minidatasets[[item_no]]) }
# Set foresplot parameters
basic_forestplot_params <- metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present)
grDevices::dev.off()
if(is.null(set_xlim)) { set_xlim <- basic_forestplot_params$xlim }
if(is.null(set_at)) { set_at <- basic_forestplot_params$at }
set_ilab.xpos <- set_xlim[1] - (0.36 * set_xlim[1])
if(is.null(set_ylim)) { set_ylim <- basic_forestplot_params$ylim }
# Display forestplot parameters
if (verbose_output) {
base::writeLines("Forestplot params\u003A \n");
base::print("set_ylim\u003A "); base::print(set_ylim)
base::print("set_xlim\u003A "); base::print(set_xlim)
base::print("set_at\u003A "); base::print(set_at)
base::print("rows\u003A "); base::print(basic_forestplot_params$rows)
}
# --------------------------------------
# set margins
opar <- graphics::par(no.readonly =TRUE)
base::on.exit(graphics::par(opar))
op <- graphics::par(cex=set_cex, mar=c(4,4,1,2))
# Generate plot
forestplot_name_oulier_present <- base::paste0("forestplot_fixed_effect_sortedby_betas_variant_name_", (metafor_results_fe[[item_no]])$current_snp_name, "_snp_no_", (metafor_results_fe[[item_no]])$current_snp_no, ".tif")
if (save_plot) { grDevices::tiff(forestplot_name_oulier_present, width = 17.35, height = 23.35, units = "cm", res = 600, compression = "lzw", pointsize = 14) }
metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present, xlim=set_xlim, at=set_at, cex=0.64, xlab = "log odds ratios", addfit=TRUE, mlab = "Summary effect")
# Adding labels
graphics::text(set_xlim[2], set_ylim[2] - adjust_labels, "Effect-size [95% CI]", pos=2, cex=set_cex)
graphics::text(0.0, set_ylim[2] - adjust_labels, (metafor_results_fe[[item_no]])$current_snp_name, cex=set_cex)
graphics::text(set_xlim[1], set_ylim[2] - adjust_labels, "SPRE Study", pos=4, cex=set_cex)
#graphics::text(set_xlim[1] - (0.37 * set_xlim[1]), set_ylim[2] - 1, "SPRE", pos=2, cex=0.66)
plotrix::ablineclip(v = (metafor_results_fe[[item_no]]$metafor_results_outlier_present)$b[1], y1 = -1, y2 = set_ylim[2] - 2, lty="twodash", col = "grey0")
# --------------------------------------
# add colour gradient
wi <- 1/base::sqrt((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$vi)
psize <- wi/base::sum(wi)
psize <- (psize - base::min(psize)) / (base::max(psize) - base::min(psize))
psize <- (psize * 1.0) + 0.5
df_usta_color <- base::data.frame("usta" = list_snp_minidatasets[[item_no]][, "usta"])
# ---------
if (spre_colour_palette[1] == "mono_colour") {
df_usta_color$color <- spre_colour_palette[2]
} else if (spre_colour_palette[1] == "dual_colour") {
df_usta_color$color <- spre_colour_palette[2]
df_usta_color[, "color"][df_usta_color$usta < 0] <- spre_colour_palette[3]
} else if (spre_colour_palette[1] == "multi_colour") {
colorspace_hcl_hsv_palettes <- c("rainbow_hcl", "diverge_hcl", "terrain_hcl", "sequential_hcl", "diverge_hsv")
gr_devices_palettes <- c("rainbow", "cm.colors", "topo.colors", "terrain.colors", "heat.colors")
color_ramps_palettes <- c("matlab.like", "matlab.like2", "magenta2green", "cyan2yellow", "blue2yellow", "green2red", "blue2green", "blue2red")
# colorspace_hcl_hsv_palettes: rainbow_hcl
if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[1]) {
df_usta_color$color <- colorspace::rainbow_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: diverge_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[2]) {
df_usta_color$color <- colorspace::diverge_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: terrain_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[3]) {
df_usta_color$color <- colorspace::terrain_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: sequential_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[4]) {
df_usta_color$color <- colorspace::sequential_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: diverge_hsv
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[5]) {
df_usta_color$color <- colorspace::diverge_hsv(nrow(df_usta_color))
}
# gr_devices_palette: rainbow
if (spre_colour_palette[2] == gr_devices_palettes[1]) {
#df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color))
df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color), start = 0, end = 5/6)
# gr_devices_palette: cm.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[2]) {
df_usta_color$color <- grDevices::cm.colors(nrow(df_usta_color))
# gr_devices_palette: topo.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[3]) {
df_usta_color$color <- grDevices::topo.colors(nrow(df_usta_color))
# gr_devices_palette: terrain.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[4]) {
df_usta_color$color <- grDevices::terrain.colors(nrow(df_usta_color))
# gr_devices_palette: heat.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[5]) {
df_usta_color$color <- grDevices::heat.colors(nrow(df_usta_color))
}
# color_ramps_palettes: matlab.like
if (spre_colour_palette[2] == color_ramps_palettes[1]) {
df_usta_color$color <- colorRamps::matlab.like(nrow(df_usta_color))
# color_ramps_palettes: matlab.like2
} else if (spre_colour_palette[2] == color_ramps_palettes[2]) {
df_usta_color$color <- colorRamps::matlab.like2(nrow(df_usta_color))
# color_ramps_palettes: magenta2green
} else if (spre_colour_palette[2] == color_ramps_palettes[3]) {
df_usta_color$color <- colorRamps::magenta2green(nrow(df_usta_color))
# color_ramps_palettes: cyan2yellow
} else if (spre_colour_palette[2] == color_ramps_palettes[4]) {
df_usta_color$color <- colorRamps::cyan2yellow(nrow(df_usta_color))
# color_ramps_palettes: blue2yellow
} else if (spre_colour_palette[2] == color_ramps_palettes[5]) {
df_usta_color$color <- colorRamps::blue2yellow(nrow(df_usta_color))
# color_ramps_palettes: green2red
} else if (spre_colour_palette[2] == color_ramps_palettes[6]) {
df_usta_color$color <- colorRamps::green2red(nrow(df_usta_color))
# color_ramps_palettes: blue2green
} else if (spre_colour_palette[2] == color_ramps_palettes[7]) {
df_usta_color$color <- colorRamps::blue2green(nrow(df_usta_color))
# color_ramps_palettes: blue2red
} else if (spre_colour_palette[2] == color_ramps_palettes[8]) {
df_usta_color$color <- colorRamps::blue2red(nrow(df_usta_color))
}
# color_brewer_palettes
if (spre_colour_palette[2] == "color_brewer_palettes") {
# Example of spre_colour_palette vector for color_brewer_palettes: spre_colour_palette = c("multi_colour", "color_brewer_palettes", 9, "BuPu")
df_usta_color$color <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(as.numeric(spre_colour_palette[3]),spre_colour_palette[4]))(nrow(df_usta_color))
}
} # end of: else if (spre_colour_palette[1] == "multi_colour")
# ---------
graphics::points((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi, length((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi):1, pch=15, col=df_usta_color[, "color"], cex=psize)
graphics::par(op)
if (save_plot) grDevices::dev.off()
# --------------------------------------
} # end of: generate_forestplot_outlier_present
# Call function: generate_forestplot_outlier_present
# Generate a sequence of integers for the number of loci (lead variants)
loci <- seq(unique(m$variant_names))
base::writeLines("\nGenerating forest plots\u003A \u002E\u002E\u002E \n")
lapply(loci, generate_forestplot_outlier_present)
base::writeLines("Done\u002E ")
m_sortby_betas <- dplyr::rename(m_sortby_betas, spre = usta, study_numbers = study, variant_numbers = snp)
if (verbose_output) base::print("***************** end of part 3: Generate forest plots ****************")
# -----------------------------------
# List of items to return
base::list(number_variants = nsnps,
number_studies = nstudies,
fixed_effect_results = metafor_results_fe,
random_effects_results = metafor_results_re,
spre_forestplot_dataset = m_sortby_betas)
# ---------------------------
} # end of: generate_spre_forestplot
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.