Nothing
# Author: Lerato E. Magosi
# R version: 3.1.0 (2014-04-10)
# Platform: x86_64-apple-darwin10.8.0 (64-bit)
# Date: 20Jul2018
# Acknowledgements: Wolfgang Viechtbauer
# Goal: Generate forestplots that show SPRE statistics to highlight overly influential
# outlier studies with the potential to inflate average genetic effects in genetic
# association meta-analyses.
# Required libraries ---------------------------
# metafor # for performing fixed and random-effects meta-analysis
# utils # needed for the following functions: str, head, tail
# dplyr # for sorting dataframes
# plotrix # for adding lines of predetermined length to plots
# colorspace # colorspace, RColorBrewer and colorRamps are for loading colour palettes
# RColorBrewer
# colorRamps
# Calling globalVariables on the following variables to address
# the note: "no visible binding for global variable" generated by "R CMD check"
utils::globalVariables(c("snp", "study", "str"))
# Function: generate_spre_forestplot
#
#
# parameters:
#
#
# beta_in (numeric) vector of effect-size estimates,
# se_in (numeric) vector of standard errors genomically corrected at study-level,
# study_names_in (character) vector of study names,
# variant_names_in (character) vector of variant names,
# tau2_method (character) method to estimate heterogeneity: either "DL" or "REML",
# spres_in (numeric) vector of SPRE statistics,
# spre_colour_palette (character) vector specifying the colour palette for observed study effects,
# set_studyNOs_as_studyIDs (boolean) specifying whether study numbers should be used as study IDs,
# set_study_field_width (character) vector of format strings akin to the fmt character vector in the sprintf function,
# set_cex (character) scalar and symbol expansion factor indicating scaling for text and symbols,
# set_xlim (numeric) vector of length 2 indicating the horizontal limits of the plot region,
# set_ylim (numeric) vector of length 2 indicating the y-axis limits of the plot,
# set_at (numeric) vector indicating position of the x-axis tick marks and corresponding labels,
# adjust_labels (numeric) scalar value that tweaks label positions,
# save_plot (boolean) specifying that the forestplot should be saved as a tiff file
#
#
# returns: a list containing:
#
#
# number_variants (numeric),
# number_studies (numeric),
# fixed_effect_results (list),
# random_effects_results (list),
# spre_forestplot_dataset (dataframe)
#
#
# ------------------------------------------------------------------------------------
generate_spre_forestplot <- function(beta_in, se_in, study_names_in, variant_names_in,
spres_in, spre_colour_palette = c("mono_colour", "black"),
set_studyNOs_as_studyIDs = FALSE, set_study_field_width = "%02.0f",
set_cex = 0.66, set_xlim, set_ylim, set_at, tau2_method = "DL",
adjust_labels = 1, save_plot = FALSE,
verbose_output = FALSE, ...) {
if (missing(set_xlim)) { set_xlim <- NULL }
if (missing(set_at)) { set_at <- NULL }
if (missing(set_ylim)) { set_ylim <- NULL }
# Assemble dataset
beta <- base::as.numeric(beta_in)
se <- base::as.numeric(se_in)
study_names <- base::factor(study_names_in)
variant_names <- base::factor(variant_names_in)
usta <- base::as.numeric(spres_in)
m <- base::data.frame(beta, se, variant_names, study_names, usta)
# View dataset structure
if (verbose_output) { base::writeLines("\nShowing dataset structure\u003A \n"); utils::str(m) }
# View first six lines of dataset
if (verbose_output) { base::writeLines("\nShowing first six lines of dataset\u003A \n"); base::print(utils::head(m)) }
# ---------------------------
# Assign study numbers
m$study <- m$study_names
base::levels(m$study) <- base::seq_along(base::levels(m$study))
# Calculate no. of studies
nstudies <- base::nlevels(m$study_names)
# Assign snp/variant numbers
m$snp <- m$variant_names
base::levels(m$snp) <- base::seq_along(base::levels(m$snp))
# Calculate no. of variants
nsnps <- base::nlevels(m$variant_names)
# Print numbers of studies and snps
if (verbose_output) {
base::writeLines("\n")
base::print(base::paste("Summary\u003A This analysis is based on ", nsnps, " SNP(s) and ", nstudies, " studies"))
base::print("***************** end of part 1\u003A assign snp and study numbers ****************")
base::writeLines("")
}
# Sort data.frame by snp then betas
m_sortby_betas <- dplyr::arrange(m, snp, beta)
if (verbose_output) { base::writeLines("Showing first six lines of dataset sorted by snp then by beta\u003A \n"); base::print(utils::head(m_sortby_betas)) }
# -----------------------------------
# Split dataset by snp to obtain mini-datasets for each snp, then call compute_fe and compute_re on each snp
list_snp_minidatasets <- base::split(m_sortby_betas, as.factor(m_sortby_betas$variant_names))
if (verbose_output) { base::writeLines("\nSplitting dataset by snp to obtain mini-datasets for each snp\u003A \n"); utils::str(list_snp_minidatasets) }
# Define function to compute inverse-variance weighted fixed-effect model
compute_fe <- function(dframe_current_snp) {
# Perform fixed-effect meta-analysis with the outlier studies present if any:
if (set_studyNOs_as_studyIDs) {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), sprintf(set_study_field_width, dframe_current_snp[, "study"]), sep = " "), method = "FE")
} else {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), dframe_current_snp[, "study_names"], sep = " "), method = "FE")
}
# ----------
# store results to return in a list
output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
output
}
metafor_results_fe <- base::lapply(list_snp_minidatasets, compute_fe)
if (verbose_output) { base::writeLines("\nFixed-effect meta-analysis results\u003A \n"); base::print(metafor_results_fe) }
# Expect to get warnings on variants where some studies have NAs and that is ok.
# warnings()
# -----------------------------------
# Define function to compute inverse-variance weighted random effects model
compute_re <- function(dframe_current_snp) {
# Perform random effects meta-analyses with the outlier studies present if any:
# Run random effects meta-analysis (inverse variance weighted least sq regression) on current snp
if (tau2_method == "DL") {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method)
}
else {
metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method, control=list(stepadj=0.5, maxiter=10000))
}
# ----------
# store results to return in a list
output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
output
}
metafor_results_re <- base::lapply(list_snp_minidatasets, compute_re)
if (verbose_output) { base::writeLines("\nRandom-effects meta-analysis results\u003A \n"); base::print(metafor_results_re) }
# Expect to get warnings on variants where some studies have NAs and that is ok.
# warnings()
if (verbose_output) base::print("***************** end of part 2: Conduct fixed and random-effects meta-analyses ****************")
# -----------------------------------
# Generating forest plots
generate_forestplot_outlier_present <- function(item_no) {
# Display structure of current mini-dataset
if (verbose_output) { base::writeLines("\nDisplaying dataset structure for current snp\u003A \n"); utils::str(list_snp_minidatasets[[item_no]]) }
# Set foresplot parameters
basic_forestplot_params <- metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present)
grDevices::dev.off()
if(is.null(set_xlim)) { set_xlim <- basic_forestplot_params$xlim }
if(is.null(set_at)) { set_at <- basic_forestplot_params$at }
set_ilab.xpos <- set_xlim[1] - (0.36 * set_xlim[1])
if(is.null(set_ylim)) { set_ylim <- basic_forestplot_params$ylim }
# Display forestplot parameters
if (verbose_output) {
base::writeLines("Forestplot params\u003A \n");
base::print("set_ylim\u003A "); base::print(set_ylim)
base::print("set_xlim\u003A "); base::print(set_xlim)
base::print("set_at\u003A "); base::print(set_at)
base::print("rows\u003A "); base::print(basic_forestplot_params$rows)
}
# --------------------------------------
# set margins
opar <- graphics::par(no.readonly =TRUE)
base::on.exit(graphics::par(opar))
op <- graphics::par(cex=set_cex, mar=c(4,4,1,2))
# Generate plot
forestplot_name_oulier_present <- base::paste0("forestplot_fixed_effect_sortedby_betas_variant_name_", (metafor_results_fe[[item_no]])$current_snp_name, "_snp_no_", (metafor_results_fe[[item_no]])$current_snp_no, ".tif")
if (save_plot) { grDevices::tiff(forestplot_name_oulier_present, width = 17.35, height = 23.35, units = "cm", res = 600, compression = "lzw", pointsize = 14) }
metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present, xlim=set_xlim, at=set_at, cex=0.64, xlab = "log odds ratios", addfit=TRUE, mlab = "Summary effect")
# Adding labels
graphics::text(set_xlim[2], set_ylim[2] - adjust_labels, "Effect-size [95% CI]", pos=2, cex=set_cex)
graphics::text(0.0, set_ylim[2] - adjust_labels, (metafor_results_fe[[item_no]])$current_snp_name, cex=set_cex)
graphics::text(set_xlim[1], set_ylim[2] - adjust_labels, "SPRE Study", pos=4, cex=set_cex)
#graphics::text(set_xlim[1] - (0.37 * set_xlim[1]), set_ylim[2] - 1, "SPRE", pos=2, cex=0.66)
plotrix::ablineclip(v = (metafor_results_fe[[item_no]]$metafor_results_outlier_present)$b[1], y1 = -1, y2 = set_ylim[2] - 2, lty="twodash", col = "grey0")
# --------------------------------------
# add colour gradient
wi <- 1/base::sqrt((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$vi)
psize <- wi/base::sum(wi)
psize <- (psize - base::min(psize)) / (base::max(psize) - base::min(psize))
psize <- (psize * 1.0) + 0.5
df_usta_color <- base::data.frame("usta" = list_snp_minidatasets[[item_no]][, "usta"])
# ---------
if (spre_colour_palette[1] == "mono_colour") {
df_usta_color$color <- spre_colour_palette[2]
} else if (spre_colour_palette[1] == "dual_colour") {
df_usta_color$color <- spre_colour_palette[2]
df_usta_color[, "color"][df_usta_color$usta < 0] <- spre_colour_palette[3]
} else if (spre_colour_palette[1] == "multi_colour") {
colorspace_hcl_hsv_palettes <- c("rainbow_hcl", "diverge_hcl", "terrain_hcl", "sequential_hcl", "diverge_hsv")
gr_devices_palettes <- c("rainbow", "cm.colors", "topo.colors", "terrain.colors", "heat.colors")
color_ramps_palettes <- c("matlab.like", "matlab.like2", "magenta2green", "cyan2yellow", "blue2yellow", "green2red", "blue2green", "blue2red")
# colorspace_hcl_hsv_palettes: rainbow_hcl
if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[1]) {
df_usta_color$color <- colorspace::rainbow_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: diverge_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[2]) {
df_usta_color$color <- colorspace::diverge_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: terrain_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[3]) {
df_usta_color$color <- colorspace::terrain_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: sequential_hcl
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[4]) {
df_usta_color$color <- colorspace::sequential_hcl(nrow(df_usta_color))
# colorspace_hcl_hsv_palettes: diverge_hsv
} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[5]) {
df_usta_color$color <- colorspace::diverge_hsv(nrow(df_usta_color))
}
# gr_devices_palette: rainbow
if (spre_colour_palette[2] == gr_devices_palettes[1]) {
#df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color))
df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color), start = 0, end = 5/6)
# gr_devices_palette: cm.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[2]) {
df_usta_color$color <- grDevices::cm.colors(nrow(df_usta_color))
# gr_devices_palette: topo.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[3]) {
df_usta_color$color <- grDevices::topo.colors(nrow(df_usta_color))
# gr_devices_palette: terrain.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[4]) {
df_usta_color$color <- grDevices::terrain.colors(nrow(df_usta_color))
# gr_devices_palette: heat.colors
} else if (spre_colour_palette[2] == gr_devices_palettes[5]) {
df_usta_color$color <- grDevices::heat.colors(nrow(df_usta_color))
}
# color_ramps_palettes: matlab.like
if (spre_colour_palette[2] == color_ramps_palettes[1]) {
df_usta_color$color <- colorRamps::matlab.like(nrow(df_usta_color))
# color_ramps_palettes: matlab.like2
} else if (spre_colour_palette[2] == color_ramps_palettes[2]) {
df_usta_color$color <- colorRamps::matlab.like2(nrow(df_usta_color))
# color_ramps_palettes: magenta2green
} else if (spre_colour_palette[2] == color_ramps_palettes[3]) {
df_usta_color$color <- colorRamps::magenta2green(nrow(df_usta_color))
# color_ramps_palettes: cyan2yellow
} else if (spre_colour_palette[2] == color_ramps_palettes[4]) {
df_usta_color$color <- colorRamps::cyan2yellow(nrow(df_usta_color))
# color_ramps_palettes: blue2yellow
} else if (spre_colour_palette[2] == color_ramps_palettes[5]) {
df_usta_color$color <- colorRamps::blue2yellow(nrow(df_usta_color))
# color_ramps_palettes: green2red
} else if (spre_colour_palette[2] == color_ramps_palettes[6]) {
df_usta_color$color <- colorRamps::green2red(nrow(df_usta_color))
# color_ramps_palettes: blue2green
} else if (spre_colour_palette[2] == color_ramps_palettes[7]) {
df_usta_color$color <- colorRamps::blue2green(nrow(df_usta_color))
# color_ramps_palettes: blue2red
} else if (spre_colour_palette[2] == color_ramps_palettes[8]) {
df_usta_color$color <- colorRamps::blue2red(nrow(df_usta_color))
}
# color_brewer_palettes
if (spre_colour_palette[2] == "color_brewer_palettes") {
# Example of spre_colour_palette vector for color_brewer_palettes: spre_colour_palette = c("multi_colour", "color_brewer_palettes", 9, "BuPu")
df_usta_color$color <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(as.numeric(spre_colour_palette[3]),spre_colour_palette[4]))(nrow(df_usta_color))
}
} # end of: else if (spre_colour_palette[1] == "multi_colour")
# ---------
graphics::points((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi, length((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi):1, pch=15, col=df_usta_color[, "color"], cex=psize)
graphics::par(op)
if (save_plot) grDevices::dev.off()
# --------------------------------------
} # end of: generate_forestplot_outlier_present
# Call function: generate_forestplot_outlier_present
# Generate a sequence of integers for the number of loci (lead variants)
loci <- seq(unique(m$variant_names))
base::writeLines("\nGenerating forest plots\u003A \u002E\u002E\u002E \n")
lapply(loci, generate_forestplot_outlier_present)
base::writeLines("Done\u002E ")
m_sortby_betas <- dplyr::rename(m_sortby_betas, spre = usta, study_numbers = study, variant_numbers = snp)
if (verbose_output) base::print("***************** end of part 3: Generate forest plots ****************")
# -----------------------------------
# List of items to return
base::list(number_variants = nsnps,
number_studies = nstudies,
fixed_effect_results = metafor_results_fe,
random_effects_results = metafor_results_re,
spre_forestplot_dataset = m_sortby_betas)
# ---------------------------
} # end of: generate_spre_forestplot
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.