R/generate_forestplot_incl_spres.R
In getspres: SPRE Statistics for Exploring Heterogeneity in Meta-Analysis

# Author:    Lerato E. Magosi
# R version: 3.1.0 (2014-04-10)
# Platform:  x86_64-apple-darwin10.8.0 (64-bit)
# Date:      20Jul2018

# Acknowledgements: Wolfgang Viechtbauer


# Goal: Generate forestplots that show SPRE statistics to highlight overly influential
#       outlier studies with the potential to inflate average genetic effects in genetic 
#       association meta-analyses.



# Required libraries ---------------------------
# metafor            # for performing fixed and random-effects meta-analysis
# utils              # needed for the following functions: str, head, tail
# dplyr              # for sorting dataframes
# plotrix            # for adding lines of predetermined length to plots
# colorspace         # colorspace, RColorBrewer and colorRamps are for loading colour palettes
# RColorBrewer
# colorRamps


# Calling globalVariables on the following variables to address 
# the note: "no visible binding for global variable" generated by "R CMD check"
utils::globalVariables(c("snp", "study", "str"))


# Function: generate_spre_forestplot 
#
#
# parameters: 
#
#
#   beta_in                   (numeric)   vector of effect-size estimates, 
#   se_in                     (numeric)   vector of standard errors genomically corrected at study-level, 
#   study_names_in            (character) vector of study names, 
#   variant_names_in          (character) vector of variant names,
#   tau2_method               (character) method to estimate heterogeneity: either "DL" or "REML",
#   spres_in                  (numeric)   vector of SPRE statistics,
#   spre_colour_palette       (character) vector specifying the colour palette for observed study effects,
#   set_studyNOs_as_studyIDs  (boolean)   specifying whether study numbers should be used as study IDs,
#   set_study_field_width     (character) vector of format strings akin to the fmt character vector in the sprintf function,
#   set_cex                   (character) scalar and symbol expansion factor indicating scaling for text and symbols,
#   set_xlim                  (numeric)   vector of length 2 indicating the horizontal limits of the plot region,
#   set_ylim                  (numeric)   vector of length 2 indicating the y-axis limits of the plot,
#   set_at                    (numeric)   vector indicating position of the x-axis tick marks and corresponding labels,
#   adjust_labels             (numeric)   scalar value that tweaks label positions,
#   save_plot                 (boolean)   specifying that the forestplot should be saved as a tiff file
#
#
# returns: a list containing:
#
#
#   number_variants            (numeric),
#   number_studies             (numeric),
#   fixed_effect_results       (list),
#   random_effects_results     (list),           
# 	spre_forestplot_dataset    (dataframe)
#
#
# ------------------------------------------------------------------------------------



generate_spre_forestplot <- function(beta_in, se_in, study_names_in, variant_names_in,
                                     spres_in, spre_colour_palette = c("mono_colour", "black"), 
                                     set_studyNOs_as_studyIDs = FALSE, set_study_field_width = "%02.0f",
                                     set_cex = 0.66, set_xlim, set_ylim, set_at, tau2_method = "DL", 
                                     adjust_labels = 1, save_plot = FALSE, 
                                     verbose_output = FALSE, ...) {



	if (missing(set_xlim)) { set_xlim <- NULL }
	
	if (missing(set_at)) { set_at <- NULL }
	
	if (missing(set_ylim)) { set_ylim <- NULL }
	
	
	
	# Assemble dataset

	beta <- base::as.numeric(beta_in)
	se <- base::as.numeric(se_in)
	study_names <- base::factor(study_names_in)
	variant_names <- base::factor(variant_names_in)
	usta <- base::as.numeric(spres_in)
	
	m <- base::data.frame(beta, se, variant_names, study_names, usta)	
		


    # View dataset structure
    if (verbose_output) { base::writeLines("\nShowing dataset structure\u003A \n"); utils::str(m) }
    
    # View first six lines of dataset
    if (verbose_output) { base::writeLines("\nShowing first six lines of dataset\u003A \n"); base::print(utils::head(m)) }

    # ---------------------------
    
    
    # Assign study numbers
	m$study <- m$study_names
	base::levels(m$study) <- base::seq_along(base::levels(m$study))

    # Calculate no. of studies
    nstudies <- base::nlevels(m$study_names)
    
    # Assign snp/variant numbers
    m$snp <- m$variant_names
    base::levels(m$snp) <- base::seq_along(base::levels(m$snp))

    # Calculate no. of variants
    nsnps <- base::nlevels(m$variant_names)

    # Print numbers of studies and snps
	if (verbose_output) {

		base::writeLines("\n")
		base::print(base::paste("Summary\u003A This analysis is based on ", nsnps, " SNP(s) and ", nstudies, " studies"))
		base::print("***************** end of part 1\u003A assign snp and study numbers	 ****************")
		base::writeLines("")
		
		}

	# Sort data.frame by snp then betas
	m_sortby_betas <- dplyr::arrange(m, snp, beta)
    if (verbose_output) { base::writeLines("Showing first six lines of dataset sorted by snp then by beta\u003A \n"); base::print(utils::head(m_sortby_betas)) }

	# -----------------------------------



	# Split dataset by snp to obtain mini-datasets for each snp, then call compute_fe and compute_re on each snp
	list_snp_minidatasets <- base::split(m_sortby_betas, as.factor(m_sortby_betas$variant_names))
	if (verbose_output) { base::writeLines("\nSplitting dataset by snp to obtain mini-datasets for each snp\u003A \n"); utils::str(list_snp_minidatasets) }
	


	# Define function to compute inverse-variance weighted fixed-effect model
	compute_fe <- function(dframe_current_snp) {


		# Perform fixed-effect meta-analysis with the outlier studies present if any: 

		if (set_studyNOs_as_studyIDs) {
			
				metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), sprintf(set_study_field_width, dframe_current_snp[, "study"]), sep = "    "), method = "FE")
			
		
		} else {
		
				metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, slab = paste(formatC(dframe_current_snp[, "usta"], digits = 2, format = "f", flag = " "), dframe_current_snp[, "study_names"], sep = "    "), method = "FE")
			
		
		}
		# ----------
	
		
		# store results to return in a list
		output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
	
		output
	
		}


	metafor_results_fe <- base::lapply(list_snp_minidatasets, compute_fe)

	if (verbose_output) { base::writeLines("\nFixed-effect meta-analysis results\u003A \n"); base::print(metafor_results_fe) }

	# Expect to get warnings on variants where some studies have NAs and that is ok.
	# warnings()


	# -----------------------------------



	# Define function to compute inverse-variance weighted random effects model
	compute_re <- function(dframe_current_snp) {


		# Perform random effects meta-analyses with the outlier studies present if any: 

        # Run random effects meta-analysis (inverse variance weighted least sq regression) on current snp
        
        if (tau2_method == "DL") {

			metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method)        
        } 
        else {
        
			metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = TRUE, knha = TRUE, slab = sprintf("%02.0f", dframe_current_snp[, "study"]), method = tau2_method, control=list(stepadj=0.5, maxiter=10000))        
        }
        


		# ----------
	

		# store results to return in a list
		output <- base::list(metafor_results_outlier_present = metafor_res, current_snp_name = unique(dframe_current_snp[, "variant_names"]), current_snp_no = unique(dframe_current_snp[, "snp"]))
	
		output
	
		}


	metafor_results_re <- base::lapply(list_snp_minidatasets, compute_re)

	if (verbose_output) { base::writeLines("\nRandom-effects meta-analysis results\u003A \n"); base::print(metafor_results_re) }

	# Expect to get warnings on variants where some studies have NAs and that is ok.
	# warnings()


	if (verbose_output) base::print("***************** end of part 2: Conduct fixed and random-effects meta-analyses	 ****************")

	# -----------------------------------


	# Generating forest plots 


	generate_forestplot_outlier_present <- function(item_no) {


		# Display structure of current mini-dataset
		if (verbose_output) { base::writeLines("\nDisplaying dataset structure for current snp\u003A \n"); utils::str(list_snp_minidatasets[[item_no]]) }

		# Set foresplot parameters
		basic_forestplot_params <- metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present)
	    grDevices::dev.off()
	    
		if(is.null(set_xlim)) { set_xlim <- basic_forestplot_params$xlim }
				
		if(is.null(set_at)) { set_at <- basic_forestplot_params$at }
		
		set_ilab.xpos <- set_xlim[1] - (0.36 * set_xlim[1])
		
		if(is.null(set_ylim)) { set_ylim <- basic_forestplot_params$ylim }

        # Display forestplot parameters
	    if (verbose_output) {
		
			base::writeLines("Forestplot params\u003A \n"); 
			base::print("set_ylim\u003A "); base::print(set_ylim)
			base::print("set_xlim\u003A "); base::print(set_xlim)
			base::print("set_at\u003A "); base::print(set_at)
			base::print("rows\u003A "); base::print(basic_forestplot_params$rows)
			
			} 



		# --------------------------------------

		# set margins
		opar <- graphics::par(no.readonly =TRUE)
		base::on.exit(graphics::par(opar))
		 
		op <- graphics::par(cex=set_cex, mar=c(4,4,1,2))
	

		# Generate plot
		forestplot_name_oulier_present <- base::paste0("forestplot_fixed_effect_sortedby_betas_variant_name_", (metafor_results_fe[[item_no]])$current_snp_name, "_snp_no_", (metafor_results_fe[[item_no]])$current_snp_no, ".tif")
		if (save_plot) { grDevices::tiff(forestplot_name_oulier_present, width = 17.35, height = 23.35, units = "cm", res = 600, compression = "lzw", pointsize = 14) }

		metafor::forest(metafor_results_fe[[item_no]]$metafor_results_outlier_present, xlim=set_xlim, at=set_at, cex=0.64, xlab = "log odds ratios", addfit=TRUE, mlab = "Summary effect")

		# Adding labels		
		graphics::text(set_xlim[2], set_ylim[2] - adjust_labels, "Effect-size [95% CI]", pos=2, cex=set_cex)
		graphics::text(0.0, set_ylim[2] - adjust_labels, (metafor_results_fe[[item_no]])$current_snp_name, cex=set_cex)
		graphics::text(set_xlim[1], set_ylim[2] - adjust_labels, "SPRE  Study", pos=4, cex=set_cex)			
		#graphics::text(set_xlim[1] - (0.37 * set_xlim[1]), set_ylim[2] - 1, "SPRE", pos=2, cex=0.66)
		plotrix::ablineclip(v = (metafor_results_fe[[item_no]]$metafor_results_outlier_present)$b[1], y1 = -1, y2 = set_ylim[2] - 2, lty="twodash", col = "grey0")			
		# --------------------------------------


		# add colour gradient
		wi <- 1/base::sqrt((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$vi)
		psize <- wi/base::sum(wi)
		psize <- (psize - base::min(psize)) / (base::max(psize) - base::min(psize))
		psize <- (psize * 1.0) + 0.5

		df_usta_color <- base::data.frame("usta" = list_snp_minidatasets[[item_no]][, "usta"])
	
		# ---------
	

		if (spre_colour_palette[1] == "mono_colour") {
		
			df_usta_color$color <- spre_colour_palette[2]
			
			
		} else if (spre_colour_palette[1] == "dual_colour") {
		
			df_usta_color$color <- spre_colour_palette[2]
			
			df_usta_color[, "color"][df_usta_color$usta < 0] <- spre_colour_palette[3]
			
		
		} else if (spre_colour_palette[1] == "multi_colour") {

			colorspace_hcl_hsv_palettes <- c("rainbow_hcl", "diverge_hcl", "terrain_hcl", "sequential_hcl", "diverge_hsv")

			gr_devices_palettes <- c("rainbow", "cm.colors", "topo.colors",  "terrain.colors", "heat.colors")
			
			color_ramps_palettes <- c("matlab.like", "matlab.like2", "magenta2green", "cyan2yellow", "blue2yellow", "green2red", "blue2green", "blue2red")


			
			# colorspace_hcl_hsv_palettes: rainbow_hcl
			if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[1]) {
				df_usta_color$color <- colorspace::rainbow_hcl(nrow(df_usta_color))
	
				# colorspace_hcl_hsv_palettes: diverge_hcl
				} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[2]) {		    	
					df_usta_color$color <- colorspace::diverge_hcl(nrow(df_usta_color))
		
					# colorspace_hcl_hsv_palettes: terrain_hcl
					} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[3]) {
						df_usta_color$color <- colorspace::terrain_hcl(nrow(df_usta_color))
			
						# colorspace_hcl_hsv_palettes: sequential_hcl
						} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[4]) {		    			
							df_usta_color$color <- colorspace::sequential_hcl(nrow(df_usta_color))
				
							# colorspace_hcl_hsv_palettes: diverge_hsv
							} else if (spre_colour_palette[2] == colorspace_hcl_hsv_palettes[5]) {
								df_usta_color$color <- colorspace::diverge_hsv(nrow(df_usta_color))
					
								}

			
			# gr_devices_palette: rainbow
			if (spre_colour_palette[2] == gr_devices_palettes[1]) {
				#df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color))
				df_usta_color$color <- grDevices::rainbow(nrow(df_usta_color), start = 0, end = 5/6)
	
				# gr_devices_palette: cm.colors
				} else if (spre_colour_palette[2] == gr_devices_palettes[2]) {
					df_usta_color$color <- grDevices::cm.colors(nrow(df_usta_color))
		
					# gr_devices_palette: topo.colors
					} else if (spre_colour_palette[2] == gr_devices_palettes[3]) {
						df_usta_color$color <- grDevices::topo.colors(nrow(df_usta_color))
			
						# gr_devices_palette: terrain.colors
						} else if (spre_colour_palette[2] == gr_devices_palettes[4]) {
							df_usta_color$color <- grDevices::terrain.colors(nrow(df_usta_color))
				
							# gr_devices_palette: heat.colors
							} else if (spre_colour_palette[2] == gr_devices_palettes[5]) {
								df_usta_color$color <- grDevices::heat.colors(nrow(df_usta_color))
					
								}

										
			# color_ramps_palettes: matlab.like					
			if (spre_colour_palette[2] == color_ramps_palettes[1]) {
				df_usta_color$color <- colorRamps::matlab.like(nrow(df_usta_color))
		
				# color_ramps_palettes: matlab.like2
				} else if (spre_colour_palette[2] == color_ramps_palettes[2]) {
					df_usta_color$color <- colorRamps::matlab.like2(nrow(df_usta_color))
			
					# color_ramps_palettes: magenta2green	
					} else if (spre_colour_palette[2] == color_ramps_palettes[3]) {
						df_usta_color$color <- colorRamps::magenta2green(nrow(df_usta_color))
				
						# color_ramps_palettes: cyan2yellow
						} else if (spre_colour_palette[2] == color_ramps_palettes[4]) {
							df_usta_color$color <- colorRamps::cyan2yellow(nrow(df_usta_color))
					
							# color_ramps_palettes: blue2yellow
							} else if (spre_colour_palette[2] == color_ramps_palettes[5]) {
								df_usta_color$color <- colorRamps::blue2yellow(nrow(df_usta_color))
						
								# color_ramps_palettes: green2red
								} else if (spre_colour_palette[2] == color_ramps_palettes[6]) {
									df_usta_color$color <- colorRamps::green2red(nrow(df_usta_color))
							
									# color_ramps_palettes: blue2green
									} else if (spre_colour_palette[2] == color_ramps_palettes[7]) {
										df_usta_color$color <- colorRamps::blue2green(nrow(df_usta_color))
								
										# color_ramps_palettes: blue2red
										} else if (spre_colour_palette[2] == color_ramps_palettes[8]) {
											df_usta_color$color <- colorRamps::blue2red(nrow(df_usta_color))
									
											}


			# color_brewer_palettes
			if (spre_colour_palette[2] == "color_brewer_palettes") {
				# Example of spre_colour_palette vector for color_brewer_palettes: spre_colour_palette = c("multi_colour", "color_brewer_palettes", 9, "BuPu")
				df_usta_color$color <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(as.numeric(spre_colour_palette[3]),spre_colour_palette[4]))(nrow(df_usta_color))
			
				}
		
		} # end of: else if (spre_colour_palette[1] == "multi_colour")

	
		# ---------	
	
		graphics::points((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi, length((metafor_results_fe[[item_no]]$metafor_results_outlier_present)$yi):1, pch=15, col=df_usta_color[, "color"], cex=psize)

		graphics::par(op)
	
		if (save_plot) grDevices::dev.off()


		# --------------------------------------

	} # end of: generate_forestplot_outlier_present


	# Call function: generate_forestplot_outlier_present

	# Generate a sequence of integers for the number of loci (lead variants)
	loci <- seq(unique(m$variant_names))
	
	base::writeLines("\nGenerating forest plots\u003A \u002E\u002E\u002E \n")
	lapply(loci, generate_forestplot_outlier_present)
	base::writeLines("Done\u002E ")
	
	m_sortby_betas <- dplyr::rename(m_sortby_betas, spre = usta, study_numbers = study, variant_numbers = snp)

	if (verbose_output) base::print("***************** end of part 3: Generate forest plots	 ****************")

	# -----------------------------------

	
	# List of items to return
	base::list(number_variants = nsnps,
	           number_studies = nstudies, 
	           fixed_effect_results = metafor_results_fe,
	           random_effects_results = metafor_results_re,
	           spre_forestplot_dataset = m_sortby_betas)


	# ---------------------------

} # end of: generate_spre_forestplot