knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(cowplot) library(ggthemes) # install.packages('ggthemes') library(viridis) library(grid) library(rmeta) library(CDBNgenomics) library(scales) theme_oeco <- theme_classic() + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.position = c(1, 0.9), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), #plot.margin = unit(c(0.35, 0, 0.25, 0), 'cm'), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.5, 'cm'))
Race_names <- c( `DurangoJalisco` = "Durango/Jalisco", `Mesoamerican` = "Mesoamerican", `Nueva Granada` = "Nueva Granada" ) Det_names <- c( `1` = "Determinate", `2` = "Indeterminate" ) hcl(h = c(15,135,255,75,195,315), c = 100, l = 65) # [1] "#F8766D" "#00BA38" "#619CFF" log10P <- expression(paste("-log"[10], plain(P))) getmode <- function(v) { uniqv <- unique(v) uniqv[which.max(tabulate(match(v, uniqv)))] } theme_set(theme_oeco)
mash_obj <- readRDS("../data-raw/Strong_Effects4000SNPs.rds") strong_eff <- readRDS("../data-raw/Pairwise_sharing_Strong_Effects_4000SNPs.rds") sign_eff <- readRDS("../data-raw/Pairwise_sharing_sign_Strong_Effects_4000SNPs.rds")
f1a_input <- mash_plot_sig_by_condition(mash_obj, saveoutput = TRUE) #f1a_in_thinned <- mash_thinned_vis_by_condition(mash_obj = mash_obj, saveoutput = TRUE) f1a_input[[1]] %>% ggplot(aes(x = Number_of_Conditions, y = Significant_SNPs)) + geom_point() + geom_line() + labs(x = "Phenotypes", y = "Significant SNPs") save_plot(filename = "mash_figure_panel_a.svg", last_plot(), base_aspect_ratio = 1.4, base_height = 1)
#' @title Thin significant mash markers to one per window. #' #' @description #' #' @param m An object of type mash #' @param cond A vector of conditions #' @param numcond Numeric. The number of conditions a SNP is significant in. #' Default is 0. #' @param saveoutput Logical. Save plot output to a file? Default is FALSE. #' @param window_bp Numeric. The window size in base pairs, within which to #' keep one significant SNP. #' @param thresh What is the threshold to call an effect significant? Default is #' 0.05. #' @param gtoeq Either '>=' or '==': greater-than-or-equal-to or equal to the #' number of conditions #' #' @return A tbl_df of Marker, Chr, Pos, the number of conditions with #' significant effects, and the minimum local false sign rate (combined #' across all conditions for plotting purposes using the Bonferroni method). #' #' @import dplyr #' @import tibble #' @import tidyr #' #' @note A final analysis should include LD pruning using a program like PLINK. #' However, this can give you a sense of your results for some window_bp #' equivalent to the normal distance at which LD decays. #' #' @export mash_thin_markers <- function(m, cond = NA, numcond = 0, window_bp = 20000, thresh = 0.05, gtoeq = c(">=", "==")){ numcond <- as.integer(numcond) thresh <- as.numeric(thresh) window_bp <- as.integer(window_bp) if(is.na(cond)[1]){ phe_group <- CDBNgenomics:::get_colnames(m = m) } else { # fix this phe_group <- cond } cond_sig_df <- CDBNgenomics:::get_n_significant_conditions(m, thresh = thresh, conditions = phe_group) %>% enframe(name = "Marker") %>% rename(Number_of_Conditions = value) if(gtoeq == ">="){ cond_sig_df <- cond_sig_df %>% filter(Number_of_Conditions >= numcond) } else if (gtoeq == "=="){ cond_sig_df <- cond_sig_df %>% filter(Number_of_Conditions == numcond) } else stop("Choose either '>=' or '==': greater-than-or-equal-to or equal to your number of conditions.") log10bf_df <- CDBNgenomics:::get_log10bf(m = m) %>% as.data.frame() %>% rownames_to_column(var = "value") %>% mutate(value = as.integer(value)) %>% as_tibble() %>% left_join(CDBNgenomics:::get_marker_df(m = m)) %>% rename(log10BayesFactor = V1) cond_sig_df <- cond_sig_df %>% separate(Marker, into = c("Chr", "Pos"), remove = FALSE, sep = 4) %>% mutate(Chr = as.numeric(str_sub(Chr,2,3)), Pos = as.numeric(Pos)) %>% left_join(log10bf_df, by = "Marker") minBFinbin <- cond_sig_df %>% arrange(Chr, Pos) %>% #gather(key = "Condition", value = "lfsr", -(1:4)) %>% #group_by(Marker) %>% #filter(lfsr != 0) %>% mutate(Posbin = ceiling(Pos / window_bp)) %>% filter(log10BayesFactor > -log10(thresh)) %>% group_by(Chr, Posbin) %>% slice(which.max(log10BayesFactor)) %>% ungroup() %>% mutate(Poslag = abs(lead(Pos) - Pos), bestBF = case_when( Poslag < window_bp & log10BayesFactor >= lead(log10BayesFactor) ~ "keep1", lag(Poslag) < window_bp & log10BayesFactor >= lag(log10BayesFactor) ~ "keep2", Poslag > window_bp & (lag(Poslag) > window_bp | is.na(lag(Poslag))) ~ "keep3" ) ) %>% filter(!is.na(bestBF)) %>% dplyr::select(-Posbin, -Poslag, -bestBF) return(minBFinbin) }
DEF_df <- mash_thin_markers(m = mash_obj, gtoeq = ">=", thresh = 0.05)%>% #arrange(desc(Number_of_Conditions)) %>% filter(Number_of_Conditions > 0 & log10BayesFactor > 2) %>% dplyr::select(Marker, log10BayesFactor, everything()) # get_significant_results(m = mash_obj)[1:5] get_significant_results(m = mash_obj)[83:100] # 29225 write_csv(DEF_df %>% dplyr::select(-value), path = "mash_table_output_BFgt2_2019-07-09.csv") mash_obj$result$lfsr[10866,] library(rmeta) show_col(c("#440154FF", "#BB3754FF")) twocol <- c("#000004FF", "#BB3754FF") sevencol <- viridis_pal(option = "B")(7) show_col(viridis_pal(option = "B")(7)) phenotypes <- str_sub(colnames(mash_obj$result$PosteriorMean), start = 6) #phenotypes[6] <- "Earliest_Year_CDBN" phe_col_7 <- c(1,3,4,1,1,7,4,1,6,3,1,1,5,3,2,3,4,1,3,3) phe_col_2 <- c(2,1,1,2,2,2,1,2,2,1,2,2,1,1,2,1,1,2,1,1) phe_info <- tibble(phenotypes, phe_col_7, phe_col_2) %>% mutate(seven_col = sevencol[phe_col_7], two_col = twocol[phe_col_2]) # nice_phenos$METGPHEN[6] <- "Earliest Year in the CDBN"
library(ashr) #' Plot metaplot for an effect based on posterior from mash #' @param m the result of a mash fit #' @param i index of the effect to plot #' @param xlab Character string specifying x-axis label. #' @param ylab Character string specifying y-axis label. #' @param ... Additional arguments passed to \code{\link[rmeta]{metaplot}}. #' @importFrom ashr get_pm get_psd #' @importFrom rmeta metaplot #' @export mash_plot_meta = function(m,i,xlab="Effect size", ylab="Condition",...){ metaplot(get_pm(m)[i,],get_psd(m)[i,],xlab=xlab,ylab=ylab,...) }
for(i in c(1,3,14)){ png(file = paste0("mash_effects_", DEF_df$Marker[i], "_", Sys.Date(), ".png"), width = 3, height = 5.5, units = "in", res = 600) mash_plot_meta(m = mash_obj, DEF_df$value[i], colors = meta.colors(lines = phe_info$seven_col)) dev.off() } for(i in seq_along(DEF_df$Marker)){ png(file = paste0("nonPv01effects/mash_effects_", DEF_df$Marker[i], "_", Sys.Date(), ".png"), width = 3, height = 5.5, units = "in", res = 600) mash_plot_meta(m = mash_obj, DEF_df$value[i], colors = meta.colors(lines = phe_info$seven_col)) dev.off() } png(file = paste0("mash_effects_labels_", Sys.Date(), ".png"), width = 11, height = 5.5, units = "in", res = 600) mash_plot_meta(m = mash_obj, 6053, labels = nice_phenos$METGPHEN, colors = meta.colors(lines = phe_info$seven_col)) dev.off() mash_save_effect_plot <- function(mash_obj, i){ png(file = paste0("mash_effects_", names(CDBNgenomics:::get_significant_results(m = mash_obj))[i], "_", Sys.Date(), ".png"), width = 3, height = 5.5, units = "in", res = 600) mash_plot_meta(m = mash_obj, CDBNgenomics:::get_significant_results(m = mash_obj)[i]) dev.off() }
For each SNP, how many conditions (and which conditions) is it significant in?
mash_BF <- mash_plot_manhattan_by_condition(m = mash_obj, thresh = 0.01, saveoutput = TRUE) mash_BF$ggman_df %>% arrange(desc(log10BayesFactor))
mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_BM") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_CB") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_CT") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_DF") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_DM") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_Earliest_Year_CDBN") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_EV") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_GH") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_HB") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_HI") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_LG") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_PH") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_RR") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_RU") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_SA") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_SF") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_SW") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_SY") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_WM") mash_plot_manhattan_by_condition(m = mash_obj, cond = "Bhat_ZN") get_colnames(m = mash_obj)
phenotypes <- str_sub(rownames(strong_eff), start = 6) PHE <- c("BM", "BR", "CB", "CM", "CT", "DF", "DM", "EV", "GH", "HB", "HI", "LG", "PH", "RR", "RU", "SA", "SF", "SW", "SY", "WM", "ZN") METGPHEN <- c("Biomass (kg)", "Blackroot BCMV response", "CBB damage score", "BCMV presence/absence", "CTV presence/absence", "Days to flowering", "Days to maturity", "Early vigor score", "Growth habit", "Halo blight damage score", "Harvest index (%)", "Lodging score", "Plant height (cm)", "Root rot damage score", "Rust damage score", "Seed appearance score", "Seedfill duration (days)", "Seed weight (mg)", "Seed yield (kg/ha)", "White mold damage score", "Zinc deficiency damage score") nice_phenos <- tibble(PHE, METGPHEN) nice_phenos <- nice_phenos %>% right_join(enframe(phenotypes, name = "Name"), by = c("PHE" = "value")) nice_phenos$METGPHEN[6] <- "Earliest Year in the CDBN" colnames(strong_eff) <- nice_phenos$METGPHEN row.names(strong_eff) <- nice_phenos$METGPHEN # row.names(shared_effects) <- c(paste0(PHE, "_", PHE_Loc)) phenotypess <- str_sub(rownames(sign_eff), start = 6) colnames(sign_eff) <- phenotypess row.names(sign_eff) <- phenotypess
library(dots) pairwise <- mash_plot_pairwise_sharing(corrmatrix = strong_eff, saveoutput = TRUE, base_height = 5, max_size = 4, filename = "Pairwise.svg")
mash_pairwise_sharing_plot <- function(mash_obj, saveoutput = FALSE, filename = "Mash_pairwise_shared_effects_", ...){ shared_effects <- get_pairwise_sharing(m = mash_out) corrplot <- shared_effects %>% reorder_cormat(.) %>% ggcorr(data = NULL, cor_matrix = ., geom = "circle", label = FALSE, label_alpha = TRUE, label_size = 3, hjust = 0.95, vjust = .3, layout.exp = 9, min_size = 0, max_size = 3.5) + scale_color_viridis(option = "B") if(saveoutput == TRUE){ save_plot(filename = paste0(filename, Sys.Date(), ".svg"), last_plot(), base_aspect_ratio = 1.1, base_height = 4.5) } return(list(corr_matrix = shared_effects, gg_corr = corrplot)) } strong_eff %>% reorder_cormat(.) %>% ggcorr(data = NULL, cor_matrix = ., geom = "circle", label = FALSE, label_alpha = TRUE, label_size = 3, hjust = 0.95, vjust = .3, layout.exp = 9, min_size = 0, max_size = 3.5) + scale_color_viridis(option = "B") save_plot(filename = "Correlation_plot_mash_strong_effects.svg", last_plot(), base_aspect_ratio = 1.1, base_height = 4.5) sign_eff %>% reorder_cormat(.) %>% ggcorr(data = NULL, cor_matrix = ., geom = "circle", label = FALSE, label_alpha = TRUE, label_size = 3, hjust = .95, vjust = .3, layout.exp = 9, min_size = 0, max_size = 6.4) + scale_color_viridis(option = "B")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.