#' @title Threshold a FBM
#'
#' @param X A FBM.
#' @param ind Numeric. Single column to threshold.
#' @param thr Numeric. Threshold above which row value is changed to "TRUE".
#' @param quantile Numeric. Top quantile/percentile to keep for each GWAS for
#' comparisons.
#'
thresholdFBM <- function(X, ind, thr, quantile = NA) {
if (!is.na(quantile)) {
thr <- quantile(X[, ind], quantile)
}
case_when(X[, ind] > thr ~ TRUE,
TRUE ~ FALSE)
}
#' @title Plot the -log10p-value distributions for two univariate GWAS against
#' one another using ggplot.
#'
#' @description This function takes GWAS results from the dive_ functions of
#' snpdiver that create FBM of univariate GWAS effects. It creates a
#' dataframe from these results suitable for an Upset plot, containing only
#' the rows/SNPs significant in at least one univariate GWAS at the
#' -log10p threshold specified.
#'
#' @param effects fbm created using 'dive_phe2effects' or 'dive_phe2mash'.
#' Saved under the name "gwas_effects_{suffix}.rds" and can be loaded into
#' R using the bigstatsr function "big_attach".
#' @param thr Numeric. Threshold above which SNP/row is kept for comparisons.
#' @param quantile Numeric. Top quantile/percentile to keep for each GWAS for
#' comparisons.
#' @param metadata Metadata created using 'dive_phe2effects' or 'dive_phe2mash'.
#' Saved under the name "gwas_effects_{suffix}_associated_metadata.csv".
#' @param ncores Optional integer to specify the number of cores to be used
#' for parallelization. You can specify this with bigparallelr::nb_cores().
#'
#' @export
big_upset_df <- function(effects, thr = 7, quantile = NA, metadata,
ncores = nb_cores){
gwas_ok <- floor(effects$ncol / 3)
ind_p <- (1:(gwas_ok))*3
colnames_fbm <- metadata$phe
if(effects$ncol < (sum(gwas_ok)*3 + 1)) {
effects$add_columns(ncol_add = 1)
} # add a column for the threshold score if there isn't one already
eff_sub <- big_copy(effects, ind.col = ind_p)
thr_df <- big_apply(eff_sub,
a.FUN = function(X, ind) max(X[ind, ]),
ind = rows_along(eff_sub),
a.combine = 'c', block.size = 100,
ncores = ncores)
effects[,(sum(gwas_ok)*3 + 1)] <- thr_df
thr_df <- which(thr_df > thr)
thr_df <- big_copy(effects, ind.row = thr_df, ind.col = ind_p)
for (j in seq_along(1:thr_df$ncol)) { # standardize one gwas at a time.
thr_df[, j] <- big_apply(thr_df, a.FUN = thresholdFBM, ind = j, thr = thr,
a.combine = 'plus')
}
thr_df1 <- thr_df[1:thr_df$nrow, 1:thr_df$ncol]
colnames(thr_df1) <- colnames_fbm
thr_df1 <- as_tibble(thr_df1)
return(thr_df1)
}
#' @title Plot the -log10p-value distributions for two univariate GWAS against
#' one another using ggplot.
#'
#' @description This function takes GWAS results from the dive_ functions of
#' snpdiver that create FBM of univariate GWAS effects. It uses ggplot2 to
#' plot these distributions against one another for comparison purposes.
#'
#' @param effects GWAS output saved as an FBM with an .rds and .bk file generated
#' by dive_phe2effects or dive_phe2mash functions of snpdiver. Load this
#' into the R environment using bigstatsr::big_attach.
#' @param metadata The metadata associated with GWAS output generated by
#' dive_phe2effects or dive_phe2mash functions of snpdiver. Eventually
#' this and gwas should be rolled into a new object type for R, but not yet.
#' @param e_row Integer. The row number of gwas_meta that corresponds to the
#' expected GWAS. This will be plotted on the x-axis for all comparisons.
#' @param o_row Integer vector. The rownumbers of gwas_meta that are the
#' observed GWAS. These will be compared to the expected GWAS in e_row.
#' @param thr Numeric. -log10p threshold. Only SNPs with an expected -log10p
#' value above this threshold will be plotted.
#' @param suffix Optional character vector to give saved files a unique search
#' string/name.
#'
#' @return Plots saved to disk in a "analysis/gwas_comps" folder comparing the
#' expected distribution, e_row, to all observed gwas distributions, o_row.
#'
#' @importFrom dplyr filter
#' @import ggplot2
#' @importFrom cowplot save_plot
#' @importFrom rlang .data
#' @import hexbin
#'
#' @export
plot_qq_two_gwas <- function(effects, metadata, e_row = 1,
o_row = 2:nrow(metadata), thr = 5,
suffix = NA) {
requireNamespace("here")
requireNamespace("hexbin")
e_row <- e_row*3
o_row <- o_row*3
if (!dir.exists(here::here("analysis", "gwas_comps"))) {
if (!dir.exists(here::here("analysis"))) {
dir.create(here::here("analysis"))
}
dir.create(here::here("analysis", "gwas_comps"))
}
if (!grepl("^_", suffix) & !is.na(suffix)){
suffix <- paste0("_", suffix)
} else if(is.na(suffix)) {
suffix <- ""
}
for (i in seq_along(o_row)) {
plot_qq <- filter(data.frame(effects[,c(e_row, o_row[i])]),
.data$X1 >= thr) %>%
ggplot() +
geom_hex(aes(x = .data$X1, y = .data$X2)) +
scale_fill_gradient(trans = "log") +
geom_smooth(aes(x = .data$X1, y = .data$X2), ) +
geom_abline(slope = 1, linetype = 2, color = "red") +
theme(legend.position = "right") +
labs(x = metadata[e_row/3,1], y = metadata[o_row[i]/3,1])
save_plot(filename = here::here("analysis", "gwas_comps",
paste0("GWAS_significance_thr_log10p_", thr,
"_", metadata[e_row/3,1], "_vs_",
metadata[o_row[i]/3,1], suffix, ".png")),
plot = plot_qq, base_height = 5, base_asp = 1.61)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.