R/effects-evaluation.R

Defines functions plot_qq_two_gwas big_upset_df thresholdFBM

Documented in big_upset_df plot_qq_two_gwas thresholdFBM

#' @title Threshold a FBM
#'
#' @param X A FBM.
#' @param ind Numeric. Single column to threshold.
#' @param thr Numeric. Threshold above which row value is changed to "TRUE".
#' @param quantile Numeric. Top quantile/percentile to keep for each GWAS for
#'     comparisons.
#'
thresholdFBM <- function(X, ind, thr, quantile = NA) {
  if (!is.na(quantile)) {
  thr <- quantile(X[, ind], quantile)
  }
  case_when(X[, ind] > thr ~ TRUE,
            TRUE ~ FALSE)
}

#' @title Plot the -log10p-value distributions for two univariate GWAS against
#'     one another using ggplot.
#'
#' @description This function takes GWAS results from the dive_ functions of
#'     snpdiver that create FBM of univariate GWAS effects. It creates a
#'     dataframe from these results suitable for an Upset plot, containing only
#'     the rows/SNPs significant in at least one univariate GWAS at the
#'     -log10p threshold specified.
#'
#' @param effects fbm created using 'dive_phe2effects' or 'dive_phe2mash'.
#'     Saved under the name "gwas_effects_{suffix}.rds" and can be loaded into
#'     R using the bigstatsr function "big_attach".
#' @param thr Numeric. Threshold above which SNP/row is kept for comparisons.
#' @param quantile Numeric. Top quantile/percentile to keep for each GWAS for
#'     comparisons.
#' @param metadata Metadata created using 'dive_phe2effects' or 'dive_phe2mash'.
#'     Saved under the name "gwas_effects_{suffix}_associated_metadata.csv".
#' @param ncores Optional integer to specify the number of cores to be used
#'     for parallelization. You can specify this with bigparallelr::nb_cores().
#'
#' @export
big_upset_df <- function(effects, thr = 7, quantile = NA, metadata,
                         ncores = nb_cores){
  gwas_ok <- floor(effects$ncol / 3)
  ind_p <- (1:(gwas_ok))*3
  colnames_fbm <- metadata$phe
  if(effects$ncol < (sum(gwas_ok)*3 + 1))  {
    effects$add_columns(ncol_add = 1)
  }  # add a column for the threshold score if there isn't one already
  eff_sub <- big_copy(effects, ind.col = ind_p)
  thr_df <- big_apply(eff_sub,
                          a.FUN = function(X, ind) max(X[ind, ]),
                          ind = rows_along(eff_sub),
                          a.combine = 'c', block.size = 100,
                          ncores = ncores)
  effects[,(sum(gwas_ok)*3 + 1)] <- thr_df
  thr_df <- which(thr_df > thr)
  thr_df <- big_copy(effects, ind.row = thr_df, ind.col = ind_p)

  for (j in seq_along(1:thr_df$ncol)) {  # standardize one gwas at a time.
    thr_df[, j] <- big_apply(thr_df, a.FUN = thresholdFBM, ind = j, thr = thr,
                             a.combine = 'plus')
  }

  thr_df1 <- thr_df[1:thr_df$nrow, 1:thr_df$ncol]
  colnames(thr_df1) <- colnames_fbm
  thr_df1 <- as_tibble(thr_df1)
  return(thr_df1)
}



#' @title Plot the -log10p-value distributions for two univariate GWAS against
#'     one another using ggplot.
#'
#' @description This function takes GWAS results from the dive_ functions of
#'     snpdiver that create FBM of univariate GWAS effects. It uses ggplot2 to
#'     plot these distributions against one another for comparison purposes.
#'
#' @param effects GWAS output saved as an FBM with an .rds and .bk file generated
#'     by dive_phe2effects or dive_phe2mash functions of snpdiver. Load this
#'     into the R environment using bigstatsr::big_attach.
#' @param metadata The metadata associated with GWAS output generated by
#'     dive_phe2effects or dive_phe2mash functions of snpdiver. Eventually
#'     this and gwas should be rolled into a new object type for R, but not yet.
#' @param e_row Integer. The row number of gwas_meta that corresponds to the
#'     expected GWAS. This will be plotted on the x-axis for all comparisons.
#' @param o_row Integer vector. The rownumbers of gwas_meta that are the
#'     observed GWAS. These will be compared to the expected GWAS in e_row.
#' @param thr Numeric. -log10p threshold. Only SNPs with an expected -log10p
#'     value above this threshold will be plotted.
#' @param suffix Optional character vector to give saved files a unique search
#'     string/name.
#'
#' @return Plots saved to disk in a "analysis/gwas_comps" folder comparing the
#'     expected distribution, e_row, to all observed gwas distributions, o_row.
#'
#' @importFrom dplyr filter
#' @import ggplot2
#' @importFrom cowplot save_plot
#' @importFrom rlang .data
#' @import hexbin
#'
#' @export
plot_qq_two_gwas <- function(effects, metadata, e_row = 1,
                             o_row = 2:nrow(metadata), thr = 5,
                             suffix = NA) {
  requireNamespace("here")
  requireNamespace("hexbin")
  e_row <- e_row*3
  o_row <- o_row*3
  if (!dir.exists(here::here("analysis", "gwas_comps"))) {
    if (!dir.exists(here::here("analysis"))) {
      dir.create(here::here("analysis"))
    }
    dir.create(here::here("analysis", "gwas_comps"))
  }
  if (!grepl("^_", suffix) & !is.na(suffix)){
    suffix <- paste0("_", suffix)
  } else if(is.na(suffix)) {
    suffix <- ""
  }
  for (i in seq_along(o_row)) {
    plot_qq <- filter(data.frame(effects[,c(e_row, o_row[i])]),
                      .data$X1 >= thr) %>%
      ggplot() +
      geom_hex(aes(x = .data$X1, y = .data$X2)) +
      scale_fill_gradient(trans = "log") +
      geom_smooth(aes(x = .data$X1, y = .data$X2), ) +
      geom_abline(slope = 1, linetype = 2, color = "red") +
      theme(legend.position = "right") +
      labs(x = metadata[e_row/3,1], y = metadata[o_row[i]/3,1])

    save_plot(filename = here::here("analysis", "gwas_comps",
                              paste0("GWAS_significance_thr_log10p_", thr,
                                     "_", metadata[e_row/3,1], "_vs_",
                                     metadata[o_row[i]/3,1], suffix, ".png")),
              plot = plot_qq, base_height = 5, base_asp = 1.61)
  }
}
Alice-MacQueen/snpdiver documentation built on Dec. 17, 2021, 8:41 a.m.