R/prs_grid.R

Defines functions prs_grid

Documented in prs_grid

#' @title Create a grid of polygenic risk scores with different tuning parameter combinations
#'
#' @description Create a full or fractional factorial grid of polygenic risk scores based on a
#' user-specified range of tuning parameter values. Useful to evaluate the influence of various
#' tuning parameters, and for the creation of stacked scores.
#'
#' @param variant_data An object of format output by extract_variants().
#' @param gwas_info An object generated by get_trait_variants() or get_pQTLs().
#' @param binary_outcome Set to TRUE for binary traits, and FALSE for continuous outcomes (including pQTLs)
#' @param imp_range A vector of imputation R^2 values.
#' @param pruning_range A vector of LD (R^2) pruning threshold values.
#' @param pval_range A vector of p-value thresholds.
#' @param optimal If TRUE, creates the grid according to a D-optimal fractional factorial design.
#' @param optimal_trials Number of rows in the fractional factorial design matrix.
#'
#' @return A list containing the grid design, a list of all results, and a data.frame with all scores.
#' # grid <- prs_grid(MI_variants, MI_gwas)
#' @export
#' @importFrom AlgDesign "gen.factorial" "optFederov"

prs_grid <- function(variant_data, gwas_info,
                     binary_outcome=TRUE,
                     imp_range=seq(0.8,1,0.05),
                     pruning_range=seq(0.7,1,0.05),
                     pval_range=quantile(gwas_info$pvalue,seq(0.1,1,0.1)),
                     optimal=FALSE,
                     optimal_trials=9){
  ff <- AlgDesign::gen.factorial(c(length(imp_range),
                                   length(pruning_range),
                                   length(pval_range)),
                                 factors='all',
                                 varNames=c('imp_threshold','pruning_threshold','pval_threshold'))
  ff$imp_threshold <- imp_range[as.numeric(as.character(ff$imp_threshold))]
  ff$pruning_threshold <- pruning_range[as.numeric(as.character(ff$pruning_threshold))]
  ff$pval_threshold <- pval_range[as.numeric(as.character(ff$pval_threshold))]

  prs_list <- list()
  if(optimal==TRUE){
    cat('> Running a D-optimal fractional factorial design (Fedorov algorithm).\n')
    fractional <- AlgDesign::optFederov(~imp_threshold+pruning_threshold+pval_threshold,ff,
                                        criterion='D',nTrials=optimal_trials)
    des <- fractional$design
  }
  else{
    cat('> Running a full factorial design.\n')
    des <- ff
  }
  for(r in 1:nrow(des)){
    cat('**Iteration ',r,' of ',nrow(des),'. (',round((r/nrow(des))*100,2),'%)\n',sep='')
    sink('NUL')
    prs <- create_prs(variant_data, gwas_info,
                      imp_threshold=des$imp_threshold[r],
                      pruning_threshold=des$pruning_threshold[r],
                      pval_threshold=des$pval_threshold[r],
                      binary_outcome=binary_outcome,
                      flowchart=FALSE)
    sink()
    prs_list[[r]] <- prs
  }
  closeAllConnections()
  prs_df <- lapply(prs_list,function(x) x$prs$prs)
  prs_df <- data.frame(cbind(id=prs_list[[1]]$prs$id,do.call('cbind',prs_df)))
  colnames(prs_df)[2:ncol(prs_df)] <- paste0('prs_',1:nrow(des))
  prs_df[,2:ncol(prs_df)] <- sapply(prs_df[,2:ncol(prs_df)],as.numeric)
  return(list(design=des,
              all_results=prs_list,
              result_df=prs_df))
}
vincent10kd/polygenic documentation built on Feb. 25, 2024, 10:17 a.m.