R/get_trait_variants.R

Defines functions get_trait_variants

Documented in get_trait_variants

#' @title Retrieve trait-related variants and summary statistics from the GWAS catalog
#'
#' @description Returns a formatted data.frame of variants for a given trait, including summary statistics
#'
#' @param trait Expects a regular expression to find the trait of interest
#' @param trait_list An object generated by init_gwas_dbs(). If NULL, init_gwas_dbs() will be run internally.
#' @param risk_frequency If TRUE, also includes the frequency of the risk allele in the output.
#' @param mapped_gene If TRUE, includes the mapped gene for each variant in the output.
#' @param ask_about_efo If TRUE, requires dynamic user input to specify which EFO id should be used.
#'
#' @return A formatted data.frame containing all trait-related variants, including summary statistics.
#' @examples
#' \donttest{MI_variants <- get_trait_variants('MI|myocardial infarction',ask_about_efo=FALSE)}
#' @export
#' @importFrom gwasrapidd "get_associations"

get_trait_variants <- function(trait,
                               trait_list=NULL, # expects list output from init_gwas_dbs
                               risk_frequency=TRUE,
                               mapped_gene=FALSE,
                               ask_about_efo=TRUE){
  starttime <- Sys.time()
  if(is.null(trait_list)){
    trait_list <- init_gwas_dbs()
    all_traits <- trait_list$all_traits
  }
  else{all_traits <- trait_list$all_traits}
  efo_df <- all_traits@traits[grepl(trait, all_traits@traits$trait),]
  cat('\nThe EFO ids for',trait,'are shown below:\n\n')
  print(efo_df)
  if(nrow(efo_df)<1) stop('No variants have been identified in relation to the named trait(s).')
  if(ask_about_efo==TRUE){
    efo_ind <- readline('\nWhich of these traits would you like to obtain the SNPs for? Indicate as numeric vector:\n')
    efo_ind <- eval(parse(text=efo_ind))
    if(!class(efo_ind)%in%c('numeric','integer')){
      stop('You did not enter a valid numeric vector. Please try again.')
    }
    cat('\nYou have selected these EFO ids:\n\n')
    print(efo_df[efo_ind,])
    efo_df <- efo_df[efo_ind,]
  }
  cat('\nRetrieving GWAS catalog-identified variants and corresponding information for these EFO identifiers...\n')
  retrieve_SNPs <- get_associations(efo_id = efo_df$efo_id)
  SNPs_with_effectsizes <- retrieve_SNPs@associations[c('association_id','beta_number',
                                                        'standard_error','or_per_copy_number',
                                                        'pvalue')]
  beta_sign <- ifelse(retrieve_SNPs@associations$beta_direction=='increase',1,-1)
  SNPs_with_effectsizes$beta_number <- ifelse(is.na(beta_sign),
                                              SNPs_with_effectsizes$beta_number,
                                              SNPs_with_effectsizes$beta_number*beta_sign)
  colnames(SNPs_with_effectsizes)[2] <- 'beta_coefficient'
  SNPs_risk_alleles <- retrieve_SNPs@risk_alleles
  SNPs_risk_alleles <- merge(SNPs_risk_alleles[c('association_id','variant_id','risk_allele')],
                             SNPs_with_effectsizes,by='association_id',all.x=TRUE)

  if(risk_frequency==TRUE){
    cat('>Obtaining risk allele frequency\n')
    SNPs_risk_alleles <- merge(SNPs_risk_alleles,
                               retrieve_SNPs@risk_alleles[c('association_id','risk_frequency')],
                               by='association_id',all.x=TRUE)
  }
  if(mapped_gene==TRUE){
    cat('>Collecting mapped genes\n')
    SNPs_risk_alleles <- merge(SNPs_risk_alleles,
                               retrieve_SNPs@genes[c('association_id','gene_name')],
                               by='association_id',all.x=TRUE)
  }
  SNPs_risk_alleles <- SNPs_risk_alleles[,-which(colnames(SNPs_risk_alleles)=='association_id')]
  endtime <- Sys.time()
  cat('\nRetrieving the information took',difftime(endtime,starttime,units='secs'),'seconds.\n')
  rm(starttime,endtime)
  return(invisible(SNPs_risk_alleles))
}
vincent10kd/polygenic documentation built on Feb. 25, 2024, 10:17 a.m.