#' @title Retrieve trait-related variants and summary statistics from the GWAS catalog
#'
#' @description Returns a formatted data.frame of variants for a given trait, including summary statistics
#'
#' @param trait Expects a regular expression to find the trait of interest
#' @param trait_list An object generated by init_gwas_dbs(). If NULL, init_gwas_dbs() will be run internally.
#' @param risk_frequency If TRUE, also includes the frequency of the risk allele in the output.
#' @param mapped_gene If TRUE, includes the mapped gene for each variant in the output.
#' @param ask_about_efo If TRUE, requires dynamic user input to specify which EFO id should be used.
#'
#' @return A formatted data.frame containing all trait-related variants, including summary statistics.
#' @examples
#' \donttest{MI_variants <- get_trait_variants('MI|myocardial infarction',ask_about_efo=FALSE)}
#' @export
#' @importFrom gwasrapidd "get_associations"
get_trait_variants <- function(trait,
trait_list=NULL, # expects list output from init_gwas_dbs
risk_frequency=TRUE,
mapped_gene=FALSE,
ask_about_efo=TRUE){
starttime <- Sys.time()
if(is.null(trait_list)){
trait_list <- init_gwas_dbs()
all_traits <- trait_list$all_traits
}
else{all_traits <- trait_list$all_traits}
efo_df <- all_traits@traits[grepl(trait, all_traits@traits$trait),]
cat('\nThe EFO ids for',trait,'are shown below:\n\n')
print(efo_df)
if(nrow(efo_df)<1) stop('No variants have been identified in relation to the named trait(s).')
if(ask_about_efo==TRUE){
efo_ind <- readline('\nWhich of these traits would you like to obtain the SNPs for? Indicate as numeric vector:\n')
efo_ind <- eval(parse(text=efo_ind))
if(!class(efo_ind)%in%c('numeric','integer')){
stop('You did not enter a valid numeric vector. Please try again.')
}
cat('\nYou have selected these EFO ids:\n\n')
print(efo_df[efo_ind,])
efo_df <- efo_df[efo_ind,]
}
cat('\nRetrieving GWAS catalog-identified variants and corresponding information for these EFO identifiers...\n')
retrieve_SNPs <- get_associations(efo_id = efo_df$efo_id)
SNPs_with_effectsizes <- retrieve_SNPs@associations[c('association_id','beta_number',
'standard_error','or_per_copy_number',
'pvalue')]
beta_sign <- ifelse(retrieve_SNPs@associations$beta_direction=='increase',1,-1)
SNPs_with_effectsizes$beta_number <- ifelse(is.na(beta_sign),
SNPs_with_effectsizes$beta_number,
SNPs_with_effectsizes$beta_number*beta_sign)
colnames(SNPs_with_effectsizes)[2] <- 'beta_coefficient'
SNPs_risk_alleles <- retrieve_SNPs@risk_alleles
SNPs_risk_alleles <- merge(SNPs_risk_alleles[c('association_id','variant_id','risk_allele')],
SNPs_with_effectsizes,by='association_id',all.x=TRUE)
if(risk_frequency==TRUE){
cat('>Obtaining risk allele frequency\n')
SNPs_risk_alleles <- merge(SNPs_risk_alleles,
retrieve_SNPs@risk_alleles[c('association_id','risk_frequency')],
by='association_id',all.x=TRUE)
}
if(mapped_gene==TRUE){
cat('>Collecting mapped genes\n')
SNPs_risk_alleles <- merge(SNPs_risk_alleles,
retrieve_SNPs@genes[c('association_id','gene_name')],
by='association_id',all.x=TRUE)
}
SNPs_risk_alleles <- SNPs_risk_alleles[,-which(colnames(SNPs_risk_alleles)=='association_id')]
endtime <- Sys.time()
cat('\nRetrieving the information took',difftime(endtime,starttime,units='secs'),'seconds.\n')
rm(starttime,endtime)
return(invisible(SNPs_risk_alleles))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.