knitr::opts_chunk$set(tidy = FALSE, message = FALSE)
library(MungeSumstats)
MungeSumstats now offers high throughput query and import functionality to data from the MRC IEU Open GWAS Project.
#### Search for datasets #### metagwas <- MungeSumstats::find_sumstats(traits = c("parkinson","alzheimer"), min_sample_size = 1000) head(metagwas,3) ids <- (dplyr::arrange(metagwas, nsnp))$id
#### Search for datasets #### #error_dwnld <- # tryCatch( # metagwas <- MungeSumstats::find_sumstats(traits = c("parkinson", # "alzheimer"), # min_sample_size = 1000), # error = function(e) e, # warning = function(w) w # ) #if(exists("metagwas")&&is.data.frame(metagwas)){#if exists downloaded fine # head(metagwas,3) # ids <- (dplyr::arrange(metagwas, nsnp))$id #} #to speed up build just create results metagwas2 <- data.frame( id=c("ieu-a-298","ieu-b-2","ieu-a-297"), trait = rep("Alzheimer's disease",3), group_name = rep("public",3), year=c(2013, 2019, 2013), author=c("Lambert","Kunkle BW","Lambert"), consortium=c("IGAP", "Alzheimer Disease Genetics Consortium (ADGC), European Alzheimer's Disease Initiative (EADI), Cohorts for Heart and Aging Research in Genomic Epidemiology Consortium (CHARGE), Genetic and Environmental Risk in AD/Defining Genetic, Polygenic and Environmental Risk for Alzheimer's Disease Consortium (GERAD/PERADES),", "IGAP" ), sex=rep("Males and Females",3), population=rep("European",3), unit=c("log odds","NA","log odds"), nsnp=c(11633,10528610,7055882), sample_size=c(74046,63926,54162), build=rep("HG19/GRCh37",3), category=c("Disease","Binary","Disease"), subcategory=rep("Psychiatric / neurological",3), ontology=rep("NA",3), mr=rep(1,3), priority=c(1,0,2), pmid=c(24162737,30820047,24162737), sd=rep(NA,3), note=c("Exposure only; Effect allele frequencies are missing; forward(+) strand", "NA","Effect allele frequencies are missing; forward(+) strand"), ncase=c(25580,21982,17008), ncontrol=c(48466,41944,37154), N=c(74046,63926,54162) ) print(head(metagwas2,3))
You can supply import_sumstats()
with a list of as many OpenGWAS IDs as you
want, but we'll just give one to save time.
datasets <- MungeSumstats::import_sumstats(ids = "ieu-a-298", ref_genome = "GRCH37")
#don't actually run as this takes some time, use stashed version datasets <- list("ieu-a-298"=paste0(tempdir(),"/ieu-a-298.tsv.gz")) #stashed value datasets_data <- list("ieu-a-298"=system.file("extdata","ieu-a-298.tsv.gz", package="MungeSumstats"))
By default, import_sumstats
results a named list where the names are the Open
GWAS dataset IDs and the items are the respective paths to the formatted summary
statistics.
print(datasets)
You can easily turn this into a data.frame as well.
results_df <- data.frame(id=names(datasets), path=unlist(datasets)) print(results_df)
Optional: Speed up with multi-threaded download via axel.
datasets <- MungeSumstats::import_sumstats(ids = ids, vcf_download = TRUE, download_method = "axel", nThread = max(2,future::availableCores()-2))
See the Getting started vignette for more information on how to use MungeSumstats and its functionality.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.