knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval=FALSE )
library(simulateGP) library(jsonlite) library(data.table) library(tidyverse)
Use https://bitbucket.org/nygcresearch/ldetect-data from Berisa and Pickrell (2016) - a list of independent LD regions for Africans, Europeans and Asians.
Note: The Africans dataset has a couple of None values that I am interpolating in a simple way to avoid errors.
a <- data.table::fread("https://bitbucket.org/nygcresearch/ldetect-data/raw/ac125e47bf7ff3e90be31f278a7b6a61daaba0dc/AFR/fourier_ls-all.bed") midpoint <- round((108823642 + 111048570)/2) a$stop[a$stop=="None"] <- midpoint a$start[a$start=="None"] <- midpoint a$start <- as.numeric(a$start) a$stop <- as.numeric(a$stop) a$pop <- "AFR" b <- data.table::fread("https://bitbucket.org/nygcresearch/ldetect-data/raw/ac125e47bf7ff3e90be31f278a7b6a61daaba0dc/ASN/fourier_ls-all.bed") b$pop <- "EAS" c <- data.table::fread("https://bitbucket.org/nygcresearch/ldetect-data/raw/ac125e47bf7ff3e90be31f278a7b6a61daaba0dc/EUR/fourier_ls-all.bed") c$pop <- "EUR" ldetect <- dplyr::bind_rows(a,b,c) # Avoid overlap between regions ldetect$stop <- ldetect$stop - 1
This ldetect
object is saved as a data object in this package.
For each region create an .rds
object that contains a list of map
and ld
Create a generate_ldobj_config.json
file:
{ "dl_dir": "/path/to/downloads" }
Setup directories
conf <- read_json("generate_ldobj_config.json") dir.create(conf$dl_dir) dir.create(file.path(conf$dl_dir, "EUR_1kg_hm3")) dir.create(file.path(conf$dl_dir, "EAS_1kg_hm3")) dir.create(file.path(conf$dl_dir, "AFR_1kg_hm3")) setwd(conf$dl_dir)
Get the 1000 genomes files
wget -O 1kg.v3.tgz http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz tar xzvf 1kg.v3.tgz rm 1kg.v3.tgz wget https://github.com/MRCIEU/gwasglue/raw/master/inst/hapmap3/hapmap3_autosome.snplist.gz gunzip hapmap3_autosome.snplist.gz plink --bfile EUR --extract hapmap3_autosome.snplist --make-bed --keep-allele-order --out EUR_1kg_hm3 plink --bfile EAS --extract hapmap3_autosome.snplist --make-bed --keep-allele-order --out EAS_1kg_hm3 plink --bfile AFR --extract hapmap3_autosome.snplist --make-bed --keep-allele-order --out AFR_1kg_hm3
Generate matrices for each population
data(ldetect) map_eas <- generate_ldobj( outdir=file.path(conf$dl_dir, "EAS_1kg_hm3"), bfile=file.path(conf$dl_dir, "EAS_1kg_hm3"), regions=subset(ldetect, pop=="EAS"), nthreads=16 ) map_afr <- generate_ldobj( outdir=file.path(conf$dl_dir, "AFR_1kg_hm3"), bfile=file.path(conf$dl_dir, "AFR_1kg_hm3"), regions=subset(ldetect, pop=="AFR"), nthreads=16 ) map_eur <- generate_ldobj( outdir=file.path(conf$dl_dir, "EUR_1kg_hm3"), bfile=file.path(conf$dl_dir, "EUR_1kg_hm3"), regions=subset(ldetect, pop=="EUR"), nthreads=16 )
Package them up
cmd <- paste0("cd ", conf$dl_dir, "; tar cvf EUR_1kg_hm3_ldobj.tar EUR_1kg_hm3") system(cmd) cmd <- paste0("cd ", conf$dl_dir, "; tar cvf EAS_1kg_hm3_ldobj.tar EAS_1kg_hm3") system(cmd) cmd <- paste0("cd ", conf$dl_dir, "; tar cvf AFR_1kg_hm3_ldobj.tar AFR_1kg_hm3") system(cmd)
Using just one region with just one causal variant. Read in a regional LD matrix
set.seed(1234) fn <- list.files(file.path(conf$dl_dir, "EAS_1kg_hm3"), full.names=TRUE) %>% grep("ldobj_chr", ., value=TRUE) %>% {.[7]} ldobj_eas <- readRDS(fn)
Generate the LD-aware effects from a single causal variant
params <- ldobj_eas$map %>% generate_gwas_params(h2=0.01, Pi=1/nrow(.)) %>% add_ld_to_params(ldobj=ldobj_eas)
Add some random noise for a sample size of 100000 and plot
ss <- params %>% generate_gwas_ss(100000) ggplot(ss, aes(x=pos, y=-log10(pval))) + geom_point()
Now try whole genome with 100 causal variants - from files - takes less than 2 minutes for HapMap3 with 1 thread
# Generate effects params <- map_eas %>% generate_gwas_params(h2=0.01, Pi=100/nrow(.)) %>% add_ld_to_params(ldobjdir = file.path(conf$dl_dir, "EAS_1kg_hm3"), nthreads=16) # Generate sample estimates ss <- params %>% generate_gwas_ss(10000000) # Plot ggplot(ss, aes(x=pos, y=-log10(pval))) + geom_point() + facet_grid(. ~ chr, scale="free_x", space="free_x")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.