knitr::opts_chunk$set(message = FALSE,
                      warning = FALSE,
                      fig.width = 6,
                      fig.height = 4,
                      fig.align = "center",
                      fig.cap = " ",
                      dpi = 120)

In this tutorial, we show an example of performing fine-mapping and LD mismatch diagnosis (using susie_rss) with pre-computed LD matrices from UK Biobank reference.

We have pre-computed the LD matrices of European samples from UK Biobank. They can be downloaded here.

Load the packages.

library(mapgen)
library(tidyverse)
library(ggplot2)

Load an example Asthma GWAS summary statistics

gwas.file <- '/project2/xinhe/shared_data/mapgen/example_data/GWAS/aoa_v3_gwas_ukbsnps_susie_input.txt.gz'
gwas.sumstats <- vroom::vroom(gwas.file, col_names = TRUE, show_col_types = FALSE)
head(gwas.sumstats)

n = 336210

Load LD blocks

LD_Blocks <- readRDS(system.file('extdata', 'LD.blocks.EUR.hg19.rds', package='mapgen'))

Obtain a data frame of region_info, rows are LD block coordinates together with the filenames of UKBB reference LD matrices and SNP info.

region_info <- get_UKBB_region_info(LD_Blocks,
                                    LDREF.dir = "/project2/mstephens/wcrouse/UKB_LDR_0.1_b37", 
                                    prefix = "ukb_b37_0.1")

Read all SNP info from the LD reference

LD_snp_info <- read_LD_SNP_info(region_info)

Process GWAS summary statistics and add LD block information, harmonize GWAS summary statistics with LD reference, flip signs when reverse complement matches and remove strand ambiguous variants

gwas.sumstats <- process_gwas_sumstats(gwas.sumstats, 
                                       chr='chr', pos='pos', beta='beta', se='se', 
                                       a0='a0', a1='a1', snp='snp', pval='pval',
                                       LD_snp_info=LD_snp_info, 
                                       strand_flip=TRUE, 
                                       remove_strand_ambig=TRUE)

Select GWAS significant loci (pval < 5e-8)

if (max(gwas.sumstats$pval) > 1) {
  gwas.sumstats <- gwas.sumstats %>% dplyr::mutate(pval = 10^(-pval))
}

sig.loci <- gwas.sumstats %>% dplyr::filter(pval < 5e-8) %>% dplyr::pull(locus) %>% unique()
cat(length(sig.loci), "significant loci. \n")

Fine-mapping

Fine-mapping significant loci with pre-computed UKBB LD matrices (we use uniform prior in this tutorial).

If we set save = TRUE, it saves susie result as well as z-scores and R matrix for each locus, which could be used later for diagnosis.

selected.sumstats <- gwas.sumstats[gwas.sumstats$locus %in% sig.loci, ]

susie.results <- run_finemapping(selected.sumstats, 
                                 region_info = region_info, 
                                 priortype = 'uniform', n = n, L = 10,
                                 save = TRUE, outputdir = "./test", outname = "example")

susie.sumstats <- merge_susie_sumstats(susie.results, sumstats.locus)

run_finemapping() internally runs susie_finemap_region() for each of the loci. For finemapping a single locus, we can simply use susie_finemap_region() with sumstats and LD information (R and snp_info) for the locus.

locus <- sig.loci[2]
sumstats.locus <- gwas.sumstats[gwas.sumstats$locus == locus, ]

# load LD matrix and SNP info for this locus
LD_ref <- load_UKBB_LDREF(LD_Blocks, 
                          locus, 
                          LDREF.dir = "/project2/mstephens/wcrouse/UKB_LDR_0.1_b37", 
                          prefix = "ukb_b37_0.1")

# Match GWAS sumstats with LD reference, only keep variants included in LD reference.
matched.sumstat.LD <- match_gwas_LDREF(sumstats.locus, LD_ref$R, LD_ref$snp_info)
sumstats.locus <- matched.sumstat.LD$sumstats
z.locus <- sumstats.locus$zscore
R.locus <- matched.sumstat.LD$R
snp_info.locus <- matched.sumstat.LD$snp_info

susie.locus.res <- susie_finemap_region(sumstats.locus, R.locus, snp_info.locus, n = n, L = 10)
susieR::susie_plot(susie.locus.res, y='PIP')

susie.locus.sumstats <- merge_susie_sumstats(susie.locus.res, sumstats.locus)

LD mismatch diagnosis

Inconsistencies between GWAS z-scores and the LD reference could potentially inflate PIPs from SuSiE finemapping when L > 1. It would be helpful to check for potential LD mismatch issues, for example using DENTIST or susie_rss.

Here we perform LD mismatch diagnosis for this example locus using susie_rss:

condz <- LD_diagnosis_susie_rss(z.locus, R.locus, n)
condz$plot

The estimated lambda is very small, and the LD mismatch diagnosis plot looks good (with no allele flipping detected), which suggests that the GWAS z-scores and reference LD are consistent.

Next, we manually introduces LD mismatches by flipping alleles for 10 variants (with large effects) and see if we can detect those.

seed = 1
set.seed(seed)

flip_index <- sample(which(sumstats.locus$zscore > 2), 10)
sumstats.locus.flip <- sumstats.locus
sumstats.locus.flip$zscore[flip_index] <- -sumstats.locus$zscore[flip_index]

cat(length(flip_index), "allele flipped variants:\n", sort(sumstats.locus.flip$snp[flip_index]), "\n")

Then, we run fine-mapping including variants with flipped alleles.

susie.results <- run_finemapping(sumstats.locus.flip, region_info = region_info, priortype = 'uniform', n = n, L = 10)
susie.locus.res <- susie.results[[1]]
susieR::susie_plot(susie.locus.res, y='PIP')

Let's compare observed z-scores vs the expected z-scores.

condz <- LD_diagnosis_susie_rss(sumstats.locus.flip$zscore, R = R.locus, n = n)
# condz$plot

Detect possible allele flipped variants (logLR > 2 and abs(z) > 2).

detected_index <- which(condz$conditional_dist$logLR > 2 & abs(condz$conditional_dist$z) > 2)
cat(sprintf("Detected %d variants with possible allele flipping", length(detected_index)), "\n")
cat("Possible allele switched variants:", sort(sumstats.locus.flip$snp[detected_index]), "\n")

condz$conditional_dist$flipped <- 0
condz$conditional_dist$flipped[flip_index] <- 1
condz$conditional_dist$detected <- 0
condz$conditional_dist$detected[detected_index] <- 1

cat(sprintf("%d out of %d flipped variants detected with logLR > 2 and abs(z) > 2. \n", 
            length(intersect(detected_index, flip_index)), length(flip_index)))

condz$conditional_dist[union(flip_index, detected_index),]
ggplot(condz$conditional_dist, aes(x = condmean, y = z, col = factor(flipped))) +
  geom_point() +
  scale_colour_manual(values = c("0" = "black", "1" = "red")) + 
  labs(x = "Expected value", y = "Observed z scores", color = "Simulated allele flipping") + 
  theme_bw()

ggplot(condz$conditional_dist, aes(x = condmean, y = z, col = factor(detected))) +
  geom_point() +
  scale_colour_manual(values = c("0" = "black", "1" = "red")) + 
  labs(x = "Expected value", y = "Observed z scores", color = "Detected allele flipping") + 
  theme_bw()


kevinlkx/mapgen documentation built on March 31, 2024, 11:03 p.m.