This vignette demonstrates the performances of IBSS under SuSiE in genetic fine-mapping.

# install.packages("devtools")
devtools::install_github("statwangz/susie")
library(susie)
library(tidyverse)
library(ggplot2)
library(corrplot)
library(Rmisc)
data(N2finemapping)
attach(N2finemapping)

The loaded dataset contains data $X$ and $y$ and the true regression effect coefficients the data is simulated from.

which(true_coef[ , 1] != 0)

eff_ind <- ggplot(data = NULL, aes(x = 1:dim(X)[2], y = true_coef[ , 1])) +
  geom_point(size = 4) +
  xlab("Index") +
  ylab("Effect") +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
ggsave("eff_ind.pdf", eff_ind)

There are the 2 effect variables in the first dataset: 337 and 999.

Correlation

# corrplot(cor(X[ , 100:200]), type = "lower", tl.pos = "n", cl.cex = 1.5)
pdf(file = "cor.pdf")
corrplot(cor(X[ , 900:1000]), type = "lower", tl.pos = "n", cl.cex = 1.5)
dev.off()

Preprocess the data

X_cen <- scale(X, center = T, scale = F)
y_cen <- scale(Y[ , 1], center = T, scale = F)

Fit IBSS

# fit_10 <- IBSS(X, Y[ , 1], L = 10, tol = 1e-5)
# fit_10 <- IBSS(X_cen, y_cen, L = 10, tol = 1e-5)
# save(fit_10, file = "fit_10.RData")
load("fit_10.RData")

Visualize

p1 <- PlotPIP(fit_10$PIP, iter = 1) +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
ggsave("p1.pdf", p1)

p3 <- PlotPIP(fit_10$PIP, iter = 3) +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
ggsave("p3.pdf", p3)

p5 <- PlotPIP(fit_10$PIP, iter = 5) +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
ggsave("p5.pdf", p5)

p10 <- PlotPIP(fit_10$PIP, iter = 10) +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
ggsave("p10.pdf", p10)
p <- list()
for (i in 1:18) {
  pi <- PlotPIP(fit_10$PIP, iter = i) +
    theme(title = element_text(size = 30),
          axis.text = element_text(size = 25))
  p <- c(p, list(pi))
}
# multiplot(p, cols = 3)

After one iteration, IBSS incorrectly identifies a credible set containing the strongest marginal association and no effect variable. After several iterations, IBSS corrects itself one step by one step. After around 8 iterations, the PIPs have converged and the IBSS algorithm has corrected itself.

Credible sets

cs <- list()
for (l in 1:10) {
  cs_l <- CredibleSet(fit_10$ser[[l]]$alpha, rho = 0.95)
  cs <- c(cs, list(cs_l))
}
# cs_prune <- CredibleSetPrune(cs, X_cen)
# save(cs_prune, file = "cs_prune.RData")
load("cs_prune.RData")

Credible sets after selected through corvariance

cs_prune

Note that these two credible sets each contain one different effect variable.

cs_group <- rep(0, dim(X_cen)[2])
cs_group[cs_prune[[1]]] <- 1
cs_group[cs_prune[[2]]] <- 2
cs_tibble <- tibble(SNPs = 1:dim(X_cen)[2], PIP = pull(fit_10$PIP, "iter18"), cs = as.character(cs_group))
p_cs <- ggplot(data = cs_tibble, aes(x = SNPs, y = PIP)) +
  geom_point(aes(colour = factor(cs)), size = 3, shape = 16, show.legend = FALSE) +
  xlab("Variable (SNP)") +
  scale_colour_manual(values = c("grey", "red", "green")) +
  theme(title = element_text(size = 30),
        axis.text = element_text(size = 25))
p_cs
ggsave("p_cs.pdf", p_cs)

The grey points mean non-effect variable, red and green points mean two different credible sets.



statwangz/SuSiE documentation built on Nov. 22, 2022, 5:21 p.m.