knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(snpbeadchip)
library(data.table)
library(mclust)
library(ggplot2)

Load data

Can use illuminaio::readIDAT() directly, or use this package's function read_idat_single().

Here is a skeleton:

library(data.table)
library(omni54manifest)

idat <- read_idat_single("/path/to/files/", "001_Red.idat", "002_Grn.idat")
setnames(idat, "Name", "Address_ID")

manifest <- load_manifest() # From omni54manifest
m_idat <- annotate_manifest_with_idat(manifest, idat)
m_idat

You end up with something like the included example dataset (which is just simulated data):

data(idat_ex, package = "snpbeadchip")
data(idat_ex)
colnames(idat_ex)
dim(idat_ex)
head(idat_ex)

Bookkeeping

The function channel_probe_to_alleles() is used to select the right addresses and colour channels to easier get mean signal intensity for allele A and B:

d <- cbind(idat_ex, channel_probe_to_alleles(idat_ex)) 
d$SNP1 <- substr(d$SNP, 2, 2)
d$SNP2 <- substr(d$SNP, 4, 4)
d <- d[, c("RsID", "SNP1", "SNP2", "RefStrand", "Allele_A_sig_Mean", "Allele_B_sig_Mean")]

d_trans <- data.table(x = log(d$Allele_A_sig_Mean + 1), 
                      y = log(d$Allele_B_sig_Mean + 1))

Call alleles

Either train own model like (not easy with this small dataset)

library(mclust)
fit <- mclust::Mclust(d_trans, G = 3, modelNames = "VVV")

Or use the supplied one:

fit <- get_call_model() 

Then call alleles:

pred <- predict(fit, newdata = d_trans)
grp <- apply(pred$z, 1, which.max) 
prb <- apply(pred$z, 1, max)

Convert clusters to alleles:

grp_AB <- cluster_to_AB(grp)
grp_PM <- to_plus_minus(grp_AB, d$SNP1, d$SNP2, d$RefStrand, sep = "")
table(grp_AB)
table(grp_PM)
table(grp_AB, grp_PM)

Want to impose a threshold?

grp_PM_NC <- ifelse(prb < 0.99, "NC", grp_PM)
sum(grp_PM_NC == "NC")
mean(grp_PM_NC == "NC")
table(grp_PM_NC)

Inspect information for certain posterior probability:

d_pred <- d
d_pred$prob <- prb
d_pred$PM <- grp_PM
d_pred[prob < 0.8, ]

idat_ex_pred <- idat_ex
idat_ex_pred$prob <- prb
idat_ex_pred$PM <- grp_PM
idat_ex_pred[prob < 0.8, ]

Visualisation

alpha <- 0.9
elipses <- do.call(rbind, lapply(seq_len(fit$G), function(j) {

    mu <- fit$parameters$mean[, j]
    S <- fit$parameters$variance$sigma[,,j]

    mat <- mixtools::ellipse(mu = mu, sigma = S, 
                               alpha = 1 - alpha, 
                               npoints = 250, 
                               newplot = FALSE,
                               draw = FALSE)
    colnames(mat) <- c("x", "y")
    mat <- as.data.frame(mat)
    mat$grp <- c("BB", "AB", "AA")[j]
    mat
}))
ggplot(d_pred, aes(log(Allele_A_sig_Mean + 1), 
                   log(Allele_B_sig_Mean + 1))) + 
  geom_hex() +
  geom_path(aes(x = x, y = y, group = grp, color = grp), 
            size = 1, alpha = 1, data = elipses) 

Concordance

We imagine we had the whole-genome sequencing result:

set.seed(1)
d_pred$WGS <- d_pred$PM
d_pred$WGS[sample(seq_along(d_pred$WGS), 100)] <- d_pred$WGS[sample(seq_along(d_pred$WGS), 100)]

d_pred_tmp <- as.data.frame(xtabs(~ WGS + PM, d_pred))
ggplot(d_pred_tmp, aes(WGS, PM)) +
  geom_tile(aes(fill = Freq)) + 
  scale_fill_gradient(low = "lightgrey", high = "red", na.value = NA)


mikldk/snpbeadchip documentation built on Sept. 15, 2022, 10:21 a.m.