knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

GTM Examples

Examples how to use GTM and GTM* from paper "Assessing allele-specific expression across multiple tissues from RNA-seq read data"

Import the package

library(asepirinen)

Generate some data

nv <- 40 #number of variants
nt <- sample(5:10, replace = TRUE, size = nv, prob = rep(1, 6)) #number of tissues for each variant
nreads <- 20 #number of reads per each tissue
states <- sample(1:5, size = nv, replace = TRUE, prob = c(1, 0, 0, 1, 0.5)) #true state for each variant

theta <- c(0.5, 0.25, 0.01) #non ref allele freq for each group (1=NOASE ,2=MODASE or 3=SNGASE)

y.list <- list()
true.groups <- list()
for(set in 1:nv){
  if (states[set] %in% c(1:3)) groups <- rep(states[set], nt[set])
  if (states[set] == 4) groups <- c(rep(1, 3), rep(2, nt[set] - 3)) #HET0
  if (states[set] == 5) groups <- c(rep(3, 3), rep(2, nt[set] - 3)) #HET1
  groups <- sample(groups)
  true.groups[[set]] <- groups
  y <- rep(NA, nt[set])
  for (i in 1:nt[set]) {
    y[i] = rbinom(1, size = nreads, prob = theta[groups[i]])
  }
  y <- cbind(nreads - y, y)
  rownames(y) <- paste("T", 1:nt[set], sep = "")
  y.list[[set]] <- y
}
y.list
y <- c(0, 5, 1, 2, 4, 1)
y <- cbind(20 - y, y)
rownames(y) <- paste("T", 1:6, sep = "")
y.list[[1]] <- y

Set parameters

pr.beta <- c(2000, 2000, 36, 12, 80, 1)
#pr.intv <- c(0.48, 0.52, 0.52, 0.95, 0.95, 1) #truncate to intervals
pr.intv <- rep(NA, 6) #do not truncate
indp <- FALSE #independent frequencies among tissues in same group?
two.sided <- FALSE #two sided frequency distributions?
niter <- 100
burnin <- 10
prior.pi <- rep(1, 5)
group.distance <- c(1, 1, 0.5)
res.list <- list() #will be results from single variant analyses
posteriors <- rep(0, 6) #combined state probabilities from single var analyses
for(i in 1:nv) {
  res.list[[i]] <- gtm(
    y.list[[i]],
    pr.beta = pr.beta,
    pr.intv = pr.intv,
    niter = niter,
    burnin = burnin,
    two.sided = two.sided,
    independent = indp
  )
  posteriors <- posteriors + as.numeric(res.list[[i]][["state.posteriors"]])
}
posteriors <- posteriors / nv
res.list[[1]][["state.posteriors"]]

apply hierarchical model:

hm.res <- gtm.star(
  y.list,
  pr.beta = pr.beta,
  pr.intv = pr.intv,
  pr.pi = prior.pi,
  niter = niter,
  burnin = burnin,
  two.sided = two.sided,
  independent = indp
)

sing.res <- matrix(NA, nrow = 5, ncol = 3) #matrix with 95% intervals for single variant analyses
sing.res[,1] <- c(posteriors[1:5]) #,sum(posteriors[4:5]))
sing.res[,2] <- sing.res[, 1] - 1.96 * sqrt(
  sing.res[,1] * (1 - sing.res[,1]) / nv
)
sing.res[sing.res[,2] < 0, 2] <- 0
sing.res[,3] <- sing.res[, 1] + 1.96 * sqrt(
  sing.res[,1] * (1 - sing.res[,1]) / nv
)

plot(
  0,
  xlim = c(0, 6),
  ylim = c(0, 1),
  col = "white",
  xlab = "state",
  ylab = "pi",
  xaxt = "n"
)
axis(
  1,
  at = c(1:5),
  labels = c("NOASE", "MODASE", "SNGASE", "HET0", "HET1")
)
for(i in 1:5) {
  points(i - 0.2, hm.res$prop.posteriors[i, 1], pch = 19, col = "springgreen")
  points(i + 0.2, sing.res[i,1], pch = 19, col = "blue")
  arrows(
    i - 0.2,
    hm.res$prop.posteriors[i, 2],
    i - 0.2,
    hm.res$prop.posteriors[i, 3],
    code = 3,
    angle = 90,
    length = 0.1
  )
  arrows(
    i + 0.2,
    sing.res[i, 2],
    i + 0.2, sing.res[i, 3],
    code = 3,
    angle = 90,
    length = 0.1
  )
  points(
    i,
    as.numeric((table(states) / nv)[as.character(i)]),
    pch = "X",
    col = "red"
  ) #plot true values
}

hm.res$prop.posteriors[,1] #state probabilities from hierarchical model
round(
  apply(
    matrix(unlist(hm.res$state.posteriors), byrow = TRUE, ncol = 6),
    2,
    mean
  ),
  2
) #another version from hier model, 6th state=TISSUE SPECIFIC
round(posteriors, 2) #single var analyses, 6th state=TISSUE SPECIFIC
table(states) / nv #true proportions


anthony-aylward/asepirinen documentation built on May 13, 2019, 11:29 a.m.