knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
Examples how to use GTM and GTM* from paper "Assessing allele-specific expression across multiple tissues from RNA-seq read data"
Import the package
library(asepirinen)
Generate some data
nv <- 40 #number of variants nt <- sample(5:10, replace = TRUE, size = nv, prob = rep(1, 6)) #number of tissues for each variant nreads <- 20 #number of reads per each tissue states <- sample(1:5, size = nv, replace = TRUE, prob = c(1, 0, 0, 1, 0.5)) #true state for each variant theta <- c(0.5, 0.25, 0.01) #non ref allele freq for each group (1=NOASE ,2=MODASE or 3=SNGASE) y.list <- list() true.groups <- list() for(set in 1:nv){ if (states[set] %in% c(1:3)) groups <- rep(states[set], nt[set]) if (states[set] == 4) groups <- c(rep(1, 3), rep(2, nt[set] - 3)) #HET0 if (states[set] == 5) groups <- c(rep(3, 3), rep(2, nt[set] - 3)) #HET1 groups <- sample(groups) true.groups[[set]] <- groups y <- rep(NA, nt[set]) for (i in 1:nt[set]) { y[i] = rbinom(1, size = nreads, prob = theta[groups[i]]) } y <- cbind(nreads - y, y) rownames(y) <- paste("T", 1:nt[set], sep = "") y.list[[set]] <- y } y.list y <- c(0, 5, 1, 2, 4, 1) y <- cbind(20 - y, y) rownames(y) <- paste("T", 1:6, sep = "") y.list[[1]] <- y
Set parameters
pr.beta <- c(2000, 2000, 36, 12, 80, 1) #pr.intv <- c(0.48, 0.52, 0.52, 0.95, 0.95, 1) #truncate to intervals pr.intv <- rep(NA, 6) #do not truncate indp <- FALSE #independent frequencies among tissues in same group? two.sided <- FALSE #two sided frequency distributions? niter <- 100 burnin <- 10 prior.pi <- rep(1, 5) group.distance <- c(1, 1, 0.5)
res.list <- list() #will be results from single variant analyses posteriors <- rep(0, 6) #combined state probabilities from single var analyses for(i in 1:nv) { res.list[[i]] <- gtm( y.list[[i]], pr.beta = pr.beta, pr.intv = pr.intv, niter = niter, burnin = burnin, two.sided = two.sided, independent = indp ) posteriors <- posteriors + as.numeric(res.list[[i]][["state.posteriors"]]) } posteriors <- posteriors / nv res.list[[1]][["state.posteriors"]]
apply hierarchical model:
hm.res <- gtm.star( y.list, pr.beta = pr.beta, pr.intv = pr.intv, pr.pi = prior.pi, niter = niter, burnin = burnin, two.sided = two.sided, independent = indp ) sing.res <- matrix(NA, nrow = 5, ncol = 3) #matrix with 95% intervals for single variant analyses sing.res[,1] <- c(posteriors[1:5]) #,sum(posteriors[4:5])) sing.res[,2] <- sing.res[, 1] - 1.96 * sqrt( sing.res[,1] * (1 - sing.res[,1]) / nv ) sing.res[sing.res[,2] < 0, 2] <- 0 sing.res[,3] <- sing.res[, 1] + 1.96 * sqrt( sing.res[,1] * (1 - sing.res[,1]) / nv ) plot( 0, xlim = c(0, 6), ylim = c(0, 1), col = "white", xlab = "state", ylab = "pi", xaxt = "n" ) axis( 1, at = c(1:5), labels = c("NOASE", "MODASE", "SNGASE", "HET0", "HET1") ) for(i in 1:5) { points(i - 0.2, hm.res$prop.posteriors[i, 1], pch = 19, col = "springgreen") points(i + 0.2, sing.res[i,1], pch = 19, col = "blue") arrows( i - 0.2, hm.res$prop.posteriors[i, 2], i - 0.2, hm.res$prop.posteriors[i, 3], code = 3, angle = 90, length = 0.1 ) arrows( i + 0.2, sing.res[i, 2], i + 0.2, sing.res[i, 3], code = 3, angle = 90, length = 0.1 ) points( i, as.numeric((table(states) / nv)[as.character(i)]), pch = "X", col = "red" ) #plot true values } hm.res$prop.posteriors[,1] #state probabilities from hierarchical model round( apply( matrix(unlist(hm.res$state.posteriors), byrow = TRUE, ncol = 6), 2, mean ), 2 ) #another version from hier model, 6th state=TISSUE SPECIFIC round(posteriors, 2) #single var analyses, 6th state=TISSUE SPECIFIC table(states) / nv #true proportions
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.