# R/simulate.R In chromstaR: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

```#' Simulate univariate data
#'
#' Simulate known states, read counts and read coordinates using a univariate Hidden Markov Model with three states ("zero-inflation", "unmodified" and "modified").
#'
#' @param bins A \code{\link[GenomicRanges]{GRanges-class}} object for which reads will be simulated.
#' @param transition A matrix with transition probabilities.
#' @param emission A data.frame with emission distributions (see \code{\link{uniHMM}} entry 'distributions').
#' @return A \code{list} with entries \$bins containing the simulated states and read count, \$reads with simulated read coordinates and \$transition and \$emission.
#' @importFrom stats runif rnbinom aggregate
simulateUnivariate <- function(bins, transition, emission, fragLen=50) {

# Calculate some variables
numstates = ncol(transition)
if (numstates!=3) {
stop("The transition matrix is expected to have 3 columns and 3 rows")
}
numbins = length(bins)

## Make state vector from transition matrix
# Generate sample of random numbers
rsample = stats::runif(numbins,0,1)
# Integrate transition matrix by row and add -1 in front
cumtransition = cbind(rep(-1,numstates), t(apply(transition, 1, cumsum)))
# Generate the state vector by going through each state
ptm <- startTimedMessage("Generating states from transition matrix ...")
states = matrix(rep(NA,numstates*numbins), ncol=numstates)
for (irow in 1:numstates) {
states[,irow] = findInterval(rsample, cumtransition[irow,], rightmost.closed=TRUE)
}
statevec = rep(NA,numbins)
statevec = 1
for (ibin in 2:numbins) {
statevec[ibin] = states[ibin,statevec[ibin-1]]
}
stopTimedMessage(ptm)

## Make reads from state vector and distributions
# Generate the read counts by drawing from distribution
ptm <- startTimedMessage("Generating read counts from emission parameters and states ...")
numbins.in.state = stats::aggregate(rep(1,length(statevec)), list(state=statevec), sum)
if (!is.na(numbins.in.state[2,'x'])) {
}
if (!is.na(numbins.in.state[3,'x'])) {
}
stopTimedMessage(ptm)

## Combine in GRanges
bins.out <- bins
mcols(bins.out) <- NULL
bins.out\$state <- state.labels[statevec]

# Return the output
out = list(bins = bins.out,
transition = transition,
emission = emission
)
return(out)

}

#'
#'
#' @param fragLen Length of the simulated read fragments.

ptm <- startTimedMessage("Generating read coordinates ...")
## Do this for every chromosome
bins.splt <- split(bins, seqnames(bins))
for (chrom in names(bins.splt)) {
bins.chrom <- bins.splt[[chrom]]
if (length(bins.chrom) > 0) {
binsize <- mean(width(bins.chrom))
## For each read, get the start position
rlcounts <- rle(1)
rlcounts\$lengths <- bins.chrom\$counts
rlcounts\$values <- as.numeric(start(bins.chrom))
starts <- inverse.rle(rlcounts)
## For each read, simulate a random offset to the start of the bin
if (length(starts) > 0) {
} else {
}
}
}
stopTimedMessage(ptm)

}

#' Simulate multivariate data
#'
#' Simulate known states, read counts and read coordinates using a multivariate Hidden Markov Model.
#'
#' @param bins A \code{\link[GenomicRanges]{GRanges-class}} object for which reads will be simulated.
#' @param transition A matrix with transition probabilities.
#' @param emissions A list() with data.frames with emission distributions (see \code{\link{uniHMM}} entry 'distributions').
#' @param weights A list() with weights for the three univariate states.
#' @param correlationMatrices A list with correlation matrices.
#' @param combstates A vector with combinatorial states.
#' @param IDs A character vector with IDs.
#' @return A \code{list()} with entries \$bins containing the simulated states and read count, \$reads with simulated read coordinates.
#' @importFrom mvtnorm rmvnorm
# #' @examples
# #'#'### Get an example multiHMM ###
# #'file <- system.file("data","multivariate_mode-combinatorial_condition-SHR.RData",
# #'                     package="chromstaR")
# #'sim <- chromstaR:::simulateMultivariate(bins=model\$bins,
# #'                     transition=model\$transitionProbs,
# #'                     emission=model\$distributions,
# #'                     weights=model\$weights.univariate,
# #'                     correlationMatrices=model\$correlation.matrix,
# #'                     combstates=names(model\$mapping),
# #'                     IDs=model\$info\$ID)
simulateMultivariate = function(bins, transition, emissions, weights, correlationMatrices, combstates, IDs, fragLen=50) {

# Calculate some variables
numstates <- ncol(transition)
numbins <- length(bins)
numtracks <- length(emissions)

## Make state vector from transition matrix
# Generate sample of random numbers
rsample <- stats::runif(numbins,0,1)
# Integrate transition matrix by row and add -1 in front
cumtransition <- cbind(rep(-1,numstates), t(apply(transition, 1, cumsum)))
# Generate the state vector by going through each state
ptm <- startTimedMessage("Generating states from transition matrix...")
states <- matrix(NA, ncol=numstates, nrow=numbins)
for (irow in 1:numstates) {
states[,irow] <- findInterval(rsample, cumtransition[irow,], rightmost.closed=TRUE)
}
statevec <- rep(NA,numbins)
statevec <- 1
for (ibin in 2:numbins) {
statevec[ibin] <- states[ibin,statevec[ibin-1]]
}
# Replace the states by combinatorial states
statevec <- combstates[statevec]
stopTimedMessage(ptm)

## Make reads from state vector and emission distributions
counts <- matrix(rep(NA, numtracks*numbins), ncol=numtracks)
colnames(counts) <- IDs

sizes <- unlist(lapply(emissions, "[", 2:3, 'size'))
probs <- unlist(lapply(emissions, "[", 2:3, 'prob'))
ws1 <- unlist(lapply(weights,"[",1))
ws2 <- unlist(lapply(weights,"[",2))
ws3 <- unlist(lapply(weights,"[",3))
ws <- ws1 / (ws2+ws1)
for (istate in combstates) {
message("Generating counts for state ",istate)
i <- which(combstates==istate)

# Convert istate to binary representation
binary_state <- rev(as.integer(intToBits(istate))[1:numtracks])
binary_state <- dec2bin(istate, colnames=IDs)
binary_stateTF <- unlist(list(c(TRUE,FALSE), c(FALSE,TRUE))[binary_state+1])

# Draw from the multivariate normal
n <- length(which(statevec==istate))
if (n > 0) {
ptm <- startTimedMessage("  Drawing from multivariate normal ...")
z <- matrix(mvtnorm::rmvnorm(n, mean=rep(0,numtracks), sigma=correlationMatrices[,,i]), ncol=numtracks)
stopTimedMessage(ptm)
# Transform to uniform space
ptm <- startTimedMessage("  Transforming to uniform space ...")
u <- matrix(apply(z, 2, stats::pnorm), ncol=numtracks)
stopTimedMessage(ptm)
# Transform to count space using marginals
ptm <- startTimedMessage("  Transforming to count space ...")
isizes <- sizes[binary_stateTF]
iprobs <- probs[binary_stateTF]
for (imod in 1:numtracks) {
if (binary_state[imod] == 0) {
counts[mask,imod] <- qzinbinom(u[,imod], w=ws[imod], size=isizes[imod], prob=iprobs[imod])
} else {
}
}
stopTimedMessage(ptm)
}

}

## Combine in GRanges
bins.out <- bins
mcols(bins.out) <- NULL
bins.out\$counts <- counts
bins.out\$state <- factor(statevec, levels=combstates)

for (icol in 1:ncol(bins.out\$counts)) {
track <- colnames(bins.out\$counts)[icol]
bins.temp <- bins.out
bins.temp\$counts <- bins.out\$counts[,icol]
}

# Return the output
out <- list(bins = bins.out,
emissions = emissions,
transition = transition,
correlationMatrices = correlationMatrices,
combstates = combstates,
weights = weights,
IDs = IDs
)
return(out)

}
```

## Try the chromstaR package in your browser

Any scripts or data that you put into this service are public.

chromstaR documentation built on May 2, 2020, 2:01 a.m.