Nothing
#' @keywords internal
check.input <- function(Y,
rct.lst){
check.Y(Y)
check.rcts(Y,
rct.lst)
}
#' @keywords internal
check.Y <- function(Y){
if(!is.array(Y)){
stop("Y must be a 3-dimensional array or a matrix.\n")
}else{
if(length(dim(Y)) < 2 | length(dim(Y)) > 3){
stop("Y must be a 3-dimensional array or a matrix.\n")
}else{
if(length(dim(Y)) == 2){
if(is.null(rownames(Y))){
stop("Please specify row names in Y as the time-points.\n")
}else{
if(sum(is.na(as.numeric(rownames(Y)))) > 0){
stop("Please specify row names in Y as the time-points.\n")
}
}
if(is.null(colnames(Y))){
stop("Please specify column names in Y as the cell types.\n")
}
Y_arr <- array(Y, dim = c(dim(Y),1))
dimnames(Y_arr) <- list(rownames(Y),
colnames(Y),
"clone_1")
Y <- Y_arr
rm(Y_arr)
}else{
if(is.null(rownames(Y))){
stop("Please specify row names in Y as the time-points.\n")
}else{
if(sum(is.na(as.numeric(rownames(Y)))) > 0){
stop("Please specify row names in Y as the time-points.\n")
}
}
if(is.null(colnames(Y))){
stop("Please specify column names in Y as the cell types.\n")
}
if(is.null(dimnames(Y)[[3]])){
stop("Please specify third-dimension names of Y as the unique barcode/clone identifiers.\n")
}
}
}
}
}
#' @keywords internal
check.rcts <- function(Y,
rct.lst){
pattern <- "[A-Z0-9]{1,}->[A-Z0-9]{0,}[0-1]{0,}"
if(sum(grepl(pattern,
rct.lst,
ignore.case = FALSE)) < length(rct.lst)){
stop(paste("Unvalid reactions provided in 'rct.lst': ",
paste(rct.lst[which(!grepl(pattern,
c(rct.lst),
ignore.case = FALSE))], collapse = ", "), "\n", sep = ""))
}
if(sum(unlist(lapply(str_split(string = rct.lst, pattern = "->"), function(rct){rct[1]==rct[2]}))) > 0){
stop(paste("Unvalid reactions provided in 'rct.lst': ",
paste(rct.lst[which(unlist(lapply(str_split(string = rct.lst, pattern = "->"),
function(rct){rct[1]==rct[2]})))], collapse = ", "), "\n", sep = ""))
}
ct.rct.lst <- setdiff(as.vector(unlist(c(sapply(rct.lst, function(r){
as.vector(unlist(str_split(r, "->")))
}, simplify = "array")))), c("0","1"))
rgts.lst <- as.vector(unlist(lapply(str_split(string = rct.lst, pattern = "->"), function(s){s[1]})))
rgts_prod.lst <- unique(c(ct.rct.lst, rgts.lst))
if(sum(!(colnames(Y) %in% rgts_prod.lst)) > 0){
stop(paste("Observed cell types in 'Y' not present in 'rct.lst': ",
paste(as.vector(unlist(lapply(colnames(Y)[which(!(colnames(Y) %in% rgts_prod.lst))],
function(l){paste("'", l, "'", sep = "")}))), collapse = ", "),
"\nPlease provide compatible (observed) cell types in 'Y' and 'rct.lst'.\n",
sep = ""))
}
}
#' Rescaling a clonal tracking dataset
#'
#' Rescales a clonal tracking dataset based on the sequencing depth.
#' @param Y A 3-dimensional array whose dimensions are the time, the cell type and the clone respectively.
#' @details This function rescales a clonal tracking dataset Y according to the formula
#' \deqn{Y_{ijk} \leftarrow Y_{ijk} \cdot \frac{min_{ij}\sum_cY_{ijc}}{\sum_cY_{ijc}}}{Y_{ijk} <- Y_{ijk}*(min_{ij}sum_cY_{ijc})/(sum_cY_{ijc})}
#' @return A rescaled clonal tracking dataset.
#' @examples
#' get.rescaled(Y_RM[["ZH33"]])
#' @export
get.rescaled <- function(Y){
check.Y(Y)
f <- min(setdiff(c(apply(Y, c(1,2), sum)), 0))/apply(Y, c(1,2), sum)
f[is.infinite(f)] <- 0
Y_res <- sapply(1:dim(Y)[3], function(cl){round(Y[,,cl]*f)}, simplify = "array")
dimnames(Y_res) <- dimnames(Y)
return(Y_res)
}
#' Clonal pie-chart
#'
#' Draw a clonal pie-chart of a random-effects reaction network.
#' @param re.res output list returned by fit.re().
#' @param txt logical (defaults to FALSE). If TRUE, barcode names will be printed on the pies.
#' @param legend logical (defaults to FALSE). If TRUE, the legend of the pie-chart will be printed.
#' @details This function generates a clonal pie-chart given a previously fitted random-effects model.
#' In this representation each clone \eqn{k}{k} is identified with a pie whose slices are
#' lineage-specific and weighted with \eqn{w_k}{w_k}, defined as the difference between the
#' conditional expectations of the random-effects on duplication and death parameters, that is
#' \deqn{w_k = E_{u\vert \Delta Y; \hat{\psi}}[u^k_{\alpha_{lin}}] - E_{u\vert \Delta Y; \hat{\psi}}[u^k_{\delta_{lin}}]}{Eu|y;ψ[α_lin] - Eu|y;ψ[β_lin]},
#' where \eqn{\texttt{lin}}{lin} is a cell lineage.
#' The diameter of the \eqn{k}{k}-th pie is proportional to the euclidean 2-norm of \eqn{w_k}{w_k}.
#' Therefore, the larger the diameter, the more the corresponding clone is expanding
#' into the lineage associated to the largest slice.
#' @return No return value.
#' @examples
#' rcts <- c("A->1", "B->1", "C->1", "D->1",
#' "A->0", "B->0", "C->0", "D->0",
#' "A->B", "A->C", "C->D") ## set of reactions
#' ctps <- head(LETTERS,4)
#' nC <- 3 ## number of clones
#' S <- 10 ## trajectory length
#' tau <- 1 ## for tau-leaping algorithm
#' u_1 <- c(.2, .15, .17, .09*5,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_2 <- c(.2, .15, .17, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_3 <- c(.2, .15, .17*3, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' theta_allcls <- cbind(u_1, u_2, u_3) ## clone-specific parameters
#' rownames(theta_allcls) <- rcts
#' s20 <- 1 ## additional noise
#' Y <- array(data = NA,
#' dim = c(S + 1, length(ctps), nC),
#' dimnames = list(seq(from = 0, to = S*tau, by = tau),
#' ctps,
#' 1:nC)) ## empty array to store simulations
#' Y0 <- c(100,0,0,0) ## initial state
#' names(Y0) <- ctps
#' for (cl in 1:nC) { ## loop over clones
#' Y[,,cl] <- get.sim.tl(Yt = Y0,
#' theta = theta_allcls[,cl],
#' S = S,
#' s2 = s20,
#' tau = tau,
#' rct.lst = rcts,
#' verbose = TRUE)
#' }
#' null.res <- fit.null(Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' ) ## null model fitting
#'
#' re.res <- fit.re(theta_0 = null.res$fit$par,
#' Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' maxemit = 1 ## needs to be increased (>= 100) for real applications
#' ) ## random-effects model fitting
#'
#' get.scatterpie(re.res, txt = TRUE)
#' @export
get.scatterpie <- function(re.res, txt = FALSE, legend = FALSE){
V <- re.res$design$V ## net-effect matrix
M <- re.res$design$M ## base-model design matrix
linColMap <- brewer.pal(nrow(V), "Set2") ## color palette
names(linColMap) <- rownames(V)
K <- ncol(re.res$design$M_bdiag)/ncol(re.res$design$M) # dim(Y)[3] ## number of clones
M_bdiag <- re.res$design$M_bdiag ## random-effects design matrix
VEuy_sol <- re.res$fit$VEuy
euy_sol <- VEuy_sol[["euy"]] ## conditional expectaction E[u|y] of the random effects u given the data y
euy_allIS <- matrix(data = euy_sol, nrow = K, byrow = TRUE)
rownames(euy_allIS) <- unique(rownames(M))
colnames(euy_allIS) <- colnames(V)
t(euy_allIS)
d <- as.data.frame(euy_allIS[,1:(2*nrow(V))] %*% t(V[,1:(2*nrow(V))]))
d[d < 0] <- 0
n <- nrow(d)
A <- runif(n, 0, 1)
B <- runif(n, 0, 2*pi)
diameter <- sqrt(sum(sapply(apply(euy_allIS %*% t(V), 1, norm2), function(d){
d[d < 0] <- 0
return(pi*(d/2)^2)
}))/pi)*2
lat <- sqrt(A)*cos(B)*diameter
long <- sqrt(A)*sin(B)*diameter
d[, c("long","lat")] <- cbind(long, lat)
cellTypes <- rownames(V)
n <- nrow(d)
d$region <- factor(1:n)
d$radius <- apply(d[,colnames(d) %in% cellTypes], 1, norm2)
d <- d[, c("long","lat","radius", "region", cellTypes)]
XLIM <- (max(d$lat) + max(d$radius))
YLIM <- (max(d$long) + max(d$radius))
XLGEGEND <- YLEGEND <- 100
# piechart <- (ggplot() + scatterpie::geom_scatterpie(aes(x=long, y=lat, group=region, r=radius),
# data=d,
# cols=cellTypes,
# color="white")
# + scale_fill_manual(values=alpha(linColMap, alpha = .5)) + coord_equal()
# + xlab("") + ylab("")
# + theme(axis.line=element_blank(),
# axis.text.x=element_blank(),
# axis.text.y=element_blank(),
# axis.ticks=element_blank(),
# axis.title.x=element_blank(),
# axis.title.y=element_blank(),
# panel.background=element_blank(),
# panel.border=element_blank(),
# panel.grid.major=element_blank(),
# panel.grid.minor=element_blank(),
# plot.background=element_blank()))
piechart <- eval(parse(text='(ggplot() + geom_scatterpie(aes(x=long, y=lat, group=region, r=radius),
data=d,
cols=cellTypes,
color="white")
+ scale_fill_manual(values=alpha(linColMap, alpha = .5)) + coord_equal()
+ xlab("") + ylab("")
+ theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank()))'), envir = environment())
if(txt){
# piechart <- piechart + geom_text(aes(x=long, y=lat, group=region, label = region, fontface = "bold"),
# data = d, size = 10)
piechart <- piechart + eval(parse(text='geom_text(aes(x=long, y=lat, group=region, label = region, fontface = "bold"),
data = d, size = 10)'), envir = environment())
}
if(legend){
# piechart <- piechart + geom_scatterpie_legend(d$radius,
# x=-XLIM,
# y=-YLIM)
piechart <- piechart + eval(parse(text='geom_scatterpie_legend(d$radius,
x=-XLIM,
y=-YLIM)'), envir = environment())
}
return(piechart)
}
#' Clonal boxplots
#'
#' Draw clonal boxplots of a random-effects reaction network.
#' @param re.res output list returned by fit.re().
#' @details This function generates the boxplots of the conditional expectations
#' \deqn{w_k = E_{u\vert \Delta Y; \hat{\psi}}[u^k_{\alpha_{l}}] - E_{u\vert \Delta Y; \hat{\psi}}[u^k_{\delta_{l}}]}{Eu|y;ψ[α_l] - Eu|y;ψ[δ_l]},
#' computed from the estimated parameters \eqn{\hat{\psi}}{ψ} for the clone-specific net-duplication in each cell lineage l (different colors).
#' The whiskers extend to the data extremes.
#' @return No return value.
#' @examples
#' rcts <- c("A->1", "B->1", "C->1", "D->1",
#' "A->0", "B->0", "C->0", "D->0",
#' "A->B", "A->C", "C->D") ## set of reactions
#' ctps <- head(LETTERS,4)
#' nC <- 3 ## number of clones
#' S <- 10 ## trajectory length
#' tau <- 1 ## for tau-leaping algorithm
#' u_1 <- c(.2, .15, .17, .09*5,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_2 <- c(.2, .15, .17, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_3 <- c(.2, .15, .17*3, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' theta_allcls <- cbind(u_1, u_2, u_3) ## clone-specific parameters
#' rownames(theta_allcls) <- rcts
#' s20 <- 1 ## additional noise
#' Y <- array(data = NA,
#' dim = c(S + 1, length(ctps), nC),
#' dimnames = list(seq(from = 0, to = S*tau, by = tau),
#' ctps,
#' 1:nC)) ## empty array to store simulations
#' Y0 <- c(100,0,0,0) ## initial state
#' names(Y0) <- ctps
#' for (cl in 1:nC) { ## loop over clones
#' Y[,,cl] <- get.sim.tl(Yt = Y0,
#' theta = theta_allcls[,cl],
#' S = S,
#' s2 = s20,
#' tau = tau,
#' rct.lst = rcts,
#' verbose = TRUE)
#' }
#' null.res <- fit.null(Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' ) ## null model fitting
#'
#' re.res <- fit.re(theta_0 = null.res$fit$par,
#' Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' maxemit = 1 ## needs to be increased (>= 100) for real applications
#' ) ## random-effects model fitting
#'
#' get.boxplots(re.res)
#' @export
get.boxplots <- function(re.res){
K <- ncol(re.res$design$M_bdiag)/ncol(re.res$design$M)
euy_allIS <- matrix(data = re.res$fit$VEuy$euy, nrow = K, byrow = TRUE)
rownames(euy_allIS) <- unique(rownames(re.res$design$M))
colnames(euy_allIS) <- colnames(re.res$design$V)
linColMap <- brewer.pal(nrow(re.res$design$V), "Set2") ## color palette
names(linColMap) <- rownames(re.res$design$V)
bp <- boxplot(euy_allIS %*% t(re.res$design$V),
range = 0, lwd = 3,
col = linColMap, border = linColMap,
cex.axis = 2, cex.lab = 2,
horizontal = TRUE,
ylab = ""
# ylab = expression('E'['u|y'] (u[alpha]^k) - 'E'['u|y'] (u[delta]^k))
)
return(bp)
}
#' \eqn{\tau}{tau}-leaping simulation algorithm
#'
#' Simulate a trajectory of length S for a stochastic reaction network.
#' @param Yt starting point of the trajectory
#' @param theta vector parameter for the reactions.
#' @param S length of the simulated trajectory.
#' @param s2 noise variance (defaults to 0).
#' @param tau time interval length (defaults to 1).
#' @param rct.lst list of biochemical reactions.
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages
#' on the simulation progress are printed to the console.
#' @details This function allows to simulate a trajectory of a single clone given an
#' initial conditions \eqn{Y_0}{Y_0} for the cell counts, and obeying to a particular cell differentiation network
#' defined by a net-effect (stoichiometric) matrix \eqn{V}{V} and an hazard function \eqn{h()}{h()}.
#' The function allows to consider only three cellular events, such as
#' cell duplication (\eqn{Y_{it} \rightarrow 1}{Y_{it} -> 1}), cell death (\eqn{Y_{it} \rightarrow \emptyset}{Y_{it} -> 0})
#' and cell differentiation (\eqn{Y_{it} \rightarrow Y_{jt}}{Y_{it} -> Y_{jt}}) for a clone-specific time counting process
#' \deqn{Y_t = (Y_{1t},\dots,Y_{Nt})}{Y_t = (Y_{1t},...,Y_{Nt})}
#' observed in $N$ distinct cell lineages.
#' In particular, the cellular events of duplication, death and differentiation are
#' respectively coded with the character labels \eqn{\texttt{"A->1"}}{"A->1"}, \eqn{\texttt{"A->0"}}{"A->0"},
#' and \eqn{\texttt{"A->B"}}{"A->B"}, where \eqn{\texttt{A}}{A} and \eqn{\texttt{B}}{B} are two distinct cell types.
#' The output is a \eqn{3}{3}-dimensional array \eqn{Y}{Y} whose \eqn{ijk}{ijk}-entry \eqn{Y_{ijk}}{Y_{ijk}}
#' is the number of cells of clone \eqn{k}{k} for cell type \eqn{j}{j} collected at time \eqn{i}{i}.
#' More mathematical details can be found in the vignette of this package.
#' @return A \eqn{S \times p}{S by p} dimensional matrix of the simulated trajectory.
#' @examples
#' rcts <- c("A->1", "B->1", "C->1", "D->1",
#' "A->0", "B->0", "C->0", "D->0",
#' "A->B", "A->C", "C->D") ## set of reactions
#' ctps <- head(LETTERS,4)
#' nC <- 3 ## number of clones
#' S <- 10 ## trajectory length
#' tau <- 1 ## for tau-leaping algorithm
#' u_1 <- c(.2, .15, .17, .09*5,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_2 <- c(.2, .15, .17, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_3 <- c(.2, .15, .17*3, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' theta_allcls <- cbind(u_1, u_2, u_3) ## clone-specific parameters
#' rownames(theta_allcls) <- rcts
#' s20 <- 1 ## additional noise
#' Y <- array(data = NA,
#' dim = c(S + 1, length(ctps), nC),
#' dimnames = list(seq(from = 0, to = S*tau, by = tau),
#' ctps,
#' 1:nC)) ## empty array to store simulations
#' Y0 <- c(100,0,0,0) ## initial state
#' names(Y0) <- ctps
#' for (cl in 1:nC) { ## loop over clones
#' Y[,,cl] <- get.sim.tl(Yt = Y0,
#' theta = theta_allcls[,cl],
#' S = S,
#' s2 = s20,
#' tau = tau,
#' rct.lst = rcts,
#' verbose = TRUE)
#' }
#' Y
#' @export
get.sim.tl <- function(Yt, theta, S, s2 = 0, tau = 1, rct.lst, verbose = TRUE){
check.rcts(Y = t(as.matrix(Yt)),
rct.lst = rct.lst)
compile.h(rct.lst = rct.lst, envir = environment())
V <- get.V(rct.lst = rct.lst)
Y0 <- Yt
Ysim <- matrix(data = NA, nrow = S, ncol = nrow(V))
colnames(Ysim) <- names(Yt)
for (s in 1:S) {
if((s %% 100 == 0) & verbose){cat(s, "\n")} ## massage to keep track of the status
th_tr <- tau*theta
# N_tr <- as.numeric(get.h(Yt, th_tr))
N_tr <- as.numeric(eval(parse(text="get.h(x = Yt, theta = th_tr)"), envir = environment()))
Yt <- as.numeric(Yt + V %*% N_tr) ## update the molecule counts
names(Yt) <- head(LETTERS,4)
Ysim[s,] <- Yt ## store the current simulated counts
}
Ysim <- rbind(Y0, Ysim)
Ysim <- Ysim + rnorm(prod(dim(Ysim)), mean = 0, sd = sqrt(s2))
Ysim[Ysim < 0] <- 0
rownames(Ysim) <- seq(from = 0, to = S*tau, by = tau)
return(Ysim)
}
#' Net-effect matrix
#'
#' This function builds the net-effect matrix V.
#' @param rct.lst list of biochemical reactions.
#' A differentiation move from cell type "A" to cell type "B" must be coded as "A->B"
#' Duplication of cell "A" must be coded as "A->1"
#' Death of cell "A" must be coded as "A->0"
#' @return The net-effect matrix V
#' @keywords internal
get.V <- function(rct.lst){
ct.lst <- setdiff(as.vector(unlist(c(sapply(rct.lst, function(r){
as.vector(unlist(str_split(r, "->")))
}, simplify = "array")))), c("0","1"))
V <- matrix(data = 0, nrow = length(ct.lst), ncol = length(rct.lst))
rownames(V) <- ct.lst
colnames(V) <- rct.lst
for (r in rct.lst) {
rgts_prod <- as.vector(unlist(strsplit(r, split = "->", fixed = T)))
V[rgts_prod[1],r] <- -1
if(rgts_prod[2] != "0"){
if(rgts_prod[2] == "1"){
V[rgts_prod[1],r] <- 1
}else{
V[rgts_prod[2],r] <- 2
}
}
}
return(V)
}
#' Generate hazard function
#'
#' This function dynamically builds the function get.h() that will be used to get the hazard vector.
#' @param rct.lst list of biochemical reactions.
#' A differentiation move from cell type "A" to cell type "B" must be coded as "A->B"
#' Duplication of cell "A" must be coded as "A->1"
#' Death of cell "A" must be coded as "A->0"
#' @return No return value.
#' @keywords internal
compile.h <- function(rct.lst, envir){
constr.lst <- c()
get.h.string <- paste("get.h <- function(x, theta){
h <- c(",
paste(sapply(rct.lst, function(r){
rgts <- as.vector(unlist(strsplit(r, split = "->", fixed = T)))[1]
prds <- as.vector(unlist(strsplit(r, split = "->", fixed = T)))[2]
if(prds == "0"){
hx <- paste("x['", rgts, "']^2*theta['", r, "']", sep = "")
}else{
hx <- paste("x['", rgts, "']*theta['", r, "']", sep = "")
}
}, simplify = "array"), collapse = ",\n"),
")
return(h)
}", sep = "")
for (constr in constr.lst) {
constr <- as.vector(unlist(strsplit(constr, split = "=", fixed = TRUE)))
pattern <- constr[1]
replacement <- constr[2]
get.h.string <- gsub(pattern = pattern,
replacement = replacement,
x = get.h.string)
}
eval(parse(text=get.h.string), envir = envir)
}
#' Random generator from a Multivariate Normal distribution
#'
#' This function generate a random vector from a multivariate normal distribution.
#' @param d sample d.
#' @param p vector dimension.
#' @param Mu mean vector.
#' @param Sigma covariance matrix.
#' @return A \eqn{p \times d}{p by d} matrix. Each column is a p-variate random vector from
#' a multivariate normal with mean vector Mu and covariance matrix Sigma.
#' @keywords internal
rmvNorm <- function(d, p, Mu, Sigma){
Z <- matrix(rnorm(d*p), nrow = d, ncol = p)
X <- t(Z%*%chol(Sigma)) + matrix(Mu, ncol = 1)[,rep(1,d)]
return(X)
}
#' Time-adjacent increments of the cell counts
#'
#' This function generates time-adjacent increments from a cell counts matrix y.
#' @param y clone-specific \eqn{t \times p}{t by p} cell count matrix, where t is the number of time-points and p the number of cell types.
#' @return A \eqn{(t-1) \times p}{(t - 1) by p} dimensional vector containing the time-adjacent increments.
#' @keywords internal
get.dx <- function(y){
nCells <- ncol(y)
T<-nrow(y)
compdat<-t(y)
DX <- as.matrix(compdat[,-1]-compdat[,-T])
colnames(DX) <- as.character(apply(DX, 2, function(dx){sum(dx != 0)>0}))
colnames(DX) <- substr(x = colnames(DX), start = 1, stop = 1)
dx <- as.vector(DX)
names(dx) <- rep(colnames(DX), each = nCells)
return(dx)
}
#' Design matrix M
#'
#' This function generates time-adjacent increments from a cell counts matrix y.
#' @param y clone-specific \eqn{t \times p}{t by p} cell count matrix, where t is the number of time-points and p the number of cell types.
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param dT A (t-1)-dimensional vector of the time-increments.
#' @return A \eqn{tp \times K}{t*p by K} dimensional (design) matrix.
#' @keywords internal
get.M <- function(y, V, dT, get.h){
theta <- rep(1, ncol(V))
names(theta) <- colnames(V)
Mt <- lapply(matrix(1:(nrow(y))), FUN = function(t){
# Mt <- Matrix(V%*%diag(get.h(x = y[t,], theta)), sparse = TRUE);
Mt <- Matrix(V%*%diag(eval(parse(text="get.h(x = y[t,], theta = theta)"), envir = environment())), sparse = TRUE);
Mt_01 <- Mt;
Mt_01[Mt_01>0] <- 1;
if(sum(Mt_01 != 0)>0){
rownames(Mt) <- rep("T", nrow(Mt));
}else{
rownames(Mt) <- rep("F", nrow(Mt));
}
return(Mt)
})
M <- Reduce(rbind, Mt)
colnames(M) <- colnames(V)
M <- M*dT
return(M)
}
#' Stochastic covariance matrix W
#'
#' This function returns the stochastic covariance matrix W associated to a design matrix M, a net-effect matrix V, and a vector parameter theta.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param theta p-dimensional vector parameter.
#' @return A \eqn{n \times n}{n by n} dimensional stochastic covariance matrix.
#' @keywords internal
get.W <- function(M, V, VCNs, theta, nObs){
p <- ncol(M)
beta <- theta[1:p]
NN <- nrow(M)
nC <- nrow(V)
nLin <- nrow(V)
idx <- cbind(rep(1:nC, times = nC), rep(rep(1:nC, each = nC)))
idx <- idx[rep(1:nrow(idx), NN/nC),]
idx_unit <- matrix(data = rep((0:(NN/nC - 1))*nC, each = prod(dim(cbind(rep(1:nC, times = nC), rep(rep(1:nC, each = nC)))))), nrow = nrow(idx), ncol = 2, byrow = TRUE)
idx <- idx + idx_unit
nObs <- rep(nObs, times = table(rownames(M)))
nObsMat <- matrix(data = rep(nObs, times = nLin), ncol = nLin, byrow = FALSE)
W <- Matrix(data = 0, nrow = nrow(M), ncol = nrow(M), sparse = TRUE)
W[idx] <- t(((M%*%Matrix(diag(beta), sparse = TRUE)%*%t(V))*nObsMat))
diag(W) <- diag(W)*VCNs
rownames(W) <- colnames(W) <- rownames(M)
return(W) ## PLEASE NOTE: dT is already included in M, we don't need "W <- W*dT"
}
#' Negative log-likelihood of the base (null) model
#'
#' Compute the negative log-likelihood of the null model,
#' namely the linear model without rondom effects at all.
#' @param theta p-dimensional vector parameter.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param dW p-dimensional list of the partial derivatives of W w.r.t. theta.
#' @return the value of the negative log-likelihood.
#' @keywords internal
get.nl <- function(theta, M, y, V, VCNs, nObs, dW){
N <- nrow(M)
p <- ncol(M)
b <- head(theta, p)
s2 <- tail(theta, 1)
W <- get.W(M, V, VCNs, theta, nObs)
S <- W
diag(S) <- diag(S) + s2
Si <- chol2inv(chol(S))
nll_temp <- as.numeric(ldet(S) + t(y - M%*%b) %*% Si %*% (y - M%*%b))
gcRes <- gc()
return(nll_temp)
}
#' Gradient of the negative log-likelihood of the base (null) model
#'
#' Compute the gradient of the negative log-likelihood of the null model,
#' namely the linear model without rondom effects at all.
#' @param theta p-dimensional vector parameter.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param dW p-dimensional list of the partial derivatives of W w.r.t. theta.
#' @return p-dimensional vector of the gradient of the negative log-likelihood.
#' @keywords internal
get.gnl <- function(theta, M, y, V, VCNs, nObs, dW){
N <- nrow(M)
p <- ncol(M)
b <- head(theta, p)
s2 <- tail(theta, 1)
W <- get.W(M, V, VCNs, theta, nObs)
S <- W
diag(S) <- diag(S) + s2
Si <- chol2inv(chol(S))
gnq <- rep(NA, p + 1)
gnq[1:p] <- -2*as.numeric(t(y)%*%Si%*%M) + 2*as.numeric(t(M) %*% Si%*%M%*%b)
for (j in 1:p) {
djSi <- -Si%*%dW[[j]]%*%Si
djldetSi <- sum(t(Si)*dW[[j]])
gnq[j] <- gnq[j] + t(y)%*%djSi%*%y - 2*t(y)%*%djSi%*%M%*%b + t(b)%*%t(M)%*%djSi%*%M%*%b + djldetSi #
}
gnq[p+1] <- - t(y)%*%Si%*%Si%*%y + 2*t(b)%*%t(M)%*%Si%*%Si%*%y - t(b)%*%t(M)%*%Si%*%Si%*%M%*%b + tr(Si) #
gcRes <- gc()
return(gnq)
}
#' Fit base model
#'
#' Fit the base (null) model to the given data using a maximum likelihood approach.
#' @param theta_start p-dimensional vector parameter used as initial guess in the inference procedure.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param psiLB p-dimensional vector of lower bound values for theta.
#' @param psiUB p-dimensional vector of upper bound values for theta.
#' @param maxit maximum number of iterations for the optimization step.
#' This argument is passed to optim() function. Details on "maxit" can be found in "optim()" documentation page.
#' @param factr controls the convergence of the "L-BFGS-B" method.
#' Convergence occurs when the reduction in the objective is within this factor of the machine tolerance.
#' Default is 1e7, that is a tolerance of about 1e-8.
#' This argument is passed to optim() function.
#' @param pgtol helps control the convergence of the "L-BFGS-B" method.
#' It is a tolerance on the projected gradient in the current search direction.
#' This defaults to zero, when the check is suppressed.
#' This argument is passed to optim() function.
#' @param lmm is an integer giving the number of BFGS updates retained in the "L-BFGS-B" method, It defaults to 5.
#' This argument is passed to optim() function.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param trace Non-negative integer. If positive, tracing information on the progress of the optimization is produced.
#' This parameter is also passed to the optim() function.
#' Higher values may produce more tracing information: for method "L-BFGS-B" there are six levels of tracing.
#' (To understand exactly what these do see the source code: higher levels give more detail.)
#' @return A 3-length list. First element is the output returned by optim() function (see optim() documentation for details).
#' Second element is a vector of statistics associated to the fitted null model:
#' \tabular{ll}{
#' \code{nPar} \tab number of parameters of the base(null) model \cr
#' \tab \cr
#' \code{cll} \tab value of the conditional log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{mll} \tab value of the marginal log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{cAIC} \tab conditional Akaike Information Criterion (cAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{mAIC} \tab marginal Akaike Information Criterion (mAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{Chi2} \tab value of the \eqn{\chi^2}{Chi-squared} statistic \eqn{(y - M\theta)'S^{-1}(y - M\theta)}{(y - Mθ)'S^-1(y - Mθ)}. \cr
#' \tab \cr
#' \code{p-value} \tab p-value of the \eqn{\chi^2}{Chi-squared} test for the null hypothesis that Chi2 follows a \eqn{\chi^2}{Chi-squared} distribution with n - nPar degrees of freedom. \cr
#' }
#' The third element, called design, is a list including:
#' \tabular{ll}{
#' \code{M} \tab A \eqn{n \times K}{n by K} dimensional (design) matrix. \cr
#' \tab \cr
#' \code{V} \tab A \eqn{p \times K}{p by K} dimensional net-effect matrix. \cr
#' }
#' @keywords internal
nullModelFitting <- function(theta_start,
M,
y,
V,
psiLB,
psiUB,
maxit,
factr,
pgtol,
lmm,
VCNs,
nObs,
trace = TRUE){
p <- ncol(M)
N <- nrow(M)
dW <- apply(diag(1, nrow = p, ncol = p), 2, function(j){get.W(M, V, VCNs, j, nObs)})
res <- list();
res_null <- try(optim(par = theta_start,
fn = get.nl,
gr = get.gnl,
M = M, y = y, V = V, VCNs = VCNs, nObs = nObs, dW = dW,
method = "L-BFGS-B",
lower = psiLB,
upper = psiUB,
control = list(maxit = maxit,
factr = factr,
pgtol = pgtol,
lmm = lmm,
trace = trace)), silent = TRUE);
res$fit <- res_null
if(!inherits(res_null, "try-error")){
theta_res <- res_null$par
b_res <- head(theta_res, p)
s2_res <- tail(theta_res, 1)
W_res <- get.W(M, V, VCNs, theta_res, nObs)
S_res <- W_res
diag(S_res) <- diag(S_res) + s2_res
Si_res <- chol2inv(chol(S_res))
ll_res <- dmvNorm(x = y, Mu = M %*% b_res, Sigma = S_res, SigmaInv = Si_res) ## marginal distribution p(y)
nll_res <- -ll_res
p_null <- length(theta_res)
AIC_null <- 2*nll_res + 2*p_null ## AIC
AICc_null <- 2*nll_res + 2*(p_null + 1)*N/(N - p_null - 2) ## corrected AIC
BIC_null <- log(N)*p_null + 2*nll_res ## BIC
chi2Stat <- as.numeric(t(y - M%*%b_res)%*%S_res%*%(y - M%*%b_res))
chi2pVal <- 1 - pchisq(q = chi2Stat, df = N - p_null)
res$stats <- c(as.numeric(p_null), as.numeric(ll_res), as.numeric(ll_res), as.numeric(AIC_null), as.numeric(AICc_null), as.numeric(chi2Stat), as.numeric(chi2pVal))
names(res$stats) <- c("nPar","cll","mll","cAIC","mAIC","Chi2","p-value")
}
design <- list()
design$M <- M
design$V <- V
res$design <- design
return(res)
}
#' Fit random-effects model
#'
#' Fit the random-effects model to the given data using an expectation-maximization algorithm.
#' @param theta_0 p-dimensional vector parameter used as initial guess in the inference procedure.
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param M_bdiag A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param maxit maximum number of iterations for the optimization step.
#' This argument is passed to optim() function. Details on "maxit" can be found in "optim()" documentation page.
#' @param maxemit maximum number of iterations for the expectation-maximization algorithm.
#' @param eps relative error for the value x and the objective function f(x) that has to be optimized
#' in the expectation-maximization algorithm.
#' @param thetaLB p-dimensional vector of lower bound values for theta.
#' @param thetaUB p-dimensional vector of upper bound values for theta.
#' @param factr controls the convergence of the "L-BFGS-B" method.
#' Convergence occurs when the reduction in the objective is within this factor of the machine tolerance.
#' Default is 1e7, that is a tolerance of about 1e-8.
#' This argument is passed to optim() function.
#' @param pgtol helps control the convergence of the "L-BFGS-B" method.
#' It is a tolerance on the projected gradient in the current search direction.
#' This defaults to zero, when the check is suppressed.
#' This argument is passed to optim() function.
#' @param lmm is an integer giving the number of BFGS updates retained in the "L-BFGS-B" method, It defaults to 5.
#' This argument is passed to optim() function.
#' @param trace Non-negative integer. If positive, tracing information on the progress of the optimization is produced.
#' This parameter is passed to the optim() function.
#' Higher values may produce more tracing information: for method "L-BFGS-B" there are six levels of tracing.
#' (To understand exactly what these do see the source code: higher levels give more detail.)
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages on the progress of the
#' algorithm are printed to the console.
#' @return The output returned by "optim()" function (see "optim()" documentation for details) along with
#' the conditional expectation \eqn{E[u \vert y]}{E[u|y]} and variance \eqn{V[u \vert y]}{V[u|y]}
#' of the latent states u given the observed states y from the last step of the expectation-maximization algorithm.
#' @keywords internal
rndEffModelFitting <- function(theta_0,
V,
M,
M_bdiag,
y,
VCNs,
nObs,
maxit,
maxemit,
eps = 1e-5,
thetaLB,
thetaUB,
factr,
pgtol,
lmm,
trace = TRUE,
verbose = TRUE){
## for EM:
nIT <- 0 ## number of iterations
deltaQ <- 10^3 ## |Q(theta_new) - Q(theta_curent)|
deltaTheta <- 10^3 ## |theta_new - theta_curent|
p <- ncol(M)
theta_curr <- theta_start <- rep(1, 2*p + 1)
theta_curr[1:p] <- theta_start[1:p] <- head(theta_0, -1)
VEuy_curr <- VEuy(theta_curr, M, M_bdiag, y, V, VCNs, nObs)
euy_curr <- VEuy_curr[["euy"]] ## conditional expectaction \eqn{E[u \vert y]}{E[u|y]} of the random (latent) effects u given the observed data y
vuy_curr <- VEuy_curr[["vuy"]] ## conditional variance \eqn{V[u \vert y]}{V[u|y]} of the random (latent) effects u given the observed data y
dW <- apply(diag(1, nrow = p, ncol = p), 2, function(j){get.W(M, V, VCNs, j, nObs)})
nQ_curr <- nQ(theta_curr, euy_curr = euy_curr, vuy_curr = vuy_curr, M, M_bdiag, y, V, VCNs = VCNs, nObs = nObs, dW = dW)
ngQ_curr <- ngQ(theta_curr, euy_curr = euy_curr, vuy_curr = vuy_curr, M, M_bdiag, y, V, VCNs = VCNs, nObs = nObs, dW = dW)
while (nIT < maxemit & (deltaQ > eps | deltaTheta > eps)) { ## EM-algorithm
# while (nIT < maxIT & deltaQ > eps & deltaTheta > eps) { ## EM-algorithm
if(verbose){cat(paste("EM-alg. It. n. ", nIT, "\terr_Q = ", deltaQ, ";\terr_theta = ", deltaTheta, "...\n", sep = ""))}
res <- try(optim(par = theta_curr,
fn = nQ,
gr = ngQ,
euy_curr = euy_curr, vuy_curr = vuy_curr, M = M, M_bdiag = M_bdiag, y = y, V = V, VCNs = VCNs, nObs = nObs, dW = dW,
method = "L-BFGS-B",
lower = thetaLB,
upper = thetaUB,
control = list(maxit = maxit,
factr = factr,
pgtol = pgtol,
lmm = lmm,
trace = trace)), silent = TRUE);
theta_old <- theta_curr
theta_curr <- res$par
nQ_old <- nQ_curr
VEuy_curr <- VEuy(theta_curr, M, M_bdiag, y, V, VCNs, nObs)
euy_curr <- VEuy_curr[["euy"]] ## conditional expectaction \eqn{E[u \vert y]}{E[u|y]} of the random (latent) effects u given the observed data y
vuy_curr <- VEuy_curr[["vuy"]] ## conditional variance \eqn{V[u \vert y]}{V[u|y]} of the random (latent) effects u given the observed data y
nQ_curr <- nQ(theta_curr, euy_curr = euy_curr, vuy_curr = vuy_curr, M, M_bdiag, y, V, VCNs = VCNs, nObs = nObs, dW = dW)
deltaQ <- relErr(x = nQ_curr, y = nQ_old)
deltaTheta <- relErr(x = theta_old, y = theta_curr)
nIT <- nIT + 1
}
res$VEuy <- VEuy_curr
return(res)
}
#' E-step function Q
#'
#' Negative E-step function -Q of the expectation-maximization algorithm
#' @param theta p-dimensional vector parameter.
#' @param euy_curr current value of the conditional expectation \eqn{E[u \vert y]}{E[u|y]} of u given y,
#' where u and y are the latent and observed states respectively.
#' @param vuy_curr current value of the conditional variance \eqn{V[u \vert y]}{V[u|y]} of u given y,
#' where u and y are the latent and observed states respectively.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param M_bdiag A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param dW p-dimensional list of the partial derivatives of W w.r.t. theta.
#' @return The current value of the negative E-step function -Q.
#' @keywords internal
nQ <- function(theta, euy_curr, vuy_curr, M, M_bdiag, y, V, VCNs, nObs, dW){
p <- ncol(V)
K <- length(unique(rownames(M)))
b <- theta[1:p]
d2 <- theta[(p + 1): (2*p)]
s2 <- tail(theta, 1)
W <- get.W(M, V, VCNs, theta = c(b,s2), nObs)
S <- W
diag(S) <- diag(S) + s2
Si <- chol2inv(chol(S))
D2 <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2)) ## covariance matrix of the random effects u, using the free parameters
D2i <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2^-1))
bu <- kronecker(X = Matrix(data = 1, nrow = K, ncol = 1, sparse = TRUE), Y = Matrix(data = b, ncol = 1, sparse = TRUE)) ## expected values of the random effects u, using the free parameters
Q <- ( - .5*ldet(A = S)
- .5*( t(y)%*%Si%*%y
-2*t(euy_curr)%*%t(M_bdiag)%*%Si%*%y
+ sum( t(t(M_bdiag)%*%Si) * (M_bdiag%*%vuy_curr))
+ sum((t(M_bdiag)%*%Si%*%M_bdiag%*%euy_curr)*euy_curr) )
+ t(euy_curr)%*%D2i%*%bu
-.5*t(bu)%*%D2i%*%bu
- .5*ldet(A = D2)
- .5*(sum(D2i*vuy_curr) + sum((D2i%*%euy_curr)*(euy_curr))) );
Q <- as.numeric(Q)
gcRes <- gc()
return(-Q)
}
#' Gradient of the E-step function Q
#'
#' Gradient -\eqn{\nabla Q}{\nabla Q} of the negative E-step function -Q of the expectation-maximization algorithm
#' @param theta p-dimensional vector parameter.
#' @param euy_curr current value of the conditional expectation \eqn{E[u \vert y]}{E[u|y]} of u given y,
#' where u and y are the latent and observed states respectively.
#' @param vuy_curr current value of the conditional variance \eqn{V[u \vert y]}{V[u|y]} of u given y,
#' where u and y are the latent and observed states respectively.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param M_bdiag A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param dW p-dimensional list of the partial derivatives of W w.r.t. theta.
#' @return p-dimensional vector of the gradient -\eqn{\nabla Q}{\nabla Q} of the negative E-step function -Q.
#' @keywords internal
ngQ <- function(theta, euy_curr, vuy_curr, M, M_bdiag, y, V, VCNs, nObs, dW){
p <- ncol(V)
K <- length(unique(rownames(M)))
b <- theta[1:p]
d2 <- theta[(p + 1): (2*p)]
s2 <- tail(theta, 1)
W <- get.W(M, V, VCNs, theta = c(b,s2), nObs)
S <- W
diag(S) <- diag(S) + s2
Si <- chol2inv(chol(S))
D2 <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2)) ## covariance matrix of the random effects u, using the free parameters
D2i <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2^-1)) ## covariance matrix of the random effects u, using the free parameters
bu <- kronecker(X = Matrix(data = 1, nrow = K, ncol = 1, sparse = TRUE), Y = Matrix(data = b, ncol = 1, sparse = TRUE)) ## expected values of the random effects u, using the free parameters
dbu <- dBu(K, p)
dd2 <- dD2(K, p)
ngq <- rep(NA, length(theta)) ## dQ/dtheta
ngq[1:p] <- apply(matrix(data = 1:p, ncol = 1), MARGIN = 1,
FUN = function(j){ as.numeric( -.5*sum(t(Si)*dW[[j]])
-.5*( -t(y)%*%Si%*%dW[[j]]%*%Si%*%y
+ 2*t(euy_curr)%*%t(M_bdiag)%*%Si%*%dW[[j]]%*%Si%*%y
+ sum( t(t(M_bdiag)%*%(-Si%*%dW[[j]]%*%Si))*(M_bdiag%*%vuy_curr) )
+ sum( (t(M_bdiag)%*%(-Si%*%dW[[j]]%*%Si)%*%M_bdiag%*%euy_curr) * euy_curr ))
+ t(euy_curr)%*%D2i%*%dbu[[j]]
-t(bu)%*%D2i%*%dbu[[j]] )}) ## dQ/dbeta
ngq[(p + 1): (2*p)] <- 1*apply(matrix(data = 1:p, ncol = 1), MARGIN = 1,
FUN = function(j){ as.numeric( -.5*sum(solve(D2)*t(dd2[[j]]))
-.5*(-sum( t(solve(D2)%*%dd2[[j]]%*%solve(D2))*vuy_curr )
- sum( (solve(D2)%*%dd2[[j]]%*%solve(D2)%*%euy_curr)*(euy_curr) ))
-t(euy_curr)%*%solve(D2)%*%dd2[[j]]%*%solve(D2)%*%bu
+ .5*t(bu)%*%solve(D2)%*%dd2[[j]]%*%solve(D2)%*%bu )}) ## dQ/dd2
ngq[2*p + 1] <- as.numeric(-.5*tr(Si)
-.5*(-t(y)%*%Si%*%Si%*%y
+ 2*t(euy_curr)%*%t(M_bdiag)%*%Si%*%Si%*%y
+ (-sum( t(t(M_bdiag)%*%Si%*%Si)*(M_bdiag%*%(vuy_curr)) )
- sum( (t(M_bdiag)%*%Si%*%Si%*%M_bdiag%*%euy_curr)*euy_curr ))))
gcRes <- gc()
return(-ngq)
}
#' Gradient of \eqn{E[u \vert \theta]}{E[u|θ]}
#'
#' Gradient of the expected values of the random effects u w.r.t. the free parameters
#' @param K number of clones being analyzed.
#' @param p number of free parameters.
#' @return the p-dimensional gradient of u.
#' @keywords internal
dBu <- function(K, p){
dbu <- apply(diag(1, p, p), 2, function(cc){kronecker(X = Matrix(data = 1, nrow = K, ncol = 1, sparse = TRUE),
Y = Matrix(data = cc, ncol = 1, sparse = TRUE))})
gcRes <- gc()
return(dbu)
}
#' Gradient of \eqn{V[u \vert \theta]}{V[u|θ]}
#'
#' Gradient of the covariance matrix of the random effects u w.r.t. the free parameters
#' @param K number of clones being analyzed.
#' @param p number of free parameters.
#' @return the gradient of the covariance matrix of u.
#' @keywords internal
dD2 <- function(K, p){
dd2 <- apply(diag(1, p, p), 2, function(cc){kronecker(X = Diagonal(n = K, x = 1),
Y = Diagonal(n = p, x = cc))})
gcRes <- gc()
return(dd2)
}
#' \eqn{E[u \vert y]}{E[u|y]} and \eqn{V[u \vert y]}{V[u|y]}
#'
#' Conditional expectation \eqn{E[u \vert y]}{E[u|y]} and variance \eqn{V[u \vert y]}{V[u|y]} of the latent states u given the observed states y
#' @param theta_curr current p-dimensional vector parameter.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param M_bdiag A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @return the conditional expectation \eqn{E[u \vert y]}{E[u|y]} and variance \eqn{V[u \vert y]}{V[u|y]} of the latent states u given the observed states y.
#' @keywords internal
VEuy <- function(theta_curr, M, M_bdiag, y, V, VCNs, nObs){
p <- ncol(V)
K <- length(unique(rownames(M)))
beta_curr <- theta_curr[1:p]
d2_curr <- theta_curr[(p + 1): (2*p)]
s2_curr <- tail(theta_curr, 1)
W_curr <- get.W(M, V, VCNs, theta = c(beta_curr,s2_curr), nObs)
S_curr <- W_curr
diag(S_curr) <- diag(S_curr) + s2_curr
Si_curr <- chol2inv(chol(S_curr))
D2_curr <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2_curr))
D2i_curr <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2_curr^-1))
bu_curr <- kronecker(X = Matrix(data = 1, nrow = K, ncol = 1, sparse = TRUE), Y = Matrix(data = beta_curr, ncol = 1, sparse = TRUE))
vuy <- chol2inv(chol(t(M_bdiag)%*%Si_curr%*%M_bdiag + D2i_curr))
euy <- vuy %*% ( t(M_bdiag)%*%Si_curr%*%y + D2i_curr%*%bu_curr)
euy[euy<0] <- 0
veuy <- list()
veuy$euy <- euy
veuy$vuy <- vuy
gcRes <- gc()
return(veuy)
}
#' Multivariate Normal density function
#'
#' Proability density function of a n-variate normal distribution
#' @param x n-dimensional vector.
#' @param Mu n-dimensional mean vector.
#' @param Sigma n-dimensional covariance matrix.
#' @param SigmaInv n-dimensional precision matrix.
#' @return the probability density of a n-variate normal distribution with mean vector Mu and covariance Sigma evaluated at x.
#' @keywords internal
dmvNorm <- function(x, Mu, Sigma, SigmaInv){
n <- length(Mu)
res <- as.numeric(-n/2*log(2*pi) - .5*ldet(Sigma) - .5*t(x - Mu)%*%SigmaInv%*%(x - Mu))
return(res)
}
#' Matrix trace
#'
#' Trace of a matrix
#' @param M a matrix.
#' @return the trace of M.
#' @keywords internal
tr <- function(M){
sum(diag(M))
}
#' Euclidean 2-norm
#'
#' Compute the euclidean 2-norm of a vector x.
#' @param x a vector.
#' @return the euclidean 2-norm of x.
#' @keywords internal
norm2 <- function(x){
return(sqrt(sum((x)^2)))
}
#' Multivariate relative error
#'
#' Compute the multivariate relative error between two vectors.
#' @param x a vector.
#' @param y a vector.
#' @return the multivariate relative error between x and y.
#' @keywords internal
relErr <- function(x,y){
norm2(x - y)/min(norm2(x), norm2(y))
}
#' Matrix determinant
#'
#' Compute the determinant of a matrix.
#' @param A a matrix
#' @param logarithm logical; if TRUE (default) return the logarithm of the modulus of the determinant.
#' @return the determinant of A.
#' @keywords internal
ldet <- function(A, logarithm = TRUE){
if(logarithm){
detA <- as.numeric(determinant(x = A, logarithm = TRUE)$modulus)
}else{
detA <- as.numeric(determinant(x = A, logarithm = FALSE)$modulus)
}
return(detA)
}
#' Base and random-effects model statistics
#'
#' Main statistics from the fitted base and random-effects models
#' @param theta_null the estimated p-dimensional vector parameter for the base (null) model.
#' @param theta_rndEff the estimated p-dimensional vector parameter for the random-effects model.
#' @param M A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param M_bdiag A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages on the progress of the
#' algorithm are printed to the console.
#' @return A vector of statistics associated to the fitted base and random-effects models:
#' \tabular{ll}{
#' \code{nPar} \tab number of parameters of the base(null) model \cr
#' \tab \cr
#' \code{cll} \tab value of the conditional log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{mll} \tab value of the marginal log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{cAIC} \tab conditional Akaike Information Criterion (cAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{mAIC} \tab marginal Akaike Information Criterion (mAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{Chi2} \tab value of the \eqn{\chi^2}{Chi-squared} statistic \eqn{(y - M\theta)'S^{-1}(y - M\theta)}{(y - Mθ)'S^-1(y - Mθ)}. \cr
#' \tab \cr
#' \code{p-value} \tab p-value of the \eqn{\chi^2}{Chi-squared} test for the null hypothesis that Chi2 follows a \eqn{\chi^2}{Chi-squared} distribution with n - nPar degrees of freedom. \cr
#' \tab \cr
#' \code{KLdiv} \tab Kullback-Leibler divergence of the random-effects model from the null model. \cr
#' \tab \cr
#' \code{KLdiv/N} \tab Rescaled Kullback-Leibler divergence of the random-effects model from the null model. \cr
#' \tab \cr
#' \code{BhattDist_nullCond} \tab Bhattacharyya distance between the random-effects model and the null model. \cr
#' \tab \cr
#' \code{BhattDist_nullCond/N} \tab Rescaled Bhattacharyya distance between the random-effects model and the null model. \cr
#' }
#' @keywords internal
rndEffModelStats <- function(theta_null,
theta_rndEff,
V,
M,
M_bdiag,
y,
VCNs,
nObs,
verbose = TRUE){
K <- length(unique(rownames(M)))
p <- ncol(M)
N <- nrow(M)
d2 <- (theta_rndEff[(p + 1): (2*p)])
D2 <- kronecker(X = Diagonal(n = K, x = 1), Y = Diagonal(n = p, x = d2)) ## covariance matrix of the random effects u, using the free parameter
q <- ncol(M_bdiag)
theta_res <- theta_rndEff
VEuy_res <- VEuy(theta_res, M, M_bdiag, y, V, VCNs, nObs)
euy_res <- VEuy_res[["euy"]] ## conditional expectaction \eqn{E[u \vert y]}{E[u|y]} of the random (latent) effects u given the observed data y
vuy_res <- VEuy_res[["vuy"]] ## conditional variance \eqn{V[u \vert y]}{V[u|y]} of the random (latent) effects u given the observed data y
thetaRndEffRes <- c(theta_res[1:p], as.numeric(euy_res), tail(theta_res,1))
if(verbose){cat("computing AIC...\n")}
cAIC <- condAIC(X = M, Z = M_bdiag, y = y, theta = thetaRndEffRes, Delta = D2, V, VCNs = VCNs, nObs = nObs, verbose = verbose)
b_re <- theta_res[1:p]
W_re <- get.W(M, V, VCNs, theta = c(thetaRndEffRes[1:p],tail(thetaRndEffRes,1)), nObs)
S_re <- W_re
diag(S_re) <- diag(S_re) + tail(thetaRndEffRes,1)
Si_re <- chol2inv(chol(S_re))
if(verbose){cat("computing Chi2...\n")}
chi2Stat <- as.numeric(t(y - M_bdiag %*% thetaRndEffRes[(p+1):(p+q)])%*%Si_re%*%(y - M_bdiag %*% thetaRndEffRes[(p+1):(p+q)]))
chi2pVal <- 1 - pchisq(q = chi2Stat, df = N - (cAIC$rho + 1))
b_null <- head(theta_null,p)
s2_null <- tail(theta_null,1)
muNull <- M%*%b_null
W_null <- get.W(M, V, VCNs, theta = c(b_null, s2_null), nObs)
S_null <- W_null
diag(S_null) <- diag(S_null) + s2_null
Si_null <- chol2inv(chol(S_null))
mu_condModel <- M%*%b_re + M_bdiag %*% thetaRndEffRes[(p+1):(p+q)]
if(verbose){cat("computing KLdiv...\n")}
klDiv_nullCond <- KLDiv(mu1 = muNull, S1 = S_null, mu2 = mu_condModel, S2 = S_re)
if(verbose){cat("computing BhattDist...\n")}
BhattDist_nullCond <- BhattDist(mu1 = muNull, S1 = S_null, mu2 = mu_condModel, S2 = S_re)
stats <- c(cAIC$rho + 1, NA, NA, NA, cAIC$cAIC, chi2Stat, chi2pVal, klDiv_nullCond, klDiv_nullCond/N, BhattDist_nullCond, BhattDist_nullCond/N)
names(stats) <- c("nPar","cll","mll","cAIC","mAIC","Chi2","p-value","KLdiv","KLdiv/N", "BhattDist_nullCond", "BhattDist_nullCond/N")
return(stats)
}
#' Conditional AIC (cAIC)
#'
#' Conditional AIC (cAIC) of the conditional log-likelihood \eqn{l(y \vert u)}{l(y|u)} of y given the random effects u
#' @param X A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' @param Z A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param theta p-dimensional vector parameter.
#' @param Delta covariance matrix of the random effects u
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages on the progress of
#' the algorithm are printed to the console.
#' @return Conditional AIC (cAIC) of the conditional log-likelihood \eqn{l(y \vert u)}{l(y|u)} of y given the random effects u.
#' @keywords internal
condAIC <- function(X, Z, y, theta, Delta, V, VCNs, nObs, verbose = TRUE){
p <- ncol(X)
q <- ncol(Z)
b <- theta[1:p]
s2 <- tail(theta, 1)
W <- get.W(X, V, VCNs, theta = c(b,s2), nObs)
S <- W
diag(S) <- diag(S) + s2
S <- (S + t(S))/2
Si <- chol2inv(chol(S))
n <- nrow(X)
D_fit <- Delta
D_fiti <- D_fit
diag(D_fiti) <- diag(D_fiti)^-1
if(verbose){cat("ny_u...\n")}
n2ll <- 2*(ny_u(theta, Z, X, y, V, VCNs, nObs = nObs)-n/2*log(2*pi))
if(verbose){cat("computing A and B...\n")}
A11 <- B11 <- t(X)%*%Si%*%X
A12 <- B12 <- ((t(X)%*%Si)%*%Z)
A21 <- B21 <- ((t(Z)%*%Si)%*%X)
B22 <- (t(Z)%*%Si)%*%Z
A22 <- B22 + D_fiti
if(verbose){cat("computing A^-1...\n")}
A22 <- (A22 + t(A22))/2
A22inv <- chol2inv(chol(A22))
A22inv_X_A21 <- A22inv%*%A21
Ei <- A11 - A12%*%(A22inv_X_A21)
Ei <- (Ei + t(Ei))/2
E <- solve(Ei)
A11i <- E
A12i <- -E%*%(A12%*%A22inv)
A21i <- -A22inv%*%(A21%*%E)
if(verbose){cat("computing rho...\n")}
rho <- sum(A11i*B11) + 2*sum(t(A12i)*B21)
rho <- rho + sum(A22inv*B22) - sum(t(A22inv_X_A21)*(A12i%*%B22))
cAIC <- n2ll + 2*(rho + 1)
AICres <- list()
AICres$cAIC <- cAIC
AICres$rho <- rho
return(AICres)
}
#' Conditional negative log-likelihood
#'
#' Conditional negative log-likelihood \eqn{-l(y \vert u)}{-l(y|u)} of y given the random effects u
#' @param theta p-dimensional vector parameter.
#' @param Z A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix.
#' @param X A \eqn{n \times K}{n by K} dimensional (design) matrix.
#' Each j-th block (\eqn{j = 1,\dots,J}{j = 1,..,J}) is a \eqn{n_j \times p}{n_j by p} dimensional design matrix for the j-th clone.
#' @param y n-dimensional vector of the time-adjacent cellular increments
#' @param V A \eqn{p \times K}{p by K} dimensional net-effect matrix.
#' @param VCNs A n-dimensional vector including values of the vector copy number corresponding to the cell counts of y.
#' @param nObs A K-dimensional vector including the frequencies of each clone k (\eqn{k = 1,\dots,K}{k = 1,..,K}).
#' @return Conditional negative log-likelihood \eqn{-l(y \vert u)}{-l(y|u)} of y given the random effects u.
#' @keywords internal
ny_u <- function(theta, Z, X, y, V, VCNs, nObs){
p <- ncol(V)
q <- ncol(Z)
b <- theta[1:p]
u <- theta[(p+1):(p+q)]
s2 <- tail(theta, 1)
W <- get.W(X, V, VCNs, theta = c(b,s2), nObs)
S <- W
diag(S) <- diag(S) + s2
Si <- chol2inv(chol(S))
n <- nrow(Z)
nll <- as.numeric(-.5*ldet(S) -.5*t(y - Z%*%u)%*%Si%*%(y - Z%*%u))
return(-nll)
}
#' Kullback-Leibler divergence
#'
#' Kullback-Leibler divergence \eqn{KL(P \Vert Q)}{KL(P || Q)} of Q from P for multivariate normal distributions
#' @param mu1 mean vector of P.
#' @param S1 covariance matrix of P.
#' @param mu2 mean vector of Q.
#' @param S2 covariance matrix of Q.
#' @return Kullback-Leibler divergence \eqn{KL(P \Vert Q)}{KL(P || Q)} of Q from P.
#' @keywords internal
KLDiv <- function(mu1, S1, mu2, S2){
d <- length(mu1)
klDiv <- as.numeric(.5*(ldet(S2) - ldet(S1) - d + tr(solve(S2)*S1) + t(mu2 - mu1)%*%(solve(S2)%*%(mu2 - mu1))))
return(klDiv)
}
#' Bhattacharyya distance
#'
#' Bhattacharyya distance between P and Q for multivariate normal distributions
#' @param mu1 mean vector of P.
#' @param S1 covariance matrix of P.
#' @param mu2 mean vector of Q.
#' @param S2 covariance matrix of Q.
#' @return Bhattacharyya distance between P and Q.
#' @keywords internal
BhattDist <- function(mu1, S1, mu2, S2){
d <- length(mu1)
S <- (S1 + S2)/2
res <- as.numeric(1/8*t(mu1 - mu2)%*%S%*%(mu1 - mu2) +.5*ldet(S) -1/4*ldet(S1) -1/4*ldet(S2))
return(res)
}
#' Fit the base (null) model
#'
#' This function builds the design matrix of the null model and returns the fitted values and the corresponding statistics.
#' @param Y A 3-dimensional array whose dimensions are the time, the cell type and the clone respectively.
#' @param rct.lst list of biochemical reactions.
#' A differentiation move from cell type "A" to cell type "B" must be coded as "A->B"
#' Duplication of cell "A" must be coded as "A->1"
#' Death of cell "A" must be coded as "A->0"
#' @param maxit maximum number of iterations for the optimization step.
#' This argument is passed to optim() function. Details on "maxit" can be found in "optim()" documentation page.
#' @param factr controls the convergence of the "L-BFGS-B" method.
#' Convergence occurs when the reduction in the objective is within this factor of the machine tolerance.
#' Default is 1e7, that is a tolerance of about 1e-8.
#' This argument is passed to optim() function.
#' @param pgtol helps control the convergence of the "L-BFGS-B" method.
#' It is a tolerance on the projected gradient in the current search direction.
#' This defaults to zero, when the check is suppressed.
#' This argument is passed to optim() function.
#' @param lmm is an integer giving the number of BFGS updates retained in the "L-BFGS-B" method, It defaults to 5.
#' This argument is passed to optim() function.
#' @param trace Non-negative integer. If positive, tracing information on the progress of the optimization is produced.
#' This parameter is also passed to the optim() function.
#' Higher values may produce more tracing information: for method "L-BFGS-B" there are six levels of tracing.
#' (To understand exactly what these do see the source code: higher levels give more detail.)
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages on the progress of
#' the algorithm are printed to the console.
#' @return A 3-length list. First element is the output returned by "optim()" function (see "optim()" documentation for details).
#' Second element is a vector of statistics associated to the fitted null model:
#' \tabular{ll}{
#' \code{nPar} \tab number of parameters of the base(null) model \cr
#' \tab \cr
#' \code{cll} \tab value of the conditional log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{mll} \tab value of the marginal log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{cAIC} \tab conditional Akaike Information Criterion (cAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{mAIC} \tab marginal Akaike Information Criterion (mAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{Chi2} \tab value of the \eqn{\chi^2}{Chi-squared} statistic \eqn{(y - M\theta)'S^{-1}(y - M\theta)}{(y - Mθ)'S^-1(y - Mθ)}. \cr
#' \tab \cr
#' \code{p-value} \tab p-value of the \eqn{\chi^2}{Chi-squared} test for the null hypothesis that Chi2 follows a \eqn{\chi^2}{Chi-squared} distribution with n - nPar degrees of freedom. \cr
#' }
#' The third element, called "design", is a list including:
#' \tabular{ll}{
#' \code{M} \tab A \eqn{n \times K}{n by K} dimensional (design) matrix.\cr
#' \tab \cr
#' \code{V} \tab A \eqn{p \times K}{p by K} dimensional net-effect matrix.\cr
#' }
#' @examples
#' rcts <- c("A->1", "B->1", "C->1", "D->1",
#' "A->0", "B->0", "C->0", "D->0",
#' "A->B", "A->C", "C->D") ## set of reactions
#' ctps <- head(LETTERS,4)
#' nC <- 3 ## number of clones
#' S <- 10 ## trajectory length
#' tau <- 1 ## for tau-leaping algorithm
#' u_1 <- c(.2, .15, .17, .09*5,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_2 <- c(.2, .15, .17, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_3 <- c(.2, .15, .17*3, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' theta_allcls <- cbind(u_1, u_2, u_3) ## clone-specific parameters
#' rownames(theta_allcls) <- rcts
#' s20 <- 1 ## additional noise
#' Y <- array(data = NA,
#' dim = c(S + 1, length(ctps), nC),
#' dimnames = list(seq(from = 0, to = S*tau, by = tau),
#' ctps,
#' 1:nC)) ## empty array to store simulations
#' Y0 <- c(100,0,0,0) ## initial state
#' names(Y0) <- ctps
#' for (cl in 1:nC) { ## loop over clones
#' Y[,,cl] <- get.sim.tl(Yt = Y0,
#' theta = theta_allcls[,cl],
#' S = S,
#' s2 = s20,
#' tau = tau,
#' rct.lst = rcts,
#' verbose = TRUE)
#' }
#' null.res <- fit.null(Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' ) ## null model fitting
##' @export
fit.null <- function(Y,
rct.lst,
maxit = 10000,
factr = 1e7,
pgtol = 1e-8,
lmm = 100,
trace = TRUE,
verbose = TRUE){
check.input(Y = Y,
rct.lst = rct.lst)
compile.h(rct.lst = rct.lst, envir = environment())
V <- get.V(rct.lst = rct.lst)
Y <- Y[,rownames(V),,drop = FALSE]
Y_rbind <- Reduce(rbind, lapply(1:dim(Y)[3], function(cl){return(head(Y[,,cl],-1))}))
dT <- rep(rep(diff(as.numeric(rownames(Y))), each = ncol(Y)), times = dim(Y)[3])
if(verbose){cat("creating design matrix...")}
# M <- get.M(y = Y_rbind, V = V, dT = dT, get.h = get.h)
M <- eval(parse(text="get.M(y = Y_rbind, V = V, dT = dT, get.h = get.h)"), envir = environment())
dx <- unlist(lapply(1:dim(Y)[3], function(cl){return(get.dx(Y[,,cl]))}))
VCNs <- rep(1, length(dx))
clones <- as.vector(unlist(dimnames(Y)[3]))
y <- dx
y_all <- y
M_all <- M
clones_all <- clones <- rep(clones, each = (nrow(Y) - 1)*ncol(Y))
VCNs_all <- VCNs
yfull <- y
Vfull <- V
yM <- cbind(y,M)
yM <- yM[rownames(M) == "T" | names(y) == "T",]
clones <- clones[rownames(M) == "T" | names(y) == "T"]
VCNs <- VCNs[rownames(M) == "T" | names(y) == "T"]
dT <- dT[rownames(M) == "T" | names(y) == "T"]
y <- dx <- yM[,1]
Mfull <- M <- yM[,-1]
rownames(M) <- names(y) <- names(VCNs) <- rownames(yM) <- clones
nrow(M) == length(dx) & length(dx) == length(VCNs) & length(VCNs) == length(dT)
if(verbose){cat(" DONE\n")}
nObs <- apply(apply(Y != 0, 3, rowSums) != 0, 2, sum)/max(apply(apply(Y != 0, 3, rowSums) != 0, 2, sum))
if(verbose){cat(paste("Fitting null model...\n", sep = ""))}
p <- ncol(V)
epsLB <- 1e-7
resNullFullV <- nullModelFitting(theta_start = rep(1, p + 1),
M = M,
y = y,
V = V,
psiLB = rep(epsLB, p + 1),
psiUB = rep(+Inf, p + 1),
maxit = maxit,
factr = factr,
pgtol = pgtol,
lmm = lmm,
VCNs = VCNs,
nObs = nObs,
trace = trace);
if(verbose){cat(" DONE\n")}
return(resNullFullV)
}
#' Fit the random-effects model
#'
#' This function builds the design matrix of the random-effects model and returns the fitted values and the corresponding statistics.
#' @param theta_0 A p-dimensional vector parameter as the initial guess for the inference.
#' @param Y A 3-dimensional array whose dimensions are the time, the cell type and the clone respectively.
#' @param rct.lst list of biochemical reactions.
#' A differentiation move from cell type "A" to cell type "B" must be coded as "A->B"
#' Duplication of cell "A" must be coded as "A->1"
#' Death of cell "A" must be coded as "A->0"
#' @param maxit maximum number of iterations for the optimization step.
#' This argument is passed to optim() function. Details on "maxit" can be found in "optim()" documentation page.
#' @param factr controls the convergence of the "L-BFGS-B" method.
#' Convergence occurs when the reduction in the objective is within this factor of the machine tolerance.
#' Default is 1e7, that is a tolerance of about 1e-8.
#' This argument is passed to optim() function.
#' @param pgtol helps control the convergence of the "L-BFGS-B" method.
#' It is a tolerance on the projected gradient in the current search direction.
#' This defaults to zero, when the check is suppressed.
#' This argument is passed to optim() function.
#' @param lmm is an integer giving the number of BFGS updates retained in the "L-BFGS-B" method, It defaults to 5.
#' This argument is passed to optim() function.
#' @param maxemit maximum number of iterations for the expectation-maximization algorithm.
#' @param eps relative error for the value x and the objective function f(x) that has to be optimized
#' in the expectation-maximization algorithm.
#' @param trace Non-negative integer. If positive, tracing information on the progress of the optimization is produced.
#' This parameter is also passed to the optim() function.
#' Higher values may produce more tracing information: for method "L-BFGS-B" there are six levels of tracing.
#' (To understand exactly what these do see the source code: higher levels give more detail.)
#' @param verbose (defaults to TRUE) Logical value. If TRUE, then information messages on the progress of the
#' algorithm are printed to the console.
#' @return A 3-length list. First element is the output returned by "optim()" function (see "optim()" documentation for details)
#' along with the conditional expectation \eqn{E[u \vert y]}{E[u|y]} and variance \eqn{V[u \vert y]}{V[u|y]}
#' of the latent states u given the observed states y from the last step of the expectation-maximization algorithm.
#' Second element is a vector of statistics associated to the fitted random-effects model:
#' \tabular{ll}{
#' \code{nPar} \tab number of parameters of the base(null) model \cr
#' \tab \cr
#' \code{cll} \tab value of the conditional log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{mll} \tab value of the marginal log-likelihood, in this case just the log-likelihood \cr
#' \tab \cr
#' \code{cAIC} \tab conditional Akaike Information Criterion (cAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{mAIC} \tab marginal Akaike Information Criterion (mAIC), in this case simply the AIC. \cr
#' \tab \cr
#' \code{Chi2} \tab value of the \eqn{\chi^2}{Chi-squared} statistic \eqn{(y - M\theta)'S^{-1}(y - M\theta)}{(y - Mθ)'S^-1(y - Mθ)}. \cr
#' \tab \cr
#' \code{p-value} \tab p-value of the \eqn{\chi^2}{Chi-squared} test for the null hypothesis that Chi2 follows a \eqn{\chi^2}{Chi-squared} distribution with n - nPar degrees of freedom. \cr
#' \tab \cr
#' \code{KLdiv} \tab Kullback-Leibler divergence of the random-effects model from the null model. \cr
#' \tab \cr
#' \code{KLdiv/N} \tab Rescaled Kullback-Leibler divergence of the random-effects model from the null model. \cr
#' \tab \cr
#' \code{BhattDist_nullCond} \tab Bhattacharyya distance between the random-effects model and the null model. \cr
#' \tab \cr
#' \code{BhattDist_nullCond/N} \tab Rescaled Bhattacharyya distance between the random-effects model and the null model. \cr
#' }
#' The third element, called "design", is a list including:
#' \tabular{ll}{
#' \code{M} \tab A \eqn{n \times K}{n by K} dimensional (design) matrix. \cr
#' \tab \cr
#' \code{M_bdiag} \tab A\eqn{n \times Jp}{n by J*p} dimensional block-diagonal design matrix. \cr
#' \tab \cr
#' \code{V} \tab A \eqn{p \times K}{p by K} dimensional net-effect matrix. \cr
#' }
#' @examples
#' rcts <- c("A->1", "B->1", "C->1", "D->1",
#' "A->0", "B->0", "C->0", "D->0",
#' "A->B", "A->C", "C->D") ## set of reactions
#' ctps <- head(LETTERS,4)
#' nC <- 3 ## number of clones
#' S <- 10 ## trajectory length
#' tau <- 1 ## for tau-leaping algorithm
#' u_1 <- c(.2, .15, .17, .09*5,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_2 <- c(.2, .15, .17, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' u_3 <- c(.2, .15, .17*3, .09,
#' .001, .007, .004, .002,
#' .13, .15, .08)
#' theta_allcls <- cbind(u_1, u_2, u_3) ## clone-specific parameters
#' rownames(theta_allcls) <- rcts
#' s20 <- 1 ## additional noise
#' Y <- array(data = NA,
#' dim = c(S + 1, length(ctps), nC),
#' dimnames = list(seq(from = 0, to = S*tau, by = tau),
#' ctps,
#' 1:nC)) ## empty array to store simulations
#' Y0 <- c(100,0,0,0) ## initial state
#' names(Y0) <- ctps
#' for (cl in 1:nC) { ## loop over clones
#' Y[,,cl] <- get.sim.tl(Yt = Y0,
#' theta = theta_allcls[,cl],
#' S = S,
#' s2 = s20,
#' tau = tau,
#' rct.lst = rcts,
#' verbose = TRUE)
#' }
#' null.res <- fit.null(Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' ) ## null model fitting
#'
#' re.res <- fit.re(theta_0 = null.res$fit$par,
#' Y = Y,
#' rct.lst = rcts,
#' maxit = 0, ## needs to be increased (>=100) for real applications
#' lmm = 0, ## needs to be increased (>=5) for real applications
#' maxemit = 1 ## needs to be increased (>= 100) for real applications
#' ) ## random-effects model fitting
##' @export
fit.re <- function(theta_0,
Y,
rct.lst,
maxit = 10000,
factr = 1e7,
pgtol = 1e-8,
lmm = 100,
maxemit = 100,
eps = 1e-5,
trace = TRUE,
verbose = TRUE){
check.input(Y = Y,
rct.lst = rct.lst)
compile.h(rct.lst = rct.lst, envir = environment())
V <- get.V(rct.lst = rct.lst)
Y <- Y[,rownames(V),,drop = FALSE]
Y_rbind <- Reduce(rbind, lapply(1:dim(Y)[3], function(cl){return(head(Y[,,cl],-1))}))
dT <- rep(rep(diff(as.numeric(rownames(Y))), each = ncol(Y)), times = dim(Y)[3])
if(verbose){cat("creating design matrix...")}
# M <- get.M(y = Y_rbind, V = V, dT = dT, get.h = get.h)
M <- eval(parse(text="get.M(y = Y_rbind, V = V, dT = dT, get.h = get.h)"), envir = environment())
dx <- unlist(lapply(1:dim(Y)[3], function(cl){return(get.dx(Y[,,cl]))}))
VCNs <- rep(1, length(dx))
clones <- as.vector(unlist(dimnames(Y)[3]))
y <- dx
y_all <- y
M_all <- M
clones_all <- clones <- rep(clones, each = (nrow(Y) - 1)*ncol(Y))
VCNs_all <- VCNs
yfull <- y
Vfull <- V
yM <- cbind(y,M)
yM <- yM[rownames(M) == "T" | names(y) == "T",]
clones <- clones[rownames(M) == "T" | names(y) == "T"]
VCNs <- VCNs[rownames(M) == "T" | names(y) == "T"]
dT <- dT[rownames(M) == "T" | names(y) == "T"]
y <- dx <- yM[,1]
Mfull <- M <- yM[,-1]
rownames(M) <- names(y) <- names(VCNs) <- rownames(yM) <- clones
nrow(M) == length(dx) & length(dx) == length(VCNs) & length(VCNs) == length(dT)
if(verbose){cat(" DONE\n")}
nObs <- apply(apply(Y != 0, 3, rowSums) != 0, 2, sum)/max(apply(apply(Y != 0, 3, rowSums) != 0, 2, sum))
p <- ncol(V)
epsLB <- 1e-7
if(verbose){cat(paste("Fitting RE model with full V...\n", sep = ""))}
p <- ncol(V)
re.fit <- rndEffModelFitting(theta_0 = theta_0,
V = V,
M = M,
M_bdiag = bdiag(lapply(unique(rownames(M)), function(cl){return(M[which(rownames(M) == cl),])})),
y = y,
VCNs = VCNs,
nObs = nObs,
maxit = maxit,
maxemit = maxemit,
eps = eps,
thetaLB = rep(epsLB, 2*p + 1),
thetaUB = rep(+Inf, 2*p + 1),
factr = factr,
pgtol = pgtol,
lmm = lmm,
trace = trace,
verbose = verbose)
if(verbose){cat(" DONE\n")}
if(verbose){cat(paste("Computing statistics...\n", sep = ""))}
re.stats <- rndEffModelStats(theta_null = theta_0,
theta_rndEff = re.fit$par,
V = V,
M = M,
M_bdiag = bdiag(lapply(unique(rownames(M)), function(cl){return(M[which(rownames(M) == cl),])})),
y = y,
VCNs = VCNs,
nObs = nObs,
verbose = verbose)
res <- list()
res$fit <- re.fit
res$stats <- re.stats
design <- list()
design$M <- M
design$M_bdiag <- bdiag(lapply(unique(rownames(M)), function(cl){return(M[which(rownames(M) == cl),])}))
design$V <- V
res$design <- design
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.