# R/invisible_utilities.R In lcmix: Layered and chained mixture models

```### invisible_utilities:  invisible utility functions for the lcmix package

# Replace bad numeric values (NA, NaN, Inf) with 0
{
x[is.na(x)] = 0
x[is.nan(x)] = 0
x[is.infinite(x)] = 0

return(x)
}

# Model E-step functions:  given no arguments; return functions of parameters and PDF matrices in the single-data case; or functions of parameters, PDF matrices, vector of data source names, and vectors of possible component values in the multiple-data case; which calculate weights.
.estep_mixmod <- function()
{
function(pdfs, params)
{
lW = as.matrix(t(t(pdfs\$lG) + params\$hidden\$lprob) - pdfs\$lgamma)
# "as.matrix" to deal with the pathological K=1 case
W = exp(lW)

namedList(W)
}
}
.estep_mdmixmod_layered <- function()
{
function(pdfs, params, zvec, kvec, k0vec)
{
## put parameters and PDFs into mathematical notation

lp0 = params\$hidden\$lprob0
lQ = params\$hidden\$lcprob

lG = pdfs\$lG
lalpha = pdfs\$lalpha
lbeta = pdfs\$lbeta
lgamma = pdfs\$lgamma

## now do the actual calculations

lU = t(t(lbeta) + lp0) - lgamma
U = exp(lU)

lV = lapply(zvec, function(z) lapply(k0vec, function(k0)
{
lnumerator = t(t(lG[[z]]) + lQ[[z]][k0,])
ldenominator = lalpha[[z]][,k0]
lU[,k0] + lnumerator - ldenominator
}))
V = rlapply(lV, exp, "matrix")

W = lapply(V, listSum)

## send it back

namedList(U, V, W)
}
}
.estep_mdmixmod_chained <- function()
{
function(pdfs, params, zvec, kvec, k0vec)
{
## put parameters and PDFs into mathematical notation

lp0 = params\$hidden\$lprob0
lP = params\$hidden\$lprobz
lQ = params\$hidden\$lcprob
lR = params\$hidden\$lrprob

lalpha = pdfs\$lalpha
lbeta = pdfs\$lbeta
lgamma = pdfs\$lgamma

## now do the actual calculations

lfy1gx = lbeta[[1]] - lgamma
tlfy1gx = t(lfy1gx)
lV1 = lapply(k0vec, function(k0) t(tlfy1gx + lR[[1]][,k0]))

lV2plus = lapply(2:length(zvec), function(z)
{
term3 = outer(lgamma, lP[[z]], "+") # more efficient to do this here

lapply(kvec[[z-1]], function(kzm1)
{
term1 = t(lalpha[[z-1]][,kzm1] + lbeta[[z]])
term2 = lQ[[z]][kzm1, ]

t(term1 + term2) - term3
})
})

#lV426:  stay away!

lV = c(list(lV1), lV2plus)
names(lV) = zvec
V = rlapply(lV, exp, "matrix")

U = sapply(V[[1]], rowSums)
W = lapply(V, listSum)

## send it back

namedList(U, V, W)
}
}

# Return a fully formed "dvec" vector given a matrix or data frame
.extract_dvec <- function(X)
{
retn = 1:ncol(X)
names(retn) = colnames(X)

return(retn)
}

# Quickly calculates the inverse of double (NOT INTEGER) valued matrix X; does no sanity checking, offers no options, and in general should only be used when you're sure this is what you want.  To iterate over a list, and really speed things up, you can pass in the identity matrix; e.g, if Y is a list of 3x3 matrices, you can write "I3 = diag(3) ; res = lapply(Y, .fast_inv, I3)" which will be considerably faster than just "res = lapply(Y, .fast_inv)".  The fastest way to convert X from integer to double is just to call ".fast_inv(X+0.0)".  The ".list_fast_inv" wrapper is for convenience and speed; applied to a list, it returns a list.  Finally, "*sym()" guarantees that the return value will be symmetric, because inversion can sometimes introduce slight asymmetries; these methods should of course be used only with matrices that are supposed to be symmetric, like covariance matrices.
#*# TO DO:  either find a way to write equally fast code using pure R (no calls to "La_dgesv" or other base package ".Call" functions) or link directly to the relevant LAPACK functions.  Per <http://stats.stackexchange.com/questions/14951/efficient-calculation-of-matrix-inverse-in-r>, "chol2inv(chol(.))" may be much faster than "solve(.)" (and guarantee symmetry of the results?)
#*# .fast_inv <- function(X, ID=diag(nrow(X)))
#*# 	.Call("La_dgesv", X, ID, LC_EPS, PACKAGE="base")
.fast_inv  <- function(X, ID=diag(nrow(X))) solve(X, ID)
.fast_inv_sym <- function(X, ID=diag(nrow(X)))
{
res = .fast_inv(X, ID)
(res + t(res)) / 2.0
}
.list_fast_inv <- function(L)
lapply(L, .fast_inv, ID=diag(nrow(L[[1]])))
.list_fast_inv_sym <- function(L)
lapply(L, .fast_inv_sym, ID=diag(nrow(L[[1]])))

# Quickly calculates the logarithm of the absolute value of the determinant of double (NOT INTEGER) valued matrix X; does no sanity checking, offers no options, and in general should only be used when you're sure this is what you want.  The ".vec_fast_labs_det" wrapper is for convenience; applied to a list, it returns a vector.
#*# TO DO:  either find a way to write equally fast code using pure R (no calls to "det_ge_real" or other base package ".Call" functions) or link directly to the relevant LAPACK functions.
#*# .fast_labs_det <- function(X)
#*# 	.Call("det_ge_real", X, TRUE, PACKAGE="base")\$modulus
.fast_labs_det <- function(X) determinant(X, logarithm=TRUE)\$modulus
.vec_fast_labs_det <- function(L) sapply(L, .fast_labs_det)

# Calculate the initial weight matrix ("W") for data "X" with "K" means, means being chosen appropriately. The matrix that is returned is N-by-K.
.init_weights <- function(X, K, prior=NULL, rowsmask=NULL)
{
X = apply(as.matrix(X), 2, rank)
mu = uniformQuantiles(X, K, decreasing=TRUE)

D = sapply(1:K, function(k) manhattan(X, mu[k,])) # distance matrix
S = 1/(D+LC_EPS) # similarity matrix
if(!is.null(prior)) S = t(t(S) * prior)

retn = S / rowSums(S)
return(retn)
}

# TESTING

.inititer_norm <- function(x, mu)
{
# initialization (partial M- and P-steps)
sigma = sqrt(colMeans(outer(x, mu, "-")^2))
# we do E[X - mu]^2 here to avoid negative variances since the "means" are randomly chosen; below in the M-step, we use the simpler E[X^2] - mu^2 method
G = mapply(function(mk, sk) dnorm(x, mk, sk), mu, sigma)

# E- and M-steps
W = rowDiv(G)
csw = colSums(W)
p = colMeans(W)
mu = drop(x %*% W) / csw
sigma = sqrt(colSums(x^2 * W)/csw - mu^2)

# P- and S-steps
G = mapply(function(mk, sk) dnorm(x, mk, sk), mu, sigma)
gamma = drop(G %*% p)
lgamma = log(gamma)
llik = sum(lgamma)

# prevent inversion
muorder = order(mu, decreasing=TRUE)
W = W[,muorder]
mu = mu[muorder]
p = p[muorder]

# entropy
entr = -2 * sum(W*log(W))

# package and return
namedList(W, mu, p, llik, entr)
}

.inititer_mvnorm <- function(X, mu)
{
# gather useful statistics and munge data
N = nrow(X)
K = nrow(mu)
kvec = 1:K
tX = t(X)

# initialization (partial M- and P-steps)
Sigma = lapply(kvec, function(k) tcrossprod(tX - mu[k,]) / N)
# see note on the initial "sigma" in .inititer_norm()
G = sapply(kvec, function(k) dmvnorm(X, mu[k,], Sigma[[k]]))

# E- and M-steps
W = rowDiv(G)
csw = colSums(W)
p = colMeans(W)
mu = t(tX %*% W) / csw
Sigma = lapply(kvec, function(k)
crossprod(X, W[,k]*X)/csw[k] - tcrossprod(mu[k,]))

# P- and S-steps
G = sapply(kvec, function(k) dmvnorm(X, mu[k,], Sigma[[k]]))
gamma = drop(G %*% p)
lgamma = log(gamma)
llik = sum(lgamma)

# prevent inversion
muorder = order(rowSums(mu), decreasing=TRUE)
W = W[,muorder]
mu = mu[muorder,]
p = p[muorder]

# entropy
entr = -2 * sum(W*log(W))

# package and return
namedList(W, mu, p, llik, entr)
}

.init_weights2 <- function(X, K,  nstart=100, seed=123) # nstart=100, seed=NULL
{
X = drop(as.matrix(X))

multivariate = is.matrix(X)
iterfun <- if(multivariate) .inititer_mvnorm else .inititer_norm
sampfun <- if(multivariate) function()
rowSort(rowSample(X, K), decreasing=TRUE)
else function()
sort(sample(X, K), decreasing=TRUE)

N = ifelse(multivariate, nrow(X), length(X))
if(multivariate) tX = t(X)
if(!is.null(seed)) set.seed(seed)

res = lapply(1:nstart, function(.) iterfun(X, sampfun()))
llik = sapply(res, function(r) r\$llik)
idx = which.max(llik)

retn = res[[idx]]\$W
attr(retn, "mu") = res[[idx]]\$mu
attr(retn, "p") = res[[idx]]\$p
attr(retn, "llik") = res[[idx]]\$llik
return(retn)
}

# /TESTING

# Given weight matrices "A" and "B", with column means a and b, calculate the transition matrix C such that:
# (1) a C = b
# (2) the value sum((A C - B)^2) is minimized
# using .dependentDiscreteCpdf (implying positive dependence).
.least_squares_transition_matrix <- function(A, B)
{
a = colMeans(A)
b = colMeans(B)
toptim <- function(d) sum(((A %*% .dependentDiscreteCpdf(a, b, d)) - B)^2)

.dependentDiscreteCpdf(a, b, optimize(toptim, c(0,1))\$minimum)
}

# Deal with outliers in log-pdf-by-component matrices so extreme their density is effectively 0 for all components; the value -300 is sufficient to prevent instability but not actually change the results in any meaningful way
.lGfilter <- function(lG)
{
if(min(lG) < -300) {
K = ncol(lG)
lG[(rowSums(lG < -300) == K), ] = -300
}
return(lG)
}

# Workhorse function for (unweighted) MLE of covariance; takes a D-by-N matrix, a D-length vector of means, and N, and returns a MLE covariance matrix.
.mleTcov <- function(tX, mu, N) { tXadj = tX - mu ; tcrossprod(tXadj) / N }

# Model M-step functions:  given distribution(s), return functions of data and weights (and data source names in the multiple-data-model case) which calculate parameters.
.mstep_mixmod <- function(distn)
{
mstep.observed <- .mstep_observed(distn)

function(X, weights, theta=NULL, prior=NULL) {

prob = if(is.null(prior)) colMeans(weights\$W) else(prior)
lprob = log(prob)
hidden = namedList(prob, lprob)

observed = mstep.observed(X, weights\$W, theta=theta\$observed)

namedList(hidden, observed)
}
}
.mstep_mdmixmod_layered <- function(distn)
{
mstep.observed <- mapply(.mstep_observed, distn)

function(X, weights, K, kvec, K0, k0vec, zvec, params=NULL, prior=NULL)
{
prob. = colMeans(weights\$U)

prob0 = if(is.null(prior)) prob. else(prior)
lprob0 = log(prob0)
lcprob = lapply(weights\$V, function(Vz)
log(t(sapply(Vz, colMeans))) - log(prob.))
cprob = lapply(lcprob, exp)
hidden = namedList(prob0, cprob, lprob0, lcprob)

observed = lapply(zvec, function(z)
mstep.observed[[z]](X[[z]], weights\$W[[z]],
theta=params\$observed[[z]]))
namedList(hidden, observed)
}
}
.mstep_mdmixmod_chained <- function(distn)
{
mstep.observed <- mapply(.mstep_observed, distn)

function(X, weights, K, kvec, K0, k0vec, zvec, params=NULL, prior=NULL)
{
Z = length(zvec)

tH = lapply(weights\$V, function(V_z) sapply(V_z, colMeans))
ltH = lapply(tH, log)
H = lapply(tH, t)
lH = lapply(ltH, t)

prob. = rowSums(H[[1]])
prob0 = if(is.null(prior)) prob. else prior
lprob0 = log(prob0)
probz = lapply(H, colSums)
lprobz = lapply(probz, log)

lQ1 = lH[[1]] - log(prob.)
lQ2plus = lapply(2:Z, function(z) lH[[z]] - lprobz[[z-1]])
lcprob = c(list(lQ1), lQ2plus)
names(lcprob) = zvec
cprob = lapply(lcprob, exp)

lrprob = lapply(zvec, function(z) ltH[[z]] - lprobz[[z]])
rprob = lapply(lrprob, exp)

hidden = namedList(prob0, probz, cprob, rprob,
lprob0, lprobz, lcprob, lrprob)

observed = lapply(zvec, function(z)
mstep.observed[[z]](X[[z]], weights\$W[[z]],
theta=params\$observed[[z]]))

namedList(hidden, observed)
}
}

# Return a function which performs weighted observed-distribution parameter estimation for the single-data mixture model M-step (can also be used in the multiple-data mixture model if handled appropriately.)
.mstep_observed <- function(distn) get(sprintf(".thetahat_%s", distn))

# Combined E- and M-steps for univariate Pearson Type VII parameter estimation.  Takes data, transposed data, weights, and a list "theta" containing the sum of weights, log of sum of weights, and current parameter values.  Returns updated theta with an additional element:  the value of the objective function.
.mvpvii_naive_emstep <- function(X, tX, w, sw, theta)
{
# E-step

D = ncol(X)
N = nrow(X)
X.prime = t(t(X) - theta\$mean)
iSigma = .fast_inv_sym(theta\$scale)
I = diag(D)

alpha.prime = theta\$shape + 0.5
Psi = lapply(1:N, function(n) tcrossprod(X.prime[n,]))
Lambda.prime = lapply(Psi, function(Psin) I + 0.5*Psin%*%iSigma)

iLambda.prime = .list_fast_inv_sym(Lambda.prime)
T.prime = lapply(iLambda.prime, "*", y=alpha.prime)
mgammaterm = sum(digamma(alpha.prime - (0:(D-1))/2))
labsdetterm = .vec_fast_labs_det(Psi)
t.star = mgammaterm - labsdetterm

wtp = lapply(1:N, function(n) w[n] * T.prime[[n]])
swtp = listSum(wtp)
wtpx = lapply(1:N, function(n) wtp[[n]] %*% X[n,])
swtpx = listSum(wtpx)
wpsitp = lapply(1:N, function(n) w[n] * Psi[[n]] %*% T.prime[[n]])
swpsitp = listSum(wpsitp)
swts = sum(w * t.star)

print(swpsitp)

# M-step

mu = drop(.fast_inv_sym(swtp) %*% swtpx)
Sigma = swpsitp / sw

toptim <- function(alpha) -lmgamma(alpha, D)*sw - alpha*swts
res = nlminb(theta\$shape, toptim, lower=LC_EPS+(D-1)/2)
alpha = res\$par

# send it back
list(mean=mu, scale=Sigma, shape=alpha, obj=res\$objective)
}
.mvpvii_emstep <- function(X, tX, w, sw, theta)
{
# E-step

shape.prime = theta\$shape + 0.5*ncol(X)
tX.prime = tX - theta\$mean
X.prime = t(tX.prime)
delta = rowSums((X.prime %*% .fast_inv_sym(theta\$scale)) * X.prime)
rate.prime = 1 + 0.5*delta

tprime  = shape.prime / rate.prime
tstar   = digamma(shape.prime) - log(rate.prime)

# M-step

wtp = w * tprime
mu = colSums(wtp * X) / sum(wtp)

Sigma = tX.prime %*% (wtp * X.prime) / sw

swts = sum(w * tstar)
toptim <- function(alpha) lgamma(alpha)*sw - alpha*swts
res = nlminb(theta\$shape, toptim, lower=1+LC_EPS)
alpha = res\$par

# send it back
list(mean=mu, scale=Sigma, shape=alpha, obj=res\$objective)
}

# Initial parameter estimation for multivariate Pearson Type VII.  Takes data, weights.  Returns the estimated parameters.
.mvpvii_init_params <- function(X, w)
{
theta.mvnorm = thetahat.mvnorm(X, w)

mean = theta.mvnorm\$mean
shape = 2
scale = theta.mvnorm\$cov * shape

namedList(mean, scale, shape)
}

# Calculate the number of parameters for distribution of observed data in a single-data mixture model
.npar_observed <- function(distn, K, D)
{
K * switch(distn,
norm    = 2,
mvnorm  = D + D*(D+1)/2,
weisd   = 2,
mvweisd = 2*D + D*(D-1)/2,
gamma   = 2,
mvgamma = 2*D + D*(D-1)/2,
exp     = 1,
mvexp   = D + D*(D-1)/2,
pvii    = 3,
mvpvii  = D + D*(D+1)/2 + 1,
stop(sprintf("unknown distribution '%s'", distn))
)
}

# Return a nicely formatted string version of a family name, suitable for printing
.pretty_family <- function(family)
{
switch(family,
weibull = "Weibull",
pvii = "PVII",
family)
}

# Model P-step functions:  given distribution(s), return functions of data, number(s) of components, and parameters (and data source names in the multiple-data-model case) which calculate PDF matrices
.pstep_mixmod <- function(distn)
{
pstep.observed <- .pstep_observed(distn)

function(X, params, kvec)
{
lG = .lGfilter(pstep.observed(X, kvec, params\$observed))
G = exp(lG) # g_n,k = f(x_n | y=k)
gamma = drop(G %*% params\$hidden\$prob) # f(X)
# "drop" so we have an N-length vector rather than a N-by-1 matrix
lgamma = log(gamma)

namedList(G, gamma, lG, lgamma)
}
}
.pstep_mdmixmod_layered <- function(distn)
{
pstep.observed <- mapply(.pstep_observed, distn)

function(X, params, kvec, zvec)
{
lG = lapply(zvec, function(z) .lGfilter(pstep.observed[[z]](
X[[z]], kvec[[z]], params\$observed[[z]])))
G = lapply(lG, exp)

alpha = lapply(zvec, function(z)
G[[z]] %*% t(params\$hidden\$cprob[[z]]))
lalpha = lapply(alpha, log)
lbeta = listSum(lalpha)
beta = exp(lbeta)
gamma = drop(beta %*% params\$hidden\$prob0) # = f(X)
# "drop" so we have an N-length vector rather than a N-by-1 matrix
lgamma = log(gamma)

namedList(G, lG, alpha, beta, gamma, lalpha, lbeta, lgamma)
}
}
.pstep_mdmixmod_chained <- function(distn)
{

pstep.observed <- mapply(.pstep_observed, distn)

function(X, params, kvec, zvec)
{
## initial component-specific PDF calculations

lG = lapply(zvec, function(z) .lGfilter(pstep.observed[[z]](
X[[z]], kvec[[z]], params\$observed[[z]])))

G = lapply(lG, exp)

## put parameters into mathematical notation

P = params\$hidden\$probz
Q = params\$hidden\$cprob
R = params\$hidden\$rprob

lP = params\$hidden\$lprobz

## now do the actual calculations

Z = length(zvec)

alpha = vector(mode="list", length=Z)
lalpha = vector(mode="list", length=Z)
lalpha[[1]] = t(t(lG[[1]]) + lP[[1]])
alpha[[1]] = exp(lalpha[[1]])
for(z in 2:Z)
{
lalpha[[z]] = lG[[z]] + log(alpha[[z-1]] %*% Q[[z]])
alpha[[z]] = exp(lalpha[[z]])
}
names(alpha) = zvec

beta = vector(mode="list", length=Z)
lbeta = vector(mode="list", length=Z)
lbeta[[Z]] = t(t(lG[[Z]]) + lP[[Z]])
beta[[Z]] = exp(lbeta[[Z]])
for(z in (Z-1):1)
{
lbeta[[z]] = lG[[z]] + log(beta[[z+1]] %*% R[[z+1]])
beta[[z]] = exp(lbeta[[z]])
}
names(beta) = zvec

gamma = rowSums(alpha[[Z]])
lgamma = log(gamma)

## send it back

namedList(G, lG, alpha, beta, gamma, lalpha, lbeta, lgamma)
}
}

# Return a function which performs PDF estimation for the observed data given the components of a single-data mixture model (can also be used in multiple-data mixture models if handled appropriately.)
.pstep_observed <- function(distn)
{
switch(distn,

norm = function(X, kvec, obspar)
sapply(kvec, function(k)
dnorm(X, obspar\$mean[k], obspar\$sd[k], log=TRUE)),
mvnorm = function(X, kvec, obspar)
sapply(kvec, function(k)
dmvnorm(X, obspar\$mean[k,], obspar\$cov[[k]], log=TRUE)),

weisd = function(X, kvec, obspar)
sapply(kvec, function(k)
dweisd(X, obspar\$shape[k], obspar\$decay[k], log=TRUE)),
mvweisd = function(X, kvec, obspar)
sapply(kvec, function(k)
dmvweisd(X, obspar\$shape[k,], obspar\$decay[k,],
obspar\$corr[[k]], log=TRUE)),

gamma = function(X, kvec, obspar)
sapply(kvec, function(k)
dgamma(X, obspar\$shape[k], obspar\$rate[k], log=TRUE)),
mvgamma = function(X, kvec, obspar)
sapply(kvec, function(k)
dmvgamma(X, obspar\$shape[k,], obspar\$rate[k,],
obspar\$corr[[k]], log=TRUE)),

exp = function(X, kvec, obspar)
sapply(kvec, function(k)
dexp(X, obspar\$rate[k], log=TRUE)),
mvexp = function(X, kvec, obspar)
sapply(kvec, function(k)
dmvexp(X, obspar\$rate[k,],
obspar\$corr[[k]], log=TRUE)),

pvii = function(X, kvec, obspar)
sapply(kvec, function(k)
dpvii(X, obspar\$mean[k], obspar\$scale[k],
obspar\$shape[k], log=TRUE)),
mvpvii = function(X, kvec, obspar)
sapply(kvec, function(k)
dmvpvii(X, obspar\$mean[k,], obspar\$scale[[k]],
obspar\$shape[k], log=TRUE))

)
}

# Combined E- and M-steps for univariate Pearson Type VII parameter estimation.  Takes data, weights, and a list "theta" containing the sum of weights, log of sum of weights, and current parameter values.  Returns updated theta with an additional element:  the value of the objective function.
.pvii_emstep <- function(x, w, sw, theta)
{
# E-step

shape.prime = theta\$shape + 0.5
Delta = (x - theta\$mean)^2
delta = Delta / theta\$scale
rate.prime  = 1 + 0.5*delta

tprime  = shape.prime / rate.prime
tstar   = digamma(shape.prime) - log(rate.prime)

# M-step

wtp = w * tprime
mu = sum(wtp * x) / sum(wtp)

sigmasq = sum(wtp * Delta) / sw

swts = sum(w * tstar)
toptim <- function(alpha) lgamma(alpha)*sw - alpha*swts
res = nlminb(theta\$shape, toptim, lower=1+LC_EPS)
alpha = res\$par

# send it back

list(mean=mu, scale=sigmasq, shape=alpha, obj=res\$objective)
}

# Initial parameter estimation for univariate Pearson Type VII.  Takes data, weights.  Returns the estimated parameters.
.pvii_init_params <- function(x, w)
{
theta.norm = thetahat.norm(x, w)

mean = theta.norm\$mean
shape = 2
scale = theta.norm\$var * shape

namedList(mean, scale, shape)
}

# "Q-function" (expected log-likelihood) for single-data (.qfun_mixmod()) and multiple-data (.qfun_mdmixmod()) mixture models. If map==FALSE, the expected value will be returned; if TRUE, the MAP (maximum a posteriori) estimate will be returned.  The .bad2zero() calls are to deal with what happens when we try to take the log of 0 probabilities.
.qfun_mixmod <- function(weights, params, pdfs, map=FALSE)
{
W = weights\$W
if(map) W = isRowMax(W)

namedVector(hidden, observed)
}
.qfun_mdmixmod <- function(weights, params, pdfs, topology, map=FALSE)
{
U = weights\$U
V = weights\$V
W = weights\$W

zvec = names(W)
names(zvec) = zvec
Z = length(zvec)

if(map) {
U = isRowMax(U)
V = lapply(V, .v2map)
W = lapply(W, isRowMax)
}

lp = params\$hidden\$lprob0
lQ = params\$hidden\$lcprob
lG = pdfs\$lG

term2 = sum(sapply(zvec, function(z)
sum(sapply(1:length(V[[z]]), function(kparens)

term3 = sum(sapply(zvec, function(z)

c(hidden=term1+term2, observed=term3)
}

# Weighted multivariate standard normal (means all equal to 0, variances all equal to 1) correlation (equivalent to covariance) matrix estimation.
.rhohat <- function(X, w) cov2cor(t(X) %*% (w*X) / sum(w))
# the columns of X are already assumed to have mean 0, so no need to estimate means and center; "cov2cor" is a hack to make sure the result is a valid correlation matrix, and fixing it is TO DO

# Return a list containing the outer products of the rows of matrix X.
.tcrossprodcols <- function(X) lapply(1:ncol(X), function(n) tcrossprod(X[,n]))
.tcrossprodrows <- function(X) lapply(1:nrow(X), function(n) tcrossprod(X[n,]))

# Ensure that input data to a single-data mixture model (or a list component in a multiple-data mixture model) meets the requirements of the given distribution family.
.sanity_check_data <- function(X, family)
{
if((family %in% LC_NONNEGFAM) && min(X) <= 0)
{
family = .pretty_family(family)
stop(sprintf("%s family requires non-negative data", family))
}

return(0) # normal result, got through the above
}

# Turn a weight matrix into a matrix of almost-0 and almost-1 values, where the maximum in each row gets assigned almost-1 and all other positions almost-0
.sharpen_weight_matrix <- function(W)
{
res = diag(ncol(W))[apply(W, 1, which.max),] + LC_EPS

res / rowSums(res)
}

# Hidden data simulation:  take a number of samples to simulate, a distribution, and hidden parameters (and a topology for .simulate_hidden_mdmixdata()) and return hidden data (Y for .simulate_hidden_mixdata(), a list with elements \$Y and \$Y0 for .simulate_hidden_mdmixdata()).  No sanity checking is done, so everything has to be properly prepared; e.g., see calls to .simulate_hidden_mdmixdata() from simulateMdmixdata() to see how arguments should be set up.
.simulate_hidden_mixdata <- function(n, par)
{
rcategorical(n, par\$prob)
}
.simulate_hidden_mdmixdata <- function(n, par, topology)
{
# gather information
K0 = length(par\$prob0)
K = sapply(par\$cprob, ncol)
Z = length(par\$cprob)
zvec = names(par\$cprob)
names(zvec) = zvec

# simulate top-level
Y0 = rcategorical(n, par\$prob0)

# simulate lower-level
if(topology == "layered") {

Y = lapply(zvec, function(z)
{
res = vector(mode="numeric", length=n)
for(k0 in 1:K0)
{
isk0 = (Y0 == k0)
nisk0 = sum(isk0)
res[isk0] = rcategorical(nisk0, par\$cprob[[z]][k0,])
}
return(res)
})

} else if(topology == "chained") {

Y = vector(mode="list", length=Z)
Y[[1]] = vector(mode="numeric", length=n)

for(k0 in 1:K0)
{
isk0 = (Y0 == k0)
nisk0 = sum(isk0)
Y[[1]][isk0] = rcategorical(nisk0,
par\$cprob[[1]][k0,])
}

if(Z > 1) for(z in 2:Z)
{
Y[[z]] = vector(mode="numeric", length=n)
for(kzm1 in 1:K[[z-1]])
{
iskzm1 = (Y[[z-1]] == kzm1)
niskzm1 = sum(iskzm1)
Y[[z]][iskzm1] = rcategorical(niskzm1,
par\$cprob[[z]][kzm1,])
}
}

names(Y) = zvec

} else stop(sprintf("unrecognized topology '%s'", topology))

# package it up, send it back
namedList(Y, Y0)
}

# Model S-step functions:  given no arguments, return functions of iteration, weights, parameters, PDFs, and BIC adjustment factor (size of parameter space times log of sample size) which calculate iteration statistics
.sstep_mixmod <- function(bicsub)
{
function(iter, weights, params, pdfs)
{
llik = sum(pdfs\$lgamma)
qval = sum(.qfun_mixmod(weights, params, pdfs))
bic = 2*llik - bicsub
iclbic = 2*qval - bicsub

namedVector(iter, llik, qval, bic, iclbic)
}
}
.sstep_mdmixmod <- function()
{
function(iter, weights, params, pdfs, bicsub)
{
llik = sum(pdfs\$lgamma)
qval = sum(.qfun_mdmixmod(weights, params, pdfs))
bic = 2*llik - bicsub
iclbic = 2*qval - bicsub

namedVector(iter, llik, qval, bic, iclbic)
}
}

# Weighted parameter estimation functions for mixtures.  If argument "theta" is not NULL, it should be a list of parameters with the same names as those to be estimated, reflecting previous iteration's values of those parameters.  (Note that it will have no effect in the case of distributions with closed form MLEs.)
.thetahat_exp <- function(x, W, aslist=TRUE, theta=NULL)
{
res = sapply(1:ncol(W), function(k)
thetahat.exp(x, W[,k], aslist=FALSE))

lambda = as.vector(res)
# the kth element of lambda is the rate for the kth component

if(aslist)
list(rate=lambda)
else
c(rate=lambda)
}
.thetahat_mvexp <- function(X, W, aslist=TRUE, theta=NULL)
{
kvec = 1:ncol(W)

res = sapply(kvec, function(k)
thetahat.mvexp(X, W[,k], aslist=TRUE), simplify=FALSE)

lambda = t(sapply(kvec, function(k) res[[k]]\$rate))
# the kth row of lambda is the vector of the column rates of X for the kth component
rho = lapply(kvec, function(k) res[[k]]\$corr)

if(aslist)
list(rate=lambda, corr=rho)
else
c(rate=lambda, corr=rho)
}
.thetahat_gamma <- function(x, W, aslist=TRUE, theta=NULL)
{
res = sapply(1:ncol(W), function(k)
thetahat.gamma(x, W[,k], aslist=FALSE, shape.guess=theta\$shape[k]))

sigma = res["shape",]
lambda = res["rate",]
# the kth element of sigma is the shape for the kth component, and similarly for lambda (rate)

if(aslist)
list(shape=sigma, rate=lambda)
else
c(shape=sigma, rate=lambda)
}
.thetahat_mvgamma <- function(X, W, aslist=TRUE, theta=NULL)
{
kvec = 1:ncol(W)

res = sapply(kvec,
function(k)
thetahat.mvgamma(X, W[,k], aslist=TRUE,
shape.guess=theta\$shape[k,]),
simplify=FALSE)

sigma = t(sapply(kvec, function(k) res[[k]]\$shape))
# the kth row of sigma is the vector of the column shapes of X for the kth component
lambda = t(sapply(kvec, function(k) res[[k]]\$rate))
# the kth row of lambda is the vector of the column rates of X for the kth component
rho = lapply(kvec, function(k) res[[k]]\$corr)

if(aslist)
list(shape=sigma, rate=lambda, corr=rho)
else
c(shape=sigma, rate=lambda, corr=rho)
}
.thetahat_norm <- function(x, W, aslist=TRUE, theta=NULL)
{
res = sapply(1:ncol(W), function(k)
thetahat.norm(x, W[,k], aslist=FALSE))

mu = res["mean",]
sigmasq = res["var",]
sigma = res["sd",]
# the kth element of mu is the mean for the kth component, and similarly for sigmasq (variance) and sigma (std. dev.)

if(aslist)
list(mean=mu, var=sigmasq, sd=sigma)
else
c(mean=mu, var=sigmasq, sd=sigma)
}
.thetahat_mvnorm <- function(X, W, aslist=TRUE, theta=NULL)
{

kvec = 1:ncol(W)

res = sapply(kvec, function(k)
thetahat.mvnorm(X, W[,k], aslist=TRUE), simplify=FALSE)

mu = t(sapply(kvec, function(k) res[[k]]\$mean))
# the kth row of mu is the vector of the column means of X for the kth component
Sigma = lapply(kvec, function(k) res[[k]]\$cov)
# the kth list element of Sigma is the covariance matrix for the kth component

if(aslist)
list(mean=mu, cov=Sigma)
else
c(mean=mu, cov=Sigma)
}
.thetahat_pvii <- function(x, W, aslist=TRUE, theta=NULL)
{
res = sapply(1:ncol(W), function(k) {
if(!is.null(theta)) theta = lapply(theta, function(par) par[k])
# get component-specific parameters to pass to estimation fn.
thetahat.pvii(x, W[,k], aslist=FALSE, iter.max=0, theta=theta)
})

mean  = res["mean", ]
scale = res["scale",]
shape = res["shape",]

if(aslist)
namedList(mean, scale, shape)
else
namedVector(mean, scale, shape)
}
.thetahat_mvpvii <- function(X, W, aslist=TRUE, theta=NULL)
{
kvec = 1:ncol(W)

res = lapply(kvec, function(k) {
if(!is.null(theta)) theta = lapply(theta, function(par)
if(is.list(par)) par[[k]]
else if(is.matrix(par)) par[k,]
else par[k])
# get component-specific parameters to pass on
thetahat.mvpvii(X, W[,k], aslist=TRUE, iter.max=0, theta=theta)
})

mean  = t(sapply(kvec, function(k) res[[k]]\$mean))
scale = lapply(kvec, function(k) res[[k]]\$scale)
shape = sapply(kvec, function(k) res[[k]]\$shape)
# see .thetahat_mvnorm() above to understand the formation of these data structures

if(aslist)
namedList(mean, scale, shape)
else
namedVector(mean, scale, shape)
}
.thetahat_weisd <- function(x, W, aslist=TRUE, theta=NULL)
{
res = sapply(1:ncol(W), function(k)
thetahat.weisd(x, W[,k], aslist=FALSE, shape.guess=theta\$shape[k]))

sigma = res["shape",]
delta = res["decay",]
# the kth element of sigma is the shape for the kth component, and similarly for delta (decay)

if(aslist)
list(shape=sigma, decay=delta)
else
c(shape=sigma, decay=delta)
}
.thetahat_mvweisd <- function(X, W, aslist=TRUE, theta=NULL)
{
kvec = 1:ncol(W)

res = sapply(kvec,
function(k)
thetahat.mvweisd(X, W[,k], aslist=TRUE,
shape.guess=theta\$shape[k,]),
simplify=FALSE)

sigma = t(sapply(kvec, function(k) res[[k]]\$shape))
# the kth row of sigma is the vector of the column shapes of X for the kth component
delta = t(sapply(kvec, function(k) res[[k]]\$decay))
# the kth row of delta is the vector of the column decays of X for the kth component
rho = lapply(kvec, function(k) res[[k]]\$corr)

list(shape=sigma, decay=delta, corr=rho)
}

# Build "Q" (transition) matrices under under local independence assumption (.qgen) or under linear dependence assumption (.qgenDep)
.qgen <- function(A, B)
{
J = crossprod(A, B) # N f(A,B)

J / rowSums(J) # f(B|A)
}
.qgenDep <- function(A, B)
{
res = sapply(1:ncol(B), function(kb) coef(nnls(A, B[,kb]))) # N f(B|A)

res / rowSums(res) # f(B|A)
}

# Build "V" (joint weight) matrices under local independence assumption (.vgen) or under linear dependence assumption (.vgenDep)
.vgen <- function(A, B) lapply(1:ncol(A), function(ka) A[,ka] * B)
.vgenDep <- function(A, B)
{
R = .qgenDep(B, A) # f(A|B)
tB = t(B)

lapply(1:ncol(A), function(ka) t(tB * R[,ka]))
}

# Convert "V" (joint weight) matrix into MAP estimate.  TO DO:  this is horribly slow.
.v2map <- function(V)
{
N = nrow(V[[1]])
Ka = length(V)

map.raw = lapply(1:N, function(n) { # MAP(V) indexed by n instead of ka
res = t(sapply(1:Ka, function(ka) V[[ka]][n,]))
1 * (res == max(res))
})

lapply(1:Ka, function(ka) t(sapply(1:N, function(N) map.raw[[N]][ka,])))
}
```

## Try the lcmix package in your browser

Any scripts or data that you put into this service are public.

lcmix documentation built on May 31, 2017, 5 a.m.