Nothing
.aitken <- function(loglik) {
if(!is.numeric(loglik) ||
length(loglik) != 3) stop("'loglik' must be a numeric vector of length 3", call.=FALSE)
l1 <- loglik[1L]
l2 <- loglik[2L]
l3 <- loglik[3L]
if(any(is.infinite(loglik))) {
linf <-
ldiff <- Inf
a <- NA
} else {
a <- ifelse(l2 > l1, (l3 - l2) / (l2 - l1), 0L)
denom <- max(1L - a, .Machine$double.eps)
linf <- ifelse(a < 1L, l2 + (l3 - l2) / denom, Inf)
ldiff <- ifelse(is.numeric(a) && a < 0, 0L, abs(linf - l2))
ldiff[is.nan(ldiff)] <- Inf
}
return(list(ll = l3, linf = linf,
a = a, ldiff = ldiff))
}
.char_to_num <- function(x) {
utf8ToInt(x)
}
#' @importFrom matrixStats "rowMaxs"
#.choice_crit <- function(ll, seqs, z, gp, modtype = c("CC", "UC", "CU", "UU", "CCN", "UCN", "CUN", "UUN"), type = c("new", "old"), gamma = 0) {
.choice_crit <- function(ll, seqs, z, gp, modtype = c("CC", "UC", "CU", "UU", "CCN", "UCN", "CUN", "UUN"), type = c("new", "old")) {
#if(length(gamma) > 1 ||
# !is.numeric(gamma) ||
# (gamma < 0 ||
# gamma > 1)) stop("gamma must be a single digit in the interval [0, 1]", call.=FALSE)
#if(gamma < (1 - 1/(2 * log(attr(seqs, "T"))/
# log(attr(seqs, "W"))))) warning("Invalid gamma", call.=FALSE, immediate.=TRUE)
type <- match.arg(type)
G <- attr(seqs, ifelse(is.element(modtype, c("CCN", "UCN", "CUN", "UUN")), "G0", "G"))
P <- attr(seqs, "T")
mp <- switch(EXPR=type,
new=attr(seqs, "VTS"),
old=P) * G
kpar <- switch(EXPR= modtype,
CC = mp + 1L,
CCN = mp + (G > 0),
UC =,
UCN = mp + G,
CU =,
CUN = mp + P,
UU =,
UUN = switch(EXPR=type,
new=mp + G * P,
old=mp * 2L)) + gp
ll2 <- ll * 2L
bic <- ll2 - kpar * log(attr(seqs, "W"))
#ebic <- bic - kpar * 2L * gamma * log(P)
return(list(bic = bic, icl = bic + 2L * sum(log(rowMaxs(z, useNames=FALSE)), na.rm=TRUE), aic = ll2 - kpar * 2L, df = kpar))
}
#' @importFrom matrixStats "colSums2"
.colSumProd <- function(x, y) {
colSums2(x * y, useNames=FALSE, na.rm=TRUE)
}
.crits_names <- function(x) {
unlist(lapply(seq_along(x), function(i) stats::setNames(x[[i]], paste0(names(x[i]), "|", names(x[[i]])))))
}
#' @importFrom stringdist "stringdistmatrix"
.dbar <- function(seqs, theta) {
mean(.dseq(seqs, theta))
}
.drop_constants <- function(dat, formula, sub = NULL) {
if(!is.data.frame(dat)) stop("'dat' must be a data.frame", call.=FALSE)
Nseq <- seq_len(nrow(dat))
sub <- if(missing(sub)) Nseq else sub
numsubs <- all(is.numeric(sub))
if(!any(numsubs, all(is.logical(sub)) &&
length(sub) == nrow(dat))) stop("'sub' must be a numeric vector, or logical vector with length equal to the number of rows in 'dat'", call.=FALSE)
if(numsubs &&
any(match(sub, Nseq,
nomatch = 0) == 0)) stop("Numeric 'sub' must correspond to row indices of data", call.=FALSE)
if(!inherits(formula,
"formula")) stop("'formula' must actually be a formula!", call.=FALSE)
intercept <- attr(stats::terms(formula), "intercept")
dat <- dat[sub,colnames(dat) %in% attr(stats::terms(stats::update.formula(formula, NULL ~ .)), "term.labels"), drop=FALSE]
ind <- names(which(!apply(dat, 2L, function(x) all(x == x[1L], na.rm=TRUE))))
fterms <- attr(stats::terms(formula), "term.labels")
ind <- unique(c(ind[ind %in% fterms], fterms[grepl(":", fterms) | grepl("I\\(", fterms)]))
response <- all.vars(stats::update.formula(formula, . ~ NULL))
form <- if(length(ind) > 0) stats::reformulate(ind, response=response) else stats::as.formula(paste0(response, " ~ 1"))
form <- if(intercept == 0) stats::update.formula(form, ~ . -1) else form
environment(form) <- environment(formula)
form
}
.drop_levels <- function(fit, newdata) {
if(!is.data.frame(newdata)) stop("'newdata' must be a data.frame", call.=FALSE)
dat.fac <- vapply(newdata, is.factor, logical(1L))
if(!any(dat.fac)) return(newdata)
factors <- rep(names(fit$xlevels), lengths(fit$xlevels))
factorLevels <- unname(unlist(fit$xlevels))
modelFactors <- cbind.data.frame(factors, factorLevels)
predictors <- names(newdata[names(newdata) %in% factors])
for(i in seq_along(predictors)) {
ind <- newdata[,predictors[i]] %in% modelFactors[modelFactors$factors == predictors[i],]$factorLevels
if(any(!ind)) {
newdata[!ind,predictors[i]] <- NA
newdata[,predictors[i]] <- factor(newdata[,predictors[i]], levels=modelFactors[modelFactors$factors == predictors[i],]$factorLevels)
}
}
newdata
}
#' @importFrom stringdist "stringdistmatrix"
.dseq <- function(seqs, theta) {
stringdistmatrix(seqs, theta, method="hamming")
}
#' @importFrom matrixStats "colSums2" "logSumExp" "rowAlls" "rowLogSumExps" "rowSums2"
#' @importFrom stringdist "stringdistmatrix"
.E_step <- function(seqs, params, modtype = c("CC", "UC", "CU", "UU", "CCN", "UCN", "CUN", "UUN"), ctrl, numseq = NULL, HAM.mat = NULL, z.old = NULL) {
G <- attr(seqs, "G")
N <- attr(seqs, "N")
P <- attr(seqs, "T")
V1 <- attr(seqs, "V1")
lPV <- attr(seqs, "lPV")
G0 <- ifelse((noise <- attr(seqs, "Noise")), attr(seqs, "G0"), G)
Gseq <- seq_len(G0)
N1 <- N > 1
theta <- params$theta
lambda <- switch(EXPR=modtype, CUN=, UU=, UUN=params$lambda, drop(params$lambda))
dG.X <- is.null(params$dG)
opti <- ctrl$opti
if(is.null(numseq) &&
isFALSE(ctrl$numseq) &&
ctrl$nmeth && dG.X) {
numseq <- unname(sapply(seqs, .char_to_num))
}
if(G == 1L) {
return(if(ctrl$do.wts) {
dG <- if(dG.X) switch(EXPR=modtype, CC=switch(EXPR=opti, medoid=HAM.mat[,attr(theta, "Ind"), drop=FALSE], .dseq(seqs, theta)), CU=numseq != .char_to_num(theta)) else params$dG
switch(EXPR=modtype,
CC = -ifelse(lambda == 0, attr(seqs, "W") * lPV, lambda * sum(attr(seqs, "Weights") * dG, na.rm=TRUE) + attr(seqs, "W") * P * log1p(V1 * exp(-lambda))),
CCN = -attr(seqs, "W") * lPV,
CU = -sum(sweep(dG * drop(lambda), 2L, attr(seqs, "Weights"), FUN="*", check.margin=FALSE), na.rm=TRUE) - attr(seqs, "W") * sum(log1p(V1 * exp(-lambda))))
} else {
switch(EXPR=modtype,
CC = -ifelse(0 == lambda, N * lPV, sum(lambda * N * .dbar(seqs, theta), N * P * log1p(V1 * exp(-lambda)), na.rm=TRUE)),
CCN = -N * lPV,
CU = -sum((numseq != .char_to_num(theta)) * drop(lambda), na.rm=TRUE) - N * sum(log1p(V1 * exp(-lambda))))
})
} else {
dG <- if(dG.X) switch(EXPR=modtype,
CC=, UC=, CCN=, UCN=switch(EXPR=opti, medoid=HAM.mat[,attr(theta, "Ind"), drop=FALSE], .dseq(seqs, theta[Gseq])),
CU=, UU=, CUN=, UUN=lapply(Gseq, function(g) numseq != .char_to_num(theta[g]))) else params$dG
log.tau <- .mat_byrow(log(params$tau), nrow=N, ncol=G)
if(noise) {
tau0 <- log.tau[,G]
log.tau <- log.tau[,-G, drop=FALSE]
}
if(N1) {
numer <- switch(EXPR=modtype,
CC = t(-t(dG) * lambda - P * log1p(V1 * exp(-lambda))) + log.tau,
UC = t(-t(dG) * lambda - P * log1p(V1 * exp(-lambda))) + log.tau,
CU = -vapply(dG, .colSumProd, numeric(N), lambda) - sum(log1p(V1 * exp(-lambda))) + log.tau,
UU = t(t(-vapply(Gseq, function(g) colSums2(dG[[g]] * lambda[g,], useNames=FALSE, na.rm=TRUE), numeric(N))) - rowSums2(log1p(V1 * exp(-lambda)), useNames=FALSE)) + log.tau,
CCN = cbind(t(-t(dG) * lambda[1L] - P * log1p(V1 * exp(-lambda[1L]))) + log.tau, tau0 - lPV),
UCN = cbind(t(-t(dG) * lambda[-G] - P * log1p(V1 * exp(-lambda[-G]))) + log.tau, tau0 - lPV),
CUN = cbind(-vapply(dG, .colSumProd, numeric(N), lambda[1L,]) - sum(log1p(V1 * exp(-lambda[1L,]))) + log.tau, tau0 - lPV),
UUN = cbind(t(t(-vapply(Gseq, function(g) colSums2(dG[[g]] * lambda[g,], useNames=FALSE, na.rm=TRUE), numeric(N))) - rowSums2(log1p(V1 * exp(-lambda[-G,, drop=FALSE])), useNames=FALSE)) + log.tau, tau0 - lPV))
} else {
numer <- switch(EXPR=modtype,
CC = -dG * lambda - P * log1p(V1 * exp(-lambda)),
UC = -dG * lambda - P * log1p(V1 * exp(-lambda)),
CU = -colSums2(simplify2array(dG, higher=FALSE) * drop(lambda), useNames=FALSE, na.rm=TRUE) - sum(log1p(V1 * exp(-lambda))),
UU = -vapply(Gseq, function(g) colSums2(dG[[g]] * lambda[g,], useNames=FALSE, na.rm=TRUE), numeric(N)) - rowSums2(log1p(V1 * exp(-lambda)), useNamse=FALSE),
CCN = c(-dG * lambda[1L] - P * log1p(V1 * exp(-lambda[1L])), - lPV),
UCN = c(-dG * lambda[-G] - P * log1p(V1 * exp(-lambda[-G])), - lPV),
CUN = c(-colSums2(simplify2array(dG, higher=FALSE) * lambda[1L,], useNames=FALSE, na.rm=TRUE) - sum(log1p(V1 * exp(-lambda[1L,]))), - lPV),
UUN = c(-vapply(Gseq, function(g) colSums2(dG[[g]] * lambda[g,], useNames=FALSE, na.rm=TRUE), numeric(N)) - rowSums2(log1p(V1 * exp(-lambda[-G,, drop=FALSE])), useNames=FALSE), - lPV))
numer <- matrix(numer, nrow=1L) + switch(EXPR=modtype, CC=, UC=, CU=, UU=log.tau, c(log.tau, tau0))
}
if(any(iLAM <- is.infinite(lambda))) {
switch(EXPR = modtype,
CC =,
CCN = numer[is.na(numer)] <- log.tau - lPV,
UC =,
UCN = {
i.x <- which(dG == 0, arr.ind=TRUE)
i.x <- i.x[i.x[,2L] %in% which(switch(EXPR=modtype, UC=iLAM, UCN=iLAM[-G])),, drop=FALSE]
numer[i.x] <- log.tau[i.x] - lPV
})
}
if(ctrl$do.cv && !noise &&
any(i.x <- rowAlls(is.infinite(numer), useNames=FALSE))) {
numer[i.x,] <- log.tau[i.x,, drop=FALSE] - lPV
}
switch(EXPR=ctrl$algo,
EM= {
if(N1) {
denom <- rowLogSumExps(numer, useNames=FALSE)
loglike <- sum(if(ctrl$do.wts) denom * attr(seqs, "Weights") else denom)
if(ctrl$do.cv) {
return(loglike)
} else {
return(list(loglike = loglike, z = exp(numer - denom)))
}
} else {
return(if(ctrl$do.wts) logSumExp(numer * attr(seqs, "Weights")) else logSumExp(numer))
}
}, CEM= {
if(N1) {
if(!ctrl$random && ctrl$equalPro &&
is.element(modtype, c("CC", "CU", "CCN", "CUN"))) {
z <- apply(numer, 1L, function(x) which(max(x) == x))
if(is.list(z)) {
ty <- which(lengths(z) > 1)
z[ty] <- vapply(ty, function(i) .random_ass(numer[i,], which.max(z.old[i,]), fun=max, na.rm=TRUE), integer(1L))
}
z <- .unMAP(unlist(z), groups=seq_len(G))
} else {
z <- .unMAP(max.col(numer), groups=seq_len(G))
}
loglike <- sum(if(ctrl$do.wts) z * numer * attr(seqs, "Weights") else z * numer, na.rm=TRUE)
if(ctrl$do.cv) {
return(loglike)
} else {
return(list(loglike = loglike, z = z))
}
} else {
return(max(numer) * ifelse(ctrl$do.wts, attr(seqs, "Weights"), 1L))
}
})
}
}
#' @importFrom matrixStats "colSums2" "logSumExp" "rowAlls" "rowLogSumExps" "rowMeans2" "rowSums2"
.EM_algorithm <- function(SEQ, numseq, g, modtype, z, ctrl, gating = NULL, covars = NULL, HAM.mat = NULL, ll = NULL, MLRconverge = TRUE) {
itmax <- ctrl$itmax
st.ait <- ctrl$stopping == "aitken"
tol <- ctrl$tol
if(!(runEM <- g > 1)) {
ctrl$ties <- TRUE
Mstep <- .M_step(SEQ, modtype=modtype, ctrl=ctrl, numseq=numseq, HAM.mat=HAM.mat)
ll <- .E_step(SEQ, modtype=modtype, ctrl=ctrl, numseq=numseq, params=Mstep, HAM.mat=HAM.mat)
j <- 1L
ERR <- FALSE
} else {
ll <- if(is.null(ll)) c(-Inf, -sqrt(.Machine$double.xmax)) else ll
j <- 2L
emptywarn <- TRUE
ctrl$ties <- FALSE
noty <- TRUE
while(isTRUE(runEM)) {
j <- j + 1L
Mstep <- .M_step(SEQ, modtype=modtype, ctrl=ctrl, numseq=numseq, gating=gating, covars=covars, z=z, HAM.mat=HAM.mat, MLRconverge = MLRconverge)
check <- any(attr(Mstep$theta, "NonUnique")) || isFALSE(attr(Mstep$theta, "NoTies"))
ctrl$ties <- j < 4 ||
(noty <- noty && !check)
MLRconverge <- Mstep$MLRconverge
Estep <- .E_step(SEQ, modtype=modtype, ctrl=ctrl, numseq=numseq, params=Mstep, HAM.mat=HAM.mat, z.old=z)
z <- Estep$z
ERR <- any(is.nan(z))
if(isTRUE(ERR)) break
if(isTRUE(emptywarn) && ctrl$warn &&
any(zsum <- (colSums2(z, useNames=FALSE)
== 0))) { warning(paste0("\tThere were empty components: ", modtype, " (G=", g, ")\n"), call.=FALSE, immediate.=TRUE)
emptywarn <- FALSE
z[,zsum] <- sqrt(.Machine$double.neg.eps)
}
ll <- c(ll, Estep$loglike)
if(isTRUE(st.ait)) {
dX <- .aitken(ll[seq(j - 2L, j, 1L)])$ldiff
} else {
dX <- abs(ll[j] - ll[j - 1L])/(1 + abs(ll[j]))
}
runEM <- dX >= tol && j < itmax && !ERR
} # while (j)
}
return(list(ERR = ERR, j = j, Mstep = Mstep, ll = ll, z = z, MLRconverge = MLRconverge))
}
#' @importFrom matrixStats "rowSums2"
.entropy <- function(p, obs = FALSE) {
if(length(obs) > 1 ||
!is.logical(obs)) stop("'obs' must be a single logical indicator", call.=FALSE)
if(isTRUE(obs)) {
p[p == 0] <- NA
rowSums2(-p * log(p), useNames=FALSE, na.rm=TRUE)
} else {
p <- p[p > 0]
sum(-p * log(p))
}
}
.fac_to_num <- function(x) {
Vseq <- seq_len(nlevels(x[[1L]]))
as.data.frame(lapply(x, function(y) { levels(y) <- Vseq; y} ))
}
.heat_legend <- function(data, col = NULL, cex.lab = 1, ...) {
if(length(cex.lab) > 1 ||
(!is.numeric(cex.lab) ||
cex.lab <= 0)) stop("Invalid 'cex.lab' supplied", call.=FALSE)
if(!is.numeric(data)) stop("'data' must be numeric", call.=FALSE)
if(missing(col)) {
col <- grDevices::heat.colors(30L, rev=TRUE)
}
bx <- graphics::par("usr")
xpd <- graphics::par()$xpd
box.cx <- c(bx[2L] + (bx[2L] - bx[1L])/1000, bx[2L] + (bx[2L] - bx[1L])/1000 + (bx[2L] - bx[1L])/50)
box.cy <- c(bx[3L], bx[3L])
box.sy <- (bx[4L] - bx[3L]) / length(col)
xx <- rep(box.cx, each = 2L)
graphics::par(xpd = TRUE)
for(i in seq_along(col)) {
yy <- c(box.cy[1L] + (box.sy * (i - 1L)),
box.cy[1L] + (box.sy * (i)),
box.cy[1L] + (box.sy * (i)),
box.cy[1L] + (box.sy * (i - 1L)))
graphics::polygon(xx, yy, col = col[i], border = col[i])
}
graphics::par(new = TRUE)
yrange <- range(data, na.rm = TRUE)
base::plot(0, 0, type = "n", ylim = yrange, yaxt = "n", ylab = "", xaxt = "n", xlab = "", frame.plot = FALSE)
graphics::axis(side = 4, las = 2, tick = FALSE, line = 0.1, cex.axis = cex.lab)
suppressWarnings(graphics::par(xpd = xpd))
invisible()
}
.km_dist <- function(mode, obj, frwts = NULL) {
if(is.null(frwts)) return(sum(mode != obj))
obj <- as.character(obj)
mode <- as.character(mode)
different <- which(mode != obj)
n_mode <-
n_obj <- numeric(length(different))
for(i in seq_along(different)) {
frwt <- frwts[[different[i]]]
names <- names(frwt)
n_mode[i] <- frwt[which(names == mode[different[i]])]
n_obj[i] <- frwt[which(names == obj[different[i]])]
}
return(sum(1/n_mode + 1/n_obj))
}
.lab_width <- function(x, cex = 2/3, offset = 1.5) {
(offset + max(graphics::strwidth(x, units = "inches")) * graphics::par("mar")[1L]/graphics::par("mai")[1L]) * cex
}
#' @importFrom matrixStats "colSums2" "rowMeans2" "rowSums2"
#' @importFrom stringdist "stringdistmatrix"
.lambda_mle <- function(seqs, params, modtype = c("CC", "UC", "CU", "UU", "CCN", "UCN", "CUN", "UUN"), ctrl, numseq = NULL, HAM.mat = NULL) {
theta <- params$theta
P <- attr(seqs, "T")
V1V <- attr(seqs, "V1V")
lV1 <- attr(seqs, "logV1")
W <- attr(seqs, "W")
l.meth <- match.arg(modtype)
noise <- attr(seqs, "Noise")
n.meth <- is.element(l.meth, c("CU", "UU", "CUN", "UUN"))
p.meth <- is.element(l.meth, c("UC", "UU", "UCN", "UUN"))
opti <- ctrl$opti
if(is.null(numseq) && n.meth) {
numseq <- unname(sapply(seqs, .char_to_num))
}
if((G <- attr(seqs, "G")) == 1L) {
if(l.meth == "CCN") {
return(list(lambda = matrix(0L, nrow=1L, ncol=1L)))
}
numer <- switch(EXPR=l.meth, CC=P, CU=1L)
dG <- switch(EXPR=l.meth, CC=switch(EXPR=opti, medoid=HAM.mat[,attr(theta, "Ind"), drop=FALSE], .dseq(seqs, theta)), numseq != .char_to_num(theta))
if(ctrl$do.wts) {
ws <- attr(seqs, "Weights")
denom <- switch(EXPR=l.meth, CC=sum(dG * ws)/W, CU=rowSums2(sweep(dG, 2L, ws, FUN="*", check.margin=FALSE), useNames=FALSE)/W)
} else denom <- switch(EXPR=l.meth, CC=mean(dG), CU=rowMeans2(dG, refine=FALSE, useNames=FALSE))
} else {
N <- attr(seqs, "N")
G0 <- ifelse(noise, attr(seqs, "G0"), G)
if(is.null(z <- params$z)) stop("'z' must be supplied if 'G'>1", call.=FALSE)
if(p.meth &&
is.null(prop <-
params$prop)) stop("'prop' must be supplied if 'G'>1", call.=FALSE)
if(noise) {
z <- z[,-G, drop=FALSE]
switch(EXPR=l.meth, UCN=, UUN= {
prop <- prop[-G]
})
}
switch(EXPR=l.meth, CU=, UU=, CUN=, UUN= {
dG <- lapply(seq_len(G0), function(g) numseq != .char_to_num(theta[g]))
dGp <- vapply(seq_len(G0), function(g) rowSums2(sweep(dG[[g]], 2L, z[,g], FUN="*", check.margin=FALSE), useNames=FALSE), numeric(P))
}, {
pN <- P * switch(EXPR=l.meth, CCN=sum(z), W)
dG <- switch(EXPR=opti, medoid=HAM.mat[,attr(theta, "Ind"), drop=FALSE], .dseq(seqs, theta[seq_len(G0)]))
})
numer <- switch(EXPR=l.meth, CC=, CCN=pN,
UC=, UCN=pN * prop,
CUN=sum(z), W)
denom <- switch(EXPR=l.meth, CC=, CCN=sum(z * dG),
UC=, UCN=colSums2(z * dG, useNames=FALSE),
CU=, CUN=rowSums2(dGp, useNames=FALSE),
UU=, UUN=sweep(dGp, 2L, prop, FUN="/", check.margin=FALSE))
}
lambda <- suppressWarnings(ifelse(denom < (numer * V1V), lV1 + log(numer - denom) - log(denom), 0L))
lambda <- matrix(lambda, nrow=switch(EXPR=l.meth, UC=, UU=, UCN=, UUN=G0, 1L), ncol=switch(EXPR=l.meth, CU=, UU=, CUN=, UUN=P, 1L), byrow=is.element(l.meth, c("UU", "UUN")))
switch(EXPR=l.meth, UC=, UU=,
UCN=, UUN= {
lambda[prop == 0,] <- Inf
})
return(list(lambda = if(noise) rbind(lambda, 0L) else lambda, dG = dG))
}
#' @importFrom matrixStats "colMeans2" "colSums2" "rowSums2"
#' @importFrom nnet "multinom"
.M_step <- function(seqs, modtype = c("CC", "UC", "CU", "UU", "CCN", "UCN", "CUN", "UUN"), ctrl,
gating = NULL, covars = NULL, z = NULL, numseq = NULL, HAM.mat = NULL, MLRconverge = TRUE) {
if(!is.null(gating)) {
environment(gating) <- environment()
}
G <- attr(seqs, "G")
noise <- attr(seqs, "Noise")
if(is.null(numseq) && isFALSE(ctrl$numseq)) {
numseq <- unname(sapply(seqs, .char_to_num))
attr(numseq, "G") <- G
attr(numseq, "T") <- attr(seqs, "T")
}
if(G > 1L) {
if(is.null(z)) stop("'z' must be supplied when 'G'>1", call.=FALSE)
if(ctrl$do.wts) {
z <- z * attr(seqs, "Weights")
}
if((gate.g <- ctrl$gate.g)) {
prop <- if((need.prop <- is.element(modtype, c("UC", "UU", "UCN", "UUN")))) {
if(ctrl$do.wts) colSums2(z,
useNames=FALSE)/attr(seqs, "W") else colMeans2(z, refine=FALSE, useNames=FALSE) }
if(!noise || ctrl$noise.gate) {
fitG <- multinom(gating, trace=FALSE, data=covars, maxit=ctrl$g.itmax, reltol=ctrl$g.tol, MaxNWts=ctrl$MaxNWts)
tau <- fitG$fitted.values
} else {
zN <- z
z <- z[,-G, drop=FALSE]
z <- if(ctrl$do.wts) .renorm_z(z) * attr(seqs, "Weights") else .renorm_z(z)
z[is.nan(z)] <- .Machine$double.eps
fitG <- multinom(gating, trace=FALSE, data=covars, maxit=ctrl$g.itmax, reltol=ctrl$g.tol, MaxNWts=ctrl$MaxNWts)
tau <- .tau_noise(fitG$fitted.values, ifelse(need.prop, prop[G], ifelse(ctrl$do.wts, sum(zN[,G])/attr(seqs, "W"), mean(zN[,G]))))
z <- zN
}
MLRconverge <- MLRconverge && fitG$convergence == 0
} else {
prop <- if(ctrl$do.wts) colSums2(z,
useNames=FALSE)/attr(seqs, "W") else colMeans2(z, refine=FALSE, useNames=FALSE)
tau <- if(isFALSE(ctrl$equalPro)) prop else if(noise && !ctrl$equalNoise) c(rep((1 - prop[G])/attr(seqs, "G0"), attr(seqs, "G0")), prop[G]) else rep(1/G, G)
}
} else prop <- tau <- 1L
theta <- .optimise_theta(seqs=seqs, ctrl=ctrl, z=z, numseq=numseq, HAM.mat=HAM.mat)
MLE <- .lambda_mle(seqs=seqs, params=list(theta=theta, z=z, prop=prop), modtype=modtype, ctrl=ctrl, numseq=numseq, HAM.mat=HAM.mat)
param <- list(theta=theta, lambda=MLE$lambda, dG=MLE$dG, tau=tau, fitG=if(G > 1 && gate.g) fitG, MLRconverge=MLRconverge)
attr(param, "modtype") <- modtype
return(param)
}
.mat_byrow <- function(x, nrow, ncol) {
matrix(x, nrow=nrow, ncol=ncol, byrow=any(dim(as.matrix(x)) == 1))
}
#' @importFrom matrixStats "colMaxs" "rowMaxs"
.misclass <- function(classification, class) {
.q <- function(map, len, x) {
x <- as.character(x)
map <- lapply(map, as.character)
y <- vapply(map, "[", character(1L), 1L)
best <- y != x
if(all(len) == 1) return(best)
errmin <- sum(as.numeric(best))
z <- vapply(map, function(x) x[length(x)], character(1L))
mask <- len != 1
count <- rep(0L, length(len))
k <- sum(as.numeric(mask))
j <- 0L
while(y != z) {
i <- k - j
m <- mask[i]
count[m] <- (count[m] %% len[m]) + 1L
y[x == names(map)[m]] <- map[[m]][count[m]]
temp <- y != x
err <- sum(as.numeric(temp))
if(err < errmin) {
errmin <- err
best <- temp
}
j <- (j + 1L) %% k
}
return(best)
}
.mapClass <- function(a, b) {
l <- length(a)
x <- y <- rep(NA, l)
if(length(b) != l) { warning("unequal lengths")
return(x)
}
if(is.factor(a) & is.factor(b) & nlevels(a) == nlevels(b)) {
aTOb <- as.list(levels(b))
names(aTOb) <- levels(a)
bTOa <- as.list(levels(a))
names(bTOa) <- levels(b)
out <- list(aTOb = aTOb, bTOa = bTOa)
return(out)
}
if(is.character(a) & is.character(b) & length(unique(a)) == length(unique(b))) {
aTOb <- as.list(unique(b))
names(aTOb) <- unique(a)
bTOa <- as.list(unique(a))
names(bTOa) <- unique(b)
out <- list(aTOb = aTOb, bTOa = bTOa)
return(out)
}
Tab <- table(a, b)
Ua <- dimnames(Tab)[[1L]]
Ub <- dimnames(Tab)[[2L]]
aTOb <- rep(list(Ub), length(Ua))
names(aTOb) <- Ua
bTOa <- rep(list(Ua), length(Ub))
names(bTOa) <- Ub
k <- nrow(Tab)
Map <- rep(0L, k)
Max <- rowMaxs(Tab, useNames=FALSE)
for(i in seq_len(k)) {
I <- match(Max[i], Tab[i,], nomatch = 0)
aTOb[[i]] <- Ub[I]
}
if(is.numeric(b)) aTOb <- lapply(aTOb, as.numeric)
k <- ncol(Tab)
Map <- rep(0L, k)
Max <- colMaxs(Tab, useNames=FALSE)
for(j in seq_len(k)) {
J <- match(Max[j], Tab[,j])
bTOa[[j]] <- Ua[J]
}
if(is.numeric(a)) bTOa <- lapply(bTOa, as.numeric)
out <- list(aTOb = aTOb, bTOa = bTOa)
return(out)
}
if(any(isNA <- is.na(classification))) {
classification <- as.character(classification)
nachar <- paste(unique(classification[!isNA]), collapse = "")
classification[isNA] <- nachar
}
MAP <- .mapClass(classification, class)
len <- lengths(MAP[[1L]])
if(all(len) == 1) {
CT <- unlist(MAP[[1L]])
I <- match(as.character(classification), names(CT), nomatch = 0)
one <- CT[I] != class
} else {
one <- .q(MAP[[1L]], len, class)
}
len <- lengths(MAP[[2L]])
if(all(len) == 1) {
TC <- unlist(MAP[[2L]])
I <- match(as.character(class), names(TC), nomatch = 0)
two <- TC[I] != classification
} else {
two <- .q(MAP[[2L]], len, classification)
}
err <- as.vector(if(sum(as.numeric(one)) > sum(as.numeric(two))) one else two)
bad <- seq_along(classification)[err]
return(list(misclassified = bad, errorRate = length(bad)/length(class)))
}
.modal <- function(x) {
ux <- unique(x)
tab <- tabulate(match(x, ux))
ux[max(tab) == tab]
}
.num_to_char <- function(x) {
intToUtf8(as.integer(x))
}
#' @importFrom matrixStats "colSums2" "rowSums2"
#' @importFrom stringdist "stringdistmatrix"
.optimise_theta <- function(seqs, ctrl, z = NULL, numseq = NULL, HAM.mat = NULL) {
opti <- ctrl$opti
ordering <- ctrl$ordering
V <- attr(seqs, "V")
P <- attr(seqs, "T")
nmeth <- ctrl$nmeth
G <- attr(seqs, "G") - nmeth
Gseq <- seq_len(G)
noties <- TRUE
if(G == 0) {
return(rep(NA, P))
}
if(G > 1L && is.null(z)) stop("'z' must be supplied when 'G'>1", call.=FALSE)
if(opti == "mode" || ordering != "none") {
if(is.null(numseq) && isFALSE(ctrl$numseq)) {
numseq <- unname(sapply(seqs, .char_to_num))
attr(numseq, "V") <- V
attr(numseq, "T") <- P
}
attr(numseq, "G") <- G
if(opti == "mode") {
if(G > 1 || ctrl$do.wts) {
theta <- .weighted_mode(numseq=numseq,
z=if(G == 1) as.matrix(attr(seqs, "Weights")) else if(nmeth) z[,Gseq, drop=FALSE] else z)
if(is.list(theta) &&
(any(ties_t <- vapply(theta, is.matrix, logical(1L)))) ||
(any(t_ties <- apply(theta, c(1L, 2L), function(x) any(nchar(x) > 1))))) {
noties <- FALSE
if(any(ties_t)) {
nonu <- ties_t
theta[ties_t] <- lapply(theta[ties_t], apply, 2L, if(isTRUE(ctrl$random)) sample else "[[", 1L)
theta <- do.call(cbind, theta)
t_ties <- vapply(as.list(theta), function(x) any(nchar(x) > 1), logical(1L))
t_ties <- matrix(t_ties, nrow=nrow(theta), ncol=ncol(theta))
}
if(any(t_ties)) {
t_ties <- which(t_ties, arr.ind=TRUE)
nonu <- replace(rep(FALSE, G), unique(t_ties[,2L]), TRUE)
nonu <- if(any(ties_t)) ties_t | nonu else nonu
theta[t_ties] <- lapply(theta[t_ties], if(isTRUE(ctrl$random)) sample else "[[", 1L)
}
if(ctrl$ties &&
ctrl$verbose) {
if(ctrl$random) { message("\tTie for modal sequence position broken at random\n")
} else message("\tTie found for modal sequence position\n")
}
} else nonu <- rep(FALSE, G)
theta <- apply(theta, 2L, .num_to_char)
} else {
theta <- apply(numseq, 1L, .modal)
if(nonu <- is.list(theta)) {
noties <- FALSE
unon <- which(lengths(theta) > 1)
theta[unon] <- lapply(theta[unon], if(isTRUE(ctrl$random)) sample else "[[", 1L)
if(ctrl$ties &&
ctrl$verbose) {
if(ctrl$random) { message("\tTie for modal sequence position broken at random\n")
} else message("\tTie found for modal sequence position\n")
}
}
theta <- .num_to_char(theta)
}
theta <- if(nmeth) c(theta, NA) else theta
attr(theta, "NonUnique") <- nonu
attr(theta, "NoTies") <- noties
return(theta)
}
}
theta.opt <- .theta_data(seqs=seqs, z=z, ctrl=ctrl, HAM.mat=HAM.mat)
inds <- attr(theta.opt$theta, "Ind")
nonu <- attr(theta.opt$theta, "NonUnique")
noties <- attr(theta.opt$theta, "NoTies")
if(opti == "medoid") {
theta <- if(nmeth) c(theta.opt$theta, NA) else theta.opt$theta
attr(theta, "Ind") <- inds
attr(theta, "NonUnique") <- nonu
attr(theta, "NoTies") <- noties
return(theta)
}
pseq <- seq_len(P)
vseq <- seq_len(V)
opt <- theta.opt$dsum
if(G > 1L) {
theta <- lapply(theta.opt$theta, .char_to_num)
} else {
z <- matrix(if(ctrl$do.wts) attr(seqs, "Weights") else 1L, nrow=attr(seqs, "N"))
theta <- list(.char_to_num(theta.opt$theta))
}
if(ordering != "none") {
stab <- if(G == 1 && !ctrl$do.wts) list(apply(numseq, 1L, tabulate, V)) else lapply(Gseq, function(g) apply(sweep(numseq, 2L, z[,g], FUN="*", check.margin=FALSE), 1L, tabulate, V))
sorder <- lapply(Gseq, function(g) order(apply(stab[[g]], 2L, .entropy), decreasing=ordering == "decreasing"))
theta <- lapply(Gseq, function(g) theta[[g]][sorder[[g]]])
seqs <- lapply(Gseq, function(g) apply(numseq[sorder[[g]],], 2L, .num_to_char))
} else seqs <- replicate(G, list(seqs))
for(g in Gseq) {
p.opt <- Inf
switch(EXPR = opti,
first = {
opts <- rep(NA, V)
while(p.opt > opt[g]) {
p.opt <- opt[g]
for(p in pseq) {
vdiff <- setdiff(vseq, theta[[g]][p])
for(v in vdiff) {
opts[v] <- sum(z[,g] * .dseq(seqs[[g]], paste(replace(theta[[g]], p, v), collapse="")))
}
opts[theta[[g]][p]] <- opt[g]
opt[g] <- min(opts)
ind <- which(opts == opt[g])
if((nonu[g] <- length(ind) > 1)) {
ind <- ifelse(ctrl$random, sample(ind, 1L), ind)
}
if(nonu[g] &&
noties) {
noties <- FALSE
}
theta[[g]][p] <- ind
}
}
}, GA = {
opts <- matrix(NA, nrow=P, ncol=V)
while(p.opt > opt[g]) {
p.opt <- opt[g]
for(p in pseq) {
vdiff <- setdiff(vseq, theta[[g]][p])
for(v in vdiff) {
opts[p, v] <- sum(z[,g] * .dseq(seqs[[g]], paste(replace(theta[[g]], p, v), collapse="")))
}
opts[p, theta[[g]][p]] <- opt[g]
}
opt[g] <- min(opts)
if(p.opt > opt[g]) {
oi <- which(opts == opt[g], arr.ind=TRUE)
if((nonu[g] <- NROW(oi) > 1)) {
oi <- if(ctrl$random) oi[sample.int(nrow(oi), 1L),] else oi[1L,]
}
if(nonu[g] &&
noties) {
noties <- FALSE
}
theta[[g]][oi[1L]] <- oi[2L]
}
}
})
}
if(!noties && any(nonu) &&
ctrl$ties &&
ctrl$verbose) {
if(ctrl$random) { message("\tTie for estimated sequence position broken at random\n")
} else message("\tTie found for estimated sequence position\n")
}
theta <- switch(EXPR=ordering,
none=if(G > 1) {
do.call(base::c, lapply(theta, .num_to_char))
} else .num_to_char(theta[[1L]]),
if(G > 1) {
do.call(base::c, lapply(Gseq, function(g) .num_to_char(theta[[g]][match(pseq, sorder[[g]])])))
} else .num_to_char(theta[[1L]][match(pseq, sorder[[1L]])]))
theta <- if(nmeth) c(theta, NA) else theta
attr(theta, "NonUnique") <- nonu
attr(theta, "NoTies") <- noties
return(theta)
}
.pick_MEDCrit <- function(x, pick = 3L) {
if(!inherits(x,
"MEDcriterion")) stop("'x' must be an object of class 'MEDcriterion'", call.=FALSE)
x <- replace(x, !is.finite(x), NA)
pick <- min(pick, length(x[!is.na(x)]))
decrease <- !is.element(attr(x, "Criterion"), c("DF", "ITERS", "NEC"))
x.sx <- sort(x, decreasing=decrease)[pick]
x.crit <- if(decrease) x >= x.sx else x <= x.sx
x.ind <- which(x.crit, arr.ind=TRUE)
x.val <- sort(x[x.ind], decreasing=decrease)
ind.x <- order(x[x.ind], decreasing=decrease)
x.ind <- x.ind[ind.x,, drop=FALSE]
x.ind[,1L] <- gsub(".*= ", "", rownames(x)[x.ind[,1L]])
x.ind[,2L] <- colnames(x)[as.numeric(x.ind[,2L])]
return(list(crits = stats::setNames(x.val[seq_len(pick)], vapply(seq_len(pick), function(p, b=x.ind[p,]) paste0(b[2L], ",", b[1L]), character(1L))), pick = pick))
}
.rand_MIN <- function(dist, random = TRUE) {
if(!random) return(which.min(dist))
a <- which(dist == min(dist, na.rm=TRUE))
a <- if(length(a) > 1) sample(a, 1L) else a
return(a)
}
.rand_MAX <- function(dist, random = TRUE) {
if(!random) return(which.max(dist))
a <- which(dist == max(dist, na.rm=TRUE))
a <- if(length(a) > 1) sample(a, 1L) else a
return(a)
}
.random_ass <- function(x, y, fun = max, random = TRUE, ...) {
a <- which(fun(x, ...) == x)
a <- if(length(a) > 1) ifelse(y %in% a, y, ifelse(random, sample(a, 1L), a[1L])) else a
return(a)
}
.rDirichlet <- function(G, shape = 1L) {
tmp <- if(all(shape == 1)) stats::rexp(G, rate=1L) else stats::rgamma(G, shape=shape, rate=1L)
tmp/sum(tmp)
}
#' @importFrom matrixStats "rowSums2"
.renorm_z <- function(z) z / rowSums2(z, useNames=FALSE)
.replace_levels <- function(seq, levels = NULL) {
seq <- utf8ToInt(seq)
if(is.null(levels)) factor(seq) else factor(seq, levels=seq_along(levels) - any(seq == 0), labels=as.character(levels))
}
.seq_grid <- function(length, ncat) {
if(!is.numeric(length) ||
length(length) != 1 ||
length <= 0) stop("'length' must a strictly positive scalar", call.=FALSE)
if(!is.numeric(ncat) ||
length(ncat) != 1 ||
ncat <= 0) stop("'ncat' must a strictly positive scalar", call.=FALSE)
do.call(expand.grid, replicate(length, list(0L:(ncat - 1L))))
}
.tau_noise <- function(tau, z0) {
t0 <- ifelse(length(z0) == 1, z0, mean(z0))
cbind(tau * (1 - t0), unname(t0))
}
#' @importFrom matrixStats "colSums2" "rowSums2"
#' @importFrom stringdist "stringdistmatrix"
.theta_data <- function(seqs, z = NULL, ctrl = NULL, HAM.mat = NULL) {
if((G <- attr(seqs, "G")) == 1L) {
sumdist <- if(ctrl$do.wts) colSums2(HAM.mat * attr(seqs, "Weights"), useNames=FALSE) else rowSums2(HAM.mat, useNames=FALSE)
distmin <- min(sumdist)
ind <- which(sumdist == distmin)
if((nonu <- length(ind) > 1)) {
ind <- ifelse(ctrl$random, sample(ind, 1L), ind)
if(ctrl$ties &&
ctrl$verbose) {
if(ctrl$random) { message("\tTie for medoid sequence position broken at random\n")
} else message("\tTie found for medoid sequence position\n")
}
}
theta <- seqs[ind]
attr(theta, "Ind") <- ind
attr(theta, "NonUnique") <- nonu
attr(theta, "NoTies") <- !nonu
return(list(theta = theta, dsum = distmin))
} else {
if(is.null(z)) stop("'z' must be supplied if 'G'>1", call.=FALSE)
if(is.null(ctrl$nmeth)) stop("'nmeth' must be supplied if 'G'>1", call.=FALSE)
theta <- dsum <- list()
noties <- TRUE
G0 <- G - ctrl$nmeth
nonu <-
inds <- rep(FALSE, G0)
for(g in seq_len(G0)) {
sumdist <- colSums2(HAM.mat * z[,g], useNames=FALSE)
distmin <- min(sumdist)
ind <- which(sumdist == distmin)
if((nonu[g] <- length(ind) > 1)) {
ind <- ifelse(ctrl$random, sample(ind, 1L), ind)
if(noties &&
ctrl$ties) {
noties <- FALSE
if(ctrl$verbose) {
if(ctrl$random) { message("\tTie for medoid sequence position broken at random\n")
} else message("\tTie found for medoid sequence position\n")
}
}
}
theta[[g]] <- seqs[ind]
dsum[[g]] <- distmin
inds[[g]] <- ind
}
theta <- do.call(base::c, theta)
attr(theta, "Ind") <- inds
attr(theta, "NonUnique") <- nonu
attr(theta, "NoTies") <- noties
return(list(theta = theta, dsum = do.call(base::c, dsum)))
}
}
.tidy_breaks <- function(x) {
x <- sprintf("%.2f", x)
x <- paste(x[-length(x)],
x[-1L], sep=",")
x[1L] <- paste0("[", x[1L], "]")
x[-1L] <- paste0("(", x[-1L], "]")
x
}
.unique_list <- function(x) {
x <- lapply(x, function(x) { attributes(x) <- NULL; x} )
sum(duplicated.default(x, nmax = 1L)) == (length(x) - 1L)
}
.unMAP <- function(classification, groups = NULL, noise = NULL, ...) {
n <- length(classification)
u <- sort(unique(classification))
if(is.null(groups)) {
groups <- u
} else {
if(any(match(u, groups,
nomatch = 0) == 0)) stop("'groups' incompatible with classification", call.=FALSE)
miss <- match(groups, u, nomatch = 0) == 0
}
cgroups <- as.character(groups)
if(!is.null(noise)) {
noiz <- match(noise, groups, nomatch = 0)
if(any(noiz == 0)) stop("noise incompatible with classification", call.=FALSE)
groups <- c(groups[groups != noise], groups[groups == noise])
noise <- as.numeric(factor(as.character(noise), levels = unique(groups)))
}
groups <- as.numeric(factor(cgroups, levels = unique(cgroups)))
classification <- as.numeric(factor(as.character(classification), levels = unique(cgroups)))
k <- length(groups) - length(noise)
nam <- levels(groups)
if(!is.null(noise)) {
k <- k + 1L
nam <- nam[seq_len(k)]
nam[k] <- "noise"
}
z <- matrix(0L, n, k, dimnames = c(names(classification), nam))
for(j in seq_len(k)) {
z[classification == groups[j], j] <- 1L
}
return(z)
}
.update_mode <- function(num, cluster, data, random = TRUE, weights = NULL) {
diff <- which(cluster == num)
clust <- data[diff,, drop=FALSE]
apply(clust, 2L, function(cat) {
cat <- if(is.null(weights)) table(cat) else tapply(weights[diff], cat, FUN=sum)
return(names(cat)[.rand_MAX(cat, random)])
})
}
.version_above <- function(pkg, versi) {
pkg <- as.character(utils::packageVersion(pkg))
identical(pkg, versi) || (utils::compareVersion(pkg, versi) >= 0)
}
.weighted_mode <- function(numseq, z) {
Gseq <- seq_len(attr(numseq, "G"))
Pseq <- seq_len(attr(numseq, "T"))
Vseq <- seq_len(attr(numseq, "V"))
sapply(Gseq, function(g)
sapply(Pseq, function(p,
x=vapply(Vseq, function(v) sum(z[numseq[p,] == v,g]), numeric(1L))) which(x == max(x))))
}
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.