#' Condenses list of numbers to convenient printable form, e.g. c(1,2,3,5) -> '1-3,5'
#'
#' Folds the input into runs of consecutive values via agglom.reduc(), then
#' renders each run either as a single number or as 'start-end'.
#' @param xs numeric vector of values to condense
#' @return single comma-separated string describing the runs
agglom <- function(xs) {
  runs <- Reduce(agglom.reduc, xs, list())
  labels <- vapply(runs, function(r) {
    if (r[1] == r[2]) as.character(r[1]) else paste0(r[1], '-', r[2])
  }, character(1))
  paste0(labels, collapse = ',')
}
#' Reduction function for turning list of integers into lists of consecutive sets
#'
#' @param xs list of length-2 vectors c(start, end): the runs built so far
#' @param x next number to absorb (input is assumed sorted ascending)
#' @return updated list of runs, with x either extending the last run or
#'   starting a new one
agglom.reduc <- function(xs, x) {
  n <- length(xs)
  if (n == 0L) {
    # first element starts the first run
    return(list(c(x, x)))
  }
  if ((x - xs[[n]][2]) == 1) {
    # consecutive with the current run: extend its end
    xs[[n]] <- c(xs[[n]][1], x)
  } else {
    # gap: open a new run
    xs[[n + 1]] <- c(x, x)
  }
  xs
}
#' Reporting function for parallel categorizations
#'
#' Intended to run in a forked helper process: repeatedly reads progress
#' records of the form 'sim:iter:k' (written by make_report_function()
#' closures) from the connection `f` and paints a one-line progress display,
#' printing a permanent completion line whenever a simulation finishes.
#' @param f binary-mode fifo/connection carrying progress records
#' @param sims vector of simulation ids expected on the connection
#' @param max.iter iteration at which a simulation counts as finished
process_reports <- function(f, sims, max.iter) {
nsim <- length(sims)
# lookup table: simulation id (as character name) -> slot index
sim.map <- seq_along(sims)
names(sim.map) <- sims
# done[i]: last completed iteration of sims[i]; ks[[i]]: history of k values
done <- integer(length=nsim)
ks <- vector(mode='list', length=nsim)
# width of the blank string used to wipe the status line; sized from the
# expected number of concurrent entries (heuristic — TODO confirm sizing)
length.wipe <- min(6, getOption('mc.cores', default=2L)) * (11 + log(max.iter, 10))
wipe.str <- paste0(rep(' ', length.wipe), collapse='')
# NOTE(review): loop relies on isIncomplete() of a blocking fifo going TRUE
# once all writers close — confirm this terminates as intended
while(!isIncomplete(f)) {
msg <- readBin(f, 'character')
msgs <- as.numeric(strsplit(msg, ':', fixed=TRUE)[[1]])
# extract simulation id, and find its runner index
simi <- msgs[1]
simidx <- sim.map[as.character(simi)]
# extract last completed iteration
done[simidx] <- msgs[2]
ks[[simidx]] <- c(ks[[simidx]], msgs[3])
# this simulation just finished: wipe the status line, then print a
# permanent completion line with overall percent and its k history
if(msgs[2] == max.iter) {
#cat('\r', rep(' ', getOption('width')), sep='', collapse='')
#cat('\r', rep(' ', length.wipe), sep='', collapse='')
cat('\r', wipe.str, '\r')
cat(sprintf('\r(%5.1f%%) %d %s\n',
100*sum(done==max.iter) / nsim,
simi,
agglom(sort(unique(ks[[simidx]])))))
#cat('\r', simi, ' ', agglom(sort(unique(ks[[simi]]))), '\n',
# sep='', collapse='')
}
# in-flight simulations: started but not yet finished
curr <- which((done>0) & (done<max.iter))
# fl: positions of the most- and least-advanced in-flight simulations
fl <- order(done[curr], decreasing=TRUE)[c(1, length(curr))]
pcts <- 100*done[curr]/max.iter
if(length(curr) > 6) {
# too many to list: show best and worst plus a count of the rest
cat(sprintf('\r%s\r%d %6.2f%% ...[+%d]... %d %6.2f%%\r',
wipe.str, sims[curr[fl[1]]], pcts[fl[1]], length(curr) - 2, sims[curr[fl[2]]], pcts[fl[2]]))
} else {
# few enough to list all of them on one line
cat('\r', paste0(sprintf('%d %6.2f%%', sims[curr], 100*done[curr]/max.iter),
collapse=', '),
sep='')
}
}
cat('\n')
# terminate this forked reporter process (parallel internal API)
parallel:::mcexit()
}
#' Factory for passing data to reporting function
#'
#' @param f connection that progress records are written to
#' @param simi simulation id baked into every record
#' @return function(i, k) that writes a 'simi:i:k' record to f
make_report_function <- function(f, simi) {
  function(i, k) {
    writeBin(sprintf('%d:%d:%d', simi, i, k), f)
  }
}
#' Runner method for generating categories
#'
#' Forks a reporter process that renders progress read from a fifo, then runs
#' make_categories() for each simulation id in parallel via mclapply.
#' @param positions positions to categorize, as generated by simulation code
#'   (indexed with data.table syntax below, so it must be a data.table with
#'   at least columns sim and id)
#' @param nsim number of simulation runs to categorize (currently unused —
#'   the sim column determines the runs; kept for interface compatibility)
#' @param max.iter number of iterations per market to categorize
#'   (defaults to max(positions$id) when NULL)
#' @param ... forwarded to make_categories()
#' @return list of per-simulation results from parallel::mclapply
#' @export
make_categories_runner <- function(positions, nsim=NULL, max.iter=NULL, ...) {
#nsim <- if(is.null(nsim)) max(positions$sim) else nsim
sim_seq <- unique(positions$sim)
#cat('Make categories runner, max.iter', max.iter, '\n')
#print(sim_seq)
#nsim <- length(sim_seq)
niter <- if(is.null(max.iter)) max(positions$id) else max.iter
# shared fifo: worker closures write progress records, reporter child reads
# NOTE(review): blocking=T uses the reassignable T; prefer TRUE
f <- fifo(tempfile(), open="w+b", blocking=T)
# NOTE(review): parallel:::mcfork() is internal API; the branch presumably
# runs in the forked child (where a "masterProcess" object is returned), so
# the child becomes the reporter — confirm against parallel internals
if (inherits(parallel:::mcfork(), "masterProcess")) {
process_reports(f, sim_seq, max.iter=niter)
}
result <- parallel::mclapply(sim_seq, function(simi) {
reportf <- make_report_function(f, simi)
# positions[sim==simi] is data.table filtering syntax
make_categories(positions=positions[sim==simi],
reportf=reportf,
max.iter=niter,
...)
})
close(f)
result
}
#' Make 'optimal' categories for a set of positions
#'
#' Finds the optimal number and position of categories for the evolution of a market
#' Optimal in the sense of information criterion
#' @param positions set of market positions (data.table with at least columns
#'   sim, id, agent.id and x — indexed with data.table syntax below)
#' @param min.iter initial iteration to categorize (usually after insertions)
#' @param max.iter maximum iteration to categorize
#' @param package which clustering package to use ('mclust', 'BAMBI' or 'EMCluster')
#' @param reportf verbosity function (NULL to be quiet); called as reportf(iteration, k)
#' @param ... capture additional arguments, forwarded to the chosen categorize.* function
#' @return list of length max.iter; element i is the mixture fitted on all
#'   positions with id <= i (elements below min.iter stay NULL)
make_categories <- function(positions,
min.iter=positions[agent.id>1, min(id)],
max.iter=positions[, max(id)],
package='mclust',
reportf=NULL,
...) {
# assign NULL mixture to all uncategorized iterations
#ems <- rep(list(NULL), min.iter-1)
#ems <- rep(list(NULL), max.iter)
ems <- vector('list', max.iter)
# dispatch to the selected clustering backend
xcat.f <- switch(package,
'mclust'=categorize.mclust,
'BAMBI'=categorize.BAMBI,
'EMCluster'=categorize.EMCluster)
max.errors <- 3
#max.errors <- sample(0:1, 1)
# retry wrapper: re-attempts a failed backend fit up to max.errors times
# (fits can fail intermittently — presumably stochastic initialization),
# then re-raises the last error via stop(e)
errorf <- function(x, e, remain=max.errors) {
#if(remain < max.errors) cat('DEBUG errorf', remain, ', simi', positions[, unique(sim)], '\n')
if(remain > 0) {
xcat <- tryCatch({
xcat.f(x, ...)
}, error=function(e) {
cat(sprintf('\nERROR (-%d) on %d: %s\n', remain, positions[, unique(sim)], e$message))
# recurse with one fewer retry remaining
errorf(x, e, remain-1)
})
} else {
cat(sprintf('\nPERSISTENT ERROR on %d: %s\n', positions[, unique(sim)], e$message))
stop(e)
}
xcat
}
# categorize the cumulative set of positions at each iteration
for(i in min.iter:max.iter) {
x <- positions[id <= i, x]
#cat('DEBUG initial, iter', i, ', simi', positions[, unique(sim)], '\n')
xcat <- errorf(x, NULL)
#tryCatch({
# xcat <- xcat.f(x, ...)
#}, error=function(e) cat('\nERROR:', e$message, '\n'))
#xcat <- switch(package,
# 'mclust'=categorize.mclust(x, ...),
# 'BAMBI'=categorize.BAMBI(x, ...),
# 'EMCluster'=categorize.EMCluster(x, ...))
ems[[i]] <- xcat$mix
if(!is.null(reportf)) reportf(i, xcat$k)
}
ems
}
#' Fit a gaussian mixture with mclust, widening the component search window
#'
#' Whenever Mclust selects a model at the top of the current G window, the
#' window is doubled and the fit repeated, so the final choice is strictly
#' interior to the searched range.
#' @param x positions to cluster
#' @param min.k,max.k initial component-count search window
#' @param ... forwarded to mclust::Mclust
#' @return list(mix=Mclust fit, k=selected number of components)
#' @import mclust
categorize.mclust <- function(x, min.k=1, max.k=9, ...) {
  lo <- min.k
  hi <- max.k
  fit <- mclust::Mclust(x, G=lo:hi, ...)
  while (fit$G == hi) {
    # best model sits at the window top: double the window and refit
    lo <- hi
    hi <- 2 * hi
    fit <- mclust::Mclust(x, G=lo:hi, ...)
  }
  list(mix=fit, k=fit$G)
}
#' Fit a wrapped-normal angular mixture with BAMBI
#'
#' Rescales x onto [0, 2*pi] and fits an incremental angular mixture,
#' doubling the component-count search window whenever the chosen model
#' sits at the top of it.
#' @param x positions (linear scale; rescaled to angles internally)
#' @param min.k,max.k initial component-count search window
#' @param ic information criterion name, passed to fit_incremental_angmix as crit
#' @param ... absorbed for interface compatibility (not forwarded)
#' @return list(mix=BAMBI point estimates, k=chosen component count)
#' @import BAMBI
categorize.BAMBI <- function(x, min.k=1, max.k=10, ic='BIC', ...) {
# run one incremental fit with console output suppressed via capture.output
subcommand <- function(x, ..min.k, ..max.k) {
capture.output({
res <- BAMBI::fit_incremental_angmix(model='wnorm',
data=x,
show.progress=FALSE,
silent=TRUE,
crit=ic,
start_ncomp=..min.k,
max_ncomp=..max.k)
res <- BAMBI::bestmodel(res)
})
res
}
.min.k <- min.k
.max.k <- max.k
# map positions onto the circle
xang <- scales::rescale(x, to=c(0, 2*pi))
res <- subcommand(xang, .min.k, .max.k)
# chosen size hit the window top: widen the window and refit
while(res$ncomp == .max.k) {
.min.k <- .max.k
.max.k <- 2*.max.k
res <- subcommand(xang, .min.k, .max.k)
}
# presumably resolves label switching before point estimation — only
# meaningful (and only valid) for multi-component fits
if (res$ncomp > 1) capture.output(res <- BAMBI::fix_label(res))
mix <- BAMBI::pointest(res)
list(mix=mix, k=res$ncomp)
}
#' Choose the number of EMCluster components by greedy IC search
#'
#' Starts at min.k and keeps bumping k while the requested information
#' criterion improves (decreases) and enough observations remain to
#' identify the extra component.
#' @param x positions (numeric vector)
#' @param min.k smallest component count to consider
#' @param ic name of the information criterion to compare
#' @param ... absorbed for interface compatibility with the other categorize.* functions
#' @return list(mix=EMCluster fit, k=chosen component count)
categorize.EMCluster <- function(x, min.k=1, ic='BIC', ...) {
nx <- length(x)
mx <- as.matrix(x)
# fit k clusters and compute the information criteria for the fit
# (the local `ic` here shadows the argument only inside catf)
catf <- function(k) {
mix <- EMCluster::init.EM(mx, nclass=k, EMC=EMCluster::.EMC)
ic <- EMCluster::em.ic(mx, mix)
list(mix=mix, ic=ic)
}
# search space
k <- min.k
cata <- catf(k)
# do while we keep seeing IC improvements
repeat {
# make sure we have enough observations to estimate k means and variances (2*k dofs)
# plus 1 dof for optimization i think? otherwise we have collinearity
if(2*(k+1) > nx - 1) break
#if(k+1 > i/2) break
# search at higher k
catb <- catf(k+1)
# get IC differences
ics <- names(cata$ic)
ic.diffs <- sapply(ics, function(ic) catb$ic[[ic]] - cata$ic[[ic]])
names(ic.diffs) <- ics
# if ic improvement, accept bump in k
# NOTE(review): ic.diffs[ic] indexes by name — assumes em.ic()'s result
# names include the requested criterion (e.g. 'BIC'); confirm
if(ic.diffs[ic] < 0) {
k <- k+1
cata <- catb
} else {
break
}
}
list(mix=cata$mix, k=k)
}
#' Assigns category grade of membership to positions
#'
#' Evaluates each mixture component's log-density at the positions, one
#' column per component; with peak.difference each column is shifted by its
#' own maximum so the best-fitting position in a category scores 0.
#' @param x positions
#' @param mix category mixture distribution (EMCluster-style: nclass, pi, Mu, LTSigma)
#' @param peak.difference grade of membership = difference from peak GOM within category
#' @return length(x) by nclass matrix of (shifted) log densities
#' @importFrom EMCluster dmixmvn
#' @export
categorize_positions <- function(x, mix, peak.difference=TRUE) {
# NOTE(review): pi=mix$pi[ci] passes a single, un-renormalized weight to
# dmixmvn for a one-component "mixture" — confirm this yields the intended
# per-component density scaling
res <- sapply(1:mix$nclass, function(ci) {
cat.ps <- dmixmvn(as.matrix(x), emobj=NULL, log=TRUE,
pi=mix$pi[ci],
Mu=mix$Mu[ci,,drop=F],
LTSigma=mix$LTSigma[ci,,drop=FALSE])
if(peak.difference) {
cat.ps - max(cat.ps)
} else {
cat.ps
}
})
res
}
#' Get grades of membership for an object according to a mixture
#'
#' GOM of x in component i is its density relative to that component's peak
#' density: a difference on the log scale, a ratio otherwise.
#' @param x object to categorize
#' @param mix mixtures distributions (anything get_mix_parameters() understands)
#' @param logp return grades-of-membership in log form
#' @return length(x) by k matrix of GOMs (vector when length(x) == 1)
#' @export
get_goms <- function(x, mix, logp=TRUE) {
  mixps <- get_mix_parameters(mix)
  # seq_len() guards the degenerate k == 0 case (1:0 would iterate c(1, 0))
  sapply(seq_len(mixps$k), function(i) {
    ps <- dnorm(x, mean=mixps$mean[i], sd=mixps$sd[i], log=logp)
    # density evaluated at the component mean == the component's peak
    peak <- dnorm(mixps$mean[i], mean=mixps$mean[i], sd=mixps$sd[i], log=logp)
    if (logp) {
      ps - peak
    } else {
      ps/peak
    }
  })
}
#' Get grades of membership for an object according to a mixture (linear landscape)
#'
#' The linear landscape uses plain gaussian components, which is exactly what
#' get_goms() computes; delegate to it rather than duplicating the
#' implementation (the two bodies were previously identical copies).
#' @param x object to categorize
#' @param mix mixtures distributions
#' @param logp return grades-of-membership in log form
#' @export
get_goms_linear <- function(x, mix, logp = TRUE) {
  get_goms(x, mix, logp = logp)
}
#' Get grades of membership for an object according to a mixture (Salop landscape)
#'
#' Currently assuming BAMBI wrapped-normal components; x is rescaled onto
#' [0, 2*pi] before evaluating densities. GOM is density relative to the
#' component peak (difference on the log scale, ratio otherwise).
#' @param x object to categorize
#' @param mix mixtures distributions (BAMBI parameter matrix)
#' @param logp return grades-of-membership in log form
#' @export
get_goms_salop <- function(x, mix, logp = TRUE) {
  mixps <- get_mix_parameters.BAMBI(mix)
  xang <- scales::rescale(x, to=c(0, 2*pi))
  # seq_len() guards the degenerate k == 0 case
  sapply(seq_len(mixps$k), function(i) {
    # FIX: dwnorm was called unqualified with no import in scope; qualify
    # with BAMBI:: as the rest of the file does
    ps <- BAMBI::dwnorm(xang, mu=mixps$mean[i], kappa=mixps$kappa[i], log=logp)
    peak <- BAMBI::dwnorm(mixps$mean[i], mu=mixps$mean[i], kappa=mixps$kappa[i], log=logp)
    if (logp) {
      ps - peak
    } else {
      ps/peak
    }
  })
}
#' Grades of membership under an EMCluster mixture
#'
#' Evaluates each component's density (log when logp=TRUE) at the positions;
#' one column per component, with no peak shifting.
get_goms.EMCluster <- function(x, mix, logp=TRUE) {
  xm <- as.matrix(x)
  # single-component density for component ci via dmixmvn
  density_for <- function(ci) {
    dmixmvn(xm, emobj=NULL, log=logp,
            pi=mix$pi[ci],
            Mu=mix$Mu[ci,,drop=FALSE],
            LTSigma=mix$LTSigma[ci,,drop=FALSE])
  }
  sapply(1:mix$nclass, density_for)
}
#' Grades of membership under an mclust fit
#'
#' Returns the fitted posterior membership matrix `z` from the Mclust object,
#' logged when logp=TRUE.
#' NOTE(review): the previous version also recomputed per-component normal
#' densities into an sapply() whose result was discarded; that dead code is
#' removed here — the returned value is unchanged.
#' @param x object to categorize (unused; kept for interface compatibility)
#' @param mix Mclust fit (must carry a z matrix of posterior memberships)
#' @param logp return grades-of-membership in log form
get_goms.mclust <- function(x, mix, logp=TRUE) {
  if (logp) {
    log(mix$z)
  } else {
    mix$z
  }
}
#' Extract mixture parameters from a mixture object
#'
#' Generalizes over the various ways of calculating mixtures that we have
#' @param mix fitted mixture object (e.g. an Mclust fit or a BAMBI parameter
#'   matrix); dispatches on its S3 class
#' @return list with at least k (component count), prob and mean, plus a
#'   method-specific spread parameter (sd or kappa)
get_mix_parameters <- function(mix) UseMethod('get_mix_parameters')
#' Mixture parameters from an Mclust fit: component count, weights, means,
#' and per-component sds (sigmasq recycled across components when the model
#' shares a single variance).
get_mix_parameters.Mclust <- function(mix) {
  params <- mix$parameters
  sds <- rep(sqrt(params$variance$sigmasq), length.out=mix$G)
  list(k=mix$G,
       prob=params$pro,
       mean=params$mean,
       sd=sds)
}
#' Mixture parameters from a BAMBI parameter matrix: one column per
#' component, rows named 'pmix', 'mu' and 'kappa'.
get_mix_parameters.BAMBI <- function(mix) {
  row_of <- function(nm) mix[nm, ]
  list(k=ncol(mix),
       prob=row_of('pmix'),
       mean=row_of('mu'),
       kappa=row_of('kappa'))
}
# TODO: for completeness, implement:
# get_mix_parameters.EMCluster
#' Find GOM for top two categories
#'
#' For each row of goms, finds the two highest grades of membership and the
#' category indices they belong to.
#' @param goms grades of membership for all categories (rows = observations,
#'   columns = categories)
#' @return list(goms=GOM for top, second categories; ords=category ids)
#' @export
top_two_categories <- function(goms) {
  # per row: the goms sorted descending, followed by the sorting permutation
  # (seq_len() replaces 1:nrow(goms), which misbehaves for zero rows)
  goms_ords <- sapply(seq_len(nrow(goms)), function(ri) {
    ord <- order(goms[ri,], decreasing=TRUE)
    c(goms[ri,ord], ord) }
  )
  # sapply stacked per-row results as columns; transpose back to row-major
  goms_ords <- t(goms_ords)
  k <- ncol(goms)
  if(k > 1) {
    # first two columns are the top-two goms; columns k+1, k+2 their ids
    res <- list(goms=goms_ords[,1:2], ords=goms_ords[,k + 1:2])
  } else {
    # if we only have one category, second category has -Inf GOM, and no order
    res <- list(goms=cbind(goms[,1], -Inf), ords=cbind(goms_ords[,2], NA))
  }
  res
}
############################################################
# GOM agent M(ean) adjustment
# Log-scale adjustment: log-density of xdelta under the category gaussian,
# shifted by gom.peak so the category peak maps to zero.
gom.log.Madj <- function(xdelta, gom.mean, gom.sd, gom.peak, verbose=.verbose$NONE, ...) {
  adj <- dnorm(xdelta, mean=gom.mean, sd=gom.sd, log=TRUE) - gom.peak
  if (verbose >= .verbose$DEBUG2) {
    cat(sprintf('Madj log, xdelta %s, mu %s, sd %s, res %s\n',
                xdelta, gom.mean, gom.sd, adj))
  }
  adj
}
# Slope-style adjustment on the log scale.
# NOTE(review): the analytic d/dx of a gaussian log-density is -(x-mu)/sd^2;
# the factor of 2 here may be intentional (e.g. squared-distance landscape) —
# confirm against the model derivation.
gom.log.Mpadj <- function(xdelta, gom.mean, gom.sd, gom.peak, verbose=.verbose$NONE, ...) {
  slope <- -2 * (xdelta - gom.mean) / (gom.sd^2)
  if (verbose >= .verbose$DEBUG2) {
    cat(sprintf('Mpadj log, xdelta %s, mu %s, sd %s, res %s\n',
                xdelta, gom.mean, gom.sd, slope))
  }
  slope
}
# Linear-scale adjustment: exponentiated peak-shifted log-density, i.e. the
# density of xdelta as a fraction of the category peak.
gom.Madj <- function(xdelta, gom.mean, gom.sd, gom.peak, verbose=.verbose$NONE, ...) {
  adj <- exp(dnorm(xdelta, mean=gom.mean, sd=gom.sd, log=TRUE) - gom.peak)
  if (verbose >= .verbose$DEBUG2) {
    cat(sprintf('Madj, xdelta %s, mu %s, sd %s, res %s\n',
                xdelta, gom.mean, gom.sd, adj))
  }
  adj
}
# Slope-style adjustment on the linear scale: slope factor multiplied by the
# density-ratio adjustment from gom.Madj.
gom.Mpadj <- function(xdelta, gom.mean, gom.sd, gom.peak, verbose=.verbose$NONE, ...) {
  slope <- -2 * ((xdelta - gom.mean) / (gom.sd^2))
  adj <- slope * gom.Madj(xdelta, gom.mean, gom.sd, gom.peak, verbose)
  if (verbose >= .verbose$DEBUG2) {
    cat(sprintf('Mpadj, xdelta %s, mu %s, sd %s, res %s\n',
                xdelta, gom.mean, gom.sd, adj))
  }
  adj
}
# NOTE(review): the two trailing lines here were website-embed boilerplate
# ("Add the following code to your website... Embedding Snippets.") left over
# from scraping; they are not R code and would break parsing, so they have
# been commented out.