#' @title Penalized Mixtures of Experts
#'
#' @description
#' Train a penalized mixture of experts model by IPOPT.
#'
#' @param formula A \code{formula} with two right-hand sides of the form \code{y ~ expert model | gating model}.
#' The normal formula syntax applies (see \code{\link[Formula]{Formula}}).
#' In order to use all available columns one or both right-hand sides can be dots (\code{y ~ . | .}, \code{y ~ . | gating model}).
#' Both right-hand sides normally contain an intercept (which is not penalized) that can be removed by adding \code{0} or \code{- 1}.
#' Offsets are allowed, too.
# In the binary case numeric vector as long as the number of training observations.
# In the multi-class case matrix or vector? FIXME
#' The left-hand side should be a vector indicating the class membership, preferably a \code{factor}.
#' @param data A \code{data.frame} from which variables specified in \code{formula} are to be taken.
#' @param X (Required if no \code{formula} is given as principal argument.) A \code{matrix} or \code{data.frame} or \code{Matrix} containing
#' the explanatory variables (must not contain an intercept column).
#' @param y (Required if no \code{formula} is given as principal argument.) A \code{factor} specifying
#' the class membership for each observation.
#' @param colsGating Names or indices of columns in \code{X} to be used for the gating model. Default is all columns.
#' @param colsExperts Names or indices of columns in \code{X} to be used for the expert models. Default is all columns.
#' @param interceptGating Logical. Does the gating model include an intercept? If \code{TRUE}, an intercept column is added to \code{X}.
#' Defaults to \code{TRUE}.
#' @param interceptExperts Logical. Does the expert model include an intercept? If \code{TRUE}, an intercept column is added to \code{X}.
#' Defaults to \code{TRUE}.
#' @param offsetGating Offset term for the gating model.
# A numeric vector. FIXME: matrix?
#' @param offsetExperts Offset term for the expert model.
# A numeric vector. FIXME: matrix?
#' @param J The number of experts / mixture components. Defaults to 2.
#' @param lambda Penalty parameter. Can be a scalar or a vector of length \code{1+J} with different components for the gating and the
#' \code{J} expert models. All components must be >= 0.
#' @param alpha Mixing parameter for the elastic net penalty. Can be a scalar or a vector of length \code{1+J} with different components
#' for the gating and the \code{J} expert models. All components must be in [0,1].
#' Defaults to 1.
#' @param type.multinomial \code{"grouped"}, \code{"ungrouped"}.
#' @param standardize Logical. Should the columns of \code{X} be standardized prior to fitting the model?
#' Defaults to \code{FALSE}. If \code{TRUE} the coefficients are returned on the original scale.
# when necessary? return on original scale? macht das sinn fuer spase X?
#' @param genetic Logical.
#' @param ipopt.max.iter The maximum number of IPOPT iterations.
#' @param ipopt.tol Tolerance for IPOPT convergence.
#' @param subset A subset...
#' @param na.action ...
#' @param penalty ...
#' @param model ...
#' @param \dots Further arguments.
#'
#' @return An object of class \code{pmoe}.
# A \code{list} with entries:
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
# \item{}{.}
#'
#' @references
#' Waechter, A. and Biegler, L. T. (2006),
#' On the Implementation of an Interior-Point Filter Line-Search Algorithm for Large-Scale Nonlinear Programming,
#' \emph{Mathematical Programming}, \strong{106}, 25-57.
#'
#' @rdname pmoe
#'
#' @export
pmoe = function(X, ...) {
UseMethod("pmoe")
}
#' @rdname pmoe
#'
#' @export
pmoe.formula = function(formula, data, subset, na.action, ...) {
mf = match.call(expand.dots = FALSE)
m = match(c("formula", "data", "subset", "na.action"), names(mf), 0)
mf = mf[c(1,m)]
f = Formula(formula)
mf[[1]] = as.name("model.frame")
mf$formula = f
mf = eval(mf, parent.frame())
y = model.response(mf)
## offsets are not in model.matrix
offsetGating = model.offset(model.part(f, data = mf, rhs = 2, terms = TRUE)) ## either a numerical vector/matrix or NULL
offsetExperts = model.offset(model.part(f, data = mf, rhs = 1, terms = TRUE)) ## not included in model.matrix
## intercepts
TermsGating = terms(f, data = data, rhs = 2)
TermsExperts = terms(f, data = data, rhs = 1)
interceptGating = as.logical(attr(TermsGating, "intercept"))
interceptExperts = as.logical(attr(TermsExperts, "intercept"))
## build model matrix (w/o intercept column)
Terms = terms(f, data = data, rhs = NULL)
attr(Terms, "intercept") = 0
# l = vector(attr(Terms, "terms.labels"), )
X = model.matrix(Terms, mf, rhs = NULL) ## contrast
colsGating = attr(TermsGating, "term.labels")
colsExperts = attr(TermsExperts, "term.labels")
res = pmoe.default(X, y, colsGating, colsExperts, interceptGating, interceptExperts, offsetGating, offsetExperts, ...)
res$terms = Terms # FIXME: all what is needed to reconstruct model.frame from data
res$contrasts = attr(X, "contrasts")
res$xlevels = .getXlevels(Terms, mf)
res
}
#' @rdname pmoe
#'
#' @export
pmoe.default = function(X, y, colsGating = 1:ncol(X), colsExperts = 1:ncol(X), interceptGating = TRUE, interceptExperts = TRUE,
offsetGating = NULL, offsetExperts = NULL, J = 2, lambda, alpha = 1, penalty = c("ungrouped", "grouped"), type.multinomial = c("ungrouped", "grouped"), model = c("binomial", "multinomial"), standardize = FALSE, genetic = FALSE, ipopt.max.iter = 500, ipopt.tol = 1e-6, ...) {
## checks
model = match.arg(model)
type.multinomial = match.arg(type.multinomial)
penalty = match.arg(penalty)
# if (K == 2 && type.multinomial == "grouped") {
# warning("")
# type.multinomial = "ungrouped"
# }
# if (J == 2)
## FIXME: man braucht type.multinomial eigentlich fuer gating und experts separat, vektor?
## check X
if (is.null(dim(X)))
stop("'X' is not a matrix")
if (!is.matrix(X) & !inherits(X, "Matrix"))
X = as.matrix(X)
## FIXME: Matrix
# if (any(!is.finite(X)))
# stop("infinite, NA or NaN values in 'X'")
## check that X is numeric
# FIXME if (!is.numeric(X@x))
# if (mode(X) != "numeric") funktioniert nicht fuer sparse
# stop("'X' is not 'numeric'")
## FIXME: remove not needed columns (neither in colsGating nor colsExperts from X)
## this might change colsGating, colsExperts
## scan for constant columns
## check if intercept column is there after all
## FIXME: Matrix kann zu groß werden
const = (apply(X, 2, sd) < 1e-08) ## FIXME: < tolerance parameter
if (any(const)) {
warning(sprintf(ngettext(sum(const), "Column %s appears to be constant",
"columns %s appear to be constant"), paste(colnames(X)[const], collapse = ", ")), domain = NA)
}
# FIXME: entfernen? in scrime nachschauen?
## standardize numeric columns in X (except intercept)
cen = sc = NULL
if (standardize) {
X = scale(X)
cen = attr(X, "scaled:center")
sc = attr(X, "scaled:scale")
}
## FIXME: backscaling, scaling for sparse X
## check colsGating, colsExperts
V = ncol(X)
VGating = length(colsGating)
VExperts = length(colsExperts)
if (is.character(colsGating)) {
## correct names
## coerce to numeric?
if (interceptGating)
colsGating = c("(Intercept)", colsGating)
} else if (is.numeric(colsGating)) {
##
if (any(colsGating > V))
stop("'colsGating' contains undefined columns")
if (interceptGating)
colsGating = c(1, colsGating + 1)
else if (interceptExperts)
colsGating = colsGating + 1
} else
stop("'colsGating' is neither a character nor a numeric vector")
if (is.character(colsExperts)) {
## correct names
## coerce to numeric?
if (interceptExperts)
colsExperts = c("(Intercept)", colsExperts)
} else if (is.numeric(colsExperts)) {
##
if (any(colsExperts > V))
stop("'colsExperts' contains undefined columns")
if (interceptExperts)
colsExperts = c(1, colsExperts + 1)
else if (interceptGating)
colsExperts = colsExperts + 1
} else
stop("'colsExperts' is neither a character nor a numeric vector")
## X and intercept column
if (interceptGating || interceptExperts)
X = cbind(`(Intercept)` = 1, X) ## geht das schlauer?
## Important: for glmnet initialization no intercept column is needed
## check for intercept column, add if it is not there?
## FIXME: requirements on X and intercepts
# should X contain intercept column, what indices are in colsGating, colsExperts?
## check offset
# FIXME: binomial, multinomial, matrix
N = nrow(X)
# if (N != length(weights))
# stop("nrow(X) and length(weights) are different")
# if (any(weights < 0))
# stop("weights have to be larger or equal to zero")
# if (all(weights == 0))
# stop("all weights are zero")
# names(weights) = rownames(x)
## check y
if (N != length(y))
stop("'nrow(X)' and 'length(y)' are different")
if (!is.factor(y))
warning("'y' was coerced to a factor")
y = as.factor(y)
lev = lev1 = levels(y)
counts = as.vector(table(y))
if (any(counts == 0)) {
empty = lev[counts == 0]
warning(sprintf(ngettext(length(empty), "group %s is empty",
"groups %s are empty"), paste(empty, collapse = ", ")),
domain = NA)
lev1 = lev[counts > 0]
y = factor(y, levels = lev1)
counts = as.vector(table(y))
}
if (length(lev1) < 2L)
stop("training data from only one group given")
names(counts) = lev1
prior = counts/N
## check for groups with small numbers of training observations
if (any(counts == 1))
stop("One or more class has only 1 training observation")
# if (any(counts < 8))
# warning("One or more class has small number of training observations (< 8)") ## FIXME: welche toleranz ist sinnvoll? glmnet?
K = length(lev1)
## J
if (J < 2)
stop("'J' must be at least 2")
## lambda
if (any(lambda < 0))
stop("'lambda' must be >= 0")
if (length(lambda) == 1) {
lambda = rep(lambda, 2 + J)
} else if (length(lambda) != 1 + J) {
stop("'length(lambda)' is not 1 + J")
}
# } else if (length(lambda) != 2 + J) {
# stop("'length(lambda)' is not 2 + J")
# }
## alpha
if (any(alpha < 0 | alpha > 1))
stop("'alpha' must be in [0,1]")
if (length(alpha) == 1) {
alpha = rep(alpha, 1 + J)
} else if (length(alpha) != 1 + J) {
stop("'length(alpha)' is not 1 + J")
}
## set up the model and choose correct ipopt helper functions
# VGating = length(colsGating) - interceptGating # w/o intercept
# VExperts = length(colsExperts) - interceptExperts # w/o intercept
## FIXME: clean up, make this more practical
indGating = indExperts = integer(0) ## indices of intercept coefficients
if (J == 2 && model == "binomial") {
if (K == 2) {
lenW = (VGating + interceptGating) + 2 * (VExperts + interceptExperts)
lenU = VGating + 2 * VExperts
indWGating = 1:(VGating + interceptGating)
indWExperts = matrix((VGating + interceptGating) + 1:(2 * (VExperts + interceptExperts)), nrow = 2, byrow = TRUE) ## 2 x (VExperts + interceptExperts) matrix
indUGating = 1:VGating
indUExperts = matrix(VGating + 1:(2 * VExperts), nrow = 2, byrow = TRUE) ## 2 x VExperts matrix
if (interceptGating)
indGating = 1
if (interceptExperts)
indExperts = seq((VGating + interceptGating) + 1, lenW, VExperts+1) ## indWExperts[,1]
## FIXME: 1. Spalte der Matrix indWExperts nehmen?
# print(indGating)
# print(indExperts)
eval_f = eval_f_BinaryBinary
eval_grad_f = eval_grad_f_BinaryBinary
}
else if (K > 2) {
lenW = (VGating + interceptGating) + 2 * K * (VExperts + interceptExperts)
lenU = VGating + 2 * K * VExperts
indWGating = 1:(VGating + interceptGating)
indWExperts = matrix((VGating + interceptGating) + 1:(2 * K * (VExperts + interceptExperts)), nrow = 2, byrow = TRUE) ## 2 x K*(VExperts + interceptExperts) matrix
indUGating = 1:VGating
indUExperts = matrix(VGating + 1:(2 * K * VExperts), nrow = 2, byrow = TRUE) ## 2 x K*VExperts matrix
if (interceptGating)
indGating = 1
if (interceptExperts)
indExperts = seq((VGating + interceptGating) + 1, lenW, VExperts+1)
eval_f = eval_f_BinaryMulti
eval_grad_f = eval_grad_f_BinaryMulti
}
else {
stop("incorrect specification of 'K'")
}
}
else if (J > 2 || model == "multinomial") {
if (K == 2 && model == "binomial") {
lenW = J * (VGating + interceptGating) + J * (VExperts + interceptExperts)
lenU = J * VGating + J * VExperts
indWGating = 1:(J * (VGating + interceptGating))
indWExperts = matrix(J * (VGating + interceptGating) + 1:(J * (VExperts + interceptExperts)), nrow = J, byrow = TRUE) ## J x (VExperts + interceptExperts) matrix
indUGating = 1:(J * VGating)
indUExperts = matrix(J * VGating + 1:(J * VExperts), nrow = J, byrow = TRUE) ## J x VExperts matrix
if (interceptGating)
indGating = seq(1, J * (VGating + interceptGating), VGating + 1)
if (interceptExperts)
indExperts = seq(J * (VGating + interceptGating) + 1, lenW, VExperts+1)
eval_f = eval_f_MultiBinary
eval_grad_f = eval_grad_f_MultiBinary
}
else if (K > 2 || model == "multinomial") {
lenW = J * (VGating + interceptGating) + J * K * (VExperts + interceptExperts)
lenU = J * VGating + J * K * VExperts
indWGating = 1:(J * (VGating + interceptGating))
indWExperts = matrix(J * (VGating + interceptGating) + 1:(J * K * (VExperts + interceptExperts)), nrow = J, byrow = TRUE) ## J x K*(VExperts + interceptExperts) matrix
indUGating = 1:(J * VGating)
indUExperts = matrix(J * VGating + 1:(J * K * VExperts), nrow = J, byrow = TRUE) ## J x K*VExperts matrix
if (interceptGating)
indGating = seq(1, J * (VGating + 1), VGating + 1)
if (interceptExperts)
indExperts = seq(J * (VGating + interceptGating) + 1, lenW, VExperts+1)
eval_f = eval_f_MultiMulti
eval_grad_f = eval_grad_f_MultiMulti
}
else {
stop("incorrect specification of 'K'")
}
} else {
stop("incorrect specification of 'J'")
}
indIntercept = c(indGating, indExperts)
# f = function(x, auxdata) 100*eval_f(x, auxdata)
# grad_f = function(x, auxdata) 100*eval_grad_f(x, auxdata)
## initialization by an EM step
# # initPmoe = function(X, y, J, K, alpha, lambda, lenW, lenU, colsGating, interceptGating, offsetGating, indWGating, colsExperts, interceptExperts, offsetExperts, indWExperts, indIntercept, standardize) {
# ## y should be a factor
# ## FIXME: important: X without intercept
# x0 = numeric(lenW + lenU) ## sparse?
# cluster = kmeans(x = X[,colsGating][,-1], centers = J)$cluster ## fuer welche Dimensionen geht das noch? Clustern mit Dim-Reduktion? FIXME: ordinal data FIXME: OHNE INTERCEPT
# ## FIXME: kmedoids/other method for discrete data???
# cluster = diag(J)[cluster,] # J column matrix, FIXME: softness/overlap parameter statt 0.9? an hme orientieren,
# cluster[cluster == 1] = 0.9
# cluster[cluster == 0] = (1 - 0.9)/(J - 1)
# ## calculate models for lambda sequence, extract coefficients and fitted values for the user-defined lambda
# ## even if there is no intercept in the model, there is an intercept row in the coef matrix that needs to be excluded
# if (J == 2) {
# gating = glmnet::glmnet(as.matrix(X[,colsGating][,-1]), cluster, family = "binomial", alpha = alpha[1], intercept = interceptGating, offset = offsetGating, standardize = standardize)
# ## FIXME: max und min lambda merken
# if (interceptGating)
# x0[indWGating] = glmnet::coef.glmnet(gating, s = lambda[1])[,1] ## sparse lassen? FIXME
# else
# x0[indWGating] = glmnet::coef.glmnet(gating, s = lambda[1])[-1,1] ## sparse lassen? FIXME
# } else {
# gating = glmnet::glmnet(X[,colsGating][,-1], cluster, family = "multinomial", alpha = alpha[1], intercept = interceptGating, offset = offsetGating, standardize = standardize)
# #, type.multinomial = type.multinomial[1])
# ## FIXME: glmnet docu says offset has to be a N x K matrix for multinomial
# if (interceptGating)
# x0[indWGating] = as.vector(sapply(1:J, function(j) glmnet::coef.glmnet(gating, s = lambda[1])[[j]][, 1])) ## FIXME
# else
# x0[indWGating] = as.vector(sapply(1:J, function(j) glmnet::coef.glmnet(gating, s = lambda[1])[[j]][-1, 1])) ## FIXME
# }
# # weights = getS3method("predict", class(gating))(gating, newx = X[,colsGating][,-1], s = lambda[1], type = "response", offset = offsetGating) ## probabilities
# weights = predict(gating, newx = X[,colsGating][,-1], s = lambda[1], type = "response", offset = offsetGating) ## probabilities
# print(summary(weights))
# if (J == 2) {
# weights = cbind(1 - weights, weights)
# } else {
# weights = weights[,,1]
# }
# if (K == 2) {
# for (j in 1:J) {
# expert = glmnet::glmnet(as.matrix(X[,colsExperts][,-1]), y, weights = weights[,j], family = "binomial", alpha = alpha[1+j], intercept = interceptExperts,
# offset = offsetExperts, standardize = standardize)
# if (interceptExperts)
# x0[indWExperts[j,]] = glmnet::coef.glmnet(expert, s = lambda[1+j])[,1] ## FIXME
# else
# x0[indWExperts[j,]] = glmnet::coef.glmnet(expert, s = lambda[1+j])[-1,1] ## FIXME
# }
# } else {
# for (j in 1:J) {
# expert = glmnet::glmnet(X[,colsExperts][,-1], y, weights = weights[,j], family = "multinomial", alpha = alpha[1+j], intercept = interceptExperts,
# offset = offsetExperts, standardize = standardize) #, type.multinomial = type.multinomial[1+j])
# if (interceptExperts)
# x0[indWExperts[j,]] = as.vector(sapply(1:K, function(k) glmnet::coef.glmnet(expert, s = lambda[1+j])[[k]][, 1]))
# else
# x0[indWExperts[j,]] = as.vector(sapply(1:K, function(k) glmnet::coef.glmnet(expert, s = lambda[1+j])[[k]][-1, 1]))
# }
# }
# ## initialize u
# if (length(indIntercept)) {
# x0[lenW + 1:lenU] = abs(x0[1:lenW][-indIntercept]) + 0.2
# } else {
# x0[lenW + 1:lenU] = abs(x0[1:lenW]) + 0.2
# }
# # # return(x0)
# # # }
## sparsity structure
# eval_jac_g_structure = lapply(rep(1:lenU, 2), function(x) {
# if (length(indIntercept)) {
# c((1:lenW)[-indIntercept][x], lenW + x)
# } else {
# c((1:lenW)[x], lenW + x)
# }
# })
# if (length(indIntercept)) {
# eval_jac_g_structure = lapply(rep(1:lenU, 2), function(x) c((1:lenW)[-indIntercept][x], lenW + x))
# } else {
# eval_jac_g_structure = lapply(rep(1:lenU, 2), function(x) c((1:lenW)[x], lenW + x))
# }
## habe ich nicht schon Vektoren, die so aussehen?
eval_jac_g_structure = rep(1:lenU, 2) + lenW
if (length(indIntercept)) {
m1 = rep((1:lenW)[-indIntercept], 2)
} else {
m1 = rep((1:lenW), 2)
}
eval_jac_g_structure = cbind(m1, eval_jac_g_structure, deparse.level = 0)
eval_jac_g_structure = lapply(1:(2*lenU), function(i) eval_jac_g_structure[i,])
# eval_jac_g_structure2 = lapply(1:lenU, function(x) {
# lenW + x
# })
# c eval_jac_g_structure[[2*lenU + 1]] = lenW + indUGating
# The constraint functions are bounded from below by zero
constraint_lb = rep(0.0, 2 * lenU)
constraint_ub = rep(Inf, 2 * lenU)
# c constraint_lb = c(rep(0.0, 2 * lenU), tol)
# c constraint_ub = rep(Inf, 2 * lenU + 1)
y = unclass(y)
## FIXME: check if entries are 1:K
## auxiliary data
# auxdata = new.env()
auxdata = list()
auxdata$X = X
auxdata$y = y
auxdata$lenW = lenW
auxdata$lenU = lenU
auxdata$colsGating = colsGating
auxdata$colsExperts = colsExperts
auxdata$indWGating = indWGating
auxdata$indWExperts = indWExperts
auxdata$indUGating = indUGating
auxdata$indUExperts = indUExperts
auxdata$indIntercept = indIntercept
auxdata$lambda = lambda
auxdata$alpha = alpha
auxdata$N = N
auxdata$J = J
auxdata$K = K
auxdata$type.multinomial = type.multinomial
auxdata$penalty = penalty
# auxdata$tol = tol
## rm(list = names(auxdata))
# # x0 = initPmoe(X, y, K, J, alpha, lambda, lenW, lenU, colsGating, interceptGating, offsetGating, indWGating, colsExperts, interceptExperts, offsetExperts, indWExperts, indIntercept, standardize)
if (genetic) {
# gen = GenSA(par = NULL, fn = eval_f, lower = rep(c(-5,0),c(lenW,lenU)), upper = rep(5, lenW + lenU), control = list(max.call = max.call, verbose = TRUE), auxdata = auxdata)
gen = GenSA(par = NULL, fn = function(x, ...) eval_f(c(x, abs(x[-indIntercept])), ...), lower = rep(-5,lenW), upper = rep(5, lenW), control = list(max.time = 60, verbose = TRUE), auxdata = auxdata)
# print(gen)
x0 = c(gen$par, abs(gen$par[-indIntercept]) + 0.1)
} else {
# x0Experts = rnorm(length(indWExperts[1,]), sd = 1)
# x0 = c(rnorm(length(indWGating), sd = 1), x0Experts, -x0Experts, rep(1,lenU))
x0 = c(rnorm(lenW, sd = 1), rep(1,lenU))
# x0 = rep(0:1, c(lenW, lenU))
# x0[4] = 1
# x0[indWExperts[1,]][2] = 1
# x0[indWExperts[2,]][2] = -1
}
## ipopt options
ipoptr_opts = list(
"jac_d_constant" = 'yes', # Indicates whether all inequality constraints are linear
"hessian_approximation" = 'limited-memory',
#"derivative_test" = 'first-order',
"obj_scaling_factor" = 100,
# "point_perturbation_radius" = 0.1,
# "derivative_test_print_all" = 'yes',
#"hessian_approximation_space" = 'all-variables',
#"hessian_constant" = 'yes', ## FIXME: BFGS?
"bound_relax_factor" = 0,
"mu_strategy" = 'adaptive',
"max_iter" = ipopt.max.iter,
"tol" = ipopt.tol,
print_frequency_iter = 20
#acceptable_tol = 1e-10
)
# print(address(`auxdata$X`))
# library(numDeriv)
# print("numDeriv")
# print(grad(eval_f, x0, auxdata = auxdata))
# print("formula")
# print(eval_grad_f(x0, auxdata))
## call ipopt
res = ipoptr(
x0 = x0,
eval_f = eval_f,
eval_grad_f = eval_grad_f,
eval_g = eval_g,
eval_jac_g = eval_jac_g,
eval_jac_g_structure = eval_jac_g_structure,
constraint_lb = constraint_lb,
constraint_ub = constraint_ub,
opts = ipoptr_opts,
auxdata = auxdata
)
## FIXME: names x0
## FIXME: active
# res$loglik = eval_f(res$solution, auxdata)
# res$penGating = pen*()
# res$penExperts = pen*()
res$lev = lev
res$lev1 = lev1
res$prior = prior
res$counts = counts
res$model = model
# VGating
# VExperts
## extract coefficients
res$coefs = res$solution[1:lenW]
## für path besser alles untereinanderschreiben und nur vernünftig benenen
## schwellwert wählen
## coefs sparse machen
names(res$coefs) = c(
paste("gating", if (J == 2 && model == "binomial") 2 else rep(1:J, each = (VGating + interceptGating)), colnames(X)[colsGating], sep = "."),
paste("expert", if (K == 2 && model == "binomial") rep(1:J, each = (VExperts + interceptExperts)) else rep(1:J, each = K * (VExperts+interceptExperts)), if (K == 2 && model == "binomial") lev1[2] else rep(lev1, each = (VExperts+interceptExperts)), colnames(X)[colsExperts], sep = ".")
)
if (J == 2 && model == "binomial") {
## length(colsGating) x 1 matrix
res$coefs.gating = as.matrix(res$coefs[indWGating])
rownames(res$coefs.gating) = colnames(X)[colsGating]
colnames(res$coefs.gating) = "2"
} else if (J > 2 || model == "multinomial") {
## length(colsGating) x J matrix
res$coefs.gating = matrix(res$coefs[indWGating], ncol = J)
rownames(res$coefs.gating) = colnames(X)[colsGating]
colnames(res$coefs.gating) = paste("gate", 1:J, sep = ".")
}
if (K == 2 && model == "binomial") {
## length(colsExperts) x 1 x J array
res$coefs.experts = array(res$coefs[t(indWExperts)], dim = c(VExperts+interceptExperts, 1, J),
dimnames = list(colnames(X)[colsExperts], lev1[2], 1:J))
} else if (K > 2 || model == "multinomial") {
## length(colsExperts) x K x J array
res$coefs.experts = array(res$coefs[t(indWExperts)], dim = c(VExperts+interceptExperts, K, J),
dimnames = list(colnames(X)[colsExperts], lev1, 1:J))
}
## coefs sparse machen
## log-likelihood and penalties
attr = eval_f(res$solution, auxdata)
res$loglik = attr(attr, "loglik")
res$pen = attr(attr, "pen")
res$penGating = attr(attr, "penGating")
res$penExperts = attr(attr, "penExperts")
res$cen = cen
res$sc = sc
res = c(res, auxdata[-c(1:2)])
## loglik
## penalties zurückgeben
## offset
## check if some experts are the same and remove them?
## extract results, i.e. parameters, log-likelihood, penalties
## sparse matrices gating, experts
## FIXME: path?
# g = Matrix(res$solution[indWGating])
# e = Matrix(res$solution[indWExperts], ncol = J)
class(res) = "pmoe" ## "binary", "multi"
res
}
#' @title Predict Penalized Mixtures of Experts
#'
#' @description Predict penalized mixtures of experts.
#'
# @details ...
#'
#' @param object An object of class \code{"pmoe"}.
#' @param newdata New data to be predicted.
#' A \code{data.frame} of cases to be classified or, if \code{object} has a
#' \code{formula}, a \code{data.frame} with columns of the same names as the
#' variables used. A vector will be interpreted as a row
#' vector.
# If \code{newdata} is missing, an attempt will be made to
# retrieve the data used to fit the \code{wlda} object.
#' @param \dots Further arguments. Currently unused.
#'
#' @return A \code{list} with components:
#' \item{class}{The predicted class labels (a \code{factor}).}
#' \item{posterior}{Matrix of class posterior probabilities.}
#' \item{gating}{Matrix of gating probabilities.}
#' \item{experts}{Matrix of class posterior probabilities from individual experts.}
#'
#' @rdname predict.pmoe
#'
#' @export
#'
# @examples
#
#
## FIXME: also return linear predictors for gating and experts?
## FIXME: retrieve data if newdata is missing?
predict.pmoe = function(object, newdata, ...) {
if (!inherits(object, "pmoe"))
stop("'object' not of class 'pmoe'")
## newdata -> X FIXME
if (!is.null(Terms <- object$terms)) {
Terms = delete.response(Terms)
newdata = model.frame(Terms, newdata, na.action = na.pass, xlev = object$xlevels) ##?? save xlevels
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, newdata)
X = model.matrix(Terms, newdata, contrasts = object$contrasts, rhs = NULL) ## save contrast???
} else {
if (is.null(dim(newdata))) # a vector is treated as one row
dim(newdata) = c(1, length(newdata))
X = as.matrix(newdata)
}
if (!is.null(object$cen))
X = scale(X, object$cen, object$sc)
if (length(object$indIntercept))
X = cbind(`(Intercept)` = 1, X)
# if (ncol(X) != length(union(object$colsGating, object$colsExperts)))
# stop("wrong number of variables")
# if (length(colnames(x)) > 0L && any(colnames(x) != dimnames(object$means)[[2L]]))
# warning("variable names in 'newdata' do not match those in 'object'")
object$X = X
object$N = nrow(X)
# w = coef(object)
w = object$solution[1:object$lenW] ## FIXME: better extraction method
## calculate class posterior probabilities
## FIXME: gating and experts functions are defined differently, environmant, X?
if (object$J == 2 && object$model == "binomial") {
if (object$K == 2) {
gating = gatingBinary(w, object) # N x 1 matrix (group 2)
experts = expertsBinary(w, object) # N x 2 matrix (class 2)
post = rowSums(cbind(1 - gating, gating) * experts) # N x 1 matrix (class 2)
post = cbind(1 - post, post) # N x 2 matrix
} else if (object$K > 2) {
gating = gatingBinary(w, object) # N x 1 matrix (group 2)
experts = expertsMulti(w, object) # N x K x 2 array
post = (1 - gating[,1]) * experts[,,1] + gating[,1] * experts[,,2] # N x K matrix , FIXME: make gating a vector?
} else {
stop("something is wrong with 'object$K'")
}
} else if (object$J > 2 || object$model == "multinomial") {
if (object$K == 2 && object$model == "binomial") {
gating = gatingMulti(w, object) # N x J matrix
experts = expertsBinary(w, object) # N x J matrix (class 2)
post = rowSums(gating * experts) # N x 1 matrix (class 2)
post = cbind(1 - post, post) # N x 2
} else if (object$K > 2 || object$model == "multinomial") {
gating = gatingMulti(w, object) # N x J matrix
experts = expertsMulti(w, object) # N x K x J array
post = matrix(rowSums(sapply(1:object$J, function(z) gating[,z] * experts[,,z])), ncol = object$K) # N x K matrix
} else {
stop("something is wrong with 'object$K'")
}
} else {
stop("something is wrong with 'object$J'")
}
rownames(post) = rownames(X)
colnames(post) = object$lev1
cl = factor(object$lev1[max.col(post)], levels = object$lev)
return(list(class = cl, posterior = post, gating = gating, experts = experts))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.