Nothing
## A grid search over K
#' Grid search over K for NPML estimation of random effect and variance component models
#' @rdname Kfind
#' @import "npmlreg"
#'
#'
#' @description A grid search over the parameter \code{K}, to set the best number of
#' mass-points.
#'
#' @details Not only the shape of the distribution causes the skewness it may due to the use of an
#' insufficient number of classes, \code{K}. For this, the \code{Kfind.boxcox()} function
#' was created to search over a selected range of \code{K} and find the best. For each number
#' of classes, a grid search over \code{tol} is performed and the \code{tol} with the lowest
#' \code{aic} or \code{bic} value is considered as the optimal. Having the minimal \code{aic} or \code{bic} values for a whole range of
#' \code{K} that have been specified beforehand, the \code{Kfind.boxcox()} function can find
#' the best number of the component as the one with the smallest value. It also plots the \code{aic} or \code{bic} values against
#' the selected range of \code{K}, including a vertical line indicating the best value of \code{K}
#' that minimizes the model selection criteria. The full range of
#' classes and their corresponding optimal \code{tol} can be printed off from the \code{Kfind.boxcox()}'s
#' output and used with other \pkg{boxcoxmix} functions as starting points.
#'
#' @param formula a formula describing the transformed response and the fixed
#' effect model (e.g. y ~ x).
#' @param groups the random effects. To fit overdispersion models , set \code{groups} = 1.
#' @param data a data frame containing variables used in the fixed and random
#' effect models.
#' @param find.k search in a range of \code{K}, with default (2,10)
#' in step of 1.
#' @param find.tol search in a range of \code{tol}, with default (0,1.5)
#' in step of 1.
#' @param steps.tol number of points in the grid search of \code{tol}.
#' @param lambda a transformation parameter, setting \code{lambda}=1 means 'no
#' transformation'.
#' @param EMdev.change a small scalar, with default 0.0001, used to determine
#' when to stop EM algorithm.
#' @param steps maximum number of iterations for the EM algorithm.
#' @param start a description of the initial values to be used in the fitted
#' model, Quantile-based version "quantile" or Gaussian Quadrature "gq" can be
#' set.
#' @param model.selection Set \code{model.selection="aic"}, to use Akaike information criterion
#' as model selection criterion or \code{model.selection="bic"}, to use Bayesian information criterion
#' as model selection criterion.
#'
#' @param \dots extra arguments will be ignored.
#' @return \item{MinDisparity}{the minimum disparity found.} \item{Best.K}{the
#' value of \code{K} corresponding to \code{MinDisparity}.}
#' \item{AllMinDisparities }{a vector containing all minimum disparities calculated on the
#' grid.} \item{AllMintol }{list of \code{tol} values used in the grid.}
#' \item{All.K }{list of \code{K} values used in the grid.}
#' \item{All.aic}{the Akaike information criterion of all fitted regression models.}
#' \item{All.bic}{the Bayesian information criterion of all fitted regression models.}
#'
#'
#' @author Amani Almohaimeed and Jochen Einbeck
#' @seealso \code{\link{tolfind.boxcox}}.
#'
#' @keywords Kfind boxcox
#' @examples
#' # Fabric data
#' \donttest{data(fabric, package = "npmlreg")
#' teststr<-Kfind.boxcox(y ~ x, data = fabric, start = "gq", groups=1,
#' find.k = c(2, 3), model.selection = "aic", steps.tol=5)
#' # Minimal AIC: 202.2114 at K= 2 }
#'
#'
#'
#'
#'
#'
#'
#'
#'
#' @export
Kfind.boxcox<- function (formula, groups=1, data, lambda=1, EMdev.change = 1e-04, steps=500,
find.k = c(2, 10), model.selection = "aic", start="gq", find.tol = c(0,1.5),steps.tol = 15, ...)
{
call <- match.call()
lam <-lambda
g<-as.numeric(find.k)
d<-g[2]-g[1]
D <- 0:d
k <- find.k[1] + (find.k[2] - find.k[1]) *
D/d
bic<-aic<-min.Disp<-tol.min<-rep(0, (d+1) )
for (t in 1:(d+1) ) {
fit <- try(tolfind.boxcox(formula=formula, groups= groups, data=data, K=k[t], lambda=lam, start=start, EMdev.change = EMdev.change, plot.opt = 0, s = steps.tol, steps= steps,
find.in.range = find.tol, verbose = FALSE, noformat = FALSE))
if (class(fit) == "try-error") {
cat("boxcox.tolfind failed using K=", k[t], ". Hint: specify another range of K values and try again. ")
return()
}
min.Disp[t] <- fit$min.Disp
tol.min[t]<-fit$tol.min
aic[t]<-fit$aic
bic[t]<-fit$bic
}
AIC<-min(aic)
BIC<-min(bic)
if(model.selection == 'bic'){
max.result <- which.min(bic)
best.K <- k[max.result]
md<-paste("Minimal BIC:", round(BIC,3), "at K=", best.K, "\n")
graphics::par(mfrow=c(1,1))
graphics::plot(k, bic, type = "o", xlab = "K",
ylab = "BIC", col= "green", main=md)
plims <- graphics::par("usr")
y0 <- plims[3]
graphics::segments(best.K, y0, best.K, BIC, lty = 2,col="red", lwd = 2)
cat("Minimal BIC:", BIC, "at K=", best.K, "\n")
result <- list( "call"=call, AllMinDisparities = min.Disp, AllMintol = tol.min,
Best.K =best.K, All.K = k, md=md, Allbic=bic,
MinBIC =BIC, Allaic=aic,
MinAIC =AIC,"kind"=4,"mselect"=2)
class(result)<-"boxcoxmix"
}else{
max.result <- which.min(aic)
best.K <- k[max.result]
md<-paste("Minimal AIC:", round(AIC,3), "at K=", best.K, "\n")
graphics::par(mfrow=c(1,1))
graphics::plot(k, aic, type = "o", xlab = "K",
ylab = "AIC", col= "green", main=md)
plims <- graphics::par("usr")
y0 <- plims[3]
graphics::segments(best.K, y0, best.K, AIC, lty = 2,col="red", lwd =2)
cat("Minimal AIC:", AIC, "at K=", best.K, "\n")
result <- list( "call"=call, AllMinDisparities = min.Disp, AllMintol = tol.min,
Best.K =best.K, All.K = k, md=md, Allaic=aic,
MinAIC =AIC, Allbic=bic,
MinBIC =BIC, "kind"=4,"mselect"=1)
class(result)<-"boxcoxmix"
}
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.