R/kfind.R

## A grid search over K




































#' Grid search over K for NPML estimation of random effect and variance component models
#' @rdname Kfind 
#' @import "npmlreg"
#' 
#'
#' @description A grid search over the parameter \code{K}, to set the best number of
#' mass-points.
#' 
#' @details Not only the shape of the distribution causes the skewness it may due to the use of an 
#' insufficient number of classes, \code{K}. For this, the \code{Kfind.boxcox()} function 
#' was created to search over a selected range of \code{K} and find the best. For each number
#'  of classes, a grid search over \code{tol} is performed and the \code{tol} with the lowest 
#'  \code{aic} or \code{bic} value is considered as the optimal. Having the minimal \code{aic} or \code{bic} values for a whole range of
#'   \code{K} that have been specified beforehand, the \code{Kfind.boxcox()} function can find 
#'   the best number of the component as the one with the smallest value. It also plots the \code{aic} or \code{bic} values against
#'  the selected range of \code{K}, including a vertical line indicating the best value of \code{K}
#'  that minimizes the model selection criteria. The full range of 
#'   classes and their corresponding optimal \code{tol} can be printed off from the \code{Kfind.boxcox()}'s
#'    output and used with other \pkg{boxcoxmix} functions as starting points. 
#' 
#' @param formula a formula describing the transformed response and the fixed
#' effect model (e.g. y ~ x).
#' @param groups the random effects. To fit overdispersion models , set \code{groups} = 1.
#' @param data a data frame containing variables used in the fixed and random
#' effect models.
#' @param find.k search in a range of \code{K}, with default (2,10)
#' in step of 1.
#' @param find.tol search in a range of \code{tol}, with default (0,1.5)
#' in step of 1.
#' @param steps.tol number of points in the grid search of \code{tol}.
#' @param lambda a transformation parameter, setting \code{lambda}=1 means 'no
#' transformation'.
#' @param EMdev.change a small scalar, with default 0.0001, used to determine
#' when to stop EM algorithm.
#' @param steps maximum number of iterations for the EM algorithm.
#' @param start a description of the initial values to be used in the fitted
#' model, Quantile-based version "quantile" or Gaussian Quadrature "gq" can be
#' set.
#' @param model.selection Set \code{model.selection="aic"}, to use Akaike information criterion 
#' as model selection criterion or \code{model.selection="bic"}, to use Bayesian information criterion 
#' as model selection criterion.
#' 
#' @param \dots extra arguments will be ignored.
#' @return \item{MinDisparity}{the minimum disparity found.} \item{Best.K}{the
#' value of \code{K} corresponding to \code{MinDisparity}.}
#' \item{AllMinDisparities }{a vector containing all minimum disparities calculated on the
#' grid.} \item{AllMintol }{list of \code{tol} values used in the grid.}
#' \item{All.K }{list of \code{K} values used in the grid.}
#' \item{All.aic}{the Akaike information criterion of all fitted regression models.}
#'  \item{All.bic}{the Bayesian information criterion of all fitted regression models.}
#'  
#'  
#' @author Amani Almohaimeed and Jochen Einbeck
#' @seealso \code{\link{tolfind.boxcox}}.
#' 
#' @keywords Kfind boxcox
#' @examples
#' # Fabric data
#' \donttest{data(fabric, package = "npmlreg")
#' teststr<-Kfind.boxcox(y ~ x, data = fabric,  start = "gq", groups=1, 
#' find.k = c(2, 3), model.selection = "aic", steps.tol=5)
#' # Minimal AIC: 202.2114 at K= 2 }
#' 
#' 
#' 
#' 
#' 
#' 
#' 
#' 
#' 
#' @export
Kfind.boxcox<- function (formula, groups=1, data,  lambda=1, EMdev.change = 1e-04,  steps=500,
                         find.k = c(2, 10), model.selection = "aic", start="gq", find.tol = c(0,1.5),steps.tol = 15, ...) 
{
  call <- match.call()
  lam <-lambda
  g<-as.numeric(find.k)
  d<-g[2]-g[1]
    D <- 0:d
    k <- find.k[1] + (find.k[2] - find.k[1]) * 
      D/d
    bic<-aic<-min.Disp<-tol.min<-rep(0, (d+1) )
    for (t in 1:(d+1)  ) {
    fit <- try(tolfind.boxcox(formula=formula, groups= groups, data=data, K=k[t],  lambda=lam, start=start, EMdev.change = EMdev.change,  plot.opt = 0, s = steps.tol, steps= steps, 
                              find.in.range = find.tol,  verbose = FALSE, noformat = FALSE))
    if (class(fit) == "try-error") {
      cat("boxcox.tolfind failed using K=", k[t], ". Hint:  specify another range of K values and try again. ")
      return()
    }
    min.Disp[t] <- fit$min.Disp
    tol.min[t]<-fit$tol.min
    aic[t]<-fit$aic
    bic[t]<-fit$bic
    }  
  AIC<-min(aic)
  BIC<-min(bic)
  if(model.selection == 'bic'){
    max.result <- which.min(bic)
    best.K <- k[max.result]
    md<-paste("Minimal BIC:", round(BIC,3), "at K=", best.K, "\n")
    graphics::par(mfrow=c(1,1))
      graphics::plot(k, bic, type = "o", xlab = "K", 
                     ylab = "BIC", col= "green", main=md)
      plims <- graphics::par("usr")
      y0 <- plims[3]
      graphics::segments(best.K, y0, best.K, BIC, lty = 2,col="red", lwd = 2)
      cat("Minimal BIC:", BIC, "at K=", best.K, "\n")
    result <- list( "call"=call, AllMinDisparities = min.Disp, AllMintol = tol.min, 
                    Best.K =best.K, All.K = k,  md=md, Allbic=bic,
                    MinBIC =BIC, Allaic=aic,
                    MinAIC =AIC,"kind"=4,"mselect"=2)
    class(result)<-"boxcoxmix"
  }else{
  max.result <- which.min(aic)
  best.K <- k[max.result]
  md<-paste("Minimal AIC:", round(AIC,3), "at K=", best.K, "\n")
  graphics::par(mfrow=c(1,1))
    graphics::plot(k, aic, type = "o", xlab = "K", 
                   ylab = "AIC", col= "green", main=md)
    plims <- graphics::par("usr")
    y0 <- plims[3]
    graphics::segments(best.K, y0, best.K, AIC, lty = 2,col="red", lwd =2)
    cat("Minimal AIC:", AIC, "at K=", best.K, "\n")
  result <- list( "call"=call, AllMinDisparities = min.Disp, AllMintol = tol.min, 
                  Best.K =best.K, All.K = k,  md=md, Allaic=aic,
                  MinAIC =AIC, Allbic=bic,
                  MinBIC =BIC, "kind"=4,"mselect"=1)
  class(result)<-"boxcoxmix"
  }
  return(result)
}

Try the boxcoxmix package in your browser

Any scripts or data that you put into this service are public.

boxcoxmix documentation built on Sept. 17, 2020, 9:31 a.m.