# grid search for good values of tuning parameters

# arguments:

#   dataset: data frame or equivalent, containing the "X" and "Y"
#      columns; will be split by the function to 'dtrn' and 'dtst',
#      located on same level as the call
#   pars: named R list, one component for each grid parameter; component 
#      'x' is the set of desired values for the parameter 'x'
#   regCall: function that fits the given regression/classification method,
#      applies the associated prediction function and evaluates the loss;
#      it is called as regCall(dtrn,dtst,cmbi,...), where 'cmbi' is a
#      one-row data frame holding one combination of parameter values,
#      with column names names(pars); see examples below
#   nCombs: number of possible combinations of 'pars' to evaluate; NULL
#      means all, otherwise randomly chosen
#   nTst: desired size of holdout set
#   nXval: number of cross-validation runs to perform
#   up: if TRUE, results table will be printed in increasing order of 'smoothed'
#   k: k-NN smoothing parameter for the results; if NULL, then no smoothing
#   dispOrderSmoothed: if TRUE and k is non-null, then output will be
#      arranged in order of the 'smoothed' column; otherwise in order of
#      the meanAcc column
#   showProgress: if TRUE, rows of the output will be printed as they
#      become available
#   outDim: number of components in the value returned by regCall
#   ...: application-specific values needed by regCall() but constant
#      across combinations

# value:

#   vector of length outDim

# examples
# tc <- function(dtrn,dtst,cmbi,...)
# {
#    knnout <- kNN(dtrn[,-3],dtrn[,3],dtst[,-3],as.integer(cmbi[1]))
#    preds <- knnout$regests
#    mean(abs(preds - dtst[,3]))
# }
# data(mlb,package='regtools')
# mlb <- mlb[,3:6]
# mlb.d <- factorsToDummies(mlb) 
# fineTuningMult(mlb.d,list(k=c(5,25)),tc,nTst=100,nXval=2) 
# library(qeML)
# data(svcensus)
# tc1 <- function(dtrn,dtst,cmbi,...)
# {
#    knnout <- qeKNN(dtrn,'wageinc',as.integer(cmbi[1]),holdout=NULL)
#    preds <- predict(knnout,dtst[,-4])
#    mape <- mean(abs(preds - dtst[,4]))
#    bigprobs75 <- mean(preds > 75000)
#    c(mape,bigprobs75)
# }
# fineTuningMult(svcensus,list(k = c(10,25)),tc1,outDim=2)

# Runs regCall() on every hyperparameter combination in the grid, over
# nXval random train/test splits per combination, and stores the
# per-combination column means of the losses in outdf$meanAccs.
# NOTE(review): the rest of the argument list, several closing braces
# and the return value are not visible in this view -- confirm against
# the full file.
fineTuningMult <- function(dataset,pars,regCall,nCombs=NULL,
   # parameter checks
   # progress output is switched off in non-interactive sessions
   if (!interactive()) showProgress <- FALSE
   ### if (!is.null(specCombs)) nCombs <- nrow(specCombs)
   # holding off for now on general smoothing
   if (!is.null(k) && length(pars) > 1) 
      stop('smoothing is currently recommended only for 1 parameter')

   # generate the basic output data frame 
   outdf <- makeOutdf(pars,nCombs)
   # from here on, nCombs is the number of grid rows actually present
   nCombs <- nrow(outdf)

   # meanAccs: one row per combination, one column per loss component
   meanAccs <- matrix(rep(NA,outDim*nCombs),nrow=nCombs,ncol=outDim)
   # losses: scratch matrix with one row per cross-validation run; it is
   # overwritten for each combination
   losses <- matrix(nrow=nXval,ncol=outDim)
   # NOTE(review): 'done' is never set to TRUE in the visible code, so
   # the 'if (done) break' below cannot fire as written
   done <- FALSE

   # OK, ready to loop through the combinations
   for (combI in 1:nCombs) {
      for (xv in 1:nXval) {
         # create training and test sets
         # a fresh random split is drawn for every run
         tmp <- partTrnTst(dataset,nTest=nTst)
         dtrn <- tmp$trn
         dtst <- tmp$tst
         # drop=FALSE keeps the combination as a one-row data frame
         cmbi <- outdf[combI,,drop=FALSE]

         # loss <- try(
         #    R.utils::withTimeout(
         #       regCall(dtrn,dtst,cmbi,...),
         #       timeout = 300.0)  # later, make it an arg
         # )
         loss <- regCall(dtrn,dtst,cmbi,...)
         # loss fills one row of 'losses', which has outDim columns
         losses[xv,] <- loss
      # average each loss component over the cross-validation runs
      meanAccs[combI,] <- colMeans(losses)
      if (showProgress) {
         cat(' ',meanAccs[combI,],'\n')
      if (done) break
   # the whole matrix is attached to the grid as the column 'meanAccs'
   outdf$meanAccs <- meanAccs

# creates the basic parameter grid; called by both fineTuning() and
# fineTuningPar() 

# if nCombs is given, a random sample of nCombs rows of the full grid is
# used; otherwise the full grid is used

# Builds the hyperparameter grid: one row per combination of the values
# in 'pars', one column per parameter.
#
# arguments:
#
#    pars: named list; component 'x' holds the candidate values of
#       parameter 'x'
#    nCombs: if non-NULL, number of grid rows to retain, chosen at
#       random without replacement; NULL means keep the full grid
#
# value:
#
#    data frame of parameter combinations, with column names names(pars)

makeOutdf <- function(pars,nCombs=NULL) {
   outdf <- expand.grid(pars,stringsAsFactors=FALSE)
   if (!is.null(nCombs)) {
      # sample.int() on the row count draws the same indices as
      # sample(1:n,...) without the 1:0 pitfall of an empty grid
      idxsToKeep <- sample.int(nrow(outdf),nCombs)
      # drop=FALSE: a one-parameter grid must remain a data frame, since
      # callers take nrow() of the result and index it by row
      outdf <- outdf[idxsToKeep,,drop=FALSE]
   }
   outdf
}

# Adds a 'smoothed' column to outdf, obtained by passing the rescaled
# parameter columns and meanAcc to kNN(), and optionally sorts the rows
# by that column.
# NOTE(review): the closing braces and the return value are not visible
# in this view -- presumably the updated outdf is returned; confirm.
doSmoothing <- function(outdf,k,pars,meanAcc,dispOrderSmoothed) {
   # k is capped at the number of grid rows
   if (k > nrow(outdf)) {
      warning('reducing k')
      k <- nrow(outdf)
   # the first length(pars) columns of outdf are taken as the parameters
   x <- outdf[,1:length(pars)]
   # presumably min-max scaling; mmscale() is defined elsewhere -- TODO confirm
   x <- mmscale(x)
   # x is passed both as the first and as the third argument; presumably
   # these are the training and prediction points, so the smooth is
   # evaluated at the grid points themselves -- verify against kNN()
   kout <- kNN(x,meanAcc,x,k,scaleX=FALSE,smoothingFtn=loclin)
   smoothed <- kout$regests
   outdf$smoothed <- smoothed
   # increasing order of the smoothed values
   if (dispOrderSmoothed) outdf <- outdf[order(smoothed),]

# parallel interface to fineTuning(); will split the set of
# hyperparameter combinations into chunks, have each worker node process
# a chunk, then collect and combine the results into a single object of
# class 'tuner'

# arguments:

#    cls: parallel platform; if a positive integer, then a 'parallel' cluster
#    will be formed on the host machine, with the specified number of cores
#    ftCall: character string giving the fineTuning() call to be executed at
#    each worker
# value:

#   'tuner' object that combines the ones produced by the worker nodes

# Builds a fineTuning() call as a character string from this function's
# own argument names, runs it on the cluster via partools::doclscmd(),
# and merges the per-node results, adding a 'bonfAcc' column.
# NOTE(review): the rest of the argument list, the body of the try()
# expression, several closing braces and the return value are not
# visible in this view -- confirm against the full file.
fineTuningPar <- function(cls,dataset,pars,regCall,nCombs=NULL,specCombs=NULL,
   # set up cluster
   # a numeric cls is taken as the number of nodes for a new cluster
   if (is.numeric(cls)) {
      cls <- parallel::makeCluster(cls)
   } else if (inherits(cls,'cluster')) {
      partoolsenv <- NULL  # for CRAN
      resp <- try(
      if (inherits(resp,'try-error')) {
         stop('setclsinfo() not called')
   } else stop('invalid cls')

   # export all args to the cluster nodes
   argNames <- clusterExportArgs(cls)
   # 'cls' itself is left out of the generated call
   argNames <- argNames[argNames != 'cls']

   # create the full grid data frame, and parcel it out to the cluster
   # nodes
   # NOTE(review): makeOutdf() as defined in this file takes nCombs as
   # its second argument, so specCombs would be used as a sample size
   # here -- confirm which version of makeOutdf() is intended
   specCombs <- makeOutdf(pars,specCombs)

   # now create the fineTuning() call (in character form)
   # result has the form 'fineTuning(a=a,b=b,...,showProgress=FALSE)',
   # i.e. each argument is passed under its own name
   ftCall <- 'fineTuning('
   for (i in 1:length(argNames)) {
      nm <- argNames[i]
      ftCall <- paste0(ftCall,nm,'=',nm)
      if (i < length(argNames)) {
         ftCall <- paste0(ftCall,',')
      } else {
         ftCall <- paste0(ftCall,',showProgress=FALSE)')

   # and now do the call, and combine the results
   resp <- partools::doclscmd(cls,ftCall)
   # pairwise merge of the per-node results, using rbind as the combiner
   adls <- function(ll1,ll2) partools::addlists(ll1,ll2,rbind)
   combinedChunks <- Reduce(adls,resp)
   outdf <- combinedChunks$outdf
   # normal critical value for a tail probability of 0.025 divided by
   # the number of rows; bonfAcc is that value times seAcc
   zval <- -qnorm(0.025/nrow(outdf))
   outdf$bonfAcc <- zval * outdf$seAcc
   combinedChunks$outdf <- outdf

# parallel coordinates plot to visualize the grid; x is output
# of fineTuning(); disp is number to display, with -m, 0 and +m meaning
# the cases with the m smallest values of 'smoothed', all cases and the
# cases with the m largest values of 'smoothed', respectively

#  x: an object returned from fineTuning()
#  col: column to be plotted
#  disp: if non-0, number of lines to plot, largest or smallest values,
#     depending on whether disp is positive or negative
#  jit: avoids plotting coincident lines by adding jitter; amount is
#     jit * range(x) * runif(n,-0.5,0.5)
# plot.tuner <- function(tunerObject,col='meanAcc',disp=0,jit=0.05) {
# Prepares the grid in x$outdf for plotting: reads 'col', 'disp' and
# 'jit' from '...', jitters numeric columns, keeps one of the
# meanAcc/smoothed columns and optionally keeps only abs(disp) rows.
# NOTE(review): the braces and the plotting call itself are not visible
# in this view -- confirm against the full file.
plot.tuner <- function(x,...) 
   # optional settings arrive through '...'; a missing one is NULL and
   # falls back to 'meanAcc', 0 and 0.05 respectively
   arglist <- list(...)
   tmp <- arglist['col'][[1]]
   col <- if (!is.null(tmp)) tmp else 'meanAcc'
   tmp <- arglist['disp'][[1]]
   disp <- if (!is.null(tmp)) tmp else 0
   tmp <- arglist['jit'][[1]]
   jit <- if (!is.null(tmp)) tmp else 0.05

   outdf <- x$outdf
   # keep only the columns up to and including 'meanAcc'
   # NOTE(review): if 'smoothed' sits after 'meanAcc' it is dropped
   # here, before the col == 'smoothed' test below -- confirm
   macol <- which(names(outdf) == 'meanAcc')
   outdf <- outdf[,1:macol]
   if (jit > 0.0) {
      nc <- ncol(outdf)
      # NOTE(review): 1:(nc-3) counts downward when nc <= 3 -- confirm
      # that outdf always has more than 3 columns at this point
      for (i in 1:(nc-3)) {
         dfCol <- outdf[,i]
         if (is.numeric(dfCol)) {
            # jitter amount is proportional to the column's range
            rng <- max(dfCol) - min(dfCol)
            outdf[,i] <- dfCol + jit * rng * runif(length(dfCol),-0.5,0.5)
   # keep only the requested one of the two columns
   if (col == 'smoothed') outdf$meanAcc <- NULL
   else outdf$smoothed <- NULL
   if (disp != 0) {
      nc <- ncol(outdf)
      if (abs(disp) < nc - 1) stop('disp too small')
      # rows are ordered on the last column; positive disp selects the
      # largest values, negative disp the smallest
      ord <- order(outdf[,nc],decreasing=(disp > 0))
      outdf <- outdf[ord[1:abs(disp)],]
   nr <- nrow(outdf)

# 'pltt' is a second name bound to the same function as plot.tuner
pltt <- plot.tuner

# change the display order, between meanAcc and smoothed; ftout is
# output of fineTuning(); returns the full 'tuner' object, updated
# Toggles the display order of a 'tuner' object's results table between
# the 'meanAcc' column and the 'smoothed' column.
#
# arguments:
#
#    ftout: list with components 'outdf' (data frame having 'meanAcc'
#       and 'smoothed' columns), 'dispOrderSmoothed' (TRUE if the table
#       is currently ordered by 'smoothed') and 'up' (TRUE for
#       increasing order)
#
# value:
#
#    ftout, with 'outdf' re-sorted on the other column and
#    'dispOrderSmoothed' flipped; stops with an error if there is no
#    'smoothed' column

reorder.tuner <- function(ftout) {
   if (is.null(ftout$outdf$smoothed)) stop('no smoothing column')
   dispOrderSmoothed <- ftout$dispOrderSmoothed
   up <- ftout$up
   outdf <- ftout$outdf
   # sort on whichever column is NOT the current display order
   newOrderCol <- if (dispOrderSmoothed) 'meanAcc' else 'smoothed'
   outdf <- outdf[order(outdf[[newOrderCol]],decreasing=!up),,drop=FALSE]
   ftout$outdf <- outdf
   ftout$dispOrderSmoothed <- !dispOrderSmoothed
   ftout
}

# partition into training and test sets; to obtain training, validation
# and test sets, apply it twice

# arguments:

#    fullData:  matrix or data frame, one data point per row
#    nTest:  number of data points for the test set

# value:

#    R list, consisting of the training and test sets

# Randomly splits fullData by rows into a training set and a test set.
#
# arguments:
#
#    fullData: matrix or data frame, one data point per row
#    nTest: number of rows to place in the test set; default is 20% of
#       the rows, capped at 1000
#
# value:
#
#    list with components 'trn' (the rows not drawn, in their original
#    order) and 'tst' (the nTest drawn rows); both keep the matrix or
#    data frame structure of fullData, even with one row or one column

partTrnTst <- function(fullData,nTest=min(1000,round(0.2*nrow(fullData)))) {
   idxs <- sample.int(nrow(fullData),nTest)
   # logical mask rather than negative indexing: fullData[-integer(0),]
   # would select no rows at all when nTest is 0
   inTst <- seq_len(nrow(fullData)) %in% idxs
   # drop=FALSE keeps a single remaining row or column from collapsing
   # to a plain vector
   trn <- fullData[!inTst,,drop=FALSE]
   tst <- fullData[idxs,,drop=FALSE]
   list(trn=trn,tst=tst)
}

# called from within a function f(), it will export all the arguments of
# the latter, including defaults, to the nodes in cluster cls; it will
# also return the names; ellipsis args are skipped

# Collects the argument names of the function that called it, leaving
# out the ellipsis.
# NOTE(review): the export to the cluster nodes and the return of the
# names are not visible in this view -- confirm against the full file.
clusterExportArgs <- function(cls) {
   # sys.parent(1) is the frame of the function that called
   # clusterExportArgs(); formals() of that function gives its
   # arguments, and names() their names
   argNames <- names(formals(sys.function(sys.parent(1))))
   # remove the '...' entry from the list of names
   argNames <- argNames[argNames != '...']

# argVec is a row from the grid data frame in fineTuning(), with
# parameter names; converts the values in argVec to variables of those
# names

# Iterates over the names of argVec, one parameter name per pass.
# NOTE(review): the loop body is not visible in this view, so what is
# done with each name cannot be confirmed from here.
getNamedArgs <- function(argVec) 
   for (nm in names(argVec)) {

# apply regtools:::fineTuning() to choosing the expansion factors in
# kNN()

# arguments:

#    data: data frame (or matrix with col names), including both "X" and "Y"
#    yName: name of the data column containing "Y"
#    expandVars: indices of the column numbers 

# Prepares a grid search over the kNN() expansion values for the
# columns in expandVars: locates "Y", re-indexes expandVars for the
# data with "Y" removed, defines the loss function theCall() and builds
# the text of the parameter list.
# NOTE(review): the rest of the argument list, several closing braces
# and the code that uses theCall and wcols are not visible in this view
# -- confirm against the full file.
knnFineTune <- function(data,yName,k,expandVars,ws,classif=FALSE,
   if (classif) stop('not ready for classification problems')

   # column index of "Y" within data
   ycol <- which(names(data) == yName)
   # may need to shift some of expandVars over, once "Y" is removed
   if (ycol < ncol(data)) {
      # entries of expandVars that refer to columns to the right of "Y"
      topvars <- which(expandVars > ycol)
      if (length(topvars) > 0) {
         expandVars[topvars] <- expandVars[topvars] - 1
   # names of the columns to expand, looked up after "Y" has been dropped
   expandNms <- colnames(data[,-ycol])[expandVars]

   # loss function: calls kNN() on the training "X" and "Y" and the test
   # "X", with cmbi as the expansion values, and returns the mean
   # absolute difference between knnout$regests and the test "Y"
   theCall <- function(dtrn,dtst,cmbi) {
      x <- dtrn[,-ycol]
      y <- dtrn[,ycol]
      newx <- dtst[,-ycol]
      newy <- dtst[,ycol]
      # the grid row arrives as a data frame row; convert to numeric
      cmbi <- as.numeric(cmbi)
      knnout <- kNN(x,y,newx,k,expandVars=expandVars,expandVals=cmbi)
      mean(abs(knnout$regests - newy))
   # wcols <- paste0('w',1:length(expandVars),'=ws',collapse=',')
   # wcols <- paste0('list(',wcols,')')
   # builds the text 'list(<name1>=ws,<name2>=ws,...)', one entry per
   # expanded column
   wcols <- paste0(expandNms,'=ws',collapse=',')
   wcols <- paste0('list(',wcols,')')

