R/gmdh.R

#' rGMDH: an implementation of the Group Method of Data Handling regression method
#'
#' An implementation of the Group Method of Data Handling proposed by A.G. Ivakhnenko in 1966.  The package allows solving regression tasks by creating high degree polynomials thus only numerical attributes are allowed.
#'
#' @docType package
#' @name rGMDH
#' @import plyr
NULL


ColumnView <- function(col.idx){
  
  idx <- col.idx
  function(x){
    x[,idx]
  }
}

QuadraticFun <- function(uFun, vFun, w){
  if(length(w)!=6){
    stop("Invalid w vector length for quadratic function")  
  }
  
  function(x){
    u <- uFun(x)
    v <- vFun(x)
    
    w[1] + w[2]*u + w[3]*v + w[4]*u^2 + w[5]*v^2 + w[6]*u*v
  }
}

initialZ <- function(x){
  lapply(1:ncol(x), ColumnView)
}

evaluateZ <- function(zList, x, y){
  sapply(zList, FUN=function(z) mse(z(x), y) )
}

mse <- function(v1, v2){
  mean((v1-v2)^2)
}


fitQuadraticFun <- function(u,v, x, y){
  res <- optim(rep(1,6), function(w){
    mse(QuadraticFun(u,v, w)(x), y)
  }, method = "BFGS")
  
  QuadraticFun(u,v, res$par)
}


generateQuadraticFuns <- function(zLst, x, y){
  plyr::llply( combn(zLst, m = 2, simplify = F), function(l){
    fitQuadraticFun(l[[1]], l[[2]], x, y)
  })
}

doIteration <- function(xTrain, yTrain, xVal, yVal, zLst, M){
  zNew <- c(zLst, generateQuadraticFuns(zLst, xTrain, yTrain))
  sortedIndices <- sort(evaluateZ(zNew, xVal, yVal), index.return=T)$ix
  
  zNew[sortedIndices[1:min(M, length(zNew))]]
}


doTrain <- function(xTrain, yTrain, xVal, yVal, M){
  
  z <- initialZ(xTrain)  
  bestErr <- function(z) min(evaluateZ(z, xVal, yVal)) 
  
  err <- Inf
  currErr <- bestErr(z)
  
  while(currErr<err){
    
    z <- doIteration(xTrain, yTrain, xVal, yVal, z, M)
    err <- currErr
    currErr <- bestErr(z)
  }
  
  m <- z[[1]]
  class(m) <- 'gmdh'
  
  m
}


#' Calculates GMDH model predictions for new data.
#' @param object GMDH model - an object of class 'gmdh' 
#' @param data a data matrix for which the predictions are calculated
#' @param ... other arguments passed to this function
#' 
#' @return a vector of predictions generated by the model
#' @export 
predict.gmdh <- function(object, data,...){
  object(data)
}

#' Trains a GMDH model which solves the y=f(x) regression task.
#' @param x a matrix with independent (input) variables 
#' @param y a numeric vector with the dependent (output) variable
#' @param M the number of polynomials which get selected at the end of i-th iteration, and are processed durint iteration i+1
#' @param val.size fraction of the available data used for validation
#' 
#' @return a trained GMDH model - an object of class 'gmdh'
#' 
#' @export
train <- function(x, y, M=ncol(x), val.size = 0.25){
  if(is.numeric(x) && is.numeric(y)){
    idx <- sample(nrow(x), size = ceiling(nrow(x)*val.size) )
    
    doTrain(x[idx,],y[idx], x[-idx,], y[-idx], M)
    
  }else{
    stop("GMDH requires numerical inputs")
  }
}
pzawistowski/rGMDH documentation built on May 26, 2019, 11:35 a.m.