#' rGMDH: an implementation of the Group Method of Data Handling regression method
#'
#' An implementation of the Group Method of Data Handling proposed by A.G. Ivakhnenko in 1966. The package allows solving regression tasks by creating high degree polynomials thus only numerical attributes are allowed.
#'
#' @docType package
#' @name rGMDH
#' @import plyr
NULL
ColumnView <- function(col.idx){
idx <- col.idx
function(x){
x[,idx]
}
}
QuadraticFun <- function(uFun, vFun, w){
if(length(w)!=6){
stop("Invalid w vector length for quadratic function")
}
function(x){
u <- uFun(x)
v <- vFun(x)
w[1] + w[2]*u + w[3]*v + w[4]*u^2 + w[5]*v^2 + w[6]*u*v
}
}
initialZ <- function(x){
lapply(1:ncol(x), ColumnView)
}
evaluateZ <- function(zList, x, y){
sapply(zList, FUN=function(z) mse(z(x), y) )
}
mse <- function(v1, v2){
mean((v1-v2)^2)
}
fitQuadraticFun <- function(u,v, x, y){
res <- optim(rep(1,6), function(w){
mse(QuadraticFun(u,v, w)(x), y)
}, method = "BFGS")
QuadraticFun(u,v, res$par)
}
generateQuadraticFuns <- function(zLst, x, y){
plyr::llply( combn(zLst, m = 2, simplify = F), function(l){
fitQuadraticFun(l[[1]], l[[2]], x, y)
})
}
doIteration <- function(xTrain, yTrain, xVal, yVal, zLst, M){
zNew <- c(zLst, generateQuadraticFuns(zLst, xTrain, yTrain))
sortedIndices <- sort(evaluateZ(zNew, xVal, yVal), index.return=T)$ix
zNew[sortedIndices[1:min(M, length(zNew))]]
}
doTrain <- function(xTrain, yTrain, xVal, yVal, M){
z <- initialZ(xTrain)
bestErr <- function(z) min(evaluateZ(z, xVal, yVal))
err <- Inf
currErr <- bestErr(z)
while(currErr<err){
z <- doIteration(xTrain, yTrain, xVal, yVal, z, M)
err <- currErr
currErr <- bestErr(z)
}
m <- z[[1]]
class(m) <- 'gmdh'
m
}
#' Calculates GMDH model predictions for new data.
#' @param object GMDH model - an object of class 'gmdh'
#' @param data a data matrix for which the predictions are calculated
#' @param ... other arguments passed to this function
#'
#' @return a vector of predictions generated by the model
#' @export
predict.gmdh <- function(object, data,...){
object(data)
}
#' Trains a GMDH model which solves the y=f(x) regression task.
#' @param x a matrix with independent (input) variables
#' @param y a numeric vector with the dependent (output) variable
#' @param M the number of polynomials which get selected at the end of i-th iteration, and are processed durint iteration i+1
#' @param val.size fraction of the available data used for validation
#'
#' @return a trained GMDH model - an object of class 'gmdh'
#'
#' @export
train <- function(x, y, M=ncol(x), val.size = 0.25){
if(is.numeric(x) && is.numeric(y)){
idx <- sample(nrow(x), size = ceiling(nrow(x)*val.size) )
doTrain(x[idx,],y[idx], x[-idx,], y[-idx], M)
}else{
stop("GMDH requires numerical inputs")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.