Description Arguments Value Examples
Perform one job in a grid
one |
one job, as defined by makeGrid |
parent.function.name |
doOneGrid apply a parent function of this name |
Dim |
is training set X a list dim=1, or data.frame dim=2 |
... |
other args passed to model.function |
result from model.function
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | library(randomForest)
library(fastRditijuu)
#have some training data called X and y
N=1000
var = 8
X = data.frame(replicate(var,rnorm(1000)))
y = with(X,X1*X2+sin(X3))
#design a model function
model.function = function(Xtrain,ytrain,Xtest,ytest=NULL,
trainArgs = list(...)){
std.args = list(x=Xtrain,y=ytrain,mtry=10,ntree=1000,importance=T)
run.args = c(trainArgs,std.args[!names(std.args) %in% names(trainArgs)])
rf2 = do.call(randomForest,run.args)
out = list(
ypred = predict(rf2,Xtest),
ytest = ytest,
explVar.OOB = tail(rf2$rsq,1)
)
}
#make the a grid here with any combination mtry (1 to 5) and ntree (1:10)
trainArgs = list(mtry=1:3,ntree=1:10)
folds = 5
reps = 6
grid = makeGrid(N=N,reps=reps,folds=folds,trainArgs = trainArgs)
length(grid)
#map the grid with doOneGrid
gridOut = lapply(grid,doOneGrid)
#score predictions by some metric, (root mean square error)
CVout = sapply(gridOut, function(x) sqrt(sum((x$ypred-x$ytest)^2)))
#make array (folds x reps x trainArg1 x trainArg2 x trainArg3 ...)
Dims = attr(grid,"gridDim") #grid dims can be found here
CVarr = array(CVout, dim=Dims)
#reduce array over folds by mean
CVarr2 = apply(
X = CVarr,
MARGIN = which(names(Dims) != "folds"),
FUN = mean
)
#reduce array over reps by mean
CVarr.mean = apply(
X = CVarr2,
MARGIN = which(names(dim(CVarr2)) %in% names(trainArgs)),
FUN = mean
)
#reduce array over reps by mean
CVarr.sd = apply(
X = CVarr2,
MARGIN = which(names(dim(CVarr2)) %in% names(trainArgs)),
FUN = sd
)
#get best model
best.ind = which(min(CVarr.mean)==CVarr.mean,arr.ind=T)
best.arg = mapply(trainArgs,best.ind,
FUN=function(thisArg,thisInd) thisArg[thisInd])
print(best.arg)
#plot train performance
for(i in 1:dim(CVarr.mean)[1]) {
if(i==1) {
plot (trainArgs[[2]],CVarr.mean[i,],type="l",col=i,
ylim=range(CVarr.mean))
} else {
points(trainArgs[[2]],CVarr.mean[i,],type="l",col=i)
}
segments(x0=jitter(trainArgs[[2]]),
y0=CVarr.mean[i,]+CVarr.sd[i,],
y1=CVarr.mean[i,]-CVarr.sd[i,], col =i)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.