doOneGrid: Perform one job in a grid

Description Arguments Value Examples

View source: R/CVfunctions.R

Description

Perform one job in a grid

Arguments

one

one job, as defined by makeGrid

parent.function.name

doOneGrid apply a parent function of this name

Dim

is training set X a list dim=1, or data.frame dim=2

...

other args passed to model.function

Value

result from model.function

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(randomForest)
library(fastRditijuu)
#have some training data called X and y
N=1000
var = 8
X = data.frame(replicate(var,rnorm(1000)))
y = with(X,X1*X2+sin(X3))

#design a model function
model.function = function(Xtrain,ytrain,Xtest,ytest=NULL,
                         trainArgs = list(...)){
 std.args = list(x=Xtrain,y=ytrain,mtry=10,ntree=1000,importance=T)
 run.args = c(trainArgs,std.args[!names(std.args) %in% names(trainArgs)])
 rf2 = do.call(randomForest,run.args)
 out = list(
   ypred = predict(rf2,Xtest),
   ytest = ytest,
   explVar.OOB = tail(rf2$rsq,1)
 )
}

#make the a grid here with any combination mtry (1 to 5) and ntree (1:10)
trainArgs = list(mtry=1:3,ntree=1:10)
folds = 5
reps  = 6
grid = makeGrid(N=N,reps=reps,folds=folds,trainArgs = trainArgs)
length(grid)

#map the grid with doOneGrid
gridOut = lapply(grid,doOneGrid)

#score predictions by some metric, (root mean square error)
CVout = sapply(gridOut, function(x) sqrt(sum((x$ypred-x$ytest)^2)))

#make array (folds x reps x trainArg1 x trainArg2 x trainArg3 ...)
Dims = attr(grid,"gridDim") #grid dims can be found here
CVarr = array(CVout, dim=Dims)

#reduce array over folds by mean
CVarr2 = apply(
 X      = CVarr,
 MARGIN = which(names(Dims) != "folds"),
 FUN    = mean
)

#reduce array over reps by mean
CVarr.mean = apply(
 X      = CVarr2,
 MARGIN = which(names(dim(CVarr2)) %in% names(trainArgs)),
 FUN    = mean
)

#reduce array over reps by mean
CVarr.sd = apply(
 X      = CVarr2,
 MARGIN = which(names(dim(CVarr2)) %in% names(trainArgs)),
 FUN    = sd
)

#get best model
best.ind = which(min(CVarr.mean)==CVarr.mean,arr.ind=T)
best.arg = mapply(trainArgs,best.ind,
                 FUN=function(thisArg,thisInd) thisArg[thisInd])
print(best.arg)

#plot train performance

for(i in 1:dim(CVarr.mean)[1]) {
 if(i==1) {
   plot  (trainArgs[[2]],CVarr.mean[i,],type="l",col=i,
          ylim=range(CVarr.mean))
 } else {
   points(trainArgs[[2]],CVarr.mean[i,],type="l",col=i)
 }
 segments(x0=jitter(trainArgs[[2]]),
          y0=CVarr.mean[i,]+CVarr.sd[i,],
          y1=CVarr.mean[i,]-CVarr.sd[i,], col =i)
}

sorhawell/fastRditijuu documentation built on May 30, 2019, 6:32 a.m.