# demo/scglrCrossVal.r

library(SCGLR)

# Load the sample data set.
data(genus)

# Split the column names of the data set into responses (Y) and covariates (X).
n <- names(genus)
# A logical mask is used rather than n[-grep(...)]: a negative index built
# from grep() selects nothing at all when no name matches the pattern.
is_response <- grepl("^gen", n)
ny <- n[is_response]     # Y <- names that begin with "gen"
nx <- n[!is_response]    # X <- remaining names

# Remove "geology" and "surface" from nx:
# surface is passed as the offset and geology as an additional covariate.
nx <- nx[!nx %in% c("geology", "surface")]

# Build the multivariate formula.
# "lat*lon" is added as a computed covariate; geology is supplied through A.
form <- multivariateFormula(ny, c(nx, "I(lat*lon)"), A = c("geology"))

# Define the family: one "poisson" entry per response.
fam <- rep("poisson", length(ny))

# Cross-validation.
genus.cv <- scglrCrossVal(
  formula = form,
  data = genus,
  family = fam,
  K = 12,
  offset = genus$surface
)

# Find the best K.
# Each row of genus.cv is first divided by its own mean so that all rows are
# on a comparable scale, then the scaled values are averaged column by column.
mean.crit <- genus.cv / rowMeans(genus.cv)
mean.crit <- colMeans(mean.crit)
# which.min() gives a 1-based column position; the shift by one presumably
# accounts for a first column holding the 0-component model
# (NOTE(review): confirm against the scglrCrossVal documentation).
K.cv <- which.min(mean.crit) - 1

# plot(mean.crit, type = "l")
# SCnext/SCGLR documentation built on Feb. 10, 2024, 1:44 p.m.