########################### temporary example file to test functions and ensure updates are ok
rm(list=ls())
# .rs.restartR()
# library(foreach)
library(devtools)
library(rstudioapi)
setwd(dirname(getActiveDocumentContext()$path))
document(pkg = "../")
load_all("../", TRUE)
Wind <- ProbCast::Wind
Wind$WS100 <- sqrt(Wind$U100^2+Wind$V100^2)
Wind$Power <- pmin(Wind$WS100,11)^3
## Set-up simple kfold CV. NB --- For scenario forecasting make sure the CV folds don't cross issue times
Wind$kfold <- "fold1"
Wind$kfold[Wind$ISSUEdtm>as.POSIXct("2012-06-30",tz="UTC")] <- "fold2"
Wind$kfold[Wind$ISSUEdtm>as.POSIXct("2012-12-31",tz="UTC")] <- "fold3"
Wind$kfold[Wind$ISSUEdtm>as.POSIXct("2013-06-30",tz="UTC")] <- "Test"
Wind$bad_data <- runif(n = nrow(Wind))<0.99
#################################################################################
### Multiple Quantile Regression using GBM ####
#################################################################################
# data = test1$data
# formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2)))
# cv_folds = 5
# interaction.depth = 3
# n.trees = 100
# shrinkage = 0.05
# n.minobsinnode = 20
# bag.fraction = 1
# keep.data = F
# quantiles = seq(0.1,0.9,by=0.1)
# sort = T
# sort_limits = list(U=0.999,L=0.001)
# pred_ntree = 100
# cores = 1
# pckgs = NULL
# save_models_path = NULL
# only_mqr = FALSE
# rm(data,formula,cv_folds,interaction.depth,n.trees,shrinkage,n.minobsinnode,bag.fraction,keep.data,
# quantiles,sort,sort_limits,pred_ntree,cores,pckgs,only_mqr,save_models_path)
test1<-list(data=Wind)
## test in series
test1$gbm_mqr <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfold",
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100)
print(test1$gbm_mqr)
summary(test1$gbm_mqr)
tmp <- predict(test1$gbm_mqr,
newdata = test1$data,
pred_ntree = 100,
sort = T,
sort_limits = list(U=0.999,L=0.001))
data.table(tmp)
tmp <- predict(test1$gbm_mqr,
newdata = test1$data,
model_name = "fold1",
pred_ntree = 100,
sort = T,
sort_limits = list(U=0.999,L=0.001))
data.table(tmp)
### change kfold column & exclude data
test1$data$kfolindx <- test1$data$kfold
test1$data$kfold <- NULL
data.table(test1$data)
test1$gbm_mqr <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfolindx",
# cv_folds = NULL,
# cv_folds = test1$data$kfolindx,
# cv_folds = rep(1:10,length.out=nrow(test1$data)),
# cv_folds = rep(c("hjjr","gffd",'ptbnm'),length.out=nrow(test1$data)),
# cv_folds = rep(1,length.out=nrow(test1$data)),
# exclude_train = "bad_data",
# exclude_train = runif(n = nrow(Wind))<0.001,
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100)
summary(test1$gbm_mqr)
plot(test1$gbm_mqr$mqr_pred[1:100,])
test1$gam_mqr <- qreg_mboost(data = test1$data,
cv_folds = "kfolindx",
formula = TARGETVAR~bbs(WS100,knots=8),
quantiles = seq(0.1,0.9,by=0.1),
# quantiles = 0.5,
exclude_train = "bad_data",
sort = T,
sort_limits = list(U=0.999,L=0.001),
# only_mqr = T,
cores = 8)
class(test1$gam_mqr)
print(test1$gam_mqr)
summary(test1$gam_mqr)
plot(test1$gam_mqr$mqr_pred[1:100,])
test1$gam_mqr$exclude_index
test1$data <- Wind
data.table(test1$data)
# test with modified hyperparameters
test1$gbm_mqr2 <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfold",
interaction.depth = 5,
n.trees = 100,
shrinkage = 0.5,
n.minobsinnode = 50,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100)
# test in parallel ---> should equal model 1
test1$gbm_mqr3 <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfold",
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100,
cores=8L)
# test with save model function 1 worker
tmp <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfold",
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100,
save_models_path = "./tmp")
rm(tmp)
file.remove(list.files(pattern = "*.rda"))
# test with save model function 8 workers
tmp <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = "kfold",
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100,
save_models_path = "./tmp",
cores=8L,
only_mqr = TRUE)
file.remove(list.files(pattern = "*.rda"))
rm(tmp)
#####################
## back compatibility test
test1$gbm_mqr4 <- MQR_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
gbm_params = list(interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F),
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001),
pred_ntree = 100)
test1$gbm_mqr5 <- MQR_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
gbm_params = list(interaction.depth = 5,
n.trees = 100,
shrinkage = 0.5,
n.minobsinnode = 50,
bag.fraction = 1,
keep.data = F),
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001),
pred_ntree = 100)
test1$gbm_mqr6 <- MQR_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
gbm_params = list(interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F),
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001),
pred_ntree = 100,
cores = 8,
parallel = TRUE,
para_over_q = FALSE)
### default operational model
test1$gbm_mqr7 <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001))
class(test1$gbm_mqr7)
print(test1$gbm_mqr7)
summary(test1$gbm_mqr7)
tmp <- predict(test1$gbm_mqr7,
newdata = test1$data,
sort = T,
sort_limits = list(U=0.999,L=0.001))
data.table(tmp)
length(test1$gbm_mqr7$models$all_data$q10$fit)==length(na.omit(test1$data$TARGETVAR))
##### auto cv
## supress warnings?
test1$gbm_mqr8 <- qreg_gbm(data = test1$data,
quantiles = seq(0.1,0.9,by=0.1),
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = 5, # probcast cv folds
cores = 8)
class(test1$gbm_mqr8)
print(test1$gbm_mqr8)
summary(test1$gbm_mqr8)
### use OOB estimate for tree
tmp <- predict(test1$gbm_mqr8,
newdata = test1$data,
sort = T,
sort_limits = list(U=0.999,L=0.001),
perf.plot = TRUE)
data.table(tmp)
# specify the tree number for prediction
tmp <- predict(test1$gbm_mqr8,
newdata = test1$data,
pred_ntree = 100,
quantiles = c(.1,.5,.9))
data.table(tmp)
### use gbm internal cv to estimate the best tree
# bug in internal gbm cv where formulas like (log(var)) in formula don't work
test1$gbm_mqr9 <- qreg_gbm(data = test1$data,
# formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
formula = TARGETVAR~U100+V100+U10+V10,
cv_folds = 5, # probcast cv folds
cores = 5,
cv.folds=5, # gbm internal folds for n.tree
perf.plot = FALSE)
class(test1$gbm_mqr9)
print(test1$gbm_mqr9)
summary(test1$gbm_mqr9)
### use gbm internal cv to estimate the best tree
tmp <- predict(test1$gbm_mqr9,
newdata = test1$data,
sort = T,
sort_limits = list(U=0.999,L=0.001),
perf.plot = TRUE)
data.table(tmp)
##### auto cv return only mqr
test1$gbm_mqr9 <- qreg_gbm(data = test1$data,
formula = TARGETVAR~U100+V100+U10+V10+(sqrt((U100^2+V100^2))),
cv_folds = 5,
interaction.depth = 3,
n.trees = 100,
shrinkage = 0.05,
n.minobsinnode = 20,
bag.fraction = 1,
keep.data = F,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
pred_ntree = 100,
cores=5L,
only_mqr = TRUE)
plot(test1$gbm_mqr9[1:100,])
par(mar=c(3,3,0.5,1)) # Trim margin around plot [b,l,t,r]
par(tcl=0.35) # Switch tick marks to insides of axes
par(mgp=c(1.5,0.2,0)) # Set margin lines; default c(3,1,0) [title,labels,line]
par(xaxs="r",yaxs="r") # Extend axis limits by 4% ("i" does no extension)
i_ts <- unique(test1$data$ISSUEdtm)[3]
plot(test1$gbm_mqr$mqr_pred[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr3$mqr_pred[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr4[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr6[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr8$mqr_pred[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr2$mqr_pred[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gbm_mqr5[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
rel <- rbindlist(lapply(names(test1)[-1][c(1,3,4,6,8,2,5)],function(x){
if(class(test1[[x]])[1]!="MultiQR"){
reliability(qrdata = test1[[x]]$mqr_pred,
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
} else{
reliability(qrdata = test1[[x]],
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
}
}),idcol="model")
pball <- rbindlist(lapply(names(test1)[-1][c(1,3,4,6,8,2,5)],function(x){
if(class(test1[[x]])[1]!="MultiQR"){
pinball(qrdata = test1[[x]]$mqr_pred,
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
} else{
pinball(qrdata = test1[[x]],
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
}
}),idcol="model")
fwrite(rel,"reltest.csv")
fwrite(pball,"pballtest.csv")
file.remove(list.files(pattern = "*.csv"))
#################################################################################
### Multiple Quantile Regression using QGAM ####
#################################################################################
load_all("./", TRUE)
test1<-list(data=Wind)
# data = test1$data
# formula = TARGETVAR~bbs(WS100,knots=8)
# cv_folds = 5
# quantiles = seq(0.1,0.9,by=0.1)
# sort = T
# sort_limits = list(U=0.999,L=0.001)
# pred_ntree = 100
# cores = 1
# pckgs = NULL
# save_models_path = NULL
# only_mqr = FALSE
# exclude_train = NULL
# w = rep(1,nrow(data))
# rm(data,formula,cv_folds,interaction.depth,n.trees,shrinkage,n.minobsinnode,bag.fraction,keep.data,
# quantiles,sort,sort_limits,pred_ntree,cores,pckgs,only_mqr,save_models_path,cl,cv_labs,opts,output,pb,exclude_idx,exclude_train)
## test in series
test1$gam_mqr <- qreg_mboost(data = test1$data,
cv_folds = "kfold",
formula = TARGETVAR~bbs(WS100,knots=8),
quantiles = seq(0.1,0.9,by=0.1),
# quantiles = 0.5,
# exclude_train = "bad_data",
sort = T,
sort_limits = list(U=0.999,L=0.001),
cores = 8,
only_mqr = T)
class(test1$gam_mqr)
print(test1$gam_mqr)
summary(test1$gam_mqr)
plot(test1$gam_mqr$mqr_pred[1:100,])
### check exclude train
tmp <- predict(test1$gam_mqr,
newdata = test1$data,
sort = T,
sort_limits = list(U=0.999,L=0.001))
data.table(tmp)
tmp <- predict(test1$gam_mqr,
newdata = test1$data,
model_name = "fold1",
sort = T,
sort_limits = list(U=0.999,L=0.001))
data.table(tmp)
# test with modified hyperparameters
test1$gam_mqr2 <- mqr_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
control = mboost::boost_control(mstop=100,nu=0.5),
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001))
# test in parallel ---> should equal model 1
test1$gam_mqr3 <- mqr_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
cores = 8L)
# test with save model function 1 worker
tmp <- mqr_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
save_models_path = "./tmp")
rm(tmp)
file.remove(list.files(pattern = "*.rda"))
# test with save model function 8 workers
tmp <- mqr_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
save_models_path = "./tmp",
cores=8L)
file.remove(list.files(pattern = "*.rda"))
rm(tmp)
#####################
## back compatibility test
## test in series
test1$gam_mqr4 <- MQR_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
bc_mstop = 100,
bc_nu = 0.1,
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001))
# test with modified hyperparameters
test1$gam_mqr5 <- MQR_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
bc_mstop = 100,
bc_nu = 0.5,
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001))
# test in parallel ---> should equal model 1
test1$gam_mqr6 <- MQR_qreg_mboost(data = test1$data,
formula = TARGETVAR~bbs(WS100,knots=8),
bc_mstop = 100,
bc_nu = 0.1,
quantiles = seq(0.1,0.9,by=0.1),
Sort = T,
SortLimits = list(U=0.999,L=0.001),
cores = 8L,
parallel = TRUE)
### default
test1$gam_mqr7 <- mqr_qreg_mboost(data = test1$data,
formula = TARGETVAR~WS100,
quantiles = seq(0.1,0.9,by=0.1),
sort = T,
sort_limits = list(U=0.999,L=0.001),
cores = 8L,
baselearner="btree")
par(mar=c(3,3,0.5,1)) # Trim margin around plot [b,l,t,r]
par(tcl=0.35) # Switch tick marks to insides of axes
par(mgp=c(1.5,0.2,0)) # Set margin lines; default c(3,1,0) [title,labels,line]
par(xaxs="r",yaxs="r") # Extend axis limits by 4% ("i" does no extension)
i_ts <- unique(test1$data$ISSUEdtm)[3]
plot(test1$gam_mqr[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr3[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr4[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr6[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr7[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr2[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
plot(test1$gam_mqr5[which(test1$data$ISSUEdtm==i_ts),],xlab="Time Index [Hours]",ylab="Power [Capacity Factor]",axes=F,Legend = 1,ylim=c(0,1)); axis(1,1:24,pos=-0.07); axis(2,las=1)
lines(test1$data$TARGETVAR[which(test1$data$ISSUEdtm==i_ts)],lwd=3)
lapply(names(test1)[-1][c(1,3,4,6,7,2,5)],function(x){
reliability(qrdata = test1[[x]],
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
})
lapply(names(test1)[-1][c(1,3,4,6,7,2,5)],function(x){
pinball(qrdata = test1[[x]],
realisations = test1$data$TARGETVAR,
kfolds = test1$data$kfold,main=x)
})
#################################################################################
### Parametric Regression using GAMLSS ####
#################################################################################
load_all("./", TRUE)
test1<-list(data=Wind)
# library(gamlss)
## test in series
test1$ppd <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10))
summary(test1$ppd$Test)
# test with modified hyperppdmeters
test1$ppd2 <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
sigma.start = 0.05,#NO, #
method = mixed(20,10))
summary(test1$ppd2$Test)
identical(coef(test1$ppd2$Test),coef(test1$ppd$Test))
# test in pllel ---> should equal model 1
test1$ppd3 <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10),
cores = 6)
summary(test1$ppd3$Test)
identical(coef(test1$ppd3$Test),coef(test1$ppd$Test))
# test with save model function 1 worker
tmp <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10),
save_models_path = "./tmp")
rm(tmp)
file.remove(list.files(pattern = "*.rda"))
# test with save model function 8 workers
tmp <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10),
cores=8L,
save_models_path = "./tmp")
file.remove(list.files(pattern = "*.rda"))
rm(tmp)
#####################
## back compatibility test
## test in series
# library(gamlss)
test1$ppd4 <- Para_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10))
identical(coef(test1$ppd4$Test),coef(test1$ppd$Test))
# test with modified hyperparameters
test1$ppd5 <- Para_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
sigma.start=0.05,
method = mixed(20,10))
identical(coef(test1$ppd5$Test),coef(test1$ppd2$Test))
# test in parallel ---> should equal model 1
test1$ppd6 <- Para_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF(),
method = mixed(20,10),
parallel = TRUE,
cores = 6)
identical(coef(test1$ppd6$Test),coef(test1$ppd$Test))
### default
test1$ppd7 <- ppd_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF())
test1$ppd8 <- Para_gamlss(data = test1$data,
formula = TARGETVAR~bs(WS100,df=3),
sigma.formula = ~WS100,
nu.formula = ~WS100,
tau.formula = ~WS100,
family = gamlss.dist::BEINF())
identical(coef(test1$ppd7$Test),coef(test1$ppd8$Test))
#################################################################################
### Parametric Regression using gamboostLSS ####
#################################################################################
load_all("./", TRUE)
test1<-list(data=Wind)
### when the function runs in series, each kfold model is started from the same offset in default mode ---> family = gamboostLSS::GaussianLSS().
### this looks as if it's due to the "family" argument retaining a default after it's ran once
### in parallel the mean of each subset is used as the offset, which is what we would expect.
### this isn't a big deal really, except for **exact** reproducability when changing from parallel to series computation
### let's just override the default behavior and start all models at the same offset in case we want to turn this into a real test
test1$ppd <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100),
family = gamboostLSS::GaussianLSS(mu=.3,sigma = .3))
summary(test1$ppd$Test)
# test with modified hyperppdmeters
test1$ppd2 <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100),
control = mboost::boost_control(mstop=100,nu=0.25),
family = gamboostLSS::GaussianLSS(mu=.3,sigma = .3))
identical(coef(test1$ppd2$Test),coef(test1$ppd$Test))
# test in pllel ---> should equal model 1 ---
test1$ppd3 <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100),
cores = 4,
family = gamboostLSS::GaussianLSS(mu=.3,sigma = .3))
identical(coef(test1$ppd3$Test),coef(test1$ppd$Test))
# test with save model function 1 worker
tmp <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100),
family = gamboostLSS::GaussianLSS(mu=.3,sigma = .3),
save_models_path = "./tmp")
rm(tmp)
file.remove(list.files(pattern = "*.rda"))
# test with save model function 8 workers
tmp <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100),
family = gamboostLSS::GaussianLSS(mu=.3,sigma = .3),
save_models_path = "./tmp",
cores=8L)
file.remove(list.files(pattern = "*.rda"))
rm(tmp)
#####################
## back compatibility test
## test in series
# library(gamlss)
test1$ppd4 <- Para_gamboostLSS(data = test1$data,
formula = TARGETVAR~bbs(WS100),
families = gamboostLSS::GaussianLSS(mu=.3,sigma = .3),
parallel = FALSE)
identical(coef(test1$ppd4$Test),coef(test1$ppd$Test))
# test with modified hyperparameters
test1$ppd5 <- Para_gamboostLSS(data = test1$data,
formula = TARGETVAR~bbs(WS100),
families = gamboostLSS::GaussianLSS(mu=.3,sigma = .3),
parallel = FALSE,
control = mboost::boost_control(mstop=100,nu=0.25))
identical(coef(test1$ppd5$Test),coef(test1$ppd2$Test))
# test in parallel ---> should equal model 1
test1$ppd6 <- Para_gamboostLSS(data = test1$data,
formula = TARGETVAR~bbs(WS100),
families = gamboostLSS::GaussianLSS(mu=.3,sigma = .3),
parallel = TRUE,
cores = 6)
identical(coef(test1$ppd6$Test),coef(test1$ppd$Test))
### default
test1$ppd7 <- ppd_gamboostlss(data = test1$data,
formula = TARGETVAR~bbs(WS100))
test1$ppd8 <- Para_gamboostLSS(data = test1$data,
formula = TARGETVAR~bbs(WS100))
identical(coef(test1$ppd7$Test),coef(test1$ppd8$Test))
########################################################################################
# example of offset problem --- run 1st half of fitting function then this
#
# for (i in unique(tempdata$kfold)){
#
#
#
# GAMLSSmodelList[[i]] <- gamboostLSS::gamboostLSS(data = tempdata[tempdata$kfold!=i & tempdata$kfold != "Test",],
# formula = formula,
# families = family)
#
#
#
#
#
# }
#
# ### this should be the mean of the subset of data as below! But they're all the same
# GAMLSSmodelList$`fold1`$mu$offset
# GAMLSSmodelList$Test$mu$offset
#
# mean(tempdata[tempdata$kfold!="fold1" & tempdata$kfold != "Test",]$TARGETVAR,na.rm=T)
# mean(tempdata[tempdata$kfold != "Test",]$TARGETVAR,na.rm=T)
#
# the models contain all the correct subsets of data though, think it comes down to the <<family>> object we defined updating with a default...
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.