### ComparerRun7
# Created 6/4/19
# We found that power exp was the only corr func
# that didn't bounce up at end in ExternalComp 5/6.
# Then we found it had a nug, while the Cauchy funcs
# didn't. But Matern3/2 had a nug and still bounced.
# The purpose of InternalComp7 is to run the corr funcs
# with and without nug to see if nug affects the bounce.
# I made each corr func have no nug, then added a version
# with a nug, same name with Nug at the end.
sggpexp_func <- function(corr, sel.method, f, d, batchsize,
sup.method, Nsupppd,
replicate) {#browser()
require("CGGP")
f <- eval(parse(text=paste0("TestFunctions::", f)))
set.seed(0)
nval <- 1e4
Xval <- matrix(runif(nval*d), nval, d)
Yval <- apply(Xval, 1, f)
nmax <- 10000 * d#8192 #16384
# Reset seed so you don't get repeats
set.seed(Sys.time())
# grid_size <- if (grid_size=="fast") {c(1, 2, 4, 4, 4, 6, 8, 32)}
# grid_size <- if (grid_size=="fast") {c(1,2,4,4,8,12,32)}
# else if (grid_size=="slow") {c(1, 2, 2, 2, 4, 4, 4, 4, 4, 6, 32)}
# else if (grid_size=="medium") {c(1, 2, 4, 4, 4, 6, 8, 32)}
# else {stop("bad grid size")}
Ns <- d*Nsupppd
if (Ns>0) {
Xs <- lhs::maximinLHS(Ns, d)
Ys <- apply(Xs, 1, f)
} else {
Xs <- NULL
Ys <- NULL
}
# is_power_of_2 <- function(x) {
# abs(log(x, 2) - floor(log(x, 2)+.5)) < 1e-8
# }
start.time <- Sys.time()
# First batch is zero points if supp data available
batchsize0 <- if (Ns>0) 0 else batchsize
sg <- CGGPcreate(d, batchsize0, corr=corr,
Xs=Xs, Ys=Ys, HandlingSuppData = sup.method)
if (Ns == 0) {
y <- apply(sg$design, 1, f)
sg <- CGGPfit(sg, Y=y)
}
nallotted <- batchsize0
if (sg$HandlingSuppData != sup.method) {stop(paste("HandlingSuppData is wrong"))}
sg.stats <- NULL
# while(nallotted < 1024) {
# sg <- CGGPappend(sg, batchsize=batchsize, selectionmethod=sel.method)
# y <- apply(sg$design, 1, f)
# sg <- CGGPfit(sg, Y=y,
# Xs=Xs, Ys=Ys, HandlingSuppData=sup.method)
# if (sg$HandlingSuppData != sup.method) {stop(paste("HandlingSuppData is wrong"))}
# nallotted <- nallotted + batchsize
# if (nallotted >= 256 && is_power_of_2(nallotted)) {
# newstats <- CGGPvalstats(sg, Xval = Xval, Yval = Yval)
# newstats$nallotted <- nallotted
# newstats$ngrid <- nrow(sg$design)
# newstats$nsupp <- if (is.null(sg$Xs)) {0} else {nrow(sg$Xs)}
# newstats$ntotal <- nrow(sg$design) + if (!is.null(sg$Xs)) {nrow(sg$Xs)} else {0}
# newstats$elapsedtime <- as.numeric(Sys.time() - start.time, units='secs')
# sg.stats <- if (is.null(sg.stats)) newstats else rbind(sg.stats, newstats)
# }
# print(nallotted)
# }
# Aafter 1024 add in batches of larger size
# batchsize2 <- 512 # was 512
while(nallotted < nmax) { #16384) {
Nalready <- (if (!is.null(sg$design)) {nrow(sg$design)} else {0}) + nrow(sg$Xs)
# ni <- if (Nalready<1000) {200} else if (Nalready<10000) {500} else if (Nalready<20000) {2000} else {10000}
batchsizei <- if (Nalready<1000) {200} else if (Nalready<4000) {500} else if (Nalready<20000) {2000} else {10000}
sg <- CGGPappend(sg, batchsize=batchsizei, selectionmethod=sel.method)
y <- apply(sg$design, 1, f)
sg <- CGGPfit(sg, Y=y,
Xs=Xs, Ys=Ys, HandlingSuppData=sup.method)
if (sg$HandlingSuppData != sup.method) {stop(paste("HandlingSuppData is wrong"))}
nallotted <- nallotted + batchsizei
if (TRUE) { # Always save #nallotted >= batchsize2 && is_power_of_2(nallotted)) {
newstats <- CGGPvalstats(sg, Xval = Xval, Yval = Yval)
newstats$nallotted <- nallotted
newstats$ngrid <- nrow(sg$design)
newstats$nsupp <- if (is.null(sg$Xs)) {0} else {nrow(sg$Xs)}
newstats$ntotal <- nrow(sg$design) + if (!is.null(sg$Xs)) {nrow(sg$Xs)} else {0}
newstats$elapsedtime <- as.numeric(Sys.time() - start.time, units='secs')
sg.stats <- if (is.null(sg.stats)) newstats else rbind(sg.stats, newstats)
}
print(nallotted)
}
sg.stats
}
require("comparer")
# The supp options when it is 0 are a waste, they are same as ignore
# sup.df <- data.frame(sup.method=rep(
# c("Ignore", "Only", "Correct","Mixture", "MarginalValidation","FullValidation"), 4),
# Nsupppd=rep(c(0,5,10,20), each=6), stringsAsFactors=F)
# sup.df <- sup.df[!(sup.df$Nsupppd==0 & sup.df$sup.method!="Ignore"),]
if (F) {
e2 <- ffexp$new(
eval_func = sggpexp_func,
# corr = c("cauchysq", "powerexp")[1], #"cauchysqt", "gaussian", "powerexp", "cauchy", "cauchysq"), #, "m32", "m52", "cauchysq", "cauchy"),
corr = c("cauchysq", "cauchysqt", "cauchy", "powerexp", "m32", "m52", #"gauss",
"cauchysqnug", "cauchysqtnug", "cauchynug", "powerexpnug", "m32nug", "m52nug", "gaussnug"),
sel.method = c("Greedy"), #c("TS", "UCB", "Greedy"),
fd=data.frame(f=c("beambending","OTL_Circuit","piston","borehole","wingweight"), d=c(3,6,7,8,10),
row.names = c("beam","OTL","piston","borehole","wingweight"), stringsAsFactors = F),
batchsize=128,
# sup.method=c("Ignore", "Only", "Correct","Mixture", "MarginalValidation","FullValidation"),
# Nsupppd=c(0,5,10,20),
sup.method="Correct",
Nsupppd=10,
# sup.df=sup.df,
parallel=if (version$os =="linux-gnu") {TRUE} else {FALSE},
parallel_cores = if (version$os =="linux-gnu") {20} else {3},
replicate=1:3,
folder_path= if (version$os =="linux-gnu") {"/home/collin/scratch/SGGP/scratch/InternalComparison/ComparerRun7/"}
else {"./scratch/InternalComparison/ComparerRun7"}
)
e2$rungrid
# try because it gave delete error before, but shouldn't need it now
try(e2$recover_parallel_temp_save(delete_after = FALSE))
e2$save_self()
# e2$run_one(1)
# if (F) {
# e2$run_all(parallel_temp_save=TRUE, delete_parallel_temp_save_after=FALSE,
# write_start_files=!TRUE, write_error_files=!T, run_order = "random")
e2$run_all(parallel_temp_save=TRUE, delete_parallel_temp_save_after=FALSE,
write_start_files=TRUE, write_error_files=T)
# }
e2$recover_parallel_temp_save(delete_after = F)
e2$save_self()
print("Completed all runs in ComparerRun7.R")
}
if (F) {
# e2 <- readRDS("./scratch/InternalComparison/ComparerRun6_object_328_of_360.rds")
e2 <- readRDS("C:/Users/cbe117/Documents/GitHub/CGGP/scratch/redTime/redTimeData/ComparerRun7_completed.rds")
e2$completed_runs %>% table
e2dfcomp <- e2$outcleandf[e2$completed_runs,]
e2df <- e2dfcomp[!is.na(e2dfcomp$RMSE),]
colnames(e2df)[1] <- "corr.func"
e2df$sup.method[e2df$nsupp==0] <- "Ignore"
library(ggplot2)
ggplot(data=e2df, mapping=aes(nallotted, RMSE, color=as.factor(Nsupppd))) + geom_point(size=4) + facet_grid(f ~ sup.method, scales="free_y") + scale_y_log10()
ggplot(data=e2df, mapping=aes(nallotted, RMSE, color=as.factor(Nsupppd))) + geom_point(size=4) + facet_grid(f ~ sup.method, scales="free_y") + scale_y_log10() + scale_x_log10()
ggplot(data=e2df, mapping=aes(nallotted, score, color=as.factor(Nsupppd))) + geom_point(size=4) + facet_grid(f ~ sup.method, scales="free_y")
ggplot(data=e2df, mapping=aes(nallotted, CRPscore, color=as.factor(Nsupppd))) + geom_point(size=4) + facet_grid(f ~ sup.method, scales="free_y") + scale_y_log10()
ggplot(data=e2df, mapping=aes(nallotted, elapsedtime, color=as.factor(Nsupppd))) + geom_point(size=4) + facet_grid(f ~ sup.method, scales="free_y") + scale_y_log10()
}
if (F) {
e7 <- readRDS("./scratch/InternalComparison/ComparerRun7_completed.rds")
require(ggplot2); require(dplyr); require(magrittr)
e7$completed_runs %>% table
# e7df <- e7$outcleandf[e7$completed_runs, ]
# outcleandf doesn't work when each has different number of rows, this fixes it
e7df <- NULL
for (irow in 1:length(e7$completed_runs)) {
e7df <- rbind(e7df,
cbind(e7$rungrid2()[irow,], e7$outlist[[irow]]))
}
colnames(e7df)[1] <- "corr.func"
e7df$RMSE %>% summary
e7df$RMSE %>% is.na %>% summary
e7df$nug <- sapply(e7df$corr.func, function(xx) {grepl("nug", xx)})
e7df$corr.funcnn <- sapply(e7df$corr.func, function(xx) {stringr::str_remove(xx, "nug")})
ggplot(data=e7df, mapping=aes(nallotted, RMSE, color=corr.funcnn)) + geom_point() + scale_y_log10() + facet_grid(. ~ nug)
ggplot(data=e7df, mapping=aes(nallotted, RMSE, color=corr.funcnn)) + geom_point() + scale_y_log10() + facet_grid(nug ~ corr.funcnn)
ggplot(data=e7df %>% mutate(RMSE=pmin(10, RMSE)), mapping=aes(nallotted, RMSE, color=corr.funcnn)) + geom_point() + scale_y_log10() + facet_grid(nug ~ corr.funcnn)
ggplot(data=e7df %>% mutate(RMSE=pmin(10, RMSE)) %>% sample_n(n()), mapping=aes(nallotted, RMSE, color=nug)) + geom_point() + scale_y_log10() + facet_grid(f ~ corr.funcnn)
ggplot(data=e7df %>% mutate(elapsedtime=elapsedtime) %>% sample_n(n()), mapping=aes(nallotted, elapsedtime, color=nug)) + geom_point() + scale_y_log10() + facet_grid(f ~ corr.funcnn)
ggplot(data=e7df %>% mutate(CRPscore=pmin(1e4, CRPscore)) %>% sample_n(n()), mapping=aes(nallotted, CRPscore, color=nug)) + geom_point() + scale_y_log10() + facet_grid(f ~ corr.funcnn)
}
if (F) {
# Run comparison on Wendland corr funcs (3)
ew <- ffexp$new(
eval_func = sggpexp_func,
corr = c('wendland0', 'wendland1', 'wendland2'),
sel.method = c("MAP"),
fd=data.frame(f=c("beambending","OTL_Circuit","piston","borehole","wingweight"), d=c(3,6,7,8,10),
row.names = c("beam","OTL","piston","borehole","wingweight"), stringsAsFactors = F),
batchsize=128,
sup.method="Correct",
Nsupppd=10,
parallel=if (version$os =="linux-gnu") {TRUE} else {FALSE},
parallel_cores = if (version$os =="linux-gnu") {20} else {3},
replicate=1:3,
folder_path= if (version$os =="linux-gnu") {"/home/collin/scratch/SGGP/scratch/InternalComparison/ComparerRun7wendland/"}
else {"./scratch/InternalComparison/ComparerRun7wendland"}
)
ew$recover_parallel_temp_save(delete_after=F)
table(ew$completed_runs)
ew$run_all(parallel=F,
parallel_temp_save=TRUE, delete_parallel_temp_save_after=FALSE,
write_start_files=TRUE, write_error_files=T)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.