#
#
# Copyright (c) 2017-2023 King Abdullah University of Science and Technology
# All rights reserved.
#
# ExaGeoStat-R is a software package provided by KAUST
#
#
#
# @file bm.R
# ExaGeoStat R wrapper functions
#
# @version 1.2.0
#
# @author Faten Alamri & Sameh Abdulah
# @date 2022-01-02
#' Benchmark function to run a given FUN on a set of synthetic datasets generated by ExaGeoStatR
#' @param FUN: A predefined function to perform both modeling and prediction operations
#' @param dmetric: A string - distance metric - "euclidean" or "great_circle"
#' @param n: An integer - numer of spatial locations
#' @param min_seed: An integer - initial seed to generate the synthetic datasets
#' @param max_seed: An integer - last seed to generate the synthetic datasets
#' @param ncores: An integer - numer of CPU cores to use
#' @param ngpus: An integer - numer of GPUs to use
#' @return a list of prediction errors (MSE_vec, RMSE_vec, MAE_vec, MSLE_vec)
bm <- function(FUN, dmetric = "euclidean", n = 400, min_seed = 0, max_seed = 9, ts = 320,
lts = 0, ncores = 4, ngpus = 0, pgrid = 1, qgrid = 1)
{
k <- 10
out1<-list()
Datatray<-c() # Container to store all the data in three vectors (x, y, z) for all the seeds.
Datatray_training<- c() # Container to store the training data in three vectors (x, y, z) for each seed.
Datatray_testing<-c() # Container to store the training data in three vectors (x, y, z) for each seed.
splited_datatray <- c()
no_datasets = (max_seed-min_seed + 1 ) * 8 # Number of used datasets (8: number of theta vectors * number of seeds).
# Vectors to store the average SE of each dataset.
MSE_vec = rep(0, max_seed-min_seed + 1)
RMSE_vec = rep(0, max_seed-min_seed + 1)
MAE_vec = rep(0, max_seed-min_seed + 1)
MSLE_vec = rep(0, max_seed-min_seed + 1)
MLOE_vec = rep(0, max_seed-min_seed + 1)
MMOM_vec = rep(0, max_seed-min_seed + 1)
Time_vec = rep(0, max_seed-min_seed + 1)
variance_fisher_vec = rep(0, max_seed-min_seed+1)
smoothness_fisher_vec = rep(0, max_seed-min_seed+1)
range_fisher_vec = rep(0, max_seed-min_seed+1)
variance_fisher_true_vec = rep(0, max_seed-min_seed+1)
smoothness_fisher_true_vec = rep(0, max_seed-min_seed+1)
range_fisher_vec_true = rep(0, max_seed-min_seed+1)
# Different theta vectors.
THETA<- matrix(0, 8, 3)
THETA[4,]<-c(1.5, 0.017526, 2.3)
THETA[5,]<-c(1.5, 0.021080, 1.5)
THETA[3,]<-c(1.5, 0.030933, 0.6)
THETA[6,]<-c(1.5, 0.052579, 2.3)
THETA[7,]<-c(1.5, 0.063240, 1.5)
THETA[1,]<-c(1.5, 0.092798, 0.6)
THETA[8,]<-c(1.5, 0.168639, 1.5)
THETA[2,]<-c(1.5, 0.247462, 0.6)
# Container to store the output vector for each seed.
result_mloe <- list()
theta_out <- list()
# Container to store all the output vectors for all the seeds.
theta_out_all <-list()
# Output vector for each seed.
sigma_vec = rep(0,(max_seed-min_seed+1))
beta_vec = rep(0,(max_seed-min_seed+1))
nu_vec = rep(0,(max_seed-min_seed+1))
nuggets_vec = rep(0,(max_seed-min_seed+1))
Time = rep(0,(max_seed-min_seed+1))
B = c()
for(theta in 1:8){
print(paste("Data generation step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))
for (seed in ( min_seed: max_seed)) {
exageostat_init(hardware = list (ncores, ngpus, ts, lts , pgrid, qgrid)) # Initiate exageostat instance.
B = simulate_data_exact("ugsm-s", THETA[theta,], dmetric, n, seed-1) # Generate Z observation vector.
exageostat_finalize() # Finalize exageostat instance.
B_matrix=matrix(unlist(B), nrow=n, ncol=3)
Datatray = rbind(Datatray,B_matrix)
print(paste("Data spliting (training and testing) step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))
splited_datatray = splitting_data(B, k, n) # Splited_datatray with 6 vectors (x_tain, y_train, z_train, x_test, y_test, z_test).
Datatray_training = splited_datatray$train
Datatray_testing = splited_datatray$test
Data_train <- as.data.frame( Datatray_training)
Data_test <- as.data.frame( Datatray_testing)
Data_train_list <- list("x" = Data_train$x_train, "y" = Data_train$y_train, "z" = Data_train$z_train )
Data_predict_list <- list("x" = Data_test$x_test, "y" =Data_test$y_test)
print("Done\n")
print(paste("Data modeling/prediction step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))
tryCatch({
start.time <- Sys.time()
# Modeling_predicting.
result <- FUN(Data_train_list, Data_predict_list)
theta_out <- result$theta
z_out <- result$z_out
end.time <- Sys.time()
Time[seed] <- end.time - start.time
exageostat_init(hardware = list (ncores, ngpus, ts, lts , pgrid, qgrid)) # Initiate exageostat instance.
result_mloe <- exact_mloe_mmom( Data_train_list, Data_predict_list, theta_out, THETA[theta,], "ugsm-s", dmetric)
# Fisher matrix.
result_fisher_true_list <- fisher_general(list(x = Data_train_list$x, y = Data_train_list$y), THETA[theta,], dmetric)
result_fisher_true <- matrix(unlist(result_fisher_true_list), nrow = 3, ncol = 3)
# Invers fisher matrix.
print(result_fisher_true[1,1])
print(result_fisher_true[2,2])
print(result_fisher_true[3,3])
variance_fisher_true_vec[seed] <- result_fisher_true[1,1]
smoothness_fisher_true_vec[seed] <- result_fisher_true[2,2]
range_fisher_vec_true[seed] <- result_fisher_true[3,3]
result_fisher_list <- fisher_general(list(x = Data_train_list$x, y = Data_train_list$y),
c(theta_out[1], theta_out[2], theta_out[3]), dmetric)
result_fisher <- matrix(unlist(result_fisher_list), nrow = 3, ncol = 3)
print(result_fisher[1,1])
print(result_fisher[2,2])
print(result_fisher[3,3])
variance_fisher_vec[seed] <- result_fisher[1,1]
smoothness_fisher_vec[seed] <- result_fisher[2,2]
range_fisher_vec[seed] <- result_fisher[3,3]
exageostat_finalize() #Finalize exageostat instance.
MLOE_vec[seed] <- result_mloe[1]
MMOM_vec[seed] <- result_mloe[2]
write.table( cbind(theta_out[1],theta_out[2],theta_out[3], theta_out[4], MLOE_vec[seed],
MMOM_vec[seed]), paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-",
paste(THETA[theta,], "-", collapse = ""), n, "-boxplot_estimation.csv", sep = ""),
sep = ",", row.names = FALSE, col.names = FALSE, append = TRUE )
sigma_vec[seed] = theta_out[1]
beta_vec[seed] = theta_out[2]
nu_vec[seed] = theta_out[3]
nuggets_vec[seed] = theta_out[4]
MSE_vec[seed] <- mean( Data_test$z - z_out)^2
RMSE_vec[seed] <- sqrt( mean(Data_test$z - z_out)^2)
MAE_vec[seed] <- mean(abs(Data_test$z - z_out))
MSLE_vec[seed] <- mean(log(abs( z_out + 1))-log(abs(Data_test$z + 1)))^2
write.table( cbind( MSE_vec[seed], RMSE_vec[seed], MAE_vec[seed], MSLE_vec[seed]),
paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,], "-", collapse = ""),
n, "-boxplot_assessment.csv", sep = ""), sep = ",", row.names = FALSE, col.names = FALSE, append = TRUE )
Time_vec[seed] <- mean(Time)
},
error = function(cond){
message(paste("Error occure here"))
message("Here's the original error message:")
message(cond)
})
}
print("Done\n")
print("done all the seeds...........\n")
AV_Time<-mean(Time)
print(sigma_vec)
print(beta_vec)
print(nu_vec)
SV_variance = var(sigma_vec)
SV_smoothness = var(beta_vec)
SV_range = var(nu_vec)
AV_variance = mean(variance_fisher_vec)
AV_smoothness = mean(smoothness_fisher_vec)
AV_range = mean(range_fisher_vec)
TV_variance = mean(variance_fisher_true_vec)
TV_smoothness = mean(smoothness_fisher_true_vec)
TV_range = mean(range_fisher_vec_true)
write.table( cbind(AV_variance, AV_smoothness, AV_range, SV_variance, SV_smoothness, SV_range, TV_variance, TV_smoothness,
TV_range ), paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-",
paste(THETA[theta,], "-", collapse=""), n, "-fisher_info.csv", sep=""),
sep = ",", row.names = FALSE, col.names = FALSE,append=TRUE )
print("AV_Time (all the seeds) ")
print(AV_Time)
print(paste("Generate boxplots for theta = ", paste(THETA[theta,], ",", collapse=" ")," and seed = ", seed, "....." ))
pdf(paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,], "-", collapse=""),
n, "-boxplot_estimation.pdf", sep=""))
# Plot paramter estimate boxplot.
boxplot(sigma_vec, beta_vec, nu_vec, nuggets_vec,
las = 1,
main ="Paramter Estimate",
names = expression(sigma^2,beta, nu, tau^2),
medcol = "red", boxlty = 1, whisklty = 5,
staplelwd = 7)
dev.off()
print("Done\n")
print(paste("Generate boxplots for theta = ", paste(THETA[theta,], ",", collapse=" ")," and seed = ", seed, "....." ))
pdf(paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,], "-", collapse=""),n, "-boxplot_assessment.pdf", sep=""))
boxplot(MSE_vec, RMSE_vec, MAE_vec, MSLE_vec, main="Prediction Error",las = 1, # Plot assessment values box plot.
names = c("MSE","RMSE","MAE", "MSLE"),
medcol = "red", boxlty = 1, whisklty = 5,
staplelwd = 7 )
dev.off()
print("Done\n")
}
return(list(MSE_vec, RMSE_vec, MAE_vec, MSLE_vec))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.