R/bm.R

Defines functions bm

Documented in bm

#
#
# Copyright (c) 2017-2023 King Abdullah University of Science and Technology
# All rights reserved.
#
# ExaGeoStat-R is a software package provided by KAUST
#
#
#
# @file bm.R
# ExaGeoStat R wrapper functions
#
# @version 1.2.0
#
# @author Faten Alamri & Sameh Abdulah
# @date 2022-01-02

#' Benchmark function to run a given FUN on a set of synthetic datasets generated by ExaGeoStatR
#' @param FUN: A predefined function to perform both modeling and prediction operations
#' @param dmetric:  A string -  distance metric - "euclidean" or "great_circle"
#' @param n:  An integer -  numer of spatial locations
#' @param min_seed:  An integer -  initial seed to generate the synthetic datasets
#' @param max_seed:  An integer -  last seed to generate the synthetic datasets
#' @param ncores:  An integer -  numer of CPU cores to use
#' @param ngpus:  An integer -  numer of GPUs to use
#' @return a list of prediction errors (MSE_vec, RMSE_vec, MAE_vec, MSLE_vec)

bm <- function(FUN, dmetric = "euclidean", n = 400, min_seed = 0, max_seed = 9, ts = 320, 
               lts = 0, ncores = 4, ngpus = 0, pgrid = 1, qgrid = 1)
{
    k <- 10
    out1<-list()

    Datatray<-c()                                     # Container to store all the data in three vectors (x, y, z) for all the seeds.
    Datatray_training<- c()                           # Container to store the training data in three vectors (x, y, z) for each seed.
    Datatray_testing<-c()                             # Container to store the training data in three vectors (x, y, z) for each seed.
    splited_datatray <- c()

    no_datasets = (max_seed-min_seed + 1 ) * 8        # Number of used datasets (8: number of theta vectors * number of seeds).
    # Vectors to store the average SE of each dataset.
    MSE_vec  = rep(0, max_seed-min_seed + 1)
    RMSE_vec = rep(0, max_seed-min_seed + 1)
    MAE_vec  = rep(0, max_seed-min_seed + 1)
    MSLE_vec = rep(0, max_seed-min_seed + 1)
    MLOE_vec = rep(0, max_seed-min_seed + 1)
    MMOM_vec = rep(0, max_seed-min_seed + 1)
    Time_vec = rep(0, max_seed-min_seed + 1)
    variance_fisher_vec        = rep(0, max_seed-min_seed+1)
    smoothness_fisher_vec      = rep(0, max_seed-min_seed+1)
    range_fisher_vec           = rep(0, max_seed-min_seed+1)
    variance_fisher_true_vec   = rep(0, max_seed-min_seed+1)
    smoothness_fisher_true_vec = rep(0, max_seed-min_seed+1)
    range_fisher_vec_true      = rep(0, max_seed-min_seed+1)

    # Different theta vectors.
    THETA<- matrix(0, 8, 3)
    THETA[4,]<-c(1.5, 0.017526, 2.3)
    THETA[5,]<-c(1.5, 0.021080, 1.5)
    THETA[3,]<-c(1.5, 0.030933, 0.6)
    THETA[6,]<-c(1.5, 0.052579, 2.3)
    THETA[7,]<-c(1.5, 0.063240, 1.5)
    THETA[1,]<-c(1.5, 0.092798, 0.6)
    THETA[8,]<-c(1.5, 0.168639, 1.5)
    THETA[2,]<-c(1.5, 0.247462, 0.6)

    # Container to store the output vector for each seed.
    result_mloe <- list()
    theta_out   <- list()
    # Container to store all the output vectors for all the seeds.
    theta_out_all <-list()

    # Output vector for each seed.
    sigma_vec   = rep(0,(max_seed-min_seed+1))	
    beta_vec    = rep(0,(max_seed-min_seed+1))
    nu_vec      = rep(0,(max_seed-min_seed+1))
    nuggets_vec = rep(0,(max_seed-min_seed+1))
    Time        = rep(0,(max_seed-min_seed+1))

    B = c()

    for(theta in 1:8){
        print(paste("Data generation step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))
        for (seed in ( min_seed: max_seed)) {
            exageostat_init(hardware =   list (ncores, ngpus, ts, lts , pgrid, qgrid))      # Initiate exageostat instance.
            B = simulate_data_exact("ugsm-s", THETA[theta,], dmetric, n, seed-1)            # Generate Z observation vector.
            exageostat_finalize()                                                           # Finalize exageostat instance.

            B_matrix=matrix(unlist(B), nrow=n, ncol=3)  
            Datatray = rbind(Datatray,B_matrix)

            print(paste("Data spliting (training and testing) step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))
            splited_datatray = splitting_data(B, k, n)                                  # Splited_datatray with 6 vectors (x_tain, y_train, z_train, x_test, y_test, z_test).

            Datatray_training = splited_datatray$train
            Datatray_testing  = splited_datatray$test

            Data_train <- as.data.frame( Datatray_training)
            Data_test <- as.data.frame( Datatray_testing)


            Data_train_list <- list("x" = Data_train$x_train, "y" = Data_train$y_train, "z" = Data_train$z_train )
            Data_predict_list <- list("x" = Data_test$x_test, "y" =Data_test$y_test)

            print("Done\n")
            print(paste("Data modeling/prediction step for theta = (", paste(THETA[theta,], collapse=", "), ") ....."))

            tryCatch({
                start.time <- Sys.time()
                # Modeling_predicting.
                result <- FUN(Data_train_list, Data_predict_list)
                theta_out <- result$theta
                z_out <- result$z_out
                end.time <- Sys.time()
                Time[seed] <- end.time - start.time
                exageostat_init(hardware =   list (ncores, ngpus, ts, lts , pgrid, qgrid))     # Initiate exageostat instance.
                result_mloe <- exact_mloe_mmom( Data_train_list, Data_predict_list, theta_out, THETA[theta,], "ugsm-s", dmetric)

                # Fisher matrix.
                result_fisher_true_list <- fisher_general(list(x = Data_train_list$x, y = Data_train_list$y), THETA[theta,], dmetric) 
                result_fisher_true <- matrix(unlist(result_fisher_true_list), nrow = 3, ncol = 3)

                # Invers fisher matrix.
                print(result_fisher_true[1,1])
                print(result_fisher_true[2,2])
                print(result_fisher_true[3,3])
                variance_fisher_true_vec[seed] <- result_fisher_true[1,1]
                smoothness_fisher_true_vec[seed] <- result_fisher_true[2,2]
                range_fisher_vec_true[seed] <- result_fisher_true[3,3]

                result_fisher_list <- fisher_general(list(x = Data_train_list$x, y = Data_train_list$y), 
                                                     c(theta_out[1], theta_out[2], theta_out[3]), dmetric)
                result_fisher <- matrix(unlist(result_fisher_list), nrow = 3, ncol = 3)

                print(result_fisher[1,1])
                print(result_fisher[2,2])
                print(result_fisher[3,3])
                variance_fisher_vec[seed] <- result_fisher[1,1]
                smoothness_fisher_vec[seed] <- result_fisher[2,2]
                range_fisher_vec[seed] <- result_fisher[3,3]

                exageostat_finalize()                          #Finalize exageostat instance.
                MLOE_vec[seed] <- result_mloe[1]
                MMOM_vec[seed] <- result_mloe[2]
                write.table( cbind(theta_out[1],theta_out[2],theta_out[3], theta_out[4],  MLOE_vec[seed],  
                                   MMOM_vec[seed]), paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-",
                                   paste(THETA[theta,], "-", collapse = ""),  n, "-boxplot_estimation.csv", sep = ""),
                            sep = ",", row.names = FALSE, col.names = FALSE, append = TRUE )

                sigma_vec[seed]   = theta_out[1]
                beta_vec[seed]    = theta_out[2]
                nu_vec[seed]      = theta_out[3]
                nuggets_vec[seed] = theta_out[4]

                MSE_vec[seed] <- mean( Data_test$z - z_out)^2
                RMSE_vec[seed] <- sqrt( mean(Data_test$z - z_out)^2)
                MAE_vec[seed] <- mean(abs(Data_test$z - z_out))
                MSLE_vec[seed] <- mean(log(abs( z_out + 1))-log(abs(Data_test$z + 1)))^2
                write.table( cbind( MSE_vec[seed], RMSE_vec[seed], MAE_vec[seed], MSLE_vec[seed]),
                            paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,], "-", collapse = ""),
                                  n, "-boxplot_assessment.csv", sep = ""), sep = ",", row.names = FALSE, col.names = FALSE, append = TRUE )
                Time_vec[seed] <- mean(Time)
            },

            error = function(cond){
                message(paste("Error occure here"))
                message("Here's the original error message:")
                message(cond)
            })
        }

        print("Done\n")
        print("done all the seeds...........\n")

        AV_Time<-mean(Time)
        print(sigma_vec)
        print(beta_vec)
        print(nu_vec)      
        SV_variance = var(sigma_vec)
        SV_smoothness = var(beta_vec)
        SV_range = var(nu_vec)

        AV_variance = mean(variance_fisher_vec)
        AV_smoothness = mean(smoothness_fisher_vec)
        AV_range = mean(range_fisher_vec)

        TV_variance = mean(variance_fisher_true_vec)
        TV_smoothness = mean(smoothness_fisher_true_vec)
        TV_range = mean(range_fisher_vec_true)


        write.table( cbind(AV_variance, AV_smoothness, AV_range, SV_variance, SV_smoothness,  SV_range, TV_variance, TV_smoothness,
                           TV_range ), paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", 
                           paste(THETA[theta,], "-", collapse=""),  n, "-fisher_info.csv", sep=""), 
                    sep = ",", row.names = FALSE, col.names = FALSE,append=TRUE )

        print("AV_Time (all the seeds) ")
        print(AV_Time)
        print(paste("Generate boxplots for theta = ", paste(THETA[theta,],  ",", collapse=" ")," and  seed = ", seed, "....." ))

        pdf(paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,], "-", collapse=""),
                  n, "-boxplot_estimation.pdf", sep=""))

        # Plot paramter estimate boxplot.
        boxplot(sigma_vec, beta_vec, nu_vec, nuggets_vec,       
                las  = 1,
                main ="Paramter Estimate",
                names = expression(sigma^2,beta, nu, tau^2),
                medcol = "red", boxlty = 1, whisklty = 5,
                staplelwd = 7)
        dev.off()

        print("Done\n")
        print(paste("Generate boxplots for theta = ", paste(THETA[theta,],  ",", collapse=" ")," and  seed = ", seed, "....." ))
        pdf(paste("/tmp/exageostatr/", as.character(substitute(FUN)), "-", paste(THETA[theta,],  "-", collapse=""),n, "-boxplot_assessment.pdf", sep=""))

        boxplot(MSE_vec, RMSE_vec, MAE_vec, MSLE_vec, main="Prediction Error",las = 1,                         # Plot assessment values box plot.
                names = c("MSE","RMSE","MAE", "MSLE"),
                medcol = "red", boxlty = 1, whisklty = 5,
                staplelwd = 7 )
        dev.off()
        print("Done\n")
    }
    return(list(MSE_vec, RMSE_vec, MAE_vec, MSLE_vec))
}
ecrc/exageostatR documentation built on June 9, 2025, 9:06 p.m.