src_Case_Study/tests.R

# K = 1, b = 1, 1 dim case working

library(tidyverse)
library(tictoc)
library(maptools)
library(rgeos)
library(sp)
library(igraph)
library(mgcv)
library(RTriangle)
library(geoR)
library(fdagstat)
#For plots
library(fda)
library(fields)
#For parallel
library(foreach)
library(doParallel)
library(parallel)
library(furrr)
K<- b <- 2

centers_list <- get_centers_list(distances.list$graph.distance.complete,
                                 replications_number = b,
                                 replication_coef = coef,
                                 min_point_number =  nk_min,
                                 start_id = border.length +1,
                                 end_id = nsub,
                                 K  = K)

bootstrap_names <- get_bootstrap_names(b, K)
bootstrap_subdomains <- get_bootstrap_subdomains(centers_list, distances.list$dist_from_grid_to_point_graph,
                                                 distances.list$graph.distance, bootstrap_names , border.length  )
list_of_domains_by_id <- lapply(1:b, function(x) which(grepl(paste0('^',x,'_'), bootstrap_names$id)))

###
kergrid <- lapply(bootstrap_subdomains, function(x){
  return(distances.list$kernel_grid_points_dist[x$center_id, x$grid_point_id])
})


kriging <- rdd_kriging(data = data, grid = grid, model = model, ker, bootstrap_subdomains,
                       initial.variogram.list,
                       distances.list,
                       nugget_ini_user =nugget_ini  ,
                       sill_ini_user = sill_ini,
                       range_ini_user = range_ini,
                       tol = 1e6,
                       parallel = F,
                       type = 'foreach',
                       NU_cores = 0)

names(kriging) <- bootstrap_names$id

kriging_domain_results <- rdd_domain_results(kriging,
                                             bootstrap_subdomains,
                                             no.assg.grid = no.assg.grid,
                                             parallel = F,
                                             type = 'foreach')



aggregation_results <- rdd_aggregate_kriging(kriging_domain_results)


kriging_variance <- rdd_kriging_variance(kriging_domain_results)
bootstrap_variance <- rdd_bootstrap_variance(kriging_domain_results, aggregation_results, B = b)



bootstrap_variance[5:10]






data =  do.call('cbind',
                sapply(kriging[id],
                       function(x)
                         return(x$fpred$values))) %>%
  t() %>%
  'rownames<-'(sapply(bootstrap_subdomains[id],
                      '[',
                      "grid_point_id")
               %>% unlist(.)) %>%
  rbind(data.frame(matrix(NA,
                          nrow = length(no.assg.grid),
                          ncol = kriging[[1]]$fpred$values %>% nrow() )
  )  %>%
    'rownames<-'(no.assg.grid) %>%
    'colnames<-'(kriging[[1]]$fpred$values %>% rownames())
  , .) %>%
  .[order(rownames(.)
          %>% as.numeric()), ] %>%
  .



data <- sapply(kriging_domain_results, '[','data') %>%
  simplify2array(.) %>%
  apply(c(1, 2), mean, na.rm = T)




nugget_ini_user = NULL
sill_ini_user = NULL
range_ini_user = NULL
tol = 1e6
suppressMes = F
subdomain_kriging(v, data, model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                  nugget_ini_user, sill_ini_user, range_ini_user,  suppressMes, tol=tol)















subdomain_kriging <- function(v, data, modl, grid,  sFstat, kernel_data_points_dist, ker.width, subdomains_list, punto_lag,
                              nugget_ini_user = NULL,
                              sill_ini_user = NULL,
                              range_ini_user = NULL,
                              suppressMes = F,
                              tol = 1e6){

  nstr <- length(modl)
  model <- modl
  datak <- data.frame(data[subdomains_list$data_point_id, ])
  coordinates.datak <- datak[, 1:2]      # extract coordinates
  ### ------------- 1.1 Create object
  sFstatk  <-  fstat(NULL, 'Z1',
                     as.data.frame(coordinates.datak),
                     as.data.frame(t(datak[, -c(1, 2)])))
  sFstatk$KrigFormula <- sFstat$KrigFormula

  ### ------------- 1.2 Estimate empirical variogram
  if(ker.width == 0) {
    # no kernel is used
    sFstatk <- sFstat
    vkB <- vk <- sFstat$variogram
  }

  if(ker.width > 0) {
    # else compute the weighted estimator
    vkB <- sFstat$variogram

    KER <- kernel_data_points_dist[subdomains_list$center_id, !is.na(data$z1)]
    KER <- KER%*%t(KER)

    for(l in 1:dim(punto_lag)[1]) {
      inVh <- which(v$dist >= punto_lag[l, 1] &
                      v$dist < punto_lag[l, 2])
      indKER <- as.matrix(as.data.frame(v)[inVh, 6:7])
      vkB$gamma[l] <- sum(v$gamma[inVh]*KER[indKER])/(sum(KER[indKER]))
    }

    vkB <- vkB[complete.cases(vkB), ]
    sFstatk$variogram <- vkB
    vk <- vkB
  }

  ### ------------- 2.3 Fit valid model
  # Initialize variogram
  #**new**
  vm.ini <- ini_variopar(vk, nugget_ini_user, sill_ini_user, range_ini_user, nstr)
  vm <- vm.ini$vm
  sill_ini <- vm.ini$sill_ini
  nugget_ini <- vm.ini$nugget_ini
  range_ini <- vm.ini$range_ini
  #**end new**

  # Fit model
  #-- attempt 1
  sFstatk <- fitVariograms(sFstatk,
                           model = vm,
                           fitSills = TRUE,
                           forceNugget = FALSE)

  #-- attempt 2
  if(sFstatk$model$omni$Z1$psill[1] < 0 |
     sFstatk$model$omni$Z1$psill[2] < 0 |
     sFstatk$model$omni$Z1$range[2] > tol) {
    if(suppressMes == F){
      #print("Trying with fixed nugget")
    }
    sill_ini <- sill_ini+nugget_ini
    nugget_ini <- 0
    vm <- vgm(sill_ini, model[1], range_ini[1], nugget_ini[1])
    nstr <- length(model)
    if(nstr > 1){
      for(j in 2:nstr)
        vm <- vgm(psill = sill_ini,
                  model = model[j],
                  range = range_ini[j],
                  add.to = vm)
    }

    sFstatk.tmp <- try(fitVariograms(sFstatk,
                                     model = vm,
                                     fitSills = TRUE,
                                     forceNugget = TRUE))
    if(class(sFstatk.tmp)[1] == "try-error"){
      if(suppressMes == F)
        print('Warning: unable to fit the variogram, using the initial one')
      sFstatk$model$omni$Z1 <- vm
    } else sFstatk <- sFstatk.tmp
  }

  #-- use initial variogram model
  sFstatk$model$omni$Z1$range[which(sFstatk$model$omni$Z1$range < 0)] <- range_ini[which(sFstatk$model$omni$Z1$range < 0) - 1]
  if (sum(sFstatk$model$omni$Z1$psill) == 0 |
      sFstatk$model$omni$Z1$psill[1] < 0 |
      sFstatk$model$omni$Z1$psill[2] < 0 |
      sFstatk$model$omni$Z1$range[2] < 0 |
      is.null(sFstatk$model) ){
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
  }

  # ------------- 2. Perform kriging within the neighborhood
  # Add covariance
  cov.tmp <- try(addCovariance(sFstatk, 'omni'))

  if (class(cov.tmp)[1] == "try-error") {
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
    sFstatk <- addCovariance(sFstatk, 'omni')
  }

  if (class(cov.tmp)[1] != "try-error") {
    sFstatk <- cov.tmp
  }

  if (is.null(sFstatk$covariance)) {
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
    sFstatk <- addCovariance(sFstatk, 'omni')
  }

  # Kriging
  pFstat.predictTr <- predictFstat(.g=sFstatk,
                                   .newCoordinates = data.frame(grid[subdomains_list$grid_point_id, ]),
                                   .what = 'Z1',
                                   .type = "OK")
  fpred <- c()
  vfitk <- sFstatk$model$omni$Z1

  if (length(subdomains_list$grid_point_id) > 0) {
    vfit_res_list <- list(val = c(vfitk[1,  2],
                                  vfitk[-1, 2],
                                  vfitk[-1, 3]),
                          nrow = length(subdomains_list$grid_point_id),
                          ncol = 1 + 2*nstr)

    ### don`t forget about transpose part`
    if (dim(pFstat.predictTr$Forecast)[1] > dim(data)[2] - 2) {
      fpred[['values']] <-  pFstat.predictTr$Forecast[-c(1, 2), ]
    }
    else {
      fpred[['values']] <- pFstat.predictTr$Forecast
    }
    fpred[['variance']] <- pFstat.predictTr$Variance
  }

  return(list(vfit = vfit_res_list, fpred = fpred ))
}

rdd_kriging <- function(data, grid, model, ker,
                        bootstrap_subdomains,
                        initial.variogram.list,
                        distances.list,
                        nugget_ini_user = NULL,
                        sill_ini_user = NULL,
                        range_ini_user = NULL,
                        tol = 1e6,
                        parallel = T, type = 'foreach', NU_cores = 0,
                        suppressMes = F){
  #Kriging

  v <- initial.variogram.list$v
  sFstat <- initial.variogram.list$sFstat
  punto_lag <- initial.variogram.list$punto_lag
  kernel_data_points_dist <- distances.list$kernel_data_points_dist
  model1 <- model

  if (parallel == T) {
    if (type == 'foreach') {

      detectCores()
      cl <- detectCores() - NU_cores

      doParallel::registerDoParallel(cl)
      kriging <- foreach(subdomains_list = bootstrap_subdomains,
                         .combine = list,
                         .export = c("v",'data','grid','sFstat','kernel_data_points_dist','ker','punto_lag','nugget_ini_user',
                                     'sill_ini_user','range_ini_user','subdomain_kriging', 'ini_variopar','suppressMes', 'model1'),
                         .packages = c('dplyr','fdagstat'),
                         .multicombine = T,
                         .maxcombine = length(bootstrap_subdomains))%dopar%{
                           subdomain_kriging(v, data, modl = model1, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                             nugget_ini_user, sill_ini_user, range_ini_user,  suppressMes, tol=tol)
                         }

      doParallel::stopImplicitCluster()
    }
    else {
      plan(multiprocess)
      kriging <- furrr::future_map(bootstrap_subdomains,
                                   function(subdomains_list){
                                     subdomain_kriging(v, data, model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                                       nugget_ini_user, sill_ini_user, range_ini_user, suppressMes = F, tol = tol)
                                   })

    }
  }

  if (parallel == F) {
    kriging <- purrr::map(bootstrap_subdomains,
                          function(subdomains_list){
                            subdomain_kriging(v, data, model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                              nugget_ini_user, sill_ini_user, range_ini_user, suppressMes = F, tol = tol)
                          })
  }

  return(kriging)
}





kriging <- rdd_kriging(data = data, grid = grid, model = 'Exp', ker, bootstrap_subdomains,
                       initial.variogram.list,
                       distances.list,
                       nugget_ini_user =nugget_ini  ,
                       sill_ini_user = sill_ini,
                       range_ini_user = range_ini,
                       tol = 1e6,
                       parallel = T,
                       type = 'foreach',
                       NU_cores = 0)





library(profvis)

NU_cores <- 0
no.assg.grid <- distances.list$no.assg.grid
id <- list_of_domains_by_id[[1]]


tic()
profvis({detectCores()
  cl <- detectCores() - NU_cores

  doParallel::registerDoParallel(cl)
  kriging_domains_results <- foreach(id = list_of_domains_by_id,
                                     .combine = list,
                                     .export = c('kriging', 'bootstrap_subdomains','kriging_domains_aggregation','no.assg.grid'),
                                     .packages = c('dplyr',"magrittr"),
                                     .multicombine = T,
                                     .maxcombine = length(bootstrap_names$id))%dopar%{
                                       return(kriging_domains_aggregation(id, bootstrap_subdomains, kriging, no.assg.grid ))
                                     }

  doParallel::stopImplicitCluster()})
toc()


library(tictoc)
tic()
profvis({
  kriging_domains_results <- foreach(id = list_of_domains_by_id,
                                     .combine = list,
                                     .export = c('kriging', 'bootstrap_subdomains','kriging_domains_aggregation','no.assg.grid'),
                                     .packages = c('dplyr',"magrittr"),
                                     .multicombine = T,
                                     .maxcombine = length(bootstrap_names$id))%dopar%{
                                       return(kriging_domains_aggregation(id, bootstrap_subdomains, kriging, no.assg.grid ))
                                     }

})
toc()
rm(kriging_domains_results)

tic()
profvis({
  kriging_domains_results <- purrr::map(list_of_domains_by_id,
                                        function(id){
                                          kriging_domains_aggregation(id, bootstrap_subdomains, kriging, no.assg.grid )
                                        })
})
toc()


v <- initial.variogram.list$v
sFstat <- initial.variogram.list$sFstat
punto_lag <- initial.variogram.list$punto_lag
kernel_data_points_dist <- distances.list$kernel_data_points_dist
nugget_ini_user <- NULL
range_ini_user <- NULL
sill_ini_user <- NULL
suppressMes <- F
tol = 1e6
cl <- detectCores() - NU_cores

doParallel::registerDoParallel(cl)

tic()
profvis({

      kriging <- foreach(subdomains_list = bootstrap_subdomains,
                         .combine = list,
                         .export = c("v",'data', 'model','grid','sFstat','kernel_data_points_dist','ker','punto_lag','nugget_ini_user',
                                     'sill_ini_user','range_ini_user', 'ini_variopar','suppressMes','subdomain_kriging'),
                         .packages = c('dplyr','fdagstat'),
                         .multicombine = F,
                         .maxcombine = length(bootstrap_subdomains))%dopar%{
                           subdomain_kriging(v, data,model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                             nugget_ini_user, sill_ini_user, range_ini_user,  suppressMes, tol=tol)
                         }

})
toc()



detectCores()
cl <- detectCores() - NU_cores

doParallel::registerDoParallel(cl)
tic()
type = 'foreach'
parallel = T
v <- initial.variogram.list$v
sFstat <- initial.variogram.list$sFstat
punto_lag <- initial.variogram.list$punto_lag
kernel_data_points_dist <- distances.list$kernel_data_points_dist

tic()
profvis({


  if (parallel == T) {
    if (type == 'foreach') {

      kriging <- foreach(subdomains_list = bootstrap_subdomains,
                         .combine = list,
                         .export = c("v",'data', 'model','grid','sFstat','kernel_data_points_dist','ker','punto_lag','nugget_ini_user',
                                     'sill_ini_user','range_ini_user', 'ini_variopar','suppressMes','subdomain_kriging'),
                         .packages = c('dplyr','fdagstat'),
                         .multicombine = T,
                         .maxcombine = length(bootstrap_subdomains))%dopar%{
                           subdomain_kriging(v, data,model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                             nugget_ini_user, sill_ini_user, range_ini_user,  suppressMes, tol=tol)
                         }

    }
  }
})
toc()
doParallel::stopImplicitCluster()
library(profvis)
tic()
profvis({

  kriging <- purrr::map(bootstrap_subdomains,
                        function(subdomains_list){
                          subdomain_kriging(v, data, model, grid, sFstat,kernel_data_points_dist, ker, subdomains_list, punto_lag,
                                            nugget_ini_user, sill_ini_user, range_ini_user, suppressMes = F, tol = tol)
                        })
})

toc()

a <- sapply(kriging_domain_results, '[', 'variance') %>% do.call('rbind', .)

aggreagated_results[]

fpred.d <- t(clr2density.mv(t(aggregated_results), t, t_step))
clr2density.mv <- function(clr.df, z,z_step){
  # transform a dataset of clr into densities

  N_samples <- dim(clr.df)[2]
  dens.df <- matrix(0, nrow = length(z), ncol = N_samples)

  for(j in 1:N_samples) {
    dens.df[,j] <- clr2density(clr.df[,j], z, z_step)
  }
  return(dens.df)
}




nstr <- length(model)

class(data)
datak <- data.frame(data[subdomains_list$data_point_id, ])
coordinates.datak <- datak[, 1:2]      # extract coordinates
### ------------- 1.1 Create object
sFstatk  <-  fstat(NULL, 'Z1',
                   as.data.frame(coordinates.datak),
                   as.data.frame(t(datak[, -c(1, 2)])))
sFstatk$KrigFormula <- sFstat$KrigFormula

### ------------- 1.2 Estimate empirical variogram
if(ker.width == 0) {
  # no kernel is used
  sFstatk <- sFstat
  vkB <- vk <- sFstat$variogram
}

if(ker.width > 0) {
  # else compute the weighted estimator
  vkB <- sFstat$variogram

  KER <- kernel_data_points_dist[subdomains_list$center_id, !is.na(data$z1)]
  KER <- KER%*%t(KER)

  for(l in 1:dim(punto_lag)[1]) {
    inVh <- which(v$dist >= punto_lag[l, 1] &
                    v$dist < punto_lag[l, 2])
    indKER <- as.matrix(as.data.frame(v)[inVh, 6:7])
    vkB$gamma[l] <- sum(v$gamma[inVh]*KER[indKER])/(sum(KER[indKER]))
  }

  vkB <- vkB[complete.cases(vkB), ]
  sFstatk$variogram <- vk <- vkB
}

### ------------- 2.3 Fit valid model
# Initialize variogram
#**new**
vm.ini <- ini_variopar(vk, model, nugget_ini_user, sill_ini_user, range_ini_user, nstr)
vm <- vm.ini$vm
sill_ini <- vm.ini$sill_ini
nugget_ini <- vm.ini$nugget_ini
range_ini <- vm.ini$range_ini
#**end new**

# Fit model
#-- attempt 1
sFstatk <- fitVariograms(sFstatk,
                         model = vm,
                         fitSills = TRUE,
                         forceNugget = FALSE)

#-- attempt 2
if(sFstatk$model$omni$Z1$psill[1] < 0 |
   sFstatk$model$omni$Z1$psill[2] < 0 |
   sFstatk$model$omni$Z1$range[2] > tol) {
  if(suppressMes == F){
    #print("Trying with fixed nugget")
  }
  sill_ini <- sill_ini+nugget_ini
  nugget_ini <- 0
  vm <- vgm(sill_ini, model[1], range_ini[1], nugget_ini[1])
  nstr <- length(model)
  if(nstr > 1){
    for(j in 2:nstr)
      vm <- vgm(psill = sill_ini,
                model = model[j],
                range = range_ini[j],
                add.to = vm)
  }

  sFstatk.tmp <- try(fitVariograms(sFstatk,
                                   model = vm,
                                   fitSills = TRUE,
                                   forceNugget = TRUE))
  if(class(sFstatk.tmp)[1] == "try-error"){
    if(suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
  } else sFstatk <- sFstatk.tmp
}

#-- use initial variogram model
sFstatk$model$omni$Z1$range[which(sFstatk$model$omni$Z1$range < 0)] <- range_ini[which(sFstatk$model$omni$Z1$range < 0) - 1]
if (sum(sFstatk$model$omni$Z1$psill) == 0 |
    sFstatk$model$omni$Z1$psill[1] < 0 |
    sFstatk$model$omni$Z1$psill[2] < 0 |
    sFstatk$model$omni$Z1$range[2] < 0 |
    is.null(sFstatk$model) ){
  if (suppressMes == F)
    print('Warning: unable to fit the variogram, using the initial one')
  sFstatk$model$omni$Z1 <- vm
}

# ------------- 2. Perform kriging within the neighborhood
# Add covariance
cov.tmp <- try(addCovariance(sFstatk, 'omni'))

if (class(cov.tmp)[1] == "try-error") {
  if (suppressMes == F)
    print('Warning: unable to fit the variogram, using the initial one')
  sFstatk$model$omni$Z1 <- vm
  sFstatk <- addCovariance(sFstatk, 'omni')
}

if (class(cov.tmp)[1] != "try-error") {
  sFstatk <- cov.tmp
}

if (is.null(sFstatk$covariance)) {
  if (suppressMes == F)
    print('Warning: unable to fit the variogram, using the initial one')
  sFstatk$model$omni$Z1 <- vm
  sFstatk <- addCovariance(sFstatk, 'omni')
}

# Kriging
pFstat.predictTr <- predictFstat(.g=sFstatk,
                                 .newCoordinates = data.frame(grid[subdomains_list$grid_point_id, ]),
                                 .what = 'Z1',
                                 .type = "OK")
fpred <- c()
vfitk <- sFstatk$model$omni$Z1

if (length(subdomains_list$grid_point_id) > 0) {
  vfit_res_list <- list(val = c(vfitk[1,  2],
                                vfitk[-1, 2],
                                vfitk[-1, 3]),
                        nrow = length(subdomains_list$grid_point_id),
                        ncol = 1 + 2*nstr)

  if (dim(pFstat.predictTr$Forecast)[1] > dim(data)[2] - 2) {
    fpred[['values']] <-  pFstat.predictTr$Forecast[-c(1, 2), ]
  }
  else {
    fpred[['values']] <- pFstat.predictTr$Forecast
  }
  fpred[['variance']] <- pFstat.predictTr$Variance
}

return(list(vfit = vfit_res_list, fpred = fpred ))



datak[, -c(1, 2)]%>% t() %>% as.data.frame()



kerfn= function(newdata,center,dist='euclidean',ker.type='Gau',param, distance.matrix = NULL)
{
  # This function compute the value of the kernel for a point given a reference center
  # Input:
  # newdata = coordinates of the point
  # center = coordinates of the reference center
  # dist = method to compute distances ('euclidean' or 'graph.distance')
  #
  # ker.type = type of kernel (only Gaussian kernel implemented so far)
  # param = parameters that define the kernel
  # distance.matrix = 1. if the point is an observed location (data point)
  #                      distance.matrix = graph.distance nsub*nsub
  #                                        (the (i,j) element is the length of the shortest path between the observed points i and j)
  #                   2. if the point is an unobserved location (grid point)
  #                      distance.matrix = graph.distance.grid.centers K*ngrid
  #                                        (the (k,l) element is the distance on the graph between the k-th center and the l-th grid point)
  # Output:
  # value of the kernel function
  center = as.numeric(center)
  if(dist == 'euclidean'){
    d=as.matrix(dist(rbind(center[1:2],newdata), 'eucl'))[1,-1]
  }

  if(dist == 'graph.distance'){
    # d = k-th row of the distance matrix
    d = distance.matrix[as.integer(center[3]),]
  }

  if(ker.type!='Gau')
  {
    print('Error: only Gaussian kernel implemented so far (*Gau*)')
  }
  eps=param[1]
  return(exp(-1/(2*eps^2)*(d^2)))
}


kernel_data_points_dist <- vapply(graph.distance.complete, function(x){
  exp(-1/(2*ker^2)*(x^2))
}, numeric(1)) %>%
  matrix(dim(graph.distance.complete))
alexdidkovskyi/RDDOOK3 documentation built on Oct. 16, 2019, 1:35 p.m.