R/RDD-OOK_Parallel_f.R

Defines functions is.natural create.trvcloud ini_variopar get_centers_matrix

# Check if natural
is.natural <- function(x, tol = .Machine$double.eps^0.5) {
  # This function checks is the input natural
  (abs(Im(x)) < tol) &&
    (abs(Re(x)) > tol) &&
    isTRUE(all.equal(x, round(x),
                     tolerance=tol,
                     check.attributes=FALSE,
                     check.names=FALSE))
}

##### Create.trvcloud
create.trvcloud <- function(data,
                            n1,
                            t_step) {
  # This function creates initial trvcloud (variogram structure)
  # Input:
  # data - data matrix with locations and values
  # n1 - number of points
  # t_step - step
  # Output:
  # v - initial variogram structure
  data.aux <- data[, 1:3]
  coordinates(data.aux) <- c('x','y')
  L2norm <- matrix(NA, n1, n1)
  for(i in 1:n1) {
    for(j in i:n1) {
      L2norm[i, j] <- trapzc(t_step = t_step, y = (data[, -(1:2)][, i] - data[, -(1:2)][, j])^2)
      L2norm[j, i] <- L2norm[i, j]
    }
  }
  # This is just used to build the variogram structure
  v <- variogram(z1 ~ 1, data = data.aux, cloud = T)
  v$gamma <- diag(L2norm[data.frame(v)$left, data.frame(v)$right])
  return(v)
}


ini_variopar <- function(v,
                         model,
                         nugget_ini_user,
                         sill_ini_user,
                         range_ini_user,
                         nstr = 1) {
  # This function updates initial variogram and nugget, sill, range in case of NULL input
  # INPUT:
  # v - variogram
  # model - model type
  # nugget_ini_user, sill_ini_user, range_ini_user - initial values
  # nstr - number of models
  # OUTPUT:
  # v - variogram
  # nugget_ini_user, sill_ini_user, range_ini_user - updated values

  # If the initial parameters are not given, estimate from the data
  if (is.null(nugget_ini_user)) {
    nugget_ini <- .1*median(c(rep(v$gamma[1], v$np[1]),
                              rep(v$gamma[2], v$np[2])))
  }

  if (is.null(sill_ini_user)) {
    sill_ini <- median(unlist(purrr::map2(v$gamma[(length(v$gamma)-1):(length(v$gamma)-4)],
                                          v$np[(length(v$gamma)-1):(length(v$gamma)-4)], rep))) - nugget_ini
  }

  if (is.null(range_ini_user)) {
    if (nstr > 1) {
      # if nested structure are given, initial ranges are asked to the user
      print("Please provide initial range for variogram estimation")
      return(-1)
    }

    if (model == 'Exp') {
      range_ini <- v$dist[min(which(v$gamma > .95*(sill_ini + nugget_ini)))]/3
    } else {
      range_ini <- v$dist[min(which(v$gamma > (sill_ini + nugget_ini)))]
    }
  }

  # Build initial variogram
  vm <- vgm(sill_ini, model[1], range_ini[1], nugget_ini[1])
  nstr <- length(model)

  if (nstr > 1) {
    for (j in 2:nstr)
      vm <- vgm(psill = sill_ini,
                model = model[j],
                range = range_ini[j],
                add.to = vm)
  }

  return(list(vm = vm,
              sill_ini = sill_ini,
              range_ini = range_ini,
              nugget_ini = nugget_ini))
}

get_centers_matrix <- function(graph.distance.complete,
                               replications_number,
                               replication_coef,
                               min_point_number,
                               start_id,
                               end_id, K) {
  # This function provides centers of subdomains for a given number of subdomains and bootstrap iterations
  # INPUT:
  # graph.distance.complete - graph distance between points
  # replication number - number of bootstrap iterations
  # replication coef - additional replications
  # min_point_number - min size of subdomain
  # start_id - initial id of observations
  # end_id - last id of observations
  # K - number of subdomaines
  # OUTPUT:
  # centers_list - matrix (replication_number*K), rows consist of centers of subdomains for a bootstrap iteration



    if (K < 1 | abs(is.natural(K) == F )) {
      'Error! K have to be natural number'
    } else {
      if (K == 1)  {
        centers_list <- matrix(start_id + 10, 1, 1)
      } else {
        centers_list <- replicate(replication_coef*replications_number, sample(start_id:end_id, size = K) )

        df_temp <- centers_list %>%
          as.data.frame() %>%
          gather() %>%
          .[, 2]  %>%
          unname() %>%
          graph.distance.complete[., start_id:end_id] %>%
          as.data.frame(.)%>%
          mutate(id = ((1:nrow(.) -1) %/% K ) + 1) %>%
          group_by(id) %>%
          summarise_all(which.min) %>%
          as.data.frame() %>%
          ungroup() %>%
          dplyr::select(-id) %>%
          apply(., 1, table) %>%
          {.} >= nk_min

        a <- which(colSums( df_temp) == K)
        if (length(a) > replications_number) {
          a <- a[1:replications_number]
        }

        centers_list <- centers_list[, a]

        return(centers_list)
      }
    }
  }

kriging_domain_aggregation <- function(id,
                                       bootstrap_subdomains,
                                       kriging,
                                       no.assg.grid){
  # This function aggregates results of kriging in each subdomain
  # INPUT:
  # id - id of bootstrap iteration
  # bootstrap_subdomains - subdomain related grid points - id
  # kriging - kriging results
  # no.assg.grid - ids of grid points which are not in domain
  # OUTPUT:
  # list of kriging predictions and kriging variances
  if (length(bootstrap_subdomains) == 1) {
    return(list(data = kriging[[1]]$fpred$values %>% t() %>%
                  rbind(matrix(NA,
                               nrow = length(no.assg.grid),
                               ncol = kriging[[1]]$fpred$values %>% nrow()
                  )  %>%
                    'rownames<-'(no.assg.grid) %>%
                    'colnames<-'(kriging[[1]]$fpred$values %>% rownames())
                  , .) %>%
                  .[order(rownames(.)
                          %>% as.numeric()), ],
                variance = kriging[[1]]$fpred$variance %>%
                  c(., rep(NA, length(no.assg.grid))) %>%
                  'names<-'(c(sapply(bootstrap_subdomains[id],
                                     '[',
                                     "grid_point_id") %>%
                                unlist(.), no.assg.grid))%>%
                  .[order(names(.) %>%
                            as.numeric())]))
  } else {
  return( list(data =  do.call('cbind',
                               sapply(kriging[id],
                                               function(x)
                                                 return(x$fpred$values))) %>%
                       t() %>%
                       'rownames<-'(sapply(bootstrap_subdomains[id],
                                           '[',
                                           "grid_point_id")
                                    %>% unlist(.)) %>%
                 rbind(matrix(NA,
                                         nrow = length(no.assg.grid),
                                         ncol = kriging[[1]]$fpred$values %>% nrow()
                                  )  %>%
                         'rownames<-'(no.assg.grid) %>%
                         'colnames<-'(kriging[[1]]$fpred$values %>% rownames())
                       , .) %>%
                 .[order(rownames(.)
                         %>% as.numeric()), ],
                     variance = sapply(kriging[id], function(x) return(x$fpred$variance)) %>%
                       unlist(.) %>% c(., rep(NA, length(no.assg.grid))) %>%
                       'names<-'(c(sapply(bootstrap_subdomains[id],
                                        '[',
                                        "grid_point_id") %>%
                                   unlist(.), no.assg.grid))%>%
                       .[order(names(.) %>%
                                 as.numeric())]))
  }
}

distance.on.graph.constrained <- function(data,
                                          mesh){
  # INPUT
  # data = data matrix (nsub*3 dim)
  # mesh = mesh obtained with Delaunay triangulation (with triangulate)
  # OUTPUT
  # graph.distance = nsub*nsub symmetric matrix (element (i,j) is the shortest length path between i and j on the graph defined by the mesh)

  # Edges list : edge.num * 2 matrix (each row contains the indeces of the nodes i and j defining the edge e = {ij})
  edge.list <- mesh$E
  edge.num <- dim(edge.list)[1]

  # Construct the graph (undirected)
  graph <- graph_from_edgelist(edge.list, directed = FALSE)

  # Given two linked nodes i and j the weigth of edge (i,j) is:
  # w(i,j) = euclidian distance (i, j)

  edge.weights <- apply(edge.list, 1, function(i) {
    dist(rbind(mesh$P[i[1], 1:2],
               mesh$P[i[2], 1:2]),
         method = 'euclidean')
  })

  graph.distance <- distances(graph, v = V(graph),
                              to = V(graph), mode = c("all"),
                              weights = edge.weights,
                              algorithm = c("automatic"))
  return(graph.distance)
}

grid2triangle.constrained <- function(grid.matrix,
                                      data.matrix,
                                      mesh){
  # INPUT:
  # grid : ngrid*2 matrix (the i-th row contains the coordianates of the i-th grid point)
  # data : nsub*2 matrix (the i-th row contains the coordinates of the i-th data point)
  # mesh : mesh obtaines with Delauny Triangulation (using create.MESH.2D function)
  # OUTPUT:
  # assign.matrix : ngrid*num.triangles (the (i,j) element = TRUE if the point grid[i,] is inside the j-th triangle, FALSE otherwise)
  # no.assg.complete :indices of grid points that the function does not assign to any triangle
  num.triangles <- dim(triangles)[1]
  ngrid <- dim(grid.matrix)[1]

  assign.matrix <- apply(mesh$T,
                         1,
                         function(i){
                           in.out(bnd = as.matrix(mesh$P[i, 1:2]),
                                  x = as.matrix(grid.matrix[, 1:2]))})

  no.assg.complete <- which(rowSums(assign.matrix) == 0)
  output <- list(assign.matrix, no.assg.complete)

  return(output)
}

get_bootstrap_names <- function(b, K){
  # This function provides names for each bootrstrap iteration
  # b - number of bootstrap iterations
  # K - number of subdomains
  # INPUT:
  # id of bootstrap_iteration
  return(expand.grid(1:b, 1:K) %>%
    arrange(Var1) %>%
    mutate(id = paste(Var1, Var2, sep = '_')) %>%
    dplyr::select(id))
}

get_bootstrap_subdomains <- function(centers_list,
                                     dist_from_grid_to_point_graph,
                                     graph.distance,
                                     bootstrap_names,
                                     border.length){
  # This function provides center of subdomain, related data points and grid points
  # INPUT:
  # centers list - matrix of centers for each bootstrap subdomains
  # dist_from_grid_to_point_graph - matrix of distances between grid points and data points
  # graph.distance - distance between data points
  # bootstrap_names - vec of names of bootstrap iterations
  # boorder.length - data border length
  # OUTPUT:
  # lists of subdomain centers, grid points and data points related to the center


  if (nrow(bootstrap_names) == 1) {
    bootstrap_subdomains <- list(list(
                                (border.length + 1):nrow(graph.distance),
                                which(!is.na(dist_from_grid_to_point_graph[1, ])), centers_list[1, 1]))

  } else {
    bootstrap_subdomains <- flatten(apply(centers_list, 2, function(temp_centers){
        grid_points_2_center <- apply(as.matrix(dist_from_grid_to_point_graph[temp_centers, ]), 2, which.min)
        grid_points_2_center[sapply(grid_points_2_center, function(x) length(x) == 0)] <- NA
        grid_points_2_center <- unlist(grid_points_2_center)

        df_grid_points_2_center <- data.frame(id = 1:length(grid_points_2_center),
                                             val = grid_points_2_center) %>%
                                   split.data.frame(.$val, drop = T) %>%
                                   sapply(., '[','id')

    data_points_2_center <- apply(graph.distance[temp_centers, ], 2 , which.min) %>% unlist()
    df_data_points_2_center <- data.frame(id = 1:length(data_points_2_center),
                                          val = data_points_2_center) %>%
                               filter(id > border.length) %>%
                               split.data.frame(.$val ) %>%
                               sapply(., '[','id')

    list_ret <- Map(list, df_data_points_2_center, df_grid_points_2_center, temp_centers)
    return(list_ret)
  }))
  }

  names(bootstrap_subdomains) <- bootstrap_names$id

  for (i in 1:length(bootstrap_subdomains)){
    names(bootstrap_subdomains[[i]]) <- c('data_point_id','grid_point_id', 'center_id')
  }
  return(bootstrap_subdomains)
}

subdomain_kriging <- function(v,
                              data,
                              model,
                              grid,
                              sFstat,
                              kernel_data_points_dist,
                              ker.width,
                              subdomains_list,
                              punto_lag,
                              nugget_ini_user = NULL,
                              sill_ini_user = NULL,
                              range_ini_user = NULL,
                              suppressMes = F,
                              tol = 1e6){
  # This function provides kriging for a particular subdomain
  # INPUT:
  # v - initial variogram
  # model - kriging model
  # grid - grid points locations
  # ...
  # OUTPUT:
  # list variogram fit values and grid predictoins

  nstr <- length(model)
  datak <- data[subdomains_list$data_point_id, ]
  coordinates.datak <- datak[, 1:2]      # extract coordinates
  ### ------------- 1.1 Create object
  sFstatk  <-  fstat(NULL, 'Z1',
                     coordinates.datak,
                     datak[, -c(1, 2)] %>% t() %>% as.data.frame())
  sFstatk$KrigFormula <- sFstat$KrigFormula

  ### ------------- 1.2 Estimate empirical variogram
  if(ker.width == 0) {
    # no kernel is used
    sFstatk <- sFstat
    vkB <- vk <- sFstat$variogram
  }

  if(ker.width > 0) {
    # else compute the weighted estimator
    vkB <- sFstat$variogram

    KER <- kernel_data_points_dist[subdomains_list$center_id, 1:nrow(data)]
    KER <- KER%*%t(KER)

    for(l in 1:dim(punto_lag)[1]) {
      inVh <- which(v$dist >= punto_lag[l, 1] &
                      v$dist < punto_lag[l, 2])
      indKER <- as.data.frame(v)[inVh, 6:7]%>% as.matrix()
      vkB$gamma[l] <- sum(v$gamma[inVh]*KER[indKER])/(sum(KER[indKER]))
    }

    vkB <- vkB[complete.cases(vkB), ]
    sFstatk$variogram <- vk <- vkB
  }

  ### ------------- 2.3 Fit valid model
  # Initialize variogram
  #**new**
  vm.ini <- ini_variopar(vk, model, nugget_ini_user, sill_ini_user, range_ini_user, nstr)
  vm <- vm.ini$vm
  sill_ini <- vm.ini$sill_ini
  nugget_ini <- vm.ini$nugget_ini
  range_ini <- vm.ini$range_ini
  #**end new**

  # Fit model
  #-- attempt 1
  sFstatk <- fitVariograms(sFstatk,
                           model = vm,
                           fitSills = TRUE,
                           forceNugget = FALSE)

  #-- attempt 2
  if(sFstatk$model$omni$Z1$psill[1] < 0 |
     sFstatk$model$omni$Z1$psill[2] < 0 |
     sFstatk$model$omni$Z1$range[2] > tol) {
    if(suppressMes == F){
      #print("Trying with fixed nugget")
    }
    sill_ini <- sill_ini+nugget_ini
    nugget_ini <- 0
    vm <- vgm(sill_ini, model[1], range_ini[1], nugget_ini[1])
    nstr <- length(model)
    if(nstr > 1){
      for(j in 2:nstr)
        vm <- vgm(psill = sill_ini,
                  model = model[j],
                  range = range_ini[j],
                  add.to = vm)
    }

    sFstatk.tmp <- try(fitVariograms(sFstatk,
                                     model = vm,
                                     fitSills = TRUE,
                                     forceNugget = TRUE))
    if(class(sFstatk.tmp)[1] == "try-error"){
      if(suppressMes == F)
        print('Warning: unable to fit the variogram, using the initial one')
      sFstatk$model$omni$Z1 <- vm
    } else sFstatk <- sFstatk.tmp
  }

  #-- use initial variogram model
  sFstatk$model$omni$Z1$range[which(sFstatk$model$omni$Z1$range < 0)] <- range_ini[which(sFstatk$model$omni$Z1$range < 0) - 1]
  if (sum(sFstatk$model$omni$Z1$psill) == 0 |
      sFstatk$model$omni$Z1$psill[1] < 0 |
      sFstatk$model$omni$Z1$psill[2] < 0 |
      sFstatk$model$omni$Z1$range[2] < 0 |
      is.null(sFstatk$model) ){
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
  }

  # ------------- 2. Perform kriging within the neighborhood
  # Add covariance
  cov.tmp <- try(addCovariance(sFstatk, 'omni'))

  if (class(cov.tmp)[1] == "try-error") {
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
    sFstatk <- addCovariance(sFstatk, 'omni')
  }

  if (class(cov.tmp)[1] != "try-error") {
    sFstatk <- cov.tmp
  }

  if (is.null(sFstatk$covariance)) {
    if (suppressMes == F)
      print('Warning: unable to fit the variogram, using the initial one')
    sFstatk$model$omni$Z1 <- vm
    sFstatk <- addCovariance(sFstatk, 'omni')
  }

  # Kriging
  pFstat.predictTr <- predictFstat(.g=sFstatk,
                                   .newCoordinates = grid[subdomains_list$grid_point_id, ],
                                   .what = 'Z1',
                                   .type = "OK")
  fpred <- c()
  vfitk <- sFstatk$model$omni$Z1

  if (length(subdomains_list$grid_point_id) > 0) {
    vfit_res_list <- list(val = c(vfitk[1,  2],
                                  vfitk[-1, 2],
                                  vfitk[-1, 3]),
                          nrow = length(subdomains_list$grid_point_id),
                          ncol = 1 + 2*nstr)

    if (dim(pFstat.predictTr$Forecast)[1] > dim(data)[2] - 2) {
      fpred[['values']] <-  pFstat.predictTr$Forecast[-c(1, 2), ]
    }
    else {
      fpred[['values']] <- pFstat.predictTr$Forecast
    }
    fpred[['variance']] <- pFstat.predictTr$Variance
  }

  return(list(vfit = vfit_res_list, fpred = fpred ))
}
alexdidkovskyi/RDDOOK3 documentation built on Oct. 16, 2019, 1:35 p.m.