src_Case_Study/Case_Study_package.R

### CASE STUDY DATA LOADING
library(devtools)
install_github("alexdidkovskyi/RDDOOK3")
library(RDDOOK3)
library(tictoc)
path_to_data <- c('data/Case study/Initial Data/')
data.file.full <- read.table(file = paste0(path_to_data,'clrPDFs_smoothedBsplines.txt'), header=TRUE)
t <- data.file.full[, 1]
lepdf.smsp <- data.file.full[, -1]

coords.file <- read.table(file = paste0(path_to_data,'CoordPDFs_NA.txt'), header = TRUE)[, 1:2] # this contains also coords of NA data
data.NA <- matrix(NA, ncol = dim(lepdf.smsp)[1], nrow = nrow(coords.file) - ncol(lepdf.smsp))
data.file <- data.frame(coords.file, rbind(data.NA, t(lepdf.smsp))) # this contains also NA data
#data.file <- data.file[, 1:3]
rm(data.NA)
nsub <- dim(data.file)[1]


# Upload densities
epdf.smsp <- read.table(paste0(path_to_data,'PDFs_smoothedBsplines.txt'), header = TRUE)[,-1]
n1 <- dim(epdf.smsp)[2] #Number of observations
t_step <- t[2] - t[1]

# Upload the grid
grid <- read.table(file = paste0(path_to_data,'Grid2Predict.txt'), header = TRUE)
grid <- data.frame(grid)
colnames(grid) <- c('x','y')
ngrid <- dim(coordinates(grid))[1] #Number of grid points

# Upload binary matrix describing the domain
in.poly.matrix <- as.matrix(read.table(paste0(path_to_data,'InPolyMatrix.txt'))) #Matrix that describes the domain


# Upload shape
shape <- readShapePoly(paste0(path_to_data,'estuaries.shp'))
shape1 <- shape[shape@data$ESTUARY != "James River" &
                  shape@data$ESTUARY != "VA Coastal Bays" &
                  shape@data$ESTUARY != "Delaware Bay" &
                  shape@data$ESTUARY != "Chincoteague Bay",]
shape <- gUnaryUnion(shape1) #final shape

# Points (NA) defining the borders
border.index <- which(is.na(data.file[, 3])) #border coordinates
border.coordinates <- data.file[border.index, 1:2]
border.length <- length(border.index)

rm(border.index, border.coordinates, shape1)
#______________________________________________________________________________________
### Define Parameters (User)
aggregation.type <- 'equal'
spdist.vector <- c('graph.distance')
spdist <- spdist.vector[1]
ker <- 0.75
model <- c('Exp') #VGM
nk_min <- 2
K <- 16
b <- 1000
coef <- 10
sill_ini <- range_ini <- nugget_ini <- NULL
n <- dim(lepdf.smsp)[1]
max_triangle_area = 0.01
type = 'foreach'


data <- data.frame(data.file)
colnames(data) <- c('x', 'y', paste0('z', 1:(ncol(data)-2)))
#1st-step - get mesh
mesh <- get_mesh(data.file, border.length, max_triangle_area)
distances_list <- get_distances_and_kernels(data.file, grid, mesh)
initial_variogram_list <- get_initial_variogram(data, border.length, t_step, nrow(data))
tic()
p <- Sys.time()
#Here we define centers list
centers_matrix <- get_centers_matrix(distances_list$graph.distance.complete,
                                     replications_number = b,
                                     replication_coef = coef,
                                     min_point_number =  nk_min,
                                     start_id = border.length +1,
                                     end_id = nsub,
                                     K  = K)


bootstrap_names <- get_bootstrap_names(b, K)
bootstrap_subdomains <- get_bootstrap_subdomains(centers_matrix, distances_list$dist_from_grid_to_point_graph,
                                                 distances_list$graph.distance, bootstrap_names, border.length)

list_of_subdomains_by_bootstrap_iteration <- lapply(1:b, function(x) which(grepl(paste0('^',x,'_'), bootstrap_names$id)))

###
kergrid <- lapply(bootstrap_subdomains, function(x){
  return(distances_list$kernel_grid_points_dist[x$center_id, x$grid_point_id])
})

#saveRDS(tibble::lst(data, grid, model, ker, bootstrap_subdomains,
#                    initial_variogram_list,   distances_list,
#                    nugget_ini_user = nugget_ini,
#                    sill_ini_user = sill_ini,
#                    range_ini_user = range_ini,
#                    tol = 1e6,
#                    parallel = T,
#                    type = 'foreach',  tol = 1e6,
#                    NU_cores = 0),
#        'initial_data.RDS')

tic()
kriging <- get_rdd_kriging(data = data, grid = grid, model = model, ker, bootstrap_subdomains,
                           initial_variogram_list,
                           distances_list,
                           nugget_ini_user =nugget_ini  ,
                           sill_ini_user = sill_ini,
                           range_ini_user = range_ini,
                           tol = 1e6,
                           parallel = T,
                           type = 'foreach',
                           NU_cores = 0)
toc()
names(kriging) <- bootstrap_names$id




tic()
kriging_domain_results <- get_domain_results(kriging,
                                                 bootstrap_subdomains,
                                                 list_of_subdomains_by_bootstrap_iteration,
                                                 distances_list$no.assg.grid,
                                                 parallel = F,
                                                 type = 'forh')
toc()
tic()
aggregated_results <- get_aggregated_results(kriging_domain_results)
toc()
tic()
kriging_variance <- get_kriging_variance(kriging_domain_results)
toc()
tic()
bootstrap_variance <- get_bootstrap_variance(kriging_domain_results, aggregated_results, B = b, t_step)

toc()

tic()
general_kriging <- get_rdd_kriging_general(data.file, grid, border.length, model, t_step,
                                           sill_ini = NULL, range_ini = NULL,
                                           nugget_ini = NULL,
                                           nk_min = 2,  b = 500, K = 8, parallel = T, type = 'furrr',
                                           NU_cores = 0, max_triangle_area = 0.01, SuppresMes = F, tol = 1e6)
toc()
print(Sys.time() - p)
str(general_kriging$kriging_domain_res)
str(general_kriging$fpred)
str(general_kriging$centers_matrix)
alexdidkovskyi/RDDOOK3 documentation built on Oct. 16, 2019, 1:35 p.m.