This notebook simulates images according to the log Cox Matern Process discussed in Section 3.2 and plotted in Figure 3. Note that these data are already available in the stability_data_sim.tar.gz archive within raw_data.tar.gz linked in Supplementary Section A. However, if the goal was to regenerate those input data archives, this script could be used.

Generating each image takes a few seconds, and we need many training images, so we've parallized generation across cores. This is why we're loaded the doParallel, foreach and parallel packages.

library(LFBCR)
library(doParallel)
library(foreach)
library(parallel)
library(reticulate)
library(tidyverse)
np <- reticulate::import("numpy")

This is a specification of all the parameters given in Table 1.

x <- expand.grid(seq(0.1, 1, 0.05), seq(0.1, 1, 0.05))
n_original <- 100 # number of points
beta_r <- c(-0.5, 0, 0.5)
nu_r <- 1 # smoothness of the relative intensity functions (higher is smoother)
alpha_r <- 0.5 # bandwidth of the relative processes (higher is smoother)
nu <- 2 # smoothness of the overall process
alpha <- 2 # bandwidth of the overall process
tau <- 1.5 # temperature for the marking (0 -> uniform, inf -> hard assignment)
lambdas <- c(20, 50, 100) # rate parameters in gamma controlling size

The block below shows an example image.

set.seed(10)
result <- sim_wrapper(x, n_original, nu, alpha, beta_r, nu_r, alpha_r, tau, lambdas)
plot_matern(result)

Simulating Images

Next we relate the parameters underlyign each image to the outcome $y$ associated with each image. We made the following assumptions,

so that the signs are in the intuitive direction and so that no one group of terms dominates.

We generate the parameters according to a few different distributions, depending on whether they are allowed to be negative.

n_images <- 1e4
n_original <- runif(n_images, 50, 1000)
beta_r <- 0.3 * runif(n_images * 3, -.5, .5) %>%
  matrix(ncol = 3)
nu_r <- runif(n_images, 0, 3)
alpha_r <- runif(n_images, 0, 3)
nu <- runif(n_images, 0, 8) # overall process is (usually) smoother
alpha <- runif(n_images, 0, 8)
tau <- runif(n_images, 0, 3)
lambdas <- runif(n_images * 3, 100, 500) %>%
  matrix(ncol = 3)

# simulate the response
X <- scale(cbind(n_original, beta_r, nu_r, alpha_r, nu, alpha, tau, lambdas[, 1]))
colnames(X) <- c("N", paste0("beta_", 1:ncol(beta_r)), "nu_r", "alpha_r", "nu", "alpha", "tau", "lambda_1")
y <- X %*% c(.5, 1, -1, -1, -.5, -.5, -.5, -.5, .5, 1) / sqrt(ncol(X))
y <- y - mean(y)

This block plots two examples each for three very different $y$ ranges. Each component in plot_list is one of the panels appearing in Figure 3.

plot_ix <- order(y)[c(1:2, c(4998, 5004), c(9998, 10000))]
plot_list <- list()
raster_list <- list()

for (i in seq_along(plot_ix)) {
  j <- plot_ix[i]
  res <- sim_wrapper(x, n_original[j], nu[j], alpha[j], beta_r[j, ], nu_r[j], alpha_r[j], tau[j], lambdas[j, ])
  raster_list[[i]] <- plot_raster(make_raster(res$marks, 3))
  plot_list[[i]] <- plot_matern(res)
}

This combines all the figures in plot_list and raster_list above to create Figure 3.

library(gridExtra)
grid.arrange(
  plot_list[[1]], raster_list[[1]],
  plot_list[[2]], raster_list[[2]],
  plot_list[[3]], raster_list[[3]],
  plot_list[[4]], raster_list[[4]],
  plot_list[[5]], raster_list[[5]],
  plot_list[[6]], raster_list[[6]],
  ncol = 4,
  widths = c(4, 1, 4, 1)
)

We are almost ready to simulate all the images. The block below saves the directory to which all the simulated outputs will be saved.

tiles_dir <- file.path(params$output_dir, "tiles")
dir.create(tiles_dir)

Finally, we generate the images. The Xy.csv file saved at the very end saves all the raw parameter and response values associated with each generated image.

# can convert foreach into a standard for loop if no parallel backend
cl <- makeCluster(12)
registerDoParallel(cl)

pkgs <- c("inference", "dplyr", "MASS", "stringr", "sf", "raster", "reticulate")
paths <- foreach(j = seq_len(n_images), .combine = c, .packages = pkgs) %dopar% {
  # generate the shapefile representing cells
  result <- sim_wrapper(x, n_original[j], nu[j], alpha[j], beta_r[j, ], nu_r[j],
                        alpha_r[j], tau[j], lambdas[j, ])
  marks <- result$marks %>%
    mutate(variable = paste0("X", mark))

  # convert to raster and save as numpy
  path <- sprintf(file.path(tiles_dir, "image-%s.npy"), str_pad(j, 4, "left", 0))
  r <- make_raster(marks, 3)
  np$save(path, as.array(r))
  return(path)
}
stopCluster(cl)

write_csv(data.frame(path = paths, X = X, y = y), file.path(params$output_dir, "Xy.csv"))


krisrs1128/MSLF documentation built on March 21, 2023, 10:21 a.m.