This notebook combines the statistical and geostatistical models to make a prediction of geomechanical properties across the basin. Steps include:

library(duvernaygeomechmodel)
library(stars)
study_bounds = st_read('../data/study_dv_bounds.GeoJSON')
study_raster = raster::raster('../data/study_raster.tif')

Rasterize sv_sf for mbgs datum (needed for absolute stress conversion)

sv_sf = st_read('../data/sv_sf.GeoJSON') %>% 
  drop_na() %>% 
  select(datum_mbgs)

logs_sf = st_read('../data/facies_data.GeoJSON')

datum_mbgs = st_rasterize(sv_sf, template = st_as_stars(study_raster)) %>% 
  as(., 'Raster')

Load stress realizations and log inputs realizations (carbonate + shale)

stress = readRDS('../output/geostatistical_models/stress_pred_trans.rds')
shale_logs = readRDS('../output/geostatistical_models/shale_logs_pred_trans.rds')
carb_logs = readRDS('../output/geostatistical_models/carb_logs_pred_trans.rds')

# make a predictions spdf to contain predictions
predictions = stress
predictions@data$x = predictions@coords[,1]
predictions@data$y = predictions@coords[,2]
predictions@data <- predictions@data %>% select(x,y)

Use Stress Polygon method to predict SHmax.

The polygon method requires an R-factor, which we can either set deterministically, or simulate it by drawing values from a distribution. Since we lack data to constrain this distribution, we use a deterministic assumption of r = 0.62. This produces mean shmax values across the simulations of 33.3 +/- 1 kpa/m, which is consistent with the value reported by Shen et al. (2019). Without further constraints, simulating the r-factor is roughly equivalent to adding random noise into the workflow and of little interpretational benefit.

We also calculate deviatoric and confining stress, assuming a Biot coefficient of 0.7.

r=0.62

sim_vec = str_replace_all(
  colnames(stress@data)[grepl("sv.*", colnames(stress@data))],
  "sv.",""
  )

for (sim in sim_vec){
  shmin = stress@data[, paste0('shmin.',sim)]
  sv = stress@data[, paste0('sv.',sim)]
  pp = stress@data[, paste0('pp.',sim)]
  sigma_2 = ifelse(sv > shmin, sv, shmin)
  sigma_3 = ifelse(sv > shmin, shmin, sv)
  # stress polygon calc
  sigma_1 = (sigma_3 * r - sigma_2)/(r - 1)
  sig_ceq = (sigma_1 + sigma_2 + sigma_3)/2 - 0.7 * pp
  sig_deq = sqrt(0.5*((sigma_1 - sigma_2)^2 + (sigma_2 - sigma_3)^2 + (sigma_3 - sigma_1)^2))
  stress@data[, paste0("shmax.", sim)] <- sigma_1
  # confining and deviatoric stress calcs
  stress@data[, paste0("sigc.", sim)] <- sig_ceq
  stress@data[, paste0("sigd.", sim)] <- sig_deq
}

Check mean stress values for calculated parameters

# find mean & sd of shmax values
shmax_summary = stress@data %>%
  rowwise() %>%
  summarize(
    shmax_mean = mean(c_across(contains('shmax'))),
    shmax_sd = sd(c_across(contains('shmax'))),
    sigc_mean = mean(c_across(contains('sigc.'))),
    sigc_sd = sd(c_across(contains('sigc.'))),
    sigd_mean = mean(c_across(contains('sigd.'))),
    sigd_sd = sd(c_across(contains('sigd.')))
  )

mean(shmax_summary$shmax_mean)
sd(shmax_summary$shmax_mean)
mean(shmax_summary$sigc_mean)
sd(shmax_summary$sigc_sd)
mean(shmax_summary$sigd_mean)
sd(shmax_summary$sigd_sd)

Convert stresses to absolute values

# functions to make absolute stress
grad_to_abs_val <- function(x, datum_mbgs){
  x * datum_mbgs@data@values / 1000
}

# remove na mask
mask_na <- function(x, datum_mbgs){
  x * datum_mbgs@data@values / datum_mbgs@data@values
}

stress_abs = stress
stress_abs@data <- stress@data %>%
  mutate(across(contains('sv'), grad_to_abs_val, datum_mbgs)) %>%
  mutate(across(contains('shmin'), grad_to_abs_val, datum_mbgs)) %>%
  mutate(across(contains('pp'), grad_to_abs_val, datum_mbgs)) %>%
  mutate(across(contains('shmax'), grad_to_abs_val, datum_mbgs)) %>%
  mutate(across(contains('sigc'), grad_to_abs_val, datum_mbgs)) %>%
  mutate(across(contains('sigd'), grad_to_abs_val, datum_mbgs)) %>%
  na_if(0)

stress@data <- stress@data %>%
  mutate(across(contains('sv'), mask_na, datum_mbgs)) %>%
  mutate(across(contains('shmin'), mask_na, datum_mbgs)) %>%
  mutate(across(contains('pp'), mask_na, datum_mbgs)) %>%
  mutate(across(contains('shmax'), mask_na, datum_mbgs)) %>%
  mutate(across(contains('sigc'), mask_na, datum_mbgs)) %>%
  mutate(across(contains('sigd'), mask_na, datum_mbgs)) %>%
  na_if(0)

Convert logs to vp and vs and remove NA areas

# convert slowness to velocity and remove masked areas
dt_to_v <- function(x, datum_mbgs){
  valid_mask = ifelse(datum_mbgs@data@values>0,1,0)
  x^-1*1e6*valid_mask
}

# convert rhob to density and remove masked areas
rhob_to_density <- function(x, datum_mbgs){
  valid_mask = ifelse(datum_mbgs@data@values>0,1,0)
  x/1000*valid_mask
}

shale_logs_v = shale_logs
shale_logs_v@data <- shale_logs@data %>%
  mutate(across(contains('rhob'), rhob_to_density, datum_mbgs)) %>%
  mutate(across(contains('dtc'), dt_to_v, datum_mbgs)) %>%
  mutate(across(contains('dts'), dt_to_v, datum_mbgs)) %>%
  na_if(0)

colnames(shale_logs_v@data) <- colnames(shale_logs_v@data) %>%
  str_replace_all(., "dtc", "vp") %>%
  str_replace_all(., "dts", "vs") %>%
  str_replace_all(., "rhob", "rho")

carb_logs_v = carb_logs
carb_logs_v@data <- carb_logs@data %>%
  mutate(across(contains('rhob'), rhob_to_density, datum_mbgs)) %>%
  mutate(across(contains('dtc'), dt_to_v, datum_mbgs)) %>%
  mutate(across(contains('dts'), dt_to_v, datum_mbgs)) %>%
  na_if(0)

colnames(carb_logs_v@data) <- colnames(carb_logs_v@data) %>%
  str_replace_all(., "dtc", "vp") %>%
  str_replace_all(., "dts", "vs") %>%
  str_replace_all(., "rhob", "rho")

Plot individual realizations

sp_col_to_df <- function(spdf, col){
  x = spdf@coords[,1]
  y = spdf@coords[,2]
  data = spdf@data[,col]
  df_out = data.frame(x,y,data)
  names(df_out) = c('x', 'y', col)
  df_out
}

col = 'sigc.sim5'
ggplot(data = sp_col_to_df(stress_abs, col)) +
  geom_tile(aes(x=x,y=y, fill=get(col)))

Plot the mean and standard deviation of the stress features

subset_inputs <- function(inputs, feature){
  inputs@data <- inputs@data %>% 
    select(contains(feature))
  inputs
}

for (subset_name in c('sv','shmin','shmax')){
file_prefix = str_c("../output/geostat_input_summaries/", subset_name)

stress_subset = subset_inputs(stress, subset_name)

subset_summary = stress_subset@data %>%
    rowwise() %>%
    summarize(
      mean = mean(c_across()),
      sd = sd(c_across()),
      p05 = quantile(c_across(), 0.05, na.rm=TRUE),
      p95 = quantile(c_across(), 0.95, na.rm=TRUE)
    ) %>%
    mutate(ci90 = p95-p05) %>%
    mutate(x = stress_subset@coords[,1]) %>%
    mutate(y = stress_subset@coords[,2])

ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=mean)) +
    geom_sf(data=logs_sf, size = 0.75) +
    scale_fill_viridis_c(option='D') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " mean")) +
    ggsave(str_c(file_prefix, "_meanplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=sd)) +
    geom_sf(data=logs_sf, size = 0.75) +
    scale_fill_viridis_c(option='E') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " sd")) +
    ggsave(str_c(file_prefix, "_sdplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=ci90)) +
    geom_sf(data=logs_sf, size = 0.75) +
    scale_fill_viridis_c(option='C') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " ci90")) +
    ggsave(str_c(file_prefix, "_ci90plot.pdf"))
  }

Do shale logs - need to set limits for non-physical predictions in Vp. Vs and rho look okay. Revisit the input parameters to make sure what's going on is physical, suspect that it may due to a) older vp logs directly beside newer ones causing some numerical instabilities, or b) outliers and data busts.

subset_name = 'vp'
log_subset_sf = logs_sf %>% filter(facies == 'dv_shale')
for (subset_name in c("vp", "vs", "rho")){
  file_prefix = str_c("../output/geostat_input_summaries/", "shale_", subset_name)
  stress_subset = subset_inputs(shale_logs_v, subset_name)

  if (subset_name == 'vp'){
    mean_lim = c(4000, 6500)
    sd_lim = c(0,2000)
    ci90_lim = c(0,2000)
  } else {
    mean_lim = NULL
    sd_lim = NULL
    ci90_lim = NULL
  }

  quantile(subset_summary$sd, na.rm=TRUE)

  subset_summary = stress_subset@data %>%
      rowwise() %>%
      summarize(
        mean = mean(c_across()),
        sd = sd(c_across()),
        p05 = quantile(c_across(), 0.05, na.rm=TRUE),
        p95 = quantile(c_across(), 0.95, na.rm=TRUE)
      ) %>%
      mutate(ci90 = p95-p05) %>%
      mutate(x = stress_subset@coords[,1]) %>%
      mutate(y = stress_subset@coords[,2])

  ggplot(data = subset_summary) +
      geom_tile(aes(x=x, y=y, fill=mean)) +
      geom_sf(data=log_subset_sf, size = 0.75) +
      scale_fill_viridis_c(limits=mean_lim, oob=scales::squish, option='D') +
      coord_sf() +
      theme_minimal() +
      ylab("") + xlab("") +
      ggtitle(str_c(subset_name, " mean")) +
      ggsave(str_c(file_prefix, "_meanplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=sd)) +
    geom_sf(data=log_subset_sf, size = 0.75) +
    scale_fill_viridis_c(limits=sd_lim, oob=scales::squish, option='E') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " sd")) +
    ggsave(str_c(file_prefix, "_sdplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=ci90)) +
    geom_sf(data=log_subset_sf, size = 0.75) +
    scale_fill_viridis_c(limits=ci90_lim, oob=scales::squish, option='C') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " ci90")) +
    ggsave(str_c(file_prefix, "_ci90plot.pdf"))
}
log_subset_sf = logs_sf %>% filter(facies == 'dv_carb')
for (subset_name in c("vp", "vs", "rho")){
  file_prefix = str_c("../output/geostat_input_summaries/", "carb_", subset_name)
  stress_subset = subset_inputs(carb_logs_v, subset_name)

  quantile(subset_summary$sd, na.rm=TRUE)

  subset_summary = stress_subset@data %>%
      rowwise() %>%
      summarize(
        mean = mean(c_across()),
        sd = sd(c_across()),
        p05 = quantile(c_across(), 0.05, na.rm=TRUE),
        p95 = quantile(c_across(), 0.95, na.rm=TRUE)
      ) %>%
      mutate(ci90 = p95-p05) %>%
      mutate(x = stress_subset@coords[,1]) %>%
      mutate(y = stress_subset@coords[,2])

  ggplot(data = subset_summary) +
      geom_tile(aes(x=x, y=y, fill=mean)) +
      geom_sf(data=log_subset_sf, size = 0.75) +
      scale_fill_viridis_c(limits=mean_lim, oob=scales::squish, option='D') +
      coord_sf() +
      theme_minimal() +
      ylab("") + xlab("") +
      ggtitle(str_c(subset_name, " mean")) +
      ggsave(str_c(file_prefix, "_meanplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=sd)) +
    geom_sf(data=log_subset_sf, size = 0.75) +
    scale_fill_viridis_c(limits=sd_lim, oob=scales::squish, option='E') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " sd")) +
    ggsave(str_c(file_prefix, "_sdplot.pdf"))

  ggplot(data = subset_summary) +
    geom_tile(aes(x=x, y=y, fill=ci90)) +
    geom_sf(data=log_subset_sf, size = 0.75) +
    scale_fill_viridis_c(limits=ci90_lim, oob=scales::squish, option = 'C') +
    coord_sf() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " ci90")) +
    ggsave(str_c(file_prefix, "_ci90plot.pdf"))
}

Iterate through each of the 1000 realizations, load the 54 models, and make predictions for shale and carbonate. This loop takes a long time to run...

We predict for vertical orientation only.

for (sim in sim_vec){
  print(sim)

  # stresses
  deviatoric_stress = stress_abs@data[, paste0('sigd.',sim)]
  confining_pressure = stress_abs@data[, paste0('sigc.',sim)]

  # shale logs
  pred_vs = shale_logs_v@data[, paste0('vs.',sim)]
  pred_vp = shale_logs_v@data[, paste0('vp.',sim)]
  bulk_density = shale_logs_v@data[, paste0('rho.',sim)]

  na_mask = ifelse(is.na(pred_vs), NA, 1)
  shale_df_out = data.frame(
    deviatoric_stress , confining_pressure, pred_vs, pred_vp, bulk_density
    ) %>% replace(is.na(.), 0) %>%
    mutate(shale_group = 1) %>%
    mutate(horiz_orient= 0)

  # carbonate logs
  pred_vs = carb_logs_v@data[, paste0('vs.',sim)]
  pred_vp = carb_logs_v@data[, paste0('vp.',sim)]
  bulk_density = carb_logs_v@data[, paste0('rho.',sim)]

  carb_df_out = data.frame(
    deviatoric_stress , confining_pressure, pred_vs, pred_vp, bulk_density
    ) %>% replace(is.na(.), 0) %>%
    mutate(shale_group = 0) %>%
    mutate(horiz_orient= 0)

  # cycle through models and make predictions for shale and carbonate
  # load model(s)
  for (dir in list.dirs("../output/statistical_models/",recursive = FALSE)){
    print(dir)
    tryCatch({
      if (str_replace(dir, '../output/statistical_models//', '') == "wave") next
    model_name = tail(str_split(dir,'/')[[1]],1)
    model = readRDS(paste0(dir,'/resample_results.rds'))$model
    predictions@data[, paste0('shale.',model_name,".",sim)] <- predict(
      model, newdata=shale_df_out %>% select(model$features))$data$response * na_mask
    predictions@data[, paste0('carb.',model_name,".",sim)] <- predict(
      model, newdata=carb_df_out %>% select(model$features))$data$response * na_mask  
    },
    finally = {
      next
    })

  }
}

saveRDS(predictions,'../output/geostatistical_models/predictions.rds')

Plot some predictions - looks as expected, with lots of variance when the geostatistical variance is applied the models.

colnames(predictions@data)

col = 'carb.dev_conf_vp_vs_rho_glm_pr.sim1'

ggplot(data = sp_col_to_df(predictions, col)) +
  geom_tile(aes(x=x,y=y, fill=get(col)))


ScottHMcKean/duvernay_geomech_model documentation built on Sept. 12, 2022, 10:08 a.m.