This notebook combines the statistical and geostatistical models to make a prediction of geomechanical properties across the basin. Steps include:
library(duvernaygeomechmodel) library(stars) study_bounds = st_read('../data/study_dv_bounds.GeoJSON') study_raster = raster::raster('../data/study_raster.tif')
Rasterize sv_sf for mbgs datum (needed for absolute stress conversion)
sv_sf = st_read('../data/sv_sf.GeoJSON') %>% drop_na() %>% select(datum_mbgs) logs_sf = st_read('../data/facies_data.GeoJSON') datum_mbgs = st_rasterize(sv_sf, template = st_as_stars(study_raster)) %>% as(., 'Raster')
Load stress realizations and log inputs realizations (carbonate + shale)
stress = readRDS('../output/geostatistical_models/stress_pred_trans.rds') shale_logs = readRDS('../output/geostatistical_models/shale_logs_pred_trans.rds') carb_logs = readRDS('../output/geostatistical_models/carb_logs_pred_trans.rds') # make a predictions spdf to contain predictions predictions = stress predictions@data$x = predictions@coords[,1] predictions@data$y = predictions@coords[,2] predictions@data <- predictions@data %>% select(x,y)
Use Stress Polygon method to predict SHmax.
The polygon method requires an R-factor, which we can either set deterministically, or simulate it by drawing values from a distribution. Since we lack data to constrain this distribution, we use a deterministic assumption of r = 0.62. This produces mean shmax values across the simulations of 33.3 +/- 1 kpa/m, which is consistent with the value reported by Shen et al. (2019). Without further constraints, simulating the r-factor is roughly equivalent to adding random noise into the workflow and of little interpretational benefit.
We also calculate deviatoric and confining stress, assuming a Biot coefficient of 0.7.
r=0.62 sim_vec = str_replace_all( colnames(stress@data)[grepl("sv.*", colnames(stress@data))], "sv.","" ) for (sim in sim_vec){ shmin = stress@data[, paste0('shmin.',sim)] sv = stress@data[, paste0('sv.',sim)] pp = stress@data[, paste0('pp.',sim)] sigma_2 = ifelse(sv > shmin, sv, shmin) sigma_3 = ifelse(sv > shmin, shmin, sv) # stress polygon calc sigma_1 = (sigma_3 * r - sigma_2)/(r - 1) sig_ceq = (sigma_1 + sigma_2 + sigma_3)/2 - 0.7 * pp sig_deq = sqrt(0.5*((sigma_1 - sigma_2)^2 + (sigma_2 - sigma_3)^2 + (sigma_3 - sigma_1)^2)) stress@data[, paste0("shmax.", sim)] <- sigma_1 # confining and deviatoric stress calcs stress@data[, paste0("sigc.", sim)] <- sig_ceq stress@data[, paste0("sigd.", sim)] <- sig_deq }
Check mean stress values for calculated parameters
# find mean & sd of shmax values shmax_summary = stress@data %>% rowwise() %>% summarize( shmax_mean = mean(c_across(contains('shmax'))), shmax_sd = sd(c_across(contains('shmax'))), sigc_mean = mean(c_across(contains('sigc.'))), sigc_sd = sd(c_across(contains('sigc.'))), sigd_mean = mean(c_across(contains('sigd.'))), sigd_sd = sd(c_across(contains('sigd.'))) ) mean(shmax_summary$shmax_mean) sd(shmax_summary$shmax_mean) mean(shmax_summary$sigc_mean) sd(shmax_summary$sigc_sd) mean(shmax_summary$sigd_mean) sd(shmax_summary$sigd_sd)
Convert stresses to absolute values
# functions to make absolute stress grad_to_abs_val <- function(x, datum_mbgs){ x * datum_mbgs@data@values / 1000 } # remove na mask mask_na <- function(x, datum_mbgs){ x * datum_mbgs@data@values / datum_mbgs@data@values } stress_abs = stress stress_abs@data <- stress@data %>% mutate(across(contains('sv'), grad_to_abs_val, datum_mbgs)) %>% mutate(across(contains('shmin'), grad_to_abs_val, datum_mbgs)) %>% mutate(across(contains('pp'), grad_to_abs_val, datum_mbgs)) %>% mutate(across(contains('shmax'), grad_to_abs_val, datum_mbgs)) %>% mutate(across(contains('sigc'), grad_to_abs_val, datum_mbgs)) %>% mutate(across(contains('sigd'), grad_to_abs_val, datum_mbgs)) %>% na_if(0) stress@data <- stress@data %>% mutate(across(contains('sv'), mask_na, datum_mbgs)) %>% mutate(across(contains('shmin'), mask_na, datum_mbgs)) %>% mutate(across(contains('pp'), mask_na, datum_mbgs)) %>% mutate(across(contains('shmax'), mask_na, datum_mbgs)) %>% mutate(across(contains('sigc'), mask_na, datum_mbgs)) %>% mutate(across(contains('sigd'), mask_na, datum_mbgs)) %>% na_if(0)
Convert logs to vp and vs and remove NA areas
# convert slowness to velocity and remove masked areas dt_to_v <- function(x, datum_mbgs){ valid_mask = ifelse(datum_mbgs@data@values>0,1,0) x^-1*1e6*valid_mask } # convert rhob to density and remove masked areas rhob_to_density <- function(x, datum_mbgs){ valid_mask = ifelse(datum_mbgs@data@values>0,1,0) x/1000*valid_mask } shale_logs_v = shale_logs shale_logs_v@data <- shale_logs@data %>% mutate(across(contains('rhob'), rhob_to_density, datum_mbgs)) %>% mutate(across(contains('dtc'), dt_to_v, datum_mbgs)) %>% mutate(across(contains('dts'), dt_to_v, datum_mbgs)) %>% na_if(0) colnames(shale_logs_v@data) <- colnames(shale_logs_v@data) %>% str_replace_all(., "dtc", "vp") %>% str_replace_all(., "dts", "vs") %>% str_replace_all(., "rhob", "rho") carb_logs_v = carb_logs carb_logs_v@data <- carb_logs@data %>% mutate(across(contains('rhob'), rhob_to_density, datum_mbgs)) %>% mutate(across(contains('dtc'), dt_to_v, datum_mbgs)) %>% mutate(across(contains('dts'), dt_to_v, datum_mbgs)) %>% na_if(0) colnames(carb_logs_v@data) <- colnames(carb_logs_v@data) %>% str_replace_all(., "dtc", "vp") %>% str_replace_all(., "dts", "vs") %>% str_replace_all(., "rhob", "rho")
Plot individual realizations
sp_col_to_df <- function(spdf, col){ x = spdf@coords[,1] y = spdf@coords[,2] data = spdf@data[,col] df_out = data.frame(x,y,data) names(df_out) = c('x', 'y', col) df_out } col = 'sigc.sim5' ggplot(data = sp_col_to_df(stress_abs, col)) + geom_tile(aes(x=x,y=y, fill=get(col)))
Plot the mean and standard deviation of the stress features
subset_inputs <- function(inputs, feature){ inputs@data <- inputs@data %>% select(contains(feature)) inputs } for (subset_name in c('sv','shmin','shmax')){ file_prefix = str_c("../output/geostat_input_summaries/", subset_name) stress_subset = subset_inputs(stress, subset_name) subset_summary = stress_subset@data %>% rowwise() %>% summarize( mean = mean(c_across()), sd = sd(c_across()), p05 = quantile(c_across(), 0.05, na.rm=TRUE), p95 = quantile(c_across(), 0.95, na.rm=TRUE) ) %>% mutate(ci90 = p95-p05) %>% mutate(x = stress_subset@coords[,1]) %>% mutate(y = stress_subset@coords[,2]) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=mean)) + geom_sf(data=logs_sf, size = 0.75) + scale_fill_viridis_c(option='D') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " mean")) + ggsave(str_c(file_prefix, "_meanplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=sd)) + geom_sf(data=logs_sf, size = 0.75) + scale_fill_viridis_c(option='E') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " sd")) + ggsave(str_c(file_prefix, "_sdplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=ci90)) + geom_sf(data=logs_sf, size = 0.75) + scale_fill_viridis_c(option='C') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " ci90")) + ggsave(str_c(file_prefix, "_ci90plot.pdf")) }
Do shale logs - need to set limits for non-physical predictions in Vp. Vs and rho look okay. Revisit the input parameters to make sure what's going on is physical, suspect that it may due to a) older vp logs directly beside newer ones causing some numerical instabilities, or b) outliers and data busts.
subset_name = 'vp' log_subset_sf = logs_sf %>% filter(facies == 'dv_shale') for (subset_name in c("vp", "vs", "rho")){ file_prefix = str_c("../output/geostat_input_summaries/", "shale_", subset_name) stress_subset = subset_inputs(shale_logs_v, subset_name) if (subset_name == 'vp'){ mean_lim = c(4000, 6500) sd_lim = c(0,2000) ci90_lim = c(0,2000) } else { mean_lim = NULL sd_lim = NULL ci90_lim = NULL } quantile(subset_summary$sd, na.rm=TRUE) subset_summary = stress_subset@data %>% rowwise() %>% summarize( mean = mean(c_across()), sd = sd(c_across()), p05 = quantile(c_across(), 0.05, na.rm=TRUE), p95 = quantile(c_across(), 0.95, na.rm=TRUE) ) %>% mutate(ci90 = p95-p05) %>% mutate(x = stress_subset@coords[,1]) %>% mutate(y = stress_subset@coords[,2]) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=mean)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=mean_lim, oob=scales::squish, option='D') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " mean")) + ggsave(str_c(file_prefix, "_meanplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=sd)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=sd_lim, oob=scales::squish, option='E') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " sd")) + ggsave(str_c(file_prefix, "_sdplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=ci90)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=ci90_lim, oob=scales::squish, option='C') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " ci90")) + ggsave(str_c(file_prefix, "_ci90plot.pdf")) }
log_subset_sf = logs_sf %>% filter(facies == 'dv_carb') for (subset_name in c("vp", "vs", "rho")){ file_prefix = str_c("../output/geostat_input_summaries/", "carb_", subset_name) stress_subset = subset_inputs(carb_logs_v, subset_name) quantile(subset_summary$sd, na.rm=TRUE) subset_summary = stress_subset@data %>% rowwise() %>% summarize( mean = mean(c_across()), sd = sd(c_across()), p05 = quantile(c_across(), 0.05, na.rm=TRUE), p95 = quantile(c_across(), 0.95, na.rm=TRUE) ) %>% mutate(ci90 = p95-p05) %>% mutate(x = stress_subset@coords[,1]) %>% mutate(y = stress_subset@coords[,2]) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=mean)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=mean_lim, oob=scales::squish, option='D') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " mean")) + ggsave(str_c(file_prefix, "_meanplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=sd)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=sd_lim, oob=scales::squish, option='E') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " sd")) + ggsave(str_c(file_prefix, "_sdplot.pdf")) ggplot(data = subset_summary) + geom_tile(aes(x=x, y=y, fill=ci90)) + geom_sf(data=log_subset_sf, size = 0.75) + scale_fill_viridis_c(limits=ci90_lim, oob=scales::squish, option = 'C') + coord_sf() + theme_minimal() + ylab("") + xlab("") + ggtitle(str_c(subset_name, " ci90")) + ggsave(str_c(file_prefix, "_ci90plot.pdf")) }
Iterate through each of the 1000 realizations, load the 54 models, and make predictions for shale and carbonate. This loop takes a long time to run...
We predict for vertical orientation only.
for (sim in sim_vec){ print(sim) # stresses deviatoric_stress = stress_abs@data[, paste0('sigd.',sim)] confining_pressure = stress_abs@data[, paste0('sigc.',sim)] # shale logs pred_vs = shale_logs_v@data[, paste0('vs.',sim)] pred_vp = shale_logs_v@data[, paste0('vp.',sim)] bulk_density = shale_logs_v@data[, paste0('rho.',sim)] na_mask = ifelse(is.na(pred_vs), NA, 1) shale_df_out = data.frame( deviatoric_stress , confining_pressure, pred_vs, pred_vp, bulk_density ) %>% replace(is.na(.), 0) %>% mutate(shale_group = 1) %>% mutate(horiz_orient= 0) # carbonate logs pred_vs = carb_logs_v@data[, paste0('vs.',sim)] pred_vp = carb_logs_v@data[, paste0('vp.',sim)] bulk_density = carb_logs_v@data[, paste0('rho.',sim)] carb_df_out = data.frame( deviatoric_stress , confining_pressure, pred_vs, pred_vp, bulk_density ) %>% replace(is.na(.), 0) %>% mutate(shale_group = 0) %>% mutate(horiz_orient= 0) # cycle through models and make predictions for shale and carbonate # load model(s) for (dir in list.dirs("../output/statistical_models/",recursive = FALSE)){ print(dir) tryCatch({ if (str_replace(dir, '../output/statistical_models//', '') == "wave") next model_name = tail(str_split(dir,'/')[[1]],1) model = readRDS(paste0(dir,'/resample_results.rds'))$model predictions@data[, paste0('shale.',model_name,".",sim)] <- predict( model, newdata=shale_df_out %>% select(model$features))$data$response * na_mask predictions@data[, paste0('carb.',model_name,".",sim)] <- predict( model, newdata=carb_df_out %>% select(model$features))$data$response * na_mask }, finally = { next }) } } saveRDS(predictions,'../output/geostatistical_models/predictions.rds')
Plot some predictions - looks as expected, with lots of variance when the geostatistical variance is applied the models.
colnames(predictions@data) col = 'carb.dev_conf_vp_vs_rho_glm_pr.sim1' ggplot(data = sp_col_to_df(predictions, col)) + geom_tile(aes(x=x,y=y, fill=get(col)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.