This notebook summarizes the model predictions. We try to answer the following:

By each model we mean model & facies.

Uncertainty = mean model variance + different model uncertainty + facies uncertainty + geostatistical uncertainty

library(duvernaygeomechmodel)
study_bounds = st_read('../data/study_dv_bounds.GeoJSON')
study_raster = raster::raster('../data/study_raster.tif')

Load predictions from the previous notebook

predictions = readRDS('../output/geostatistical_models/predictions.rds')
rock_phys_pred = readRDS('../output/geostatistical_models/rock_phys_predictions.rds')
predictions@data <- cbind(predictions@data, rock_phys_pred@data)

sp_col_to_df <- function(spdf, col){
  x = spdf@coords[,1]
  y = spdf@coords[,2]
  data = spdf@data[,col]
  df_out = data.frame(x,y,data)
  names(df_out) = c('x', 'y', col)
  df_out
}

col = 'shale.rock_physics_pr.sim2'
ggplot(data = sp_col_to_df(predictions, col)) +
  geom_tile(aes(x=x,y=y, fill=get(col)))

Pick a feature set

feat_sets = c("vp_rho", "vp_vs_rho", "dev_vp_rho", "dev_vp_vs_rho", 
              "dev_conf_vp_rho", "dev_conf_vp_vs_rho")

targets = c("br", "ym", "pr")

facies = c("shale", "carb")

models = c("glm", "mars", "rf")

Subset predictions and make summary graphs / table. This takes a long time so don't run unless necessary.

subset_options = expand.grid(
  feat_sets=feat_sets, targets=targets, facies=facies, models=models,
  stringsAsFactors=FALSE)

rock_phys_subset_options = expand.grid(
  feat_sets=c("_"), targets=c("ym","pr"), facies=facies, models=c("rock_physics"),
  stringsAsFactors=FALSE)

subset_options = rbind(subset_options, rock_phys_subset_options)

subset_predictions <- function(predictions, feat_set, target, facies, model){
  predictions@data <- predictions@data %>% 
    select(contains(feat_set)) %>%
    select(contains(target)) %>%
    select(contains(facies)) %>%
    select(contains(model))
  predictions
}

for (row in 1:nrow(subset_options)){
  print(row)
  feat_set = subset_options[row, 'feat_sets']
  target = subset_options[row, 'targets']
  facie = subset_options[row, 'facies']
  model = subset_options[row, 'models']

  subset_name = str_c(facie, "_", feat_set, "_", model, "_", target)
  file_prefix = str_c("../output/prediction_summaries/", subset_name)

  # select a single set of realizations
  pred_subset = subset_predictions(
    predictions, feat_set, target, facie, model
    )

  #Summarize a subset
  # get rowwise summary statistics for the feature subset
  pred_subset_summary = pred_subset@data %>%
    rowwise() %>%
    summarize(
      mean = mean(c_across()),
      sd = sd(c_across()),
      p05 = quantile(c_across(), 0.05, na.rm=TRUE),
      p95 = quantile(c_across(), 0.95, na.rm=TRUE)
    ) %>%
    mutate(ci90 = p95-p05) %>%
    mutate(x = pred_subset@coords[,1]) %>%
    mutate(y = pred_subset@coords[,2])

  # output
  write_csv(pred_subset_summary, 
    str_c(file_prefix, "_predsummary.csv")
    )

  #Make plots
  ## Make some rules for plot censoring
  if (target == 'ym'){
    mean_lim = c(5,65)
    sd_lim = c(0,30)
    ci90_lim = c(0,30)
  } else if (target == 'pr'){
    mean_lim = c(0.15,0.35)
    sd_lim = c(0,0.2)
    ci90_lim = c(0,0.2)
  } else {
    mean_lim = c(0.0,0.6)
    sd_lim = c(0,0.3)
    ci90_lim = c(0,0.3)
  }

  ## Make plots
  ggplot(data = pred_subset_summary) +
    geom_tile(aes(x=x, y=y, fill=mean)) +
    scale_fill_viridis_c(limits=mean_lim, oob=scales::squish) +
    coord_equal() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " mean")) +
    ggsave(str_c(file_prefix, "_meanplot.pdf"))

  ggplot(data = pred_subset_summary) +
    geom_tile(aes(x=x, y=y, fill=sd)) +
    scale_fill_viridis_c(limits=sd_lim, oob=scales::squish) +
    coord_equal() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " sd")) +
    ggsave(str_c(file_prefix, "_sdplot.pdf"))

  ggplot(data = pred_subset_summary) +
    geom_tile(aes(x=x, y=y, fill=ci90)) +
    scale_fill_viridis_c(limits=ci90_lim, oob=scales::squish) +
    coord_equal() +
    theme_minimal() +
    ylab("") + xlab("") +
    ggtitle(str_c(subset_name, " ci90")) +
    ggsave(str_c(file_prefix, "_ci90plot.pdf"))
}

Make a boxplot of predictions for each facies for each target

subset_pred_target <- function(predictions, target){
  predictions@data <- predictions@data %>% 
    select(contains(target)) 
  predictions
}

for (target in c('br', 'pr', 'ym')){
  if (target == 'br'){
    target_title = 'Brittleness'
  } else if (target == 'pr'){
    target_title = 'Poisson\'s Ratio'
  } else {
    target_title = 'Young\'s Modulus'
  }
  print(target_title)

  filename = str_c("../output/prediction_summaries/", target, "_boxplot.pdf")

  # select a single set of realizations
  pred_subset = subset_pred_target(predictions, target)

  # transform subset to a long dataframe for plotting
  mod_feat = expand.grid(model = models, feat_set = feat_sets, facie = c('shale','carb'))
  mod_feat_cols = c(
    str_c(mod_feat$facie, ".", mod_feat$feat_set, "_", mod_feat$model, "_",target)
  )

  if (target == 'ym'){
    mod_feat_cols = c(mod_feat_cols, "shale.rock_physics_ym", "carb.rock_physics_ym")
  } 

  if (target == 'pr'){
    mod_feat_cols = c(mod_feat_cols, "shale.rock_physics_pr", "carb.rock_physics_pr")
  }

  # combine all features
  comb_vectors = c()
  for (mod_feat_col in mod_feat_cols){
    x = pred_subset@data %>%
      select(contains(mod_feat_col)) %>%
      gather() %>%
      select(value) %>%
      rename(!!mod_feat_col := value)
    comb_vectors = append(comb_vectors, x)
  }

  # wide to long for plotting
  comb_df = data.frame(comb_vectors) %>%
    gather(key='mod_feat', value='prediction') %>%
    drop_na() %>%
    slice_sample(n=1E6) %>%
    mutate(model_type = case_when(
      str_detect(.$mod_feat, 'glm') ~ 'glm',
      str_detect(.$mod_feat, 'mars') ~ 'mars',
      str_detect(.$mod_feat, 'rf') ~ 'rf',
      str_detect(.$mod_feat, 'rock_phys') ~ 'wave'
    )) %>%
    mutate(facies = case_when(
      str_detect(.$mod_feat, 'shale') ~ 'Shale',
      str_detect(.$mod_feat, 'carb') ~ 'Carbonate'
    )) %>%
    mutate(mod_feat = mod_feat %>%
       str_replace_all(., 'carb.', '') %>%
       str_replace_all(., 'shale.', '') %>%
       str_replace_all(., '_ym', '') %>%
       str_replace_all(., '_pr', '') %>%
       str_replace_all(., '_br', '')) %>%
    mutate(feat = mod_feat %>%
       str_replace_all(., '_glm', '') %>%
       str_replace_all(., '_mars', '') %>%
       str_replace_all(., '_rf', '')
       )

  comb_df$feat = factor(
    comb_df$feat, 
    levels=c("vp_rho", "vp_vs_rho", "rock_physics", "dev_vp_rho", "dev_vp_vs_rho", 
             "dev_conf_vp_rho", "dev_conf_vp_vs_rho")
    )

  if (target == 'br'){
    ylims=c(0,0.6)
  } else if (target == 'pr'){
    ylims=c(0.1,0.4)
  } else {
    ylims=c(5,65)
  }

  plt = ggplot(comb_df) +
      geom_boxplot(
        aes(x=feat, y=prediction, fill=model_type),
        outlier.size=0.5
        )

  plt +
    ylim(ylims) +
    facet_wrap(. ~ facies, scales='free_x') +
    ylab("") +
    xlab("") +
    ggtitle(target_title) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
          plot.margin = margin(10, 10, 10, 20)) +
    ggsave(filename)
}


ScottHMcKean/duvernay_geomech_model documentation built on Sept. 12, 2022, 10:08 a.m.