library(geosciencebc2019008) run_date = Sys.Date() %>% str_replace_all(., "-", "_") model_prefix = 'regr_model_mars_wcfd/mars_wcfd' input_data = "../wcfd_data/final_wcfd_mldf_regr.rds" model_type = "regr.mars" pset= makeParamSet( makeDiscreteParam('degree',values=c(1,2,3)), makeIntegerParam('nk', lower = 1, upper = 30) ) wa_nums = c(32768, 32950, 33857, 33665, 32582, 33161, 29429, 32301, 33113, 24035, 33894, 33162, 32574, 32575, 33550, 37346, 30376, 30940, 28640, 29976, 30735) shap_wells = which((read_rds(input_data) %>% dplyr::filter(seismogenic == 1) %>% pull(wa_num)) %in% wa_nums) target = "max_mag" regr_comp_feats = c( "calc_total_fluid_m3", "calc_total_proppant_t", "calc_completed_length_m", "n_stages", "min_midpoint_dist" ) regr_geo_feats = c( "pressure_depth_ratio_kpa_m", "top_montney_structure_mss", "third_order_residual_m", "geothermal_gradient_degc_km", "distance_listric_faults_berger_m", "distance_normal_faults_berger_m" ) final_feats = c(regr_comp_feats, regr_geo_feats) ml_df <- read_rds(input_data) %>% dplyr::filter(seismogenic == 1) %>% dplyr::select(all_of(target), all_of(final_feats)) set.seed(2019008) train_rows = sample(nrow(ml_df)*0.9) train = ml_df[train_rows,] test = ml_df[-train_rows,] learner = makeLearner(model_type) train_task = makeRegrTask(data = train, target = target) test_task = makeRegrTask(data = test, target = target) all_task = makeRegrTask(data = ml_df, target = target) meas = list(mae, rmse) # hyperparameter tuning with 5-fold cross-validation & MBO optimization tune_res = tuneParams( learner, train_task, resampling=cv5, par.set=pset, control=makeTuneControlMBO(budget = 100L), measures=meas ) write_rds(tune_res, paste0('../output/',model_prefix,"_tune_res.rds")) # set hyperparameters tuned_learner = setHyperPars(learner = learner, par.vals = tune_res$x) # train final model for performance and interpretation model = train(tuned_learner, train_task) test_predict = predict(model, newdata=test) train_predict = predict(model, newdata=train) predictor = Predictor$new(model, data = ml_df) # monte-carlo performance measures resample = mlr::resample( tuned_learner, all_task, makeResampleDesc("Subsample", iters=500, split=4/5, predict='both'), measures = meas, show.info = FALSE ) model_res = get_resample_regr_res(resample) model_res$train_perf = performance(predict(model, newdata=train), measures = meas) model_res$test_perf = performance(predict(model, newdata=test), measures = meas) model_res$model = model model_res$tuned_learner = tuned_learner sink(paste0('../output/',model_prefix,"_model_results.txt")) print(model_res) sink() write_rds(model_res, paste0('../output/',model_prefix,"_resample.rds")) p1 = plotResiduals(train_predict) + geom_abline(slope=1,linetype = "dashed") + ggtitle("Training Set") + theme_minimal() p2 = plotResiduals(test_predict) + geom_abline(slope=1,linetype = "dashed") + ggtitle("Test Set") + theme_minimal() ggsave(file = paste0('../output/',model_prefix,"_resplot.jpg"), arrangeGrob(grobs = list(p1,p2), nrow=1), width = 12, height = 8) p1 = plotLearnerPrediction( tuned_learner, task = all_task, features = c("calc_total_fluid_m3", "calc_total_proppant_t"), measures = mae ) p2 = plotLearnerPrediction( tuned_learner, task = all_task, features = c("min_midpoint_dist", "n_stages"), measures = mae ) p3 = plotLearnerPrediction( tuned_learner, task = all_task, features = c("third_order_residual_m", "geothermal_gradient_degc_km"), measures = mae ) p4 = plotLearnerPrediction( tuned_learner, task = all_task, features = c("distance_listric_faults_berger_m", "distance_normal_faults_berger_m"), measures = mae ) ggsave(file = paste0('../output/',model_prefix,"_learnpreds.jpg"), arrangeGrob(grobs = list(p1,p2,p3,p4), nrow=2), width = 12, height = 8) # importance plot imp = FeatureImp$new(predictor, loss='mae', n.repetitions = 50) p1 = ggplot(imp$results) + geom_col(aes(x = feature, y = importance)) + labs(x = 'Feature', y = 'Permutation Importance') + coord_flip() + theme_minimal() interact = Interaction$new(predictor) p2 = ggplot(as.data.frame(interact$results)) + geom_col(aes(x = .feature, y = .interaction)) + labs(x = '', y = 'Interaction') + coord_flip() + theme_minimal() ggsave(file = paste0('../output/',model_prefix,"_impintplot.jpg"), arrangeGrob(grobs = list(p1,p2), nrow=1), width = 12, height = 8, dpi=600) # partial dependence plot make_pdp_plot <- function(i, predictor, feats){ pdp <- FeatureEffect$new(predictor, method = 'pdp+ice', feature = feats[i], center.at = 2) plot(pdp) + theme_minimal() + theme(axis.title.y=element_blank()) } plist <- map(.x = 1:length(final_feats), .f = make_pdp_plot, predictor = predictor, feats = final_feats) ggsave(file = paste0('../output/',model_prefix,"_pdpplot.jpg"), arrangeGrob(grobs = plist, ncol = 3, left = 'Magnitude'), width = 12, height = 8, dpi=600) for (well in shap_wells){ lime.explain = LocalModel$new(predictor, x.interest = ml_df[well,], k = length(final_feats)) p1 = ggplot(lime.explain$results) + geom_col(aes(x = feature, y = effect)) + labs(x = 'Feature', y = 'LIME Effect') + coord_flip() + theme_minimal() shapley = Shapley$new(predictor, x.interest = ml_df[well,] %>% dplyr::select(all_of(final_feats)), sample.size = 100) p2 = ggplot(shapley$results) + geom_col(aes(x = feature, y = phi)) + labs(x = 'Feature', y = 'SHAP Phi') + coord_flip() + theme_minimal() ggsave(file = paste0('../output/',model_prefix,"_",well,"_limeshap.jpg"), arrangeGrob(grobs = list(p1,p2), nrow=1), width = 12, height = 4, dpi=600) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.