DriveML: Machine Learning Projects
In DriveML: Self-Drive Machine Learning Projects

library(rmarkdown)
library(DriveML)
library(knitr)
library(scales)
library(ggplot2)

# Model object handed to this report through the `mlobject` entry of `params`;
# every later chunk reads its tables and plots from it.
modelobject <- params$mlobject

Machine Learning Classification Model

Automated Machine Learning (DriveML) mainly refers to the automated methods for model selection and hyper-parameter optimization of various algorithms such as random forests, gradient boosting, etc.

Summary of trained data and model function

Dimensions of the dataset and other information

# Data-set summaries stored on the model object (train / test / score).
t1 <- modelobject$datasummary$train
t2 <- modelobject$datasummary$test
t3 <- modelobject$datasummary$score

# Flatten the recorded call into a parameter/input table.
# NOTE(review): assumes modelobject$call is a call object (e.g. produced by
# match.call()) - confirm against the package.
t4 <- modelobject$call
# Element 1 of a call is the function itself, so arguments start at index 2.
# seq_along(t4)[-1] is empty when there are no arguments, whereas
# 2:length(t4) would count down as c(2, 1).
arg_rows <- lapply(seq_along(t4)[-1], function(j) {
  fnam <- as.character(names(t4[j]))
  # A call without any named argument has NULL names; keep one row anyway.
  if (length(fnam) == 0) fnam <- ""
  ivalue <- as.character(t4[[j]])
  if (length(ivalue) == 0) ivalue <- "NULL"
  data.frame(parameter = fnam, input = ivalue)
})
# Bind once instead of growing the data frame inside a loop. An empty list
# yields NULL, which matches the value mdata had before any row was added.
mdata <- do.call(rbind, arg_rows)

# Lookup table: algorithm name -> human-readable description.
modename <- names(modelobject$trainedModels)
manme <- data.frame(
  model = c("glmnet", "logreg", "randomForest", "ranger", "xgboost", "rpart"),
  descriptions = c(
    "Regularised regression  from glmnet R package",
    "logistic regression from stats R package",
    "Random forests using the randomForest R package",
    "Random forests using the ranger R package",
    "Gradient boosting using xgboost R package",
    "decision tree classification using rpart R package"
  )
)
# Keep only the algorithms present in trainedModels. The previous
# subset(manme, model = modename) passed `model =` through `...`, where it is
# ignored, so every row was returned regardless of what was trained.
drmodel <- manme[manme$model %in% modename, ]
# Renumber the rows so kable() does not print the original row indices.
rownames(drmodel) <- NULL

## section2
# Model comparison results, with the row names reset to the default.
result <- modelobject$results
rownames(result) <- NULL

## Section ROC plot
exe_modl <- names(modelobject$trainedModels)
# Start every pl_<model> flag at FALSE, then switch on the flag of each
# model named in trainedModels.
for (j in c("glmnet", "logreg", "randomForest", "ranger", "xgboost", "rpart")) {
  assign(paste0("pl_", j), FALSE)
}
for (j in exe_modl) {
  assign(paste0("pl_", j), TRUE)
}

## variable importance
# Same scheme for the vi_<model> flags; only the six known model names can
# switch a flag on.
for (j in c("randomForest", "ranger", "xgboost", "rpart", "logreg", "glmnet")) {
  assign(paste0("vi_", j), FALSE)
}
for (j in exe_modl) {
  if (j %in% c("randomForest", "ranger", "xgboost", "glmnet", "logreg", "rpart")) {
    assign(paste0("vi_", j), TRUE)
  }
}

Training data set

# Training summary: drop the rows whose Value is 0, renumber the remaining
# rows, and render the table.
t1 <- t1[!(t1$Value == 0), ]
row.names(t1) <- NULL
kable(t1)

Validation data set

# Validation summary: drop the rows whose Value is 0, renumber the remaining
# rows, and render the table.
t2 <- t2[!(t2$Value == 0), ]
row.names(t2) <- NULL
kable(t2)

Scoring data set

# Scoring summary: the score entry may be NULL, in which case only a short
# message is written; otherwise the table is filtered like the other two.
if (is.null(t3)) {
  cat("No score data set")
} else {
  t3 <- t3[!(t3$Value == 0), ]
  row.names(t3) <- NULL
  kable(t3)
}

DriveML Model selected parameters

  # Render the parameter/input table assembled in `mdata`.
  kable(mdata)

List of machine learning classification algorithms used

  # Render the model/description table held in `drmodel`.
  kable(drmodel)

Model Performance comparison

Summary table

The table shows the model fitting time and performance metrics such as AUC, Accuracy, Precision, Recall and F1 score.

  # Render the `result` table (modelobject$results with its row names reset).
  kable(result)

ROC curve

# ROC curves (train and test) for each model stored in trainedModels.
# NOTE(review): the glmnet ROC output is commented out in this file; it is
# left disabled here - confirm whether that is intentional.
#masterModel <- modelobject$trainedModels[["glmnet"]]
#masterModel$modelPlots$TrainROC
#masterModel$modelPlots$TestROC

# Every plot stays a top-level expression so that its value is auto-printed.
# The is.null() guard skips a model that is absent from trainedModels, which
# would otherwise put a bare NULL into the report.
masterModel <- modelobject$trainedModels[["logreg"]]
if (!is.null(masterModel)) masterModel$modelPlots$TrainROC
if (!is.null(masterModel)) masterModel$modelPlots$TestROC

masterModel <- modelobject$trainedModels[["randomForest"]]
if (!is.null(masterModel)) masterModel$modelPlots$TrainROC
if (!is.null(masterModel)) masterModel$modelPlots$TestROC

masterModel <- modelobject$trainedModels[["ranger"]]
if (!is.null(masterModel)) masterModel$modelPlots$TrainROC
if (!is.null(masterModel)) masterModel$modelPlots$TestROC

masterModel <- modelobject$trainedModels[["xgboost"]]
if (!is.null(masterModel)) masterModel$modelPlots$TrainROC
if (!is.null(masterModel)) masterModel$modelPlots$TestROC

masterModel <- modelobject$trainedModels[["rpart"]]
if (!is.null(masterModel)) masterModel$modelPlots$TrainROC
if (!is.null(masterModel)) masterModel$modelPlots$TestROC

Variable importance or coefficients

# First variable-importance (or coefficient) plot of each model.
# A model missing from trainedModels yields NULL all the way down, and
# NULL[[1]] raises "subscript out of bounds"; the length() guard skips such
# a model instead of aborting the report. Each guarded expression stays at
# top level so the plot it returns is still auto-printed.
masterModel <- modelobject$trainedModels[["xgboost"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

masterModel <- modelobject$trainedModels[["randomForest"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

masterModel <- modelobject$trainedModels[["ranger"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

masterModel <- modelobject$trainedModels[["rpart"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

masterModel <- modelobject$trainedModels[["glmnet"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

masterModel <- modelobject$trainedModels[["logreg"]]
varimp <- masterModel$modelPlots$VarImp
if (length(varimp) > 0) varimp[[1]]

Best Model Explainability

Used lift charts and PDP plots

Lift charts and table

Lift chart

# Evaluates to the Lift_plot element stored under modelexp; as a top-level
# expression its value is auto-printed into the report.
modelobject$modelexp$Lift_plot

Lift table

Lift captured by each model in the top 2% and in the top 10% (top decile)

# Lift table: one row per model, built from the rows of Lift_data whose
# `groups` value is 1 (column top_2) and 5 (column top_10).
# NOTE(review): presumably group 1 is the top 2% and group 5 the top 10% of
# scored records, and both subsets list the models in the same order -
# confirm against the code that builds Lift_data.
cc <- modelobject$modelexp$Lift_data
cc1 <- cc[cc$groups == 1, ]
cc2 <- cc[cc$groups == 5, ]
row.names(cc1) <- NULL
row.names(cc2) <- NULL
ccd <- data.frame(
  model = cc1$model,
  top_2 = cc1$lift,
  top_10 = cc2$lift
)
kable(ccd)

Partial Dependency Plots (PDP)

Note: Plot available for top important variables

# Look up every stored PDP plot by name and return them as an unnamed list;
# the list is auto-printed because this is a top-level expression.
lapply(
  names(modelobject$modelexp$pdp$plots),
  function(plot_name) {
    modelobject$modelexp$pdp$plots[[plot_name]]
  }
)

Sample view of predicted score - validation set

# Sample (at most the first 10 rows) of the scores stored under
# predicted_score$test.
cc <- modelobject$predicted_score$test
# head() stops at the last available row and keeps a one-column table as a
# table, whereas cc[1:10, ] pads a shorter table with NA rows and drops a
# single column to a plain vector.
cc <- data.frame(head(cc, 10))
kable(cc)

Any scripts or data that you put into this service are public.

DriveML documentation built on Dec. 2, 2022, 5:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DriveML
Self-Drive Machine Learning Projects

DriveML: Machine Learning Projects
In DriveML: Self-Drive Machine Learning Projects

Machine Learning Classification Model

Summary of trained data and model function

DriveML Model selected parameters

List of Machine learning classification algorithm used

Model Performance comparision

Summary table

ROC curve

Variable importance or coefficients

Best Model Explainability

Lift charts and table

Lift chart

Lift table

Partial Dependency Plots (PDP)

Sample view of predicted score - validation set

Try the DriveML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DriveML Self-Drive Machine Learning Projects

DriveML: Machine Learning Projects In DriveML: Self-Drive Machine Learning Projects

Machine Learning Classification Model

Summary of trained data and model function

DriveML Model selected parameters

List of Machine learning classification algorithm used

Model Performance comparision

Summary table

ROC curve

Variable importance or coefficients

Best Model Explainability

Lift charts and table

Lift chart

Lift table

Partial Dependency Plots (PDP)

Sample view of predicted score - validation set

Try the DriveML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DriveML
Self-Drive Machine Learning Projects

DriveML: Machine Learning Projects
In DriveML: Self-Drive Machine Learning Projects