```r
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
```
The goal of ngboost is to provide an R interface for the Python package NGBoost.
"NGBoost is a method for probabilistic prediction with competitive state-of-the-art performance on a variety of datasets. NGBoost combines a multiparameter boosting algorithm with the natural gradient to efficiently estimate how parameters of the presumed outcome distribution vary with the observed features. NGBoost performs as well as existing methods for probabilistic regression but retains major advantages: NGBoost is flexible, scalable, and easy-to-use." (From the paper, Duan, et at., 2019, see here)
You can install the development version from GitHub with:
```r
# install.packages("devtools")
devtools::install_github("Akai01/ngboost")
```
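Because ngboost is an interface to the Python package, a Python environment with the Python ngboost module must be available. A minimal setup sketch, assuming the package bridges to Python via reticulate (not stated in this README):

```r
# Hypothetical setup step: install the Python ngboost module into the
# Python environment that reticulate discovers. Adjust to your setup.
reticulate::py_install("ngboost")
```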
A probabilistic regression example on the Boston housing dataset:
```r
library(ngboost)
library(magrittr) # for the %>% pipe used below

data(Boston, package = "MASS")

dta <- rsample::initial_split(Boston)
train <- rsample::training(dta)
test <- rsample::testing(dta)

x_train <- train[, 1:13]
y_train <- train[, 14]
x_test <- test[, 1:13]
y_test <- test[, 14]

model <- NGBRegression$new(Dist = Dist("Exponential"),
                           Base = sklearner(),
                           Score = Scores("MLE"),
                           natural_gradient = TRUE,
                           n_estimators = 600,
                           learning_rate = 0.002,
                           minibatch_frac = 0.8,
                           col_sample = 0.9,
                           verbose = TRUE,
                           verbose_eval = 100,
                           tol = 1e-5)

model$fit(X = x_train, Y = y_train, X_val = x_test, Y_val = y_test)

model$feature_importances()

model$plot_feature_importance()

model$predict(x_test) %>% head()

distt <- model$pred_dist(x_test) # returns an NGBDist object

class(distt)

?NGBDist # see the available methods

distt$interval(confidence = .9)
```
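To sanity-check the point forecasts, you can compare them against the held-out responses. A minimal sketch, not part of the original example; it reuses the `model`, `x_test`, and `y_test` objects defined above:

```r
# Root mean squared error of the point predictions on the test set
preds <- model$predict(x_test)
sqrt(mean((y_test - preds)^2))
```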
A classification example on the BreastCancer dataset:
```r
data(BreastCancer, package = "mlbench")

dta <- na.omit(BreastCancer)

dta <- rsample::initial_split(dta)
train <- rsample::training(dta)
test <- rsample::testing(dta)

x_train <- train[, 2:10]
y_train <- as.integer(train[, 11]) # the two factor levels become labels 1 and 2
x_test <- test[, 2:10]
y_test <- as.integer(test[, 11])

model <- NGBClassifier$new(Dist = Dist("k_categorical", k = 3), # labels are 1 and 2, so k = 3 covers {0, 1, 2}
                           Base = sklearner(),
                           Score = Scores("LogScore"),
                           natural_gradient = TRUE,
                           n_estimators = 100,
                           tol = 1e-5,
                           random_state = NULL)

model$fit(x_train, y_train, X_val = x_test, Y_val = y_test)

model$feature_importances()

model$plot_feature_importance()

model$predict(x_test)

model$predict_proba(x_test) %>% head()
```
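To see how well the classifier does on the held-out data, the predicted labels can be tabulated against the true ones. A minimal sketch, not part of the original example, assuming `predict()` returns labels on the same integer coding used for training:

```r
# Confusion matrix and accuracy on the test set
preds <- model$predict(x_test)
table(predicted = preds, actual = y_test)
mean(preds == y_test) # accuracy
```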