knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(ggplot2) library(dplyr) library(stringr) library(dbarts) library(possum) library(cowplot) # Better default plotting theme_set(theme_minimal_grid())
data(calif) glimpse(calif) ## Outcome: log median house values by census tract y <- log(calif$Median_house_value) ## Dataframe of regressors, and log income and pop califDf <- calif %>% dplyr::select(Median_household_income, POPULATION, Median_rooms, LONGITUDE, LATITUDE) %>% mutate(Median_household_income = log(Median_household_income), POPULATION = log(POPULATION)) ## Convert regressors to matrix x <- califDf %>% as.matrix()
set.seed(1) mybart <- bart(x, y, ndpost=1000, nskip=1000, nchain = 1) ## Posterior and posterior mean for fitted values yhat <- mybart$yhat.train.mean yhatSamples <- t(mybart$yhat.train)
First, define the summary. Here, we look at the individual additive effects of each of the five covariates.
summaryform <- yhat ~ s(Median_household_income) + s(POPULATION) + s(Median_rooms) + s(LONGITUDE) + s(LATITUDE)
Now, we estimate and plot the summary.
## Check the dimensions of posterior of yhat (matrix) and the ## posterior mean (vector) length(yhat) dim(yhatSamples) #NOTE: Each *column* is a posterior draw # mypossum <- additive_summary_fast(summaryform, yhatSamples, yhat, califDf, verbose=TRUE) mypossum <- additive_summary(summaryform, yhatSamples, yhat, califDf, verbose=TRUE) mypossum$gamDf %>% glimpse() mypossum$triangleDf %>% glimpse() additive_summary_plot(mypossum, windsor = 0.02)
Look at triangles:
additive_summary_triangle_plot(mypossum) additive_summary_triangle_plot(mypossum, windsor=0.02)
Summary R-sq, i.e., variance explained by summary.
hist(mypossum$summaryRsq, main = "", xlab = "Summary R-sq")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.