knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(ggplot2)
library(dplyr)
library(stringr)
library(dbarts)
library(possum)

library(cowplot) # Better default plotting
theme_set(theme_minimal_grid())

Load and prepare California housing data

data(calif)
glimpse(calif)

## Outcome: log median house values by census tract
y <- log(calif$Median_house_value) 

## Dataframe of regressors, and log income and pop
califDf <- calif %>%
  dplyr::select(Median_household_income,
                POPULATION,
                Median_rooms,
                LONGITUDE,
                LATITUDE) %>%
  mutate(Median_household_income = log(Median_household_income),
         POPULATION = log(POPULATION))

## Convert regressors to matrix
x <- califDf %>% as.matrix()

Estimate Bayesian nonparametric regression model

set.seed(1)

mybart <- bart(x, y, ndpost=1000, nskip=1000, nchain = 1)

## Posterior and posterior mean for fitted values
yhat <- mybart$yhat.train.mean
yhatSamples <- t(mybart$yhat.train)

Construct and plot additive summary

First, define the summary. Here, we look at the individual additive effects of each of the five covariates.

summaryform <- yhat ~ s(Median_household_income) + s(POPULATION) +
  s(Median_rooms) + s(LONGITUDE) + s(LATITUDE)

Now, we estimate and plot the summary.

## Check the dimensions of posterior of yhat (matrix) and the
## posterior mean (vector)

length(yhat)
dim(yhatSamples) #NOTE: Each *column* is a posterior draw

# mypossum <- additive_summary_fast(summaryform, yhatSamples, yhat, califDf, verbose=TRUE)
mypossum <- additive_summary(summaryform, yhatSamples, yhat, califDf, verbose=TRUE)

mypossum$gamDf %>% glimpse()
mypossum$triangleDf %>% glimpse()

additive_summary_plot(mypossum, windsor = 0.02)

Look at triangles:

additive_summary_triangle_plot(mypossum)
additive_summary_triangle_plot(mypossum, windsor=0.02)

Summary diagnostics

Summary R-sq, i.e., variance explained by summary.

hist(mypossum$summaryRsq, main = "", xlab = "Summary R-sq")


spencerwoody/possum documentation built on Aug. 5, 2022, 12:18 a.m.