rmarkdown::find_pandoc(dir  = "/usr/local/bin/")
knitr::opts_chunk$set(echo = FALSE)
library(ggplot2)
library(devtools)
library(data.table)
load_all()
df1 <- bikes_atom[bikes_atom$method == "BREGLOG", ]
df1[df1$t == 667, "lpdens"] <- 0 # sandy correction

Introduction

This document looks at the regression agent using the bikeshare data. A script for generating the predictions (which is needed to run this code) can be found in data-raw.

Predictive mean

df <- data.frame(
    true_val = bikes_d_log$logcnt[-c(1:200)],
    lm_fitted = lm(logcnt ~ . -t, data = bikes_d_log)$fitted[-c(1:200)],
    bart_fitted = df1$pmean,
    time = bikes_d_log$t[201:length(bikes_d_log$t)],
    group = rep(1:5, each = 106)
)

dfx <- melt(
    data.table(df),
    measure = c("lm_fitted", "bart_fitted", "true_val"))

plt <- ggplot(dfx, aes(y = value, x = time, col = variable)) +
    geom_line() +
    facet_wrap(~group, ncol = 1, scales = "free_x") +
    labs(
        title = "Predictive mean of regression (lm for comparison)",
        x = "Time",
        y = "Log count"
    )
plt

Log predictive density

Plotting the lpdens of regression, we run into the problem of Sandy which completely messes up the scale.

df <- data.frame(
    true_val = bikes_d_log$logcnt[-c(1:200)],
    bart_lpdens = df1$lpdens,
    time = bikes_d_log$t[201:length(bikes_d_log$t)],
    group = rep(1:5, each = 106)
)

dfx <- melt(
    data.table(df),
    measure = c("bart_lpdens", "true_val"))

plt <- ggplot(dfx, aes(y = value, x = time, col = variable)) +
    geom_line() +
    facet_wrap(~group, ncol = 2, scales = "free_x") +
    labs(
        title = "lpdens regression",
        x = "Time",
        y = "Log count"
    )
plt

We set the Sandy lpdens to zero.

df <- data.frame(
    true_val = bikes_d_log$logcnt[-c(1:200)],
    bart_lpdens = df1$lpdens,
    time = bikes_d_log$t[201:length(bikes_d_log$t)],
    group = rep(1:5, each = 106)
)

dfx <- melt(
    data.table(df),
    measure = c("bart_lpdens", "true_val"))

plt <- ggplot(dfx, aes(y = value, x = time, col = variable)) +
    geom_line() +
    facet_wrap(~group, ncol = 1, scales = "free_x") +
    labs(
        title = "lpdens regression",
        x = "Time",
        y = "Log count"
    )
plt

Local predictive ability

sotw_all <- cbind(
    subset(bikes_d_log, select = c(t, temp, hum, windspeed)),
    time = bikes_d_log$t / sqrt(var(bikes_d_log$t))
)
local_predictive_ability(df1, sotw_all)


ooelrich/oscbvar documentation built on Sept. 8, 2021, 3:31 p.m.