## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6, fig.height = 4
)
## ----install------------------------------------------------------------------
# install.packages("devtools")
# devtools::install_github("drkowal/SeBR")
library(SeBR)
## ----sims---------------------------------------------------------------------
set.seed(123) # for reproducibility
# Simulate data from a transformed linear model:
dat = simulate_tlm(n = 200,        # number of observations
                   p = 10,         # number of covariates
                   g_type = 'step' # type of transformation (here, positive data)
)
# Training data:
y = dat$y; X = dat$X
# Testing data:
y_test = dat$y_test; X_test = dat$X_test
## ----sblm---------------------------------------------------------------------
# Fit the semiparametric Bayesian linear model:
fit = sblm(y = y,
           X = X,
           X_test = X_test)
names(fit) # what is returned
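## ----extra: mcmc-check (added sketch, not from the original vignette)----------
# Quick MCMC diagnostic for the regression coefficients. This assumes, as in
# the coefficient summary below, that fit$post_theta stores the posterior draws
# with one column per coefficient; it is an illustrative check only.
plot(fit$post_theta[, 1], type = 'l',
     xlab = 'Saved draw', ylab = 'Coefficient draw',
     main = 'Traceplot of one regression coefficient')
# Effective sample sizes (if the coda package is installed):
# coda::effectiveSize(fit$post_theta)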
## ----ppd, echo = FALSE--------------------------------------------------------
# Posterior predictive checks on testing data: empirical CDF
# Construct a plot w/ the unique testing values:
y0 = sort(unique(y_test))
plot(y0, y0, type='n', ylim = c(0,1),
     xlab='y', ylab='F_y', main = 'Posterior predictive ECDF: testing data')
# Add the ECDF of each simulated predictive dataset
temp = sapply(1:nrow(fit$post_ypred), function(s)
  lines(y0, ecdf(fit$post_ypred[s,])(y0),
        col='gray', type ='s'))
# Add the ECDF for the observed testing data
lines(y0, ecdf(y_test)(y0), # ECDF of testing data
      col='black', type = 's', lwd = 3)
## ----pi-----------------------------------------------------------------------
# Evaluate posterior predictive means and intervals on the testing data:
plot_pptest(fit$post_ypred,
            y_test,
            alpha_level = 0.10) # target coverage: 90%
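## ----extra: point-predictions (added sketch, not from the original vignette)---
# Point predictions can be formed from the posterior predictive draws already
# used above (rows of fit$post_ypred are draws, columns are testing points);
# this is an illustrative accuracy check, not part of the original vignette.
y_hat_test = colMeans(fit$post_ypred) # posterior predictive mean
sqrt(mean((y_hat_test - y_test)^2))   # root mean squared prediction error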
## ----comp, echo = FALSE-------------------------------------------------------
# Summarize the transformation:
# post_g contains draws of g evaluated at the sorted and unique y-values:
y0 = sort(unique(y))
# Construct a plot:
plot(y0, fit$post_g[1,], type='n', ylim = range(fit$post_g),
     xlab = 'y', ylab = 'g(y)', main = "Posterior draws of the transformation")
# Add the posterior draws of g:
temp = sapply(1:nrow(fit$post_g), function(s)
  lines(y0, fit$post_g[s,], col='gray')) # posterior draws
# Add the posterior mean of g:
lines(y0, colMeans(fit$post_g), lwd = 3) # posterior mean
# Add the true transformation:
lines(y, dat$g_true, type='p', pch=2) # true transformation
legend('bottomright', c('Truth'), pch = 2) # annotate the true transformation
## ----comp-theta---------------------------------------------------------------
# Summarize the parameters (regression coefficients):
# Posterior means:
coef(fit)
# Check: correlation with true coefficients
cor(dat$beta_true, coef(fit))
# 95% credible intervals:
theta_ci = t(apply(fit$post_theta, 2, quantile, c(.025, 0.975)))
# Check: agreement on nonzero coefficients?
which(theta_ci[,1] >= 0 | theta_ci[,2] <= 0) # 95% CI excludes zero
which(dat$beta_true != 0) # truly nonzero
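## ----extra: coef-plot (added sketch, not from the original vignette)-----------
# Visual comparison of the estimated and true coefficients, reusing coef(fit),
# theta_ci, and dat$beta_true from above. Because the transformation is learned
# from the data, the coefficients may only be comparable to the truth up to a
# rescaling (hence the correlation check above), so points need not fall
# exactly on the reference line.
plot(dat$beta_true, coef(fit),
     xlab = 'True coefficient', ylab = 'Posterior mean',
     ylim = range(theta_ci), main = 'Estimated vs. true coefficients')
segments(x0 = dat$beta_true, y0 = theta_ci[,1], y1 = theta_ci[,2], col = 'gray') # 95% CIs
abline(0, 1, lty = 2) # reference line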
## ----sims-qr------------------------------------------------------------------
# Simulate data from a heteroskedastic linear model (no transformation):
dat = simulate_tlm(n = 200,                        # number of observations
                   p = 10,                         # number of covariates
                   g_type = 'box-cox', lambda = 1, # no transformation
                   heterosked = TRUE               # heteroskedastic errors
)
# Training data:
y = dat$y; X = dat$X
# Testing data:
y_test = dat$y_test; X_test = dat$X_test
## ----pack-qr, message = FALSE, warning = FALSE--------------------------------
library(quantreg) # traditional QR for initialization
library(statmod) # for rinvgauss sampling
## ----fit-qr-------------------------------------------------------------------
# Quantile to target:
tau = 0.05
# (Traditional) Bayesian quantile regression:
fit_bqr = bqr(y = y,
              X = X,
              tau = tau,
              X_test = X_test,
              verbose = FALSE # omit printout
)
# Semiparametric Bayesian quantile regression:
fit = sbqr(y = y,
           X = X,
           tau = tau,
           X_test = X_test,
           verbose = FALSE # omit printout
)
names(fit) # what is returned
## ----ppd-bqr, echo = FALSE----------------------------------------------------
# Posterior predictive checks on testing data: empirical CDF
# Construct a plot w/ the unique testing values:
y0 = sort(unique(y_test))
plot(y0, y0, type='n', ylim = c(0,1),
     xlab='y', ylab='F_y',
     main = 'Posterior predictive ECDF: testing data (black) \n bqr (red) vs. sbqr (gray)')
# Add the ECDF of each simulated predictive (bqr) dataset
temp = sapply(1:nrow(fit_bqr$post_ypred), function(s)
  lines(y0, ecdf(fit_bqr$post_ypred[s,])(y0),
        col='red', type ='s'))
# Same, for sbqr:
temp = sapply(1:nrow(fit$post_ypred), function(s)
  lines(y0, ecdf(fit$post_ypred[s,])(y0),
        col='gray', type ='s'))
# Add the ECDF for the observed testing data
lines(y0, ecdf(y_test)(y0), # ECDF of testing data
      col='black', type = 's', lwd = 3)
## ----bqr-test-----------------------------------------------------------------
# Quantile point estimates:
q_hat_bqr = fitted(fit_bqr)
# Empirical proportion of testing data at or below the estimated quantile (target: tau = 0.05):
(emp_quant_bqr = mean(q_hat_bqr >= y_test))
# Evaluate posterior predictive means and intervals on the testing data:
(emp_cov_bqr = plot_pptest(fit_bqr$post_ypred,
                           y_test,
                           alpha_level = 0.10))
## ----sbqr-test----------------------------------------------------------------
# Quantile point estimates:
q_hat = fitted(fit)
# Empirical proportion of testing data at or below the estimated quantile (target: tau = 0.05):
(emp_quant_sbqr = mean(q_hat >= y_test))
# Evaluate posterior predictive means and intervals on the testing data:
(emp_cov_sbqr = plot_pptest(fit$post_ypred,
                            y_test,
                            alpha_level = 0.10))
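## ----extra: qr-comparison (added sketch, not from the original vignette)-------
# Side-by-side summary of the two quantile regression fits, reusing the
# quantities computed above. This assumes plot_pptest() returns the empirical
# interval coverage, as its use above suggests; the targets are tau = 0.05 for
# the quantile and 0.90 for the interval coverage.
round(rbind(
  'Empirical quantile (target 0.05)' = c(bqr = emp_quant_bqr, sbqr = emp_quant_sbqr),
  'PI coverage (target 0.90)'        = c(bqr = as.numeric(emp_cov_bqr),
                                         sbqr = as.numeric(emp_cov_sbqr))
), 3)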
## ----sim-gp-------------------------------------------------------------------
# Training data:
n = 200 # sample size
x = seq(0, 1, length = n) # observation points
# Testing data:
n_test = 1000
x_test = seq(0, 1, length = n_test)
# True inverse transformation:
g_inv_true = function(z)
  qbeta(pnorm(z),
        shape1 = 0.5,
        shape2 = 0.1) # approx Beta(0.5, 0.1) marginals
# Training observations:
y = g_inv_true(
  sin(2*pi*x) + sin(4*pi*x) + .25*rnorm(n)
)
# Testing observations:
y_test = g_inv_true(
  sin(2*pi*x_test) + sin(4*pi*x_test) + .25*rnorm(n_test)
)
plot(x_test, y_test,
     xlab = 'x', ylab = 'y',
     main = "Training (gray) and testing (black) data")
lines(x, y, type='p', col='gray', pch = 2)
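## ----extra: marginals (added sketch, not from the original vignette)-----------
# The simulated responses are bounded and heavily skewed by construction
# (approximately Beta(0.5, 0.1) marginals), which is exactly the kind of
# feature a learned transformation can absorb. A quick look at the marginal:
hist(y, breaks = 30, xlab = 'y',
     main = 'Marginal distribution of the training responses')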
## ----pack-gp, message = FALSE, warning = FALSE--------------------------------
library(GpGp) # fast GP computing
library(fields) # accompanies GpGp
## ----fit-bc-------------------------------------------------------------------
# Fit the Box-Cox Gaussian process model:
fit_bc = bgp_bc(y = y,
                locs = x,
                locs_test = x_test)
# Fitted values on the testing data:
y_hat_bc = fitted(fit_bc)
# 90% prediction intervals on the testing data:
pi_y_bc = t(apply(fit_bc$post_ypred, 2, quantile, c(0.05, .95)))
# Average PI width:
(width_bc = mean(pi_y_bc[,2] - pi_y_bc[,1]))
# Empirical PI coverage:
(emp_cov_bc = mean((pi_y_bc[,1] <= y_test)*(pi_y_bc[,2] >= y_test)))
# Plot these together with the actual testing points:
plot(x_test, y_test, type='n',
     ylim = range(pi_y_bc, y_test), xlab = 'x', ylab = 'y',
     main = 'Fitted values and prediction intervals: \n Box-Cox Gaussian process')
# Add the intervals:
polygon(c(x_test, rev(x_test)),
        c(pi_y_bc[,2], rev(pi_y_bc[,1])),
        col='gray', border=NA)
lines(x_test, y_test, type='p') # actual values
lines(x_test, y_hat_bc, lwd = 3) # fitted values
## ----fit-gp-------------------------------------------------------------------
# library(GpGp) # loaded above
# Fit the semiparametric Gaussian process model:
fit = sbgp(y = y,
           locs = x,
           locs_test = x_test)
names(fit) # what is returned
coef(fit) # estimated regression coefficients (here, just an intercept)
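## ----extra: sbgp-transformation (added sketch, not from the original vignette)----
# If sbgp() also returns posterior draws of the transformation in fit$post_g,
# evaluated at the sorted unique y-values as sblm() does above (an assumption
# about the returned object), they can be summarized the same way:
if (!is.null(fit$post_g)) {
  y0 = sort(unique(y)) # assumed evaluation grid, as in sblm()
  if (ncol(fit$post_g) == length(y0))
    plot(y0, colMeans(fit$post_g), type = 'l', lwd = 3,
         xlab = 'y', ylab = 'g(y)',
         main = 'Posterior mean of the transformation (sbgp)')
}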
## ----oos-gp-------------------------------------------------------------------
# Fitted values on the testing data:
y_hat = fitted(fit)
# 90% prediction intervals on the testing data:
pi_y = t(apply(fit$post_ypred, 2, quantile, c(0.05, .95)))
# Average PI width:
(width = mean(pi_y[,2] - pi_y[,1]))
# Empirical PI coverage:
(emp_cov = mean((pi_y[,1] <= y_test)*(pi_y[,2] >= y_test)))
# Plot these together with the actual testing points:
plot(x_test, y_test, type='n',
     ylim = range(pi_y, y_test), xlab = 'x', ylab = 'y',
     main = 'Fitted values and prediction intervals: \n semiparametric Gaussian process')
# Add the intervals:
polygon(c(x_test, rev(x_test)),
        c(pi_y[,2], rev(pi_y[,1])),
        col='gray', border=NA)
lines(x_test, y_test, type='p') # actual values
lines(x_test, y_hat, lwd = 3) # fitted values
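## ----extra: gp-comparison (added sketch, not from the original vignette)-------
# Direct comparison of the two Gaussian process fits on the testing data,
# reusing the interval widths, empirical coverages, and fitted values computed
# above (the 90% prediction intervals target 0.90 coverage).
rmse_bc = sqrt(mean((y_hat_bc - y_test)^2)) # Box-Cox GP
rmse_sb = sqrt(mean((y_hat - y_test)^2))    # semiparametric GP
round(rbind(
  'Box-Cox GP'        = c(width = width_bc, coverage = emp_cov_bc, rmse = rmse_bc),
  'Semiparametric GP' = c(width = width,    coverage = emp_cov,    rmse = rmse_sb)
), 3)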