require(rstan) # observe startup messages
require(mvtnorm)
require(matrixcalc)
require(tidyverse)
require(ggplot2)
require(gridExtra)
#' Drift term for the fOU process.
#'
#' @param y_n is a real number.
#' @param theta is a list with the parameters of the fOU process (where sigma is permanently 1):
#' \describe{
#' \item{H}{Hurst index which is between 0 and 1.}
#' \item{mu}{Mean of the fOU process which is greater than 0.}
#' \item{gamma}{Mean reverting speed of the process which is greater than 0.}
#' }
#' @return A real number.
#' @export
#'
fou_mu <- function(y_n, theta) {
-1 * theta$gamma * (y_n - theta$mu)
}
#' Autocorrelation function for the fOU process.
#'
#' @param theta is a list with the parameters of the fOU process (where sigma is permanently 1):
#' \describe{
#' \item{H}{Hurst index which is between 0 and 1.}
#' \item{mu}{Mean of the fOU process which is positive.}
#' \item{gamma}{Mean reverting speed of the process which is positive.}
#' }
#' @param dt is the interobservation time between each datapoint of the fOU process.
#' @param n Number of observations of the fOU process.
#' @return An `n` dimensional vector of covariances.
#' @details The toeplitz matrix of the autocorrelation function of the fOU process is defined by the following vector of covariances:
#' ```
#' V[i] = (abs(i)^(2H) + abs(i - 2)^(2H) - 2 x abs(i - 1)^(2H)
#'
#' ```
#' @export
fou_gamma <- function(theta, dt, n){
gamma <- rep(0, n)
H <- theta$H
for (i in 1:n) {
gamma[i] <- (dt^(2*H)/2) * (abs(i)^(2*H) + abs(i - 2)^(2*H) - 2*abs(i - 1)^(2*H))
}
gamma
}
#' Generate cSDE observations.
#'
#' @param N Number of observations.
#' @param X0 Initial cSDE value at time `t = 0`.
#' @param dt The interobservation time between each observation of the cSDE.
#' @param theta List of parameters of the cSDE.
#' @param mu_fun Drift term of the cSDE which is a function of Xt and theta.
#' @param sigma_fun Stochastic deviationof the cSDE which is a function of theta.
#' @param gamma_fun Autocorrelation of the cSDE which is a function of theta, dt and N.
#' @param fft Whether to use fast (but sometimes less stable) FFT simulation method. See [SuperGauss::rnormtz()].
#' @return A vector of `N+1` cSDE observations recorded at intervals of `dt` starting from `X0`.
#' @details Function provided by Prof. Lysy that simulates data generation for any coloured-noise stochastic differential equation.Tthanks to mlysy for writing this function!
#' @export
csde_sim <- function(N, dt, X0, theta,
mu_fun, sigma_fun, gamma_fun, fft = TRUE) {
# dXt = X_{t-1} + mu_fun(X_{t-1}, theta) * dt + sigma * dG
sig <- sigma_fun(theta) # diffusion
print(sig)
gam <- gamma_fun(theta, dt, N) # autocorrelation
dG <- SuperGauss::rnormtz(acf = gam, fft = fft) # noise increments
Xt <- rep(NA, N+1) # cSDE time series
Xt[1] <- X0 # initialize
for(ii in 1:N) {
# recursion
Xt[ii + 1] <- Xt[ii] + mu_fun(Xt[ii], theta) * dt + sig * dG[ii]
}
Xt
}
#' Generate fOU observations.
#'
#' @param N Number of observations to generate.
#' @param theta is a list with the parameters of the fOU process fOU process (where sigma is permanently 1):
#' \describe{
#' \item{H}{Hurst index which is between 0 and 1.}
#' \item{mu}{Mean of the fOU process which is positive.}
#' \item{gamma}{Mean reverting speed of the process which is positive.}
#' }
#' @param X0 Initial fOU value at time `t = 0`.
#' @param delta_t The interobservation time between each observation of the fOU process.
#' @return A vector of `N+1` fOU observations recorded at intervals of `dt` starting from `X0`.
#' @details Generates data for the fOU which is characterized by the following equation:
#' \deqn{
#' dX_t = \gamma(X_t - \mu)*\Delta t + \sigma B^{H}_t
#' }
#' @export
fOU_sim <- function(N, theta, X0, delta_t) {
Xt <- csde_sim(N, delta_t, X0, theta, fou_mu, function(theta) { theta$sigma }, fou_gamma)
list(
Xt = Xt,
theta = theta,
X0 = X0,
delta_t = delta_t
)
}
#' Fit an fOU process.
#'
#' Fits an fOU process to the given data using a pre-compiled stan mode.
#'
#' @param fOU_data is a list of
#' \describe{
#' \item{Xt}{Vector of observations at level k.}
#' \item{delta_t}{The interobservation time between each observation of the fOU process.}
#' \item{X0}{Initial fOU value at time `t = 0`.}
#' }
#' @param K The level of euler-approximation which is a positive number less than N.
#' @param iter Maximum number of total iterations.
#' @param control List of control parameter for rstan::sampling. See [rstan::sampling()] for more details.
#' @return A stanfit object that is fitted to `c(X0,Xt)`.
#' @export
fit_fOU_process <- function(fOU_data, K, iter = 2e3, control = list(adapt_delta = 0.95)) {
Xt <- fOU_data$Xt
delta_t <- fOU_data$delta_t
X0 <- fOU_data$X0
N <- length(Xt) - 1 # remove X0
# when we're using k-level approximations, the stan model
# requires us to specify which timesteps have data, and which timesteps don't have data.
ii_obs <- seq(K, N * K, K)
ii_mis <- setdiff(seq(1, N * K), ii_obs)
fit <- rstan::sampling(
stanmodels$test_fou_process,
data = list(
Xt = tail(Xt, N),
N = N,
K = K,
delta_t = delta_t,
X0 = X0,
ii_obs = ii_obs,
ii_mis = ii_mis
),
control = control,
iter = iter,
init = function() { list(H=fOU_data$theta$H, gamma=fOU_data$theta$gamma, mu=fOU_data$theta$mu)} # added nice initial values to make things converge faster.
)
fit
}
#' Fit multiple levels of an fOU process.
#'
#' Fits an fOU process to the given data across multiple k-level approximations using a pre-compiled stan mode.
#'
#' @param fOU_data fOU_data object returned by fOU_sim.
#' @param Ks is vector of levels for the k-level approximations to be fitted.
#' @return A list of `Ks` stanfit objects that is fitted to `c(X0,Xt)` for each level k.
#' @export
fit_fOU_multiple_K <- function(fOU_data, Ks) {
all_samples <- data.frame()
fits <- sapply(Ks, function(K) {
fit_fOU_process(fOU_data, K)
})
for (i in 1:length(Ks)) {
post_samples <- rstan::extract(fits[[i]])
param_samples <- as.data.frame(post_samples)[c('H', 'gamma', 'mu')]
param_samples$K <- Ks[i]
all_samples <- rbind(all_samples, param_samples)
}
all_samples <- dplyr::as_tibble(all_samples)
list(post_samples=all_samples, fits=fits)
}
#' Log-density of cSDE observations.
#'
#' Calculates the log-density of `p(Xt | theta)`, where `Xt` are observations of a cSDE recorded at interobservation time `dt`.
#' Calculate log density of cSDE observations.
#'
#' @param Xt A vector of cSDE observations.
#' @param dt The interobservation time between each observation of the cSDE.
#' @param theta List of parameters of the cSDE
#' @param mu_fun Drift term of the cSDE which is a function of Xt and theta.
#' @param sigma_fun Stochastic deviationof the cSDE which is a function of theta.
#' @param gamma_fun Autocorrelation of the cSDE which is a function of theta, dt and N.
#' @details Calculates the log-density of `p(Xt | theta)`, where `Xt` are observations of a cSDE recorded at interobservation time `dt`. Thanks to mlysy for writing this function!
#' @return A scalar containing the log-density of the cSDE evaluated at its arguments.
#' @export
csde_logdens <- function(Xt, dt, theta,
mu_fun, sigma_fun, gamma_fun) {
dX <- diff(Xt)
N <- length(dX)
mu <- mu_fun(Xt[1:N], theta) # drift
sig <- sigma_fun(theta) # diffusion
gam <- gamma_fun(theta, dt, N) # autocorrelation
dG <- (dX - mu * dt) / sig # noise increments
NTz <- SuperGauss::NormalToeplitz$new(N) # instantiate NTz distribution
ld <- NTz$logdens(dG, acf = gam)
ld - N * log(sig) # jacobian for change-of-variables dX <-> dG
}
#' Log-density of fOU observations.
#'
#' @param Xt A vector of fOU observations.
#' @param delta_t is the interobservation time between each observation of the fOU process.
#' @param theta is a list with the parameters of the fOU process fOU process (where sigma is permanently 1):
#' \describe{
#' \item{H}{Hurst index which is between 0 and 1.}
#' \item{mu}{Mean of the fOU process which is positive.}
#' \item{gamma}{Mean reverting speed of the process which is positive.}
#' }
#' @return A scalar containing the log-density of the fOU evaluated at its arguments.
#' @export
fou_logdens <- function(Xt, delta_t, theta) {
csde_logdens(Xt, delta_t, theta, fou_mu, function(theta) { theta$sigma }, fou_gamma) +
dunif(theta$H, 0, 1, log = TRUE) + # H ~ uniform(1)
dunif(theta$gamma, 0, 2, log = TRUE) +
dnorm(theta$mu, 0, 10, log = TRUE) +
dunif(theta$sigma, 0, 10, log=TRUE)
}
#' Plot likelihoods of fOU parameters.
#'
#' Plots log likelihoods for each of the fOU parameters - H, gamma and mu.
#'
#' @param fOU_data is a list of
#' \describe{
#' \item{Xt}{Vector of observations at level k.}
#' \item{delta_t}{The interobservation time between each observation of the fOU process.}
#' \item{X0}{Initial fOU value at time `t = 0`.}
#' }
#' @param fit Stan fit object fitted on the `fOU_data`.
#' @param K The level of euler-approximation which is a positive number less than N.
#' @param thresholds List of thresholds for mu, gamma and H to perturb each term to grid search over.
#' @return Plot of the likelihoods of each parameter.
#' @details Check that theta_hat is a global AND local maximum.This is done by essentially grid searching one parameter while holding everything else constant.This is expensive, but feasible since our parameter constraints are fairly tight.
#' @export
plot_likelihoods <- function(fOU_data, fit, K, thresholds=list(mu=0.1, gamma=0.1, H = 0.05)) {
post_samples <- rstan::extract(fit, pars=c("gamma", "mu", "H"))
Xtk_samples <- rstan::extract(fit)$Xt_k
Xtk_hat <- apply(Xtk_samples, 2, mean)
mu_hat <- mean(post_samples$mu)
H_hat <- mean(post_samples$H)
gamma_hat <- mean(post_samples$gamma)
theta_hat <- list(gamma = gamma_hat, mu = mu_hat, H = H_hat)
delta_t <- fOU_data$delta_t / K
# global checks
range_H <- seq(0.0, 1, length.out = 1000)
loglikelihoods <- sapply(range_H, function(H) { fou_logdens(Xtk_hat, delta_t, theta = list(mu=theta_hat$mu, gamma=theta_hat$gamma, H = H) )})
plot(range_H, loglikelihoods, xlab = "H", main = "Global L(H|Xt)")
abline(v = theta_hat$H, col = "red")
range_gamma <- seq(0, 2, length.out = 1000)
loglikelihoods <- sapply(range_gamma, function(gamma) { fou_logdens(Xtk_hat, delta_t, theta = list(mu=theta_hat$mu, gamma=gamma, H = theta_hat$H) )})
plot(range_gamma, loglikelihoods, xlab = "gamma", main = "Global L(gamma|Xt)")
abline(v = theta_hat$gamma, col = "red")
range_mu <- seq(-5, 5, length.out = 1000)
loglikelihoods <- sapply(range_mu, function(mu) { fou_logdens(fOU_data$Xt, delta_t, theta = list(mu=mu, gamma=theta_hat$gamma, H = theta_hat$H) )})
plot(range_mu, loglikelihoods, xlab = "mu", main = "Global L(mu|Xt)")
abline(v = theta_hat$mu, col = "red")
# local checks
range_H <- seq(theta_hat$H - thresholds$H, theta_hat$H + thresholds$H, length.out = 1000)
loglikelihoods <- sapply(range_H, function(H) { fou_logdens(Xtk_hat, delta_t, theta = list(mu=theta_hat$mu, gamma=theta_hat$gamma, H = H) )})
plot(range_H, loglikelihoods, xlab = "H", main = "Local L(H|Xt)")
abline(v = theta_hat$H, col = "red")
range_gamma <- seq(theta_hat$gamma - thresholds$gamma, theta_hat$gamma + thresholds$gamma, length.out = 1000)
loglikelihoods <- sapply(range_gamma, function(gamma) { fou_logdens(Xtk_hat, delta_t, theta = list(mu=theta_hat$mu, gamma=gamma, H = theta_hat$H) )})
plot(range_gamma, loglikelihoods, xlab= "gamma", main = "Local L(gamma|Xt)")
abline(v = theta_hat$gamma, col = "red")
range_mu <- seq(theta_hat$mu - thresholds$mu, theta_hat$mu + thresholds$mu, length.out = 1000)
loglikelihoods <- sapply(range_mu, function(mu) { fou_logdens(fOU_data$Xt, delta_t, theta = list(mu=mu, gamma=theta_hat$gamma, H = theta_hat$H) )})
plot(range_mu, loglikelihoods, xlab = "mu", main = "Local L(mu|Xt)")
abline(v = theta_hat$mu, col = "red")
}
#' Given fOU_data, calculates the observed fBM increments wrt theta
#'
#' @param fOU_data fOU_data object returned from fOU_sim
#' @param theta parameters used to generate fOU_data
#' @return Vector of points corresponding to the fBM increments
#' @export
get_dGs <- function(fOU_data, theta) {
Xt <- fOU_data$Xt
dX <- diff(Xt)
N <- length(dX)
mu <- fou_mu(Xt[1:N], theta) # drift
gam <- fou_gamma(theta, fOU_data$delta_t, N) # autocorrelation
dG <- (dX - mu * fOU_data$delta_t) / theta$sigma # noise increments
dG
}
#' Draws a dG_future sample from the distribution dG_future | dG_obs
#' @param dGs_obs observed dG values
#' @param n_points number of dG points to sample
#' @param theta sampled parameters from the posterior distribution
#' @param delta_t delta_t
#' @return sampled dG values at time [t, .... t + n_points]
#' @export
get_dGs_future <- function(dGs_obs, n_points, theta, delta_t) {
N <- length(dGs_obs)
big_toep <- toeplitz(fou_gamma(theta, delta_t, N + n_points))
sigma_11 <- big_toep[1:n_points, 1:n_points]
sigma_12 <- big_toep[1:n_points, (n_points+1):(N+n_points)]
sigma_21 <- big_toep[(n_points+1):(N+n_points), 1:n_points]
sigma_22 <- big_toep[(n_points+1):(N+n_points), (n_points+1):(N+n_points)]
# dg_future | dg_obs= g ~ Normal(sigma_12 * inverse(sigma_22) * g, sigma_11 - sigma_12 * inverse(sigma_22) * sigma_21)
# TODO: use solveV
inv_sigma_22 <- solve(sigma_22)
cond_sigma <- sigma_11 - sigma_12 %*% inv_sigma_22 %*% sigma_21
cond_mu <- sigma_12 %*% solve(sigma_22) %*% dGs_obs
dGs_future <- mvtnorm::rmvnorm(1, cond_mu, cond_sigma)
dGs_future
}
#' Make predictions using an fOU fit.
#'
#' Given a Stan fit object, it predicts the next n data points using the parameters' posterior distributions.
#'
#' @param fOU_data fOU_data object returned from fOU_sim
#' @param fit Stan fit object of an fOU process.
#' @param X0 Initial fOU value at time `t = 0`.
#' @param delta_t Interobservation time between each observation of the fOU process.
#' @param n_points Number of points to predict.
#' @param n_samples Number of sample paths to simulate.
#' @return list object with the observed dG values (dG_obs | theta = theta_sample), future dG values (dG_future | dG_obs), and the predictions (Xt_future)
#' @details Draws parameters from the posterior distributions and simulates fOU data. The new fOU data is then aggregated to produce prediction confidence intervals. This is an expensive computation.
#' @export
fOU_predict <- function(fOU_data, fit, X0, delta_t, n_points, n_samples) {
# each row contains a sample path taken by the fitted fOU process
dGs_obs_matrix <- matrix(nrow = n_samples, ncol = length(fOU_data$Xt) - 1)
dGs_future_matrix <- matrix(nrow = n_samples, ncol = n_points)
pred_matrix <- matrix(nrow = n_samples, ncol = n_points + 1)
posterior_samples <- rstan::extract(fit, pars=c("mu", "gamma", "H"))
for (i in 1:n_samples) {
# draw parameters from posterior distribution
mu <- sample(posterior_samples$mu, 1)
gamma <- sample(posterior_samples$gamma, 1)
H <- sample(posterior_samples$H, 1)
theta <- list(mu=mu, gamma=gamma, H=H)
# get observed dGs
dGs_obs <- get_dGs(fOU_data, theta)
dGs_future <- get_dGs_future(dGs_obs, n_points, theta, fOU_data$delta_t)
dGs_future_matrix[i,] <- dGs_future
dGs_obs_matrix[i,] <- dGs_obs
sig <- theta$sigma
# construct Xt_future
Xt_future <- rep(0, n_points+1) # cSDE time series
Xt_future[1] <- X0 # initialize
for(ii in 1:n_points) {
# recursion
Xt_future[ii + 1] <- Xt_future[ii] + fou_mu(Xt_future[ii], theta) * delta_t + sig * dGs_future[ii]
}
pred_matrix[i, ] <- Xt_future
}
list(pred_matrix=pred_matrix, dGs_future_matrix=dGs_future_matrix, dGs_obs_matrix=dGs_obs_matrix)
}
#' Plot prediction intervals.
#'
#' Given fOU_data, uses fOU_predict to generate confidence intervals, then plots n sample paths from the `true`` fOU parameters.
#' @param fOU_data is a list of
#' \describe{
#' \item{Xt}{Vector of observations at level k.}
#' \item{delta_t}{The interobservation time between each observation of the fOU process.}
#' \item{X0}{Initial fOU value at time `t = 0`.}
#' }
#' @param pred_matrix Predictions of Xt. Each row is a sample.
#' @return Plot of prediction intervals of each of the fOU parameters.
#' @export
plot_prediction_interval <- function(fOU_data, pred_matrix) {
Xt <- tail(fOU_data$Xt, 20)
N <- length(Xt)
n_points <- ncol(pred_matrix)
plot(NULL, ylim=c(-20,20), xlim = c(1, N + n_points), xlab = "time (t)", ylab = "Xt", main = "Predictions with sample paths")
points(1:N, Xt)
# 90% confidence interval
CI <- apply(pred_matrix, 2, function(ts) { quantile(ts, probs = c(0.05, 0.95)) })
polygon(c(N:(N + n_points - 1), rev(N:(N + n_points - 1))), c(CI[1,], rev(CI[2,])), col = "slateblue1", border = NA)
# 50% confidence interval
CI <- apply(pred_matrix, 2, function(ts) { quantile(ts, probs = c(0.25, 0.75)) })
polygon(c(N:(N + n_points - 1), rev(N:(N + n_points - 1))), c(CI[1,], rev(CI[2,])), col = "slateblue4", border = NA)
}
#' Plot confindence intervals.
#'
#' Given fOU_data, uses fOU_predict to generate confidence intervals, then plots n sample paths from the `true`` fOU parameters.
#' @param fOU_data is a list of
#' \describe{
#' \item{Xt}{Vector of observations at level k.}
#' \item{delta_t}{The interobservation time between each observation of the fOU process.}
#' \item{X0}{Initial fOU value at time `t = 0`.}
#' }
#' @param fit Stan fit object of an fOU process.
#' @param K The level of euler-approximation which is a positive number less than N.
#' @param N Number of observations of the fOU process.
#' @param title Title of the plot.
#' @return Plot of confidence intervals of each of the fOU parameters.
#' @export
plot_CI <- function(fOU_data, fit, K, N, title = "Xt and 99% CIs of interpolated points") {
t_k <- (1 : (K * N)) / K
CI_lower <- apply(as.matrix(rstan::extract(fit)$Xt_k), 2, function(ts) { quantile(ts, probs = c(0.01)) })
CI_upper <- apply(as.matrix(rstan::extract(fit)$Xt_k), 2, function(ts) { quantile(ts, probs = c(0.99)) })
plot(NULL, xlim=c(0, N), ylim=c(min(fOU_data$Xt[1:N]), max(fOU_data$Xt[1:N])), xlab="time (t)", ylab="X(t)", main = title)
polygon(c(t_k, rev(t_k)), c(CI_upper[1:(K * N)], rev(CI_lower[1:(K * N)])), col = "grey90", border = NA)
points(1:N, fOU_data$Xt[2:(N + 1)], col="black")
}
#' Plot posterior distribution for parameters.
#'
#' Given a tibble of posterior samples (from fit_fOU_process), calculate the posterior distribution and plot them.
#'
#' @param post_tb Tibble of posterior samples.
#' @param fOU_data data object returned by fOU_sim
#' @return Plot of the posterior distribution of each parameter of the fOU process.
#' @export
plot_param_posterior_distributions <- function(post_tb, fOU_data) {
p1 <- ggplot(data=post_tb) +
geom_density(mapping=aes(x=post_tb$H, group=post_tb$K, color = post_tb$K)) +
geom_vline(xintercept = fOU_data$theta$H)
p2 <- ggplot(data=post_tb) +
geom_density(mapping=aes(x=post_tb$gamma, group=post_tb$K, color = post_tb$K)) +
geom_vline(xintercept = fOU_data$theta$gamma)
p3 <- ggplot(data=post_tb) +
geom_density(mapping=aes(x=post_tb$mu, group=post_tb$K, color = post_tb$K)) +
geom_vline(xintercept = fOU_data$theta$mu)
grid.arrange(p1, p2, p3, nrow=3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.