#' Fitting 1-D Log-Gaussian Cox Process with lag.
#'
#' @export
#'
#' @param events A list of vectors of timestamps, one vector for each observed process.
#' @param max_lag Lag parameter constrained to lie within [-max_lag, max_lag].
#' @param time_unit Unit of measurement for the timestamps.
#' @param likelihood Either 'poisson' or 'coalescent'. If 'coalescent', param EVENTS must contain
#' vectors named 'coal_times', 'samp_times', and 'n_sampled'.
#' @param n_bins Number of bins to use for binning the events.
#' @param max_n_frequencies Maximum number of cosine and sine basis functions to use in spectral representation.
#' @param min_percent_padding How far to extend the domain of the latent (periodic) Gaussian process.
#' @param prob_quantiles Which quantiles of the latent parameters should be returned?
#' @param return_stanfit If TRUE, the returned list includes the stanfit object containing all MCMC samples.
#' @param fitting_args Additional list of arguments that affect the marginal GMO fitting and MCMC simulation code.
#'
#' @useDynLib laggedLGCP, .registration = TRUE
#'
#' @examples
#' # To simulate a coalescent process with lagged sampling times
#' sim <- sim_lag_coalescent(lag=-1, c=1, beta=2, scaling=0.05)
#' fit <- fit_LGCP_lag(sim$events, max_lag=3, likelihood='coalescent')
#' # Plot inferred and 'true' effective population size trajectory
#' par(mfrow=c(3,1))
#' plot_coal_result(fit, traj=sim$rate_functions$coal_fun, main="Lagged Preferential Sampling", ylim=c(0.5,15))
#' # Plot non-Pref. sampling and PS without lag inferences``
#' plot_BNPR(BNPR(sim$events, lengthout = 100), traj=sim$rate_functions$coal_fun, main="No PS", ylim=c(0.5,15))
#' plot_BNPR(BNPR_PS(sim$events, lengthout = 100), traj=sim$rate_functions$coal_fun, main="PS without lag", ylim=c(0.5,15))
fit_LGCP_lag <- function(events,
max_lag,
time_unit = 'Weeks',
likelihood = 'poisson',
n_bins = 100,
max_n_frequencies = 1023,
min_percent_padding = 10.0,
prob_quantiles = c(.025, .25),
return_stanfit = FALSE,
fitting_args = list()) {
if (all(sort(names(events)) == c('coal_times', 'n_sampled', 'samp_times')))
likelihood <- 'coalescent'
stopifnot(any(likelihood == c('poisson', 'coalescent')))
stopifnot(max_lag > 0)
stopifnot(max(prob_quantiles) < 0.5)
# set defaults for GMO and stan fitting
fitting_defaults <- list(n_chains = 1,
n_samples = 1000,
refresh = 10,
gmo_iter = 100,
gmo_draws = 15,
gmo_tol = 1e-4,
gmo_eta = 1.,
gmo_init_lengthscale = -1,
gmo_max_block_size = 256)
# Override defaults if specified
for (arg_name in names(fitting_args)) {
stopifnot(any(arg_name == names(fitting_defaults)))
fitting_defaults[[arg_name]] <- fitting_args[[arg_name]]
}
# adjust starting lengthscale value if unspecified
if (fitting_defaults$gmo_init_lengthscale <= 0) {
fitting_defaults$gmo_init_lengthscale <- 2 * max_lag
}
# fill out probability quantiles with median and symmetric values
prob_quantiles <- c(prob_quantiles, 0.5, rev(1 - prob_quantiles))
# Make sure event data is a list of numeric vectors
stopifnot(all(vapply(events, function(x) is.numeric(x) & is.vector(x), logical(1))))
# Bin the event data
if (likelihood == 'poisson') {
pars <- bin_poisson(dat$events, n_bins=n_bins)
} else { # have coalescent data
pars <- bin_coal(events$coal_times, events$samp_times, events$n_sampled, n_bins)
}
# Set the prior on lag parameter to N(mu = 0, sigma = max_lag / 2)
pars$max_lag <- max_lag
# number of bins WITH padding -- must be a power of 2.
pars$Ntot <- nextpow2(pars$N, min_percent_padding)
# how many hyperparameters to optimize
pars$npars <- 4
# number of frequencies to use
pars$n_nonzero_freq <- min(max_n_frequencies, floor(pars$Ntot/2 - 1))
# marginal process fitting function
marginal_optimize <- function(k) {
ind_pars <- split_data(pars, k)
if (k==2 && pars$coalescent) {
# second process (sampling) does not have the coalescent likelihood.
ind_pars$coalescent = 0
}
print(ind_pars$coalescent)
sfit=sampling(stanmodels$single_gp_fixed_gmo, data = c(ind_pars, list(GMO_FLAG = FALSE, fixed_phi = double())),
chains = 0, iter = 1)
opt_res <- gmo(full_model=sfit,
data = ind_pars,
iter = as.integer(fitting_defaults$gmo_iter),
tol = fitting_defaults$gmo_tol,
eta = fitting_defaults$gmo_eta,
draws = as.integer(fitting_defaults$gmo_draws),
init = get_init(ind_pars, lengthscale=fitting_defaults$gmo_init_lengthscale),
max_block_size = fitting_defaults$gmo_max_block_size)
u_opt <- opt_res$par
return(c(mu=u_opt[1], sigma=exp(u_opt[2]), lengthscale=exp(u_opt[3]), nu=exp(u_opt[4])))
}
# optimize hyperparameters of each marginal latent GP
hyp_opt <- vapply(1:pars$n_series, FUN=marginal_optimize, FUN.VALUE=numeric(pars$npars))
# Package the hyperparameters together
pars <- c(pars, as.list(data.frame(t(hyp_opt))))
# Now train joint GP stan model, with the above fixed hyperparameters
fit <- sampling(stanmodels$multiple_gp_nonsep_fixed,
data=pars,
chains=fitting_defaults$n_chains,
iter=fitting_defaults$n_samples,
control=list(adapt_delta=0.95),
refresh=fitting_defaults$refresh,
pars=c("re", "im", "x", "cor"), include=FALSE) # ignore high-dimensional latent variables
# extract quantiles for all posterior samples
samples <- rstan::extract(fit, permuted=TRUE)
rates <- samples$rates
quantiles <- list()
for (param in names(samples)) {
samp <- samples[[param]]
if (is.vector(samp) || length(samp) == dim(samp)[1])
quantiles[[param]] <- quantile(samp, probs = prob_quantiles)
else {
n_dim <- length(dim(samp))
quantiles[[param]] <- apply(samp, MARGIN=2:n_dim,
FUN=function(x) quantile(x, probs=prob_quantiles))
}
}
print(quantiles$shifts)
# Naive cross-correlation estimate of lag
shifts_ccf <- rep(0, pars$n_series - 1)
for (k in 1:(pars$n_series - 1)) {
crosscor <- ccf(pars$counts[, pars$n_series], pars$counts[,k], plot=FALSE)
shifts_ccf[k] <- crosscor$lag[which.max(crosscor$acf)] * pars$delta
}
# Data to return
res <- list(quantiles = quantiles, shifts_ccf = shifts_ccf, data = pars,
events=events)
if (return_stanfit)
res$stanfit <- fit
return(res)
}
#' Return marginal GP hyperparameters fitted by optimizing the marginal likelihood.
#'
#' @export
#' @param res Result from fit_LGCP_lag() function.
#'
get_hypers <- function(res)
return(res$data[c('mu', 'sigma', 'lengthscale', 'nu')])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.