| PF_EM | R Documentation |
Method to estimate the hyper parameters with an EM algorithm.
PF_EM( formula, data, model = "logit", by, max_T, id, a_0, Q_0, Q, order = 1, control = PF_control(...), trace = 0, seed = NULL, type = "RW", fixed = NULL, random = NULL, Fmat, fixed_effects, G, theta, J, K, psi, phi, ... )
formula |
|
data |
|
model |
either |
by |
interval length of the bins in which parameters are fixed. |
max_T |
end of the last interval interval. |
id |
vector of ids for each row of the in the design matrix. |
a_0 |
vector a_0 for the initial coefficient vector for the first iteration (optional). Default is estimates from static model (see |
Q_0 |
covariance matrix for the prior distribution. |
Q |
initial covariance matrix for the state equation. |
order |
order of the random walk. |
control |
see |
trace |
argument to get progress information. Zero will yield no info and larger integer values will yield incrementally more information. |
seed |
seed to set at the start of every EM iteration. See
|
type |
type of state model. Either |
fixed |
two-sided |
random |
one-sided |
Fmat |
starting value for F when |
fixed_effects |
starting values for fixed effects if any. See
|
G, theta, J, K, psi, phi |
parameters for a restricted |
... |
optional way to pass arguments to |
Estimates a state model of the form
α_t = F α_t + Rε_t, \qquad ε_t \sim N(0, Q)
where F\in{\rm I\!R}^{p\times p} has full rank, α_t\in {\rm I\!R}^p, ε_t\in {\rm I\!R}^r, r ≤q p, and R = (e_{l_1},e_{l_2},…,e_{l_r}) where e_k is column from the p dimensional identity matrix and l_1<l_2<…<l_r. The time zero state is drawn from
α_0\sim N(a_0, Q_0)
with Q_0 \in {\rm I\!R}^{p\times p}. The latent states, α_t, are related to the output through the linear predictors
η_{it} = X_t(R^+α_t) + Z_tβ
where X_t\in{\rm I\!R}^{n_t\times r} and Z_t{\rm I\!R}^{n_t\times c} are design matrices and the outcome for a individual i at time t is distributed according to an exponential family member given η_{it}. β are constant coefficients.
See vignette("Particle_filtering", "dynamichazard") for details.
An object of class PF_EM.
The function is still under development so the output and API may change.
PF_forward_filter to get a more precise estimate of
the final log-likelihood.
See the examples at https://github.com/boennecd/dynamichazard/tree/master/examples.
#####
# Fit model with lung data set from survival
# Warning: long-ish computation time
library(dynamichazard)
.lung <- lung[!is.na(lung$ph.ecog), ]
# standardize
.lung$age <- scale(.lung$age)
# fit
set.seed(43588155)
pf_fit <- PF_EM(
Surv(time, status == 2) ~ ddFixed(ph.ecog) + age,
data = .lung, by = 50, id = 1:nrow(.lung),
Q_0 = diag(1, 2), Q = diag(.5^2, 2),
max_T = 800,
control = PF_control(
# these number should be larger! Small for CRAN checks
N_fw_n_bw = 100L, N_first = 250L, N_smooth = 100L,
n_max = 50, eps = .001, Q_tilde = diag(.2^2, 2), est_a_0 = FALSE,
n_threads = 2))
# Plot state vector estimates
plot(pf_fit, cov_index = 1)
plot(pf_fit, cov_index = 2)
# Plot log-likelihood
plot(pf_fit$log_likes)
######
# example with fixed intercept
# prepare data
temp <- subset(pbc, id <= 312, select=c(id, sex, time, status, edema, age))
pbc2 <- tmerge(temp, temp, id=id, death = event(time, status))
pbc2 <- tmerge(pbc2, pbcseq, id=id, albumin = tdc(day, albumin),
protime = tdc(day, protime), bili = tdc(day, bili))
pbc2 <- pbc2[, c("id", "tstart", "tstop", "death", "sex", "edema",
"age", "albumin", "protime", "bili")]
pbc2 <- within(pbc2, {
log_albumin <- log(albumin)
log_protime <- log(protime)
log_bili <- log(bili)
})
# standardize
for(c. in c("age", "log_albumin", "log_protime", "log_bili"))
pbc2[[c.]] <- drop(scale(pbc2[[c.]]))
# fit model with extended Kalman filter
ddfit <- ddhazard(
Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
pbc2, Q_0 = 100, Q = 1e-2, by = 100, id = pbc2$id,
model = "exponential", max_T = 3600,
control = ddhazard_control(eps = 1e-5, NR_eps = 1e-4, n_max = 1e4))
summary(ddfit)
# fit model with particle filter
set.seed(88235076)
pf_fit <- PF_EM(
Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
pbc2, Q_0 = 2^2, Q = ddfit$Q * 100, # use estimate from before
by = 100, id = pbc2$id,
model = "exponential", max_T = 3600,
control = PF_control(
# these number should be larger! Small for CRAN checks
N_fw_n_bw = 100, N_smooth = 250, N_first = 100, eps = 1e-3,
method = "AUX_normal_approx_w_cloud_mean", est_a_0 = FALSE,
Q_tilde = as.matrix(.1^2),
n_max = 25, # just take a few iterations as an example
n_threads = 2))
# compare results
plot(ddfit)
plot(pf_fit)
sqrt(ddfit$Q * 100)
sqrt(pf_fit$Q)
rbind(ddfit$fixed_effects, pf_fit$fixed_effects)
#####
# simulation example with `random` and `fixed` argument and a restricted
# model
# g groups with k individuals in each
g <- 3L
k <- 400L
# matrices for state equation
p <- g + 1L
G <- matrix(0., p^2, 2L)
for(i in 1:p)
G[i + (i - 1L) * p, 1L + (i == p)] <- 1L
theta <- c(.9, .8)
# coefficients in transition density
(F. <- matrix(as.vector(G %*% theta), 4L, 4L))
J <- matrix(0., ncol = 2L, nrow = p)
J[-p, 1L] <- J[p, 2L] <- 1
psi <- c(log(c(.3, .1)))
K <- matrix(0., p * (p - 1L) / 2L, 2L)
j <- 0L
for(i in (p - 1L):1L){
j <- j + i
K[j, 2L] <- 1
}
K[K[, 2L] < 1, 1L] <- 1
phi <- log(-(c(.8, .3) + 1) / (c(.8, .3) - 1))
V <- diag(exp(drop(J %*% psi)))
C <- diag(1, ncol(V))
C[lower.tri(C)] <- 2/(1 + exp(-drop(K %*% phi))) - 1
C[upper.tri(C)] <- t(C)[upper.tri(C)]
(Q <- V %*% C %*% V) # covariance matrix in transition density
cov2cor(Q)
Q_0 <- get_Q_0(Q, F.) # time-invariant covariance matrix
beta <- c(rep(-6, g), 0) # all groups have the same long run mean intercept
# simulate state variables
set.seed(56219373)
n_periods <- 300L
alphas <- matrix(nrow = n_periods + 1L, ncol = p)
alphas[1L, ] <- rnorm(p) %*% chol(Q_0)
for(i in 1:n_periods + 1L)
alphas[i, ] <- F. %*% alphas[i - 1L, ] + drop(rnorm(p) %*% chol(Q))
alphas <- t(t(alphas) + beta)
# plot state variables
matplot(alphas, type = "l", lty = 1)
# simulate individuals' outcome
n_obs <- g * k
df <- lapply(1:n_obs, function(i){
# find the group
grp <- (i - 1L) %/% (n_obs / g) + 1L
# left-censoring
tstart <- max(0L, sample.int((n_periods - 1L) * 2L, 1) - n_periods + 1L)
# covariates
x <- c(1, rnorm(1))
# outcome (stop time and event indicator)
osa <- NULL
oso <- NULL
osx <- NULL
y <- FALSE
for(tstop in (tstart + 1L):n_periods){
sigmoid <- 1 / (1 + exp(- drop(x %*% alphas[tstop + 1L, c(grp, p)])))
if(sigmoid > runif(1)){
y <- TRUE
break
}
if(.01 > runif(1L) && tstop < n_periods){
# sample new covariate
osa <- c(osa, tstart)
tstart <- tstop
oso <- c(oso, tstop)
osx <- c(osx, x[2])
x[2] <- rnorm(1)
}
}
cbind(
tstart = c(osa, tstart), tstop = c(oso, tstop),
x = c(osx, x[2]), y = c(rep(FALSE, length(osa)), y), grp = grp,
id = i)
})
df <- data.frame(do.call(rbind, df))
df$grp <- factor(df$grp)
# fit model. Start with "cheap" iterations
fit <- PF_EM(
fixed = Surv(tstart, tstop, y) ~ x, random = ~ grp + x - 1,
data = df, model = "logit", by = 1L, max_T = max(df$tstop),
Q_0 = diag(1.5^2, p), id = df$id, type = "VAR",
G = G, theta = c(.5, .5), J = J, psi = log(c(.1, .1)),
K = K, phi = log(-(c(.4, 0) + 1) / (c(.4, 0) - 1)),
control = PF_control(
N_fw_n_bw = 100L, N_smooth = 100L, N_first = 500L,
method = "AUX_normal_approx_w_cloud_mean",
nu = 5L, # sample from multivariate t-distribution
n_max = 60L, averaging_start = 50L,
smoother = "Fearnhead_O_N", eps = 1e-4, covar_fac = 1.2,
n_threads = 2L # depends on your cpu(s)
),
trace = 1L)
plot(fit$log_likes) # log-likelihood approximation at each iterations
# you can take more iterations by uncommenting the following
# cl <- fit$call
# ctrl <- cl[["control"]]
# ctrl[c("N_fw_n_bw", "N_smooth", "N_first", "n_max",
# "averaging_start")] <- list(500L, 2000L, 5000L, 200L, 30L)
# cl[["control"]] <- ctrl
# cl[c("phi", "psi", "theta")] <- list(fit$phi, fit$psi, fit$theta)
# fit_extra <- eval(cl)
plot(fit$log_likes) # log-likelihood approximation at each iteration
# check estimates
sqrt(diag(fit$Q))
sqrt(diag(Q))
cov2cor(fit$Q)
cov2cor(Q)
fit$F
F.
# plot predicted state variables
for(i in 1:p){
plot(fit, cov_index = i)
abline(h = 0, lty = 2)
lines(1:nrow(alphas) - 1, alphas[, i] - beta[i], lty = 3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.