PF_EM | R Documentation |
Method to estimate the hyper parameters with an EM algorithm.
PF_EM( formula, data, model = "logit", by, max_T, id, a_0, Q_0, Q, order = 1, control = PF_control(...), trace = 0, seed = NULL, type = "RW", fixed = NULL, random = NULL, Fmat, fixed_effects, G, theta, J, K, psi, phi, ... )
formula |
|
data |
|
model |
either |
by |
interval length of the bins in which parameters are fixed. |
max_T |
end of the last interval interval. |
id |
vector of ids for each row of the in the design matrix. |
a_0 |
vector a_0 for the initial coefficient vector for the first iteration (optional). Default is estimates from static model (see |
Q_0 |
covariance matrix for the prior distribution. |
Q |
initial covariance matrix for the state equation. |
order |
order of the random walk. |
control |
see |
trace |
argument to get progress information. Zero will yield no info and larger integer values will yield incrementally more information. |
seed |
seed to set at the start of every EM iteration. See
|
type |
type of state model. Either |
fixed |
two-sided |
random |
one-sided |
Fmat |
starting value for F when |
fixed_effects |
starting values for fixed effects if any. See
|
G, theta, J, K, psi, phi |
parameters for a restricted |
... |
optional way to pass arguments to |
Estimates a state model of the form
α_t = F α_t + Rε_t, \qquad ε_t \sim N(0, Q)
where F\in{\rm I\!R}^{p\times p} has full rank, α_t\in {\rm I\!R}^p, ε_t\in {\rm I\!R}^r, r ≤q p, and R = (e_{l_1},e_{l_2},…,e_{l_r}) where e_k is column from the p dimensional identity matrix and l_1<l_2<…<l_r. The time zero state is drawn from
α_0\sim N(a_0, Q_0)
with Q_0 \in {\rm I\!R}^{p\times p}. The latent states, α_t, are related to the output through the linear predictors
η_{it} = X_t(R^+α_t) + Z_tβ
where X_t\in{\rm I\!R}^{n_t\times r} and Z_t{\rm I\!R}^{n_t\times c} are design matrices and the outcome for a individual i at time t is distributed according to an exponential family member given η_{it}. β are constant coefficients.
See vignette("Particle_filtering", "dynamichazard")
for details.
An object of class PF_EM
.
The function is still under development so the output and API may change.
PF_forward_filter
to get a more precise estimate of
the final log-likelihood.
See the examples at https://github.com/boennecd/dynamichazard/tree/master/examples.
##### # Fit model with lung data set from survival # Warning: long-ish computation time library(dynamichazard) .lung <- lung[!is.na(lung$ph.ecog), ] # standardize .lung$age <- scale(.lung$age) # fit set.seed(43588155) pf_fit <- PF_EM( Surv(time, status == 2) ~ ddFixed(ph.ecog) + age, data = .lung, by = 50, id = 1:nrow(.lung), Q_0 = diag(1, 2), Q = diag(.5^2, 2), max_T = 800, control = PF_control( # these number should be larger! Small for CRAN checks N_fw_n_bw = 100L, N_first = 250L, N_smooth = 100L, n_max = 50, eps = .001, Q_tilde = diag(.2^2, 2), est_a_0 = FALSE, n_threads = 2)) # Plot state vector estimates plot(pf_fit, cov_index = 1) plot(pf_fit, cov_index = 2) # Plot log-likelihood plot(pf_fit$log_likes) ###### # example with fixed intercept # prepare data temp <- subset(pbc, id <= 312, select=c(id, sex, time, status, edema, age)) pbc2 <- tmerge(temp, temp, id=id, death = event(time, status)) pbc2 <- tmerge(pbc2, pbcseq, id=id, albumin = tdc(day, albumin), protime = tdc(day, protime), bili = tdc(day, bili)) pbc2 <- pbc2[, c("id", "tstart", "tstop", "death", "sex", "edema", "age", "albumin", "protime", "bili")] pbc2 <- within(pbc2, { log_albumin <- log(albumin) log_protime <- log(protime) log_bili <- log(bili) }) # standardize for(c. in c("age", "log_albumin", "log_protime", "log_bili")) pbc2[[c.]] <- drop(scale(pbc2[[c.]])) # fit model with extended Kalman filter ddfit <- ddhazard( Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) + ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili, pbc2, Q_0 = 100, Q = 1e-2, by = 100, id = pbc2$id, model = "exponential", max_T = 3600, control = ddhazard_control(eps = 1e-5, NR_eps = 1e-4, n_max = 1e4)) summary(ddfit) # fit model with particle filter set.seed(88235076) pf_fit <- PF_EM( Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) + ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili, pbc2, Q_0 = 2^2, Q = ddfit$Q * 100, # use estimate from before by = 100, id = pbc2$id, model = "exponential", max_T = 3600, control = PF_control( # these number should be larger! Small for CRAN checks N_fw_n_bw = 100, N_smooth = 250, N_first = 100, eps = 1e-3, method = "AUX_normal_approx_w_cloud_mean", est_a_0 = FALSE, Q_tilde = as.matrix(.1^2), n_max = 25, # just take a few iterations as an example n_threads = 2)) # compare results plot(ddfit) plot(pf_fit) sqrt(ddfit$Q * 100) sqrt(pf_fit$Q) rbind(ddfit$fixed_effects, pf_fit$fixed_effects) ##### # simulation example with `random` and `fixed` argument and a restricted # model # g groups with k individuals in each g <- 3L k <- 400L # matrices for state equation p <- g + 1L G <- matrix(0., p^2, 2L) for(i in 1:p) G[i + (i - 1L) * p, 1L + (i == p)] <- 1L theta <- c(.9, .8) # coefficients in transition density (F. <- matrix(as.vector(G %*% theta), 4L, 4L)) J <- matrix(0., ncol = 2L, nrow = p) J[-p, 1L] <- J[p, 2L] <- 1 psi <- c(log(c(.3, .1))) K <- matrix(0., p * (p - 1L) / 2L, 2L) j <- 0L for(i in (p - 1L):1L){ j <- j + i K[j, 2L] <- 1 } K[K[, 2L] < 1, 1L] <- 1 phi <- log(-(c(.8, .3) + 1) / (c(.8, .3) - 1)) V <- diag(exp(drop(J %*% psi))) C <- diag(1, ncol(V)) C[lower.tri(C)] <- 2/(1 + exp(-drop(K %*% phi))) - 1 C[upper.tri(C)] <- t(C)[upper.tri(C)] (Q <- V %*% C %*% V) # covariance matrix in transition density cov2cor(Q) Q_0 <- get_Q_0(Q, F.) # time-invariant covariance matrix beta <- c(rep(-6, g), 0) # all groups have the same long run mean intercept # simulate state variables set.seed(56219373) n_periods <- 300L alphas <- matrix(nrow = n_periods + 1L, ncol = p) alphas[1L, ] <- rnorm(p) %*% chol(Q_0) for(i in 1:n_periods + 1L) alphas[i, ] <- F. %*% alphas[i - 1L, ] + drop(rnorm(p) %*% chol(Q)) alphas <- t(t(alphas) + beta) # plot state variables matplot(alphas, type = "l", lty = 1) # simulate individuals' outcome n_obs <- g * k df <- lapply(1:n_obs, function(i){ # find the group grp <- (i - 1L) %/% (n_obs / g) + 1L # left-censoring tstart <- max(0L, sample.int((n_periods - 1L) * 2L, 1) - n_periods + 1L) # covariates x <- c(1, rnorm(1)) # outcome (stop time and event indicator) osa <- NULL oso <- NULL osx <- NULL y <- FALSE for(tstop in (tstart + 1L):n_periods){ sigmoid <- 1 / (1 + exp(- drop(x %*% alphas[tstop + 1L, c(grp, p)]))) if(sigmoid > runif(1)){ y <- TRUE break } if(.01 > runif(1L) && tstop < n_periods){ # sample new covariate osa <- c(osa, tstart) tstart <- tstop oso <- c(oso, tstop) osx <- c(osx, x[2]) x[2] <- rnorm(1) } } cbind( tstart = c(osa, tstart), tstop = c(oso, tstop), x = c(osx, x[2]), y = c(rep(FALSE, length(osa)), y), grp = grp, id = i) }) df <- data.frame(do.call(rbind, df)) df$grp <- factor(df$grp) # fit model. Start with "cheap" iterations fit <- PF_EM( fixed = Surv(tstart, tstop, y) ~ x, random = ~ grp + x - 1, data = df, model = "logit", by = 1L, max_T = max(df$tstop), Q_0 = diag(1.5^2, p), id = df$id, type = "VAR", G = G, theta = c(.5, .5), J = J, psi = log(c(.1, .1)), K = K, phi = log(-(c(.4, 0) + 1) / (c(.4, 0) - 1)), control = PF_control( N_fw_n_bw = 100L, N_smooth = 100L, N_first = 500L, method = "AUX_normal_approx_w_cloud_mean", nu = 5L, # sample from multivariate t-distribution n_max = 60L, averaging_start = 50L, smoother = "Fearnhead_O_N", eps = 1e-4, covar_fac = 1.2, n_threads = 2L # depends on your cpu(s) ), trace = 1L) plot(fit$log_likes) # log-likelihood approximation at each iterations # you can take more iterations by uncommenting the following # cl <- fit$call # ctrl <- cl[["control"]] # ctrl[c("N_fw_n_bw", "N_smooth", "N_first", "n_max", # "averaging_start")] <- list(500L, 2000L, 5000L, 200L, 30L) # cl[["control"]] <- ctrl # cl[c("phi", "psi", "theta")] <- list(fit$phi, fit$psi, fit$theta) # fit_extra <- eval(cl) plot(fit$log_likes) # log-likelihood approximation at each iteration # check estimates sqrt(diag(fit$Q)) sqrt(diag(Q)) cov2cor(fit$Q) cov2cor(Q) fit$F F. # plot predicted state variables for(i in 1:p){ plot(fit, cov_index = i) abline(h = 0, lty = 2) lines(1:nrow(alphas) - 1, alphas[, i] - beta[i], lty = 3) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.