Nothing
#' @title Calculate leave-one-out residuals
#'
#' @description Calculates quantile residuals using the predictive distribution from
#' a jacknife (i.e., leave-one-out predictive distribution)
#'
#' @param object Output from \code{\link{dsem}}
#' @param nsim Number of simulations to use if \code{family!="fixed"} for some variable,
#' such that simulation residuals are required.
#' @param what whether to return quantile residuals, or samples from the leave-one-out predictive
#' distribution of data, or a table of leave-one-out predictions and standard errors for the
#' latent state
#' @param track_progress whether to track runtimes on terminal
#' @param ... Not used
#'
#' @details
#' Conditional quantile residuals cannot be calculated when using \code{family = "fixed"}, because
#' state-variables are fixed at available measurements and hence the conditional distribution is a Dirac
#' delta function. One alternative is to use leave-one-out residuals, where we calculate the predictive distribution
#' for each state value when dropping the associated observation, and then either use that as the
#' predictive distribution, or sample from that predictive distribution and then calculate
#' a standard quantile distribution for a given non-fixed family. This appraoch is followed here.
#' It is currently only implemented when all variables follow \code{family = "fixed"}, but
#' could be generalized to a mix of families upon request.
#'
#' @return
#' A matrix of residuals, with same order and dimensions as argument \code{tsdata}
#' that was passed to \code{dsem}.
#'
#' @export
loo_residuals <-
function( object,
nsim = 100,
what = c("quantiles","samples","loo"),
track_progress = TRUE,
... ){
# Extract and make object
what = match.arg(what)
tsdata = object$internal$tsdata
parlist = object$obj$env$parList()
df = expand.grid( time(tsdata), colnames(tsdata) )
df$obs = as.vector(tsdata)
df = na.omit(df)
df = cbind( df, est=NA, se=NA )
# Loop through observations
for(r in 1:nrow(df) ){
if( (r %% floor(nrow(df)/10))==1 ){
if(isTRUE(track_progress)) message("Running leave-one-out fit ",r," of ",nrow(df)," at ",Sys.time())
}
ts_r = tsdata
ts_r[match(df[r,1],time(tsdata)),match(df[r,2],colnames(tsdata))] = NA
#
control = object$internal$control
control$quiet = TRUE
control$run_model = FALSE
# Build inputs
#fit_r = dsem( sem = object$internal$sem,
# tsdata = ts_r,
# family = object$internal$family,
# estimate_delta0 = object$internal$estimate_delta0,
# control = control )
fit_r = list( tmb_inputs = object$tmb_inputs )
Data = fit_r$tmb_inputs$data
fit_r$tmb_inputs$map$x_tj = factor(ifelse( is.na(as.vector(ts_r)) | (Data$familycode_j[col(tsdata)] %in% c(1,2,3,4)), seq_len(prod(dim(ts_r))), NA ))
# Modify inputs
map = fit_r$tmb_inputs$map
parameters = fit_r$tmb_inputs$parameters
parameters[c("beta_z","lnsigma_j","mu_j","delta0_j")] = parlist[c("beta_z","lnsigma_j","mu_j","delta0_j")]
for( v in c("beta_z","lnsigma_j","mu_j","delta0_j") ){
map[[v]] = factor( rep(NA,length(as.vector(parameters[[v]]))))
}
# Build object
obj = TMB::MakeADFun( data = fit_r$tmb_inputs$data,
parameters = parameters,
random = NULL,
map = map,
profile = control$profile,
DLL="dsem",
silent = TRUE )
# Rerun and record
opt = nlminb( start = obj$par,
objective = obj$fn,
gradient = obj$gr )
sdrep = TMB::sdreport( obj )
df[r,'est'] = as.list(sdrep, what="Estimate")$x_tj[match(df[r,1],time(tsdata)),match(df[r,2],colnames(tsdata))]
df[r,'se'] = as.list(sdrep, what="Std. Error")$x_tj[match(df[r,1],time(tsdata)),match(df[r,2],colnames(tsdata))]
}
# Compute quantile residuals
resid_tjr = array( NA, dim=c(dim(tsdata),nsim) )
if( all(object$internal$family == "fixed") ){
# analytical quantile residuals
resid_tj = array( NA, dim=dim(tsdata) )
resid_tj[cbind(match(df[,1],time(tsdata)),match(df[,2],colnames(tsdata)))] = pnorm( q=df$obs, mean=df$est, sd=df$se )
# Simulations from predictive distribution for use in DHARMa etc
for(z in 1:nrow(df) ){
resid_tjr[match(df[z,1],time(tsdata)),match(df[z,2],colnames(tsdata)),] = rnorm( n=nsim, mean=df[z,'est'], sd=df[z,'se'] )
}
}else{
# Sample from leave-one-out predictive distribution for states
resid_rz = apply( df, MARGIN=1, FUN=function(vec){rnorm(n=nsim, mean=as.numeric(vec['est']), sd=as.numeric(vec['se']))} )
# Sample from predictive distribution of data given states
for(r in 1:nrow(resid_rz) ){
parameters = object$obj$env$parList()
parameters$x_tj[which(!is.na(tsdata))] = resid_rz[r,]
obj = TMB::MakeADFun( data = object$tmb_inputs$data,
parameters = parameters,
random = NULL,
map = object$tmb_inputs$map,
profile = NULL,
DLL="dsem",
silent = TRUE )
resid_tjr[,,r] = obj$simulate()$y_tj
}
# Calculate quantile residuals
resid_tj = apply( resid_tjr > outer(tsdata,rep(1,nsim)), MARGIN=1:2, FUN=mean )
}
if(what=="quantiles") return( resid_tj )
if(what=="samples") return( resid_tjr )
if(what=="loo") return( df )
}
#' @title Calculate total effects
#'
#' @description
#' Calculate a data frame of total effects, resulting from a pulse experiment
#' (i.e., an exogenous and temporary change in a single variable in time \code{t=0}) or
#' a press experiment (i.e., an exogenous and permanent change in a single variable
#' starting in time \code{t=0} and continuing for \code{n_lags} times), representing the
#' estimated effect of a change in any variable on every other variable and any time-lag
#' from 0 (simultaneous effects) to a user-specified maximum lag.
#'
#' @param object Output from \code{\link{dsem}}
#' @param n_lags Number of lags over which to calculate total effects
#' @param type Whether a pulse or press experiment is intended. A pulse experiment
#' answers the question: ``What happens if a variable is changed for only a single time-interval?"
#' A press experiment answers the question: ``What happens if a variable is permanently changed
#' starting in a given time-interval?
#'
#' @details
#' Total effects are taken from the Leontief matrix \eqn{\mathbf{(I-P)^{-1}}},
#' where \eqn{\mathbf{P}} is the path matrix across variables and times.
#' \eqn{\mathbf{(I-P)}^{-1} \mathbf{\delta} }
#' calculates the effect of a perturbation represented by vector \eqn{\mathbf{\delta}}
#' with length \eqn{n_{\mathrm{lags}} \times n_{\mathrm{J}}} where \eqn{n_{\mathrm{J}}} is the number of variables.
#' \eqn{\mathbf{(I-P)}^{-1} \mathbf{\delta} } calculates the total effect of
#' a given variable (from)
#' upon any other variable (to) either in the same time (\eqn{t=0}), or subsequent times
#' (\eqn{t \geq 1}), where \eqn{\mathbf{\delta} = \mathbf{i}_{\mathrm{T}} \otimes \mathbf{i}_{\mathrm{J}}},
#' where \eqn{\mathbf{i}_{\mathrm{J}}} is one for the \code{from} variable and zero otherwise.
#' For a pulse experiment, \eqn{\mathbf{i}_{\mathrm{T}}} is one at \eqn{t=0} and zero for other times.
#' For a press experiment, \eqn{\mathbf{i}_{\mathrm{T}}} is one for all times.
#'
#' We compute and list the total effect at each time from time \code{t=0}
#' to \code{t=n_lags-1}. For press experiments, this includes transient values as the the total effect
#' approaches its asymptotic value (if this exists) as \eqn{t} approaches infinity.
#' If the analyst wants an asymptotic effect from a press experiment, we recommend
#' using a high lag (e.g., \code{n_lags = 100}) and then confirming that it has
#' reached it's asymptote (i.e., the total effect is almost identical for the last
#' and next-to-last lag), and then reporting the value for that last lag.
#'
#' @return
#' A data frame listing the time-lag (lag), variable that is undergoing some
#' exogenous change (from), and the variable being impacted (to), along with the
#' total effect (total_effect) including direct and indirect pathways, and the
#' partial "direct" effect (direct_effect)
#'
#' @examples
#' ### EXAMPLE 1
#' # Define linear model with slope of 0.5
#' sem = "
#' # from, to, lag, name, starting_value
#' x -> y, 0, slope, 0.5
#' "
#' # Build DSEM with specified value for path coefficients
#' mod = dsem(
#' sem = sem,
#' tsdata = ts(data.frame(x=rep(0,20),y=rep(0,20))),
#' control = dsem_control( run_model = FALSE )
#' )
#'
#' # Show that total effect of X on Y from pulse experiment is 0.5 but does not propagate over time
#' pulse = total_effect(mod, n_lags = 2, type = "pulse")
#' subset( pulse, from=="x" & to=="y")
#'
#'
#' ### EXAMPLE 2
#' # Define linear model with slope of 0.5 and autocorrelated response
#' sem = "
#' x -> y, 0, slope, 0.5
#' y -> y, 1, ar_y, 0.8
#' "
#' mod = dsem(
#' sem = sem,
#' tsdata = ts(data.frame(x=rep(0,20),y=rep(0,20))),
#' control = dsem_control( run_model = FALSE )
#' )
#'
#' # Show that total effect of X on Y from pulse experiment is 0.5 with decay of 0.8 for each time
#' pulse = total_effect(mod, n_lags = 4, type = "pulse")
#' subset( pulse, from=="x" & to=="y")
#'
#' # Show that total effect of X on Y from press experiment asymptotes at 2.5
#' press = total_effect(mod, n_lags = 50, type = "press")
#' subset( press, from=="x" & to=="y")
#'
#' @export
total_effect <-
function( object,
n_lags = 4,
type = c("pulse","press") ){
#
type = match.arg(type)
# Unpack stuff
Z = object$internal$tsdata
if(is.null(object$internal$parhat)){
object$internal$parhat = object$obj$env$parList()
}
# Extract path matrix
P_kk = make_matrices(
beta_p = object$internal$parhat$beta,
model = object$sem_full,
times = seq_len(n_lags),
variables = colnames(Z)
)$P_kk
# Define innovations
if( type == "pulse" ){
delta_kj = kronecker( Diagonal(n=ncol(Z)),
sparseMatrix(i=1, j=1, x=1, dims=c(n_lags,1)) )
}
if( type == "press" ){
delta_kj = kronecker( Diagonal(n=ncol(Z)),
sparseMatrix(i=seq_len(n_lags), j=rep(1,n_lags), x=rep(1,n_lags), dims=c(n_lags,1)) )
}
IminusRho_kk = Diagonal(n=nrow(P_kk)) - P_kk
# Calculate partial effect
Partial_kj = P_kk %*% delta_kj
# Calculate total effect using sparse matrices
Total_kj = solve( IminusRho_kk, delta_kj )
# Make into data frame
out = expand.grid( "lag" = seq_len(n_lags)-1,
"to" = colnames(Z),
"from" = colnames(Z) )
out$total_effect = as.vector(Total_kj)
out$direct_effect = as.vector(Partial_kj)
return(out)
}
#' @title Partition variance in one variable due to another (EXPERIMENTAL)
#'
#' @description
#' Calculate the proportion of variance for a response variable that is
#' attributed to another set of predictor variables, calculated across lags from
#' from 0 (simultaneous effects) to a user-specified maximum lag.
#'
#' @param object Output from \code{\link{dsem}}
#' @param which_response string matching colnames from \code{tsdata}
#' identifying response variable
#' @param n_times Number of lags over which to calculate total effects
#'
#' @details
#' This function calculates the variance for each variable and lag, and then
#' recalculates it when setting exogenous variance to zero for all variables except
#' \code{which_pred}. It then calculates the ratio of the diagonal of these two.
#' This represents the proportion of variance in the full model that is attributable
#' to one or more variables.
#'
#' This function is under development and may still change or be removed.
#'
#' @return
#' A list with two elements:
#' \describe{
#' \item{total_variance}{A matrix of the total variance for each variable (column)
#' and each time from 1 to \code{n_times}}
#' \item{proportion_variance_explained}{A matrix of the proportion of variance
#' explained for variable \code{which_response} by each model variable
#' (column) and each time from 1 to \code{n_times}}
#' }
#' Note that in a model with lagged effects, the total_variance and variance_explained
#' will vary for each time (row), and the analyst might want to either choose a time
#' for which the value has stabilized.
#'
#' @examples
#' # Simulate linear model
#' x = rnorm(100)
#' y = 1 + 1 * x + rnorm(100)
#' data = data.frame(x=x, y=y)
#'
#' # Fit as DSEM
#' fit = dsem( sem = "x -> y, 0, beta",
#' tsdata = ts(data),
#' control = dsem_control(quiet=TRUE) )
#'
#' # Apply
#' partition_variance( fit,
#' which_response = "y",
#' n_times = 10 )
#'
#' @export
partition_variance <-
function( object,
which_response,
n_times = 10 ){
# Unpack stuff
Z = object$internal$tsdata
if(is.null(object$internal$parhat)){
object$internal$parhat = object$obj$env$parList()
}
# Error checks
if( !(which_response %in% colnames(Z)) ){
stop("`which_response` not found in colnames of `tsdata`")
}
# Extract path matrix
matrices = make_matrices(
beta_p = object$internal$parhat$beta,
model = object$sem_full,
times = seq_len(n_times),
variables = colnames(Z)
)
out = expand.grid(lag = seq_len(n_times), variable = colnames(Z) )
#
IminusP_kk = matrices$IminusP_kk
invIminusP_kk = Matrix::solve(IminusP_kk)
# Extract variance for fitted model
G_kk = matrices$G_kk
V_kk = Matrix::t(G_kk) %*% G_kk
Sigma0_kk = invIminusP_kk %*% V_kk %*% Matrix::t(invIminusP_kk)
# Zero out variances
match_vals = which( out$variable %in% which_response )
prop_tj = array(NA, dim=c(n_times,ncol(Z)), dimnames=list(paste0("t_",seq_len(n_times)),colnames(Z)))
for( which_pred in colnames(Z) ){
match_cols = which( out$variable %in% which_pred )
G0_kk = matrices$G_kk
G0_kk[,-match_cols] = 0
V0_kk = Matrix::t(G0_kk) %*% G0_kk
Sigma1_kk = invIminusP_kk %*% V0_kk %*% Matrix::t(invIminusP_kk)
prop_tj[,which_pred] = Matrix::diag(Sigma1_kk)[match_vals] / Matrix::diag(Sigma0_kk)[match_vals]
}
#
var_tj = prop_tj
var_tj[] = Matrix::diag( Sigma0_kk )
out = list( "total_variance" = var_tj,
"proportion_variance_explained" = prop_tj )
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.