Nothing
generate_params_vector <- function(value, timestamps_n, regressors_n,
lin_related_regressors_n) {
alpha <- value
phi_0 <- value
err_var <- value
dep_vars <- rep(value, timestamps_n)
beta <- rep(value, lin_related_regressors_n)
phi_1 <- rep(value, lin_related_regressors_n)
phis_n <- regressors_n*(timestamps_n - 1)
phis <- rep(value, phis_n)
psis_n <- regressors_n*timestamps_n*(timestamps_n - 1)/2
psis <- rep(value, psis_n)
matrix(c(alpha, phi_0, err_var, dep_vars, phi_1, beta, phis, psis))
}
SEM_params_to_list <- function(params, periods_n, tot_regressors_n,
lin_related_regressors_n) {
phis_n <- tot_regressors_n*(periods_n - 1)
psis_n <- tot_regressors_n*periods_n*(periods_n - 1)/2
alpha <- params[1]
phi_0 <- params[2]
err_var <- params[3]
dep_vars <- params[4:(4 + periods_n - 1)]
betas_first_ind <- 4 + periods_n
if (tot_regressors_n == 0) {
beta <- c()
phi_1 <- c()
phis <- c()
psis <- c()
} else {
if (lin_related_regressors_n == 0) {
beta <- c()
phi_1 <- c()
} else {
beta <-
params[betas_first_ind:(betas_first_ind + lin_related_regressors_n - 1)]
phi_1_first_ind <- betas_first_ind + lin_related_regressors_n
phi_1 <-
params[phi_1_first_ind:(phi_1_first_ind + lin_related_regressors_n - 1)]
}
phis <-
params[(4 + 2*lin_related_regressors_n + periods_n):(3 + 2*lin_related_regressors_n + periods_n + phis_n)]
psis <-
params[(4 + 2*lin_related_regressors_n + periods_n + phis_n):(3 + 2*lin_related_regressors_n + periods_n + phis_n + psis_n)]
}
list(alpha = alpha, phi_0 = phi_0, err_var = err_var, dep_vars = dep_vars,
beta = beta, phi_1 = phi_1, phis = phis, psis = psis)
}
#' List of matrices for SEM model
#'
#' @param df Dataframe with data for the likelihood computations.
#' @param timestamp_col Column which determines time stamps. For now only
#' natural numbers can be used.
#' @param entity_col Column which determines entities (e.g. countries, people)
#' @param dep_var_col Column with dependent variable
#' @param lin_related_regressors Vector of strings of column names. Which subset of regressors is in non trivial
#' linear relation with the dependent variable (\code{dep_var_col}). In other
#' words regressors with non-zero \code{beta} parameters.
#' @param which_matrices character vector with names of matrices which should be
#' computed. Possible matrices are
#' \code{"Y1"}, \code{"Y2"}, \code{"Z"}, \code{"cur_Y2"}, \code{"cur_Z"},
#' \code{"res_maker_matrix"}. Default is
#' \code{c("Y1", "Y2", "Z", "cur_Y2","cur_Z", "res_maker_matrix")} in which case
#' all possible matrices are generated
#' @importFrom dplyr select all_of
#' @importFrom magrittr "%>%"
#' @return
#' Named list with matrices as its elements
#' @export
#'
#' @examples
#' matrices_from_df(economic_growth, year, country, gdp, c("pop", "sed"),
#' c("Y1", "Y2"))
matrices_from_df <- function(df, timestamp_col, entity_col, dep_var_col,
lin_related_regressors = NULL,
which_matrices = c("Y1", "Y2", "Z", "cur_Y2",
"cur_Z", "res_maker_matrix")) {
Y1 <- if ("Y1" %in% which_matrices) {
df %>% SEM_dep_var_matrix(
timestamp_col = {{ timestamp_col }}, entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
} else NULL
Y2 <- if ("Y2" %in% which_matrices) {
df %>% SEM_regressors_matrix(
timestamp_col = {{ timestamp_col }}, entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
} else NULL
cur_Y2 <- if ("cur_Y2" %in% which_matrices) {
df %>%
dplyr::select({{ timestamp_col }}, {{ entity_col }}, {{ dep_var_col }},
all_of(lin_related_regressors)) %>%
SEM_regressors_matrix(timestamp_col = {{ timestamp_col }},
entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
} else NULL
cur_Z <- if ("cur_Z" %in% which_matrices) {
df %>%
dplyr::select({{ timestamp_col }}, {{ entity_col }}, {{ dep_var_col }},
all_of(lin_related_regressors)) %>%
exogenous_matrix(timestamp_col = {{ timestamp_col }},
entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
} else NULL
Z <- if ("Z" %in% which_matrices) {
df %>%
exogenous_matrix(timestamp_col = {{ timestamp_col }},
entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
} else NULL
res_maker_matrix <- if ("res_maker_matrix" %in% which_matrices) {
if (!"Z" %in% which_matrices) {
warning('For res_maker_matrix the Z matrix has to be computed anyway')
Z <- df %>%
exogenous_matrix(timestamp_col = {{ timestamp_col }},
entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }})
}
residual_maker_matrix(Z)
} else NULL
list(Y1 = Y1, Y2 = Y2, cur_Y2 = cur_Y2, Z = Z, cur_Z = cur_Z,
res_maker_matrix = res_maker_matrix)
}
#' Likelihood for the SEM model
#'
#' @param params Parameters describing the model. Can be either a vector or a
#' list with named parameters. See 'Details'
#' @param data Data for the likelihood computations. Can be either a list of
#' matrices or a dataframe. If the dataframe, additional parameters are
#' required to build the matrices within the function.
#' @param timestamp_col Column which determines time stamps. For now only
#' natural numbers can be used.
#' @param entity_col Column which determines entities (e.g. countries, people)
#' @param dep_var_col Column with dependent variable
#' @param lin_related_regressors Which subset of columns should be used as
#' regressors for the current model. In other words \code{regressors} are the
#' total set of regressors and \code{lin_related_regressors} are the ones for
#' which linear relation is not set to zero for a given model.
#' @param per_entity Whether to compute overall likelihood or a vector of
#' likelihoods with per entity value
#' @param exact_value Whether the exact value of the likelihood should be
#' computed (\code{TRUE}) or just the proportional part (\code{FALSE}).
#' Currently \code{TRUE} adds: 1. a normalization constant coming from Gaussian
#' distribution, 2. a term disappearing during likelihood simplification in
#' Likelihood-based Estimation of Dynamic Panels with Predetermined Regressors
#' by Moral-Benito (see Appendix A.1). The latter happens when transitioning
#' from equation (47) to equation (48), in step 2: the term \code{trace(HG_22)} is
#' dropped, because it can be assumed to be constant from Moral-Benito
#' perspective. To get the exact value of the likelihood we have to take this
#' term into account.
#'
#' @details
#' The \code{params} argument is a list that should contain the following
#' components:
#'
#' \code{alpha} scalar value which determines linear dependence on lagged
#' dependent variable
#'
#' \code{phi_0} scalar value which determines linear dependence on the value
#' of dependent variable at the lowest time stamp
#'
#' \code{err_var} scalar value which determines classical error component
#' (Sigma11 matrix, sigma_epsilon^2)
#'
#' \code{dep_vars} double vector of length equal to the number of time stamps
#' (i.e. time stamps greater than or equal to the second lowest time stamp)
#'
#' \code{beta} double vector which determines the linear dependence on
#' regressors different than the lagged dependent variable; The vector should
#' have length equal to the number of regressors.
#'
#' \code{phi_1} double vector which determines the linear dependence on
#' initial values of regressors different than the lagged dependent variable;
#' The vector should have length equal to the number of regressors.
#'
#' \code{phis} double vector which together with \code{psis} determines upper
#' right and bottom left part of the covariance matrix; The vector should have
#' length equal to the number of regressors times number of time stamps minus 1,
#' i.e. \code{regressors_n * (periods_n - 1)}
#'
#' \code{psis} double vector which together with \code{psis} determines upper
#' right and bottom left part of the covariance matrix; The vector should have
#' length equal to the number of regressors times number of time stamps minus 1
#' times number of time stamps divided by 2, i.e.
#' \code{regressors_n * (periods_n - 1) * periods_n / 2}
#'
#'
#' @return
#' The value of the likelihood for SEM model (or a part of interest of the
#' likelihood)
#'
#' @export
#'
#' @examples
# TODO: sometimes generates NaN and positive values - why?
#' set.seed(1)
#' df <- data.frame(
#' entities = rep(1:4, 5),
#' times = rep(seq(1960, 2000, 10), each = 4),
#' dep_var = stats::rnorm(20), a = stats::rnorm(20), b = stats::rnorm(20)
#' )
#' df <-
#' feature_standardization(df, timestamp_col = times, entity_col = entities)
#' SEM_likelihood(0.5, df, times, entities, dep_var)
SEM_likelihood <- function(params, data, timestamp_col, entity_col, dep_var_col,
lin_related_regressors = NULL,
per_entity = FALSE,
exact_value = TRUE) {
if (is.list(params) && !is.data.frame(data)) {
alpha <- params$alpha
phi_0 <- params$phi_0
err_var <- params$err_var
dep_vars <- params$dep_vars
beta <- if(is.null(params$beta)) c() else params$beta
phi_1 <- if(is.null(params$phi_1)) c() else params$phi_1
phis <- if(is.null(params$phis)) c() else params$phis
psis <- if(is.null(params$psis)) c() else params$psis
Y1 <- data$Y1
Y2 <- data$Y2
cur_Y2 <- data$cur_Y2
cur_Z <- data$cur_Z
res_maker_matrix <- residual_maker_matrix(cur_Z)
n_entities <- nrow(Y1)
periods_n <- length(dep_vars)
tot_regressors_n <- ncol(data$Y2) / (periods_n - 1)
lin_related_regressors_n <- length(beta)
B <- SEM_B_matrix(alpha, periods_n, beta)
C <- SEM_C_matrix(alpha, phi_0, periods_n, beta, phi_1)
S <- SEM_sigma_matrix(err_var, dep_vars, phis, psis)
U1 <- if (lin_related_regressors_n == 0) {
t(tcrossprod(B[[1]], Y1) - tcrossprod(C, cur_Z))
} else {
t(tcrossprod(B[[1]], Y1) + tcrossprod(B[[2]], cur_Y2) -
tcrossprod(C, cur_Z))
}
S11_inverse <- solve(S[[1]])
M <- Y2 - U1 %*% S11_inverse %*% S[[2]]
H <- crossprod(M, res_maker_matrix) %*% M
gaussian_normalization_const <- log(2 * pi) *
n_entities * (periods_n + (periods_n-1) * tot_regressors_n) / 2
trace_simplification_term <-
1/2 * n_entities * (periods_n - 1) * tot_regressors_n
likelihood <- -n_entities/2 * log(det(S[[1]]) * det(H/n_entities))
if(exact_value) {
likelihood <- likelihood -
gaussian_normalization_const - trace_simplification_term
}
likelihood <- if(!per_entity) {
likelihood - 1/2 * sum(diag(S11_inverse %*% crossprod(U1)))
} else {
likelihood / n_entities - 1/2 * diag(U1 %*% S11_inverse %*% t(U1))
}
} else {
if (is.data.frame(data)) {
data <- data %>%
matrices_from_df(timestamp_col = {{ timestamp_col }},
entity_col = {{ entity_col }},
dep_var_col = {{ dep_var_col }},
lin_related_regressors = lin_related_regressors)
}
if (!is.list(params)) {
periods_n <- ncol(data$Y1)
tot_regressors_n <- ncol(data$Y2) / (periods_n - 1)
lin_related_regressors_n <- if (is.null(data$cur_Y2)) {
0
} else {
ncol(data$cur_Y2) / (periods_n - 1)
}
if (is.double(params)) {
params <-
generate_params_vector(
value = params, timestamps_n = periods_n,
regressors_n = tot_regressors_n,
lin_related_regressors_n = lin_related_regressors_n
)
}
params <-
SEM_params_to_list(params, periods_n = periods_n,
tot_regressors_n = tot_regressors_n,
lin_related_regressors_n = lin_related_regressors_n)
}
likelihood <-
SEM_likelihood(params = params, data = data, per_entity = per_entity,
exact_value = exact_value)
}
likelihood
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.