#' Get a matrix version of a balanced panel
#'
#' Convert a balanced panel tbl into a matrix,
#' id_var as rows, t_var as columns
#'
#' @param tbl A tbl of panel data
#'
#' @param id_var The (quoted or unquoted) name of the id variable
#'
#' @param t_var The (quoted or unquoted) name of the time variable
#'
#' @param x_var The (quoted or unquoted) name of the variable to put in the matrix
#'
#' @return A matrix
#' @keywords panel matrix
#'
#'
#' @examples
#'
#' asm <- fakedata::fake_panel(N = 50, T = 10, missing = 0.1)
#' get_matrix(asm, id, year, g)
#'
#' @export
get_matrix <- function(tbl, id_var = id, t_var = year, x_var = g_i) {
# process variable names to handle quoted or unquoted inputs
id_var <- dplyr::quo_name(dplyr::enquo(id_var))
t_var <- dplyr::quo_name(dplyr::enquo(t_var))
x_var <- dplyr::quo_name(dplyr::enquo(x_var))
# make sure the tbl is ordered
tbl <- tbl %>% arrange(!!rlang::sym(t_var), !!rlang::sym(id_var))
# convert it to matrix, row-wise
matrix(tbl[[x_var]],
nrow = length(unique(tbl[[t_var]])),
ncol = length(unique(tbl[[id_var]])),
byrow = TRUE)
}
#' Calculate ICP rules for common factors
#'
#' Given a matrix, calculate ICP rules for
#' the number of common factors
#'
#' @param X A matrix
#'
#' @return A list of factor numbers calculated using
#' icp rules.
#'
#' @keywords ICP, factor analysis, PCA
#'
#'
#' @examples
#'
#' asm <- fakedata::fake_panel(N = 50, T = 10, missing = 0.1)
#' asm <- asm %>% rename(industry = I, g_i = g) %>% add_weights() %>% balance_panel(id, year)
#' X <- get_matrix(asm)
#' icpx(X)
#'
#' @export
icpx <- function(X) {
N <- dim(X)[2] # number of individuals...wait?
T <- dim(X)[1] # number of years
k <- min(N, T) # maximum number of factors.
# Perform PCA
pca <- prcomp(X) #, scale. = F, center = F)
# residuals
res <- (X - pca$x[,1:k] %*% t(pca$rotation[,1:k]))
c2nt <- min(N,T) # = k, but renamed for the purposes of the formula
icp1 <- (N * T)^(-1) * sum(res^2) + k * (N + T) * log(N * T) / ((N * T) * log(N + T))
icp2 <- (N * T)^(-1) * sum(res^2) + k * (N + T) * log(c2nt) / (N * T)
icp3 <- (N * T)^(-1) * sum(res^2) + k * log(c2nt) / c2nt
# lhatr: list of \hat{r}, estimated number of common factors
lhatr <- purrr::map(list(icp1, icp2, icp3, k), function(x) max(1, floor(x))) %>% unique()
names(lhatr) <- lhatr
list(lhatr = lhatr, pca = pca) # need to pass back `pca` too?
}
#' Predict aggregate growth based on PCA decomposition
#'
#' A function that predicts an annual aggregate growth rate
#' by using a PCA decomposition of individual growth rates,
#' using a given number of common factors (estimated elsewhere).
#'
#' @param hatr The number of common factors (estimated by some other means)
#'
#' @param X A matrix of data
#'
#' @param W A matrix of weights
#'
#' @param pca The PCA decomposition of X
#'
#' @return A tbl of predictions of aggregate growth by year,
#' using the number of common factors given by hatr
#'
#' @keywords ICP, factor analysis, PCA, predict
#'
#'
#' @export
predict_using_factors <- function(hatr, X, W, pca = prcomp(X)) { # not sure if I have to pass pca or not.
# hatr = the (estimated) number of factors.
# X is the growth matrix
# W is the weight matrix
# pca may or may not be the same as before; may or may not need to pass it in, or re-calculate it.
res_icp <- (X - pca$x[,1:hatr] %*% t(pca$rotation[, 1:hatr]))
# element-wise multiply the residuals by the weights, then aggregate back up.
# residuals (e.g., errors) are common across individuals, hmm.
rowSums(res_icp * W) %>%
dplyr::tbl_df() %>%
tibble::rownames_to_column(var = "year")
}
#' Predict aggregate growth based on sparse PCA decomposition
#'
#' A function that predicts an annual aggregate growth rate
#' by using a PCA decomposition of individual growth rates,
#' using a sparse PCA based on glmnet.
#'
#' @param X A matrix of data
#'
#' @param W A matrix of weights
#'
#' @param pca The PCA decomposition of X
#'
#' @return A tbl of predictions of aggregate growth by year,
#' using the number of common factors given by hatr
#'
#' @keywords lasso, factor analysis, sparse PCA, predict
#'
#'
#' @export
sparse_pca <- function(X, W, pca = prcomp(X)) {
N <- dim(X)[2] # number of firms/plants
T <- dim(X)[1] # number of years
pca <- prcomp(X) #, scale. = F, center = F)
hatpci <- function(pci) {
cvc <- glmnet::glmnet(x = X, y = pci, alpha = 0.1, intercept = FALSE)
hatcoefi <- coef(cvc, s=cvc$lambda[length(cvc$lambda)]) # ok cool. now normalize.
hatcoefi <- hatcoefi / ifelse(sum(hatcoefi) == 0, 1, hatcoefi^2 %>% sum() %>% sqrt())
hatpci <- X %*% hatcoefi[-1, ] # minus the intercept, always 0.
return(hatpci)
}
lx <- map(as.list(as.data.frame(pca$x)), hatpci)
hatpc <- Reduce(cbind, lx) # if you use bind_cols, you get a df, we need a matrix.
colnames(hatpc) = c(1:min(T, N))
# residuals
res_sp <- (X - hatpc %*% t(pca$rotation))
rowSums(res_sp * W) %>%
tbl_df() %>%
tibble::rownames_to_column(var = "year")
}
#' A function to return PCA predictions based on ICP
#' estimated common factors
#'
#' A wrapper function that calls icpx() to estimate the number
#' of common factors, and then calls predict_using_factors()
#' to return the aggregate predictions.
#'
#' @param X A matrix of data
#'
#' @param W A matrix of weights
#'
#' @return A tbl of predictions of aggregate growth by year,
#' using the number of common factors given by hatr
#'
#' @keywords ICP, factor analysis, PCA, predict
#'
#'
#' @export
icpx_pca <- function(X, W) {
icp <- icpx(X)
purrr::map(icp$lhatr, predict_using_factors, X = X, W = W, pca = icp$pca)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.