library(tidyverse)
library(idiosyncratics)
set.seed(403)
N <- 25
T <- 5
# it has a growth rate, but the real data won't.
asm <- fakedata::fake_panel(N = N, T = T, I = 2, missing = 0.01)
a <- asm %>% rename(industry = I, g_i = g)
# it has a growth rate, but the real data won't.
a <- a %>% add_weights(x_var = sales) %>% lag_panel(x_var = w, l_var = w)
a <- a %>% add_growth_rates(x_var = sales, drop = TRUE)
a <- a %>% add_quantiles(g_var = 'g_i') %>% drop_outliers(g_var = g_i, method = 'winsorize')
g <- a %>% group_by(year) %>% summarize(g = sum(w * g, na.rm = TRUE)) %>% tibble::rownames_to_column(var = 'index')
a <- a %>% balance_panel(id, year)
# need balance
tbl <- a
# that's to drop the 0 column; might need to deal with that in a different manner, pre-this
# need to convert years indices to years?
X <- get_matrix(a, id, year, g_i)
W <- get_matrix(a, id, year, w)
# pass in X, a
# wrapper to calculate icpx_pca? icpx_pca
# map, keep names
# bind_rows(
# xz <- purrr::map(icp$lhatr, predict_using_factors, X = X, W = W, pca = icp$pca) #, .id = "type" )
xz <- icpx_pca(X, W)
# ok, call things, put it all together, with type and year and everything else I need.
xy <- list('sparse' = sparse_pca(X, W))
# components <- bind_rows(xz, xy, .id = 'type') %>%
# left_join(g %>% select(index, year), by = c('year' = 'index')) %>%
# select(-year) %>% rename(year = year.y) %>%
# mutate(type = ifelse(type == 'sparse', "Sparse", type))
components <- bind_rows(xz, xy, .id = 'type') %>%
left_join(g, by = c('year' = 'index')) %>%
select(-year) %>%
rename(year = year.y, e = value) %>%
mutate(type = ifelse(type == 'sparse', "Sparse", type))
tidy_components <- bind_rows(components %>% select(-g),
g %>% mutate(index = 'Aggregate') %>% rename(e = g, type = index))
tidy_components
# %>%
# select(year = year.y, type, value)
# # and do a correct legend + scale
ggplot(tidy_components,
aes(x = year, y = e, colour = type)) +
geom_line() +
labs(x = "Year", y = "Growth") +
scale_colour_discrete(name="No. fctr")
# ,
# breaks=c("e", "g", "m"),
# labels=c("Idiosyncratic", "Aggregate", "Common"))
# maybe this., list-col strat.
# bind_rows(enframe(xz), enframe(xy))
lms <- . %>% lm(g ~ e, data = .) %>% summary() %>% .$r.squared
dlm <- function(tbl) {
cov <- tbl %>% select(one_of(c('e', 'm', 'g'))) %>% var()
(cov / cov[dim(cov)[1], dim(cov)[2]]) %>% diag() %>% sqrt()
}
# hmm.
tab <- left_join(split(components, components$type) %>% map_dbl(lms) %>% round(2) %>% enframe(),
split(components, components$type) %>% map(dlm) %>% map_dbl("e") %>% round(2) %>% enframe(), by = 'name')
names(tab) <- c('No. of factors', 'e_t, R^2', 'e_t, DLM')
tab %>% knitr::kable()
tab <- cbind(components %>% lms() %>% t(), components %>% dlm() %>% t()) %>% tbl_df()
names(tab) <- c("R^2", "e_t", "m_t", "g_t")
kable(tab %>% round(2), caption = 'Contribution to aggregate growth')
# do separate vignettes for PCA and the regular things?
# also do that for all the "datasets"
# ok, then what. get the other stuff, the printing things.
xxx
# fakedata::fake_panel(N = N, T = T, I = 2)
# imap!
set.seed(403)
z <- fakedata::fake_panel(N = 50, T = 50, I = 2, missing = 0.1)
z %>% rename(industry = I, g_i = g) %>% add_weights() %>% View()
# ugh, missing stuff. can't just drop things, can't just calculate weights,
# need to be sequential.
l <- 10
tbl <- tibble(N = floor(seq(50, 2500, length.out = l)), #seq(10, 50, 10),
T = rep_len(50, l))
tbl
a <- bind_cols(tbl, tbl %>% { map2(.$N, .$T, .f = fakedata::fake_panel, I = 2, missing = 0.1)} %>% enframe()) %>% select(-name) %>% rename(data = value)
b <- a %>%
mutate(data = data %>% map(rename, industry = I, g_i = g)) %>%
mutate(data = map(data, add_weights)) %>%
mutate(data = map(data, add_quantiles, g_var = 'g_i')) %>%
mutate(data = map(data, drop_outliers, g_var = 'g_i', method = 'drop')) %>%
mutate(data = map(data, demean, g_var = 'g_i'))
c <- b %>%
mutate(comps = map(data, aggregate_components, g_var = 'g_i', w_var = 'lag_w'))
c
# ok, now do lm summary stuff I guess.
# then write the pca function.
# asm <- asm %>% rename(industry = I, g_i = g)
# asm <- asm %>%
# add_weights() %>% # add lagged weights to data
# add_quantiles(g_var = 'g_i') %>% # add quantiles to data
# drop_outliers(g_var = 'g_i', method = 'drop') # drop outliers based on quantiles
# then what.
# asm <- asm %>% demean(g_var = 'g_i')
# components <- asm %>% aggregate_components(g_var = 'g_i', w_var = 'lag_w')
# what does the table look like. just need results from each thing.
lms <- . %>% lm(g2 ~ e, data = .) %>% summary() %>% .$r.squared
dlm <- function(df) {
cov <- df %>% select(one_of(c('e', 'm', 'g2'))) %>% var()
(cov / cov[dim(cov)[1], dim(cov)[2]]) %>% diag() %>% sqrt()
}
c %>% mutate(r2 = map_dbl(comps, lms),
dlm = map(comps, dlm)) %>% .$dlm
c[[1,"comps"]]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.