data-raw/2_decompose_common_idiosyncratic.R

# Decompose data into common and idiosyncratic components
# 1. do one of (fixed effects, common factor, LASSO)
# 2. aggregate into common and idiosyncratic components
# 3. and that's it. do DLM/Gabaix next.

# fixed effects. lfe.
# lfe()
# or just `year`
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(xtable)
# library(purrr)
# library(lfe)
a73 <- asm73
a00 <- asm00 %>% filter(!(year %in% c(2004,2012)))
t2 <- t2x

# a73 <- read_csv('data/asm7399-clean.csv') #%>% mutate(industry = industry %>% as.factor(), year = year %>% as.factor())
# a00 <- read_csv('data/asm0012-clean.csv') #%>% mutate(industry = industry %>% as.factor(), year = year %>% as.factor())
# t2 <- read_csv('data/t2-clean.csv') #%>% mutate(industry = industry %>% as.factor(), year = year %>% as.factor())

#demean
demean <- . %>% group_by(year, industry) %>%
  mutate(mgindt = mean(git, na.rm = TRUE),
         eit = git - mgindt)

# aggregate
aggregate_components <- . %>% group_by(year) %>%
  summarize(mt = sum(l.wit * mgindt),
            et = sum(l.wit * eit),
            gt = mean(g_t, na.rm = TRUE)) %>%
  mutate(gt2 = mt + et)


decomp <- . %>% demean() %>% aggregate_components()

lms <- . %>% lm(gt ~ et, data = .) %>% summary() %>% .$r.squared
dlm <- function(df) {
  cov <- df %>% select(one_of(c('et', 'mt', 'gt'))) %>% var()
  (cov / cov[dim(cov)[1], dim(cov)[2]]) %>% diag() %>% sqrt()
}

# cor(a73 %>% decomp() %>% .$mt, a73 %>% decomp() %>% .$et)
# cor(a00 %>% decomp() %>% .$mt, a00 %>% decomp() %>% .$et)
# cor(t2 %>% decomp() %>% .$mt, t2 %>% decomp() %>% .$et)

# lapply(list(a73, a00, t2), decomp)
list(a73, a00, t2) %>% lapply(decomp) %>% lapply(lms) # get r-squareds. this is for later. now write one that does DLM too?

# graph things.
# all <- rbind(a73 %>% decomp() %>% mutate(type='a73'), a00 %>% decomp() %>% mutate(type='a00'), t2x %>% decomp() %>% mutate(type='t2'))
# ax <- all %>% gather(key = rate_type, value = rate, mt:gt)

# drop 2004 and 2012 in ASM.

# ax %>% filter(type != 't2') %>% ggplot(aes(x = year, y = rate, group = rate_type, colour = rate_type)) + geom_line() + geom_point()
# sample problem in 2004, yep.
# facet:
# ax %>% ggplot(aes(x = year, y = rate, group = rate_type, colour = rate_type)) +
#   geom_line() + geom_point() + facet_wrap(~ type, scales = 'free')


# to get results:

prep <- . %>% mutate(f = paste0(data, ':', type)) %>% split(.$f)
fix <- . %>% tbl_df() %>% t() %>% as.data.frame()
fix_fed <- . %>% fix() %>% tibble::rownames_to_column(var = 'data')
fix_pd <- . %>% fix() %>% tibble::rownames_to_column(var = 'data:type')
# tables:
to_table <- . %>% separate(`data:type`, into = c('data', 'icpx'), sep=':') %>% rename(et = V1) %>% spread(key = data, value = et) %>% xtable(align=c('lrrrr'), digits=3)


# fed = fixed effects data
# pd = PCA data
fed <- rbind(a73 %>% decomp %>% mutate(data='asm73_99'),
      a00 %>% decomp %>% mutate(data='asm00_12'),
      t2 %>% decomp %>% mutate(data='t2_00_12'))
inner_join(fed %>% split(.$data) %>% lapply(lms) %>% fix_fed() %>% rename(R2 = V1),
           fed %>% split(.$data) %>% lapply(dlm) %>% fix_fed() %>% select(-V3) %>% rename(et = V1, mt = V2), by=c('data')) %>%
  xtable(align=c('lrrrr'), digits=3)

# find this.
a7_pca <- decompose_pca(a73)
a0_pca <- decompose_pca(a00)
t2_pca <- decompose_pca(t2)

join_gt <- function(df, dfx) {
  left_join(df, dfx %>% decomp() %>% select(year, gt), by=c('year'))
}

# need to put all these lists and all these functions together.
pd <- rbind(a7_pca %>% rename(et=value) %>% join_gt(a73) %>% mutate(data='asm73_99'),
            a0_pca %>% rename(et=value) %>% join_gt(a00) %>% mutate(data='asm00_12'),
            t2_pca %>% rename(et=value) %>% join_gt(t2) %>% mutate(data='t2_00_12'))


pd %>% prep() %>% lapply(lms) %>% fix_pd() %>% to_table()
pd %>% prep() %>% lapply(dlm) %>% fix_pd() %>% select(-V2) %>% to_table()
tweed1e/idiosyncratics documentation built on May 29, 2019, 10:51 a.m.