# 1. f <- compare weighted avg of growth rates to aggregate. sd of difference? like SSR.
# 2. loop through 1 to K or whatever.
# 3. graph
library(ggplot2)
# Working copies of the input panels under short names, used by the
# aggregate-growth comparison below.
# `asm73`, `asm00` and `t2x` are not defined in the visible part of this
# script; they are expected to exist already when it runs.
# NOTE(review): the original note here said "start with a73 only", but all
# three data sets are used further down.
a73 <- asm73
# The trailing comment on the next line is a disabled filter on `year`.
a00 <- asm00 #%>% filter(!(year %in% c(2004,2012)))
# The trailing comment on the next line is a disabled per-year mean of `g_t`.
t2 <- t2x # a73g <- a73 %>% group_by(year) %>% summarize(gt = mean(g_t, na.rm = TRUE))
# lms <- . %>% lm(g_t ~ git, data = .) %>% summary() %>% .$r.squared
#
# lms_lag <- . %>% arrange(year) %>% lm(g_t ~ git + lag(git), data = .) %>% summary() %>% .$r.squared
#' R-squared of yearly mean `g_t` on the `git` of the k largest rows per year
#'
#' For every year, keeps the `k` rows with the largest `l.wit`, rescales their
#' `l.wit` to sum to one, collapses them to a single `git` value per year
#' (weighted sum or plain mean) and regresses the yearly mean of `g_t` on it.
#'
#' @param k Number of rows (largest `l.wit` first) kept per year.
#' @param df Data frame with columns `id`, `year`, `l.wit`, `git` and `g_t`.
#' @param weight If `TRUE` (default) combine `git` with the rescaled `l.wit`
#'   weights; if `FALSE` use the unweighted mean of `git`.
#' @return The R-squared of `lm(g_t ~ git)` fitted on the yearly series.
by_year <- function(k, df, weight = TRUE) {
  # Attached inside the function because as.df() ships this function to
  # parallel workers, and the verbs below are used unqualified.
  library(dplyr)
  yearly <- df %>%
    group_by(year) %>%
    arrange(year, desc(l.wit)) %>%
    slice(seq_len(k)) %>%
    select(id, year, l.wit, git, g_t) %>%
    mutate(l.wit = l.wit / sum(l.wit, na.rm = TRUE)) %>%
    summarize(
      # Scalar choice, so plain if/else. The previous ifelse() call was closed
      # before its `no` argument, which broke weight = FALSE and turned the
      # unweighted mean into a stray extra column.
      git = if (weight) {
        sum(l.wit * git, na.rm = TRUE)
      } else {
        mean(git, na.rm = TRUE)
      },
      g_t = mean(g_t, na.rm = TRUE)
    )
  summary(lm(g_t ~ git, data = yearly))$r.squared
}
#' R-squared of yearly mean `g_t` on the `git` of the k largest rows per
#' year and two-character industry prefix
#'
#' Truncates `industry` to its first two characters, keeps the `k` rows with
#' the largest `l.wit` in every year/industry cell, rescales the kept `l.wit`
#' to sum to one within each year, collapses them to a single `git` value per
#' year (weighted sum or plain mean) and regresses the yearly mean of `g_t`
#' on it.
#'
#' @param k Number of rows (largest `l.wit` first) kept per year and industry.
#' @param df Data frame with columns `id`, `year`, `industry`, `l.wit`, `git`
#'   and `g_t`.
#' @param weight If `TRUE` (default) combine `git` with the rescaled `l.wit`
#'   weights; if `FALSE` use the unweighted mean of `git`.
#' @return The R-squared of `lm(g_t ~ git)` fitted on the yearly series.
by_year_industry <- function(k, df, weight = TRUE) {
  # Attached inside the function, as by_year() does, so the unqualified verbs
  # also resolve when the function is run on a parallel worker.
  library(dplyr)
  yearly <- df %>%
    # Base substr() replaces str_sub(), which needed stringr to be attached.
    mutate(industry = substr(as.character(industry), 1, 2)) %>%
    group_by(year, industry) %>%
    arrange(year, industry, desc(l.wit)) %>%
    slice(seq_len(k)) %>%
    # `g_t` (not `gt`): this is the column summarised below and the one
    # by_year() uses.
    select(id, year, industry, l.wit, git, g_t) %>%
    ungroup() %>%
    group_by(year) %>%
    mutate(l.wit = l.wit / sum(l.wit, na.rm = TRUE)) %>%
    summarize(
      # Scalar choice, so plain if/else. The previous ifelse() call was closed
      # before its `no` argument.
      git = if (weight) {
        sum(l.wit * git, na.rm = TRUE)
      } else {
        mean(git, na.rm = TRUE)
      },
      g_t = mean(g_t, na.rm = TRUE)
    )
  # Fitted inline; the `lms` helper this function used to call is not defined
  # at file level.
  summary(lm(g_t ~ git, data = yearly))$r.squared
}
# now want to save the ks, and the resulting R^2, and graph them somehow.
#' Evaluate `FUN` for every cutoff 1..k in parallel and tabulate the results
#'
#' @param k Largest cutoff; `FUN` is called once for each of `1, ..., k`.
#' @param FUN Function called on the workers as
#'   `FUN(i, df = df, weight = weight)`. Its results are flattened with
#'   `unlist()`, so it should return one value per call.
#' @param df Data passed through to `FUN`.
#' @param weight Flag passed through to `FUN`.
#' @param name Label stored in the `type` column of the result.
#' @param n_cores Number of worker processes to start. Defaults to 8, the
#'   value that was previously fixed in the body.
#' @return A tibble with columns `k`, `type` and `r2`.
as.df <- function(k, FUN, df, weight = TRUE, name, n_cores = 8) {
  ks <- seq_len(k)
  if (length(ks) == 0) {
    # Nothing to evaluate: skip starting workers and return an empty table
    # that still has all three columns.
    return(tibble::tibble(k = ks, type = character(0), r2 = numeric(0)))
  }
  cl <- parallel::makeCluster(n_cores)
  # Registered straight away so the workers are shut down even if FUN fails.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  r2 <- unlist(parallel::parLapply(cl, ks, FUN, df = df, weight = weight))
  tibble::tibble(k = ks, type = name, r2 = r2)
}
library(parallel)
# Largest cutoff k evaluated for each data set.
N <- 100
# Earlier experiment, kept for reference: parallelising across the data sets
# with clusterMap() instead of across k inside as.df().
# cl <- makeCluster(2)
# clusterMap(cl, as.df, df = list(a73, a00, t2), name = list("w a7", "w a0", "w t2"), MoreArgs = list(k = N, FUN = by_year), SIMPLIFY = FALSE)
# stopCluster(cl)

# Build one R^2-by-k table per data set (kept in `x`), timed with system.time().
system.time(
  x <- mapply(
    as.df,
    df = list(a73, a00, t2),
    name = list("w a7", "w a0", "w t2"),
    MoreArgs = list(k = N, FUN = by_year),
    SIMPLIFY = FALSE
  )
)

# Stack the tables (bind_rows(x) was noted as an alternative) and draw R^2
# against k, one coloured line per data set.
ggplot(Reduce(rbind, x), aes(x = k, y = r2, colour = type)) +
  geom_line() +
  geom_point() +
  ylim(0, 1)
# Ad-hoc check on t2: per year, take the 100 rows with the largest l.wit,
# rescale their l.wit to sum to one and form the weighted sum of git; then
# regress the yearly mean of g_t on that series and on its value in the
# preceding row (rows sorted by year), and print the model summary.
t2 %>%
  group_by(year) %>%
  arrange(year, -l.wit) %>%
  slice(1:100) %>%
  select(id, year, l.wit, git, g_t) %>%
  mutate(l.wit = l.wit / sum(l.wit, na.rm = TRUE)) %>%
  summarize(
    # Always the weighted sum here. The previous ifelse(TRUE == TRUE, ...) was
    # closed one argument early, which left mean(git) as a stray unnamed
    # column in the summary.
    git = sum(l.wit * git, na.rm = TRUE),
    g_t = mean(g_t, na.rm = TRUE)
  ) %>%
  arrange(year) %>%
  # The lag is built as an explicit column with dplyr::lag(). A bare lag() in
  # the formula resolves to stats::lag() whenever dplyr's mask is not in
  # effect, and that does not shift a plain vector. The coefficient is
  # therefore labelled `git_lag` instead of `lag(git)`.
  mutate(git_lag = dplyr::lag(git)) %>%
  lm(g_t ~ git + git_lag, data = .) %>%
  summary()
# # Ni <- floor(N / 22)
# # dfi <- tibble(k = c(1:Ni)*22, type = "ind weighted")
# # dfi$r2 <- lapply(1:Ni, by_year_industry, df = a73, weight = TRUE) %>% unlist()
# ok, what next. try to use that info to predict? or use on bigger sample etc?
# (Footer text from the web page this script was copied from; not part of the analysis.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.