# --- SETTING COUNTRY OPTIONS ----
#' Get the valid suffix set for a given country
#'
#' Starts from a common default suffix set and overrides it for the US
#' national samples and for the US "minearn" / "minfirmsize" specifications.
#' Every other country (AT, IT, SW, Testing-Brad, US state samples, ...)
#' receives the defaults unchanged.
#'
#' @param country Character scalar naming the country/sample specification,
#'   e.g. "US-early", "AT", "SW".
#' @return A named list with elements `AKM`, `CRE`, `TraceHO`, `TraceHE` and
#'   `interacted`; each element is a character vector of suffixes, or `NA`
#'   when no suffix is valid for that entry.
#' @export
Do.Country.Suffix <- function(country) {
  connected_only <- "_connected"
  connected_cre <- "_connected_clusters10"

  # defaults, shared by every sample that is not special-cased below
  suffix.set <- list(
    AKM = c("_connected", "_leaveout"),
    CRE = c("_connected_clusters10", "_all_clusters10", "_leaveout_clusters10"),
    TraceHO = c("_connected", "_leaveout"),
    TraceHE = "_leaveout",
    interacted = NA
  )

  is_us <- substr(country, 1, 3) == "US-"
  if (is_us) {
    # full national samples
    if (country %in% c("US-early", "US-late")) {
      suffix.set$AKM <- c("_connected", "_leaveout")
      suffix.set$CRE <- c("_connected_clusters10", "_leaveout_clusters10")
      suffix.set$TraceHO <- NA
      suffix.set$TraceHE <- NA
      suffix.set$interacted <- NA
    }
    # national samples without endpoints: connected set only
    if (country %in% c("US-early-noendpoints", "US-late-noendpoints")) {
      suffix.set$AKM <- connected_only
      suffix.set$CRE <- connected_cre
      suffix.set$TraceHO <- NA
      suffix.set$TraceHE <- NA
      suffix.set$interacted <- NA
    }
    # minimum-earnings / minimum-firm-size specifications
    if (grepl("minearn", country) || grepl("minfirmsize", country)) {
      suffix.set$AKM <- connected_only
      suffix.set$CRE <- connected_cre
      suffix.set$TraceHO <- connected_only
      suffix.set$TraceHE <- NA
      suffix.set$interacted <- NA
    }
    # US state samples fall through with the defaults
  }

  suffix.set
}
#' Get the directory paths for a given country
#'
#' Builds the list of input/output locations used for `country` and creates
#' the output directories when they do not exist yet. Directories are created
#' non-recursively, so the top-level project folder (e.g. `~/bhlmms`,
#' `~/labor`) must already exist. For the US national samples the function
#' also attaches the `akm`, `paneltools` and `parallel` packages.
#'
#' @param country Character scalar naming the country/sample specification,
#'   e.g. "US-early", "AT", "ITKline", "SW", "NO6thresh-hourlywage",
#'   "Testing-Brad".
#' @return A named list with elements `data`, `res`, `final` and `attrition`
#'   (plus `raw` for the US, AT and IT samples). An error is raised when
#'   `country` matches none of the known samples.
#' @export
Do.Country.Paths <- function(country) {
  paths <- list()

  # US--------------------------------------------------
  if (substr(country, 1, 3) == "US-") {
    paths$raw <- "~/manageCDW/raw_data/"
    us_national <- c("US-early", "US-late",
                     "US-early-noendpoints", "US-late-noendpoints")
    if (country %in% us_national) { # these are the only available US national samples
      # Due to the massive size of the US data, additional packages are
      # attached here; later estimation steps rely on them being on the
      # search path, so the library() calls are kept as a deliberate side effect.
      library(akm) # memory-efficient implementation of the AKM two-way FE estimator
      library(paneltools) # memory-efficient residualization of the outcome on covariates
      library(parallel) # run certain steps in parallel
      dir.create(file.path("~/bhlmms/national"), showWarnings = FALSE)
      paths$data <- sprintf("~/bhlmms/national/%s/jdata_sdata/", country)
      paths$res <- sprintf("~/bhlmms/national/%s/results/", country)
      paths$final <- "~/bhlmms/national/final-output/"
      paths$attrition <- sprintf("~/bhlmms/national/%s/attrition/", country)
      dir.create(file.path(sprintf("~/bhlmms/national/%s", country)), showWarnings = FALSE)
    } else {
      # any other "US-" sample is treated as a state-specific subsample;
      # these are small, so no additional packages are attached
      dir.create(file.path("~/bhlmms/states"), showWarnings = FALSE)
      paths$data <- sprintf("~/bhlmms/states/%s/jdata_sdata/", country)
      paths$res <- sprintf("~/bhlmms/states/%s/results/", country)
      paths$final <- "~/bhlmms/states/final-output/"
      # NOTE(review): attrition shares the results folder here, unlike the
      # national samples which use ".../attrition/" -- confirm this is intended.
      paths$attrition <- sprintf("~/bhlmms/states/%s/results/", country)
      dir.create(file.path(sprintf("~/bhlmms/states/%s", country)), showWarnings = FALSE)
    }
  }

  # AT, IT-------------------------------------------------------
  # substring match, so variants such as "ITKline" are handled here as well
  is_it <- grepl("IT", country)
  is_at <- grepl("AT", country)
  if (is_at || is_it) {
    proj_path <- "~/smfe/"
    if (is_it) {
      paths$raw <- paste0(proj_path, "/Data_IT/1985_2001_veneto.dta")
      if (grepl("ITKline", country)) {
        paths$raw <- paste0(proj_path, "/Data_IT/All_Veneto_1999and2001_T_equal_2.dta")
      }
    }
    if (is_at) {
      paths$raw <- paste0(proj_path, "/Data_AT/2000_2017_austria.dta")
    }
    # all outputs share a single folder for these samples
    paths$data <- paste0(proj_path, "/smfe-res-", country, "/")
    paths$res <- paths$data
    paths$final <- paths$data
    paths$attrition <- paths$data
  }

  # SW------------------------------------------------------------------------
  if (substr(country, 1, 2) == "SW") {
    paths$data <- sprintf("../smfe-res-%s/", country)
    paths$res <- sprintf("../smfe-res-%s/", country)
    paths$final <- sprintf("../smfe-res-%s/", country)
    paths$attrition <- "../tmp/"
  }
  # Sweden for attrition: overrides the generic SW paths set just above
  if (country == "SW-attrition") {
    paths$data <- "../tmp2/data/"
    paths$res <- "../tmp2/tmp/"
    paths$final <- "../tmp2/final/"
    paths$attrition <- "../tmp2/attrition/"
  }

  # NO------------------------------------------------------------------------
  no_samples <- c("NO6thresh", "NO3thresh",
                  "NO6thresh-hourlywage", "NO3thresh-hourlywage",
                  "NO6thresh-annualearnings", "NO3thresh-annualearnings")
  if (country %in% no_samples) {
    # On disk the folder name uses "_" where the sample name uses "-". The
    # parent folder must be created under that same name, otherwise the
    # non-recursive dir.create() calls at the end cannot create the subfolders.
    no_root <- sprintf("~/labor/%s/", sub("-", "_", country, fixed = TRUE))
    dir.create(file.path(no_root), showWarnings = FALSE)
    paths$data <- paste0(no_root, "temp/")
    paths$res <- paste0(no_root, "estimates/")
    paths$final <- paste0(no_root, "results/")
    paths$attrition <- paste0(no_root, "attrition/")
  }

  # Testing on Brad's local machine--------------------------------------------
  if (country == "Testing-Brad") {
    dir.create(file.path("~/Downloads/smfe-temp"), showWarnings = FALSE)
    paths$data <- "~/Downloads/smfe-temp/jdata_sdata/"
    paths$res <- "~/Downloads/smfe-temp/results/"
    paths$final <- "~/Downloads/smfe-temp/final-output/"
    paths$attrition <- "~/Downloads/smfe-temp/attrition/"
  }

  # fail with a clear message instead of a cryptic dir.create() error when
  # none of the branches above recognised the country
  required <- c("data", "res", "final", "attrition")
  is_unset <- vapply(required, function(nm) is.null(paths[[nm]]), logical(1))
  if (any(is_unset)) {
    stop(sprintf("Do.Country.Paths: no paths are defined for country '%s'",
                 paste(country, collapse = ", ")),
         call. = FALSE)
  }

  # make sure directories exist
  for (nm in required) {
    dir.create(file.path(paths[[nm]]), showWarnings = FALSE)
  }

  paths
}
#' Get the data-loading parameters for a given country
#'
#' Every country currently shares the same settings; `country` is accepted
#' for symmetry with the other option builders but is not inspected.
#'
#' @param country Character scalar naming the country/sample specification.
#' @return A list with elements `data_type` ("rds") and `separate` (FALSE).
#' @export
Do.Country.Loading <- function(country) {
  list(
    data_type = "rds",
    separate = FALSE
  )
}
#' Get the options for the attrition exercise
#'
#' All countries use the same defaults; only the "Testing-Brad" setup
#' shrinks the grid (a single keep share and two draws).
#'
#' @param country Character scalar naming the country/sample specification.
#' @return A list with elements `keep.shares`, `draws`, `moversperfirm`,
#'   `samefirms` and `methods`, where `methods` is itself a list holding the
#'   estimator names for the `connected` and `leaveout` samples.
#' @export
Do.Country.Attrition <- function(country) {
  estimators <- c("AKM", "CRE", "TraceHO")

  # defaults (used as-is for the US, AT, IT and Sweden)
  settings <- list(
    keep.shares = 1:9 / 10,
    draws = 1:10,
    moversperfirm = 15,
    samefirms = FALSE,
    methods = list(
      connected = estimators,
      leaveout = c(estimators, "TraceHE")
    )
  )

  # local testing: small grid
  if (country == "Testing-Brad") {
    settings[["keep.shares"]] <- 3 / 4
    settings[["draws"]] <- 1:2
  }

  settings
}
#' Get the miscellaneous options for a given country
#'
#' Starts from common defaults, adjusts them for the US national samples and
#' for the "Testing-Brad" setup, and, when the country name contains
#' "select", reads `begin_year` and `end_year` from the text that follows
#' "select_" (characters 1-4 and 6-9 respectively).
#'
#' @param country Character scalar naming the country/sample specification.
#' @return A named list of options; `begin_year` and `end_year` are present
#'   only when `country` contains "select".
#' @export
Do.Country.Misc <- function(country) {
  # defaults
  misc <- list(
    ncores = 1,                    # cores used in parallelization
    nstart = 100,                  # random starts when finding k-means clusters
    preserve.estimates = TRUE,     # if TRUE, existing estimates are not overwritten
    preserve.initdata = TRUE,      # presumably the same for the initial data -- TODO confirm
    preserve.rawdata = TRUE,       # presumably the same for the raw data -- TODO confirm
    cre.sub = FALSE,               # if TRUE, CRE estimation uses random subsampling
    posterior_var = TRUE,          # if TRUE, CRE estimation also estimates the posterior variance
    mover_cluster_max_share = 0.1,
    data_only = FALSE,             # if TRUE, skip FE estimation and only save intermediate data
    connectivity = FALSE,
    cluster_bothyears = FALSE,     # if TRUE, clusters use both pre-move and post-move data
    grouping_var = "wages"         # variable the clusters are estimated on
  )

  # US: only the national samples deviate from the defaults
  national_samples <- c("US-early", "US-late",
                        "US-early-noendpoints", "US-late-noendpoints")
  if (substr(country, 1, 3) == "US-") {
    if (country %in% national_samples) {
      misc[["ncores"]] <- 4
      misc[["nstart"]] <- 30
      misc[["cre.sub"]] <- TRUE
      misc[["posterior_var"]] <- FALSE
    }
  }

  # Testing
  if (country == "Testing-Brad") {
    misc[["ncores"]] <- 4
    misc[["posterior_var"]] <- TRUE
  }

  # manually specified begin/end years, e.g. "...select_2001_2005"
  if (grepl("select", country)) {
    year_text <- sub(".*select_", "", country)
    misc[["begin_year"]] <- as.integer(substr(year_text, 1, 4))
    misc[["end_year"]] <- as.integer(substr(year_text, 6, 9))
  }

  misc
}
#' Initialize the full set of options for a country
#'
#' Collects the output of the individual option builders into one list.
#' The builders are called in the order paths, loading, suffix set,
#' attrition exercise, misc.
#'
#' @param country Character scalar naming the country/sample specification.
#' @return A list with elements `country`, `paths`, `loading`, `suffix.set`,
#'   `attrition.exercise` and `misc`.
#' @export
Do.Init.Opts <- function(country) {
  list(
    country = country,
    paths = Do.Country.Paths(country),
    loading = Do.Country.Loading(country),
    suffix.set = Do.Country.Suffix(country),
    attrition.exercise = Do.Country.Attrition(country),
    misc = Do.Country.Misc(country)
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.