data-raw/get_OECD.R

# Get OECD data

library(OECD)
library(dplyr)
library(tidyr)
library(forcats)
library(lubridate)
library(countrycode)
library(purrr)

devtools::load_all()

start_year <- 1995

# dataset_list <- get_datasets()





## Unit labour cost
# ULC_QUA is discontinued

# search_dataset("Unit", data = dataset_list)

# HUOM esim Japanille Jobs pidempi sarja

# Unit labour costs and labour productivity (employment based), Total economy
# https://stats.oecd.org/Index.aspx?DataSetCode=ULC_EEQ

# ulc_id <- "ULC_EEQ"

# ulc_str <- get_data_structure(ulc_id)

ulc_subjects <- c(
  NULC_APER = "ULCE",
  D1_EMP_APER = "LCEMP",
  GDP_EMP_PER = "GDPEMP"
)

ulc_filter <- pRoductivity::make_oecd_filter(
  list("",              # geo
       "Q",             #
       ulc_subjects,    # Meusure
       "",
       "IX",            # index
       "",
       "",
       "S",              # Seasonal adjust
       ""
       )
)

ulc_oecd_dat0 <-
  get_dataset("OECD.SDD.TPS,DSD_PDB@DF_PDB_ULC_Q,1.0",
              filter = ulc_filter)

ulc_oecd_dat <- ulc_oecd_dat0 %>%
  transmute(
    time = yq(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    na_item = fct_recode(MEASURE, !!!ulc_subjects),
    unit = as_factor("I15"),
    values = as.numeric(ObsValue))


usethis::use_data(ulc_oecd_dat, overwrite = TRUE)

# ulc_oecd_dat_old <- ulc_oecd_dat
# usethis::use_data(ulc_oecd_dat_old, overwrite = TRUE)

## National account

## Quarterly National Accounts

# Series are taken for two different tables. There is also one API dataset, but it get produces duplicated series.
# - Quarterly GDP and components - expenditure approach, national currency
# - Quarterly GDP and components - income approach

qna_subjects_1 <- c(
  B1GQ = "B1GQ", #"Gross domestic product",
  P6 = "P6", #"Exports of goods and services",
  P61 = "P61", #"Exports of goods",
  P62 = "P62", #"Export of services"
  B11 = "B11") #"External balance of goods and services"


qna_subjects_2 <- c(
  D1 = "D1" #"Compensation of employees",
  )

qna_measures <-
  c(
    CP_MNAC = "V",   # National currency, current prices, quarterly levels, seasonally adjusted
    CLV_MNAC = "L"  # National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted
  )


qna_geo <- countrycode(oecd_geos, "eurostat", "iso3c", nomatch = NULL)
gna_cur <- countrycode(oecd_geos, "eurostat", "iso4217c", nomatch = NULL)


# oecd_dat_Q_0 <- get_dataset(dataset = qna_id,
#                           filter = list(qna_geo, qna_subjects, qna_measures, "Q"))
qna_filter_1 <- pRoductivity::make_oecd_filter(
  list(
    "Q",             # Quaterly
    "Y",             # SA
    qna_geo,
    "",
    "",
    qna_subjects_1,
    "_Z",
    c("_Z", "_T"),
    "",
    "",
    qna_measures,
    "",
    ""
  )
)

qna_filter_2 <- pRoductivity::make_oecd_filter(
  list(
    "Q",             # Quaterly
    "Y",             # SA
    qna_geo,
    "",
    "",
    qna_subjects_2,
    "_Z",
    c("_Z", "_T"),
    "",
    "",
    qna_measures,
    "",
    ""
  )
)

# Quarterly GDP and components - expenditure approach, national currency
oecd_dat_Q_1_0 <- get_dataset("OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_NATIO_CURR,1.0",
                            filter = qna_filter_1)
# Quarterly GDP and components - income approach
oecd_dat_Q_2_0 <- get_dataset("OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_INCOME,1.0",
                              filter = qna_filter_2)


oecd_dat_Q <-
  oecd_dat_Q_1_0 %>%
  bind_rows(oecd_dat_Q_2_0) |>
  transmute(
    time = yq(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    na_item = fct_recode(TRANSACTION, !!!c(qna_subjects_1, qna_subjects_2)),
    unit = fct_recode(PRICE_BASE, !!!qna_measures),
    currency = as_factor(CURRENCY),
    values = as.numeric(ObsValue)) %>%
  filter(!(na_item == "B11" & unit == "CLV_MNAC")) |>  # constant p balance not needed
  filter(currency != "_Z") |>     # drop index version
  droplevels()


usethis::use_data(oecd_dat_Q, overwrite = TRUE)
# usethis::use_data(oecd_dat_Q_old, overwrite = TRUE)




## Annual national accounts

# search_dataset("SNA", data = dataset_list) %>% View()

# dataset_list %>% filter(grepl("SNA", id)) %>% View()

# sna1_str <- get_data_structure("SNA_TABLE1")
# # sna6_str <- get_data_structure("SNA_TABLE6")
# sna6a_str <- get_data_structure("SNA_TABLE6A")
# sna7a_str <- get_data_structure("SNA_TABLE7A")

# sna1_str$TRANSACT %>% knitr::kable()
# sna6a_str$TRANSACT %>% knitr::kable()
# sna7a_str$MEASURE %>% knitr::kable()

# Countries
sna_geo <- countrycode(c("US", "JP"), "eurostat", "iso3c")

# Industries
sna_activity <- setNames(names(main_nace_sna_new), main_nace_sna_new)

# Transactions

sna1_transact <- c(
  B1GQ = "B1GQ", #"Gross domestic product",
  # D1 = "D1", #"Compensation of employees, total",
  P6 = "P6", #"Exports of goods and services",
  P61 = "P61", #"Exports of goods",
  P62 = "P62", #"Exports of services",
  B11 = "B11") #"External balance of goods and services"

sna1_2_transact <- c(
  D1 = "D1") #"Compensation of employees, total",


sna6a_transact <- c(
  B1G = "B1G", #"Gross Value added",
  D1 = "D1" #"Compensation of employees, total",
)

sna7a_transact <- c(
  EMP_DC = "EMP", # "Total empoyment",
  SAL_DC = "SAL" #"Employees",
)


# Measeures
sna_measures <-   c(
  CP_MNAC = "V",   # current prices
  CLV15_MNAC = "LR",  # Constant prices
  PYP_MNAC = "Y" # previous year prices
)

sna7a_measures <-   c(
  THS_PER = "PS",   # Persons
  THS_HW = "H",   # Hours
  THS_JOBS = "JB"  # JObs
)


filter_sna1 <- list("A", sna_geo, "", "", sna1_transact, "", "", "", "XDC", sna_measures, "", "")
dat_oecd_sna1_0 <- get_dataset(dataset = "OECD.SDD.NAD,DSD_NAMAIN10@DF_TABLE1_EXPENDITURE,1.0",
                               filter = pRoductivity::make_oecd_filter(filter_sna1))

filter_sna1_2 <- list("A", sna_geo, "", "", sna1_2_transact, "", "_T", "", "XDC", sna_measures["CP_MNAC"], "", "")
dat_oecd_sna1_2_0 <- get_dataset(dataset = "OECD.SDD.NAD,DSD_NAMAIN10@DF_TABLE1_INCOME,1.0",
                                 filter = pRoductivity::make_oecd_filter(filter_sna1_2))

filter_sna6 <- list("A", sna_geo, "", "", sna6a_transact, "", sna_activity, "", "", sna_measures, "", "")
dat_oecd_sna6a_0 <- get_dataset(dataset = "OECD.SDD.NAD,DSD_NAMAIN10@DF_TABLE6,1.0",
                                filter = pRoductivity::make_oecd_filter(filter_sna6))

# dat_oecd_sna7a_0 <- get_dataset(dataset = "SNA_TABLE7A",
#                                 filter = list(sna_geo, sna7a_transact, sna_activity, sna7a_measures))

filter_sna7a <- list("A", sna_geo, "", "", sna7a_transact, "", sna_activity, "", sna7a_measures, "", "", "")
dat_oecd_sna7a_0 <- get_dataset(dataset = "OECD.SDD.NAD,DSD_NAMAIN10@DF_TABLE7,1.0",
                                filter = pRoductivity::make_oecd_filter(filter_sna7a))


dat_oecd_sna1_1 <- dat_oecd_sna1_0 %>%
  transmute(
    time = as.numeric(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    na_item = fct_recode(TRANSACTION, !!!sna1_transact),
    unit = fct_recode(PRICE_BASE, !!!sna_measures),
    currency = as_factor(CURRENCY),
    values = as.numeric(ObsValue))  %>%
  mutate(nace_r2 = "TOTAL")  %>%
  unite(vars, na_item, unit, sep = "__") %>%
  mutate(vars = as_factor(vars)) %>%
  spread(vars, values) %>%
  filter(time >= start_year) %>%
  droplevels() %>%
  complete(time, geo)

dat_oecd_sna1_2 <- dat_oecd_sna1_2_0 %>%
  transmute(
    time = as.numeric(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    na_item = fct_recode(TRANSACTION, !!!sna1_2_transact),
    unit = fct_recode(PRICE_BASE, !!!sna_measures["CP_MNAC"]),
    currency = as_factor(CURRENCY),
    values = as.numeric(ObsValue))  %>%
  mutate(nace_r2 = "TOTAL")  %>%
  unite(vars, na_item, unit, sep = "__") %>%
  mutate(vars = as_factor(vars)) %>%
  spread(vars, values) %>%
  filter(time >= start_year) %>%
  droplevels() %>%
  complete(time, geo)

dat_oecd_sna6a <- dat_oecd_sna6a_0 %>%
  transmute(
    time = as.numeric(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    nace_r2 = fct_recode(ACTIVITY, !!!sna_activity),
    na_item = fct_recode(TRANSACTION, !!!sna6a_transact),
    unit = fct_recode(PRICE_BASE, !!!sna_measures),
    currency = as_factor(CURRENCY),
    values = as.numeric(ObsValue))  %>%
  unite(vars, na_item, unit, sep = "__") %>%
  mutate(vars = as_factor(vars)) %>%
  spread(vars, values)

dat_oecd_sna7a <- dat_oecd_sna7a_0 %>%
  transmute(
    time = as.numeric(TIME_PERIOD),
    geo = as_factor(countrycode(REF_AREA, "iso3c", "eurostat", nomatch = NULL)),
    nace_r2 = fct_recode(ACTIVITY, !!!sna_activity),
    na_item = fct_recode(TRANSACTION, !!!sna7a_transact),
    unit = fct_recode(UNIT_MEASURE, !!!sna7a_measures),
    values = as.numeric(ObsValue))  %>%
  unite(vars, na_item, unit, sep = "__") %>%
  mutate(vars = as_factor(vars)) %>%
  spread(vars, values) |>
  mutate(SAL_DC__THS_HW = 1000 * SAL_DC__THS_HW,
         EMP_DC__THS_HW = 1000 * EMP_DC__THS_HW)


dat_oecd_sna_nace <-
  dat_oecd_sna6a %>%
  left_join(dat_oecd_sna7a, by = c("time", "geo", "nace_r2")) %>%
  filter(time >= start_year) %>%
  complete(time, geo, nace_r2)


dat_oecd_sna_nace_imput <-
  dat_oecd_sna_nace %>%
  # Extrapolate emp based on jobs
  nest(data = c(-geo, -nace_r2)) %>%
  mutate(emp_mod = map(data, possibly(~lm(EMP_DC__THS_PER ~ EMP_DC__THS_JOBS, data = .x), otherwise = NA_real_)),
         emp_approx = map2(emp_mod, data, possibly(~predict(.x, .y), otherwise = NA_real_)),
         sal_mod = map(data, possibly(~lm(SAL_DC__THS_PER ~ SAL_DC__THS_JOBS, data = .x), otherwise = NA_real_)),
         sal_approx = map2(sal_mod, data, possibly(~predict(.x, .y), otherwise = NA_real_))) %>%
  select(-emp_mod, -sal_mod) %>%
  unnest(cols = c(data, emp_approx, sal_approx)) %>%
  mutate(EMP_DC__THS_PER = coalesce(EMP_DC__THS_PER, emp_approx),
         SAL_DC__THS_PER = coalesce(SAL_DC__THS_PER, sal_approx)) %>%
  select(-emp_approx, -sal_approx) %>%
  # Approx EMP_DC__THS_HW base on EMP_DC__THS_PER and SAL_DC__THS_HW and SAL_DC__THS_PER
  mutate(EMP_DC__THS_HW = if_else(is.na(EMP_DC__THS_HW), EMP_DC__THS_PER * SAL_DC__THS_HW / SAL_DC__THS_PER, EMP_DC__THS_HW)) %>%
  # drop M and N for Japan for missing
  filter(!(geo == "JP" & nace_r2 %in% c("M", "N")))

# visdat::vis_dat(dat_oecd_sna_nace)

data_oecd_sna_nace_a <-
  dat_oecd_sna_nace_imput %>%
  mutate_at(vars(matches("CP_MNAC")),
            .funs = list(EUR = ~EUR(., time, currency, exh_eur_a))) %>%
  rename_at(vars(contains("MNAC_EUR")), list(~gsub("NAC_", "", .))) %>%
  filter(time >= a_start_time)

data_oecd_sna_a <-
  dat_oecd_sna1_1 %>%
  left_join(dat_oecd_sna1_2, by = c("time", "geo", "currency", "nace_r2")) %>%
  mutate_at(vars(matches("CP_MNAC")),
            .funs = list(EUR = ~EUR(., time, currency, exh_eur_a))) %>%
  rename_at(vars(contains("MNAC_EUR")), list(~gsub("NAC_", "", .))) %>%
  filter(time >= a_start_time)



# data_oecd_sna_a %>%
#   filter(nace_r2 == "TOTAL") %>%
#   select(geo, time, starts_with("D1")) %>%
#   gather(vars, values, -geo, -time) %>%
#   ggplot(aes(time, values, colour = vars)) +
#   facet_wrap(~geo, scales = "free") +
#   geom_line()


usethis::use_data(data_oecd_sna_nace_a, data_oecd_sna_a, overwrite = TRUE)
pttry/ficomp documentation built on June 11, 2024, 9:53 p.m.