General

STAN combines time series from National Accounts by economic activity. The estimation methodology consists of three major components:

Data Requirements

The data needs to meet the following requirements:

Data for estimation

## load packages and data
require(stan)
require(stanData)
require(ggplot2)
data(STANNAi0)
data(STANNAi3)
data(STANNAi4)
data(stanDim)
## prepare data: extend
## selection shared by both sources: country, variable and year range
## (the industry code is fixed per source below)
input.extend <- list(
  cou = "AUT",
  var = "VALU",
  ## ind = "CTOTAL",
  year = c(1995:2012)
)

## first source: rows of DATA.STANi3 with industry code "CTOTAL"
df.extend1 <- DATA.STANi3[
  DATA.STANi3$cou %in% input.extend$cou &
    DATA.STANi3$var %in% input.extend$var &
    DATA.STANi3$ind %in% c("CTOTAL") &
    ## DATA.STANi3$ind %in% c("CTOTAL, C15T37") &
    DATA.STANi3$year %in% input.extend$year,
]
df.extend1$sou <- "STANandBTDi3"

## second source: rows of DATA.STANi4 with industry code "DTOTAL"
df.extend2 <- DATA.STANi4[
  DATA.STANi4$cou %in% input.extend$cou &
    DATA.STANi4$var %in% input.extend$var &
    DATA.STANi4$ind %in% c("DTOTAL") &
    ## DATA.STANi4$ind %in% c("DTOTAL", "D10T33") &
    DATA.STANi4$year %in% input.extend$year,
]
df.extend2$sou <- "STANandBTDi4"
## relabel the industry code so both sources share the code "CTOTAL"
df.extend2$ind <- sub("DTOTAL", "CTOTAL", df.extend2$ind)
## df.extend2$ind <- sub("D10T33", "C15T37", df.extend2$ind)

## currency conversion driven by the "EXCH" rows of DATA.XRATES
## NOTE(review): presumably converts to USD, judging by the variable name -
## confirm against convertCurrency()
df.extend2.USD <- convertCurrency(
  data = df.extend2,
  datacur = DATA.XRATES[DATA.XRATES$var == "EXCH", ]
)

## stack both sources into the long table used by the extend step
df.extend <- rbind(df.extend1, df.extend2.USD)

## prepare data: detail
## selection shared by both sources; the same industry codes are requested
## from each of them
input.detail <- list(
  cou = "JPN",
  var = "VALU",
  ind = c("C15T37", "C36", "C37"),
  year = c(1995:2012)
)

## sort(setdiff(unique(df.detail2$ind), unique(df.detail1$ind)))

## first source: matching rows of DATA.STANi3
df.detail1 <- DATA.STANi3[
  DATA.STANi3$cou %in% input.detail$cou &
    DATA.STANi3$var %in% input.detail$var &
    DATA.STANi3$ind %in% input.detail$ind &
    DATA.STANi3$year %in% input.detail$year,
]
df.detail1$sou <- "STANandBTDi3"

## second source: matching rows of DATA.INDSTAT32
df.detail2 <- DATA.INDSTAT32[
  DATA.INDSTAT32$cou %in% input.detail$cou &
    DATA.INDSTAT32$var %in% input.detail$var &
    DATA.INDSTAT32$ind %in% input.detail$ind &
    DATA.INDSTAT32$year %in% input.detail$year,
]
df.detail2$sou <- "INDSTAT32"
## scale the INDSTAT32 values by one million
## NOTE(review): presumably aligns the unit with DATA.STANi3 - confirm
df.detail2$value <- df.detail2$value * 1e6

## stack both sources into the long table used by the detail step
df.detail <- rbind(df.detail1, df.detail2)

Extend

## source labels handed to stan::extend() through its namesou argument
namesou <- c("STANandBTDi3", "STANandBTDi4")
result <- stan::extend(df.extend, namesou = namesou)

## tag the result with the selection it was computed for and give it a
## source label of its own
result[["cou"]] <- input.extend$cou
result[["var"]] <- input.extend$var
result[["ind"]] <- "CTOTAL"
result[["sou"]] <- "NAPATCH"

If the time coverage of our primary data source is insufficient and a secondary source covers, for example, more recent years, we attempt to extend the main source using the annual growth rate of the secondary source for the years that are missing in the main source.

$$ estim_{t+1} = main_{t} * \frac{sec_{t+1}}{sec_{t}} $$

In the example below, the sources are as follows:

## create figure: extend

## keep only the shared columns, in a fixed order, before stacking the
## result onto the input rows
result <- subset(
  result,
  select = c("cou", "var", "ind", "year", "value", "sou")
)

df.extend.plot <- rbind(df.extend, result)

## explicit factor levels fix the order of the sources in the figure
df.extend.plot[["sou"]] <- factor(
  df.extend.plot[["sou"]],
  levels = c("STANandBTDi3", "STANandBTDi4", "NAPATCH")
)

## one line panel per source, stacked vertically, legend on top
ggplot(data = df.extend.plot, aes(x = year, y = value)) + # , color=factor(sou)
  geom_line(aes(color = factor(sou))) +
  facet_grid(sou ~ .) +
  ## guides(color = guide_legend(label.position = "top"))
  theme(legend.position = "top")

The dataset structure for this example looks as follows:

## show the first rows of the combined input table of the extend example
knitr::kable(
  head(df.extend, n = 6L),
  row.names = FALSE
)

Apply distribution from secondary sources

## source labels handed to stan::detail() through its namesou argument
namesou <- c("STANandBTDi3", "INDSTAT32")
## the first requested industry is passed as the parent, the remaining ones
## as its peers
result <- stan::detail(
  df.detail,
  namesou = namesou,
  ind.parent = input.detail$ind[1],
  ind.peers = input.detail$ind[-1]
)

## tag the result with the selection it was computed for and give it a
## source label of its own
result[["cou"]] <- input.detail$cou
result[["var"]] <- input.detail$var
## result$ind <- "CTOTAL"
result[["sou"]] <- "NAPATCH"

If our primary data source is not sufficiently detailed and additional detail is available in a secondary source, we attempt to apply the distribution from the secondary source to the primary source.

distribution

In the example below, the sources are as follows:

## create figure: detail

## keep only the shared columns, in a fixed order, before stacking the
## result onto the input rows
result <- subset(
  result,
  select = c("cou", "var", "ind", "year", "value", "sou")
)

df.detail.plot <- rbind(df.detail, result)
## restrict the figure to the industries requested in input.detail
df.detail.plot <- subset(df.detail.plot, ind %in% input.detail$ind)

## explicit factor levels fix the order of the sources in the figure
df.detail.plot[["sou"]] <- factor(
  df.detail.plot[["sou"]],
  levels = c("STANandBTDi3", "INDSTAT32", "NAPATCH")
)

## grid of line panels: industries in rows, sources in columns, free scales
ggplot(data = df.detail.plot, aes(x = year, y = value)) + # , color=factor(sou)
  geom_line(aes(color = factor(sou))) +
  ## facet_grid(sou ~ ind, scales = "free")
  facet_grid(ind ~ sou, scales = "free") +
  theme(legend.position = "top")

In this example we observe two oddities:

The dataset structure for this example looks as follows:

## show the first rows of the combined input table of the detail example
knitr::kable(
  head(df.detail, n = 6L),
  row.names = FALSE
)

The industries correspond to:

## label table restricted to the industry codes of the detail example
knitr::kable(
  STANi3.INDLABEL[STANi3.INDLABEL$ind %in% input.detail$ind, ],
  row.names = FALSE
)

Adjust

Numbers are adjusted top-down to ensure consistency with the total economy time series according to the hierarchy below:

## create indented industry list below

## Format one industry code as an indented bullet line.
##
## Arguments:
##   ind    industry code (single string); used to index `list` and to look
##          up the label in the global STANi3.INDLABEL (columns ind, label)
##   list   named list; the length of list[[ind]] is the indentation depth
##   char   string repeated once per depth level to form the indent
##   width  line budget; width - depth * nchar(char) - 4 is handed to
##          toString() as its width argument for the label
##
## Value:
##   "<indent>- <ind>  <label>\n", or "\n" when list[[ind]] has length zero.
indindent <- function(ind, list, char="    ", width=50) {
  nparent <- length(list[[ind]])
  if (nparent == 0) {
    return("\n")
  }
  ## strrep() repeats char verbatim; the former toString()/gsub() round trip
  ## removed any ", " that was part of char itself
  indent <- strrep(char, nparent)
  ## several matching labels are collapsed into one string by toString()
  label <- toString(
    STANi3.INDLABEL$label[STANi3.INDLABEL$ind == ind],
    width = width - nparent * nchar(char) - 4
  )
  paste0(indent, "- ", ind, "  ", label, "\n")
}

# indlist <- STANi4.INDALL[STANi4.INDALL%in%c(STANi4.INDA10, STANi4.INDA21, STANi4.INDA38, STANi4.INDA64, STANi4.INDA88)]

## one formatted line per industry code; vapply() guarantees a character
## vector (sapply() would return a list for an empty industry vector)
text <- unname(vapply(STANi3.INDA60All, indindent, character(1), list = STANi3.HIERARCHYINV))

## every entry already ends in "\n"; sep = "" keeps cat() from inserting its
## default space in front of each following line
cat(text, sep = "")


bowerth/stan documentation built on May 13, 2019, 12:38 a.m.