STAN combines time series from National Accounts by economic activity. The estimation methodology consists of three major components:
The data needs to meet the following requirements:
`cou`, `var`, `ind`, `year`, `value`, `sou`
(order does not matter)

## load packages and data
## library() stops with an error as soon as a package is missing, whereas
## require() only warns and returns FALSE, which would let the script run on
## and fail later with a less helpful message.
library(stan)
library(stanData)
library(ggplot2)

## data() loads the named datasets into the workspace.
## NOTE(review): presumably these provide the DATA.* and STANi3.* objects
## used further down -- confirm against the stanData package.
data(STANNAi0)
data(STANNAi3)
data(STANNAi4)
data(stanDim)
## ---- prepare data: extend ------------------------------------------------
## Selection used for the "extend" example: one country, one variable and the
## industry "CTOTAL", read from DATA.STANi3 and DATA.STANi4.
input.extend <- list(
    cou = "AUT",
    var = "VALU",
    ## ind = "CTOTAL",
    year = 1995:2012
)

## first source: rows of DATA.STANi3 matching the selection
df.extend1 <- subset(
    DATA.STANi3,
    cou %in% input.extend$cou &
        var %in% input.extend$var &
        ind %in% "CTOTAL" &
        ## ind%in%c("CTOTAL, C15T37") &
        year %in% input.extend$year
)
df.extend1$sou <- "STANandBTDi3"

## second source: rows of DATA.STANi4; the industry is called "DTOTAL" there
df.extend2 <- subset(
    DATA.STANi4,
    cou %in% input.extend$cou &
        var %in% input.extend$var &
        ind %in% "DTOTAL" &
        ## ind%in%c("DTOTAL", "D10T33") &
        year %in% input.extend$year
)
df.extend2$sou <- "STANandBTDi4"

## rename the industry code so that both sources carry the same label
df.extend2$ind <- sub("DTOTAL", "CTOTAL", df.extend2$ind)
## df.extend2$ind <- sub("D10T33", "C15T37", df.extend2$ind)

## convertCurrency() is defined outside this document; it is given the rows
## of DATA.XRATES whose var equals "EXCH".
## NOTE(review): the ".USD" suffix suggests a conversion to US dollars --
## confirm against convertCurrency().
df.extend2.USD <- convertCurrency(
    data = df.extend2,
    datacur = DATA.XRATES[DATA.XRATES$var == "EXCH", ]
)

## stack both sources into the input of the extend example
df.extend <- rbind(df.extend1, df.extend2.USD)

## ---- prepare data: detail ------------------------------------------------
## Selection used for the "detail" example: the first industry code is later
## passed as parent, the remaining ones as peers.
input.detail <- list(
    cou = "JPN",
    var = "VALU",
    ind = c("C15T37", "C36", "C37"),
    year = 1995:2012
)
## sort(setdiff(unique(df.detail2$ind), unique(df.detail1$ind)))

## first source: rows of DATA.STANi3 matching the selection
df.detail1 <- subset(
    DATA.STANi3,
    cou %in% input.detail$cou &
        var %in% input.detail$var &
        ind %in% input.detail$ind &
        year %in% input.detail$year
)
df.detail1$sou <- "STANandBTDi3"

## second source: rows of DATA.INDSTAT32 matching the same selection
df.detail2 <- subset(
    DATA.INDSTAT32,
    cou %in% input.detail$cou &
        var %in% input.detail$var &
        ind %in% input.detail$ind &
        year %in% input.detail$year
)
df.detail2$sou <- "INDSTAT32"

## NOTE(review): values are scaled by one million, presumably because
## DATA.INDSTAT32 is expressed in millions -- confirm the unit.
df.detail2$value <- df.detail2$value * 1e6

## stack both sources into the input of the detail example
df.detail <- rbind(df.detail1, df.detail2)
## names of the two sources handed to stan::extend(), in this order
namesou <- c("STANandBTDi3", "STANandBTDi4")

## run the extension on the stacked input data
result <- stan::extend(df.extend, namesou = namesou)

## attach the identifying columns to the estimate
result[["cou"]] <- input.extend$cou
result[["var"]] <- input.extend$var
result[["ind"]] <- "CTOTAL"
result[["sou"]] <- "NAPATCH"
If the time coverage of our primary data source is insufficient and a secondary source covers for example more recent years, we attempt to extend the main source using the annual growth rate of the secondary source for years missing in the main source.
$$ \mathrm{estim}_{t+1} = \mathrm{main}_{t} \times \frac{\mathrm{sec}_{t+1}}{\mathrm{sec}_{t}} $$
In the example below, the sources are as follows:
- `r namesou[1]`
- `r namesou[2]`
- `r result$sou[1]`
## ---- create figure: extend -----------------------------------------------
## keep only the columns shared with the input data so both can be stacked
result <- subset(
    result,
    select = c("cou", "var", "ind", "year", "value", "sou")
)
df.extend.plot <- rbind(df.extend, result)

## fix the order of the sources: both inputs first, the estimate last
df.extend.plot$sou <- factor(
    df.extend.plot$sou,
    levels = c("STANandBTDi3", "STANandBTDi4", "NAPATCH")
)

## one panel per source (stacked vertically), legend above the panels
## (alternative kept from the original: color=factor(sou) in the top-level aes)
ggplot(df.extend.plot, aes(x = year, y = value)) +
    geom_line(aes(color = factor(sou))) +
    facet_grid(sou ~ .) +
    ## guides(color = guide_legend(label.position = "top"))
    theme(legend.position = "top")
The dataset structure for this example looks as follows:
## show the first six rows of the extend input data as a table
knitr::kable(head(df.extend, n = 6L), row.names = FALSE)
## names of the two sources handed to stan::detail(), in this order
namesou <- c("STANandBTDi3", "INDSTAT32")

## the first selected industry is passed as parent, the remaining as peers
result <- stan::detail(
    df.detail,
    namesou = namesou,
    ind.parent = input.detail$ind[1],
    ind.peers = input.detail$ind[-1]
)

## attach the identifying columns to the estimate
result[["cou"]] <- input.detail$cou
result[["var"]] <- input.detail$var
## result$ind <- "CTOTAL"
result[["sou"]] <- "NAPATCH"
In case our primary data source is not sufficiently detailed and additional detail is available in a secondary source, we attempt to apply the distribution from the secondary source to the primary source.
In the example below, the sources are as follows:
- `r namesou[1]`
- `r namesou[2]`
- `r result$sou[1]`
## ---- create figure: detail -----------------------------------------------
## keep only the columns shared with the input data so both can be stacked
result <- subset(
    result,
    select = c("cou", "var", "ind", "year", "value", "sou")
)
df.detail.plot <- rbind(df.detail, result)

## restrict the plot to the industries of the detail selection
df.detail.plot <- subset(df.detail.plot, ind %in% input.detail$ind)

## fix the order of the sources: both inputs first, the estimate last
df.detail.plot$sou <- factor(
    df.detail.plot$sou,
    levels = c("STANandBTDi3", "INDSTAT32", "NAPATCH")
)

## industries in rows, sources in columns, free axis scales per panel
## (alternative kept from the original: color=factor(sou) in the top-level aes)
ggplot(df.detail.plot, aes(x = year, y = value)) +
    geom_line(aes(color = factor(sou))) +
    ## facet_grid(sou ~ ind, scales = "free")
    facet_grid(ind ~ sou, scales = "free") +
    theme(legend.position = "top")
In this example we observe two oddities:
- the primary source ends in `r max(df.detail$year[df.detail$sou==namesou[1]])`, therefore the estimated results don't extend beyond that year
- the secondary source contains zero values for `r min(df.detail$year[df.detail$sou==namesou[2] & df.detail$value==0])`-`r max(df.detail$year[df.detail$sou==namesou[2] & df.detail$value==0])` and this is transferred to the estimated results

The dataset structure for this example looks as follows:
## show the first six rows of the detail input data as a table
knitr::kable(head(df.detail, n = 6L), row.names = FALSE)
The industries correspond to:
## table of the label rows belonging to the industries of the detail example
is.detail.ind <- STANi3.INDLABEL$ind %in% input.detail$ind
knitr::kable(STANi3.INDLABEL[is.detail.ind, ], row.names = FALSE)
Numbers are adjusted top-down to ensure consistency with the total economy time series according to the hierarchy below:
## create indented industry list below

## Format one industry code as a single line of an indented list.
##
## Arguments:
##   ind    industry code to format (a single value)
##   list   named list that is indexed with `ind`; the length of list[[ind]]
##          determines the indentation depth
##          (presumably the parents of `ind` -- confirm against the data)
##   char   string repeated once per level to build the indentation
##   width  total width budget of the line; the label is cut by toString()
##          to what remains after the indentation and 4 further characters
##
## Returns a length-one character string ending in a newline. Codes for which
## list[[ind]] is empty produce a bare newline.
indindent <- function(ind, list, char = " ", width = 50) {
    nparent <- length(list[[ind]])
    if (nparent == 0) {
        return("\n")
    }
    label <- STANi3.INDLABEL$label[STANi3.INDLABEL$ind == ind]
    ## strrep() repeats `char` directly; the previous
    ## gsub(", ", "", toString(rep(...))) detour would also have stripped
    ## any ", " contained in `char` itself.
    paste0(
        strrep(char, nparent), "- ", ind, " ",
        toString(label, width = width - nparent * nchar(char) - 4),
        "\n"
    )
}

# indlist <- STANi4.INDALL[STANi4.INDALL%in%c(STANi4.INDA10, STANi4.INDA21, STANi4.INDA38, STANi4.INDA64, STANi4.INDA88)]

## vapply() guarantees a character vector with one string per industry (also
## for empty input) and fails loudly if indindent() ever returns anything else.
text <- vapply(
    STANi3.INDA60All, indindent, character(1),
    list = STANi3.HIERARCHYINV, USE.NAMES = FALSE
)
cat(text)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.