Updating USGDPpresidents

# Set the knitr chunk option `echo` to TRUE through opts_chunk$set().
knitr::opts_chunk$set(echo = TRUE)

Purpose of this document

This document describes the process for updating USGDPpresidents {Ecdat}.

Is there new data?

Start by checking the span of years in USGDPpresidents:

# Ecdat is the package holding the current USGDPpresidents data set.
library(Ecdat)
# First and last Year currently in the data; the outer parentheses
# make the assignment print its value.
(rngYrs <- with(USGDPpresidents, range(Year)))

Next download "GDP - US" and "CPI - US" from Measuring Worth. On 2019-04-06 this produced two csv files, which I downloaded and copied into the working directory:

# Show the working directory that is searched for the downloaded files.
getwd()
# Names of all csv files found there, sorted and printed for inspection.
(csv2 <- list.files(pattern='\\.csv$'))
# Continue with the update only when exactly two csv files are present.
Update0 <- (length(csv2) == 2)

The following assumes there are only 2 csv files in the working directory at this time, with the CPI being the first (in lexicographical order). If this is not correct, something needs to change for the following to work properly.

Read these two files:

# Default to "no update"; switched on below only when the downloaded
# files extend beyond the last year already in USGDPpresidents.
Update <- FALSE
if (Update0) {
  # First csv file (taken to be the CPI): skip its first 2 lines.
  USCPI <- read.csv(csv2[1], skip=2)
  str(USCPI)
  # Second csv file (taken to be the GDP data): skip its first line.
  USGDP. <- read.csv(csv2[2], skip=1)
  str(USGDP.)
  # NOTE(review): asNumericDF() is presumably provided by Ecfun and
  # converts the columns read as text to numbers -- confirm.
  library(Ecfun)
  USGDP <- asNumericDF(USGDP.)
  # Span of years covered by each download, printed for inspection.
  rngCPIyrs <- range(USCPI$Year)
  print(rngCPIyrs)
  rngGDPyrs <- range(USGDP$Year)
  print(rngGDPyrs)
  # Latest year found in either file.
  endYr <- max(rngCPIyrs, rngGDPyrs)
  if (endYr > rngYrs[2]) {
    Update <- TRUE
    print(Update)
  }
}

Update

If Update, create a local copy of USGDPpresidents with the additional rows required to hold the new data:

if (Update) {
  # Number of years to append after the current last year.
  rowsNeeded <- endYr - rngYrs[2]
  Nold <- nrow(USGDPpresidents)
  # Row index: every existing row once, followed by the last row
  # repeated once per new year as a placeholder.
  iRep <- c(seq_len(Nold), rep(Nold, times = rowsNeeded))
  USGDPp2 <- USGDPpresidents[iRep, ]
}

Fix the Year and insert NAs for all other columns for the new rows:

if (Update) {
  # Positions of the placeholder rows appended after the Nold old rows.
  iNew <- (Nold + 1):(Nold + rowsNeeded)
  # Number the new rows with the years following the old last year.
  USGDPp2$Year[iNew] <- (rngYrs[2] + 1):endYr
  # Use Year as the row names of the extended table.
  rownames(USGDPp2) <- USGDPp2$Year
  # Blank every column except the first in the new rows; the chunks
  # that follow fill them in.
  USGDPp2[iNew, -1] <- NA
}

Now replace CPI by the new numbers:

if (Update) {
  # Rows of the extended table whose Year occurs in the CPI download.
  selCPI <- (USGDPp2$Year %in% USCPI$Year)
  # Refuse to continue if an existing CPI value would be left behind,
  # i.e. it belongs to a year that the new download does not cover.
  # The column is addressed by name so that the check and the
  # assignment below are guaranteed to refer to the same column.
  if (any(!is.na(USGDPp2[!selCPI, 'CPI']))) {
    stop('ERROR:  There are CPI numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new.  ',
         'Manual review required.')
  }
  # Align the new values by Year instead of by position, so that a
  # download with unexpected years cannot be written to the wrong rows.
  iCPI <- match(USCPI$Year, USGDPp2$Year)
  if (anyNA(iCPI)) {
    stop('ERROR:  There are CPI years in the new data ',
         'that are not in the extended USGDPpresidents.  ',
         'Manual review required.')
  }
  # The second column of the CPI file is taken to hold the index values.
  USGDPp2$CPI[iCPI] <- USCPI[, 2]
}

Does USGDPpresidents.Rd need to be updated to reflect the proper reference years for the CPI?

if (Update) {
  # Display the first 4 lines of the first csv file so the reference
  # period stated there can be read off.
  readLines(con = csv2[1], n = 4L)
}

If this says "Average 1982-84 = 100", it should be good. Otherwise that (and this) should be updated.

Now let's update GDPdeflator:

if (Update) {
  # Column(s) of the GDP download whose name contains 'Deflator'.
  selDefl <- grep('Deflator', names(USGDP))
  # Rows of the extended table whose Year occurs in the GDP download.
  selGDP <- (USGDPp2$Year %in% USGDP$Year)
  # Stop if any row outside the download still carries a deflator value.
  if (!all(is.na(USGDPp2[!selGDP, 'GDPdeflator']))) {
    stop('ERROR:  There are GDPdeflator numbers ', 
         'in the current USGDPpresidents ', 
         'that are not in the new.  ', 
         'Manual review required.')
  }
  # Overwrite the deflator for all matching years.
  USGDPp2$GDPdeflator[selGDP] <- USGDP[, selDefl]
  # Print the source column name, which carries the index year.
  print(names(USGDP)[selDefl])
}

Compare the index year of "GDP.Deflator" with that in USGDPpresidents.Rd: If they are different, fix USGDPpresidents.Rd.

Now update population:

if (Update) {
  # Column(s) of the GDP download whose name contains 'Population'.
  selPop <- grep('Population', names(USGDP))
  # Overwrite population.K for every year present in the GDP download.
  USGDPp2[['population.K']][selGDP] <- USGDP[, selPop]
  # Print the name of the source column for inspection.
  print(names(USGDP)[selPop])
}

Now realGDPperCapita. This also has a reference year, so we need to make sure we get them all:

if (Update) {
  # Stop if any row outside the GDP download still carries a
  # realGDPperCapita value: it would keep its old reference year
  # while all other rows are replaced.
  # (The column name was previously misspelled 'readGDPperCapita',
  # which makes the data-frame lookup fail with "undefined columns
  # selected".)
  if (any(!is.na(USGDPp2[!selGDP, 'realGDPperCapita']))) {
    stop('ERROR:  There are realGDPperCapita numbers ', 
         'in the current USGDPpresidents ', 
         'that are not in the new.  ', 
         'Manual review required.')
  }
  # Column(s) of the GDP download matching 'Real.GDP.per.c'.
  selGDPperC <- grep('Real.GDP.per.c', names(USGDP))
  # Overwrite realGDPperCapita for every year in the GDP download.
  USGDPp2$realGDPperCapita[selGDP] <- USGDP[, selGDPperC]
  # Print the source column name, which carries the reference year.
  print(names(USGDP)[selGDPperC])
}

Compare the index year of 'Real.GDP.per.capita' with that in USGDPpresidents.Rd: If they are different, fix USGDPpresidents.Rd.

Next: executive:

if (Update) {
  exec <- as.character(USGDPp2$executive)
  # Executives for the rows still lacking one, oldest year first.
  # This list is entered by hand and must be edited whenever the set
  # of new years changes.
  newExec <- c('Obama', 'Trump', 'Trump')
  missExec <- is.na(exec)
  # Guard against silent recycling or partial assignment when the
  # number of rows to fill differs from the hand-entered list.
  if (sum(missExec) != length(newExec)) {
    stop('ERROR:  ', sum(missExec), 
         ' rows lack an executive but ', 
         length(newExec), ' names are supplied.  ', 
         'Manual review required.')
  }
  exec[missExec] <- newExec
  # union() keeps the existing level order and appends only names
  # that are not levels yet, so no duplicated level can arise.
  lvlexec <- union(levels(USGDPp2$executive), newExec)
  USGDPp2$executive <- ordered(exec, lvlexec)
}

Similarly: war

if (Update) {
  # Remember the existing levels so their order is kept.
  lvlwar <- levels(USGDPp2$war)
  war <- as.character(USGDPp2$war)
  # Rows without a war entry get the empty string.
  # NOTE(review): this presumes '' is already one of the levels;
  # otherwise these rows stay NA -- confirm.
  war <- replace(war, is.na(war), '')
  USGDPp2$war <- ordered(war, levels = lvlwar)
}

Next: battleDeaths and battleDeathsPMP:

if (Update) {
  # The new rows are entered with zero battle deaths.
  USGDPp2[['battleDeaths']][iNew] <- 0
  # Recompute battleDeathsPMP for all rows from battleDeaths and
  # population.K.
  USGDPp2$battleDeathsPMP <- 
    1000 * USGDPp2$battleDeaths / USGDPp2$population.K
}

Keynes?

if (Update) {
  # The new rows get Keynes = 0.
  USGDPp2[['Keynes']][iNew] <- 0
}

Unemployment?

Unemployment figures came from different sources for different years. Since 1940 the source has been the Bureau of Labor Statistics (BLS), series LNS14000000 from the Current Population Survey. These data are available as a monthly series from the Current Population Survey of the Bureau of Labor Statistics. Download the most recent years as an Excel file, compute row averages, and manually transfer the numbers for the most recent years here:

if (Update) {
  # Hand-entered unemployment figures for the new rows, oldest first.
  USGDPp2[['unemployment']][iNew] <- 
    c(4.875, 4.35, 3.89166666666667)
  # The new rows take the unempSource of the row just before them.
  USGDPp2[['unempSource']][iNew] <- 
    USGDPp2[['unempSource']][iNew[1] - 1]
  # Last expression of the chunk: show the final rows for checking.
  tail(USGDPp2, n = 6L)
}

Done: Save

if (Update) {
  # Store the extended table under the data set's own name, then
  # write that object to an .rda file in the working directory.
  USGDPpresidents <- USGDPp2
  save(list = 'USGDPpresidents', file = 'USGDPpresidents.rda')
}

Now copy this file from the current working directory to ~Ecdat\data, overwriting the previous version.



Try the Ecfun package in your browser

Any scripts or data that you put into this service are public.

Ecfun documentation built on May 2, 2019, 6:53 p.m.