R/cbs_add_date_column.R

Defines functions cbs_add_date_column

Documented in cbs_add_date_column

#' Convert the time variable into either a date or numeric. 
#' 
#' Time periods in data of CBS are coded: yyyyXXww (e.g. 2018JJ00, 2018MM10, 2018KW02),
#' which contains year (yyyy), type (XX) and index (ww). `cbs_add_date_column` converts
#' these codes into a [Date()] or `numeric`. In addition it adds
#' a frequency column denoting the type of the column.
#' @param x `data.frame` retrieved using [cbs_get_data()]
#' 
#' @param date_type Type of date column: "Date", "numeric. Numeric creates a fractional 
#' number which signs the "middle" of the period. e.g. 2018JJ00 -> 2018.5 and 
#' 2018KW01 -> 2018.167. This is for the following reasons: otherwise 2018.0 could mean
#' 2018, 2018 Q1 or 2018 Jan, and furthermore 2018.75 is a bit strange for 2018 Q4. 
#' If all codes in the dataset have frequency "Y" the numeric output will be `integer`.
#' @param ... future use. 
#' @return original dataset with two added columns: `<period>_date` and 
#' `<period>_freq`. This last column is a factor with levels: `Y`, `Q` and `M`
#' @export
#' @example ./example/cbs_add_date_column.R
#' @family data retrieval
#' @family meta data
cbs_add_date_column <- function(x, date_type = c("Date", "numeric"),...){
  # retrieve period column (using timedimension)
  period_name <- names(unlist(sapply(x, attr, "is_time")))
  
  if (!length(period_name)){
    warning("No time dimension found!")
    return(x)
  }
    
  period <- x[[period_name[1]]]
  
  PATTERN <- "(\\d{4})(\\w{2})(\\d{2})"
  
  year   <- as.integer(sub(PATTERN, "\\1", period))
  number <- as.integer(sub(PATTERN, "\\3", period))
  type   <- factor(sub(PATTERN, "\\2", period))
  
  #TODO add switch for begin / middle / period or number
  
  # year conversion
  is_year <- type %in% c("JJ")
  is_quarter <- type %in% c("KW")
  is_month <- type %in% c("MM")
  is_week <- type %in% c("W1")
  is_week_part <- type %in% c("X0")
  
  
  # date
  date_type <- match.arg(date_type)
  
  if (date_type == "Date"){
    period <- as.POSIXct(character())
    period[is_year] <- ISOdate(year, 1, 1, tz="")[is_year]
    period[is_quarter] <- ISOdate(year, 1 + (number - 1) * 3, 1, tz="")[is_quarter]
    period[is_month] <- ISOdate(year, number, 1, tz="")[is_month]
    period[is_week] <- {
       d <- as.Date(paste0(year, "-1-1")) + 7 * (number - 1)
       # a week starts at monday
       wday <- as.integer(format(d, "%u"))
       d <- d + ((7 - wday - 1) %% 7)
       d
      }[is_week]
    period[is_week_part] <- as.Date(paste0(year, "-1-1"))[is_week_part]
    period <- as.Date(period)
  } else if (date_type == "numeric"){
    period <- numeric()
    period[is_year] <- year[is_year] + 0.5
    period[is_quarter] <- (year + (3*(number - 1) + 2) / 12)[is_quarter]
    period[is_month] <- (year + (number - 0.5) / 12)[is_month]
    period[is_week] <- (year + (number - 0.5)/53)[is_week]
    period[is_week_part] <- year
    if (all(is_year)){
      period <- as.integer(period)
    }
  }
  
  type1 <- factor(levels=c("Y","Q", "M", "W", "D", "X"))
  type1[is_year] <- "Y"
  type1[is_quarter] <- "Q"
  type1[is_month] <- "M"
  type1[is_week] <- "W"
  type1[is_week_part] <- "X"
  type1 <- droplevels(type1)
  
  # put the column just behind the period column
  i <- which(names(x) == period_name)
  x <- x[c(1:i, i, i:ncol(x))]
  idx <- c(i+1, i+2)
  x[idx] <- list(period, type1)
  names(x)[idx] <- paste0(period_name, paste0("_", c(date_type,"freq")))
  x
}

# x <- cbs_get_data("81819NED")
# x1 <- cbs_add_date_column(x)
  

Try the cbsodataR package in your browser

Any scripts or data that you put into this service are public.

cbsodataR documentation built on Jan. 13, 2021, 7:30 p.m.