# R/getCleanData.R

#' Clean the funds and risk-free rate data.
#'
#' \code{getCleanData} merges the fund NAV data with the risk-free rates, keeps
#' only the relevant columns, resamples every fund to the requested frequency
#' and computes simple per-period returns.
#'
#' For every fund the rows are ordered by date and missing \code{nav} and
#' \code{rf} values are filled with the last non-missing observation (gaps at
#' the very start of a fund's history stay \code{NA}). For frequencies other
#' than \code{"days"} only the last observation of every period is kept. The
#' return is \code{nav / previous nav - 1}, so the first row of every fund has
#' \code{ret = NA}.
#'
#' @import xts dplyr
#' @export
#'
#' @param fundsNav data frame with fund data as returned by getFundData (or any
#'   other data frame that can be passed to computeReturnFromNav() function).
#'   The columns \code{Date.in.force}, \code{ID} and \code{NAV.value.EUR} are
#'   used.
#' @param rf data frame with risk-free rates as returned by getRfData function.
#'   It must have a \code{date} column (used as the join key) and an \code{rf}
#'   column, which is treated as an annual rate in percent.
#' @param frequency string, one of c("days", "weeks", "months", "quarters",
#'   "years")
#'
#' @return An object of class FundData, which inherits from data.frame. It has
#'   id, date, fund nav, fund return and risk-free rate at the specified
#'   frequency. Returns have the same frequency as the data (e.g. daily for
#'   daily frequency, monthly for monthly frequency). The risk-free rate is
#'   converted to a per-period rate as a fraction (not percent). Frequency is
#'   stored in the \code{frequency} attribute.
getCleanData <- function(fundsNav, rf, frequency = "days") {
  # Number of periods per year, used to de-annualise the risk-free rate
  periodsPerYear <- c(days = 252, weeks = 52, months = 12, quarters = 4,
                      years = 1)

  # Error handling: frequency has to be exactly one of the supported strings
  if (!is.character(frequency) || length(frequency) != 1L ||
      !(frequency %in% names(periodsPerYear))) {
    stop("Wrong frequency.")
  }

  # Fill NA with the last non-NA value; leading NAs are left untouched
  fillForward <- function(values) {
    lastSeen <- cummax(ifelse(is.na(values), 0L, seq_along(values)))
    lastSeen[lastSeen == 0L] <- NA_integer_
    values[lastSeen]
  }

  # Select only date, fund id and NAV
  #
  # Date.in.force is unique, so join on it, not Calculation.date
  navData <- dplyr::select(fundsNav,
                           date = Date.in.force,
                           id = ID,
                           nav = NAV.value.EUR)
  joined <- dplyr::full_join(navData, rf, by = "date")
  if (!("rf" %in% names(joined))) {
    stop("rf must contain a column named 'rf'.")
  }

  # Split by fund. Rows that came only from rf have no id and are dropped by
  # split(); drop = TRUE also skips unused factor levels.
  perFund <- lapply(split(joined, joined$id, drop = TRUE), function(fund) {
    fund <- fund[order(fund$date), , drop = FALSE]

    # id is kept out of the numeric series on purpose, so a character or
    # factor id cannot coerce nav and rf to strings
    nav <- fillForward(fund$nav)
    rfRate <- fillForward(fund$rf)

    # Keep the last observation of every period (every row for "days")
    if (frequency == "days") {
      keep <- seq_len(nrow(fund))
    } else {
      series <- xts::xts(nav, order.by = fund$date)
      # endpoints() starts with a 0 marker, which is not a row index
      keep <- xts::endpoints(series, on = frequency)[-1L]
    }

    periodData <- data.frame(date = fund$date[keep],
                             id = fund$id[1L], # same for the whole group
                             nav = nav[keep],
                             rf = rfRate[keep],
                             stringsAsFactors = FALSE)

    # Compute returns. dplyr::lag is spelled out because stats::lag does not
    # shift a plain vector and would silently give zero returns.
    periodData$ret <- periodData$nav / dplyr::lag(periodData$nav) - 1
    periodData
  })

  if (length(perFund) == 0L) {
    # No fund rows at all: return an empty but well-formed table
    cleanData <- data.frame(date = as.Date(character(0)),
                            id = joined$id[0L],
                            nav = numeric(0),
                            rf = numeric(0),
                            ret = numeric(0),
                            stringsAsFactors = FALSE)
  } else {
    cleanData <- as.data.frame(data.table::rbindlist(perFund))
    # Make sure date is of Date type whatever date-like class came in
    cleanData$date <- as.Date(cleanData$date)
  }

  # Convert rf to the same frequency as the returns on funds. It is an annual
  # rate in percent; the per-period rate is (1 + rf / 100)^(1 / n) - 1, written
  # with log1p/expm1 to stay accurate for very small rates.
  cleanData$rf <- expm1(log1p(cleanData$rf / 100) / periodsPerYear[[frequency]])

  structure(cleanData,
            frequency = frequency,
            class = c("FundData", class(cleanData)))
}
# nickto/PensionFundsLv documentation built on May 23, 2019, 5:08 p.m.