R/getFundsSummary.R

#' Get summary on available funds.
#'
#' Get fund ID, human-readable name and available dates for all funds.
#'
#' @export
#' @import dplyr stringr
#'
#' @param fundsNav data frame of funds data as returned by
#'   \code{\link{getFundData}}.
#'
#' @return Data frame with fund ID, fund name, start and end period of
#'   observations.
getFundsSummary <- function(fundsNav, includeType = TRUE) {
  # fundsNav <- getFundData(file = "fund-data-2016-12-26.RData",
  #                         dir = "../PensionFundsLvApp/data")
  THRESHOLD <- 10 # Threshold: if date if further away than this number of days
                  # from other observations, it is ignored.

  # Extract name and corresponing ID
  fundsNav %>%
    select(ID, IP.name) %>%
    distinct() %>%
    rename(id = ID, name = IP.name) ->
    idName

  # Find start and end dates
  sapply(idName$id, function(fundId) {
    fundsNav %>%
      filter(ID == fundId) %>%
      select(Calculation.date) %>%
      rename(date = Calculation.date) %>%
      arrange(date) ->
      fundDates

    # Looking at the first and last observations is not enough, because some
    # funds has some weird dates that are more than half a year appart from the
    # last miningful observation

    # First 20 observations
    datesHead <- head(fundDates, 20)[, 1]
    # Last 20 observations
    datesTail <- tail(fundDates, 20)[, 1]

    # Identify start
    start <- datesHead[1]
    for (idx in 2:20) {
      if (abs(datesHead[idx] - datesHead[idx - 1]) > THRESHOLD) {
        start <- datesHead[idx]
      }
    }
    # Identify end
    end <- datesTail[20]
    for (idx in 19:1) {
      if (abs(datesTail[idx] - datesTail[idx + 1]) > THRESHOLD) {
        end <- datesTail[idx]
      }
    }

    return(data.frame(id = fundId,
                      start = start,
                      end = end))
  }, simplify = FALSE) %>%
    data.table::rbindlist() %>%
    as.data.frame() ->
    startEndDates

  # Join
  idName %>%
    left_join(startEndDates) ->
    joined

  if (includeType) {
    # Add fund type information extracted by getFundsDescription function
    description <- getFundsDescription()
    summary <- joined

    # Names are in slightly different formats, therfore, create a column that
    # consists only of alphanumeric characters for joining the tables
    description %>%
      mutate(nameAlphanum = stringr::str_replace_all(description$name,
                                                     "[^[:alnum:]]",
                                                     "")) %>%
      select(nameAlphanum, type) ->
      description

    summary %>%
      mutate(nameAlphanum = stringr::str_replace_all(summary$name,
                                                     "[^[:alnum:]]",
                                                     "")) ->
      summary

    # Join on this column
    left_join(summary, description, by = c("nameAlphanum")) %>%
      select(-nameAlphanum) ->
      joined
  }

  return(joined)
}
nickto/PensionFundsLv documentation built on May 23, 2019, 5:08 p.m.