R/hfr_fix_date.R

Defines functions hfr_round_date hfr_identify_freq hfr_fix_date

Documented in hfr_fix_date hfr_identify_freq hfr_round_date

#' Convert dates to date format
#'
#' The submission templates do not have date validations stored in the Excel
#' files, so there can be a number of date types submitted that we attempt to
#' account for outside of the normal, ISO format of YYYY-MM-DD. This function
#' handles each identfied date type as a separate dataframe and then binds them
#' back together. Date formats include - Excel, ISO, character dates. If the
#' round_hfrdate param is TRUE, the function rounds to the start of the week or
#' month and aggregates any mid-week/month submission.
#'
#' @param df HFR data frame imported via `hfr_import()`
#' @param round_hfrdate rounds date to the nearest HFRweek start (for non-compliance), default = FALSE
#'
#' @export

hfr_fix_date <- function(df, round_hfrdate = FALSE){

  #adjust Excel formatted dates
    df_date_excel <- df %>%
      dplyr::filter(stringr::str_detect(date, "^[:digit:]{5}")) %>%
      dplyr::mutate(date = as.Date(as.numeric(date), origin = "1899-12-30"))

  #adjust ISO formatted dates
    df_date_iso <- df %>%
      dplyr::filter(stringr::str_detect(date,  "^[:digit:]{4}-")) %>%
      dplyr::mutate(date = as.Date(date))

  #adjust character date, assuming format is mdy
    df_date_mdy <- df %>%
      dplyr::filter(stringr::str_detect(date,  "^[:digit:]{2}(-|/)")) %>%
      dplyr::mutate(date = lubridate::mdy(date))

  #replace other dates as NA
    df_date_other <- df %>%
      dplyr::filter(stringr::str_detect(date, "^[:digit:]{4}-|^[:digit:]{5}|^[:digit:]{2}(-|/)", negate = TRUE)) %>%
      dplyr::mutate(date = as.Date(NA))

  #bind all types together
    df_date_fixed <- dplyr::bind_rows(df_date_excel, df_date_iso, df_date_mdy, df_date_other)

  #identify reporting frequency
    df_date_fixed <- hfr_identify_freq(df_date_fixed)

  #round date (fixes non-compliance)
   if(round_hfrdate == TRUE)
     df_date_fixed <- hfr_round_date(df_date_fixed)

  return(df_date_fixed)
}


#' Identify reporting frequency
#'
#' Flags the frequency of reporting, adding this to the data frame for tracking
#' and for use if rounding and aggregating.
#'
#' @param df HFR data frame from `hfr_fix_date()`
#'
#' @export

hfr_identify_freq <- function(df){

  if(var_exists("val")){

    if(curr_fy > 2020){
      #tally weeks of reporting by orgunituid x mech_code x indicator)
      df_week_tally <- df %>%
        dplyr::filter(!is.na(val)) %>%
        dplyr::distinct(date, orgunituid, mech_code, indicator) %>%
        dplyr::count(orgunituid, mech_code, indicator, name = "dates_reported")

      #identify period type for mapping back on df
      df_pd_type <- df_week_tally %>%
        dplyr::mutate(hfr_freq = ifelse(dates_reported == 1, "month", "week")) %>%
        dplyr::select(-dates_reported)

      #merge onto df
      df <- df %>%
        dplyr::left_join(df_pd_type, by = c("orgunituid", "mech_code", "indicator")) %>%
        dplyr::relocate(hfr_freq, .after = date)

    } else {
      #for FY20, assign weekly reporting frequency
      df <- df %>%
        dplyr::mutate(hfr_freq == "week") %>%
        dplyr::relocate(hfr_freq, .after = date)
    }
  }


    return(df)

}


#' Round Date Values
#'
#' Round values to nearest HFR date
#'
#' @param df df HFR data frame imported via `hfr_import()`
#'
#' @export

hfr_round_date <- function(df){

    #round weekly data to Monday (down for Tues-Sat, up for Sun)
      df_wk <- df %>%
        dplyr::filter(hfr_freq == "week") %>%
        dplyr::mutate(date = dplyr::case_when(lubridate::wday(date) == 1 ~
                                                lubridate::ceiling_date(date, unit = "week",
                                                                        week_start = 1),
                                              lubridate::wday(date)  > 1 ~
                                                lubridate::floor_date(date, unit = "week",
                                                                      week_start = 1)))
    #round monthly/monthly agg (down to 1st of month)
      df_mo <- df %>%
        dplyr::filter(hfr_freq %in% c("month", "month agg")) %>%
        dplyr::mutate(date = lubridate::floor_date(date, unit = "month"))

    #no reporting across pd (will get filtered out later)
      df_na <- df %>%
        dplyr::filter(is.na(hfr_freq))

    #bind full set back together
      df <- dplyr::bind_rows(df_wk, df_mo, df_na)

  return(df)
}
USAID-OHA-SI/Wavelength documentation built on March 24, 2023, 10:07 a.m.