R/combine_netnew.R

Defines functions combine_netnew

Documented in combine_netnew

#' Add NET_NEW to dataset (orginally in achafetz/PartnerProgress)
#'
#' @param df dataframe to create net new based off of and append onto
#' @param archived_msd_filepath if importing FY16Q4 data, identify the folder path where archived file with FY15-16 data sits
#'
#' @export
#'
#' @importFrom dplyr %>%
#'
#' @examples
#' \dontrun{
#' df_msd <- combine_netnew(df_msd, "~/ICPI/Data")}
#'
#'
combine_netnew <- function(df, archived_msd_filepath = NULL){

  .Deprecated(msg = "Given the inclusion of TX_NET_NEW in the FY18Q3 MSD, `combine_netnew()` is deprecated.")

  #store column names (to work for both lower case and camel case) & then covert to lowercase
    headers_orig <- names(df)
    df <- dplyr::rename_all(df, ~ tolower(.))

  #save column names/order for binding at end
    msd_order <- names(df)

  #if merging in FY16Q4 data to ensure FY17cum TX_NET_NEW correctness
    if(!is.null(archived_msd_filepath)){
      df <- import_oldtx(df, archived_msd_filepath) %>%
        dplyr::rename_all(~ tolower(.))
    }

  #keep TX_CURR to create net_new off of
    df_tx <- df %>%
      dplyr::filter(indicator == "TX_CURR")

  #ensure coarsedisggregate is a character for grouping
    df_tx <- df_tx %>%
       dplyr::mutate(coarsedisaggregate = as.character(coarsedisaggregate))

  #create net new values for results and targets
    df_nn_result <- gen_netnew(df_tx, type = "result")
    df_nn_target <- gen_netnew(df_tx, type = "target")

  #create new new for apr by aggregating results data
    df_nn_apr <- df_nn_result %>%
      #reshape long so years can be aggregated together
      tidyr::gather(pd, val, dplyr::starts_with("fy2")) %>%
      #remove period, leaving just year to be aggregated together
      dplyr::mutate(pd = stringr::str_remove(pd, "q[:digit:]"),
                    pd = as.character(pd)) %>%
      #aggregate
      dplyr::group_by_if(is.character) %>%
      dplyr::summarise(val = sum(val, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      #rename year with apr to match structured dataset & replace 0's
      dplyr::mutate(pd = paste0(pd, "apr"),
                    val = ifelse(val==0, NA, val)) %>%
      #reshape wide to match MSD
      tidyr::spread(pd, val)

  #change to fy20XXcum if current pd is not APR (Q4)
    curr_qtr <- ICPIutilities::identifypd(df, pd_type = "quarter")
    if(curr_qtr < 4){
      curr_yr <- ICPIutilities::identifypd(df, pd_type = "year")
      curr_cum <- paste0("fy", curr_yr , "cum")
      curr_apr <- paste0("fy", curr_yr, "apr")
      df_nn_apr <- df_nn_apr %>%
        dplyr::rename(!!curr_cum := !!curr_apr)
    }

  #join all net new pds/targets/apr together
    join_vars <- df %>%
      dplyr::select(-dplyr::starts_with("fy")) %>%
      names()
    df_combo <- dplyr::full_join(df_nn_result, df_nn_target, by = join_vars)
    df_combo <- dplyr::full_join(df_combo, df_nn_apr, by = join_vars)

  #add dropped values back in and reoder to append onto original dataframe
    df_combo <- df_combo %>%
      dplyr::select(msd_order)

  #append TX_NET_NEW onto main dataframe
    df <- dplyr::bind_rows(df, df_combo) %>%
      dplyr::select(msd_order) #remove FY16Q4

  #reapply original variable casing type
    names(df) <- headers_orig

  return(df)

}



#' Create Net New Variable
#'
#' @param df data frame to use
#' @param type either result or target, default = result
#'
#' @importFrom dplyr %>%
#'
#' @examples
#' \dontrun{
#' df_mds_results <- gen_netnew(df_mds, type = "result")}

gen_netnew <- function(df, type = "result"){

  #for results, only want to keep quarterly data; for targets, calc off targets and priod q4
    if(type == "result") {
      df_nn <- df %>%
        dplyr::select(-dplyr::ends_with("targets"))
    } else {
      df_nn <- df %>%
        dplyr::select(-dplyr::ends_with("q1"), -dplyr::ends_with("q2"), -dplyr::ends_with("q3"))

    }

  #aggregate so only one line per mech/geo/disagg
    df_nn <- df_nn %>%
      #remove uids that different between targets/results and no need for apr value
      dplyr::select(-dplyr::ends_with("apr"), -dplyr::ends_with("cum")) %>%
      #aggregate all quartertly data
      dplyr::group_by_if(is.character) %>%
      dplyr::summarize_at(dplyr::vars(dplyr::starts_with("fy2")), ~ sum(., na.rm = TRUE)) %>%
      dplyr::ungroup()

  #reshape long to subtract prior pd (keeping full set of pds to ensure nn = pd - pd_lag.1)
    df_nn <- df_nn %>%
      tidyr::gather(pd, val, dplyr::starts_with("fy2"), factor_key = TRUE) %>%
      #fill all NAs with zero so net new can be calculated
      dplyr::mutate(val = ifelse(is.na(val), 0, val))

  #create new new variables
    df_nn <- df_nn %>%
      #group by all meta data and then order by period within each group
      dplyr::group_by_if(is.character) %>%
      dplyr::arrange(pd) %>%
      dplyr::mutate(netnew = val - dplyr::lag(val)) %>%
      dplyr::ungroup() %>%
      #replace all 0's with NA and change ind name from TX_CURR to TX_NET_NEW
      dplyr::mutate(netnew = ifelse(netnew==0, NA, netnew),
             indicator = "TX_NET_NEW") %>%
      #remove val since just need net new
      dplyr::select(-val) %>%
      #reshape wide to bind back onto main data frame
      tidyr::spread(pd, netnew)

  #remove Q4 for targets since just needed for target calc and q4 net new here is meaningless/wrong
    if(type == "target"){
      df_nn <- df_nn %>%
        dplyr::select(-dplyr::ends_with("q4"))
    }

    return(df_nn)
}
ICPI/ICPIutilities documentation built on Aug. 30, 2021, 12:19 p.m.