#' Mask MSD, aka Training Dataset
#'
#' @param msd_filepath full file path to the MSD (PSNUxIM) (.rds)
#' @param psnuuids list of PSNU UIDs to select (n must equal 9)
#' @param output_folderpath full folder folder path to where you want to save to (default = NULL, ie `msd_filepath` folder)
#'
#' @export
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' #filepath for MSD (.rds)
#' msd_filepath <- "~/ICPI/Data/MER_Structured_Dataset_PSNU_IM_FY17-18_20181115_v1_2.rds"
#' #supply list of PSNU uids to use (must be 9)
#' psnuuid_list <- c("QlWUt1rBsEo", "GfzVv4oeclF", "PNgOI7VPe98", "TWpkDEvK6si", "aqktQTp0wHa",
#' "XFdvhW8Ga1S", "sVwvt4bYesp", "EJDcY4F1rsj", "nXEQ97b2YHJ")
#' #generate training dataset
#' mask_msd(msd_filepath, psnuuid_list)
#' #save to a training dataset to a different folder than the MSD folder
#' mask_msd(msd_filepath, psnuuid_list, "~/Output")
#' }
mask_msd <- function(msd_filepath, psnuuids, output_folderpath = NULL){
#exit if 9 PSNU UIDS are not supplied
if(length(psnuuids) == 0) stop("No PSNU UID list supplied")
if(length(psnuuids) != 9) stop("Must supply 9 PSNU UIDs.")
#set seed for sampling to ensure same order ever time
set.seed(14)
#change geography data & bind LIST of real PSNUs onto new/masked geography DATA FRAME
df_mapping <- dplyr::bind_cols(psnuuid = psnuuids, planets_geo)
#import MSD
df_mer <- readr::read_rds(msd_filepath)
#store headers for applying ordering at end
order <- names(df_mer)
#keep data only from select PSNUs, bind the masked geography to the real
df_mw <- dplyr::inner_join(df_mer, df_mapping, by = "psnuuid")
rm(df_mer)
#remove the mech identifiers
df_mw <- dplyr::mutate_at(df_mw, dplyr::vars(pre_rgnlztn_hq_mech_code,
prime_partner_duns,
award_number), ~ as.character(NA))
#identify the number of mechanisms create masked ones and bind back on, replacing the originals
mechs <- df_mw %>%
dplyr::distinct(mech_code) %>%
dplyr::arrange(mech_code)
new_mechs <- seq(1200, 1800,1) %>% #mechanism id can fall between 1200 and 1800
sample(nrow(mechs)) %>% #sample n number of mech ids based on n of mechanisms
as.character(.) %>%
dplyr::bind_cols(mechs, mech_code_mw = .) %>% #combine masked id with dedup
dplyr::mutate(mech_code_mw = ifelse(mech_code %in% c("00000", "00001"),
stringr::str_replace(mech_code, "0000", "000"),
mech_code_mw),
mech_code_mw = paste0("0", mech_code_mw))
#add masked partner & mechanism names
partners <- planets_primepartners %>%
dplyr::sample_n(nrow(mechs), replace = TRUE) %>% #sample primeparter and mech names from list
dplyr::bind_cols(new_mechs, .) %>% #bind onto mechanism list
dplyr::mutate(primepartner_mw = ifelse(mech_code %in% c("00000", "00001"), "Dedup", primepartner_mw),
mech_name_mw = ifelse(mech_code %in% c("00000", "00001"), "Dedup", mech_name_mw))
#bind mechanism info onto dataset
df_mw <- dplyr::left_join(df_mw, partners, by = "mech_code")
#adjust site level if applicable
df_mw <- mask_sites(df_mw)
#remove the original hiearchy/partner data and rename the variables and reorder
lst_unmaskedvar <- df_mw %>%
dplyr::select(dplyr::ends_with("_mw")) %>%
names() %>%
stringr::str_remove("_mw")
df_mw <- df_mw %>%
dplyr::select(-all_of(lst_unmaskedvar)) %>%
dplyr::rename_all(~ stringr::str_remove(., "_mw")) %>%
dplyr::select(order)
#adjust values, 60% of value & rounding to the nearest 10 and then remove zeros
df_mw <- df_mw %>%
dplyr::mutate_if(is.double, ~ 10*ceiling((.*0.60)/10)) %>%
dplyr::mutate_if(is.double, ~ ifelse(. == 0, NA, .))
#remove row with no data
df_mw <- dplyr::filter_at(df_mw, dplyr::vars(cumulative, targets), dplyr::any_vars(!is.na(.)))
#export
output_filename <- msd_filepath %>%
basename() %>%
stringr::str_replace("MER_Structured_Dataset", "MER_Structured_TRAINING_Dataset") %>%
stringr::str_replace("rds", "txt")
if(is.null(output_folderpath)) {
output_folderpath <- file.path(dirname(msd_filepath), output_filename)
} else {
output_folderpath <- file.path(output_folderpath, output_filename)
}
readr::write_tsv(df_mw, output_folderpath, na = "")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.