# PROJECT: Wavelength
# AUTHOR: A.Chafetz | USAID
# PURPOSE: resolve blank row known issue
# LICENSE: MIT
# DATE: 2022-06-23
# UPDATED:
# DEPENDENCIES ------------------------------------------------------------
library(tidyverse)
library(glamr)
library(readxl)
library(openxlsx)
library(Wavelength)
library(lubridate)
library(glue)
library(googledrive)
library(googlesheets4)
library(janitor)
library(crayon)
# GLOBAL VARIABLES --------------------------------------------------------
load_secrets()
temp_folder()
gdrive_submissions <- as_sheets_id("1gQvY1KnjreRO3jl2wzuVCKmKjUUgZDwByVK1c-bzpYI")
# RESOLVE BLANK ROW ISSUE -------------------------------------------------
#load table of submissions
df_subm <- read_sheet(gdrive_submissions, .name_repair = make_clean_names)
#limit submissions to w
df_subm_sel <- df_subm %>%
filter(operating_unit_country %in% c("Zambia", "Western Hemisphere Region/Guatemala"),
str_detect(hfr_fy_and_period, "FY22")) %>%
select(id = upload_your_hfr_file_s_here) %>%
separate_rows(id, sep = ", ") %>%
mutate(id = str_extract(id, "(?<=id=).*"),
name = map(id, ~ drive_get(as_id(.))$name)) %>%
unnest(name)
df_subm_sel <- df_subm_sel %>%
filter(name %in% c("HFR_FY22_08_WesternHemisphere_Guatemala_IntraHealth_20220615 - Daniel Muralles 20220616 160640.xlsx",
"HFR_FY22_May_Zambia_Partners8Data_20220615 - Mwila Kangwa 20220620 222824.xlsx"))
#download issue submissions
df_subm_sel %>%
pwalk(~drive_download(as_id(..1),
file.path(folderpath_tmp, ..2)))
#store
files_local <- list.files(folderpath_tmp, full.names = TRUE)
## FUNCTION TO resolve blank row issue
clean_subm <- function(path){
#identify country for pasting into meta tab of template
cntry <- read_excel(path, range = "C2", col_names = "country") %>% pull()
#identify template type to open a new template of that type
type <- read_excel(path, range = "C5", col_names = "type") %>% pull()
#print out status report to console
cat(paste(" ", green(basename(path)), "\n",
" OU: ", cntry, "\n",
" Type: ", type, "\n",
" Sheets: ", str_subset(excel_sheets(path), "meta", TRUE) %>% paste(collapse = ", "), "\n"
))
#determine which columns need to be kept based on template
col_keep <- switch(type,
"Long" = template_cols_long,
"Wide" = template_cols_wide,
"Wide - Limited" = template_cols_wide_lim)
#import problematic submission
#df_hfr <- hfr_import(path)
#df_hfr <- suppressMessages(hfr_import(path))
print("Importing templates ...")
# Alternative to original import
df_hfr <- hfr_import2(path, cols = col_keep)
print(glue("{nrow(df_hfr)} rows"))
#limit columns (remove blank cols) & resolve date issue from MS import
print("Fixing dates ...")
df_hfr_fix <- df_hfr %>%
hfr_fix_date() %>%
select(all_of(col_keep)) %>%
mutate(date = as.character(date))
#identify which columns are numeric to resolve decimal import issue
col_num <- setdiff(col_keep,
c("date", "orgunit", "orgunituid", "mech_code", "partner",
"operatingunit", "psnu", "indicator", "sex", "agecoarse",
"otherdisaggregate"))
#clean up decimal issue (caused by import)
print("fixing numeric values ...")
df_hfr_fix <- df_hfr_fix %>%
mutate(across(all_of(col_num), ~ as.integer(.)),
mech_code = str_remove(mech_code, "\\.0$"))
#print(glimpse(df_hfr_fix))
#use date to get submission period for meta tab
d <- unique(df_hfr_fix$date) %>% ymd() %>% median()
#create submission period to paste into meta tab
pd <- glue("FY{quarter(d, with_year = TRUE, fiscal_start = 10) %>% str_sub(3, 4)} {month(d, label = TRUE)}") %>% as.character()
#print out status report to console
cat(paste(" Date: ", unique(df_hfr_fix$date) %>% paste(collapse = ", "), "\n",
" Period: ", pd, "\n"
))
# cat(paste(" ", green(basename(path)), "\n",
# " OU: ", cntry, "\n",
# " Date: ", unique(df_hfr_fix$date) %>% paste(collapse = ", "), "\n",
# " Period: ", pd, "\n",
# " Type: ", type, "\n",
# " Sheets: ", str_subset(excel_sheets(path), "meta", TRUE) %>% paste(collapse = ", "), "\n"
# ))
#identify which template type to open and paste into
path_template <- case_when(type == "Long" ~ "templates/HFR_Submission_Template_Long.xlsx",
type == "Wide" ~ "templates/HFR_Submission_Template_Wide.xlsx",
type == "Wide - Limited" ~ "templates/HFR_Submission_Template_Wide_LIMITED.xlsx")
print(glue("Template: {basename(path_template)}"))
#open template
print("Loading template ....")
wb <- loadWorkbook(path_template)
#enter required data onto meta tab - country and period
print(glue("Writing metadata: OU = {cntry}, period = {pd}"))
writeData(wb, sheet = "meta", xy = c("C", 2), x = cntry)
writeData(wb, sheet = "meta", xy = c("C", 3), x = pd)
#paste values into HFR tab
print(glue("Writing data: rows = {nrow(df_hfr_fix)}"))
writeData(wb, sheet = "HFR", startRow = 3, colNames = FALSE, x = df_hfr_fix)
#create and apply same style to cells to match template format
# print("Creating Styles ...")
#
# cell_style <- createStyle(fontName = "Gill Sans MT",
# border = c("top", "bottom", "left", "right"))
#
# print("Adding Styles ...")
#
# addStyle(wb, sheet = "HFR", cell_style,
# cols = 1:ncol(df_hfr_fix),
# rows = 3:(nrow(df_hfr_fix)+2),
# gridExpand = TRUE)
#rename the file for exporting
folder_out <- file.path(dirname(path), "clean")
if(!dir.exists(folder_out))
dir.create(folder_out)
path_out <- file.path(folder_out,
basename(path) %>% str_remove(" -.*") %>% paste0("adj_no_styles.xlsx"))
#save file
print(glue("Saving workbook to: {path_out}"))
saveWorkbook(wb, path_out, overwrite = TRUE)
}
#' @title Read HFR Template
#' @note Making sure empty columns or columns with notes are excluded
#'
hfr_import2 <- function(filepath, cols){
df <- filepath %>%
readxl::excel_sheets() %>%
stringr::str_subset("HFR") %>%
purrr::map_dfr(function(.x) {
# Get columns list - less likely to have notes & empty columns
curr_cols <- names(read_excel(filepath, sheet = .x, skip=1, n_max=0))
# TODO - Compare curr_cols to col_keep and retain valid columns only
com_cols <- curr_cols %in% cols
idx_cols <- which(com_cols == TRUE)
# Read column based range and use second row as header
df <- readxl::read_excel(filepath,
sheet = .x,
range = cellranger::cell_cols(1:length(curr_cols)),
col_types = "text") %>%
janitor::row_to_names(., 1)
return(df)
})
if("mechanismid" %in% names(df))
df <- dplyr::rename(df, mech_code = mechanismid)
return(df)
}
hfr_read_wb <- function(filepath) {
print(filepath)
openxlsx::getSheetNames(filepath) %>%
stringr::str_subset("HFR") %>%
map_dfr(~openxlsx::readWorkbook(filepath, sheet = .x, startRow = 2))
}
##
#run function clean files
walk(files_local, clean_subm)
folderpath_tmp %>%
file.path("clean") %>%
list.files(pattern = ".xlsx$", full.names = T) %>%
map_dfr(hfr_read_wb)
edit_file <- "C:/Users/BKAGNI~1/AppData/Local/Temp/1/RtmpyImXmc/file4bc2e2c4f42/clean/HFR_FY22_08_WesternHemisphere_Guatemala_IntraHealth_20220615adj.xlsx"
read_excel(edit_file, sheet = "HFR", skip = 1)
wbb <- loadWorkbook(edit_file)
getSheetNames(edit_file)
#open temp folder
shell.exec(folderpath_tmp)
#check submission status
s3_objects(bucket = 'gov-usaid',
prefix = "ddc/uat/raw/hfr/incoming/",
n = Inf,
unpack_keys = TRUE) %>%
filter(nchar(sys_data_object) > 1) %>%
pull(sys_data_object)
# RESOLVE LIBERIA MMD ISSUE -----------------------------------------------
# df_subm <- read_sheet(gdrive_submissions, .name_repair = make_clean_names)
#
# df_subm_lbr <- df_subm %>%
# filter(operating_unit_country == "West Africa Region/Liberia",
# str_detect(hfr_fy_and_period, "FY22")) %>%
# select(id = upload_your_hfr_file_s_here) %>%
# mutate(id = str_extract(id, "(?<=id=).*"),
# name = map(id, ~ drive_get(as_id(.))$name)) %>%
# unnest(name)
#
# df_subm_lbr %>%
# pwalk(~drive_download(as_id(..1),
# file.path(folderpath_tmp, ..2)))
#
#
# files_local <- list.files(folderpath_tmp, full.names = TRUE)
#
#
# clean_sub <- function(path){
#
# df_lbr <- read_excel(path, sheet = 2, skip = 1, col_types = "text")
#
# df_lbr_fix <- df_lbr %>%
# hfr_fix_date() %>%
# select(-hfr_freq) %>%
# mutate(otherdisaggregate = str_extract(indicator, "(?<=MMD ).*"),
# indicator = str_replace(indicator, "TX_MMD.*", "TX_MMD"),
# otherdisaggregate = recode(otherdisaggregate,
# "< 3 months" = "<3 months",
# " 3-5 months" = "3-5 months",
# "6+ months" = "6 months or more"),
# operatingunit = "West Africa Region",
# psnu = "Liberia",
# date = as.character(date)) %>%
# relocate(otherdisaggregate, .after = agecoarse)
#
# cat(paste(basename(path), "\n",
# " Date: ", unique(df_lbr_fix$date), "\n",
# " Disaggs: ", unique(df_lbr_fix$otherdisaggregate) %>% paste(collapse = ', '),
# "\n"
# ))
#
# d <- unique(df_lbr_fix$date) %>% ymd()
#
# pd <- glue("FY{quarter(d, with_year = TRUE, fiscal_start = 10) %>% str_sub(3, 4)} {month(d, label = TRUE)}") %>% as.character()
#
# cntry <- "West Africa Region/Liberia"
#
# wb <- loadWorkbook("templates/HFR_Submission_Template_Long.xlsx")
#
# writeData(wb, sheet = "meta", xy = c("C", 2), x = cntry)
#
# writeData(wb, sheet = "meta", xy = c("C", 3), x = pd)
#
# writeData(wb, sheet = "HFR", startRow = 3, colNames = FALSE, x = df_lbr_fix)
#
# cell_style <- createStyle(fontName = "Gill Sans MT",
# border = c("top", "bottom", "left", "right"))
#
# addStyle(wb, sheet = "HFR", cell_style,
# cols = 1:ncol(df_lbr_fix),
# rows = 3:(nrow(df_lbr_fix)+2),
# gridExpand = TRUE)
#
# path_out <- str_remove(path, " -.*") %>% paste0("adj.xlsx")
#
# saveWorkbook(wb, path_out, overwrite = TRUE)
#
# }
#
# walk(files_local, clean_sub)
#
# unlink(files_local)
#
# shell.exec(folderpath_tmp)
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.