Nothing
#' Match raw input loan books with ABCD for PACTA for Supervisors analysis
#'
#' @description
#' `match_loanbooks()` runs the necessary steps to match the raw input loan
#' books with the asset based company data (ABCD) used in the PACTA for
#' Supervisors analysis. Specifically, it prepares matched loan books based on
#' name matching or direct identifiers, depending on the configuration. The
#' output matched loan books need to be manually validated for further
#' processing. Parameters for the matching step are read from a `config.yml`
#' file and follow the options available in `r2dii.match::match_name`. The
#' function is called for its side effects and writes the prepared data sets to
#' the directory specified by `dir_matched_loanbooks` in the `config.yml`.
#'
#' @param config either a path to a config.yml file or a list of parameters
#'
#' @return
#'
#' `match_loanbooks()` returns `NULL` invisibly. The function is called for its
#' side effects and writes the prepared data sets to the directory specified by
#' `dir_matched_loanbooks` in the `config.yml`.
#'
#' @examples
#' \dontrun{
#' config <- "path/to/config.yml"
#' match_loanbooks(config)
#' }
#'
#' @export
match_loanbooks <- function(config) {
config <- load_config(config)
# input paths for match_loanbooks
dir_loanbooks <- get_dir_loanbooks(config)
dir_prepared_abcd <- get_dir_prepared_abcd(config)
# output path for match_loanbooks
dir_matched_loanbooks <- get_dir_matched_loanbooks(config)
if (dir.exists(dir_matched_loanbooks)) {
ask_for_permission(
"The output directory defined by the {.var dir_matched_loanbooks} parameter in your config already exists.\n
{.path {dir_matched_loanbooks}}\n
Would you like to delete it and replace it with the output of the current run?"
)
unlink(dir_matched_loanbooks, recursive = TRUE)
}
dir.create(dir_matched_loanbooks, recursive = TRUE, showWarnings = FALSE)
matching_by_sector <- get_match_by_sector(config)
matching_min_score <- get_match_min_score(config)
matching_method <- get_match_method(config)
# argument p only applies for Jaro-Winkler method
if (matching_method == "jw") {matching_p <- get_match_p(config)}
matching_overwrite <- get_match_overwrite(config)
matching_join_id <- get_match_join_id(config)
matching_use_manual_sector_classification <- get_use_manual_sector_classification(config)
if (matching_use_manual_sector_classification) {
path_manual_sector_classification <- get_path_manual_sector_classification(config)
}
# validate config values----
assert_length(dir_loanbooks, 1L)
assert_inherits(dir_loanbooks, "character")
assert_dir_exists(dir_loanbooks, desc = "Input - loanbooks")
assert_length(dir_prepared_abcd, 1L)
assert_inherits(dir_prepared_abcd, "character")
assert_dir_exists(dir_prepared_abcd, desc = "Output - prepare ABCD")
assert_file_exists(file.path(dir_prepared_abcd, "abcd_final.csv"), desc = "ABCD final")
assert_length(dir_matched_loanbooks, 1L)
assert_inherits(dir_matched_loanbooks, "character")
assert_dir_exists(dir_matched_loanbooks, desc = "Output - Matched loanbooks")
assert_length(matching_by_sector, 1L)
assert_inherits(matching_by_sector, "logical")
assert_length(matching_min_score, 1L)
assert_inherits(matching_min_score, "numeric")
assert_length(matching_method, 1L)
assert_inherits(matching_method, "character")
if (matching_method == "jw") {
assert_length(matching_p, 1L)
assert_inherits(matching_p, "numeric")
}
if (!is.null(matching_overwrite)) {
assert_inherits(matching_overwrite, "data.frame")
# cols are based on r2dii.data::overwrite_demo
assert_expected_columns(
data = matching_overwrite,
cols = c("level", "id_2dii", "name", "sector", "source"),
desc = "matching_overwrite"
)
}
if (!is.null(matching_join_id)) {
assert_inherits(matching_join_id, "character")
}
assert_length(matching_use_manual_sector_classification, 1L)
assert_inherits(matching_use_manual_sector_classification, "logical")
# path to manual sector classification only required if boolean TRUE
if (matching_use_manual_sector_classification) {
assert_length(path_manual_sector_classification, 1L)
assert_inherits(path_manual_sector_classification, "character")
assert_file_exists(path_manual_sector_classification, desc = "Manual sector classification")
}
# load data----
## load abcd----
abcd <- readr::read_csv(
file.path(dir_prepared_abcd, "abcd_final.csv"),
col_select = dplyr::all_of(cols_abcd),
col_types = col_types_abcd_final
)
## load sector classification system----
if (matching_use_manual_sector_classification) {
sector_classification_system <- readr::read_csv(
file = path_manual_sector_classification,
col_types = col_types_sector_classification,
col_select = dplyr::all_of(col_select_sector_classification)
)
} else {
sector_classification_system <- r2dii.data::sector_classifications
}
## load raw loan books----
list_raw <- list.files(path = dir_loanbooks, pattern = "[.]csv$")
assert_any_file_exists(list_raw, dir_loanbooks, "dir_input", "raw loan book CSVs")
raw_lbk <- readr::read_csv(
file = file.path(dir_loanbooks, list_raw),
col_types = col_types_raw,
id = "group_id"
) %>%
dplyr::mutate(group_id = tools::file_path_sans_ext(basename(.data[["group_id"]]))) %>%
dplyr::group_split(.data[["group_id"]])
# match and save loan books----
cli::cli_progress_bar(
total = length(raw_lbk),
format = "{cli::pb_spin} Matching loanbooks {cli::pb_current}/{cli::pb_total} | ETA: {cli::pb_eta}"
)
for (i in seq_along(raw_lbk)) {
group_name <- unique(raw_lbk[[i]][["group_id"]])
## match data----
matched_lbk_i <- r2dii.match::match_name(
loanbook = raw_lbk[[i]],
abcd = abcd,
by_sector = matching_by_sector,
min_score = matching_min_score,
method = matching_method,
p = matching_p,
overwrite = matching_overwrite,
join_id = matching_join_id,
sector_classification = sector_classification_system
)
## write matched data to file----
matched_lbk_i %>%
readr::write_csv(
file = file.path(dir_matched_loanbooks, glue::glue("matched_lbk_{group_name}.csv")),
na = ""
)
cli::cli_progress_update()
}
cli::cli_progress_done()
write_manifest(
config = config,
path = file.path(dir_matched_loanbooks, "manifest.yml"),
prior_input_paths = c(dir_prepared_abcd)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.