# Source file: fixregint.R
#
# GPL-3 License
#
# Copyright (C) 2019-2023 Victor Ordu.
## Approximately matches a string on a list of regions i.e States or LGAs.
## @param str A string to be matched
## @param regions The values to be matched against, specifically from regions
##
## @return A vector of the same length as str with the approximately
## matched value, which in the case of misspelling should be the correct one.
##
## This function is mapped to a vector of states/LGAs
.fix_region_internal <- function(x, region, interactive = FALSE)
{
stopifnot(is.character(x), is.character(region))
cant.fix <- character()
fix.status <- character()
## Internal function to enable identification of entries that need to
## be fixed and preparing attributes that will enable further processing
## downstream.
get_proper_value <- function(str, regions) {
abbrFCT <- .fct_options("abbrev")
if (!is.na(match(str, regions)))
return(str)
if (inherits(regions, "states")) {
if (agrepl(str, abbrFCT, max.distance = .pkgLevDistance())
&& identical(toupper(str), abbrFCT))
return(abbrFCT)
}
## First remove spaces around slashes and hyphens
## Note: run `.__why_no_pipe()` for rationale behind this approach
str <- gsub("\\s\\/", "/", str)
str <- gsub("\\/\\s", "/", str)
str <- sub("-\\s", "-", str)
str <- sub("^Egbado/", "", str) ## TODO: Address hard-coding
## Now, check for exact matching.
rgx <- paste0('^', str, '$')
good <- unique(grep(rgx, regions, value = TRUE, ignore.case = TRUE))
if (length(good) == 1L)
return(good)
## Otherwise check for approximate matches.
fixed <- agrep(str, regions, value = TRUE, max.distance = 1)
numFixed <- length(fixed)
if (numFixed == 1L) {
fs <- c(fix.status, fixed)
names(fs) <- c(names(fix.status), str)
fix.status <<- fs
return(fixed)
}
if (numFixed > 1L && !interactive) {
multimatch <- paste(fixed, collapse = ", ")
cli::cli_inform(
"'{str}' approximately matched more than one region - {multimatch}"
)
}
# if we get to this point, return the misspelt string unchanged
cant.fix <<- c(cant.fix, str)
str
}
spellchecked <-
vapply(x,
get_proper_value,
character(1),
regions = region,
USE.NAMES = FALSE)
attr(spellchecked, "misspelt") <- sort(unique(cant.fix))
## Reduce data for reporting on fixes to only the
## unique instances i.e. avoid redundant output
if (length(fix.status) > 1L) {
allfix <- names(fix.status)
if (anyDuplicated(allfix)) {
dups <- which(duplicated(allfix))
fix.status <- fix.status[-dups]
}
}
attr(spellchecked, "regions.fixed") <- fix.status
spellchecked
}
# Tells the user about what repairs have been made to the spellings
# @param obj - the checked object, which has attributes with relevant details
# @param usedialog Whether to display a dialog (on Windows only).
.report_on_fixes <- function(obj, usedialog = FALSE)
{
spell.details <- attributes(obj)
badspell <- spell.details$misspelt
hasBadspell <- !identical(badspell, character(0))
msg.bad <- msg.good <- ""
if (hasBadspell) {
hdr.bad <- .messageHeader("Fix(es) not applied")
nofix.bullets <-
vapply(badspell, function(x) paste("*", x), character(1))
msg.bad <- paste0(hdr.bad, paste(nofix.bullets, collapse = "\n"))
}
# Put the message together
fixes <- spell.details$regions.fixed
if (!identical(fixes, character(0))) {
hdr.good <- .messageHeader("Successful fix(es)")
fixed.bullets <-
mapply(function(a, z) {
sprintf("* %s => %s", a, z)
},
names(fixes), fixes)
msg.good <- paste0(hdr.good, paste(fixed.bullets, collapse = "\n"))
if (hasBadspell)
msg.good <- paste0(msg.good, "\n") # just add newline
}
if (!nchar(msg.good) && !nchar(msg.bad))
return()
final.msg <- paste(msg.good, msg.bad, sep = "\n")
if (usedialog)
utils::winDialog("ok", final.msg)
else
cli::cli_alert_info(final.msg)
}
.messageHeader <- function(hdr)
{
stopifnot(is.character(hdr))
hdr <- paste0(hdr, ":")
dashes <- strrep("-", nchar(hdr))
hdr <- paste(hdr, dashes, sep = '\n')
paste0(hdr, "\n")
}
## Interactively fixes regions that are bad - this function is primarily
## used for repairing LGA names, since they are so many.
## @param lga.list The vector of LGA names that is being repaired. This vector
## is generated by `.fix_region_internal` and has an attribute called
## `misspelt`, which is the collection of names needing repair.
## @param usedialog Whether to use dialog in prompts (only on Windows)
.fix_lgas_interactive <- function(lga.list, usedialog = FALSE)
{
stopifnot(interactive())
allLgas <- lgas()
menuopt <- integer()
skipped <- character()
bad.values <- attr(lga.list, "misspelt")
# This list doesn't need to be re-created with each loop iteration
# that's why it's been created here.
special.options <- list(
retry = "RETRY",
skip = "SKIP",
quit = "QUIT"
)
for (bad in bad.values) {
msg.fixWhich <- paste("Fixing", sQuote(bad))
repeat {
prompt <- paste(msg.fixWhich, "Enter a search term: ", sep = ' - ')
pattern <- if (usedialog)
utils::winDialogString(prompt, "")
else
readline(prompt)
if (pattern == "" || is.null(pattern))
return()
used.lgas <-
sort(grep(pattern, allLgas, value = TRUE, ignore.case = TRUE))
choices <- c(used.lgas, unlist(unname(special.options)))
menuopt <- utils::menu(choices, graphics = usedialog, "Select the LGA")
chosen <- choices[menuopt]
if (chosen != special.options$retry)
break
}
if (chosen == special.options$quit)
break
if (chosen == special.options$skip) {
skipped <- c(skipped, bad)
next
}
# Note that pipes were deliberately not used here.
lga.list <- sub(bad, chosen, lga.list, fixed = TRUE)
attr.misspelt <- attr(lga.list, "misspelt")
attr.misspelt <- attr.misspelt[attr.misspelt != bad]
attr(lga.list, "misspelt") <- attr.misspelt
attr.regfixed <- attr(lga.list, "regions.fixed")
attr.regfixed <- c(attr.regfixed, chosen)
names(attr.regfixed) <- c(names(attr.regfixed), bad)
}
if (length(skipped)) {
msg <-
paste(
"The following items were skipped and should be fixed manually:",
paste(skipped, collapse = ", ")
)
if (usedialog)
utils::winDialog("ok", msg)
else
cli::cli_inform(msg)
}
lga.list
}
.assert_region <- function(x) {
if (!is_state(x) && !is_lga(x))
cli::cli_abort("{sQuote(x, q = FALSE)} is not a valid region")
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.