#' Expand a 'type' column in a data frame
#'
#' Takes a data frame with a column of type codes
#' (\emph{main type} or \emph{subtype }codes),
#' and, under certain conditions, adds new rows with codes of the associated
#' \emph{subtypes} and \emph{main types}, respectively.
#' It allows to do sensible selections and joins with interpreted forms of the
#' \code{habitatmap_stdized} and \code{watersurfaces_hab} data sources:
#' \code{habitatmap_terr},
#' \code{read_watersurfaces_hab(interpreted = TRUE)}.
#' If the data frame has one or more grouping variables, by default the
#' operation is done independently for each group in turn.
#'
#' The extra rows in the data frame take the values for other variables
#' from the rows with which they are associated, based on the
#' subtype - main type relation.
#' Type codes in the data frame are verified to comply with the codes from the
#' \code{\link{types}} data source.
#' A warning is given when they don't.
#'
#' Main type codes are always expanded with the subtype codes that belong to it.
#'
#' The applied approach to add main type codes only makes sense
#' assuming that the result is to be confronted
#' with one of the \emph{above listed} geospatial data sources.
#'
#' In order to add main type codes based on
#' subtype codes that are present in the type column, specific conditions have
#' to be met:
#' \itemize{
#' \item{for 2330: both subtype codes must be present}
#' \item{for 5130: 5130_hei must be present (note that only the main type code
#' occurs in the targeted data sources)}
#' \item{for 6230: 6230_ha, 6230_hmo and 6230_hn must be present
#' (not the rare 6230_hnk)}
#' \item{for 91E0: 91E0_va, 91E0_vm and 91E0_vn must be present
#' (not the rarer 91E0_sf, 91E0_vc and 91E0_vo)}
#' }
#' However, it is possible to relax this requirement by setting
#' \code{strict = FALSE}.
#' This will add the main type code whenever \emph{one} of the above required
#' subtype codes is present.
#' In all cases no other main type codes are added apart from
#' 2330, 5130, 6230 and 91E0.
#' This is because the data sources with which the result
#' is to be matched (see Description) don't contain certain main type codes,
#' and because it makes no sense in other cases
#' (rbbkam, rbbzil & 9120 in the \code{habitatmap} do not refer to a
#' main type but to a non-defined subtype with no specific code).
#'
#'
#' @param x An object of class \code{data.frame}.
#' @param type_var A string.
#' The name of the data frame variable that holds the type codes.
#' Defaults to \code{type}.
#' @param use_grouping Logical.
#' If the data frame has one or more grouping variables
#' (class \code{grouped_df}),
#' is the operation to be performed independently
#' for each group in turn?
#' @param strict Logical.
#' Apply conditions before expanding subtype codes to main type codes?
#'
#' @return
#' A data frame, either identical or longer than the input data frame.
#'
#' @seealso
#' \code{\link{read_types}},
#' \code{\link{read_habitatmap_terr}},
#' \code{\link{read_watersurfaces_hab}}
#'
#' @examples
#' library(dplyr)
#' x <-
#' n2khabmon::read_scheme_types() %>%
#' filter(scheme == "GW_05.1_terr")
#' expand_types(x)
#' expand_types(x, strict = FALSE)
#'
#' x <-
#' n2khabmon::read_scheme_types() %>%
#' filter(scheme == "GW_05.1_terr") %>%
#' group_by(typegroup)
#' expand_types(x)
#' expand_types(x, use_grouping = FALSE) # equals above example
#'
#' x <-
#' tribble(
#' ~mycode, ~obs,
#' "2130", 5,
#' "2190", 45,
#' "2330_bu", 8,
#' "2330_dw", 8,
#' "5130_hei", 7,
#' "6410_mo", 78,
#' "6410_ve", 4,
#' "91E0_vn", 10
#' )
#' expand_types(x, type_var = "mycode")
#' expand_types(x, type_var = "mycode", strict = FALSE)
#'
#' @importFrom assertthat
#' assert_that
#' is.string
#' is.flag
#' noNA
#' @importFrom tidyr
#' nest
#' unnest
#' @importFrom purrr
#' map
#' @importFrom dplyr
#' %>%
#' mutate
#' select
#' group_by_at
#' group_vars
#' ungroup
#' @importFrom rlang .data
#' @export
expand_types <- function(x,
type_var = "type",
use_grouping = TRUE,
strict = TRUE) {
assert_that(inherits(x, "data.frame"))
assert_that(is.string(type_var))
assert_that(type_var %in% colnames(x),
msg = "type_var must be a variable name in x."
)
assert_that(is.flag(use_grouping), noNA(use_grouping))
assert_that(is.flag(strict), noNA(strict))
types <-
read_types() %>%
select(1:3)
subtypes <-
types %>%
filter(.data$typelevel == "subtype") %>%
select(1, 3)
if (!all(unique(x[[type_var]]) %in% types$type)) {
warning("The data frame contains type codes which are not standard.")
}
if (!use_grouping) {
expand_types_plain(
x = x,
type_var = type_var,
strict = strict,
types = types,
subtypes = subtypes
)
} else {
x %>%
nest(data = -!!(group_vars(x))) %>%
ungroup() %>%
mutate(newdata = map(.data$data,
expand_types_plain,
type_var = type_var,
strict = strict,
types = types,
subtypes = subtypes
)) %>%
select(-.data$data) %>%
unnest(cols = .data$newdata) %>%
group_by_at(x %>% group_vars()) %>%
select(colnames(x))
}
}
#' Expand a 'type' column in a data frame (grouping not taken into account)
#'
#' This is the workhorse for \code{\link{expand_types}}.
#'
#' @inheritParams expand_types
#'
#' @return A data frame.
#'
#' @importFrom dplyr
#' %>%
#' left_join
#' select
#' filter
#' rename
#' group_by
#' summarise
#' anti_join
#' pull
#' inner_join
#' bind_rows
#' mutate
#' distinct
#' @importFrom magrittr
#' set_colnames
#' @importFrom rlang .data
#' @keywords internal
expand_types_plain <- function(x,
type_var = "type",
strict = TRUE,
types,
subtypes) {
orig_types <-
x[, type_var] %>%
rename(orig_abcd = type_var)
# main types to add:
suppressWarnings(
join_main_types <-
subtypes %>%
filter(.data$main_type == "2330" |
.data$type %in% c(
"6230_ha", "6230_hmo", "6230_hn",
"5130_hei",
"91E0_va", "91E0_vm", "91E0_vn"
)) %>%
left_join(
orig_types %>%
mutate(present = 1),
by = c("type" = "orig_abcd")
) %>%
group_by(.data$main_type) %>%
summarise(add = if (strict) {
all(!is.na(.data$present))
} else {
any(!is.na(.data$present))
}) %>%
filter(.data$add) %>%
# only adding codes absent from original data frame:
anti_join(orig_types, by = c("main_type" = "orig_abcd")) %>%
pull(.data$main_type)
)
# expanding main types to their subtypes and adding the latter:
suppressWarnings(
x_expanded <-
x %>%
rename(orig_abcd = type_var) %>%
inner_join(subtypes %>% rename(type_abcd = .data$type),
by = c("orig_abcd" = "main_type")
) %>%
mutate(orig_abcd = .data$type_abcd) %>%
select(-.data$type_abcd) %>%
anti_join(
x %>%
rename(orig_abcd = type_var),
by = "orig_abcd"
) %>%
set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>%
bind_rows(x, .)
)
# adding main_types:
suppressWarnings(
x_expanded <-
x %>%
rename(orig_abcd = type_var) %>%
inner_join(
subtypes %>%
rename(main_type_abcd = .data$main_type),
by = c("orig_abcd" = "type")
) %>%
filter(.data$main_type_abcd %in% join_main_types) %>%
mutate(orig_abcd = if (is.factor(.data$orig_abcd)) {
factor(.data$main_type_abcd,
levels = levels(.data$orig_abcd)
)
} else {
.data$main_type_abcd
}) %>%
select(-.data$main_type_abcd) %>%
distinct() %>%
set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>%
bind_rows(x_expanded, .)
)
return(x_expanded)
}
#' Convert encoding of character and factor variables in a data frame
#'
#' @details
#' Encoding strings: all \code{R} platforms support \code{""} (for the
#' encoding of the current
#' locale), \code{"latin1"} and \code{"UTF-8"}.
#' See \code{\link[base]{iconv}} for more information.
#'
#' @param x An object with the `data.frame`
#' class (such as `data.frame` or `sf`)
#' @param colnames Should column names be converted as well?
#'
#' @inheritParams base::iconv
#'
#' @md
#'
#' @return
#' The original object, with character variables (and levels of
#' (character) factor variables) converted to the specified encoding.
#'
#' @keywords internal
#' @importFrom dplyr
#' %>%
#' mutate_if
#' @importFrom assertthat
#' assert_that
#' is.string
#' is.flag
#' noNA
convertdf_enc <- function(x,
from = "",
to = "UTF-8",
sub = NA,
colnames = FALSE) {
assert_that(inherits(x, "data.frame"))
assert_that(
is.string(to),
is.string(from),
is.string(sub) | is.na(sub)
)
assert_that(is.flag(colnames), noNA(colnames))
is_chfact <- function(vec) {
if (is.factor(vec)) {
is.character(levels(vec))
} else {
FALSE
}
}
conv_levels <- function(fact, from, to, sub) {
levels(fact) <- iconv(levels(fact),
from = from,
to = to,
sub = sub
)
return(fact)
}
x %>%
mutate_if(is.character,
iconv,
from = from,
to = to,
sub = sub
) %>%
mutate_if(is_chfact,
conv_levels,
from = from,
to = to,
sub = sub
) %>%
{
if (colnames) {
`colnames<-`(., iconv(colnames(.),
from = from,
to = to,
sub = sub
))
} else {
.
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.