library(kokudosuuchiUtils) packageDescription("kokudosuuchiUtils")$Built
library(dplyr, warn.conflicts = FALSE)
html_data <- readr::read_csv(rprojroot::find_package_root_file("inst/extdata/html_data.csv"))
html_data %>% filter(stringr::str_detect(.data$type, "列挙型")) %>% select(identifier, code, name, type) %>% mutate_all(funs(stringr::str_replace_all(., "\\s", " "))) %>% knitr::kable()
Collect results into enum_code
.
enum_code <- list()
Enum type ("列挙型") is written in one of the following formats:
列挙型
(only we can do for this is to ignore)列挙型(type1、type2、type3)
(implicitly numbered by sequence of numbers)列挙型(type1=1、type2=2、type3=3)
列挙型(1:type1, 2:type2、3:type3)
html_data_enums <- html_data %>% filter(stringr::str_detect(.data$type, "列挙型[^$]"))
Join codes from KSJMetadata_code
.
html_data_enums <- html_data_enums %>% left_join(KSJMetadata_code, by = c("identifier", "name")) %>% select(identifier, name, code = code.y, type)
N04
should be ignored, as it is not about the codes in data but ones in colnames.
filter(html_data_enums, is.na(code)) %>% knitr::kable()
html_data_enums %>% filter(!stringr::str_detect(.data$type, ":|:|=")) %>% mutate_all(funs(stringr::str_replace_all(., "\\s", " "))) %>% knitr::kable()
enum_code_type2 <- html_data_enums %>% filter(!stringr::str_detect(.data$type, ":|:|=")) %>% # ignore other than A03 filter(.data$identifier == "A03") %>% { setNames(.$type, .$code) } %>% purrr::map(stringr::str_replace, pattern = "^列挙型((.*))$", replacement = "\\1") %>% purrr::map(stringr::str_split, pattern = "、") %>% purrr::map(1L) %>% purrr::map(~ setNames(., seq_along(.)))
html_data_enums %>% filter(stringr::str_detect(.data$type, "=")) %>% mutate_all(funs(stringr::str_replace_all(., "\\s", " "))) %>% knitr::kable()
enum_code_type3 <- html_data_enums %>% filter(stringr::str_detect(.data$type, "=")) %>% { setNames(.$type, .$code) } %>% purrr::map(stringr::str_replace, pattern = "^列挙型((.*))$", replacement = "\\1") %>% purrr::map(stringr::str_split, pattern = "(?<=\\d)、") %>% purrr::map(1L) %>% purrr::map(function(x) { m <- stringr::str_split_fixed(x, "=", 2) setNames(m[, 1, drop = TRUE], m[, 2, drop = TRUE]) })
html_data_enums %>% filter(stringr::str_detect(.data$type, ":|:")) %>% mutate_all(funs(stringr::str_replace_all(., "\\s", " "))) %>% knitr::kable()
As mentioned above, treat N04
differently.
enum_code_type4 <- html_data_enums %>% mutate(type = stringr::str_replace_all(.data$type, "\\s", "")) %>% # ignore N04 filter(stringr::str_detect(.data$type, ":|:"), .data$identifier != "N04") %>% { setNames(.$type, .$code) } %>% purrr::map(stringr::str_replace, pattern = "^列挙型((.*))$", replacement = "\\1") %>% purrr::map(stringr::str_split, pattern = "、") %>% purrr::map(1L) %>% purrr::map(function(x) { m <- stringr::str_split_fixed(x, ":|:", 2) setNames(m[, 2, drop = TRUE], m[, 1, drop = TRUE]) })
html_data %>% filter(stringr::str_detect(.data$type, "コードリスト.*[:=:=]")) %>% select(identifier, code, name, type) %>% mutate_all(funs(stringr::str_replace_all(., "\\s", " "))) %>% knitr::kable()
html_data_psuedo_enums_num_first <- html_data %>% filter(stringr::str_detect(.data$type, "コードリスト.*\\d[:=:=]")) html_data_psuedo_enums_code_first <- html_data %>% filter(stringr::str_detect(.data$type, "コードリスト.*[:=:=]\\d")) enum_code_psuedo_num_first <- html_data_psuedo_enums_num_first$type %>% stringi::stri_trans_nfkc() %>% stringr::str_replace_all(c( "^コードリスト型?(「都市計画決定コード」)?" = "", "※.*$" = "", "^\\(|\\)$" = "" ) ) %>% # workaround for P24_011 stringr::str_replace_all("(?<!^)(\\d)(?=:)", "、\\1") %>% stringr::str_split("、") %>% purrr::map(function(x) { m <- stringr::str_split_fixed(x, "[:=]", 2) setNames(m[, 2, drop = TRUE], m[, 1, drop = TRUE]) }) %>% setNames(html_data_psuedo_enums_num_first$code) enum_code_psuedo_code_first <- html_data_psuedo_enums_code_first$type %>% stringi::stri_trans_nfkc() %>% stringr::str_replace_all(c( "^コードリスト型?(\\(種別コード\\)|「種別コード」)?" = "", "^\\(|\\)$" = "" ) ) %>% # workaround for P24_011 stringr::str_replace_all("(?<!^)(\\d)(?=:)", "、\\1") %>% stringr::str_split("、") %>% purrr::map(function(x) { m <- stringr::str_split_fixed(x, "[:=]", 2) setNames(m[, 1, drop = TRUE], m[, 2, drop = TRUE]) }) %>% setNames(html_data_psuedo_enums_code_first$code)
enum_codes_all <- c( enum_code_type2, enum_code_type3, enum_code_type4, enum_code_psuedo_code_first, enum_code_psuedo_num_first )
devtools::use_data(enum_codes_all, overwrite = TRUE)
file.copy(rprojroot::find_package_root_file("data/enum_codes_all.rda"), rprojroot::find_package_root_file("../kokudosuuchi/data/"), overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.