Nothing
#' Read a municipal data file to a tibble
#'
#' This function is a wrapper around `readxl::read_excel()`, reading a specific
#' municipal data file for a specific year and a specific data domain. Its added
#' value is in its use of `row_to_names_fill()` and its pre-defined parameters for
#' every year and its specific quirks in the Excel headers. For advanced users,
#' the full set of options is available with `il.cbs.muni:::df_cbs_muni_params`.
#'
#' @param path A character vector of length 1, denoting the local file path to the
#' municipal data file. A full list of available files by the CBS is at the
#' [relevant CBS page](https://www.cbs.gov.il/he/publications/Pages/2019/%D7%94%D7%A8%D7%A9%D7%95%D7%99%D7%95%D7%AA-%D7%94%D7%9E%D7%A7%D7%95%D7%9E%D7%99%D7%95%D7%AA-%D7%91%D7%99%D7%A9%D7%A8%D7%90%D7%9C-%D7%A7%D7%95%D7%91%D7%A6%D7%99-%D7%A0%D7%AA%D7%95%D7%A0%D7%99%D7%9D-%D7%9C%D7%A2%D7%99%D7%91%D7%95%D7%93-1999-2017.aspx).
#' @param year A numeric vector of length 1 denoting which year the data file
#' pointed to by `path` is for. Currently supporting only 2003 and later, since
#' before 2003 the data structure is very different.
#' @param muni_type A character vector of length 1, one of
#' `c("all", "city_lc", "rc")`. Since 2016, all municipal types are bundled together
#' in the same sheets, but before 2016 there are different sheets for cities and
#' local councils (`"city_lc"`) and regional councils (`"rc"`). This parameter
#' chooses which sheet you would read.
#' @param data_domain A character vector of length 1, one of
#' `c("physical", "budget", "summary", "labor_force_survey", "social_survey")`.
#' Every Excel municipal data file has a few different data domains, most notably
#' physical and population data, and budget data.
#' @param cols <[tidy-select](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html)>
#' Columns to keep. The default `NULL` keeps all columns. The selection is
#' wrapped in `dplyr::all_of()` before being handed to `dplyr::select()`.
#' @param col_names A character vector containing the new column names of the
#' output tibble. If `NULL` then the tibble uses the original column names.
#' Must be the same length as the number of columns picked in `cols`.
#'
#' @return A tibble with municipal data for a specific year, where every row is a
#' municipality and every column is a different variable for this municipality in
#' that year. Be advised all columns are of type character, so you need to parse
#' the data types yourself at will. Column names are merged from the relevant headers,
#' and only single whitespaces are kept. Rows with more than 90% empty cells (usually
#' rows with non-data notes) are removed.
#'
#' @section Errors:
#' Invalid input is reported through classed `rlang` errors:
#' `read_cbs_muni_invalid_path`, `read_cbs_muni_path_not_found`,
#' `read_cbs_muni_invalid_year`, `read_cbs_muni_invalid_col_names`,
#' `read_cbs_muni_params_not_found` (no unique entry in the parameter table for
#' the requested `year`, `muni_type` and `data_domain`) and
#' `read_cbs_muni_col_names_length_mismatch` (`col_names` does not have one name
#' per selected column).
#'
#' @importFrom rlang .env
#' @export
#' @md
#'
#' @examples
#' df <- read_cbs_muni(
#'   system.file("extdata", "p_libud_2021.xlsx", package = "il.cbs.muni"),
#'   year = 2021,
#'   data_domain = "physical"
#' )
#'
#' df |>
#'   dplyr::select(1:15) |>
#'   dplyr::glimpse()
read_cbs_muni <- function(
    path, year,
    muni_type = c("all", "city_lc", "rc"),
    data_domain = c(
      "physical", "budget", "summary", "labor_force_survey", "social_survey"
    ),
    cols = NULL,
    col_names = NULL
) {
  # Validate path
  if (!is.character(path) || length(path) != 1) {
    rlang::abort(
      "`path` must be a character vector of length 1.",
      class = "read_cbs_muni_invalid_path"
    )
  }
  if (!file.exists(path)) {
    rlang::abort(
      c(
        "`path` does not exist.",
        "i" = paste0("Provided path: ", path)
      ),
      class = "read_cbs_muni_path_not_found"
    )
  }

  # Validate year
  if (!is.numeric(year) || length(year) != 1) {
    rlang::abort(
      "`year` must be a numeric vector of length 1.",
      class = "read_cbs_muni_invalid_year"
    )
  }

  # Validate col_names if provided
  if (!is.null(col_names) && !is.character(col_names)) {
    rlang::abort(
      "`col_names` must be NULL or a character vector.",
      class = "read_cbs_muni_invalid_col_names"
    )
  }

  muni_type <- rlang::arg_match(muni_type)
  data_domain <- rlang::arg_match(data_domain)

  # The parameter table has columns named exactly like these arguments, so the
  # `.env` pronoun is used to make explicit which side of each comparison is the
  # table column and which is the local value.
  params <- df_cbs_muni_params |>
    dplyr::filter(
      year == .env$year,
      muni_type == .env$muni_type,
      data_domain == .env$data_domain
    )

  # Exactly one parameter row is needed to know which sheet and header rows to
  # read; anything else means the requested combination is not supported.
  if (nrow(params) != 1) {
    rlang::abort(
      c(
        "Could not find a unique set of reading parameters for this request.",
        "x" = paste0(
          "Found ", nrow(params), " matching parameter rows; expected exactly 1."
        ),
        "i" = paste0(
          "Requested: year = ", year,
          ", muni_type = \"", muni_type,
          "\", data_domain = \"", data_domain, "\"."
        ),
        "i" = paste0(
          "See `il.cbs.muni:::df_cbs_muni_params` for the supported ",
          "combinations."
        )
      ),
      class = "read_cbs_muni_params_not_found"
    )
  }

  # Read everything as text without headers; the real headers are built from
  # the rows listed in the parameter table. Messages emitted while reading are
  # silenced.
  df <- readxl::read_excel(
    path = path,
    sheet = params$sheet_number,
    col_names = FALSE,
    col_types = "text"
  ) |>
    suppressMessages() |>
    row_to_names_fill(
      row_number = unlist(params$col_names_row_number),
      fill_missing = unlist(params$fill_missing)
    ) |>
    janitor::remove_empty("rows", cutoff = 0.1)

  # `cols` is inspected as an unevaluated expression so the default `NULL`
  # means "keep every column".
  if (!rlang::quo_is_null(rlang::enquo(cols))) {
    df <- df |>
      dplyr::select(dplyr::all_of({{ cols }}))
  }

  if (!is.null(col_names)) {
    # Fail early with a clear message instead of producing a mis-named table.
    if (length(col_names) != ncol(df)) {
      rlang::abort(
        c(
          "`col_names` must have one name per selected column.",
          "i" = paste0(
            "Selected columns: ", ncol(df),
            "; names supplied: ", length(col_names), "."
          )
        ),
        class = "read_cbs_muni_col_names_length_mismatch"
      )
    }
    names(df) <- col_names
  } else {
    # Merged header names can carry repeated/leading/trailing whitespace.
    names(df) <- stringr::str_squish(names(df))
  }

  df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.