#' Filter a dataset from openPoland API.
#'
#' \code{openPolandFilter} filters a dataset by the given criteria and returns
#' a data frame. All supplied criteria must hold for a row to be kept; rows
#' for which a criterion cannot be evaluated (missing values) are dropped.
#'
#' @param data A data table that is a result of \code{\link{openPolandData}}
#'   function. It is converted with \code{as.data.frame} and must contain the
#'   columns \code{nts}, \code{year} and \code{name} for the corresponding
#'   filters to be usable.
#' @param nts A single number or character string. A single character
#'   (\code{"1"} to \code{"5"}) selects all territorial units of that NTS
#'   level; a value of 10 characters is matched (as a regular expression)
#'   against the NTS id of the territorial unit. Any other value is ignored
#'   with a warning. Overridden by \code{unit} when \code{unit} is one of
#'   "region", "voivodship", "subregion" or "powiat".
#' @param unit A character string of predefined values: "region",
#'   "voivodship", "subregion", "powiat", "gmina". The first four are
#'   shortcuts for \code{nts} levels 1 to 4. "gmina" keeps level 5 units whose
#'   NTS id ends with 1, 2 or 3 (urban, rural and urban-rural gminas).
#' @param year A numeric or character vector. Each element is used as a
#'   regular expression matched against the \code{year} column; a row is kept
#'   when any element matches.
#' @param name A character vector. Each element is used as a regular
#'   expression matched against the \code{name} column; a row is kept when any
#'   element matches.
#'
#' @return A data frame with the rows of \code{data} that satisfy all filters.
#'   The first column is a NTS id of territorial unit. The second column is a
#'   common name of territorial unit. Then there are from 1 to 5 columns with
#'   dimensions labels. All dimensions in a given dataset can be previewed by
#'   the \code{\link{openPolandMeta}} function. Last four columns are: year,
#'   measure unit, value and data attribute.
#'
#' @seealso \code{\link{openPolandData}} for downloading a selected dataset
#'   via openPoland API, \code{\link{openPolandMeta}} for getting meta
#'   information about datasets, and \code{\link{openPolandSearch}} for
#'   searching database with a given query.
#'
#' @examples
#' \dontrun{
#'
#' # token for API authorization
#' token = "[alpha-numeric-string-you-get-after-registration-on-https://openpoland.net]"
#'
#' openPolandMeta(id = 1944, token = token)
#'
#' # a whole, unfiltered dataset
#' data = openPolandData(id = 1944, token = token)
#'
#' unique(openPolandFilter(data = data, unit="region")$name)
#' unique(openPolandFilter(data = data, unit="voivodship")$name)
#' unique(openPolandFilter(data = data, unit="subregion")$name)
#' unique(openPolandFilter(data = data, unit="powiat")$name)
#' unique(openPolandFilter(data = data, unit="gmina")$name)
#'
#' unique(openPolandFilter(data = data, nts=5)$name)
#'
#' openPolandFilter(data = data, name="Warszawa")
#' openPolandFilter(data = data, nts='3265301032')
#' openPolandFilter(data = data, nts='3265301032', year=2010)
#' openPolandFilter(data = data, nts='3265301032', year=c(2010, 2012))
#'
#' }
openPolandFilter <- function(data = NULL,
                             nts = NULL,
                             unit = NULL,
                             year = NULL,
                             name = NULL) {
  if (is.null(data)) {
    stop('Data is missing.',
         '\nUse openPolandData() function to download a dataset.')
  }
  data <- as.data.frame(data)

  # Fetch a column only when a filter actually needs it, failing with a
  # readable message instead of a length mismatch further down.
  column <- function(col) {
    if (!col %in% names(data)) {
      stop("Column '", col, "' is missing from 'data'.")
    }
    data[[col]]
  }

  keep <- rep(TRUE, nrow(data))

  unit_levels <- c(region = "1", voivodship = "2",
                   subregion = "3", powiat = "4")
  if (!is.null(unit)) {
    if (length(unit) != 1L || !unit %in% c(names(unit_levels), "gmina")) {
      stop("There is no such 'unit' defined.")
    }
    unit <- as.character(unit)
    if (unit == "gmina") {
      # The last digit of a level 5 id is the unit type:
      # 1 - urban gmina, 2 - rural gmina, 3 - urban-rural gmina,
      # 4 - town in an urban-rural gmina,
      # 5 - rural area in an urban-rural gmina,
      # 8 - quarter of the capital city of Warszawa,
      # 9 - representation in other urban gmina.
      # Only whole gminas (types 1-3) are kept.
      codes <- column("nts")
      keep <- .nts_level(codes) == 5L & grepl("^[0-9]{9}[123]$", codes)
    } else {
      # The named units are shortcuts for NTS levels and take precedence
      # over an explicitly supplied 'nts'.
      nts <- unit_levels[[unit]]
    }
  }

  if (!is.null(nts)) {
    if (length(nts) != 1L || is.na(nts)) {
      stop("'nts' must be a single, non-missing value.")
    }
    # as.character() renders round numbers such as 1000000000 as "1e+09",
    # which would make a numeric 10-digit id unrecognisable.
    nts_key <- if (is.numeric(nts)) {
      format(nts, scientific = FALSE, trim = TRUE)
    } else {
      as.character(nts)
    }
    if (nts_key %in% as.character(1:5)) {
      keep <- .nts_level(column("nts")) == as.integer(nts_key)
    } else if (nchar(nts_key) == 10L) {
      keep <- grepl(nts_key, column("nts"), perl = TRUE)
    } else {
      warning("'nts' is neither a level (1-5) nor a 10-character id; ",
              "it was ignored.")
    }
  }

  if (!is.null(year)) {
    keep <- keep & .matches_any_pattern(year, column("year"))
  }
  if (!is.null(name)) {
    keep <- keep & .matches_any_pattern(name, column("name"))
  }

  # which() drops both FALSE and NA, so rows whose criteria could not be
  # evaluated (e.g. a missing NTS id under a level filter) are excluded.
  result <- data[which(keep), , drop = FALSE]
  if (!is.character(attr(data, "row.names"))) {
    # Number the result 1..k instead of carrying the original row positions.
    rownames(result) <- NULL
  }
  result
}

# Classify NTS ids by level: an id belongs to the first (coarsest) level
# whose pattern it matches -- 1, 3, 5 or 7 leading digits padded with zeros
# to 10 characters -- and to level 5 otherwise. Missing ids give NA.
.nts_level <- function(codes) {
  codes <- as.character(codes)
  level <- rep(5L, length(codes))
  # Assigned from finest to coarsest so that the coarsest match wins.
  level[grepl("^[0-9]{7}0{3}$", codes)] <- 4L
  level[grepl("^[0-9]{5}0{5}$", codes)] <- 3L
  level[grepl("^[0-9]{3}0{7}$", codes)] <- 2L
  level[grepl("^[0-9]0{9}$", codes)] <- 1L
  level[is.na(codes)] <- NA_integer_
  level
}

# TRUE for each value matched by at least one of the regular expressions in
# 'patterns'. Patterns are applied one by one rather than recycled against
# the values, so a vector of patterns means "any of these".
.matches_any_pattern <- function(patterns, values) {
  values <- as.character(values)
  hits <- rep(FALSE, length(values))
  for (pattern in as.character(patterns)) {
    hits <- hits | grepl(pattern, values, perl = TRUE)
  }
  hits
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.