#' Try to guess to whom a street is dedicated
#'
#' For each street name of a given LAU, searches Wikidata and returns the
#' best candidate for the person, place, or thing the street is named after.
#' Results are stored in a local lookup table, so that streets processed in a
#' previous call are not searched again.
#'
#' @param gisco_id A gisco identifier of a European LAU.
#' @param search_language Defaults to NULL, guessed based on country.
#' @param response_language Defaults to `tidywikidatar::tw_get_language()`. Used
#'   for defining the language in which label and description are returned.
#' @param check_named_after_original Defaults to TRUE. If TRUE, a search is
#'   performed on the original street name. If the first result has values for
#'   the property "named after", this takes precedence over other methods.
#' @param check_named_after_original_n Defaults to 1. Number of search results
#'   kept for each original street name when `check_named_after_original` is
#'   TRUE.
#' @param check_named_after Defaults to TRUE. If TRUE, a search is performed on
#'   the "cleaned" name of the street. If the result is a street, road, square,
#'   or similar, and this has values for the property "named after", this is
#'   kept instead of the standard method.
#' @param drop_if_street Defaults to TRUE. If the result found is primarily an
#'   instance of "street", "square", or such, it is dropped, as the result is
#'   probably the street itself, not what or who it is dedicated to.
#' @param drop_if_disambiguation_page Defaults to TRUE. If TRUE, results that
#'   are primarily an instance of "disambiguation page" are dropped. Only
#'   applied when `drop_if_street` is TRUE.
#' @param streets_sf Defaults to NULL. If given, used to speed up processing.
#'   Must be an sf object such as the ones output by `ll_osm_get_roads()`.
#'   Passed to `ll_osm_get_lau_streets()`.
#' @param street_names_df Defaults to NULL. If given, must be a data frame with
#'   two columns, named "street_name" and "name_clean" respectively. If given,
#'   these are passed directly to the search routine. Useful when name cleaning
#'   provided by the package is not satisfying, e.g. in places such as some
#'   Belgian cities where street names are given in more than one language.
#' @param checked_df Defaults to NULL. If given, a data frame with a
#'   `street_name` and `named_after_id` column. Takes precedence over searches.
#' @param cache Defaults to TRUE. Passed to `tidywikidatar` functions.
#' @param overwrite_cache Defaults to FALSE. Passed to `tidywikidatar`
#'   functions. When `append` is FALSE, it also controls whether results are
#'   written to the local lookup table.
#' @param append Defaults to TRUE. If TRUE, new results are appended to the
#'   local lookup table. If FALSE, results are written only when
#'   `overwrite_cache` is TRUE, replacing rows already stored for the same
#'   street names.
#' @param connection Defaults to NULL. Passed to
#'   `tidywikidatar::tw_connect_to_cache()`.
#' @param disconnect_db Defaults to TRUE. Passed to
#'   `tidywikidatar::tw_disconnect_from_cache()` before returning.
#'
#' @return A data frame with columns `street_name`, `named_after_id`,
#'   `named_after_label`, `named_after_description`, and
#'   `named_after_from_wikidata`, sorted by `street_name`. If all streets are
#'   already available in the local lookup table, the stored rows are returned
#'   as they are.
#' @export
#'
#' @examples
#' \dontrun{
#' sn_search_named_after(gisco_id = "IT_022205")
#' }
sn_search_named_after <- function(gisco_id,
                                  search_language = NULL,
                                  response_language = tidywikidatar::tw_get_language(),
                                  check_named_after_original = TRUE,
                                  check_named_after_original_n = 1,
                                  check_named_after = TRUE,
                                  drop_if_street = TRUE,
                                  drop_if_disambiguation_page = TRUE,
                                  streets_sf = NULL,
                                  street_names_df = NULL,
                                  checked_df = NULL,
                                  cache = TRUE,
                                  overwrite_cache = FALSE,
                                  append = TRUE,
                                  connection = NULL,
                                  disconnect_db = TRUE) {
  # Wikidata classes that denote the street (or similar) itself.
  street_type_qids <- c(
    "Q79007", # street
    "Q174782", # square
    "Q12280", # bridge
    "Q3352369", # footpath
    "Q34442", # road
    "Q54114" # boulevard
  )

  # Backslashes in names are replaced with a space before names are used as
  # search strings and lookup keys.
  replace_backslashes <- function(x) {
    stringr::str_squish(
      stringr::str_replace_all(
        string = x,
        pattern = stringr::fixed("\\"),
        replacement = " "
      )
    )
  }

  country_code <- stringr::str_extract(
    string = gisco_id,
    pattern = "[A-Z][A-Z]"
  )
  country_name <- sn_standard_country(
    country = country_code,
    type = "name"
  )

  if (is.null(search_language)) {
    search_language <- streetnamer::sn_language_defaults_by_country %>%
      dplyr::filter(country == country_name) %>%
      dplyr::pull(language_code)
  }

  if (length(search_language) == 0) {
    search_language <- tidywikidatar::tw_get_language()
  } else if (length(search_language) > 1) {
    search_language <- search_language[1]
  }

  if (is.null(street_names_df)) {
    current_street_names_df <- latlon2map::ll_osm_get_lau_streets(
      gisco_id = gisco_id,
      unnamed_streets = FALSE,
      streets_sf = streets_sf
    ) %>%
      sf::st_drop_geometry() %>%
      dplyr::distinct(name) %>%
      dplyr::mutate(name_clean = sn_clean_street_name(
        street_name = name,
        country = country_name
      )) %>%
      dplyr::mutate(
        name = replace_backslashes(.data$name),
        name_clean = replace_backslashes(.data$name_clean)
      ) %>%
      dplyr::rename(street_name = "name")
  } else {
    if ("name" %in% colnames(street_names_df)) {
      street_names_df <- street_names_df %>%
        dplyr::rename(street_name = "name")
    }
    current_street_names_df <- street_names_df %>%
      dplyr::mutate(
        street_name = replace_backslashes(.data$street_name),
        name_clean = replace_backslashes(.data$name_clean)
      )
  }

  # The cache is specific to the search/response language combination.
  language_combo <- stringr::str_c(search_language, "_", response_language)

  # NOTE(review): the lookup table below is read and written regardless of
  # `cache`; confirm what `tw_connect_to_cache()` returns when cache = FALSE.
  db_connection <- tidywikidatar::tw_connect_to_cache(
    connection = connection,
    language = language_combo,
    cache = cache
  )

  table_name <- sn_get_db_table_name(
    type = "sn_auto_named_after",
    country = country_code
  )

  if (pool::dbExistsTable(conn = db_connection, name = table_name) == FALSE) {
    # if the table does not exist, data cannot be there
    previously_cached_df <- sn_empty_auto_named_after
  } else {
    db_result <- tryCatch(
      dplyr::tbl(src = db_connection, table_name) %>%
        dplyr::filter(
          .data$street_name %in% !!stringr::str_c(current_street_names_df$street_name)
        ),
      error = function(e) {
        FALSE
      }
    )
    if (isFALSE(db_result)) {
      previously_cached_df <- sn_empty_auto_named_after
    } else {
      previously_cached_df <- tibble::as_tibble(db_result)
    }
  }

  # Only streets not yet in the lookup table are processed further.
  current_street_names_df <- current_street_names_df %>%
    dplyr::anti_join(
      y = previously_cached_df,
      by = "street_name"
    )

  if (nrow(current_street_names_df) == 0) {
    tidywikidatar::tw_disconnect_from_cache(
      cache = cache,
      cache_connection = db_connection,
      disconnect_db = disconnect_db,
      language = language_combo
    )
    return(previously_cached_df)
  }

  # Street names that already have a result and need no further search.
  exclude_v <- character(0)

  if (!is.null(checked_df)) {
    if ("name" %in% colnames(checked_df)) {
      checked_df <- checked_df %>%
        dplyr::rename(street_name = "name")
    }
    if ("id" %in% colnames(checked_df) &&
      !("named_after_id" %in% colnames(checked_df))) {
      checked_df <- checked_df %>%
        dplyr::rename(named_after_id = "id")
    }

    from_check_pre_df <- current_street_names_df %>%
      dplyr::distinct(.data$street_name) %>%
      dplyr::left_join(
        y = checked_df %>%
          dplyr::distinct(
            .data$street_name,
            .data$named_after_id
          ),
        by = "street_name"
      ) %>%
      dplyr::filter(tidywikidatar::tw_check_qid(
        id = .data$named_after_id,
        logical_vector = TRUE
      ))

    # Label and description use the same column names as the final output,
    # so that they survive the final column selection.
    if (nrow(from_check_pre_df) > 0) {
      from_check_df <- from_check_pre_df %>%
        dplyr::mutate(
          named_after_label = tidywikidatar::tw_get_label(
            id = .data$named_after_id,
            language = response_language,
            cache = cache,
            overwrite_cache = overwrite_cache,
            cache_connection = db_connection,
            disconnect_db = FALSE
          ),
          named_after_description = tidywikidatar::tw_get_description(
            id = .data$named_after_id,
            language = response_language,
            cache = cache,
            overwrite_cache = overwrite_cache,
            cache_connection = db_connection,
            disconnect_db = FALSE
          ),
          named_after_from_wikidata = FALSE
        )
    } else {
      from_check_df <- from_check_pre_df %>%
        dplyr::mutate(
          named_after_label = as.character(NA),
          named_after_description = as.character(NA),
          named_after_from_wikidata = as.logical(NA)
        )
    }

    current_street_names_original_df <- current_street_names_df
    current_street_names_df <- current_street_names_original_df %>%
      dplyr::anti_join(
        y = from_check_df,
        by = "street_name"
      )

    if (nrow(current_street_names_df) == 0) {
      # Every remaining street is covered by `checked_df`: nothing is left to
      # search, so return the checked values directly.
      tidywikidatar::tw_disconnect_from_cache(
        cache = cache,
        cache_connection = db_connection,
        disconnect_db = disconnect_db,
        language = language_combo
      )
      return(
        from_check_df %>%
          dplyr::transmute(
            .data$street_name,
            .data$named_after_id,
            .data$named_after_label,
            .data$named_after_description,
            .data$named_after_from_wikidata
          ) %>%
          dplyr::arrange(.data$street_name)
      )
    }
  }

  if (check_named_after_original) {
    # Search the original street name, and keep results that have a
    # "named after" (P138) property.
    search_no_clean_df_pre <- tidywikidatar::tw_search(
      search = current_street_names_df[["street_name"]],
      language = search_language,
      response_language = response_language,
      include_search = TRUE,
      cache = cache,
      cache_connection = db_connection,
      overwrite_cache = overwrite_cache,
      disconnect_db = FALSE
    ) %>%
      dplyr::group_by(.data$search) %>%
      dplyr::slice(seq_len(check_named_after_original_n)) %>%
      dplyr::ungroup() %>%
      dplyr::rename(street_name = "search") %>%
      dplyr::mutate(
        named_after = tidywikidatar::tw_get_p1(
          id = .data$id,
          p = "P138",
          language = response_language,
          cache = cache,
          disconnect_db = FALSE,
          overwrite_cache = overwrite_cache,
          cache_connection = db_connection
        )
      )

    search_no_clean_df <- search_no_clean_df_pre %>%
      dplyr::filter(is.na(.data$named_after) == FALSE) %>%
      dplyr::distinct(.data$street_name, .data$label, .keep_all = TRUE)

    named_after_original_df <- search_no_clean_df %>%
      dplyr::mutate(
        named_after_label = tidywikidatar::tw_get_label(
          id = .data$named_after,
          language = response_language,
          cache = cache,
          overwrite_cache = overwrite_cache,
          cache_connection = db_connection,
          disconnect_db = FALSE
        ),
        named_after_description = tidywikidatar::tw_get_description(
          id = .data$named_after,
          language = response_language,
          cache = cache,
          overwrite_cache = overwrite_cache,
          cache_connection = db_connection,
          disconnect_db = FALSE
        )
      )

    output_df <- current_street_names_df %>%
      dplyr::left_join(
        y = named_after_original_df %>%
          dplyr::transmute(
            .data$street_name,
            named_after_id = .data$named_after,
            label = .data$named_after_label,
            description = .data$named_after_description,
            named_after_from_wikidata = TRUE
          ),
        by = "street_name"
      ) %>%
      dplyr::filter(is.na(.data$named_after_id) == FALSE)

    exclude_v <- output_df[["street_name"]]
  }

  to_search_v <- current_street_names_df %>%
    dplyr::filter(!.data$street_name %in% exclude_v) %>%
    dplyr::pull("name_clean")

  if (length(to_search_v) > 0) {
    # Search the cleaned name, keeping only the first result for each.
    search_results_df <- tidywikidatar::tw_search(
      search = to_search_v,
      language = search_language,
      response_language = response_language,
      include_search = TRUE,
      cache = cache,
      cache_connection = db_connection,
      overwrite_cache = overwrite_cache,
      disconnect_db = FALSE
    ) %>%
      dplyr::group_by(.data$search) %>%
      dplyr::slice(1) %>%
      dplyr::ungroup() %>%
      dplyr::rename(name_clean = "search")

    if (check_named_after == TRUE) {
      if (length(tidywikidatar::tw_check_qid(search_results_df[["id"]])) > 0) {
        # If the result is itself a street or similar, rely on its
        # "named after" (P138) property.
        named_after_df <- tidywikidatar::tw_get_property(
          id = search_results_df[["id"]],
          p = "P31",
          language = response_language,
          cache = cache,
          disconnect_db = FALSE,
          overwrite_cache = overwrite_cache,
          cache_connection = db_connection
        ) %>%
          dplyr::filter(.data$value %in% c(
            street_type_qids,
            "Q24354" # theater
          )) %>%
          dplyr::left_join(
            y = search_results_df,
            by = "id"
          ) %>%
          dplyr::distinct(.data$id, .keep_all = TRUE) %>%
          dplyr::mutate(
            named_after_id = tidywikidatar::tw_get_p1(
              id = .data$id,
              p = "P138",
              language = response_language,
              cache = cache,
              disconnect_db = FALSE,
              overwrite_cache = overwrite_cache,
              cache_connection = db_connection
            )
          ) %>%
          dplyr::filter(is.na(.data$named_after_id) == FALSE) %>%
          dplyr::mutate(
            named_after_label = tidywikidatar::tw_get_label(
              id = .data$named_after_id,
              language = response_language,
              cache = cache,
              overwrite_cache = overwrite_cache,
              cache_connection = db_connection,
              disconnect_db = FALSE
            ),
            named_after_description = tidywikidatar::tw_get_description(
              id = .data$named_after_id,
              language = response_language,
              cache = cache,
              overwrite_cache = overwrite_cache,
              cache_connection = db_connection,
              disconnect_db = FALSE
            )
          )
      } else {
        named_after_df <- tibble::tibble(
          id = character(0),
          property = character(0),
          value = character(0),
          rank = character(0),
          name_clean = character(0),
          label = character(0),
          description = character(0),
          named_after_id = character(0),
          named_after_label = character(0),
          named_after_description = character(0)
        )
      }

      processed_df <- current_street_names_df %>%
        dplyr::filter(!.data$street_name %in% exclude_v) %>%
        dplyr::left_join(
          y = named_after_df %>%
            dplyr::transmute(
              .data$name_clean,
              .data$named_after_id,
              .data$named_after_label,
              .data$named_after_description
            ),
          by = "name_clean"
        ) %>%
        dplyr::filter(is.na(.data$named_after_id) == FALSE)

      exclude_v <- unique(c(exclude_v, processed_df[["street_name"]]))

      if (check_named_after_original == TRUE) {
        output_df <- dplyr::bind_rows(
          output_df,
          processed_df
        ) %>%
          dplyr::distinct(.data$street_name, .keep_all = TRUE)
      } else {
        output_df <- processed_df
      }
    }

    if (check_named_after_original || check_named_after) {
      # Rows found through "named after" come first; remaining streets fall
      # back on the plain search result.
      output_df <- dplyr::bind_rows(
        output_df %>%
          dplyr::mutate(named_after_from_wikidata = TRUE),
        current_street_names_df %>%
          dplyr::filter(!.data$street_name %in% exclude_v) %>%
          dplyr::left_join(
            y = search_results_df %>%
              dplyr::mutate(named_after_from_wikidata = FALSE),
            by = "name_clean"
          ) %>%
          dplyr::distinct(.data$street_name, .keep_all = TRUE)
      )
    } else {
      output_df <- current_street_names_df %>%
        dplyr::filter(!.data$street_name %in% exclude_v) %>%
        dplyr::left_join(
          y = search_results_df,
          by = "name_clean"
        )
    }
  }

  if ("id" %in% colnames(output_df)) {
    # Fall back on the id of the search result when no "named after" id
    # was found.
    if (!"named_after_id" %in% colnames(output_df)) {
      output_df <- output_df %>%
        dplyr::mutate(named_after_id = as.character(NA))
    }
    output_df <- output_df %>%
      dplyr::mutate(named_after_id = dplyr::if_else(
        is.na(.data$named_after_id),
        as.character(.data$id),
        as.character(.data$named_after_id)
      )) %>%
      dplyr::select(-"id")
  }

  if (drop_if_street == TRUE) {
    pre_process_join_df <- output_df %>%
      dplyr::select(-"name_clean") %>%
      dplyr::mutate(instance_of = tidywikidatar::tw_get_p1(
        id = .data$named_after_id,
        p = "P31",
        language = response_language,
        cache = cache,
        disconnect_db = FALSE,
        overwrite_cache = overwrite_cache,
        cache_connection = db_connection
      )) %>%
      dplyr::filter(!.data$instance_of %in% street_type_qids)

    if (drop_if_disambiguation_page == TRUE) {
      pre_process_join_df <- pre_process_join_df %>%
        dplyr::filter(!.data$instance_of %in% c(
          "Q4167410" # disambiguation page
        ))
    }

    final_output_df <- current_street_names_df %>%
      dplyr::left_join(
        y = pre_process_join_df %>%
          dplyr::select(-"instance_of"),
        by = "street_name"
      )
  } else {
    final_output_df <- current_street_names_df %>%
      dplyr::left_join(
        y = output_df %>%
          dplyr::select(-"name_clean"),
        by = "street_name"
      )
  }

  if (!"named_after_from_wikidata" %in% colnames(final_output_df)) {
    final_output_df <- final_output_df %>%
      dplyr::mutate(named_after_from_wikidata = FALSE)
  }

  # Merge `description`/`label` into the `named_after_*` columns.
  if ("description" %in% colnames(final_output_df)) {
    if (!"named_after_description" %in% colnames(final_output_df)) {
      final_output_df <- final_output_df %>%
        dplyr::mutate(named_after_description = as.character(NA))
    }
    final_output_df <- final_output_df %>%
      dplyr::mutate(named_after_description = dplyr::if_else(
        condition = is.na(.data$description),
        true = .data$named_after_description,
        false = .data$description
      )) %>%
      dplyr::select(-"description")
  }

  if ("label" %in% colnames(final_output_df)) {
    if (!"named_after_label" %in% colnames(final_output_df)) {
      final_output_df <- final_output_df %>%
        dplyr::mutate(named_after_label = as.character(NA))
    }
    final_output_df <- final_output_df %>%
      dplyr::mutate(named_after_label = dplyr::if_else(
        condition = is.na(.data$label),
        true = .data$named_after_label,
        false = .data$label
      )) %>%
      dplyr::select(-"label")
  }

  # Only these three columns are stored in the lookup table.
  df <- final_output_df %>%
    dplyr::transmute(
      .data$street_name,
      .data$named_after_id,
      .data$named_after_from_wikidata
    )

  table_exists <- pool::dbExistsTable(conn = db_connection, name = table_name)

  if (table_exists == FALSE || append == TRUE) {
    DBI::dbWriteTable(db_connection,
      name = table_name,
      value = df,
      append = TRUE
    )
  } else {
    # Table exists and append is FALSE: write only if `overwrite_cache` is
    # TRUE, first removing rows already stored for the same street names.
    new_street_names <- unique(df[["street_name"]])

    previously_available <- dplyr::tbl(
      src = db_connection,
      table_name
    ) %>%
      dplyr::filter(.data$street_name %in% !!new_street_names) %>%
      dplyr::pull("street_name") %>%
      length() %>%
      as.logical()

    if (previously_available == FALSE) {
      if (overwrite_cache == TRUE) {
        DBI::dbWriteTable(db_connection,
          name = table_name,
          value = df,
          append = TRUE
        )
      }
    } else if (overwrite_cache == TRUE) {
      statement <- glue::glue_sql(
        "DELETE FROM {`table_name`} WHERE street_name IN ({street_names*})",
        table_name = table_name,
        street_names = new_street_names,
        .con = db_connection
      )
      DBI::dbExecute(
        conn = db_connection,
        statement = statement
      )
      DBI::dbWriteTable(db_connection,
        name = table_name,
        value = df,
        append = TRUE
      )
    }
    # otherwise do nothing: data already present and both overwrite_cache
    # and append are set to FALSE
  }

  tidywikidatar::tw_disconnect_from_cache(
    cache = cache,
    cache_connection = db_connection,
    disconnect_db = disconnect_db,
    language = language_combo
  )

  final_output_df <- dplyr::bind_rows(
    previously_cached_df,
    final_output_df %>%
      dplyr::transmute(
        .data$street_name,
        .data$named_after_id,
        .data$named_after_label,
        .data$named_after_description,
        .data$named_after_from_wikidata
      )
  )

  if (!is.null(checked_df)) {
    if (nrow(checked_df) > 0) {
      # NOTE(review): joining on the streets that were not in the lookup
      # table means previously cached rows are not part of the output when
      # `checked_df` is given; confirm this is intended.
      final_output_df <- current_street_names_original_df %>%
        dplyr::select(-dplyr::any_of("name_clean")) %>%
        dplyr::left_join(
          y = dplyr::bind_rows(
            from_check_df,
            final_output_df %>%
              dplyr::select(-dplyr::any_of("name_clean"))
          ),
          by = "street_name"
        )
    }
  }

  final_output_df %>%
    dplyr::transmute(
      .data$street_name,
      .data$named_after_id,
      .data$named_after_label,
      .data$named_after_description,
      .data$named_after_from_wikidata
    ) %>%
    dplyr::arrange(.data$street_name)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.