Nothing
#' Efficiently get a wide table with various properties of a given set of
#' Wikidata identifiers
#'
#' @param label Logical, defaults to `FALSE`. If `TRUE` labels of Wikidata Q
#' identifiers are reported instead of the identifiers themselves (or labels
#' are presented along of them, if `both_id_and_label` is set to `TRUE`)
#' @param property_label_as_column_name Logical, defaults to `FALSE`. If `FALSE`,
#' names of columns with properties are the "P" identifiers of the property.
#' If `TRUE`, the label of the correspondent property is assigned as column
#' name.
#' @param both_id_and_label Logical, defaults to `FALSE`. Relevant only if `label`
#' is set to `TRUE`, otherwise ignored. If `TRUE`, the label is added as a
#' separate column along the original one. Column name is the same as the
#' property column, followed by "_label".
#' @param id_df_label Defaults to NULL. If given, it should be a dataframe
#' typically generated with [tw_get()] with *all* items for which labels will
#' be requested. It is used instead of calling Wikidata or relying on cache.
#' @param unlist Logical, defaults to `FALSE`. Typically used sharing or exporting
#' data as csv files. Collapses all properties in a single string. The
#' separator is defined by the `collapse` parameter. Relevant only when
#' `only_first` is set to `FALSE`.
#' @param collapse Defaults to ";". Character used to separate results when
#' `unlist` is set to `TRUE`.
#'
#' @inheritParams tw_get_property_same_length
#'
#' @return A data frame, with a column for each given property.
#' @export
#'
#' @examples
#'
#' if (interactive()) {
#' tw_get_p_wide(
#' id = c("Q180099", "Q228822", "Q191095"),
#' p = c("P27", "P19", "P20"),
#' label = TRUE,
#' only_first = TRUE
#' )
#' }
tw_get_p_wide <- function(
id,
p,
label = FALSE,
property_label_as_column_name = FALSE,
both_id_and_label = FALSE,
only_first = FALSE,
preferred = FALSE,
unlist = FALSE,
collapse = ";",
language = tidywikidatar::tw_get_language(),
id_df = NULL,
id_df_label = NULL,
cache = NULL,
overwrite_cache = FALSE,
cache_connection = NULL,
disconnect_db = TRUE,
wait = 0
) {
if (is.data.frame(id)) {
id <- id$id
}
db <- tw_connect_to_cache(
connection = cache_connection,
language = language,
cache = cache
)
unique_p <- unique(p)
property <- NA_character_
value <- NA_character_
property_df <- tw_get_property(
id = id,
p = unique_p,
language = language,
id_df = id_df,
cache = cache,
overwrite_cache = overwrite_cache,
cache_connection = db,
disconnect_db = FALSE,
wait = wait
)
if (preferred) {
preferred_df <- property_df %>%
dplyr::mutate(
rank = factor(
.data$rank,
levels = c(
"preferred",
"normal",
"deprecated"
)
)
) %>%
dplyr::group_by(id) %>%
dplyr::arrange(
.by_group = TRUE,
.data$rank
) %>%
dplyr::ungroup()
if (nrow(preferred_df) > 0) {
property_df <- preferred_df
}
}
if (label) {
property_df <- property_df %>%
dplyr::mutate(
label = dplyr::if_else(
condition = tw_check_qid(
id = .data$value,
logical_vector = TRUE
),
true = tw_get_label(
id = .data$value,
language = language,
cache = cache,
overwrite_cache = overwrite_cache,
cache_connection = db,
disconnect_db = FALSE,
id_df = id_df_label,
wait = wait
),
false = .data$value,
missing = NA_character_
)
)
}
if (only_first) {
property_df <- property_df %>%
dplyr::distinct(.data$id, .data$property, .keep_all = TRUE)
} else if (!only_first) {
property_df <- property_df %>%
dplyr::distinct(.data$id, .data$property, .data$value, .keep_all = TRUE)
if (label) {
property_df <- property_df %>%
dplyr::group_by(.data$id, .data$property) %>%
dplyr::summarise(
value = list(.data$value),
label = list(.data$label),
.groups = "drop"
) %>%
dplyr::ungroup()
} else {
property_df <- property_df %>%
dplyr::group_by(.data$id, .data$property) %>%
dplyr::summarise(
value = list(.data$value),
.groups = "drop"
) %>%
dplyr::ungroup()
}
}
if (label) {
if (both_id_and_label) {
if (only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = c("value", "label"),
values_fill = NA_character_,
names_glue = "{property}_{.value}"
)
} else if (!only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = c("value", "label"),
values_fill = list(NA_character_),
names_glue = "{property}_{.value}"
)
}
} else if (!both_id_and_label) {
if (only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = "label",
values_fill = NA_character_
)
} else if (!only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = "label",
values_fill = list(NA_character_)
)
}
} else {
cli::cli_abort(
message = c(
x = "The parameter `both_id_and_label` must be either {.val {TRUE}} or {.val {FALSE}}."
)
)
}
if (both_id_and_label) {
new_order <- c(
"id",
t(matrix(
c(
stringr::str_c(unique_p, "_value"),
stringr::str_c(unique_p, "_label")
),
ncol = 2
)) %>%
as.character()
)
property_df_wide_ordered <- property_df_wide[, new_order]
names(property_df_wide_ordered) <- stringr::str_remove_all(
string = new_order,
pattern = "_value$"
)
} else {
new_order <- c(
"id",
unique_p
)
property_df_wide_ordered <- property_df_wide[, new_order]
}
} else if (label == FALSE) {
if (only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = "value",
values_fill = NA_character_
)
} else if (!only_first) {
property_df_wide <- property_df %>%
tidyr::pivot_wider(
id_cols = "id",
names_from = "property",
values_from = "value",
values_fill = list(NA_character_)
)
}
new_order <- c(
"id",
unique_p
)
property_df_wide_ordered <- property_df_wide[, new_order]
}
if (property_label_as_column_name) {
p_labels <- names(property_df_wide_ordered)[-1] %>%
stringr::str_extract(pattern = "P[[:digit:]]+") %>%
tw_get_property_label(
language = language,
cache = cache,
overwrite_cache = overwrite_cache,
cache_connection = db,
disconnect_db = FALSE,
wait = wait
)
if (both_id_and_label) {
p_labels[!seq_along(p_labels) %% 2] <- stringr::str_c(
p_labels[!seq_along(p_labels) %% 2],
"_label"
)
}
new_col_names <- p_labels %>%
vctrs::vec_as_names(repair = "universal", quiet = TRUE) %>%
stringr::str_replace_all(
pattern = stringr::fixed("."),
replacement = "_"
)
names(property_df_wide_ordered) <- c("id", new_col_names)
}
if (label) {
output_df <- tibble::tibble(id = id) %>%
dplyr::mutate(
label = tw_get_label(
id = .data$id,
language = language,
cache = cache,
overwrite_cache = overwrite_cache,
cache_connection = db,
disconnect_db = FALSE,
id_df = id_df_label,
wait = wait
)
) %>%
dplyr::left_join(
y = property_df_wide_ordered,
by = "id"
)
} else {
output_df <- tibble::tibble(id = id) %>%
dplyr::left_join(
y = property_df_wide_ordered,
by = "id"
)
}
tw_disconnect_from_cache(
cache = cache,
cache_connection = db,
disconnect_db = disconnect_db,
language = language
)
if (unlist & !only_first) {
output_df %>%
dplyr::group_by(.data$id) %>%
dplyr::mutate(
dplyr::across(
where(is.list),
function(x) {
stringr::str_c(unique(unlist(x)), collapse = collapse)
}
)
) %>%
dplyr::ungroup()
} else {
output_df
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.