#' Assemble query parts into a proper SPARQL query
#' @param .query a list with elements of the query
#' @param strict whether to perform some linting on the query,
#' and error in case a problem is detected.
#' @return A query object
#' @export
#' @examples
#' spq_init() %>%
#' spq_add("?city wdt:P31 wd:Q515") %>%
#' spq_label(city, .languages = "fr$") %>%
#' spq_add("?city wdt:P1082 ?pop") %>%
#' spq_assemble() %>%
#' cat()
spq_assemble = function(.query, strict = TRUE) {
.query = spq_prefix(.query, auto = TRUE, prefixes = NULL)
# prefixes -----
prefixes_known = .query[["prefixes_provided"]] %>%
dplyr::bind_rows(usual_prefixes)
check_prefixes(
.query[["prefixes_used"]],
prefixes_known = prefixes_known,
call = rlang::caller_env()
)
part_prefixes <- if (nrow(.query[["prefixes_provided"]]) > 0) {
glue::glue(
'PREFIX {.query[["prefixes_provided"]][["name"]]}: <{.query[["prefixes_provided"]][["url"]]}>') %>%
paste0(collapse = "\n")
} else {
""
}
group_by <- if (sum(.query[["structure"]][["grouping"]]) > 0) {
grouping_vars <- .query[["structure"]][["name"]][.query[["structure"]][["grouping"]]]
paste0("GROUP BY ", paste0(grouping_vars, collapse = " "), "\n")
} else {
""
}
ordering <- .query[["structure"]][.query[["structure"]][["ordering"]] != "none", ]
order_by <- if (!is.null(ordering) && nrow(ordering) > 0) {
ordering_vars = paste0(ordering[["ordering"]], collapse = " ")
sprintf("ORDER BY %s", ordering_vars)
} else {
""
}
# selections and bindings ----
binded <- ""
select <- if (sum(.query[["structure"]][["selected"]]) == 0) {
"*"
} else {
selected <- .query[["structure"]][["name"]][.query[["structure"]][["selected"]]]
# no var defined: this happens in testing
# and other contexts where we demo functions partially
select <- if (is.null(.query[["vars"]])) {
selected
} else {
selected_df <- .query[["vars"]] %>%
dplyr::filter(name %in% selected) %>%
dplyr::select(name, formula) %>%
dplyr::distinct() %>%
dplyr::left_join(.query[["structure"]], by = "name")
nongrouping_selected_df = dplyr::filter(selected_df, !grouping) %>%
dplyr::group_by(name) %>%
# TODO it'd probably be weird to have >1 formula
# so check that
dplyr::summarize(selected_pattern = dplyr::if_else(
!all(is.na(formula)),
formula[!is.na(formula)][1],
name[1]
))
nongrouping_selected = nongrouping_selected_df[["selected_pattern"]]
nongrouping_selected = rlang::set_names(
nongrouping_selected,
nongrouping_selected_df[["name"]]
)
# hack to have formulas last
# more readability if formulas use vars from select
formulas_last <- order(grepl(" AS ", nongrouping_selected, fixed = TRUE))
nongrouping_selected <- nongrouping_selected[formulas_last]
grouping_selected <- dplyr::filter(selected_df, grouping) %>%
dplyr::group_by(name) %>%
dplyr::summarize(selected_pattern = name[1]) %>%
dplyr::pull(selected_pattern)
# we get this to be sure to include any ancestor
# of selected var in a BIND
selected_vars = .query[["vars"]][.query[["vars"]][["name"]] %in% selected_df[["name"]], ]
ancestor_vars = selected_vars[!is.na(selected_vars[["ancestor"]]), ]
filtered_vars = selected_vars[selected_vars[["name"]] %in% .query[["filters"]][["var"]], ]
potential_binded <- c(grouping_selected, ancestor_vars[["ancestor"]], filtered_vars[["name"]])
if (length(potential_binded) > 0) {
to_bind <- dplyr::filter(.query[["vars"]],
name %in% potential_binded,
!is.na(formula))
if (nrow(to_bind) > 0) {
binded <- paste(
sprintf("BIND%s", to_bind[["formula"]]),
collapse = "\n"
)
binded <- paste0(binded, "\n")
}
} else {
to_bind = NULL
}
# nicer to put grouping variables first
select = c(grouping_selected, nongrouping_selected)
# put labels near variables
any_label = any(grepl("_label", select, fixed = TRUE))
if (any_label) {
new_select = select[!grepl("_label", select, fixed = TRUE)]
labels = select[grepl("_label", select, fixed = TRUE)]
select = purrr::reduce2(
labels,
names(labels),
\(new_select, x, y) add_label(new_select, x, y, old_select = select),
.init = new_select
)
}
# do not use formula both in BIND and SELECT
# so remove it from SELECT
select[names(select) %in% to_bind[["name"]]] =
names(select)[names(select) %in% to_bind[["name"]]]
select
}
}
# body ----
triples_present = !is.null(.query[["triples"]])
body = if (triples_present) {
firstborn_triples = .query[["triples"]][is.na(.query[["triples"]][["sibling_triple"]]),]
firstborn_triples = split(firstborn_triples, seq_len(nrow(firstborn_triples)))
# they'll be built as we build their big siblings
other_triples = .query[["triples"]][!is.na(.query[["triples"]][["sibling_triple"]]),]
purrr::map_chr(
firstborn_triples,
~build_part_body(
triple = .x[["triple"]],
required = .x[["required"]],
within_box = .x[["within_box"]],
within_distance = .x[["within_distance"]],
filter = .x[["filter"]],
other_triples = other_triples
)
) |>
paste0(collapse = "")
} else {
NULL
}
valued_vars <- .query[["vars"]][!is.na(.query[["vars"]][["values"]]), ]
if (!is.null(valued_vars) && nrow(valued_vars) > 0) {
body <- paste(
c(
body,
sprintf("VALUES ?%s %s", valued_vars[["name"]], valued_vars[["values"]])
),
collapse = "\n"
)
}
# duplicate ----
spq_duplicate <- if (is.null(.query[["spq_duplicate"]])) {
""
} else {
sprintf("%s ", .query[["spq_duplicate"]])
}
limit <- if (is.null(.query[["limit"]])) {
""
} else {
glue::glue('LIMIT {.query[["limit"]]}\n')
}
offset <- if (is.null(.query[["offset"]])) {
""
} else {
glue::glue('OFFSET {.query[["offset"]]}')
}
filters <- if (is.null(.query[["filters"]])) {
""
} else {
if (strict) {
unknown_filtered_variables <- .query[["filters"]][["var"]][
!(.query[["filters"]][["var"]] %in% .query[["vars"]][["name"]])
]
if (length(unknown_filtered_variables) > 0) {
cli::cli_abort(c(
"Can't filter on undefined variables: {toString(unknown_filtered_variables)}",
i = "You haven't mentioned them in any triple, VALUES, mutate."
))
}
}
paste0(
sprintf("FILTER(%s)", .query[["filters"]][["filter"]]),
collapse = "\n"
)
}
paste0(
part_prefixes, "\n",
"SELECT ", spq_duplicate, paste0(select, collapse = " "), "\n",
"WHERE {\n",
body, "\n", binded,
filters, "\n",
"}\n",
group_by,
order_by, "\n",
limit,
offset
)
}
utils::globalVariables(c("usual_prefixes", "formula", "selected_pattern"))
add_label = function(vector, label, label_name, old_select) {
unlabelled = which(sprintf("%s_label", vector) == label_name)
unlabelled_present = (length(unlabelled) == 1)
if (unlabelled_present) {
return(append(vector, label, after = unlabelled))
}
old_position = which(old_select == label)
if (old_position == 1) {
return(append(vector, label, after = 0))
}
old_neighbour = old_select[old_position - 1]
old_neighbour_current_position = which(vector == old_neighbour)
append(vector, label, after = old_neighbour_current_position)
}
check_prefixes <- function(prefixes, prefixes_known, call) {
purrr::walk(prefixes, check_prefix, prefixes_known = prefixes_known, call = call)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.