#' Find one name per feature in the Composite Gazetteer
#'
#' The Composite Gazetteer of Antarctica is a compilation of place names provided by different countries and organisations. The composite nature of the CGA means that there may be multiple names associated with a single feature. The \code{an_preferred} function can be used to resolve a single name per feature. Provide one or more \code{origin} entries and the input \code{gaz} will be filtered to a single name per feature. For features that have multiple names (e.g. have been named by multiple countries) a single name will be chosen, preferring names from the specified \code{origin} bodies where possible.
#'
#' @references \url{https://data.aad.gov.au/aadc/gaz/scar/}, \url{https://www.scar.org/data-products/place-names/}
#'
#' @param gaz data.frame or SpatialPointsDataFrame: as returned by \code{\link{an_read}} or \code{\link{an_filter}}
#' @param origin character: vector of preferred name origins (countries or organisations), in order of preference. If a given feature has been named by one of these bodies, this place name will be chosen. If the feature in question has not been given a name by any of these bodies, a place name given by another body will be chosen, with preference according to the \code{unmatched} parameter. For valid \code{origin} values, see \code{\link{an_origins}}
#' @param unmatched string: how should names be chosen for features that have not been been named by one of the preferred \code{origin} bodies? Valid values are "random" (the non-preferred originating bodies will be randomly ordered) or "count" (the non-preferred originating bodies will be ordered by their number of entries, with the largest first)
#'
#' @return data.frame of results
#'
#' @seealso \code{\link{an_read}}, \code{\link{an_origins}}
#'
#' @examples
#' \dontrun{
#' g <- an_read(cache = "session")
#'
#' ## get a single name per feature, preferring the
#' ## Polish name where there is one
#' pnames <- an_preferred(g, origin = "Poland")
#'
#' ## names starting with "Sm", preferring US names then
#' ## Australian ones if available
#' g %>% an_filter("^Sm") %>%
#' an_preferred(origin = c("United States of America", "Australia"))
#' }
#'
#' @export
an_preferred <- function(gaz, origin, unmatched = "random") {
assert_that(inherits(gaz, c("data.frame", "SpatialPointsDataFrame")))
assert_that(is.string(unmatched))
unmatched <- match.arg(tolower(unmatched), c("random", "count"))
assert_that(is.character(origin))
is_sp <- inherits(gaz, "SpatialPointsDataFrame")
if (is_sp) {
## if sp, work on the @data object
gaz_sp <- gaz
gaz <- gaz@data
}
## features that have a name from one of our sources of interest
## determine the order of preference of sources that aren't in our preferred list
not_pref_origins <- names(sort(table(gaz$origin), decreasing = TRUE))
## those are in decreasing order of count (i.e. unmatched = "count")
if (unmatched == "random") not_pref_origins <- sample(not_pref_origins, size = length(not_pref_origins), replace = FALSE)
## tack these onto the tail end of our preferred list
origin <- c(origin, setdiff(not_pref_origins, origin))
in_ids <- unique(gaz$scar_common_id[gaz$origin %in% origin])
preferred_gaz_rows <- gaz[gaz$scar_common_id %in% in_ids, ]
## order scar_common_id by origin
## (with ordering as per appearance in the origin vector)
ord <- order(preferred_gaz_rows$scar_common_id, factor(preferred_gaz_rows$origin, levels = origin))
preferred_gaz_rows <- preferred_gaz_rows[ord, ]
preferred_gaz_rows <- preferred_gaz_rows[!duplicated(preferred_gaz_rows$scar_common_id), ] ## take first entry for each scar_common_id
## now add any features that did not have an entry from our preferred origins
## this should not ever happen, but do it here just in case e.g. there are NA entries in origin
not_preferred_gaz_rows <- gaz[!gaz$scar_common_id %in% in_ids, ]
not_preferred_gaz_rows <- not_preferred_gaz_rows[!duplicated(not_preferred_gaz_rows$scar_common_id), ] ## take first entry for each scar_common_id
out <- rbind(preferred_gaz_rows, not_preferred_gaz_rows)
if (is_sp) {
## return the subset of gaz_sp corresponding to the rows we just selected
gaz_sp[gaz_sp$gaz_id %in% out$gaz_id, ]
} else {
out
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.