Nothing
utils::globalVariables(c("siteid"))
#' @title clean Neotoma objects to remove duplicates and empty objects.
#' @author Simon Goring \email{goring@wisc.edu}
#' @import gtools
#' @import lubridate
#' @importFrom progress progress_bar
#' @importFrom methods new
#' @param x sites, datasets, collunits that may have duplicates.
#' @param verbose parameter to prints out progress bar
#' @param ... Additional parameters associated with the call.
#' @description Function that removes duplicate objects such as sites,
#' datasets, or collection units. When we pull in a large number of objects,
#' or overlapping searches, we can run into a problem where we have multiple
#' instances of the same site, but with different datasets. This function
#' attempts to gather all objects together:
#' * Before: \{site: 1, dataset: 1\}, \{site: 1, dataset: 2\}
#' * After: \{site: 1, dataset: \[1, 2\]\}
#' So the site is gathered, and the datasets are now part of an array of datasets.
#' @returns clean `neotoma` objects without duplicates after concatenation
#' @examples \donttest{
#' clean_sites <- get_sites(sitename = "L%", limit = 20)
#' more_sites <- get_sites(sitename = "La%", limit = 20)
#' long_set <- c(clean_sites, more_sites)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' # We can do the same thing with collection units:
#' clean_cols <- get_sites(sitename = "L%", limit = 20) %>%
#' collunits()
#' more_cols <- get_sites(sitename = "La%", limit = 20) %>%
#' collunits()
#' long_set <- c(clean_cols, more_cols)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' # And datasets:
#' clean_ds <- get_sites(sitename = "L%", limit = 20) %>%
#' get_downloads() %>% datasets()
#' more_ds <- get_sites(sitename = "La%", limit = 20) %>%
#' get_downloads() %>% datasets()
#' long_set <- c(clean_ds, more_ds)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' }
#' @keywords internal
#' @noRd
clean <- function(x = NA, verbose = TRUE, ...) {
if (!missing(x)) {
UseMethod("clean", x)
} else {
UseMethod("clean", NA)
}
}
#' @title clean sites objects to remove duplicates.
#' @author Simon Goring \email{goring@wisc.edu}
#' @import gtools
#' @import lubridate
#' @importFrom progress progress_bar
#' @importFrom methods new
#' @param x sites, datasets, collunits that may have duplicates.
#' @param verbose parameter to prints out progress bar
#' @param ... Additional parameters associated with the call.
#' @description Function that removes duplicate objects such as sites,
#' datasets, or collection units.
#' @returns `sites` object
#' @examples {
#' clean_sites <- get_sites(sitename = "L%", limit = 1)
#' more_sites <- get_sites(sitename = "La%", limit = 1)
#' long_set <- c(clean_sites, more_sites)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' }
#' @keywords internal
#' @noRd
clean.sites <- function(x, verbose = TRUE, ...) {
siteids <- as.data.frame(x)$siteid
if (any(duplicated(siteids))) {
matched <- unique(siteids[duplicated(siteids)])
clean_sites <- filter(x, !siteid %in% matched)
messy_sites <- neotoma2::filter(x, siteid %in% matched)
pb <- progress_bar$new(total = length(matched))
for (i in matched) {
if (verbose == TRUE) {
pb$tick()
}
messy_site <- neotoma2::filter(messy_sites, siteid == i)
messy_cus <- clean(collunits(messy_site))
new_site <- messy_site[1]
new_site@sites[[1]]@collunits <- messy_cus
clean_sites <- c(clean_sites, new_site[[1]])
}
return(clean_sites)
} else {
return(x)
}
}
#' @title clean sites objects to remove duplicates.
#' @author Simon Goring \email{goring@wisc.edu}
#' @import gtools
#' @import lubridate
#' @importFrom progress progress_bar
#' @importFrom methods new
#' @param x sites, datasets, collunits that may have duplicates.
#' @param verbose parameter to prints out progress bar
#' @param ... Additional parameters associated with the call.
#' @description Function that removes duplicate objects such as sites,
#' datasets, or collection units.
#' @returns cleaned `collunits`
#' @examples \donttest{
#' clean_cols <- get_sites(sitename = "L%", limit = 1) %>%
#' collunits()
#' more_cols <- get_sites(sitename = "La%", limit = 1) %>%
#' collunits()
#' long_set <- c(clean_cols, more_cols)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' }
#' @keywords internal
#' @noRd
clean.collunits <- function(x, verbose = TRUE, ...) {
cuids <- as.data.frame(x)$collectionunitid
matched <- unique(cuids[duplicated(cuids)])
if (length(matched) == 0) {
return(x)
} else {
clean_cus <- x[which(!cuids %in% matched)]
for (i in matched) {
messy_cus <- new("collunits", collunits = x@collunits[cuids == i])
cu_ds <- clean(datasets(messy_cus))
newcu <- messy_cus[1]
newcu[[1]]@datasets <- cu_ds
clean_cus <- c(clean_cus, newcu)
}
}
return(clean_cus)
}
#' @title clean sites objects to remove duplicates.
#' @author Simon Goring \email{goring@wisc.edu}
#' @import gtools
#' @import lubridate
#' @importFrom progress progress_bar
#' @importFrom methods new
#' @param x sites, datasets, collunits that may have duplicates.
#' @param verbose parameter to prints out progress bar
#' @param ... Additional parameters associated with the call.
#' @description Function that removes duplicate objects such as sites,
#' datasets, or collection units.
#' @returns cleaned `datasets` after concatenation (no duplicates)
#' @examples \donttest{
#' clean_ds <- get_sites(sitename = "L%", limit = 1) %>%
#' get_downloads() %>% datasets()
#' more_ds <- get_sites(sitename = "La%", limit = 1) %>%
#' get_downloads() %>% datasets()
#' long_set <- c(clean_ds, more_ds)
#' length(long_set)
#' # By removing duplicates we get a smaller object.
#' length(clean(long_set))
#' }
#' @keywords internal
#' @noRd
clean.datasets <- function(x, verbose = TRUE, ...) {
dsids <- as.data.frame(x)$datasetid
return(x[which(!duplicated(dsids))])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.