#' @importFrom dplyr count filter select right_join anti_join arrange
#' @importFrom stats na.omit
duplicate_as_unresolved <- function(df){
  ## Treat ambiguous matches as unresolved.
  ##
  ## `df` must carry a column named `sort` holding one id per input.
  ## Any id that appears on more than one row is removed entirely, and
  ## the full set of distinct ids is then joined back on, so a dropped
  ## id comes back as a single row (the original notes describe this as
  ## "replace as NA"; the exact fill depends on safe_right_join()).
  ## The result is ordered by `sort`.
  ##
  ## An alternative would be to resolve multi-matches by picking one
  ## row (e.g. with top_n) instead of discarding them.

  # Placeholders so the bare column names used below are bound symbols.
  sort <- "sort"
  n <- "n"

  # Every distinct input id, so none are lost once duplicates are dropped.
  all_ids <- dplyr::distinct(dplyr::select(df, "sort"))

  # Ids that occur on more than one row.
  id_counts <- dplyr::count(df, sort, sort = TRUE)
  repeated_ids <- dplyr::select(dplyr::filter(id_counts, n > 1), "sort")

  # Keep only the unambiguous rows ...
  single_hits <- dplyr::anti_join(df, repeated_ids, by = "sort")

  # ... then restore the dropped ids and order the result.
  restored <- safe_right_join(single_hits, all_ids, by = "sort")
  dplyr::arrange(restored, sort)
}
#' @importFrom dplyr count mutate select arrange row_number
#' @importFrom dplyr pull top_n group_by ungroup
#' @importFrom stats na.omit
#' @importFrom utils head
take_first_duplicate <- function(df){
  ## Collapse `df` so that each `sort` id appears on at most one row.
  ##
  ## `df` needs the columns `sort` and `scientificName`. When some id
  ## is repeated, rows are ordered by `scientificName`, numbered, and
  ## for each id only the row with the highest number is kept (i.e.
  ## the one sorting last by `scientificName`). That result is ordered
  ## by `sort` and carries the helper column `row_num`.
  ##
  ## When no id is repeated, or `df` has no rows, `df` is returned as
  ## is (after collecting, if it was a remote table) with no reordering
  ## and no `row_num` column.

  ## Will not work on most remote databases, so pull the data locally.
  if (inherits(df, "tbl_dbi")) {
    df <- dplyr::collect(df)
  }

  # avoid complaints about NSE terms
  scientificName <- "scientificName"
  sort <- "sort"
  row_num <- "row_num"
  n <- "n"

  ## Skip this if sort index is never duplicated
  max_repeated <- df %>%
    dplyr::count(sort, sort = TRUE) %>%
    utils::head(1) %>%
    dplyr::pull(n)

  ## A zero-row input yields a zero-length count; without the length
  ## guard the comparison below would fail with
  ## "argument is of length zero".
  if (length(max_repeated) == 0L || max_repeated == 1) {
    return(df)
  }

  ## adding row_number avoids top_n()
  ## collapsing repeated scientificNames
  ## when sort is already unique.
  df %>%
    dplyr::arrange(scientificName) %>%
    dplyr::mutate(row_num = dplyr::row_number()) %>%
    dplyr::group_by(sort) %>%
    dplyr::filter(row_num == max(row_num, na.rm = TRUE)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(sort)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.