Nothing
#' Printing reptSync and reptSplitCheck outcomes in a tidy way
#'
#' @description
#' Prints the data frame derived from \code{reptSync} or \code{reptSplitCheck} in a tidy way.
#' Optionally, it filters the data frame for species with unresolved nomenclature only.
#'
#' @param df The data frame derivated from \code{reptSync}
#' @param filter Logical. If \code{TRUE} will print only the species entries with unresolved nomenclature (e.g.: ambiguous or not_found). Default is \code{TRUE}
#'
#' @return Invisibly returns `NULL`. Used for side-effect printing only.
#'
#' @examples
#' df <- data.frame(
#' query = c("Genus epithet 1",
#' "Genus epithet 2",
#' "Genus epithet 3",
#' "Genus epithet 4",
#' "Genus epithet 5"),
#' RDB = c("Genus epithet 1.1; Genus epithet 1.2",
#' "Genus epithet 2",
#' "Genus epithet 3",
#' "Not found",
#' "Genus epithet 5.1; Genus epithet 5.2; Genus epithet 5.3"),
#' status = c("ambiguous",
#' "updated",
#' "up_to_date",
#' "not_found",
#' "ambiguous"),
#' stringsAsFactors = FALSE
#' )
#' reptTidySyn(df, filter = c("ambiguous", "not_found"))
#'
#' @export
reptTidySyn <- function(df, filter = NULL) {
if (!is.data.frame(df) || ncol(df) < 2) {
stop("Input must be a data frame with at least two columns.")
}
if ("url" %in% names(df)) df <- df[ , !(names(df) == "url")]
statuses <- c("up_to_date", "updated", "ambiguous", "not_found", "merge", "check_split", "updated_typo", "fuzzy_ambiguous")
if (is.null(filter)) filter <- statuses
df <- df[df$status %in% filter,]
if(length(filter) == 1 && filter=="merge"){
df <- df[order(df$RDB),]
} else {
df <- df[order(df$query),]}
spacer <- " " # 3 spaces between columns
col_names <- names(df)
# Determine column widths
col_widths <- sapply(seq_along(df), function(i) {
vals <- if (i == 1) df[[i]] else unlist(strsplit(df[[i]], ";\\s*"))
max(nchar(c(col_names[i], vals)), na.rm = TRUE)
})
# Total line width
total_width <- sum(col_widths) + (length(col_widths) - 1) * nchar(spacer)
# Print header
header_line <- paste(
mapply(function(name, width) sprintf("%-*s", width, name), col_names, col_widths),
collapse = spacer
)
cat(header_line, "\n")
cat(strrep("-", total_width), "\n")
# Print each row
for (i in seq_len(nrow(df))) {
# Split all columns except first into lists
value_lists <- lapply(df[-1], function(col) strsplit(col[i], ";\\s*")[[1]])
max_lines <- max(sapply(value_lists, length), 1)
for (j in seq_len(max_lines)) {
row_output <- c()
for (k in seq_along(df)) {
if (k == 1) {
# First column: only show species name in the first line
row_output[k] <- if (j == 1) sprintf("%-*s", col_widths[k], df[[k]][i])
else sprintf("%-*s", col_widths[k], "")
} else {
val <- value_lists[[k - 1]]
this_val <- if (j <= length(val)) val[j] else ""
row_output[k] <- sprintf("%-*s", col_widths[k], this_val)
}
}
cat(paste(row_output, collapse = spacer), "\n")
}
}
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.