Nothing
#' Write a manual page for package dataset documentation
#'
#' @description
#' This function produces a \code{roxygen2} R manual (man) page for a dataset that will
#' be included in an R package. To be documented, the dataset needs to be in the global
#' environment. The new documentation template will be named to match the datasets and
#' will be saved in the R folder (i.e., using an \code{analysis} dataset will produce the
#' \code{R/analysis.Rd} file). The page will indicate if the dataset is a data frame
#' or tibble along with the number of rows and columns. For each variable documentation
#' will indicate the variables a type, factor level information (if appropriate), and a
#' generic description section. If the variable is labelled (using the labelled package
#' or packages which use labelled, like \code{tidyREDCap}) the variable label will be
#' used as the default description.
#'
#' @param the_dataset Dataset object (unquoted) or dataset as character (quoted)
#'
#' @importFrom labelled var_label
#'
#' @note You will need to import the `roxygen2` package and add \code{Roxygen: list(markdown = TRUE)}
#' to your DESCRIPTION file. If you made a project using rUM this happens automatically.
#'
#'
#' @return A \code{.Rd} file in the \code{man} package directory corresponding to the
#' name of the supplied dataset.
#'
#' @export
#'
#' @examples
#' if (interactive()) {
#' # Dataset object from Global Environment (unquoted)
#' write_man(mtcars)
#'
#' # Dataset object from Global Environment as character string (quoted)
#' write_man("mtcars")
#' }
write_man <- function(the_dataset) {
# Handle both quoted and unquoted dataset names
if (is.character(the_dataset) && length(the_dataset) == 1) {
# Input is a string:
# This allows write_man("mtcars")
the_dataset_name <- the_dataset
# Check if the dataset exists
if (!exists(the_dataset_name, envir = .GlobalEnv)) {
stop("Dataset '", the_dataset_name, "' not found in the global environment")
}
# Get the actual dataset object
the_dataset <- get(the_dataset_name, envir = .GlobalEnv)
} else {
# Input is an unquoted object:
# This allows write_man(mtcars)
the_dataset_name <- deparse(substitute(the_dataset))
# Check if the unquoted dataset exists
if (!exists(the_dataset_name, envir = .GlobalEnv)) {
stop("Dataset '", the_dataset_name, "' not found in the global environment")
}
}
# rUM needs labelled dependencies for labelled::var_label
# Check if the dataset exists in the global environment: use the quoted name
if (!exists(the_dataset_name, envir = .GlobalEnv)) {
stop("Dataset '", the_dataset_name, "' not found in the global environment")
}
# EDIT: We already have the dataset object in the_dataset parameter, so this section
# is being removed.
# dataset_obj <- get(the_dataset, envir = .GlobalEnv)
# Check if the object is a data.frame or tibble
if (!inherits(the_dataset, "data.frame")) {
stop("Object '", the_dataset_name, "' is not a data.frame or tibble")
}
# Check if R folder exists
if (!dir.exists("R")) {
stop("The R folder does not exist in the current directory")
}
# Construct the potential file path for the R documentation file
# Use the string name of the dataset for the file name
r_file_path <- file.path("R", paste0(the_dataset_name, ".R"))
# Check if the file already exists
if (file.exists(r_file_path)) {
stop("Documentation file '", r_file_path, "' already exists")
}
# Determine if it's specifically a tibble
is_tibble <- inherits(the_dataset, "tbl_df")
n_rows <- nrow(the_dataset)
n_cols <- ncol(the_dataset)
n_rows_formatted <-
format(n_rows, big.mark = ",", scientific = FALSE, trim = TRUE)
n_cols_formatted <-
format(n_cols, big.mark = ",", scientific = FALSE, trim = TRUE)
# Choose the appropriate data structure label
data_structure <- if (is_tibble) "tibble" else "data.frame"
# Create the file and open a connection to it
file_conn <- file(r_file_path, "w")
# Write the documentation content to the file using the string value (the_dataset_name)
cat(paste0("#' ", the_dataset_name, " dataset\n"), file = file_conn)
cat("#'\n", file = file_conn)
cat(paste0("#' @description Description of the ", the_dataset_name, " dataset goes here\n"), file = file_conn)
cat("#'\n", file = file_conn)
cat(paste0("#' @format A ", data_structure, " with ", n_rows_formatted, " rows and ", n_cols_formatted, " variables:\n"), file = file_conn)
cat("#' \\describe{\n", file = file_conn)
# Loop through each variable in the dataset
for (var_name in names(the_dataset)) {
cat(paste0("#' \\item{", var_name, "}{\n"), file = file_conn)
description <- if(!is.null(var_label(the_dataset[[var_name]]))){
the_label <-
var_label(the_dataset[[var_name]]) |>
.remove_braces() |>
.replace_brackets_with_backticks()
paste0(
"#' | *Description:* | ",
the_label,
" |\n"
)
} else {
paste0(
"#' | *Description:* | Description for ",
var_name,
" goes here |\n"
)
}
# see if a variable has a label
var_type <-
# check to see if the first class is labeled (from from labelVector)
if(class(the_dataset[[var_name]])[1] == "labelled"){
# Get primary class which will be the second class listed after
# "labelled" if data was labeled using labelVector instead of labelled
class(the_dataset[[var_name]])[2]
} else {
class(the_dataset[[var_name]])[1]
}
# Create markdown table based on variable type
if (var_type == "integer") {
cat("#'\n", file = file_conn)
cat("#' | *Type:* | integer |\n", file = file_conn)
cat("#' | -------------- | ------------- |\n", file = file_conn)
cat("#' | | |\n", file = file_conn)
cat(description, file = file_conn)
cat("#'\n", file = file_conn)
} else if (var_type == "factor") {
# For factor variables, include levels information
cat("#'\n", file = file_conn)
first_level <- levels(the_dataset[[var_name]])[1]
all_levels <- paste(levels(the_dataset[[var_name]]), collapse = ", ")
if (length(all_levels) > 100) {
warning(paste0(
"Variable '", var_name,
"' has more than 100 levels. Your dataset documentation may be huge! Did you mean to do this?"
))
}
cat(paste0("#' | *Type:* | factor (First/Reference level = `", first_level, "`) |\n"), file = file_conn)
cat("#' | -------------- | ---------------------------------------------------- |\n", file = file_conn)
cat("#' | | |\n", file = file_conn)
cat(description, file = file_conn)
cat("#' | | |\n", file = file_conn)
cat(paste0("#' | *Levels:* | `", all_levels, "` |\n"), file = file_conn)
cat("#'\n", file = file_conn)
} else {
# Generic description for other types
cat("#'\n", file = file_conn)
cat(paste0("#' | *Type:* | ", var_type, " |\n"), file = file_conn)
cat("#' | -------------- | ------------- |\n", file = file_conn)
cat("#' | | |\n", file = file_conn)
cat(description, file = file_conn)
cat("#'\n", file = file_conn)
}
cat("#' }\n", file = file_conn)
}
cat("#' }\n", file = file_conn)
cat("#' @source Where the data came from\n", file = file_conn)
cat(paste0("\"", the_dataset_name, "\""), file = file_conn)
# Close the connection
close(file_conn)
message("Documentation file created at ", r_file_path)
}
#----------------------------------------------------------------------------------------
# Helper functions for write_man()
#----------------------------------------------------------------------------------------
#' Helper for write_man (1)
#' @description
#' Function needed to clean labels for manual variable descriptions. Remove
#' {text} typically used for {other}
#'
#' @param text Character. The text to parse.
#' @noRd
.remove_braces <- function(text) {
gsub("\\{[^\\}]*\\}", "", text)
}
#' Helper for write_man (2)
#' @description
#' Function needed to clean labels for manual variable descriptions. Used
#' to replace square brackets (\code{[]}) with backticks \code{``} for the info piped
#' into variable labels.
#'
#' @param text Character. The text to parse.
#' @noRd
.replace_brackets_with_backticks <- function(text) {
gsub("\\[(.*?)\\]", "`\\1`", text)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.