#' @title Generate documentation in markdown for a dataset
#' @name doc_data
#'
#' @description Generates documentation for a dataset
#' @param x dataset filepath (e.g., csv/txt file)
#' @param overwrite whether to overwrite documentation (defaults to FALSE)
#'
#' @author Hause Lin
#' @usage doc_data(x, overwrite = FALSE)
#'
#' @return markdown file with the same name as input x in the same directory
#' @import data.table
#' @import stringi
#' @examples
#' \dontrun{doc_data("./data/mcars.csv")}
#' @export
#'
doc_data <- function(x, overwrite = FALSE) {
if (!is.character(x) | !file.exists(x)) {
stop("Input must be an existing filepath!")
}
# stopifnot(is.character(x))
# generate doc md outpule file
outfile <- generate_md_string(x)
if (!overwrite) {
if (file.exists(outfile)) {
stop("Doc already exists! Use doc_open or doc_refresh instead.")
}
}
# read template md doc
# template_doc <- read_doc_md("./inst/extdata/template.md")
# save(template_doc, file = "./R/sysdata.rda", version = 2)
# use_data(template_doc, internal = TRUE, overwite = TRUE)
# save(template_doc, file = "./data/sysdata.RData")
# save(template_doc, file = "./R/sysdata.rda")
if (overwrite) {
existing_doc <- read_doc_md(outfile)
}
# get filename
idx <- gregexpr(pattern = .Platform$file.sep, text = x, fixed = TRUE)[[1]]
if (idx[1] != -1) {
idx <- idx[[length(idx)]]
filename <- substring(text = x, first = idx + 1)
} else {
filename <- x
}
# read data
temp_dat <- data.table::fread(x)
c <- ncol(temp_dat)
r <- nrow(temp_dat)
# overwrite template doc with dataset info
for (i in 1:length(template_doc)) {
template_doc[i] <- gsub(pattern = "@time@", replacement = Sys.time(), x = template_doc[i])
template_doc[i] <- gsub(pattern = "@filename@", replacement = filename, x = template_doc[i])
template_doc[i] <- gsub(pattern = "@rows@", replacement = r, x = template_doc[i])
template_doc[i] <- gsub(pattern = "@columns@", replacement = c, x = template_doc[i])
}
if (overwrite) {
idx_start <- grep(pattern = "## Data source", x = template_doc, fixed = TRUE) # beginning of table
idx_end <- grep(pattern = "## Columns", x = template_doc, fixed = TRUE) # beginning of table
part1 <- template_doc[1:(idx_start-1)]
part2 <- template_doc[idx_end:(length(template_doc))]
idx_start <- grep(pattern = "## Data source", x = existing_doc, fixed = TRUE) # beginning of table
idx_end <- grep(pattern = "## Columns", x = existing_doc, fixed = TRUE) # beginning of table
existing <- existing_doc[idx_start:(idx_end-1)]
template_doc <- c(part1, existing, part2)
}
# convert data type and desc to md table
col_types <- data.table::data.table(Column = names(temp_dat), Type = apply(temp_dat, 2, class), Description = "")
md_table <- dat2string(col_types)
if (overwrite) {
idx <- grep(pattern = "| ", x = existing_doc, fixed = TRUE) # beginning of table
c1 <- data.frame(string2dat(existing_doc[idx]))
c2 <- col_types
i <- 1
for (i in 1:nrow(c2)) { # replace empty descriptions with existing descriptions
temp_c1 <- gsub(" ", "", c1$Column[i])
if (temp_c1 %in% gsub(" ", "", c2$Column)) {
temp_c2_idx <- which(gsub(" ", "", c2$Column) == temp_c1)
c2[temp_c2_idx, "Type"] <- c1[i, "Type"]
c2[temp_c2_idx, "Description"] <- c1[i, "Description"]
}
}
md_table <- dat2string(c2)
}
idx <- grep(pattern = "| Column", x = template_doc, fixed = TRUE) # beginning of table
mdpart1 <- template_doc[1:(idx-1)]
mdpart2 <- template_doc[(idx+2):length(template_doc)]
new_doc <- c(mdpart1, md_table, mdpart2) # insert md table
# save md doc
f <- file(outfile)
writeLines(new_doc, f)
close(f)
# open md doc in R console
doc_open(outfile)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.