#' Creates a codebook
#'
#' @param df Dataframe from which to construct the codebook
#'
#' @param outputName Name to be used for output codebook files
#'
#' @param saveLocation Location to store created codebooks
#'
#' @param overwrite if TRUE (default), overwrites existing files
#'
#' @param reportTitle String to be passed to Codebook as title
#'
#' @param outputType Type of file to create (currently only HTML)
#'
#' @param saveRmd if TRUE (default), keeps intermediary markdown file. Otherwise deletes
#'
#' @export
#'
createCodebook <- function(df, outputName="codebook", saveLocation="temp/", overwrite=T, reportTitle="My Codebook", outputType="html", saveRmd=TRUE){
#construct names for .Rmd and .html files
outRmd = paste0(outputName, ".Rmd")
outHtml = paste0(outputName, ".html")
#construct paths
filepathRmd <- file.path(saveLocation, outRmd)
filepathHtml <- file.path(saveLocation, outHtml)
#get database name
dfname <- deparse(substitute(df))
print(paste("Constructing codebook for:", dfname))
#delete existing files, if requested
#TODO: don't delete here, overwrite when files are actually ready
if (overwrite & file.exists(filepathRmd)){
file.remove(filepathRmd)
}
if (overwrite & file.exists(filepathHtml)){
file.remove(filepathHtml)
}
# Open file connections
fileConn <<- file(filepathRmd, "w")
####################################### HEADER ################################
# write YAML preamble --------------------------------------------------------
writer("---")
writer(paste("title:", reportTitle))
writer("subtitle: \"Autogenerated by Codebook Generator\"")
writer(paste("date:", format(Sys.Date(),format="%B %d, %Y")))
if (outputType=="pdf") writer("output: pdf_document")
if (outputType=="html") writer("output:")
if (outputType=="html") writer(" html_document:")
if (outputType=="html") writer(" css: styles.css")
if (outputType=="docx") writer("output: word_document")
writer("---")
# Setup code chunks ----------------------------------------------------------
##include packages as a first chunk
secretChunk.wrapper("library(ggplot2)\nlibrary(pander)\nlibrary(kableExtra)\nlibrary(CodebookGenerator)\nlibrary(DT)")
#reference input dataset as codebookDF
secretChunk.wrapper(paste0("codebookDF <- ", dfname))
#Add functions to initial wrapper
# secretChunk.wrapper(
# paste0("codebookMetadataSummarize1 <-function(var, varName){\n \n #Calculate basic information\n variableType=class(var)\n numMissing=sum(is.na(var))\n numValid=sum(!is.na(var))\n numMissing = paste0(numMissing, \" (\", (numMissing/length(var))*100, \"%)\")\n numUnique=length(unique(var))\n \n # Construct dataset of basic information\n results <- data.frame(Feature=c(\n \"Class\",\n \"Valid obs.\",\n \"NA obs.\",\n \"Unique\"),\n Results = c(\n variableType,\n numValid,\n numMissing,\n numUnique\n )\n )\n return(results)\n}\n"
# ,"\n", "codebookDataTableSummarize1 <-function(var){\n \n varClass <- class(var)\n \n if(varClass %in% c(\"numeric\", \"integer\")){\n \n varMean=mean(var)\n varSD=sd(var)\n p100 = quantile(var,1)\n p75 = quantile(var,.75)\n p50 = quantile(var,.50)\n p25 = quantile(var,.25)\n p0 = quantile(var,0)\n \n results <- data.frame(Feature=c(\n \"Mean\",\n \"Standard Deviation\",\n \"100% (Max)\",\n \"75%\",\n \"50% (Median)\",\n \"25%\",\n \"0% (Min)\"), \n Results = c(\n varMean,\n varSD,\n p100,\n p75,\n p50,\n p25,\n p0\n )\n )\n }\n else{\n x <- as.data.frame(table(var))\n x$percent <- x$Freq/length(var)\n x$validPercent <- x$Freq/sum(!is.na(var))\n results <-x\n }\n \n \n return(results)\n}"
# )
# )
# Data Frame Summary _--------------------------------------------------------
writer(paste(replicate(30, "***"), collapse = ""))
writer("## Data Frame Summary")
chunk.wrapper("kable(CodebookGenerator::summarizeDataFrame(codebookDF), digits=1) %>%\nkable_styling(bootstrap_options = c(\"striped\", \"hover\",\"condensed\"),full_width = F)")
#writer(pander::pandoc.table.return(CodebookGenerator::summarizeDataFrame(df)))
# Variable Summary -----------------------------------------------------------
writer(paste(replicate(30, "***"), collapse = ""))
writer("## Variable Summary")
#chunk.wrapper("kable(CodebookGenerator::summarizeVariables(codebookDF), digits=1,escape = F) %>%\nkable_styling(bootstrap_options = c(\"striped\", \"hover\",\"condensed\"),full_width = F)")
chunk.wrapper("datatable(CodebookGenerator::summarizeVariables(codebookDF), escape=F, rownames=F, fillContainer=F, style=\"bootstrap\")")
#or use data.table
#tab <- SummarizeVariables(df)
#writer(pander::pandoc.table.return(tab))
#Variables <- names()
#sumTab <- data.frame(labels=labs, Variable=vars, Class=classes, Unique=uniques, pMissing=missings")
writer(paste(replicate(30, "***"), collapse = ""))
####################################### MAIN VARIABLE LOOP ################################
#Main variable loop: for each column, write out results
#writer("## Variables")
varNames <- names(df)
for (i in 1:length(df)){
#Get key information
var <- df[[i]]
varName <- varNames[i]
# Write header
writer(paste0("### <a name=\"",varName,"\"></a>", varName))
writer("")
#write label (if one exists)
if (!is.null(attr(var, "labels", exact=T))){
writer(paste ("#### ",attr(var, "label", exact=T)))
}
varClass <-determineVarType(varName, df)
#Write row of data of variable infromation
row.wrapper_4col(
writer.p(varClass),
chunk.wrapper(printVariableOverview(var,varName)),
chunk.wrapper(printsummarizeVariable(varName,varClass)),
fig.wrapper(VisualizeVariable_string(var,varName)),
varClass
)
#write divider before moving to next variable
writer(paste(replicate(30, "***"), collapse = ""))
}
####################################### FOOTER ################################
## Force flush and close connection
flush(fileConn)
close(fileConn)
#Knit document
rmarkdown::render(filepathRmd, 'html_document', outHtml)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.