R/createCodebook.R

#' Creates a codebook
#' 
#' @param df Dataframe from which to construct the codebook
#' 
#' @param outputName Name to be used for output codebook files
#' 
#' @param saveLocation Location to store created codebooks
#' 
#' @param overwrite if TRUE (default), overwrites existing files
#' 
#' @param reportTitle String to be passed to Codebook as title
#' 
#' @param outputType Type of file to create (currently only HTML)
#' 
#' @param saveRmd if TRUE (default), keeps intermediary markdown file.  Otherwise deletes
#' 
#' @export
#' 

createCodebook <- function(df, outputName="codebook", saveLocation="temp/", overwrite=T, reportTitle="My Codebook", outputType="html", saveRmd=TRUE){
  
  #construct names for .Rmd and .html files
  outRmd = paste0(outputName, ".Rmd")
  outHtml = paste0(outputName, ".html")
  
  #construct paths
  filepathRmd <- file.path(saveLocation, outRmd)
  filepathHtml <- file.path(saveLocation, outHtml)
  
  #get database name
  dfname <- deparse(substitute(df))
  print(paste("Constructing codebook for:", dfname))
  
  #delete existing files, if requested
  #TODO: don't delete here, overwrite when files are actually ready
  if (overwrite & file.exists(filepathRmd)){
    file.remove(filepathRmd)
  }
  if (overwrite & file.exists(filepathHtml)){
    file.remove(filepathHtml)
  }
  
  # Open file connections
  fileConn <<- file(filepathRmd, "w") 
  
  ####################################### HEADER ################################
  
  # write YAML preamble --------------------------------------------------------
  writer("---")
  writer(paste("title:", reportTitle))
  writer("subtitle: \"Autogenerated by Codebook Generator\"")
  writer(paste("date:", format(Sys.Date(),format="%B %d, %Y"))) 
  if (outputType=="pdf") writer("output: pdf_document")
  if (outputType=="html") writer("output:")
  if (outputType=="html") writer("    html_document:")
  if (outputType=="html") writer("        css: styles.css")
  if (outputType=="docx") writer("output: word_document")
  writer("---")
  
  
  # Setup code chunks ----------------------------------------------------------
  ##include packages as a first chunk
  secretChunk.wrapper("library(ggplot2)\nlibrary(pander)\nlibrary(kableExtra)\nlibrary(CodebookGenerator)\nlibrary(DT)")
  
  #reference input dataset as codebookDF
  secretChunk.wrapper(paste0("codebookDF <- ", dfname))
  
  #Add functions to initial wrapper
  # secretChunk.wrapper(
  #   paste0("codebookMetadataSummarize1 <-function(var, varName){\n  \n  #Calculate basic information\n  variableType=class(var)\n  numMissing=sum(is.na(var))\n  numValid=sum(!is.na(var))\n  numMissing = paste0(numMissing, \" (\", (numMissing/length(var))*100, \"%)\")\n  numUnique=length(unique(var))\n  \n  # Construct dataset of basic information\n  results <- data.frame(Feature=c(\n    \"Class\",\n    \"Valid obs.\",\n    \"NA obs.\",\n    \"Unique\"),\n    Results = c(\n      variableType,\n      numValid,\n      numMissing,\n      numUnique\n    )\n  )\n  return(results)\n}\n"
  #          ,"\n", "codebookDataTableSummarize1 <-function(var){\n  \n  varClass <- class(var)\n  \n  if(varClass %in% c(\"numeric\", \"integer\")){\n    \n    varMean=mean(var)\n    varSD=sd(var)\n    p100 = quantile(var,1)\n    p75 = quantile(var,.75)\n    p50 = quantile(var,.50)\n    p25 = quantile(var,.25)\n    p0 = quantile(var,0)\n    \n    results <- data.frame(Feature=c(\n      \"Mean\",\n      \"Standard Deviation\",\n      \"100% (Max)\",\n      \"75%\",\n      \"50% (Median)\",\n      \"25%\",\n      \"0% (Min)\"), \n      Results = c(\n        varMean,\n        varSD,\n        p100,\n        p75,\n        p50,\n        p25,\n        p0\n      )\n    )\n  }\n  else{\n    x <- as.data.frame(table(var))\n    x$percent <- x$Freq/length(var)\n    x$validPercent <- x$Freq/sum(!is.na(var))\n    results <-x\n  }\n  \n  \n  return(results)\n}"
  #   )
  # )
  # Data Frame Summary _--------------------------------------------------------
  writer(paste(replicate(30, "***"), collapse = ""))
  
  writer("## Data Frame Summary")

    
  chunk.wrapper("kable(CodebookGenerator::summarizeDataFrame(codebookDF), digits=1) %>%\nkable_styling(bootstrap_options = c(\"striped\", \"hover\",\"condensed\"),full_width = F)") 
  
  #writer(pander::pandoc.table.return(CodebookGenerator::summarizeDataFrame(df)))
  
  
  
  # Variable Summary -----------------------------------------------------------
  writer(paste(replicate(30, "***"), collapse = ""))
  writer("## Variable Summary")
  
  #chunk.wrapper("kable(CodebookGenerator::summarizeVariables(codebookDF), digits=1,escape = F) %>%\nkable_styling(bootstrap_options = c(\"striped\", \"hover\",\"condensed\"),full_width = F)") 
  chunk.wrapper("datatable(CodebookGenerator::summarizeVariables(codebookDF), escape=F, rownames=F, fillContainer=F, style=\"bootstrap\")")

  
  #or use data.table
  
  
  #tab <- SummarizeVariables(df)
  #writer(pander::pandoc.table.return(tab))
  
  #Variables <- names()
  #sumTab <- data.frame(labels=labs, Variable=vars, Class=classes, Unique=uniques, pMissing=missings")
  writer(paste(replicate(30, "***"), collapse = ""))
  
  
  ####################################### MAIN VARIABLE LOOP ################################
  #Main variable loop: for each column, write out results
  #writer("## Variables")
  varNames <- names(df)
  for (i in 1:length(df)){
    
    #Get key information
    var <- df[[i]]
    varName <- varNames[i]
   
    # Write header
    writer(paste0("### <a name=\"",varName,"\"></a>", varName))
    
    writer("")
    #write label (if one exists)
    if (!is.null(attr(var, "labels", exact=T))){
      writer(paste ("#### ",attr(var, "label", exact=T)))
    }
    varClass <-determineVarType(varName, df)
    #Write row of data of variable infromation
    row.wrapper_4col(
      writer.p(varClass),
      chunk.wrapper(printVariableOverview(var,varName)),
      chunk.wrapper(printsummarizeVariable(varName,varClass)),
      fig.wrapper(VisualizeVariable_string(var,varName)),
      varClass
    )
    
    #write divider before moving to next variable
    writer(paste(replicate(30, "***"), collapse = ""))
  }
  
  ####################################### FOOTER ################################
  ## Force flush and close connection
  flush(fileConn)
  close(fileConn)
  
  #Knit document
  rmarkdown::render(filepathRmd, 'html_document', outHtml)
  
}
MartinLBarron/CodebookGenerator documentation built on May 25, 2019, 12:23 p.m.