rawdata/import.R

source_dir <- 'dta'
files <- tools::file_path_sans_ext(list.files(source_dir))

df_list <- lapply(files, function(f){
    foreign::read.dta(sprintf("%s/%s.dta", source_dir, f))
})

export <- function(x, n) {# x is the dataset, n its name
  ## now save in proper name and export
  eval(parse(text = sprintf("%s <- x", n)))
  save(list = n,
       file = sprintf("../data/%s.rda", n),
       compress = "bzip2")
}
Map(export, df_list, as.list(files))


## ## -----
## ## Utils
## ## -----
## "%without%" <-  function(x, y) x[!(x %in% y)]

## ## Function for 'extracting' description from naming scheme
## description <- function(x) {
##   if (grepl("^E\\d\\d", x, perl = TRUE)) {
##     ## E - Exercises match
##     sub("^E(\\d\\d)(.*)" ,
##         "Dataset for Exercise \\2, Chapter \\1",
##         x,
##         perl = TRUE)
##   } else if (grepl("^T\\d\\d\\d{1,2}$", x, perl = TRUE)) {
##     ## T - Tables
##     sub("^T(\\d\\d)(\\d{1,2})$" ,
##         "Dataset from Table \\1.\\2",
##         x,
##         perl = TRUE)
##   } else if (grepl("^X\\d\\d\\d{1,2}$", x, perl = TRUE)) {
##     ## X - Examples
##     sub("^X(\\d\\d)(\\d{1,2})$" ,
##         "Dataset for Example \\2, Chapter \\1",
##         x,
##         perl = TRUE)
##   } else if (grepl("^A\\d\\d[A-Z]$", x, perl = TRUE)) {
##     ## A - Appendixes
##     sub("^A(\\d\\d)([A-Z])$" ,
##         "Dataset for Appendix \\2, Chapter \\1",
##         x,
##         perl = TRUE)
##   } else if (grepl("^S\\d\\d\\d{1,2}$", x, perl = TRUE)) {
##     ## S - Sections
##     sub("^S(\\d\\d)(\\d{1,2})$" ,
##         "Dataset for Section \\1.\\2",
##         x,
##         perl = TRUE)
##   } else if (grepl("^Q\\d\\d-\\d-\\d$", x, perl = TRUE)) {
##     ## Q - Equations
##     sub("^Q(\\d\\d)-(\\d)-(\\d)$" ,
##         "Dataset for Equation \\1.\\2.\\3",
##         x,
##         perl = TRUE)
##   } else
##     ## When anything else fails ...
##     sprintf("%s data", x)
## }

## ## ------
## ## Status
## ## ------
## TODO <- c("")
## processed <- list.files("cleaned") %without% TODO

## ## ------
## ## Import
## ## ------
## import.fun <- function(path) {
##   cat("Importing", path , "\n")
##   res <- read.table(file = path, header = TRUE, fill = TRUE)
##   res <- res[, names(res) %without% "id"]
##   ## filenam from cleaned directory
##   fileName <- strsplit(path, "/")[[1]][2]
##   ## Exceptions to data.frame of fully numeric data here ...
##   if (fileName == "X161") {
##     res <- unname(as.matrix(res))
##   } else if (fileName == "E07C") {
##     res$x4 <- factor(res$x4, levels = c(-1,1), labels = c("No","Yes"))
##   } else if (fileName == "T032") {
##     res$z <- factor(res$z, levels = 0:1, labels = c("Virgo","Coma"))
##   } else if (fileName == "E14B") {
##     res <- within(res, {
##       tiller <- factor(tiller)
##       type <- factor(type, levels = c("w","c"), labels = c("Waldron","Ciano"))
##       nrate <- factor(nrate, levels = c(0,50,270))
##     })
##   } else if (fileName == "E14C") {
##     res$operator <- factor(res$operator, levels = 1:3)
##   } else if (fileName == "E14R") {
##     res <- within(res, {
##       boot <- factor(boot, levels = c(-1,1), labels = c("A","B"))
##       temperature <- factor(temperature)
##       subject <- factor(subject)
##     })
##   } else if (fileName == "E23E") {
##     res <- within(res, {
##       a <- factor(a, levels = c(-1,1), labels = c("No","Yes"))
##       b <- factor(b, levels = c(-1,1), labels = c("No","Yes"))
##       c <- factor(c, levels = c(-1,1), labels = c("No","Yes"))
##       d <- factor(d, levels = c(-1,1), labels = c("No","Yes"))
##     })
##   } else if (fileName == "E23F") {
##     res <- within(res, {
##       group <- factor(group)
##     })
##   } else if (fileName %in% c("E23G-1","E23G-2")) {
##     res <- within(res, {
##       level <- factor(level)
##       tube <- factor(tube)
##     })
##   } else if (fileName == "E23H") {
##     res <- within(res, {
##       matings <- factor(matings, levels = 1:4,
##                         labels = c("DD","DS","SD","SS"))
##     })
##   }
##   ## END Exceptions
##   ## Now, attach a bit of metadata (useful in what follows) to the object
##   attr(res, "fileName") <- fileName
##   dfName <- gsub("-","_", fileName)
##   dfName <- paste0("ds", dfName)
##   dfName <- tolower(dfName)
##   attr(res, "dfName") <- dfName
##   ## Description of the data
##   attr(res, "dfDescription") <- description(fileName)
##   res
## }
## pathList <- as.list( paste0("cleaned", "/", processed) )
## dfList <- lapply(pathList, import.fun)

## ## -------------------
## ## making roxygen2 doc
## ## -------------------
## doc.fun <- function(x){
  
##   dfName <- attr(x, "dfName")
##   ## Header section
##   Header <- c(
##     sprintf("#' %s", attr(x, "dfDescription") ),
##     "#' ",
##     sprintf("#' %s", attr(x, "dfDescription")),
##     "#' "
##   )
##   ## Format/Describe section (if data.frame give names of columns,
##   ## otherwise if matrix list only ncol and nrow.
##   makeRdItems <- function(item){
##     unlist(lapply(item, function(x) sprintf("#' \\item{%s}{}", x)))
##   }
##   Format <- sprintf("#' @format A \\code{%s} with %s rows and %s %s%s",
##                     class(x), nrow(x), ncol(x),
##                     ifelse(is.matrix(x), "columns", "variables"),
##                     ifelse(is.matrix(x), ".", ":"))
##   if (class(x) == "data.frame")
##     Format <- c(Format , "#' \\describe{", makeRdItems(names(x)), "#' }")

##   ## The source book
##   Source <- paste0("#' @source Draper, N.R., Smith, H., (1998) ",
##                    "Applied Regression Analyis, 3rd ed., ",
##                    "New York: Wiley")
##   ## Simple examples
##   Examples <- c("#' @examples",
##                 sprintf("#' %s", dfName))
##   ## and the name of the dataset as string
##   Dataset <- sprintf("'%s'", dfName)
##   ## write all to the appropriate .R file
##   writeLines(text = c(Header, Format, Source, Examples, Dataset),
##              con = sprintf("../R/%s.R", dfName))
## }
## lapply(dfList, doc.fun)

## ## ---------------
## ## Standard export
## ## ---------------
## export.fun <- function(x) {
##   dfName <- attr(x, "dfName")
##   ## remove used metadata
##   myAttrs <- c("fileName", "dfName", "dfDescription")
##   attributes(x)[myAttrs] <- NULL
##   ## now save in proper name and export
##   eval(parse(text = sprintf("%s <- x", dfName)))
##   save(list = c(dfName),
##        file = sprintf("../data/%s.rda", dfName),
##        compress = "bzip2")
## }
## lapply(dfList, export.fun)
lbraglia/ala2e documentation built on Feb. 1, 2022, 12:29 a.m.