R/setupfile.R

Defines functions setupfile

Documented in setupfile

setupfile <- function(lbls = "", type="all", csv = "", miss = "", uniqueid = "",
                      SD = "", delimiter = ",", OS = "windows", outfile = "",
                      forcenum = c(""), ...) {
    
    # change the intrnlbls argument into a very unique name, just in case the
    # list object in the external R file(s) are named "intrnlbls" as well
    
    intrnlbls <- lbls
    intrnlbls_objname <- deparse(substitute(lbls))
    rm(lbls)
    
    other.args <- list(...)
    
    if ("pathIsFolder" %in% names(other.args)) {
        pathIsFolder <- other.args$pathIsFolder
    }
    
    saveFile <- FALSE
    if ("saveFile" %in% names(other.args)) {
        saveFile <- other.args$saveFile
    }
    
    
    if (OS == "") {
        OS <- Sys.info()[['sysname']]
    }
    
    getNofDecimals <- function(number) {
        decs <- nchar(format(abs(number),scientific=FALSE))-(trunc(log10(max(1,trunc(abs(number)))))+1)-1
        return(ifelse(decs < 0, 0, decs))
    }
    
    
    
    if (all(is.character(intrnlbls))) { # all() just in case someone provides a vector by mistake
        
        if (length(intrnlbls) > 1) {
            cat("\n")
            stop("The lbls argument should contain a single path to the list object.\n\n", call. = FALSE)
        }
        
        xmlfiles <- FALSE
        
        labelist <- treatPath(intrnlbls, type = "R")
        
        if (length(labelist) == 1) {
            labelist <- treatPath(intrnlbls, type = "XML")
            if (length(labelist) == 1) {
                cat("\n")
                stop(gsub("XML", "R or .XML", labelist), call. = FALSE)
            }
            else {
                xmlfiles <- TRUE
            }
        }
        else if (length(labelist$fileext) == 1) {
            if (labelist$fileext == "XML") {
                xmlfiles <- TRUE
            }
        }
        
        pathIsFolder <- length(labelist$fileext) > 1
        
        if (!file.exists("Setup files")) {
            dir.create("Setup files")
        }
        
        csvdatadir <- FALSE # by default
        
        
        # now trying to assess what the csv argument is
        # it can be an object containing csv data, or
        # it can be a string containing a path to the data
        
        
        
        if (all(is.character(csv))) {
            if (csv != "") {
                if (length(csv) > 1) {
                    cat("\n")
                    stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
                }
                
                csvlist <- treatPath(csv, type = "csv")
                if (length(csvlist) > 1) {
                    datadir <- csvlist$completePath
                    csvdatadir <- TRUE
                }
                else {
                    cat("\nNOTE:", csvlist)
                          # since "csvlist" is now an error message from treatPath()
                }
            }
            else {
                # it's important to differentiate between "data" and "Data", for OSs that are case sensitive
                csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "data"))
                datathere <- csvdatadir
                csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "Data"))
                
                if (csvdatadir) {
                    csvlist <- treatPath(datadir, type = "csv")
                    if (length(csvlist) == 1) {
                        csvdatadir <- FALSE
                        cat(paste("\nNOTE: There is a ", ifelse(datathere, "data", "Data"), " directory within ",
                            labelist$completePath, ". "), csvlist)
                    }
                }
            }
        }
        
        
        if (csvdatadir) {
            csvfiles <- csvlist$files
            csvnames <- csvlist$filenames
            csvext <- csvlist$fileext
            
            cat ("Processing (including data directory):\n")
        }
        else {
            
            if (is.data.frame(csv)) {
                if (length(labelist$files) > 1) {
                    cat("\n")
                    stop("There are multiple files containing labels and only one csv file provided.\n\n", call. = FALSE)
                }
            }
            
            cat("Processing (no data directory):\n")
        }
        
        
        # when pathIsFolder, it is unlikely that uniqueid is the same for all data files
        # so set uniqueid to "" to ignore it.
        
        # if (pathIsFolder) {
        #     uniqueid <- ""
        # }
        
        for (i in seq(length(labelist$files))) {
            
            if (xmlfiles) {
                intrnlblsObject <- getMetadata(file.path(labelist$completePath, labelist$files[i]), fromsetupfile = TRUE, saveFile = saveFile)
            }
            else {
                aa <- ls()
                
                tryCatch(eval(parse(file.path(labelist$completePath, labelist$files[i]))), error = function(x) {
                    stop(paste("\nThere is an error associated with the file \"", labelist$files[i], "\", see below:\n       ", gsub("Error in ", "", as.character(x)), sep=""), call. = FALSE)
                })
                
                bb <- ls()
                bb <- bb[-which(bb == "aa")]
            }
            
            currentdir <- getwd()
            
            if (csvdatadir) {
                
                if (labelist$filenames[i] %in% csvnames) {
                    cat(labelist$filenames[i], "\n")
                    position <- match(labelist$filenames[i], csvnames)
                    
                    
                    for (j in seq(length(position))) {
                        
                        if (csvext[position[j]] %in% c("CSV", "CSV.GZ")) {
                                                                                                   # delimiter is already set from the function's formal argument
                            csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep = delimiter, header = TRUE, as.is = TRUE)
                            
                            if (ncol(csvreadfile) == 1) {
                                delimiter <- getDelimiter(file.path(datadir, csvfiles[position[j]]))
                                
                                if (delimiter == "unknown") {
                                    stop(paste("Unknown column separator for the file", csvfiles[position[j]],
                                               "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
                                }
                                
                                csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep = delimiter, header = TRUE, as.is = TRUE)
                            }
                            
                            
                            if (!xmlfiles) {
                                intrnlblsObject <- get(setdiff(bb, aa))
                            }
                            
                            tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csvreadfile, uniqueid = uniqueid, SD = SD,
                                            delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
                                error = function(x) {
                                    # if no sink() is needed, an invisible warning message will be returned
                                    tryCatch(sink(), warning=function(y) return(invisible(y)))
                                    setwd(currentdir)
                                    cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
                                    cat(as.character(x))
                                })
                        }
                    }
                }
                else {
                    cat(labelist$filenames[i], "(no .csv file)", "\n")
                    if (!xmlfiles) {
                        intrnlblsObject <- get(setdiff(bb, aa))
                    }
                    
                    tryCatch(Recall(intrnlblsObject, type = type, miss = miss, uniqueid = uniqueid, SD = SD, 
                                    delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
                        error = function(x) {
                            tryCatch(sink(), warning=function(y) return(invisible(y)))
                            setwd(currentdir)
                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
                            cat(as.character(x))
                        })
                }
            }
            else {
                cat(labelist$filenames[i], "\n")
                
                if (is.data.frame(csv)) {
                    if (length(labelist$filenames) == 1) {
                        if (!xmlfiles) {
                            intrnlblsObject <- get(setdiff(bb, aa))
                        }
                        tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csv, uniqueid = uniqueid, SD = SD,
                                        delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
                        error = function(x) {
                            tryCatch(sink(), warning=function(y) return(invisible(y)))
                            setwd(currentdir)
                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
                            cat(as.character(x))
                        })
                    }
                }
                else {
                    # there is really no csv data
                        if (!xmlfiles) {
                            intrnlblsObject <- get(setdiff(bb, aa))
                        }
                        tryCatch(Recall(intrnlblsObject, type = type, miss = miss, uniqueid = uniqueid, SD = SD, 
                                        delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
                        error = function(x) {
                            tryCatch(sink(), warning=function(y) return(invisible(y)))
                            setwd(currentdir)
                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
                            cat(as.character(x))
                        })
                }
            }
            
            if (!xmlfiles) {
                rm(list = c(eval(setdiff(bb, aa)), "bb", "aa"))
            }
        }
        
        # if (pathIsFolder & (type == "all" | type == "R")) {
        #     cat("\nMissing values not treated for R when using an entire directory.\n")
        #     cat("Run individual commands and specify the uniqueid for each dataset.\n")
        # }
        
        cat("\nSetup files created in:\n", file.path(currentdir, "Setup files"), "\n\n", sep="")
        
        return(invisible())
    }
    
    
    
    
    csvlist <- NULL # initialization
    if (all(is.character(csv))) {
        if (all(csv != "")) {
            if (length(csv) > 1) {
                cat("\n")
                stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
            }
            
            csvlist <- treatPath(csv, type = "CSV")
            if (length(csvlist) > 1) {
                # no error
                if (length(csvlist$files) > 1) {
                    cat("\n")
                    stop("There is only one object containing labels and multiple csv files.\n\n", call. = FALSE)
                }
            }
            else {
                # There is a single string returned by treatPath(), with an error message
                cat("\nNOTE:", csvlist)
                csv <- "" # back to the default value
            }
        }
    }
    
    
    
    if (is.null(names(intrnlbls)) | !all(names(intrnlbls) %in% c("varlab", "vallab"))) {
        cat("\n")
        stop("The object does not contain labels for variables and/or values.\n\n", call. = FALSE)
    }
    
    if (!(type %in% c("SPSS", "Stata", "SAS", "R", "all"))) {
        cat("\n")
        stop("The argument <type> can only be: \"SPSS\", \"Stata\", \"SAS\", \"R\", or \"all\".\n\n", call. = FALSE)
    }
    
    enter <- getEnter(OS=OS)
    
    varnames <- names(intrnlbls$varlab)
    maxchars <- max(nchar(varnames))
    varcheck <- rep(0, length(varnames))
    formats <- FALSE
    
    csv_is_df <- is.data.frame(csv)
    
    csv_is_path <- FALSE
    if (length(csv) == 1) { # csv is a character vector of length 1, i.e. a path 
        if (is.character(csv)) {
            if (csv != "") {
                csv_is_path <- TRUE
            }
        }
    }
    
    if (csv_is_df | csv_is_path) {
        
        if (!is.null(csvlist)) {
                                                                                           # delimiter is already set from the function's formal argument
            csvreadfile <- read.csv(file.path(csvlist$completePath, csvlist$files[1]), sep = delimiter, header = TRUE, as.is=TRUE)
            
            if (ncol(csvreadfile) == 1) {
            
                delimiter <- getDelimiter(file.path(csvlist$completePath, csvlist$files[1]))
                
                if (delimiter == "unknown") {
                    stop(paste("Unknown column separator for the file", csvlist$files[1],
                               "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
                }
                
                csvreadfile <- read.csv(file.path(csvlist$completePath, csvlist$files[1]), sep = delimiter, header = TRUE, as.is=TRUE)
            }
            
            # cat("\n")
            # cat("Found \"", csvlist$files[1], "\" in the directory \"", csvlist$completePath, "\". Using that as the .csv file.\n\n", sep="")
            
            csv <- csvreadfile
        }
        
        csvnames <- names(csv)
        csvformats <- sasformats <- rep("", length(csvnames))
        if (!is.data.frame(csv)) {
            cat("\n")
            stop("The csv file should be a data frame.\n\n", call. = FALSE)
        }
        
        gofurther <- TRUE
        
        plusnames <- setdiff(toupper(csvnames), toupper(names(intrnlbls$varlab)))
        if (length(plusnames) > 0) {
            if (length(plusnames) == length(csvnames)) {
                cat("    None of the variables in the .csv file have metadata information.\n",
                    "    (perhaps the .csv file doesn't have the variable names in the first row?)\n", sep="")
                gofurther <- FALSE
            }
            else {
                cat("    There is no metadata information for the following variables in the .csv file:\n")
                plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
                for (pnms in plusnames) {
                    cat("       ", pnms, "\n")
                }
                cat("\n")
            }
        }
        
        
        plusnames <- setdiff(toupper(names(intrnlbls$varlab)), toupper(csvnames))
        if (length(plusnames) > 0) {
            cat("    There is metadata information for the following variables, but *not* in the .csv file:\n")
            plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
            for (pnms in plusnames) {
                cat("       ", pnms, "\n")
            }
            
            if (gofurther) {
                cat("       ", ifelse(length(plusnames) == 1, "This variable", "These variables"), "will be omitted.\n")
            }
            else {
                cat("\n")
            }
        }
        
        nrowscsv <- nrow(csv)
        
        
        
        if (gofurther) {
            
            # filter the variable labels
            intrnlbls$varlab <- intrnlbls$varlab[which(toupper(names(intrnlbls$varlab)) %in% toupper(csvnames))]
            
            # then filter the corresponding value labels
            intrnlbls$vallab <- intrnlbls$vallab[which(toupper(names(intrnlbls$vallab)) %in% toupper(csvnames))]
            
            varnames <- names(intrnlbls$varlab)
            maxchars <- max(nchar(varnames))
            varcheck <- rep(0, length(varnames))
            
            printNOTE <- FALSE
            
            for (i in seq(ncol(csv))) {
                vartype <- "numeric"
                decimals <- FALSE
                
                tempvar <- csv[, i]
                
                if (is.character(tempvar)) {
                    vartype <- "string"
                    
                    # %in% performs better than == when NAs are present
                    # any(tempvar == ".") generates an error, while
                    # any(tempvar %in% ".") doesn't generate any error
                    
                    if (any(tempvar %in% ".")) { # Stata type empty cells
                        if (sum(tempvar %in% "") == 0) {
                            # test if there are pure blank cells
                            # if none than a "." must be representing a Stata type missing
                            printNOTE <- TRUE
                            tempvar[tempvar %in% "."] <- NA
                        }
                    }
                }
                
                
                ####
                # the following code is needed just in case there are multibyte characters somewhere
                # that prevents nchar() from running properly (if so, it generates an error)
                nofchars <- tryCatch(nchar(as.character(tempvar)), error = function(x) return(x))
                
                if (is.list(nofchars)) {
                    # if an error is generated, nchar()'s output is a list
                    # a multibyte error should be the only one that nchar() throws
                    # don't know what other error would be possible with nchar()
                    
                    tempvar2 <- tempvar
                    error <- unlist(strsplit(nofchars[[1]], split=" "))
                    tempvar2 <- tempvar2[-as.numeric(error[length(error)])]
                    
                    cat("    There are multibyte characters in this data (ex. ", csvnames[i], ", position ", error[length(error)], ")\n", sep="")
                    
                    multibyte <- TRUE
                    
                    while(multibyte) {
                        
                        nofchars <- tryCatch(nchar(as.character(tempvar2)), error = function(x) return(x))
                        if (is.list(nofchars)) {
                            error <- unlist(strsplit(nofchars[[1]], split=" "))
                            tempvar2 <- tempvar2[-as.numeric(error[length(error)])]
                        }
                        else {
                            multibyte <- FALSE
                        }
                    }
                    
                    if (length(nofchars) == 0) {
                        nofchars <- 1
                    }
                    else {
                        nofchars[is.na(tempvar2)] <- 0
                    }
                }
                else {
                    nofchars[is.na(tempvar)] <- 0
                }
                ####
                
                maxvarchar <- max(nofchars)
                
                if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
                    if (is.character(intrnlbls$vallab[[toupper(csvnames[i])]])) {
                        vartype <- "string"
                        # gofurther <- FALSE
                        maxvarchar <- max(maxvarchar, nchar(intrnlbls$vallab[[toupper(csvnames[i])]]))
                    }
                }
                
                if (all(is.na(tempvar))) { # completely empty variable
                    vartype <- "missing"
                }
                
                if (vartype == "numeric") {
                    
                    tempvar2 <- tempvar[!is.na(tempvar)]
                    
                    if (any(tempvar2 - floor(tempvar2) > 0)) { # has decimals
                        decimals <- TRUE
                    }
                    else {
                        if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
                            if (is.numeric(intrnlbls$vallab[[toupper(csvnames[i])]])) {
                                maxvarchar <- max(maxvarchar, nchar(length(intrnlbls$vallab[[toupper(csvnames[i])]])))
                            }
                        }
                    }
                    
                    if (decimals) {
                        csvformats[i] <- paste("F", maxvarchar, ".2", sep="")
                    }
                    else {
                        csvformats[i] <- paste("F", maxvarchar, ".0", sep="")
                    }
                }
                else if (vartype == "string") {
                    sasformats[i] <- "$"
                    csvformats[i] <- paste("A", maxvarchar, sep="")
                }
                else { # all the values are missing
                    csvformats[i] <- paste("F1.0", sep="")
                }
                
                varcheck[i] <- 1
            }
            
            if (printNOTE) {
                cat("    NOTE: some variable(s) in this file have a \".\" sign to represent a missing.\n")
                cat("    The .csv file might not import properly in some software.\n\n")
            }
            
            formats <- all(csvformats != "")
        }
        
        ## TO check if any of the existing metadata variables is not found in the CSV data file
    }
    
    stringvars <- lapply(intrnlbls$vallab, function(x) {
        all(is.character(x[[1]]))
    })
    
    
    if (identical(outfile, "")) {
        if (grepl("\"", intrnlbls_objname)) {
            outfile <- readline("Name for the setup file:\n")
        }
        else {
            outfile <- intrnlbls_objname
        }
    }
    
    uniqueList <- lapply(unique(intrnlbls$vallab), function(uniques) {
        vars <- sapply(names(intrnlbls$vallab),
                     function(x) {
                         ifelse(length(intrnlbls$vallab[[x]]) == length(uniques),
                                all(names(intrnlbls$vallab[[x]]) == names(uniques)), FALSE)
                     })
        return(names(vars[vars]))
    })
    
    
    # initiate an empty list
    numerical_with_strings <- list()
    
    if (!identical(forcenum, c(""))) {
        
        if (!all(forcenum %in% csvnames)) {
            cat("The following <numerical> variable names were not found in the data:\n",
                "\"", paste(forcenum[!forcenum %in% csvnames], collapse="\", \""), "\"\n\n", sep="")
        }
        
        which_numerical <- csvnames[csvnames %in% forcenum]
        
        if (length(which_numerical) > 0) {
            
            # test which values are not numbers in the respective variables
            # has_strings  <- lapply(csv[, which_numerical, drop = FALSE], function(x) {
            #    grep("^-?[0-9]+([.]?[0-9]+)?$", x, perl=TRUE, invert=TRUE)
            # })
            
            has_strings  <- lapply(csv[, which_numerical, drop = FALSE], function(x) {
                which(is.na(suppressWarnings(as.numeric(x))))
            })
            
            numerical_with_strings  <- has_strings[unlist(lapply(has_strings, length)) > 0]
        }
    }
    
    
    if (type == "SPSS" | type == "all") {
        intrnlbls2 <- intrnlbls
        printMISSING <- FALSE
        
        if (!file.exists("Setup files")) {
            dir.create("Setup files")
        }
        
        if (!file.exists(file.path("Setup files", "SPSS"))) {
            dir.create(file.path("Setup files", "SPSS"))
        }
        
        
        currentdir <- getwd()
        setwd(file.path("Setup files", "SPSS"))
        sink(ifelse(length(grep("\\.sps", outfile)) > 0, outfile, paste(outfile, ".sps", sep="")))
        
        cat("* ------------------------------------------------------------------------------", enter, enter,
            "* --- CONFIGURATION SECTION - START ---", enter, enter, enter, sep="")

        if (formats) {
            cat("* The following command should contain the complete path and", enter,
                "* name of the .csv file to be read (e.g. \"C:/CIS 2008/Data/ALL.csv\")", enter,
                "* Change CSV_DATA_PATH to your filename, below:", enter, enter,
                "FILE HANDLE csvpath /NAME=\"CSV_DATA_PATH\" .", enter, enter, enter, sep="")
        }
        
        cat("* The following command should contain the complete path and", enter,
            "* name of the .sav file to be saved (e.g. \"C:/CIS 2008/Data/ALL.sav\")", enter,
            "* Change SAV_DATA_PATH to your filename, below:", enter, enter,
            "FILE HANDLE savfile /NAME=\"SAV_DATA_PATH\" .", enter, enter, enter,
            "* --- CONFIGURATION SECTION -  END  ---", enter, enter,
            "* ------------------------------------------------------------------------------", enter, enter, enter, enter,
            "* There should be nothing to change below this line", enter,                                                            
            "* ------------------------------------------------------------------------------", enter, enter, enter, enter, sep="")
                  
        if (formats) {
            cat("* -------------- Start Definition Macro --------------", enter, enter,
                "SET LOCALE = \"English\" .", enter,
                "SHOW LOCALE .", enter, enter, # SET DECIMAL = COMMA . * (might be another idea)
                "* --------------     Read Raw Data      --------------", enter, enter,
                "GET DATA", enter,
                " /TYPE=TXT", enter,
                " /FILE=csvpath", enter,
                " /DELCASE=LINE", enter,
                " /ARRANGEMENT=DELIMITED", enter,
                " /DELIMITERS='", ifelse(delimiter == "\t", "\\t", delimiter), "'", enter,
                " /QUALIFIER='\"'", enter,
                " /FIRSTCASE=2", enter,
                " /IMPORTCASE=ALL", enter,
                " /VARIABLES=", enter, sep="")
            
            maxcharcsv <- max(nchar(csvnames))
            for (i in seq(length(csvnames))) {
                cat(toupper(csvnames[i]), paste(rep(" ", maxcharcsv - nchar(csvnames[i]) + 1), collapse=""), csvformats[i], sep="")
                if (i == length(csvnames)) {
                    cat(" .")
                }
                cat(enter)
            }
            cat("CACHE .", enter, "EXECUTE .", enter, enter,
                "* ------------------------------------------------------------------------------", enter, enter, enter, sep="")
        }
        
        
        if (any(unlist(stringvars))) {
            cat("* --- Recode string variables which have labels, to numeric variables ---", enter, enter, enter)
            for (i in names(stringvars)) {
                if (stringvars[[i]]) {
                    
                    precommand <- paste("RECODE ", toupper(i), " ", sep="")
                    postcommand <- "(\""
                    
                    command <- paste(precommand, postcommand, sep = "")
                    vals <- seq(length(intrnlbls2$vallab[[i]]))
                    for (j in vals) {
                        postcommand <- paste(postcommand, intrnlbls2$vallab[[i]][j], "\"", " = ", j, sep = "")
                        command <- paste(command, intrnlbls2$vallab[[i]][j], "\"", " = ", j, sep = "")
                        if (j == length(intrnlbls2$vallab[[i]])) {
                            command <- paste(command, ") INTO TEMPVRBL .", enter, "EXECUTE .", enter, enter, sep="")
                        }
                        else {
                            if (nchar(postcommand) > 70) {
                                postcommand <- paste(paste(rep(" ", nchar(precommand)), collapse=""), "(\"", sep="")
                                command <- paste(command, ")", enter, paste(rep(" ", nchar(precommand)), collapse=""), "(\"", sep="")
                            }
                            else {
                                command <- paste(command, ") (\"", sep="")
                            }
                        }
                    }
                    
                    cat(command)
                    
                    cat("ALTER TYPE ", toupper(i), "(F", max(nchar(vals)),".0) .", enter, enter,
                        "COMPUTE ", toupper(i), " = TEMPVRBL .", enter, "EXECUTE .", enter, enter, sep="")
                    cat("DELETE VARIABLES TEMPVRBL .", enter, "EXECUTE .", enter, enter, sep="")
                    
                    names(vals) <- names(intrnlbls2$vallab[[i]])
                    intrnlbls2$vallab[[i]] <- vals
                }
            }
            cat(enter)
        }
        
        
        
        if (length(numerical_with_strings) > 0) {
            cat("* --- Force variables as numeric --- ", enter, enter, sep="")
            
            for (i in names(numerical_with_strings)) {
                
                tempvar <- csv[, i]
                
                ##### code to recode and transform into numeric
                cat("RECODE ", toupper(i), " (\"", paste(sort(unique(tempvar[numerical_with_strings[[i]]])), collapse = "\" = \"\") (\""), "\" = \"\") .", enter,
                    "EXECUTE .", enter, enter, sep="")
                
                tempvar <- tempvar[-numerical_with_strings[[i]]]
                tempvar <- as.numeric(tempvar)
                
                cat ("ALTER TYPE ", toupper(i), " (F", max(nchar(tempvar)), ".", ifelse(any(tempvar - floor(tempvar) > 0), 2, 0), ") .", enter,
                     "EXECUTE .", enter, enter, sep="")
                
            }
            cat(enter)
        }
        
        
        
        cat("* --- Add variable labels --- ", enter, enter,
            "VARIABLE LABELS", enter, sep="")
        
        for (i in seq(length(varnames))) {
            cat(toupper(varnames[i]), paste(rep(" ", maxchars - nchar(varnames[i])), collapse=""), " \"", intrnlbls2$varlab[[i]][1], "\"", sep="")
            if (i == length(varnames)) {
                cat(" .", enter, "EXECUTE .", sep="")
            }
            cat(enter)
        }
        cat(enter, enter, sep="")
        
        cat("* --- Add value labels --- ", enter, enter,
            "VALUE LABELS", enter, sep="")
        
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            
            cat(splitrows(uniqueList[[i]], enter, 80), enter, sep="")
            
            #if (all(is.character(intrnlbls2$vallab[[n]]))) {
            #    cat(paste(paste("\"", intrnlbls2$vallab[[n]], "\" \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""), collapse="\n"))
            #}
            #else {
                cat(paste(paste(intrnlbls2$vallab[[n]], " \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""), collapse=enter))
            #}
            if (i == length(uniqueList)) {
                cat(" .", enter, "EXECUTE .", enter, sep="")
            }
            else {
                cat(enter, "/", enter, sep="")
            }
        }
        cat(enter, enter, sep="")
        
        
        if (!identical(miss, "")) {
        
            if (is.numeric(miss)) {
                missvars <- lapply(intrnlbls2$vallab, function(x) !is.na(match(miss, x)))
                withmiss <- as.vector(unlist(lapply(missvars, any)))
                missvals <- unique(lapply(missvars[withmiss], function(x) miss[x]))
            }
            else {
                missvars <- lapply(intrnlbls2$vallab, function(x) !is.na(match(names(x), miss)))
                withmiss <- as.vector(unlist(lapply(missvars, any)))
                missvals <- unique(lapply(which(withmiss), function(x) as.vector(intrnlbls2$vallab[[x]][missvars[[x]]])))
            }
            
            msngs <- unique(unlist(missvals))
            
            withmiss2 <- which(withmiss)
            
            if (length(missvals) > 0) {
                uniqueMissList <- list()
                for (i in seq(length(missvals))) {
                    vars <- NULL
                    for (j in withmiss2) {
                        y <- intrnlbls2$vallab[[j]][missvars[[j]]]
                        if (all(y %in% missvals[[i]])) {
                            vars <- c(vars, names(intrnlbls2$vallab)[j])
                        }
                    }
                    uniqueMissList[[i]] <- vars
                }
                
                if (length(uniqueMissList) > 0) {
                
                    cat("* --- Add missing values --- ", enter, enter,
                        "MISSING VALUES", enter, sep="")
                        
                    for (i in seq(length(uniqueMissList))) {
                        if (length(missvals[[i]]) < 4) {
                            cat(splitrows(toupper(uniqueMissList[[i]]), enter, 80))
                            cat(" (", paste(missvals[[i]], collapse=", ") , ")", sep="")
                        }
                        else {
                            absrange <- abs(range(missvals[[i]]))
                            
                            if (all(missvals[[i]] < 0)) {
                                cat(splitrows(toupper(uniqueMissList[[i]]), enter, 80))
                                cat(" (LOWEST THRU ", max(missvals[[i]]) , ")", sep="")
                            }
                            else {
                                # check if the missing values range doesn't contain any other (non-missing) values
                                checklist <- list()
                                for (mv in uniqueMissList[[i]]) {
                                    allvalues <- intrnlbls2$vallab[[mv]]
                                    nonmiss <- allvalues[!allvalues %in% missvals[[i]]]
                                    checklist[[mv]] <- any(nonmiss %in% seq(min(missvals[[i]]), max(missvals[[i]])))
                                }
                                
                                checklist <- unlist(checklist)
                                
                                # print(checklist)
                                
                                if (any(checklist)) {
                                    # at least one variable has a non-missing value within the range of the missing values
                                    printMISSING <- TRUE
                                    
                                    # now trying to see if at least some of the variables can be "rescued"
                                    if (any(!checklist)) {
                                        ###
                                        # poate merge...? DE TESTAT
                                        cat(splitrows(names(checklist)[!checklist], enter, 80))
                                        # cat(paste(names(checklist)[!checklist], collapse=", "))
                                        ###
                                        cat(paste(" (", min(missvals[[i]]), " TO ", max(missvals[[i]]) , ")", enter, sep=""))
                                        checklist <- checklist[checklist]
                                    }
                                    
                                    ###
                                    # poate merge...? DE TESTAT
                                    cat(splitrows(names(checklist), enter, 80))
                                    # cat(paste(names(checklist), collapse=", "))
                                    ###
                                    cat(paste(" (", paste(missvals[[i]][1:3], collapse=", ") , ")", sep=""))
                                    cat(ifelse(i == length(uniqueMissList), " .", ""))
                                    cat("  * more than three distinct missing values found")
                                }
                                else {
                                    cat(splitrows(toupper(uniqueMissList[[i]]), enter, 80))
                                    cat(" (", min(missvals[[i]]), " TO ", max(missvals[[i]]) , ")", sep="") 
                                }
                            }
                        }
                        cat(ifelse(i == length(uniqueMissList), " .", ""))
                        cat(enter)
                    }
                    cat(enter, enter, sep="")
                }
            }
        }
        
        cat(paste("* --- Save the .sav file --- ", enter, enter,
                  "SAVE OUTFILE=savfile", enter, "/KEEP", enter, sep=""))
        
        if (formats) {
            for (n in csvnames) {
                cat(toupper(n), enter, sep="")
            }
        }
        else {
            for (n in names(intrnlbls2$varlab)) {
                cat(toupper(n), enter, sep="")
            }
        }
        
        cat("  /COMPRESSED .", enter, "EXECUTE .", enter, sep="")
        
        # finish writing and close the .sps file
        sink()
        
        if (printMISSING) {
            # this would be printed on the screen
            cat("    For some variables, more than 3 distinct missing values were found.\n")
            cat("    Only the first three were used.\n\n")
        }
        
        setwd(currentdir)
        
    }
    
    
    
    if (type == "Stata" | type == "all") {
        intrnlbls2 <- intrnlbls
        
        if (!file.exists("Setup files")) {
            dir.create("Setup files")
        }
        
        if (!file.exists(file.path("Setup files", "Stata"))) {
            dir.create(file.path("Setup files", "Stata"))
        }
        
        currentdir <- getwd()
        setwd(file.path("Setup files", "Stata"))
        sink(ifelse(length(grep("\\.do", outfile)) > 0, outfile, paste(outfile, ".do", sep="")))
        
        cat("/* Initialization commands */", enter,
            "clear", enter,
            "capture log close", enter,
            "set more off", enter,
            "version 12.0", enter,
            "set linesize 250", enter,
            "set varabbrev off", enter,
            "set memory 1G // not necessary in Stata 12",
            ifelse(SD == ";", "\n#delimit ;", ""), enter, enter, enter,
            "* ----------------------------------------------------------------------------",
            ifelse(SD == ";", " ;", ""), enter, enter,
            "* --- CONFIGURATION SECTION - START ---",
            ifelse(SD == ";", "                                        ;", ""), enter, enter,
            "* The following command should contain the complete path and",
            ifelse(SD == ";", "                   ;", ""), enter,
            "* name of the Stata log file.",
            ifelse(SD == ";", "                                                  ;", ""), enter,
            "* Change LOG_FILENAME to your filename, below:",
            ifelse(SD == ";", "                                 ;", ""), enter,
            "local log_file \"LOG_FILENAME\"",
            ifelse(SD == ";", " ;", ""), enter, enter, enter,
            "* The following command should contain the complete path and",
            ifelse(SD == ";", "                   ;", ""), enter,
            "* name of the CSV file, usual file extension \".csv\"",
            ifelse(SD == ";", "                            ;", ""), enter,
            "* Change CSV_DATA_PATH to your filename, below:",
            ifelse(SD == ";", "                                ;", ""), enter,
            "local csvpath \"CSV_DATA_PATH\"",
            ifelse(SD == ";", " ;", ""), enter, enter, enter,
            "* The following command should contain the complete path and",
            ifelse(SD == ";", "                   ;", ""), enter,
            "* name of the STATA file, usual file extension \".dta\"",
            ifelse(SD == ";", "                          ;", ""), enter,
            "* Change STATA_DATA_PATH to your filename, below:",
            ifelse(SD == ";", "                              ;", ""), enter,
            "local statapath \"STATA_DATA_PATH\"",
            ifelse(SD == ";", " ;", ""), enter, enter, enter,
            "* --- CONFIGURATION SECTION - END ---",
            ifelse(SD == ";", "                                          ;", ""), enter, enter,
            "* ----------------------------------------------------------------------------",
            ifelse(SD == ";", " ;", ""), enter, enter, enter, enter,
            "* There should be nothing to change below this line",
            ifelse(SD == ";", "                            ;", ""), enter,
            "* ----------------------------------------------------------------------------",
            ifelse(SD == ";", " ;", ""), enter, enter, enter, enter,         
            "log using \"`log_file'\", replace text",
            ifelse(SD == ";", " ;", ""), enter, enter,
            "insheet using \"`csvpath'\", comma names case",
            ifelse(SD == ";", " ;", ""), enter, enter,
            "* Note that some variables in the csv raw data file might be in lowercase",
            ifelse(SD == ";", "      ;", ""), enter,
            "* To ensure that the dataset contains only variable names in uppercase",
            ifelse(SD == ";", "         ;", ""), enter, enter,
            "foreach var of varlist _all {",
            ifelse(SD == ";", " ;", ""), enter,
            "    local newname = upper(\"`var'\")",
            ifelse(SD == ";", " ;", ""), enter,
            "    cap rename \`var\' \`newname\'",
            ifelse(SD == ";", " ;", ""), enter,
            "}",
            ifelse(SD == ";", " ;", ""), enter, enter, enter, sep="")
        
        
        if (any(unlist(stringvars))) {
            cat("* Recode string variables which have labels, to numeric variables", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
            for (i in seq(length(stringvars))) {
                
                if (stringvars[[i]]) {
                
                    ## sort() is necessary because Stata automatically transforms
                    # character to numeric using the labels in alfabetical ascending order (TO VERIFY THAT!!)
                    vallabs <- sort(intrnlbls2$vallab[[names(stringvars)[i]]])
                    
                    vals <- seq(length(vallabs))
                    names(vals) <- names(vallabs)
                    
                    if (any(grepl("^[0-9]", vallabs))) {
                        cat("generate TEMPVAR = .", ifelse(SD == ";", " ;", ""), enter, sep="")
                        for (j in vals) {
                            cat("replace TEMPVAR = ", j, " if ", toupper(names(stringvars)[i]), " == \"", vallabs[j], ifelse(SD == ";", "\" ;", "\""), enter, sep="")
                        }
                        cat("drop ", toupper(names(stringvars)[i]), ifelse(SD == ";", " ;", ""), enter,
                            "rename TEMPVAR ", toupper(names(stringvars)[i]), ifelse(SD == ";", " ;", ""), enter, sep="")
                    }
                    else {
                        cat("encode (", toupper(names(stringvars)[i]), "), generate (TEMPVAR)", ifelse(SD == ";", " ;", ""), enter,
                            "drop ", toupper(names(stringvars)[i]), ifelse(SD == ";", " ;", ""), enter,
                            "rename TEMPVAR ", toupper(names(stringvars)[i]), ifelse(SD == ";", " ;", ""), enter, sep="")
                    }
                    
                    intrnlbls2$vallab[[names(stringvars)[i]]] <- vals
                    cat(enter)
                }
            }
            cat(enter, enter, sep="")
        }
        
        
        
        if (length(numerical_with_strings) > 0) {
            cat("* Force variables as numeric", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
            
            for (i in names(numerical_with_strings)) {
                
                tempvar <- csv[, i]
                
                for (j in sort(unique(tempvar[numerical_with_strings[[i]]]))) {
                    cat("replace ", toupper(i), " = \"\" if ", toupper(i), " == \"", j, ifelse(SD == ";", "\" ;", "\""), enter, sep="")
                }
                
                cat ("destring ", toupper(i), ", replace", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
                
            }
            cat(enter)
        }
        
        
        
        maxchars <- max(nchar(names(intrnlbls2$varlab)))
        
        cat("* Definition of variable labels", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
        
        # cat(maxchars, enter)
        
        for (n in names(intrnlbls2$varlab)) {
            cat(paste("label variable ", toupper(n), paste(rep(" ", maxchars - nchar(n)), collapse=""), " \"", intrnlbls2$varlab[[n]][1], "\"", ifelse(SD == ";", " ;", ""), enter, sep=""))
        }
        cat(enter, enter, sep="")
        
        cat("* Definition of category labels", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            
            headerlabel <- paste("label define LABELS_GROUP_", i, sep="")
            
            if (SD == ";") {
                cat(headerlabel, enter,
                    paste(paste(paste(intrnlbls2$vallab[[n]], " \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""),
                                collapse=enter), #paste("\n", paste(rep(" ", nchar(headerlabel)), collapse=""), sep="")),
                          " ;", enter, enter, sep=""),
                    sep="")
            }
            else if (SD == "///") {
                cat(headerlabel, " ///", enter,
                    paste(paste(paste(intrnlbls2$vallab[[n]], " \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""),
                                collapse=" ///", enter), enter, enter, sep=""), sep="")
            }
            else if (SD == "/*") {
                cat(headerlabel, " /*", enter,
                    paste("*/ ", paste(paste(intrnlbls2$vallab[[n]], " \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""),
                                collapse=paste(" /*", enter, "*/ ", sep="")),
                          enter, enter, sep=""),
                    sep="")
            }
            else {
                cat(paste("label define LABELS_GROUP_", i, " ", sep=""),
                    paste(paste(paste(intrnlbls2$vallab[[n]], " \"", names(intrnlbls2$vallab[[n]]), "\"", sep=""),
                                collapse=" "),
                          enter, sep=""),
                    sep="")
            }
        }
        
        cat(enter, enter, sep="")
        cat("* Attachment of category labels to variables", ifelse(SD == ";", " ;", ""), enter, enter, sep="")
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            for (j in uniqueList[[i]]) {
                cat(paste("label values ", toupper(j), paste(rep(" ", maxchars - nchar(j)), collapse=""), " LABELS_GROUP_", i, ifelse(SD == ";", " ;", ""), enter, sep=""))
            }
        }
        
        cat("\ncompress", ifelse(SD == ";", " ;", ""), enter,
            "save \"`statapath'\", replace", ifelse(SD == ";", " ;", ""), enter, enter,
            "log close", ifelse(SD == ";", " ;", ""), enter,
            "set more on", ifelse(SD == ";", "\n#delimit cr", ""), enter, sep="")
        
        sink()
        setwd(currentdir)
    }
    
    
    if (type == "SAS" | type == "all") {
        intrnlbls2 <- intrnlbls
        if (!file.exists("Setup files")) {
            dir.create("Setup files")
        }
        
        if (!file.exists(file.path("Setup files", "SAS"))) {
            dir.create(file.path("Setup files", "SAS"))
        }
        
        currentdir <- getwd()
        setwd(file.path("Setup files", "SAS"))
        sink(ifelse(length(grep("\\.sas", outfile)) > 0, outfile, paste(outfile, ".sas", sep="")))
        
        cat("* ------------------------------------------------------------------------------ ;", enter, enter,
            "* --- CONFIGURATION SECTION - START ---                                          ;", enter, enter, enter, sep="")                                            

        if (formats) {
            cat("* The following command should contain the complete path and                     ;", enter,
                "* name of the .csv file to be read (e.g. \"C:/CIS2008/Data/ALL.csv\")              ;", enter,
                "* Change CSV_DATA_PATH to your filename, below                                   ;", enter, enter,
                "FILENAME csvpath \"CSV_DATA_PATH\";", enter, enter, enter, sep="")
                      
           # cat("* It is assumed the data file was created under Windows (end of line is CRLF);", enter,
           #     "* If the csv file was created under Unix,  change eol=LF;", enter,
           #     "* If the csv file was created under MacOS, change eol=CR below;", enter, enter,
           #     "%LET eol=CRLF;", enter, enter, enter, sep="")
        }
        
        cat("* The following command should contain the complete path of the                  ;", enter,
            "* directory where the setup file will be saved (e.g. \"C:/CIS2008/Data\")          ;", enter,
            "* Change SAS_DATA_FOLDER to your directory name, below                           ;", enter, enter,
            "LIBNAME dirout \"SAS_DATA_FOLDER\";", enter, enter, enter, sep="")
                  
        cat("* The following command should contain the name of the output SAS file only      ;", enter,
            "* (without quotes, and without the .sas7bdat extension)                          ;", enter,
            "* Change SAS_FILE_NAME to your output file name, below                           ;", enter, enter,
            "%LET sasfile=SAS_FILE_NAME;", enter, enter, enter,
            "* --- CONFIGURATION SECTION -  END ---                                           ;", enter, enter,
            "* ------------------------------------------------------------------------------ ;", enter, enter, enter, enter,
            "* There should be nothing to change below this line;", enter,
            "* ------------------------------------------------------------------------------ ;", enter, enter, enter, enter, sep="")
        
        if (formats) {
            cat("* --- Read the raw data file ---                                                 ;", enter, enter,
                "DATA sasimport;", enter, enter,
                "INFILE csvpath", enter,
                "       DLM=", ifelse(delimiter == "\t", "'09'X", paste("\"", delimiter, "\"", sep="")), enter,
                "       FIRSTOBS=2", enter,
                #### "       TERMSTR=&eol", enter, #### line commented out
                "       DSD", enter,
                "       TRUNCOVER", enter,
                "       LRECL=512", enter,
                "       ;", enter, enter,
                
                "INPUT  ", toupper(csvnames[1]), ifelse(sasformats[1] == "$", " $", ""), enter, sep="")
            
            for (i in seq(2, length(csvnames))) {
                cat("       ", toupper(csvnames[i]), ifelse(sasformats[i] == "$", " $", ""), enter, sep="")
            }
            cat("       ;", enter, sep="")
            cat("RUN;", enter, enter,
                "* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        }
        
        if (any(unlist(stringvars))) {
            cat("* --- Recode string variables which have labels, to numeric variables ---        ;", enter, enter, sep="")
                
            for (i in names(stringvars)) {
                if (stringvars[[i]]) {
                    cat("DATA sasimport;", enter, enter,
                        "    SET sasimport;", enter, enter, sep="")
                    
                    x <- intrnlbls2$vallab[[i]]
                    vals <- seq(length(x))
                    names(vals) <- names(x)
                    for (j in seq(length(vals))) {
                        cat("    IF (", i, " = '", x[j], "') THEN TEMPVAR = ", vals[j], ";", enter, sep="")
                    }
                    
                    cat(enter, "RUN;", enter, enter, "DATA sasimport;", enter, enter,
                        "    SET sasimport;", enter, enter,
                        "    DROP ", i, ";", enter,
                        "    RENAME TEMPVAR = ", i, ";", enter, enter,
                        "RUN;", enter, enter, sep="")
                    
                    intrnlbls2$vallab[[i]] <- vals
                }
            }
            
            cat("* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        }
        
        
        
        
        if (length(numerical_with_strings) > 0) {
            
            cat("* --- Force variables as numeric ---                                             ;", enter, enter, sep="")
            
            for (i in toupper(names(numerical_with_strings))) {
                
                cat("DATA sasimport;", enter, enter,
                    "    SET sasimport;", enter, enter,
                    "    TEMPVAR = INPUT(", i, ", ?? best.);", enter, enter,
                    "RUN;", enter, enter, 
                    "DATA sasimport;", enter, enter,
                    "    SET sasimport;", enter, enter,
                    "    DROP ", i, ";", enter,
                    "    RENAME TEMPVAR = ", i, ";", enter, enter,
                    "RUN;", enter, enter, sep="")
            }
            
            cat("* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        }
        
        
        if (any(unlist(stringvars)) | length(numerical_with_strings) > 0) {
            cat("* --- Reorder the variables in their original positions ---                      ;", enter, enter,
                "DATA sasimport;", enter, enter,
                "    RETAIN ", gsub(",", "", splitrows(toupper(names(intrnlbls2$varlab)), enter, 70, "           ")), ";", enter, enter,
                "    SET sasimport;", enter, enter,
                "RUN;", enter, enter,
                "* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        }
        
        
        
        cat("* --- Add variable labels ---                                                    ;", enter, enter,
            "DATA sasimport;", enter, enter,
            "    SET sasimport;", enter, enter, sep="")
        
        for (i in seq(length(varnames))) {
            cat("    LABEL ", toupper(varnames[i]), paste(rep(" ", maxchars - nchar(varnames[i]) + 1), collapse=""), "=", " \"", intrnlbls2$varlab[[i]][1], "\";", enter, sep="")
        }
        
        cat(enter, "RUN;", enter, enter,
            "* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        
        cat("* --- Create value labels groups ---                                             ;", enter, enter,
            "PROC FORMAT;", enter, enter, sep="")
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            cat(paste("VALUE LABELS_", i, "_GROUP", enter, sep=""),
                paste(paste(paste("      ", intrnlbls2$vallab[[n]], "=\"", names(intrnlbls2$vallab[[n]]), "\"", sep=""),
                            collapse=enter), #paste("\n", paste(rep(" ", nchar(headerlabel)), collapse=""), sep="")),
                      ";", enter, enter, sep=""),
                sep="")
        }
		  
        cat("RUN;", enter, enter,
            "* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        
        cat("* --- Format variables with value labels ---                                     ;", enter, enter,
            "DATA sasimport;", enter, enter,
            "    SET sasimport;", enter, enter, "    FORMAT", enter, sep="")
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            for (j in uniqueList[[i]]) {
                cat("    ", toupper(j), paste(rep(" ", maxchars - nchar(j)), collapse=""), " LABELS_", i, "_GROUP", ".", enter, sep="")
            }
        }
          
        cat("    ;", enter, enter, "RUN;", enter, enter,
            "* ------------------------------------------------------------------------------ ;", enter, enter, enter, sep="")
        
                      
        cat("* --- Save data to a sas type file ---                                           ;", enter, enter,
            "DATA dirout.&sasfile;", enter, enter,
            "    SET sasimport;", enter, enter,
            "RUN;", enter, sep="")
        
        sink()
        setwd(currentdir)
    }
    
    
    
    if (type == "R" | type == "all") {
        intrnlbls2 <- intrnlbls
        printMISSING <- FALSE
        
        if (!file.exists("Setup files")) {
            dir.create("Setup files")
        }
        
        if (!file.exists(file.path("Setup files", "R"))) {
            dir.create(file.path("Setup files", "R"))
        }
        
        currentdir <- getwd()
        setwd(file.path("Setup files", "R"))
        sink(ifelse(length(grep("\\.R", outfile)) > 0, outfile, paste(outfile, ".R", sep="")))
        
        cat("# ------------------------------------------------------------------------------", enter, enter,
            "# --- CONFIGURATION SECTION - START ---", enter, enter, enter, sep="")

        if (formats) {
            cat("# The following command should contain the complete path and", enter,
                "# name of the .csv file to be read (e.g. \"C:/CIS 2008/Data/ALL.csv\")", enter,
                "# Change CSV_DATA_PATH to your filename, below:", enter, enter,
                "csvpath <- \"CSV_DATA_PATH\"", enter, enter, enter, sep="")
        }
        
        cat("# The following command should contain the complete path and", enter,
            "# name of the .Rdata file to be saved (e.g. \"C:/CIS 2008/Data/ALL.Rdata\")", enter,
            "# Change RDATA_PATH to your filename, below:", enter, enter,
            "rdatapath <- \"RDATA_PATH\"", enter, enter, enter,
            # "# The following command should contain the name of the unique ID variable", enter,
            # "# in the .csv file (to identify missing observations in different variables).", enter,
            # "# Change UNIQUEID to your unique ID variable name, below:", enter, enter,
            # "uniqueid <- \"UNIQUEID\"", enter, enter, enter,
            "# --- CONFIGURATION SECTION -  END  ---", enter, enter,
            "# ------------------------------------------------------------------------------", enter, enter, enter, enter,
            "# There should be nothing to change below this line", enter,                                                            
            "# ------------------------------------------------------------------------------", enter, enter, enter, enter, sep="")
                  
        if (formats) {
            cat("# --- Read the raw data ---", enter, enter,
                "rdatafile <- read.csv(csvpath, sep = \"", ifelse(delimiter == "\t", "\\t", delimiter), "\")",
                enter, enter, "names(rdatafile) <- toupper(names(rdatafile))    # all variable names to upper case",
                enter, enter, enter,
                "# ------------------------------------------------------------------------------",
                enter, enter, enter, enter, sep="")
        }
        else {
            cat("# \"rdatafile\" should be an R data.frame (usually read from a .csv file)\n\n")
        }
        
        
        if (any(unlist(stringvars))) {
            cat("# --- Recode string variables which have labels, to numeric variables ---", enter, enter, sep="")
            stringvars <- stringvars[which(unlist(stringvars))]
            for (i in names(stringvars)) {
                if (stringvars[[i]]) {
                    x <- intrnlbls2$vallab[[i]]
                    vals <- seq(length(x))
                    names(vals) <- names(x)
                    cat(toupper(i), " <- rdatafile[ , \"", toupper(i), "\"]", enter,
                        "tempvar <- rep(NA, length(", toupper(i), "))", enter, enter, sep="")
                    for (j in seq(length(vals))) {
                        cat("tempvar[", toupper(i), " == \"", x[j], "\"] <- ", vals[j], enter, sep="")
                    }
                    
                    cat(enter, "rdatafile[ , \"", toupper(i), "\"] <- tempvar", enter, enter, sep="")
                    
                    intrnlbls2$vallab[[i]] <- vals
                }
            }
            
            cat(enter,
                "# ------------------------------------------------------------------------------",
                enter, enter, enter, enter, sep="")
        }
        
        
        
        if (length(numerical_with_strings) > 0) {
            
            cat("# --- Force variables as numeric ---", enter, enter, sep="")
            
            maxnchars <- max(nchar(names(numerical_with_strings)))
            
            for (i in toupper(names(numerical_with_strings))) {
                cat("rdatafile[ , \"", i, "\"]", paste(rep(" ", maxnchars - nchar(i)), collapse=""),
                    " <- suppressWarnings(as.numeric(rdatafile[ , \"", i, "\"]", "))", enter, sep="")
            }
            cat(enter, enter, enter,
                "# ------------------------------------------------------------------------------",
                enter, enter, enter, enter, sep="")
        }
        
        
        
        cat("# --- Set the variable labels attribute --- ", enter, enter,
            "attr(rdatafile, \"variable labels\") <- list(", enter, sep="")
        
        maxnchars <- max(nchar(varnames))
        
        for (i in varnames) {
            cat("\"", toupper(i), "\"",
            paste(rep(" ", maxnchars - nchar(i)), collapse=""),
            " = \"", intrnlbls2$varlab[[i]][1], "\"",
            ifelse(i == varnames[length(varnames)], ")", ","),
            enter, sep="")
        }
        
        uList <- unlist(uniqueList)
        
        cat(enter, enter,
            "# ------------------------------------------------------------------------------",
            enter, enter, enter, enter,
            "# --- Set the value labels attribute --- ", enter, enter,
            "attr(rdatafile, \"value labels\") <- vector(mode=\"list\", length=", length(uList), ")", enter, enter, sep="")
            
            ##### "names(attr(rdatafile, \"value labels\")) <- c(", enter, 
            ##### splitrows(paste("\"", uList, "\"", sep=""), enter, 80), enter,
            ##### ")", enter, enter, sep="")
        
        contor <- 1
        
        for (i in seq(length(uniqueList))) {
            n <- uniqueList[[i]][1]
            
            listelements <- ifelse(length(uniqueList[[i]]) == 1, contor, paste(contor, ":", contor + length(uniqueList[[i]]) - 1, sep=""))
            
            cat("attr(rdatafile, \"value labels\")[", listelements, "] <- list(c(", enter,
                paste(paste("    \"", names(intrnlbls2$vallab[[n]]), "\" =", sep=""),
                      intrnlbls2$vallab[[n]],
                      collapse=paste(",", enter, sep="")), enter,
                "))", enter, enter,
                "names(attr(rdatafile, \"value labels\"))[", listelements, "] <- ", sep="")
            
            if (length(uniqueList[[i]]) == 1) {
                cat("\"", toupper(uniqueList[[i]]), "\"", enter, enter, sep="")
            }
            else {
                cat("c(", enter, "    ",
                    splitrows(paste("\"", toupper(uniqueList[[i]]), "\"", sep=""), enter, 80, spacerep="    "), enter,
                    ")", enter, enter, sep="")
            }
            
            contor <- contor + length(uniqueList[[i]])
        }
        
        
        cat(enter,
            "# ------------------------------------------------------------------------------",
            enter, enter, enter, enter, sep="")
        
        
        checkMISSING <- rep(FALSE, 3)
        
        checkMISSING[1] <- is.data.frame(csv)
        checkMISSING[2] <- !identical(miss, "")
        checkMISSING[3] <- !identical(uniqueid, "")
        
        if (all(checkMISSING)) {
        # if (is.data.frame(csv) & uniqueid != "") {
            
            if (is.numeric(miss)) {
                missvars <- lapply(intrnlbls2$vallab, function(x) !is.na(match(miss, x)))
                withmiss <- as.vector(unlist(lapply(missvars, any)))
                missvals <- lapply(missvars[withmiss], function(x) miss[x])
                names(missvals) <- names(intrnlbls2$vallab)[withmiss]
            }
            else {
                missvars <- lapply(intrnlbls2$vallab, function(x) !is.na(match(names(x), miss)))
                withmiss <- as.vector(unlist(lapply(missvars, any)))
                missvals <- lapply(which(withmiss), function(x) as.vector(intrnlbls2$vallab[[x]][missvars[[x]]]))
                names(missvals) <- names(intrnlbls2$vallab)[withmiss]
            }
            
            uniqueid <- toupper(uniqueid)
            
            if (length(missvals) > 0) {
                
                cat("# --- Set the missing values attribute --- ", enter, enter,
                    "# only if the uniqueid variable really exists,", enter,
                    "# and it has no duplicate or missing values", enter, enter,
                    "if (\"", uniqueid, "\" %in% names(rdatafile)) {", enter,
                    "    if (!any(duplicated(rdatafile[, \"", toupper(uniqueid), "\"]))) {", enter, enter,
                    "        attr(rdatafile, \"unique id\") <- \"", toupper(uniqueid), "\"", enter, enter, sep="")
                    
                for (i in seq(length(missvals))) {
                    values <- intrnlbls2$vallab[[names(missvals)[i]]]
                    cat("        # ", toupper(names(missvals)[i]), enter, sep="")
                    
                    for (j in seq(length(missvals[[i]]))) {
                        
                        # for the moment, the length of missvals[[i]][j] is always equal to 1,
                        # but in the future metadata information might allocate ranges for some missing types
                        # therefore missvals[[i]] might be a list
                        
                        testvals <- ifelse(length(missvals[[i]][j]) == 1,
                                           missvals[[i]][j],
                                           paste("c(", paste(missvals[[i]][j], collapse = ", "), ")", sep=""))
                        
                        cat("        attr(rdatafile, \"missing types\")$", toupper(names(missvals)[i]),
                            "[[\"", names(values)[values == missvals[[i]][j]], "\"]] <- list(", enter,
                            "        values = ", testvals, ",", enter,
                            "        cases = sort(rdatafile$", toupper(uniqueid), "[rdatafile$", toupper(names(missvals)[i]),
                            ifelse(length(missvals[[i]][j]) == 1, " == ", " %in% "), testvals,  
                            "])", enter, "        )", enter, enter, sep="")
                    }
                    
                    # here, though, it is likely to have multiple missing values
                    # in the future, missvals[[i]] might be a list to accommodate for ranges
                    
                    cat("        rdatafile$", toupper(names(missvals)[i]), "[rdatafile$", toupper(names(missvals)[i]),
                        ifelse(length(unlist(missvals[[i]])) == 1, paste(" == ", unlist(missvals[[i]]), sep=""),
                               paste(" %in% c(", paste(unlist(missvals[[i]]), collapse = ", "), ")", sep="")), 
                        "] <- NA", enter, enter, sep="")
                }
                
                cat("    }", enter, "}", enter, enter, enter, enter,
                "# ------------------------------------------------------------------------------",
                enter, enter, enter, enter, sep="")
            }
        }
        
        
        cat("# --- Save the R data file --- ", enter, enter,
            "rfilename <- unlist(strsplit(basename(rdatapath), split=\"\\\\.\"))[1]", enter,
            "rdatapath <- file.path(dirname(rdatapath), paste(rfilename, \".Rdata\", sep=\"\"))", enter,
            "assign(rfilename, rdatafile)", enter,
            "eval(parse(text = paste(\"save(\", rfilename, \", file=rdatapath)\", sep=\"\")))",
            enter, enter, enter,
            "# ------------------------------------------------------------------------------",
            enter, enter, enter, enter,
            "# --- Clean up the working space --- ", enter, enter,
            "rm(rfilename, rdatafile, csvpath, rdatapath", sep="")
        
        if (any(unlist(stringvars))) {
            cat(", tempvar, ", toupper(paste(names(stringvars)[unlist(stringvars)], collapse=", ")), sep="")
        }
        
        cat(")", enter, enter, enter,
            "# ------------------------------------------------------------------------------",
            enter, enter, enter, enter, sep="")
        
        
        
        # finish writing and close the .R file
        sink()
        
        if (any(checkMISSING)) {
            if (!all(checkMISSING)) {
                if (!checkMISSING[1]) {
                    cat("    Data not found, check the \"csv\" argument or the \"data\" directory.\n")
                }
                if (!checkMISSING[2]) {
                    cat("    The \"miss\" argument needs to be specified.\n")
                }
                if (!checkMISSING[3]) {
                    cat("    The uniqueid variable has to be specified.\n")
                }
                cat("    Missing values not treated for the R setup file.\n\n")
            }
        }
        
        setwd(currentdir)
        
    }
}

Try the DDIwR package in your browser

Any scripts or data that you put into this service are public.

DDIwR documentation built on May 2, 2019, 4:19 p.m.