R/lsa.convert.data2.r

Defines functions lsa.convert.data2

Documented in lsa.convert.data2

#' @title Convert Large-Scale Assessments' Datasets to .RData Format
#'
#' @description \code{lsa.convert.data2} is a deprecated version of of the \code{lsa.convert.data} function using a different approach. It is an old version of the \code{lsa.convert.data} function and will be removed from the package in June of 2025. Use only if you experience issues with \code{lsa.convert.data}.
#'
#' @param inp.folder    The folder containing the IEA-like SPSS data files or ASCII text files and
#'                      \code{.sps} import files for OECD PISA data from cycles prior to 2015.
#'                      See details.
#'                      If blank, the working directory (\code{getwd()}) is used.
#' @param PISApre15     When converting PISA files, set to \code{TRUE} if the input files are from
#'                      PISA cycles prior 2015 (ASCII text format with \code{.sps} control files)
#'                      or to \code{FALSE} (default) if they are in SPSS \code{.sav} format, as in
#'                      the case of IEA studies and the like and OECD PISA 2015 or later. Ignored
#'                      if the input folder contains IEA-like studies.
#' @param ISO           Vector containing character ISO codes of the countries' data files to
#'                      convert (e.g. \code{ISO = c("aus", "svn")}). If none of the files contain
#'                      the specified ISO codes in their names, the codes are ignored and a warning
#'                      is shown. Ignored when converting PISA files (both for cycles prior 2015
#'                      and 2015 and later). This argument is case-insensitive, i.e. the ISO codes
#'                      can be passed as lower- or upper-case.
#'                      (lower or upper) as the original SPSS \code{.sav} files.
#' @param missing.to.NA Should the user-defined missing values be recoded to \code{NA}? If
#'                      \code{TRUE}, all user-defined missing values from the SPSS files (or
#'                      specified in the OECD PISA import syntax files) are all imported as
#'                      \code{NA}. If \code{FALSE} (default), they are converted to valid values
#'                      and the missing codes are assigned to an attribute \code{missings} for
#'                      each variable. See details.
#' @param out.folder    Path to the folder where the converted files will be stored. If omitted,
#'                      same as the \code{inp.folder}, and if the \code{inp.folder} is missing as
#'                      well, this will be \code{getwd()}.
#'
#' @details
#' See the details of \code{lsa.convert.data}.
#'
#' @return
#' See the "Value" section of the \code{lsa.convert.data}.
#'
#' @note
#' See the "Notes" section of \code{lsa.convert.data}.
#'
#' @examples
#' # See the examples of \code{lsa.convert.data}.
#'
#' @references
#' Foy, P. (Ed.). (2018). \emph{PIRLS 2016 User Guide for the International Database}. TIMSS & PIRLS International Study Center.
#'
#' @seealso \code{\link{lsa.convert.data2}}, \code{\link{lsa.merge.data}}, \code{\link{lsa.vars.dict}}, \code{\link{lsa.recode.vars}}
#' @export
lsa.convert.data2 <- function(inp.folder, PISApre15 = FALSE, ISO, missing.to.NA = FALSE, out.folder) {
  .Deprecated(new = "lsa.convert.data", msg = '\nThis function is deprecated and will be removed from the package in June of 2025. Please use the "lsa.convert.data" instead.\n')
  tmp.options <- options(scipen = 999, digits = 22)
  on.exit(expr = options(tmp.options), add = TRUE)
  tryCatch({
    if(missing(x = inp.folder)) {
      inp.folder = getwd()
    } else {
      inp.folder <- paste(inp.folder)
    }
    if(missing(x = out.folder)) {
      out.folder <- paste(inp.folder)
    } else {
      out.folder <- paste(out.folder)
    }
    if(PISApre15 == FALSE) {
      if(missing(x = ISO)) {
        ISO <- paste(list.files(path = inp.folder, pattern = ".sav", ignore.case = TRUE), sep = "")
        if(length(ISO) == 0) {
          if(dir.exists(paths = inp.folder) == TRUE) {
            message("\nError:\nThe input folder contains no \".sav\" files to read. Check your input.\n\n")
          } else {
            message("\nError:\nThe input folder does not exist. Check your input.\n\n")
          }
        }
        if(any(sapply(X = ISO, FUN = nchar) == 12 )) {
          full.inp.file.path <- file.path(inp.folder, ISO)
          study.and.cycle <- unique(x = substr(x = ISO, start = 7, stop = 8))
          if(length(x = study.and.cycle) > 1) {
            message("\nError:\nThe input folder contains data files from more than one study and/or cycle. The input folder must contain files from single study and cycle only.\n\n")
            stop()
          }
        } else if(any(sapply(X = ISO, FUN = nchar) > 12 )) {
          full.inp.file.path <- file.path(inp.folder, ISO)
          first.four.char <- unique(x = substr(x = tolower(ISO), start = 1, stop = 4))
          if(grepl(pattern = "0", x = first.four.char)[1]) {
            study.and.cycle <- unique(x = substr(x = tolower(ISO), start = 1, stop = 4))
          } else {
            study.and.cycle <- unique(x = substr(x = tolower(ISO), start = 1, stop = 3))
          }
          if(length(x = study.and.cycle) > 1) {
            message("\nError:\nThe input folder contains data files from more than one study and/or cycle. The input folder must contain files from single study and cycle only.\n\n")
            stop()
          }
        }
      } else {
        files <- list.files(path = inp.folder, pattern = ".sav", ignore.case = TRUE)
        cnt.ISOs <- unique(x = substr(x = files, start = 4, stop = 6))
        if(length(files) > 0 & length(intersect(x = tolower(ISO), tolower(cnt.ISOs))) == 0) {
          message("\nError:\nNo country codes matching the data files in the input folder were entered in \"ISO = ...\". \nCheck your input and the files in the input folder.\n\n")
        }
        if(length(files) == 0) {
          if(dir.exists(paths = inp.folder) == TRUE) {
            message("\nError:\nThe input folder contains no \".sav\" files to read. Check your input.\n\n")
          } else {
            message("\nError:\nThe input folder does not exist. Check your input.\n\n")
          }
        }
        study.and.cycle <- unique(x = substr(x = files, start = 7, stop = 8))
        if(length(x = study.and.cycle) > 1) {
          message("\nError:\nThe input folder contains data files from more than one study and/or cycle. The input folder must contain files from single study and cycle only.\n\n")
          stop()
        }
        available.ISOs <- unique(substring(text = files, first = 4, last = 8))
        if(any(tolower(paste0(ISO, study.and.cycle)) %in% tolower(available.ISOs) == "FALSE")) {
          warning("Data files for one or more countries in \"ISO = ...\"", " do not exist in the specified folder.", "\nCheck your input.\n", call. = FALSE)
        }
        ISO <- files[grep(pattern = paste0(ISO, study.and.cycle, collapse = "|"), x = files, ignore.case = TRUE)]
        full.inp.file.path <- file.path(inp.folder, ISO)
      }
      root.file.names <- sapply(X = strsplit(x = ISO, split = "\\."), FUN = "[", 1)
      full.out.file.path <- paste(file.path(out.folder, tolower(x = root.file.names)), "RData", sep = ".")
      if(length(ISO) > 0) {
        if(dir.exists(paths = out.folder) == TRUE) {
          if(length(ISO) == 1) {
            message("\n", "One dataset found for conversion. Some datasets can be rather large. Please be patient.\n")
          } else {
            message("\n", paste(length(ISO), " datasets found for conversion. Some datasets can be rather large. Please be patient.\n"))
          }
        } else {
          message("\nError:\nThe output folder does not exist. Check your input.\n\n")
        }
      } else {
        if(dir.exists(paths = out.folder) == FALSE) {
          message("\nError:\nThe output folder does not exist. Check your input.\n\n")
        }
      }
      counter <- 0
      convert.IEA <- function(inp.file, out.file, new.file) {
        ptm <- proc.time()
        if(missing.to.NA == TRUE) {
          tmp <- suppressMessages(read.spss(file = inp.file, use.value.labels = TRUE, use.missings = TRUE, to.data.frame = TRUE, stringsAsFactors = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
          tmp1 <- suppressMessages(read.spss(file = inp.file, use.value.labels = TRUE, use.missings = FALSE, to.data.frame = TRUE, stringsAsFactors = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
          value.labels <- lapply(X = tmp1, FUN = levels)
          tmp1 <- NULL
          value.labels <- lapply(X = value.labels, FUN = function(i) {
            gsub(pattern = "'", replacement = "\\\\'", x = i)
          })
          index.missings.only <- function(missing.values, labels.list) {
            which(setNames(labels.list %in% missing.values, names(labels.list)))
          }
          num.vars.with.labels <- index.missings.only(missing.values = all.missing.values.combinations, labels.list = value.labels)
          if(length(num.vars.with.labels) > 0) {
            tmp2 <- suppressMessages(read.spss(file = inp.file, use.value.labels = FALSE, use.missings = TRUE, to.data.frame = TRUE, stringsAsFactors = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
            tmp[num.vars.with.labels] <- tmp2[num.vars.with.labels]
            tmp2 <- NULL
          }
          factor.vars <- names(Filter(is.factor, tmp))
          tmp[factor.vars] <- lapply(X = tmp[factor.vars], FUN = function(i) {
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u0022]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00e2]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u20ac]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u201c]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u201d]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u2122]", replacement = "'", x = levels(i)))
            if(length(grep(pattern = "-$", x = levels(i))) == 0) {
              i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "--|---|----", replacement = "-", x = levels(i)))
            }
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00c2]", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00b4]", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "\\'\\'", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "-+\\'", replacement = "'", x = levels(i)))
          })
          factors.to.be.numeric <- names(
            Filter(Negate(is.null), lapply(X = tmp[factor.vars], FUN = function(i) {
              factor.vars.missings.levels <- which(levels(i) %in% names(fac.to.num.missing.codes))
              factor.vars.num.values.levels <- which(grepl(pattern = "^\\-*[[:digit:]]+$|^\\-*[[:digit:]]+\\.[[:digit:]]+$", x = levels(i)) == TRUE)
              if(length(factor.vars.missings.levels) + length(factor.vars.num.values.levels) == length(levels(i))) {
                TRUE
              }
            })))
          if(length(factors.to.be.numeric) > 0) {
            tmp[factors.to.be.numeric] <- lapply(X = tmp[factors.to.be.numeric], FUN = function(i) {
              as.numeric(as.character(i))
            })
          }
          factor.vars <- names(Filter(is.factor, tmp))
          tmp[factor.vars] <- lapply(X = tmp[factor.vars], FUN = function(i) {
            i <- factor(x = i, levels = levels(i)[!levels(i) %in% unique(unlist(all.missing.values.combinations))])
          })
          rep.indicator <- names(tmp)[which(colnames(tmp) %in% c("JKINDIC", "JKCREP", "JKREP", "jkrep"))]
          if(length(rep.indicator) > 0) {
            tmp[rep.indicator] <- lapply(X = tmp[rep.indicator], FUN = function(i) {
              if(is.factor(i)) {
                i <- as.numeric(i) - 1
              } else {
                i
              }
            })
          }
          numeric.vars <- names(Filter(is.numeric, tmp))
        } else if(missing.to.NA == FALSE) {
          tmp <- suppressMessages(read.spss(file = inp.file, use.value.labels = TRUE, use.missings = FALSE, to.data.frame = TRUE, stringsAsFactors = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
          value.labels <- lapply(X = tmp, FUN = levels)
          value.labels <- lapply(X = value.labels, FUN = function(i) {
            gsub(pattern = "'", replacement = "\\\\'", x = i)
          })
          index.missings.only <- function(missing.values, labels.list) {
            which(setNames(labels.list %in% missing.values, names(labels.list)))
          }
          num.vars.with.labels <- index.missings.only(missing.values = all.missing.values.combinations, labels.list = value.labels)
          if(length(x = num.vars.with.labels) > 0) {
            tmp1 <- suppressMessages(read.spss(file = inp.file, use.value.labels = FALSE, use.missings = FALSE, to.data.frame = TRUE, stringsAsFactors = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
            tmp[num.vars.with.labels] <- tmp1[num.vars.with.labels]
            tmp1 <- NULL
          }
          tmp2 <- suppressMessages(read.spss(file = inp.file, use.value.labels = TRUE, use.missings = TRUE, duplicated.value.labels = "append", duplicated.value.labels.infix = " ", add.undeclared.levels = "append", to.data.frame = TRUE, stringsAsFactors = TRUE, reencode = "CP1252", sub = ".", trim_values = TRUE, trim.factor.names = TRUE))
          factor.vars <- names(Filter(is.factor, tmp))
          string.vars.to.remove <- grep(pattern = "IS4G06A|IS4G045|IS4G00|IS4G06B|IS4G02U|IS4G02V|IS4G08A|IS4G08B", x = factor.vars, value = TRUE)
          if(length(string.vars.to.remove)) {
            factor.vars <- factor.vars[!factor.vars %in% string.vars.to.remove]
          }
          tmp[factor.vars] <- lapply(X = tmp[factor.vars], FUN = function(i) {
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u0022]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00e2]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u20ac]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u201c]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u201d]", replacement = "-", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u2122]", replacement = "'", x = levels(i)))
            if(length(grep(pattern = "-$", x = levels(i))) == 0) {
              i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "--|---|----", replacement = "-", x = levels(i)))
            }
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00c2]", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "[\u00b4]", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "\\'\\'", replacement = "'", x = levels(i)))
            i <- factor(x = i, levels = levels(i), labels = gsub(pattern = "-+\\'", replacement = "'", x = levels(i)))
          })
          numeric.vars <- names(Filter(is.numeric, tmp))
          full.labels.factor.vars <- lapply(X = tmp[factor.vars], FUN = function(i) {
            levels(i)
          })
          full.labels.numeric.vars <- lapply(X = tmp[numeric.vars], FUN = function(i) {
            as.numeric(attr(i, "value.labels"))
          })
          full.labels <- c(full.labels.factor.vars, full.labels.numeric.vars)
          incomplete.labels.factor.vars <- lapply(X = tmp2[factor.vars], FUN = function(i) {
            levels(i)
          })
          missing.factor.labels <- mapply(FUN = setdiff, full.labels.factor.vars, incomplete.labels.factor.vars)
          missing.labels <- c(missing.factor.labels, full.labels.numeric.vars)
          missing.labels <- missing.labels[lapply(X = missing.labels, FUN = length) > 0]
          missing.labels <- lapply(X = missing.labels, FUN = function(i) {
            i <- gsub(pattern = "'", replacement = "\\\\'", x = i)
            i <- gsub(pattern = "\\\\M", replacement = "-M", x = i)
            i <- gsub(pattern = "\\\\NO", replacement = "-NO", x = i)
          })
          if(length(x = missing.labels) > 0) {
            miss.statements.left <- paste0("attr(tmp[['", names(missing.labels), "']], 'missings') <- ")
            miss.statements.right <- lapply(X = missing.labels, FUN = function(i) {
              if(is.character(i) && length(i) == 1) {
                paste0("'", i, "'")
              } else if(is.character(i) && length(i) > 1) {
                paste0("c('", paste(i, collapse = "', '"), "')")
              } else if(is.numeric(i) && length(i) == 1) {
                paste(i)
              } else if(is.numeric(i) && length(i) > 1) {
                paste0("c(", paste(i, collapse = ", "), ")")
              }
            })
            miss.statements <- paste0(miss.statements.left, miss.statements.right)
            eval(parse(text = miss.statements))
          }
          names.num.miss.labels <- lapply(X = tmp[numeric.vars], FUN = function(i) {
            names(attr(i, "value.labels"))
          })
          names.num.miss.labels <- Filter(Negate(is.null), names.num.miss.labels)
          if(length(names.num.miss.labels) > 0) {
            names.num.miss.labels <- names.num.miss.labels[which(unlist(lapply(X = names.num.miss.labels, FUN = function(i) {length(i) > 0})))]
            naming.statement.left <- paste0("names(attr(tmp[['", names(names.num.miss.labels), "']], 'missings')) <- ")
            naming.statements.right <- lapply(X = names.num.miss.labels, FUN = function(i) {
              paste0("c('", paste0(i, collapse = "', '"), "')")
            })
            naming.statements <- paste0(naming.statement.left, naming.statements.right)
          }
          num.vars.miss.attr <- lapply(X = tmp[numeric.vars], FUN = attr, "missings")
          num.vars.wo.miss.attr <- names(num.vars.miss.attr[unlist(lapply(num.vars.miss.attr, is.null))])
          full.num.unique.values <- lapply(X = tmp[num.vars.wo.miss.attr], unique)
          incomplete.num.unique.values <- lapply(X = tmp2[num.vars.wo.miss.attr], unique)
          miss.values.num <- mapply(FUN = setdiff, full.num.unique.values, incomplete.num.unique.values)
          miss.values.num <- Filter(length, miss.values.num)
          if(length(x = miss.values.num) > 0) {
            eval(parse(text = paste0("attr(tmp[['", names(miss.values.num), "']], 'missings') <- ", miss.values.num)))
          }
          tmp2 <- NULL
          factor.vars <- names(Filter(is.factor, tmp))
          all.levels.factor.vars <- lapply(X = tmp[factor.vars], FUN = levels)
          levels.match.all.missing.combinations <- Filter(length, lapply(X = all.levels.factor.vars, FUN = function(i) {
            i[i %in% unlist(all.missing.values.combinations)]
          }))
          names.levels.match.all.missing.combinations <- names(lapply(X = levels.match.all.missing.combinations, FUN = names))
          if(length(x = levels.match.all.missing.combinations) > 0) {
            produce.statements.left <- function(data.obj, var.names) {
              paste0("attr(tmp[['", var.names, "']], 'missings') <- ")
            }
            missing.statements.left <- as.list(mapply(FUN = produce.statements.left, data.obj = tmp[unlist(names.levels.match.all.missing.combinations)], var.names = names.levels.match.all.missing.combinations))
            levels.match.all.missing.combinations <- lapply(X = levels.match.all.missing.combinations, FUN = function(i) {
              gsub(pattern = "'", replacement = "\\\\'", x = i)
            })
            missing.statements.right <- lapply(X = levels.match.all.missing.combinations, FUN = function(i) {
              if(length(i) == 1) {
                paste0("'", i, "'")
              } else {
                i <- paste(i, sep = "", collapse = "', '")
                i <- paste0("c('", i, "')")
              }
            })
            missing.statements <- mapply(FUN = paste0, missing.statements.left, missing.statements.right)
            eval(parse(text = missing.statements))
          }
          num.vars.miss.attr <- names(Filter(Negate(is.null), lapply(X = tmp[numeric.vars], FUN = function(i) {
            attr(i, "missings")
          })))
          if(length(num.vars.miss.attr) > 0) {
            to.numeric.statements <- lapply(num.vars.miss.attr, function(i) {
              paste0("attr(tmp[['", i, "']], 'missings') <- as.numeric(attr(tmp[['", i, "']], 'missings'))")
            })
            eval(parse(text = to.numeric.statements))
          }
          if(length(names.num.miss.labels) > 0) {
            eval(parse(text = naming.statements))
          }
          factor.vars <- names(Filter(is.factor, tmp))
          factors.to.be.numeric <- names(Filter(Negate(is.null), lapply(X = tmp[factor.vars], FUN = function(i) {
            factor.vars.missings <- which(attributes(i)[["missings"]] %in% levels(i))
            factor.vars.num.values.levels <- which(grepl(pattern = "^\\-*[[:digit:]]+$|^\\-*[[:digit:]]+\\.[[:digit:]]+$", x = levels(i)) == TRUE)
            if(length(factor.vars.missings) + length(factor.vars.num.values.levels) == length(levels(i))) {
              TRUE
            }
          })))
          missing.codes <- lapply(X = tmp[factors.to.be.numeric], FUN = function(i) {
            attr(i, "missings")
          })
          missing.codes <- lapply(X = missing.codes, FUN = function(i) {
            sapply(X = i, FUN = function(j) {
              if(j %in% names(fac.to.num.missing.codes)) {
                idx <- which(names(fac.to.num.missing.codes) %in% j)
                j <- unname(fac.to.num.missing.codes)[idx]
              }
            })
          })
          missing.codes <- Filter(length, missing.codes)
          if(length(missing.codes) > 0) {
            overwrite.numeric.miss.attr.statements.left <- paste0("attr(tmp[['", names(missing.codes), "']], 'missings') <- ")
            overwrite.numeric.miss.attr.statements.right <- paste0(missing.codes)
            overwrite.numeric.miss.attr.statements <- paste0(overwrite.numeric.miss.attr.statements.left, overwrite.numeric.miss.attr.statements.right)
            eval(parse(text = overwrite.numeric.miss.attr.statements))
            tmp[names(missing.codes)] <- lapply(X = tmp[names(missing.codes)], FUN = function(i) {
              levels.and.missing.dictionary <- setNames(c(as.character(attributes(i)[["missings"]]), grep(pattern = "^\\-*[[:digit:]]+$|^\\-*[[:digit:]]+\\.[[:digit:]]+$", x = levels(i), value = TRUE)), c(as.character(names(attributes(i)[["missings"]])), grep(pattern = "^\\-*[[:digit:]]+$|^\\-*[[:digit:]]+\\.[[:digit:]]+$", x = levels(i), value = TRUE)))
              variable.as.character <- as.character(i)
              if(any(names(attributes(i)[["missings"]]) %in% variable.as.character) == TRUE) {
                replacement.values <- as.numeric(levels.and.missing.dictionary[variable.as.character])
              } else {
                replacement.values <- as.numeric(variable.as.character)
              }
              missings.attribute <- attributes(i)[["missings"]]
              attr(replacement.values, "missings") <- missings.attribute
              return(replacement.values)
            })
            Filter(Negate(is.na), lapply(X = tmp, FUN = function(i) {
              if(length(levels(i)) > 0) {
                if(!is.null(attr(i, "missings")) && !is.na(attr(i, "missings")[levels(i)[1] == attr(i, "missings")][1])) {
                  miss.to.remove <- attr(i, "missings")[levels(i)[1] %in% attr(i, "missings")][1]
                  setattr(x = i, name = "missings", value = attr(i, "missings")[!attr(i, "missings") %in% miss.to.remove])
                }
              }
            }))
          }
          rep.indicator <- names(tmp)[which(colnames(tmp) %in% c("JKINDIC", "JKCREP", "JKREP", "jkrep"))]
          if(length(rep.indicator) > 0) {
            tmp[rep.indicator] <- lapply(X = tmp[rep.indicator], FUN = function(i) {
              if(is.factor(i)) {
                i <- as.numeric(i) - 1
              } else {
                i
              }
            })
          }
        }
        factor.vars <- names(Filter(is.factor, tmp))
        doubled.num.levels.with.space <- names(
          Filter(length, lapply(X = tmp[factor.vars], FUN = function(i) {
            grep(pattern = "^[[:digit:]]+[[:space:]]+[[:digit:]]+$|^[[:digit:]]+\\.[[:digit:]]+[[:space:]]+[[:digit:]]+\\.[[:digit:]]+$", x = levels(i), value = TRUE)
          })
          ))
        if(length(doubled.num.levels.with.space) > 0) {
          tmp[doubled.num.levels.with.space] <- lapply(X = tmp[doubled.num.levels.with.space], FUN = function(i) {
            as.numeric(as.character(droplevels(i)))
          })
        }
        factor.vars <- names(Filter(is.factor, tmp))
        factors.with.whitespace <- names(Filter(function(i) {length(i) > 0}, lapply(X = tmp[factor.vars], FUN = function(j) {
          grep(pattern = "^.+[[:space:]]+$|^[[:digit:]]+\\.[[:digit:]]+[[:space:]]*$", x = levels(j))
        })
        ))
        if(length(factors.with.whitespace) > 0) {
          tmp[factors.with.whitespace] <- lapply(X = tmp[factors.with.whitespace], FUN = function(i) {
            factor(x = str_trim(i), levels = make.unique(str_trim(levels(i))))
          })
        }
        factor.vars <- names(Filter(is.factor, tmp))
        factor.vars.all.num.levels <- names(which(sapply(X = tmp[factor.vars], FUN = function(i) {
          all(grepl(pattern = "^\\-*[[:digit:]]+[[:space:]]*$|^\\-*[[:digit:]]+\\.[[:digit:]]+[[:space:]]*$", x = levels(i)) == TRUE)
        }) == TRUE))
        tmp[factor.vars.all.num.levels] <- lapply(X = tmp[factor.vars.all.num.levels], FUN = function(i) {
          as.numeric(as.character(i))
        })
        numeric.val.lab.to.null <- paste0("attr(tmp[['", numeric.vars, "']], 'value.labels') <- NULL")
        eval(parse(text = numeric.val.lab.to.null))
        if(any(sapply(X = ISO, FUN = nchar) == 12 )) {
          inp.file.first.char <- substr(x = basename(inp.file), start = 1, stop = 1)
        } else if (any(sapply(X = ISO, FUN = nchar) > 12 )) {
          first.four.char <- unique(x = substr(x = tolower(ISO), start = 1, stop = 4))
          if(grepl(pattern = "0", x = first.four.char)[1]) {
            inp.file.first.char <- substr(x = basename(inp.file), start = 1, stop = 4)
          } else {
            inp.file.first.char <- substr(x = basename(inp.file), start = 1, stop = 3)
          }
        }
        inp.file.first.char <- tolower(inp.file.first.char)
        study.and.cycle <- tolower(study.and.cycle)
        if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m1") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "1995"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m2") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "1999"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m3") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2003"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m4") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2007"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "b4") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2007"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m5") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2011"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m6") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2015"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m7") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2019"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "b7") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2019"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "z7") {
          study.attribute <- "eTIMSS PSI"
          cycle.attribute <- "2019"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m8") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2023"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "m9") {
          study.attribute <- "TIMSS"
          cycle.attribute <- "2027"
        } else if(inp.file.first.char %in% c("a", "b") && study.and.cycle == "n1") {
          study.attribute <- "preTIMSS"
          cycle.attribute <- "2015"
        } else if(inp.file.first.char %in% c("m", "p") && study.and.cycle == "m1") {
          study.attribute <- "TIMSS Advanced"
          cycle.attribute <- "1995"
        } else if(inp.file.first.char %in% c("m", "p") && study.and.cycle == "m2") {
          study.attribute <- "TIMSS Advanced"
          cycle.attribute <- "2008"
        } else if(inp.file.first.char %in% c("m", "p") && study.and.cycle == "m3") {
          study.attribute <- "TIMSS Advanced"
          cycle.attribute <- "2015"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r1") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2001"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r2") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2006"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r3") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2011"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r4") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2016"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r5") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2021"
        } else if(inp.file.first.char == "a" && study.and.cycle == "a5") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2021"
        } else if(inp.file.first.char == "a" && study.and.cycle == "r6") {
          study.attribute <- "PIRLS"
          cycle.attribute <- "2026"
        } else if(inp.file.first.char == "a" && study.and.cycle == "l1") {
          study.attribute <- "prePIRLS"
          cycle.attribute <- "2011"
        } else if(inp.file.first.char == "a" && study.and.cycle == "l2") {
          study.attribute <- "prePIRLS"
          cycle.attribute <- "2016"
        } else if(inp.file.first.char == "a" && study.and.cycle == "e1") {
          study.attribute <- "ePIRLS"
          cycle.attribute <- "2016"
        } else if(inp.file.first.char == "a" && study.and.cycle == "b1") {
          study.attribute <- "TiPi"
          cycle.attribute <- "2011"
        } else if(inp.file.first.char == "a" && study.and.cycle == "t1") {
          study.attribute <- "RLII"
          cycle.attribute <- "1991"
        } else if(inp.file.first.char == "a" && study.and.cycle == "t2") {
          study.attribute <- "RLII"
          cycle.attribute <- "2001"
        } else if(inp.file.first.char %in% c("a", "b", "c") && study.and.cycle == "s0") {
          study.attribute <- "SITES"
          cycle.attribute <- "1998"
        } else if(inp.file.first.char == "b" && study.and.cycle == "s1") {
          study.attribute <- "SITES"
          cycle.attribute <- "2006"
        } else if(inp.file.first.char %in% c("b", "c") && study.and.cycle == "f2") {
          study.attribute <- "CivED"
          cycle.attribute <- "1999"
        } else if(inp.file.first.char %in% c("i", "j") && study.and.cycle == "c2") {
          study.attribute <- "ICCS"
          cycle.attribute <- "2009"
        } else if(inp.file.first.char == "i" && study.and.cycle == "c3") {
          study.attribute <- "ICCS"
          cycle.attribute <- "2016"
        } else if(inp.file.first.char == "i" && study.and.cycle == "c4") {
          study.attribute <- "ICCS"
          cycle.attribute <- "2022"
        } else if(inp.file.first.char == "i" && study.and.cycle == "b4") {
          study.attribute <- "ICCS"
          cycle.attribute <- "2022"
        } else if(inp.file.first.char == "b"  && study.and.cycle == "i1") {
          study.attribute <- "ICILS"
          cycle.attribute <- "2013"
        } else if(inp.file.first.char == "b"  && study.and.cycle == "i2") {
          study.attribute <- "ICILS"
          cycle.attribute <- "2018"
        } else if(inp.file.first.char == "b"  && study.and.cycle == "t1") {
          study.attribute <- "TALIS"
          cycle.attribute <- "2008"
        } else if(inp.file.first.char %in% c("a", "b", "c", "p")  && study.and.cycle == "t2") {
          study.attribute <- "TALIS"
          cycle.attribute <- "2013"
        } else if(inp.file.first.char %in% c("a", "b", "c", "p")  && study.and.cycle == "t3") {
          study.attribute <- "TALIS"
          cycle.attribute <- "2018"
        } else if(inp.file.first.char %in% c("a", "b")  && study.and.cycle == "s1") {
          study.attribute <- "TALIS 3S"
          cycle.attribute <- "2018"
        } else if(inp.file.first.char == "d"  && study.and.cycle == "t1") {
          study.attribute <- "TEDS-M"
          cycle.attribute <- "2008"
        } else if(inp.file.first.char == "b"  && study.and.cycle == "v1") {
          study.attribute <- "REDS"
          cycle.attribute <- "2021"
        } else if(inp.file.first.char == "cy6") {
          study.attribute <- "PISA"
          cycle.attribute <- "2015"
        } else if(inp.file.first.char == "cy07") {
          study.attribute <- "PISA"
          cycle.attribute <- "2018"
        } else if(inp.file.first.char == "cy1") {
          study.attribute <- "PISA for Development"
          cycle.attribute <- "2019"
        }
        base.ISO <- gsub(pattern = ".sav$", replacement = "", x = basename(inp.file), ignore.case = TRUE)
        if(any(sapply(X = ISO, FUN = nchar) == 12 )) {
          inp.file.abbrev <- substr(x = basename(inp.file), start = 1, stop = 3)
        } else if (any(sapply(X = ISO, FUN = nchar) > 12 )) {
          if(study.and.cycle %in% c("cy6", "cy07")) {
            inp.file.abbrev <- substr(x = base.ISO, start = nchar(base.ISO) - 6, stop = nchar(base.ISO))
          } else if(study.and.cycle == "cy1") {
            if(nchar(basename(inp.file)) == 19) {
              inp.file.abbrev <- substr(x = base.ISO, start = nchar(base.ISO) - 6, stop = nchar(base.ISO))
            } else if(nchar(basename(inp.file)) == 15) {
              inp.file.abbrev <- substr(x = base.ISO, start = nchar(base.ISO) - 6, stop = nchar(base.ISO))
            }
          }
        }
        if(inp.file.abbrev %in% c("alg", "ALG", "blg", "BLG")) {
          file.type.attribute <- "leader.bckg"
        } else if(study.attribute != "TALIS 3S" && inp.file.abbrev %in% c("asg", "bsg", "ASG", "BSG", "asc", "bs_", "cs_", "isg", "ISG", "jsg", "JSG")) {
          file.type.attribute <- "std.bckg"
        } else if(study.attribute == "TALIS 3S" && inp.file.abbrev %in% c("asg", "bsg", "ASG", "BSG")) {
          file.type.attribute <- "staff.bckg"
        } else if(inp.file.abbrev %in% c("msg", "MSG")) {
          file.type.attribute <- "math.std.bckg"
        } else if(inp.file.abbrev %in% c("psg", "PSG")) {
          file.type.attribute <- "phys.std.bckg"
        } else if(inp.file.abbrev %in% c("asa", "bsa", "ASA", "BSA", "isa", "ISA", "jsa", "JSA")) {
          file.type.attribute <- "std.ach"
        } else if(inp.file.abbrev %in% c("msa", "MSA")) {
          file.type.attribute <- "math.std.ach"
        } else if(inp.file.abbrev %in% c("psa", "PSA")) {
          file.type.attribute <- "phys.std.ach"
        } else if(inp.file.abbrev %in% c("asr", "bsr", "ASR", "BSR", "isr", "ISR", "jsr", "JSR")) {
          file.type.attribute <- "std.rel"
        } else if(inp.file.abbrev %in% c("msr", "MSR")) {
          file.type.attribute <- "math.std.rel"
        } else if(inp.file.abbrev %in% c("psr", "PSR")) {
          file.type.attribute <- "phys.std.rel"
        } else if(inp.file.abbrev %in% c("ash", "ASH")) {
          file.type.attribute <- "std.home"
        } else if(inp.file.abbrev %in% c("ise", "ISE", "jse", "JSE")) {
          file.type.attribute <- "std.EUM"
        } else if(inp.file.abbrev %in% c("iss", "ISS")) {
          file.type.attribute <- "std.AM"
        } else if(inp.file.abbrev %in% c("isl", "ISL")) {
          file.type.attribute <- "std.LAM"
        } else if(inp.file.abbrev %in% c("ast", "bst", "AST", "BST", "bl_")) {
          file.type.attribute <- "std-tch.lnk"
        } else if(inp.file.abbrev %in% c("mst", "MST")) {
          file.type.attribute <- "math.std-tch.lnk"
        } else if(inp.file.abbrev %in% c("pst", "PST")) {
          file.type.attribute <- "phys.std-tch.lnk"
        } else if(inp.file.abbrev %in% c("atg", "bt_", "itg", "ITG", "btg", "ATG", "BTG", "CTG", "PTG")) {
          file.type.attribute <- "tch.bckg"
        } else if(inp.file.abbrev %in% c("btm", "BTM", "mtg", "MTG")) {
          file.type.attribute <- "math.tch.bckg"
        } else if(inp.file.abbrev %in% c("bts", "BTS")) {
          file.type.attribute <- "sci.tch.bckg"
        } else if(inp.file.abbrev %in% c("ptg", "PTG")) {
          file.type.attribute <- "phys.tch.bckg"
        } else if(inp.file.abbrev == "DPG") {
          file.type.attribute <- "prim.tch.bckg"
        } else if(inp.file.abbrev == "DSG") {
          file.type.attribute <- "low-sec.tch.bckg"
        } else if(inp.file.abbrev == "DPR") {
          file.type.attribute <- "prim.tch.rel"
        } else if(inp.file.abbrev == "DSR") {
          file.type.attribute <- "low-sec.tch.rel"
        } else if(inp.file.abbrev %in% c("acg", "bcg", "bc_", "icg", "ICG", "ACG", "BCG", "CCG", "PCG")) {
          file.type.attribute <- "sch.bckg"
        } else if(inp.file.abbrev %in% c("mcg", "MCG")) {
          file.type.attribute <- "math.sch.bckg"
        } else if(inp.file.abbrev %in% c("pcg", "PCG")) {
          file.type.attribute <- "phys.sch.bckg"
        } else if(inp.file.abbrev == "axg") {
          file.type.attribute <- "prim.sch.bckg"
        } else if(inp.file.abbrev == "bxg") {
          file.type.attribute <- "low-sec.sch.bckg"
        } else if(inp.file.abbrev == "cxg") {
          file.type.attribute <- "upp-sec.sch.bckg"
        } else if(inp.file.abbrev == "DIG") {
          file.type.attribute <- "inst.bckg"
        } else if(inp.file.abbrev == "DEG") {
          file.type.attribute <- "educ.bckg"
        } else if(inp.file.abbrev %in% c("SCH_QQQ", "scq_qqq")) {
          file.type.attribute <- "sch.bckg"
        } else if(inp.file.abbrev %in% c("STU_QTM", "stu_qtm")) {
          file.type.attribute <- "std.qtm"
        } else if(inp.file.abbrev %in% c("STU_TTM", "stu_ttm")) {
          file.type.attribute <- "std.ttm"
        } else if(inp.file.abbrev %in% c("STU_COG", "stu_cog")) {
          file.type.attribute <- "std.ach"
        } else if(inp.file.abbrev %in% c("STU_QQQ", "stu_qqq")) {
          file.type.attribute <- "std.bckg"
        } else if(inp.file.abbrev %in% c("STU_QQ2", "stu_qq2")) {
          file.type.attribute <- "std.bckg"
        } else if(inp.file.abbrev %in% c("STU_FLT", "stu_flt")) {
          file.type.attribute <- "std.fin.lit"
        } else if(inp.file.abbrev %in% c("FLT_QQQ", "flt_qqq")) {
          file.type.attribute <- "std.fin.lit.bckg"
        } else if(inp.file.abbrev %in% c("FLT_COG", "flt_cog")) {
          file.type.attribute <- "std.fin.lit.ach"
        } else if(inp.file.abbrev %in% c("FLT_TTM", "flt_ttm")) {
          file.type.attribute <- "std.fin.lit.ttm"
        } else if(inp.file.abbrev %in% c("FLT_TIM", "flt_tim")) {
          file.type.attribute <- "std.fin.lit.tim"
        } else if(inp.file.abbrev %in% c("STU_CPS", "stu_cps")) {
          file.type.attribute <- "std.col.prob.slv"
        } else if(inp.file.abbrev %in% c("TCH_QQQ", "tch_qqq")) {
          file.type.attribute <- "tch.bckg"
        } else if(inp.file.abbrev %in% c("STU_TIM", "stu_tim")) {
          file.type.attribute <- "std.qtm"
        } else if(inp.file.abbrev %in% c("STU_PVS", "stu_pvs")) {
          file.type.attribute <- "std.bckg"
        } else if(inp.file.abbrev %in% c("COG", "cog")) {
          file.type.attribute <- "out.of.school.ach"
        } else if(inp.file.abbrev %in% c("QQQ", "qqq")) {
          file.type.attribute <- "out.of.school.bckg"
        } else if(inp.file.abbrev %in% c("TIM", "tim")) {
          file.type.attribute <- "out.of.school.qtm"
        }
        attr(x = tmp, which = "study") <- study.attribute
        attr(x = tmp, which = "cycle") <- cycle.attribute
        attr(x = tmp, which = "file.type") <- file.type.attribute
        if(attr(x = tmp, which = "study") %in% c("TIMSS", "PIRLS", "TIMSS Advanced", "RLII", "TiPi", "prePIRLS", "preTIMSS", "ePIRLS", "eTIMSS PSI", "CivED", "ICCS")) {
          cnt.identifier <- which(names(tmp) %in% c("IDCNTRY", "idcntry"))
        } else if(attr(tmp, "study") %in% c("IALS", "ALLS", "PIAAC")) {
          cnt.identifier <- which(names(tmp) %in% c("CNTRYID", "CNTRID"))
        }
        if(attr(x = tmp, which = "study") %in% c("TIMSS", "PIRLS", "TIMSS Advanced", "RLII", "TiPi", "prePIRLS", "preTIMSS", "ePIRLS", "eTIMSS PSI", "CivED", "ICCS", "IALS", "ALLS", "PIAAC")) {
          tmp[[cnt.identifier]] <- factor(x = tmp[[cnt.identifier]], levels = c(32, 51, 36, 40, 48, 3724, 956, 957, 56, 84, 72, 100, 124, 9132, 9133, 9134, 9135, 9136, 152, 158, 170, 196, 203, 200, 208, 818, 926, 826, 233, 246, 250, 268, 276, 288, 300, 344, 348, 352, 9352, 11800, 360, 364, 372, 376, 380, 392, 400, 410, 414, 428, 422, 440, 442, 807, 458, 498, 504, 528, 554, 578, 9578, 275, 608, 616, 620, 634, 642, 643, 6431, 682, 927, 891, 702, 703, 222, 705, 710, 4710, 724, 7241, 752, 3752, 756, 760, 764, 780, 788, 792, 840, 887, 470, 12700, 12500, 512, 804, 12, 398, 496, 70, 7841, 76, 484, 48401, 48402, 48499, 214, 320, 438, 600, 6162, 57891, 57892, 57893, 57894, 7842, 784, 31, 72401, 72404, 7246, 340, 191, 6504, 9470, 928, 9528, 7554, 7702, 688, 10400, 11100, 10800, 10900, 11200, 13700, 6887, 32001, 8261, 9642, 48411, 48420, 48412, 48415, 48416, 48417, 48418, 48421, 48422, 48425, 48426, 48427, 48428, 188, 558, 604, 9137, 156001, 643002, 5784, 5788, 276001, 724005, 446, 643001, 7105, 208001, 724004, 710003, 704, 858, 218, 8, 499, 586, 411, 710004, 854, 231, 356, 404, 646, 800, 860, 7106), labels = c("Argentina", "Armenia", "Australia", "Austria", "Bahrain", "Spain (Basque Country)", "Belgium (Flemish)", "Belgium (French)", "Belgium", "Belize", "Botswana", "Bulgaria", "Canada", "Canada (Ontario)", "Canada (Quebec)", "Canada (Alberta)", "Canada (British Columbia)", "Canada (Nova Scotia)", "Chile", "Chinese Taipei", "Colombia", "Cyprus", "Czech Republic", "Czech Republic", "Denmark", "Egypt", "England", "United Kingdom", "Estonia", "Finland", "France", "Georgia", "Germany", "Ghana", "Greece", "Hong Kong, SAR", "Hungary", "Iceland", "Iceland (Grade 5)", "United States (Indiana)", "Indonesia", "Iran, Islamic Republic of", "Ireland", "Israel", "Italy", "Japan", "Jordan", "Korea, Republic of", "Kuwait", "Latvia", "Lebanon", "Lithuania", "Luxembourg", "North Macedonia", "Malaysia", "Moldova", "Morocco", "Netherlands", "New Zealand", "Norway", "Norway (Grade 5)", "Palestinian National Authority", "Philippines", "Poland", "Portugal", "Qatar", "Romania", "Russian Federation", "Russian Federation (Moscow)", "Saudi Arabia", "Scotland", "Serbia", "Singapore", "Slovak Republic", "El Salvador", "Slovenia", "South Africa", "South Africa (Grade 4)", "Spain", "Spain (Catalonia)", "Sweden", "Sweden (Grade 3)", "Switzerland", "Syria, Arab Republic of", "Thailand", "Trinidad And Tobago", "Tunisia", "Turkey", "United States", "Yemen", "Malta", "United States (Minnesota)", "United States (Massachusetts)", "Oman", "Ukraine", "Algeria", "Kazakhstan", "Mongolia", "Bosnia and Herzegovina", "United Arab Emirates (Dubai)", "Brazil", "Mexico", "Mexico (Generales/Tecnicas/Privadas)", "Mexico (Telesecundarias)", "Mexico (Talis-Nacional)", "Dominican Republic", "Guatemala", "Liechtenstein", "Paraguay", "Poland (Second-Cycle Programs)", "Norway (ALU)", "Norway (ALU +)", "Norway (PPU)", "Norway (MASTERS)", "United Arab Emirates (Abu Dhabi)", "United Arab Emirates", "Azerbaijan, Republic of", "Spain (Andalucia)", "Spain (Canary Islands)", "Finland (Grade 7)", "Honduras, Republic of", "Croatia", "Morocco (Grade 6)", "Malta (Maltese)", "Northern Ireland", "The Netherlands (50 additional schools)", "New Zealand (TIMSS data processing)", "Singapore (Chinese Grade 7)", "Serbia", "United States (Alabama)", "United States (California)", "United States (Colorado)", "United States (Connecticut)", "United States (Florida)", "United States (North Carolina)", "Yemen (Grade 6)", "Argentina, Buenos Aires", "England and Northern Ireland (UK)", "Romania", "Mexico (Distrito Federal)", "Mexico (International Telesecundaria)", "Mexico (Jalisco)", "Mexico (Nuevo Leon)", "Mexico (Quintana Roo)", "Mexico (San Luis Potosi)", "Mexico (Tamaulipas)", "Mexico (Telesecundaria-Distrito Federal)", "Mexico (Telesecundaria-Jalisco)", "Mexico (Telesecundaria-Nuevo Leon)", "Mexico (Telesecundaria-Quintana Roo)", "Mexico (Telesecundaria-San Luis Potosi)", "Mexico (Telesecundaria-Tamaulipas)", "Costa Rica", "Nicaragua", "Peru", "Canada (Newfoundland and Labrador)", "China (Shanghai)", "Russia (8+ sample)", "Norway (4)", "Norway (8)", "Germany, North-Rhine Westphalia", "Spain, Madrid", "Macao SAR", "Russian Federation, Moscow", "South Africa (Eng/Afr)", "Denmark (Grade 3)", "Spain, Madrid, Bilingual", "South Africa (Gauteng)", "Vietnam", "Uruguay", "Ecuador", "Albania", "Montenegro", "Pakistan", "Kosovo", "South Africa (Western Cape Province)", "Burkina Faso", "Ethiopia", "India", "Kenya", "Rwanda", "Uganda", "Uzbekistan", "South Africa (Grade 6)"))
          tmp[[cnt.identifier]] <- droplevels(tmp[[cnt.identifier]])
        }
        if(attr(x = tmp, which = "study") %in% c("ICILS", "SITES", "TEDS-M", "TALIS", "REDS")) {
          tmp[["IDCNTRY"]] <- droplevels(tmp[["IDCNTRY"]])
        }
        names(tmp) <- toupper(names(tmp))
        if(study.attribute %in% c("PISA", "PISA for Development")) {
          setDT(x = tmp, key = "CNT")
        } else {
          setDT(x = tmp, key = "IDCNTRY")
        }
        var.labels <- attr(tmp, "variable.labels")
        var.labels <- gsub(pattern = "\\\\", replacement = "/", x = var.labels)
        var.labels <- gsub(pattern = "'", replacement = "\\\\'", x = var.labels)
        var.labels <- sapply(X = var.labels, FUN = function(i) {
          i <- gsub(pattern = "\u00e2\u20ac\u2122", replacement = "\\\\'", x = i)
        })
        var.labels <- paste("attr(tmp[['", toupper(names(var.labels)), "']], 'variable.label') <- '", var.labels, "'", sep = "")
        eval(parse(text = var.labels))
        attr(x = tmp, which = "variable.labels") <- NULL
        attr(x = tmp, which = "class") <- c("lsa.data", attr(x = tmp, which = "class"))
        assign(x = tolower(new.file), value = tmp)
        tmp <- NULL
        save(list = tolower(new.file), file = out.file, compress = FALSE)
        counter <<- counter + 1
        message("     ",
                if(nchar(counter) == 1) {
                  paste0("(  ", counter, "/", length(ISO), ")   ")
                } else if(nchar(counter) == 2) {
                  paste0("( ", counter, "/", length(ISO), ")   ")
                } else if(nchar(counter) > 2) {
                  paste0("(", counter, "/", length(ISO), ")   ")
                },
                basename(inp.file), " converted in ", format(as.POSIXct("0001-01-01 00:00:00") + {proc.time() - ptm}[[3]], "%H:%M:%OS3"))
      }
      ptm <- proc.time()
      suppressWarnings(
        invisible(mapply(FUN = convert.IEA, inp.file = full.inp.file.path, out.file = full.out.file.path, new.file = root.file.names))
      )
      if(length(ISO) == 1) {
        message("\n", " The selected file successfully converted in ", format(as.POSIXct("0001-01-01 00:00:00") + {proc.time() - ptm}[[3]] - 1, "%H:%M:%OS3"), "\n\n")
      } else {
        message("\n All ", length(ISO), " found files successfully converted in ", format(as.POSIXct("0001-01-01 00:00:00") + {proc.time() - ptm}[[3]] - 1, "%H:%M:%OS3"), "\n\n")
      }
    } else if(PISApre15 == TRUE) {
      sps.files <- list.files(path = inp.folder, pattern = "\\.sps")
      PISA.cycle <- unique(str_extract(string = sps.files, pattern = "^[[:alpha:]]+[[:digit:]]+"))
      if(length(PISA.cycle) > 1) {
        message("\nError:\nThe input folder contains data files from more than one PISA pre 2015 cycle. The input folder must contain files from single study and cycle only.\n\n")
        stop()
      }
      txt.files <- list.files(path = inp.folder, pattern = "\\.txt")
      if(dir.exists(paths = inp.folder) == FALSE) {
        message("\nError:\nThe input folder does not exist. Check your input.\n\n")
      } else {
        if(length(sps.files) == 0) {
          message("\nError:\nThe input folder contains no control (\".sps\") files to read. Check your input.\n\n")
        }
        if(length(txt.files) == 0) {
          message("\nError:\nThe input folder contains no data (\".txt\") files to read. Check your input.\n\n")
        }
      }
      full.inp.file.path <- file.path(inp.folder, sps.files)
      root.file.names <- sapply(X = strsplit(x = sps.files, split = "\\."), FUN = "[", 1)
      full.out.file.path <- paste(file.path(out.folder, tolower(x = root.file.names)), "RData", sep = ".")
      if(length(sps.files) > 0 & length(txt.files) > 0) {
        message("\n", paste(length(txt.files), " datasets selected for conversion. Some datasets can be rather large. Please be patient.\n"))
      }
      count.success <- 0
      convert.PISApre15 <- function(inp.file, out.file, new.file) {
        tryCatch({
          ptm <- proc.time()
          syntax.path <- inp.file
          raw.spss.syntax <- readLines(con = syntax.path)
          line.encodings <- sapply(X = stri_enc_detect(raw.spss.syntax), FUN = function(i) {
            i[[1]][[1]][1]
          })
          most.freq.encoding <- names(sort(table(line.encodings), decreasing = TRUE)[1])
          spss.syntax <- str_trim(string = gsub(pattern = "^\t+|\\\"|\'", replacement = "", x = readLines(con <- file(syntax.path, encoding = most.freq.encoding))))
          close(con)
          spss.syntax <- stri_enc_toutf8(spss.syntax)
          spss.syntax <- gsub(pattern = "\t+", replacement = " ", x = spss.syntax)
          spss.syntax <- gsub(pattern = "/\\s+", replacement = "/", x = spss.syntax)
          spss.syntax <- gsub(pattern = "[[:space:]]{2, }", x = spss.syntax, replacement = " ")
          spss.syntax <- spss.syntax[spss.syntax != ""]
          comments <- grep(pattern = "^.{3}\\*|^\\*", x = spss.syntax)
          if(length(x = comments) != 0) {
            spss.syntax <- spss.syntax[-comments]
          }
          variables <- grep(pattern = "^/*[[:alpha:]]+(\\_)*([[:alnum:]]+)*\\s\\d+\\s*-\\s*\\d+\\s*", x = spss.syntax, value = TRUE)
          variables <- gsub(pattern = "/", replacement = "", x = variables)
          variables <- gsub(pattern = "\\)\\s+\\.", replacement = ").", x = variables)
          var.names <- toupper(word(string = variables, start = 1, end = 1))
          ranges <- str_extract(string = variables, pattern = "[[:digit:]]+\\s*-\\s*[[:digit:]]+")
          ranges.start <- as.numeric(gsub(pattern = "\\s*-", replacement = "", x = str_extract(string = variables, pattern = "[[:digit:]]+\\s*-")))
          ranges.end <- as.numeric(gsub(pattern = "-\\s*", replacement = "", x = str_extract(string = variables, pattern = "-\\s*[[:digit:]]+")))
          no.formats <- grep(pattern = "\\)\\.?$", x = variables, invert = TRUE)
          if(length(x = no.formats) != 0) {
            variables[no.formats] <- paste(variables[no.formats], "(F,0)")
          }
          var.formats <- toupper(x = grep(pattern = "[[:alpha:]]", x = gsub(pattern = "[^[:alpha:]]|[[:space:]]", replacement = "", x = word(string = variables, start = str_count(variables, "\\S+"), end = str_count(variables, "\\S+"))), value = TRUE))
          var.dec <- grepl(pattern = "[1-9]+", x = word(string = variables, start = str_count(variables, "\\S+"), end = str_count(variables, "\\S+")))
          var.classes <- ifelse(var.formats == "F" & var.dec == FALSE, "factor", ifelse(var.formats == "F" & var.dec == TRUE, "numeric", ifelse(var.formats == "A", "character", "undefined")))
          file.name.string <- gsub(pattern = "\\\\", replacement = "/", x = grep(pattern = "data list file", x = spss.syntax, ignore.case = TRUE, value = TRUE))
          txt.file.name <- basename(gsub(pattern = "data list file *=*|\\.txt\\s*/*$", replacement = "", x = file.name.string, ignore.case = TRUE))
          txt.file.name <- gsub(pattern = "PAQ09", replacement = "PAR09", x = txt.file.name)
          txt.file.name <- gsub(pattern = "SCQ09_DEC11", replacement = "SCQ09_Dec11", x = txt.file.name)
          txt.file.name <- gsub(pattern = "cogn_2003", replacement = "cogn_2003_v2", x = txt.file.name)
          txt.file.path <- paste0(dirname(syntax.path), "/", txt.file.name, ".txt")
          if(file.exists(txt.file.path) == TRUE) {
            count.success <<- count.success + 1
          }
          tmp <- suppressMessages(data.frame(read_fwf(file = txt.file.path, fwf_positions(start = ranges.start, end = ranges.end, col_names = var.names), progress = FALSE)))
          assign.class <- function(data, classes) do.call(paste("as", classes, sep = "."), list(data))
          tmp <- replace(tmp,, Map(f = assign.class, data = tmp, classes = var.classes))
          character.vars <- names(Filter(is.character, tmp))
          tmp[character.vars] <- lapply(X = tmp[character.vars], FUN = function(i) {
            i <- sub(pattern = "^0+([1-9])", "\\1", x = i)
            i <- gsub(pattern = "^0+$", replacement = "0", x = i)
          })
          value.labels.first <- grep(pattern = "val[[:alpha:]]* lab[[:alpha:]]*", x = spss.syntax, ignore.case = TRUE)[1]
          value.labels.part <- spss.syntax[value.labels.first:length(spss.syntax)]
          clean.unwanted.statements <- function(text, expression) {
            statements <- grep(pattern = expression, x = text, ignore.case = TRUE)
            if(length(statements) != 0) {
              periods.after.statements <- grep(pattern = "\\.$", x = text)
              periods.after.first.statement <- periods.after.statements[which(periods.after.statements >= statements[1])]
              periods.terminating.statements <- head(periods.after.first.statement, n = length(statements))
              ranges.statements <- mapply(c, statements, periods.terminating.statements, SIMPLIFY = FALSE)
              indx.statements <- lapply(ranges.statements, function(i) {
                seq(from = i[1], to = i[2])
              })
              cleaned <- text[-unlist(indx.statements)]
            } else {
              cleaned <- text
            }
            exe.cache.recode.statements <- grep(pattern = "^exe[[:alpha:]]*\\.|^cache\\.|^recode", x = cleaned, ignore.case = TRUE)
            if(length(exe.cache.recode.statements) > 0) {
              cleaned <- cleaned[-exe.cache.recode.statements]
            }
            save.statements <- grep(pattern = "sav[[:alpha:]]*\\s+out[[:alpha:]]*", x = cleaned, ignore.case = TRUE)
            if(length(save.statements) > 0) {
              cleaned <- cleaned[-save.statements]
            }
            return(cleaned)
          }
          tmp.value.labels <- clean.unwanted.statements(text = value.labels.part, expression = "mis[[:alpha:]]*\\s+val[[:alpha:]]*")
          tmp.value.labels <- clean.unwanted.statements(text = tmp.value.labels, expression = "formats*\\s+[[:alnum:]]+")
          tmp.value.labels <- clean.unwanted.statements(text = tmp.value.labels, expression = "var[[:alpha:]]*\\s+lev[[:alpha:]]*")
          single.periods <- which(tmp.value.labels == ".")
          if(length(tmp.value.labels[single.periods]) != 0) {
            tmp.value.labels <- tmp.value.labels[-single.periods]
          }
          indx.ending.slashes <- grep(pattern = "/$", x = tmp.value.labels)
          if(length(indx.ending.slashes) > 0) {
            indx.move.end.slash <- indx.ending.slashes + 1
            tmp.value.labels[indx.move.end.slash] <- paste("/", tmp.value.labels[indx.move.end.slash], sep = "")
            tmp.value.labels[indx.ending.slashes] <- gsub(pattern = "/$", replacement = "", x = tmp.value.labels[indx.ending.slashes])
          }
          lab.statements.to.split <- grep(pattern = "Missing /", x = tmp.value.labels)
          if(length(lab.statements.to.split) > 0){
            add.split <- gsub(pattern = " /", replacement = " split /", x = tmp.value.labels[lab.statements.to.split])
            splitted.statements <- strsplit(x = add.split, split = " split ")
            tmp.value.labels[lab.statements.to.split] <- splitted.statements
            tmp.value.labels <- unlist(tmp.value.labels)
          }
          tmp.value.labels <- gsub(pattern = "xxxn", replacement = "", x = tmp.value.labels, ignore.case = TRUE)
          mixed.val.lab.var.names <- grep(pattern = "val[[:alpha:]]* lab[[:alpha:]]* [[:alnum:]]+", x = tmp.value.labels, ignore.case = TRUE)
          if(length(mixed.val.lab.var.names) != 0) {
            mixed.val.lab.var.names.pos <- lapply(tmp.value.labels[mixed.val.lab.var.names], function(i) {
              gregexpr(pattern = "\\s", text = i)[[1]][2]
            })
            max.char.mixed.lab.var.names <- lapply(X = tmp.value.labels[mixed.val.lab.var.names], nchar)
            val.lab.keywords <- str_trim(substring(text = tmp.value.labels[mixed.val.lab.var.names], first = 1, last = mixed.val.lab.var.names.pos), side = "both")
            var.names.and.labels <- str_trim(substring(text = tmp.value.labels[mixed.val.lab.var.names], first = mixed.val.lab.var.names.pos, last = max.char.mixed.lab.var.names), side = "both")
            split.statements <- Map(c, val.lab.keywords, var.names.and.labels)
            tmp.value.labels[mixed.val.lab.var.names] <- split.statements
            tmp.value.labels <- unlist(tmp.value.labels)
          }
          value.labels.next.pos <- grep(pattern = "val[[:alpha:]]* lab[[:alpha:]]*", x = tmp.value.labels, ignore.case = TRUE) + 1
          no.slash <- value.labels.next.pos[grep(pattern = "^/", x = tmp.value.labels[value.labels.next.pos], invert = TRUE)]
          tmp.value.labels[no.slash] <- paste("/", tmp.value.labels[no.slash], sep = "")
          tmp.value.labels <- tmp.value.labels[-grep(pattern = "val[[:alpha:]]* lab[[:alpha:]]*", x = tmp.value.labels, ignore.case = TRUE)]
          last.labels <- grep(pattern = "^/[[:alnum:]]+", x = tmp.value.labels) - 1
          tmp.value.labels[last.labels] <- gsub(pattern = "\\.$", replacement = "", x = tmp.value.labels[last.labels], ignore.case = TRUE)
          tmp.value.labels <- paste(tmp.value.labels, " split ", sep = "")
          tmp.value.labels[length(tmp.value.labels)] <- gsub(pattern = " split ", replacement = "", x = tmp.value.labels[length(tmp.value.labels)])
          indx.var.labels.lines <- grep(pattern = "^/|^[[:upper:]]{4, }|^[[:alpha:]]+[[:digit:]]+[[:alpha:]]", x = tmp.value.labels)
          indx.var.names <- split(indx.var.labels.lines, cumsum(c(TRUE, diff(indx.var.labels.lines) != 1)))
          indx.sequences <- indx.var.names[which(sapply(indx.var.names, length) > 1)]
          if(length(indx.sequences) == 0) {
            tmp.value.labels <- gsub(pattern = " split ", replacement = "", x = tmp.value.labels)
            tmp.value.labels <- str_trim(string = tmp.value.labels, side = "both")
          } else {
            indx.starting.slash <- grep(pattern = "^/", x = tmp.value.labels)
            tmp.value.labels[indx.starting.slash] <- paste("split1", tmp.value.labels[indx.starting.slash], sep = "")
            indx.remove.split <- unlist(lapply(X = indx.sequences, FUN = function(i) {
              head(i, n = length(i) - 1)
            }))
            tmp.value.labels[indx.remove.split] <- gsub(pattern = " split ", replacement = "", x = tmp.value.labels[indx.remove.split])
            pasted.value.labels <- paste(tmp.value.labels, sep = "", collapse = " ")
            tmp.value.labels <- unlist(str_split(string = pasted.value.labels, pattern = " split "))
            tmp.value.labels <- unlist(str_split(string = tmp.value.labels, pattern = "split1"))
            tmp.value.labels <- str_trim(string = tmp.value.labels, side = "both")
            tmp.value.labels <- tmp.value.labels[tmp.value.labels != ""]
          }
          mixed.statements <- grep(pattern = "^/(\\w+\\s)+([[:upper:]]\\s|[[:digit:]]+\\s)", x = tmp.value.labels)
          extract.and.split.labels <- function(vec, lst1, lst2) {
            substring(lapply(vec, function(i) i), first = lapply(lst1, function(i) i), last = lapply(lst2, function(i) i))
          }
          if(length(mixed.statements) != 0) {
            mixed.statements.pos.and.lengt <- gregexpr(pattern = "\\s[[:upper:]]\\s|\\s[[:digit:]]+\\s([^000]|[0-9]+\\%|[1-9]+[[:alpha:]]|[0-9]+\\-[0-9]+)", text = tmp.value.labels[mixed.statements])
            mixed.statements.pos <- lapply(mixed.statements.pos.and.lengt, function(i) {
              as.vector(i)
            })
            extract.var.names <- str_trim(substring(text = tmp.value.labels[mixed.statements], first = 1, last = sapply(mixed.statements.pos, function(i) i[1])), side = "both")
            split.end.pos <- lapply(X = mixed.statements.pos, FUN = function(i) {
              if(length(i) > 1) {
                i[2:length(i)] - 1
              } else if(length(i) == 1) {
                NULL
              }
            })
            split.last.pos <- lapply(X = tmp.value.labels[mixed.statements], FUN = function(i) {
              nchar(i)
            })
            split.end.pos <- mapply(c, split.end.pos, split.last.pos, SIMPLIFY = FALSE)
            extract.val.labels <- Map(extract.and.split.labels, tmp.value.labels[mixed.statements], mixed.statements.pos, split.end.pos)
            join.names.and.labels <- Map(c, as.list(extract.var.names), as.list(extract.val.labels))
            tmp.value.labels[mixed.statements] <- join.names.and.labels
          }
          tmp.value.labels <- str_trim(string = unlist(tmp.value.labels), side = "both")
          tmp.value.labels <- gsub(pattern = "\\s*/*\\.$", replacement = "", x = tmp.value.labels)
          mixed.labels <- grep(pattern = "^[[:digit:]]+\\s([[:alpha:]]+/*)+\\s[[:digit:]]+\\s[[:alpha:]]", x = tmp.value.labels)
          statements.to.escape <- c("1 pwk & 1 pmn",
                                    "[[:digit:]]+\\s*(hours|minutes)* and [[:digit:]]+\\s*(hours|minutes)",
                                    "times a year",
                                    "6342 QR 50000 or more - less than QR 750000",
                                    "6343 QR 75000 or more - less than QR 100000",
                                    "6344 QR 100000 or more - less than QR 125000",
                                    "6345 QR 125000 or more - less than QR 150000",
                                    "6346 QR 150000 or more",
                                    "2082 DKK 1 or more - less than DKK 500",
                                    "2083 DKK 501 or more - less than DKK 1.000",
                                    "6202 from 0 euros to 75 euros",
                                    "6203 from 75 euros to 3 999 euros",
                                    "6342 QR 500 or more - less than QR1000",
                                    "6346 QR 16000 or more",
                                    "QR 1000 or more - less than QR 6000",
                                    "QR 6000 or more - less than QR 11000",
                                    "QR 11000 or more - less than QR 16000",
                                    "QR 16000 or more",
                                    "YTL 600 or more - less than YTL 1200",
                                    "YTL 1200 or more - less than YTL 5000",
                                    "YTL 5000 or more - less than YTL 15000",
                                    "YTL 15000 or more",
                                    "QR 50000 or more - less than QR 75000",
                                    "YTL 6000 or more - less than YTL 12000",
                                    "YTL 12000 or more - less than YTL 24000",
                                    "YTL 24000 or more - less than YTL 48000",
                                    "YTL 48000 or more - less than YTL 72000",
                                    "YTL 72000 or more")
          unwanted.pattern <- grep(pattern = paste(statements.to.escape, collapse = "|"), x = tmp.value.labels)
          mixed.labels <- setdiff(mixed.labels, unwanted.pattern)
          if(length(mixed.labels) != 0) {
            mixed.labels.pos <- gregexpr(pattern = "\\s[[:digit:]]", text = tmp.value.labels[mixed.labels])
            mixed.labels.pos <- lapply(mixed.labels.pos, function(i) {
              as.vector(i)
            })
            length.mixed.labels <- lapply(tmp.value.labels[mixed.labels], nchar)
            all.split.points <- Map(c, 1, mixed.labels.pos, length.mixed.labels)
            mix.lab.start.positions <- lapply(X = all.split.points, FUN = function(i) {
              sort(c(i[seq(1, length(i), 2)], i[seq(2, length(i) - 1, 2)]))
            })
            mix.lab.end.positions <- lapply(X = all.split.points, FUN = function(i) {
              tail(i, n = length(i) - 1)
            })
            split.labels <- Map(extract.and.split.labels, tmp.value.labels[mixed.labels], mix.lab.start.positions, mix.lab.end.positions)
            tmp.value.labels[mixed.labels] <- split.labels
            tmp.value.labels <- str_trim(unlist(tmp.value.labels), side = "both")
          }
          value.labels.var.names.pos <- grep(pattern = "^/", x = tmp.value.labels)
          value.labels.start.pos <- value.labels.var.names.pos + 1
          value.labels.end.pos <- (value.labels.var.names.pos - 1)
          value.labels.end.pos[length(value.labels.end.pos) + 1] <- length(tmp.value.labels)
          value.labels.end.pos <- value.labels.end.pos[-1]
          form.statements <- function(string.object) {
            if(length(grep(pattern = "^/", x = string.object)) == 0) {
              obj.var.names <- string.object
            } else {
              obj.var.names <- toupper(sub(pattern = "^/", x = grep(pattern = "^/", x = string.object, value = TRUE), replacement = ""))
            }
            indx.elem.list <- lapply(gregexpr(" TO ", obj.var.names), function(component){
              attributes(component) <- NULL
              ifelse(component == -1, component <- 0, component)
              component
            })
            all.spaces <- gregexpr("\\s+", obj.var.names)
            matched.space.pos <- mapply(match, indx.elem.list, all.spaces)
            previous.string.pos <- lapply(matched.space.pos, function(x) x - 1)
            next.string.pos <- lapply(matched.space.pos, function(x) x + 1)
            previous.string <- lapply(seq_along(all.spaces), function(i) all.spaces[[i]][previous.string.pos[[i]]])
            next.string <- lapply(seq_along(all.spaces), function(i) all.spaces[[i]][next.string.pos[[i]]])
            ranges.var.list <- lapply(1:length(previous.string), function(i){
              xx <- strsplit(x = obj.var.names, split = "")[[i]]
              xx[next.string[[i]]] <- " which(names(tmp)=='"
              xx <- gsub(pattern = " TO ", replacement = ":", x = xx)
              xx <- gsub(pattern = " TO ", replacement = "'):", x = paste(xx, sep = " ", collapse = ""))
              xx <- gsub(pattern = " ", replacement = "' ", x = xx)
              xx <- lapply(X = xx, FUN = function(j) {
                if(str_count(string = j, pattern = "\\S+") == 1) {
                  paste("names(tmp['", j, "'])", sep = "")
                } else {
                  paste("names(tmp[ , c(which(names(tmp)=='", gsub(pattern = " ", replacement = "), which(names(tmp)=='", x = paste(xx, sep = " ", collapse = "")), "'))])", sep = "")
                }
              })
              xx <- unlist(xx)
            })
            number.of.single.quotes <- lapply(ranges.var.list, FUN = function(i) {
              x <- str_count(string = i, pattern = "'")
              return(x)
            })
            indx.four.quotes <- which(number.of.single.quotes == 4)
            indx.colons <- lapply(ranges.var.list, function(i) {
              grep(pattern = ":", i)
            })
            indx.single.colon <- which(indx.colons == 1)
            single.colon <- intersect(indx.four.quotes, indx.single.colon)
            if(length(single.colon) > 0) {
              ranges.var.list[single.colon] <- gsub(pattern = "names(tmp[", replacement = "names(tmp[which(names(tmp)==", x = ranges.var.list[single.colon], fixed = TRUE)
              ranges.var.list[single.colon] <- gsub(pattern = "'])", replacement = "')])", x = ranges.var.list[single.colon], fixed = TRUE)
              ranges.and.prev.string <- list(ranges.var.list = ranges.var.list, previous.string = previous.string, obj.var.names = obj.var.names)
            } else {
              ranges.and.prev.string <- list(ranges.var.list = ranges.var.list, previous.string = previous.string, obj.var.names = obj.var.names)
            }
            return(ranges.and.prev.string)
          }
          ranges.and.prev.pos <- form.statements(tmp.value.labels)
          value.labels.ranges.var.list <- ranges.and.prev.pos[["ranges.var.list"]]
          previous.string <- ranges.and.prev.pos[["previous.string"]]
          value.labels.var.names <- ranges.and.prev.pos[["obj.var.names"]]
          value.labels.full.var.list <- lapply(value.labels.ranges.var.list, function(i) {eval(parse(text = i))})
          value.labels.block.ranges <- paste(value.labels.start.pos, value.labels.end.pos, sep = ":", collapse = " ")
          value.labels.ranges.expr <- unlist(lapply(X = 1:str_count(string = value.labels.block.ranges, pattern = "\\S+"), FUN = function(i) {
            paste("value.labels.block", i, " <- tmp.value.labels[", word(string = value.labels.block.ranges, start = i, end = i), "]", sep = "")
          }))
          eval(parse(text = value.labels.ranges.expr))
          list.value.labels.blocks <- paste(ls(pattern = "value.labels.block[[:digit:]]+"), collapse = ", ")
          list.value.labels.blocks.names <- paste(ls(pattern = "value.labels.block[[:digit:]]+"))
          eval(parse(text = paste("value.labels.blocks <- list(", list.value.labels.blocks, ")", sep = "")))
          names(value.labels.blocks) <- list.value.labels.blocks.names
          value.labels.blocks <- value.labels.blocks[order(nchar(names(value.labels.blocks)), names(value.labels.blocks))]
          value.labels.blocks <- lapply(X = value.labels.blocks, gsub, pattern = paste("^00+\\s+", collapse = "|"), replacement = "0 ")
          value.labels.blocks <- lapply(X = value.labels.blocks, FUN = function(i) {
            sub("^0+([1-9])", "\\1", x = i)
          })
          concat.value.labels.blocks <- lapply(value.labels.blocks, function(i) {
            paste(i, collapse = " ")
          })
          blocks.with.just.miss <- c("^7 N/A 8 Invalid 9 Missing r Not reached$",
                                     "^7 N/A 8 Invalid r Not reached$",
                                     "^7 N/A 8 M/R 9 Mis",
                                     "^7 N/A 9 Missing r Not reached$",
                                     "^7 N/A r Not reached$",
                                     "^7+ N/A 8+ Invalid 9+ Missing$",
                                     "^7+ N/A 8+ Invalid$",
                                     "^8 Invalid r Not reached$",
                                     "^9 Missing r Not reached$",
                                     "^9*7 N/A 9*8 Invalid 9+ Miss.*$",
                                     "^9*7 N/A 9*8 Invalid$",
                                     "^9*7 N/A 9*8 M/R 9+ Missing$",
                                     "^9*7 N/A 9+ Mis$",
                                     "^9*7 N/A 9+ Missing 9*8 Invalid$",
                                     "^9*7 N/A 9+ Missing$",
                                     "^9*7 N/A$",
                                     "^9*8 Invalid 9*7 N/A 9+ Missing$",
                                     "^9*8 Invalid 9*7 N/A$",
                                     "^9*8 Invalid 9+ Missing$",
                                     "^9*8 Invalid$",
                                     "^9+ Missing 9*7 N/A$",
                                     "^9+ Missing 9*8 Invalid$",
                                     "^9+ Missing$",
                                     "^9+7 N/A 9+ Miss$",
                                     "^96 Ungraded 97 N/A 98 Invalid 99 Miss$",
                                     "^96 Ungraded 98 Invalid 99 Miss$",
                                     "^96 Ungraded 98 Invalid 99 Missing$",
                                     "^96 Ungraded 99 Missing$",
                                     "^996 N/A - <ISCED level> not available in this school 9997 N/A 9998 Invalid 9999 Miss$",
                                     "^996 N/A 9997 N/A 9998 Invalid 9999 Missing$",
                                     "^997 Missing$")
          indx.numeric <- unique(grep(paste(blocks.with.just.miss, collapse = "|"), concat.value.labels.blocks))
          if(length(indx.numeric) != 0) {
            tmp[ , unlist(unique(value.labels.full.var.list[indx.numeric]))] <- lapply(X = unlist(unique(value.labels.full.var.list[indx.numeric])), FUN = function(x) {
              as.numeric(as.character(tmp[ , x]))
            })
          }
          get.orig.data.values <- function(data, var.list) {
            component.names <- unlist(var.list)
            unique.values <- lapply(data[component.names], unique)
            unique.values <- lapply(unique.values, sort)
            unique.values <- lapply(unique.values, as.character)
            return(unique.values)
          }
          orig.data.values <- get.orig.data.values(data = tmp, var.list = value.labels.full.var.list)
          character.columns <- names(Filter(is.character, tmp))
          components.to.clean <- character.columns[is.element(character.columns, names(orig.data.values))]
          orig.data.values[components.to.clean] <- lapply(X = orig.data.values[components.to.clean], FUN = function(i) {
            sub("^0+([1-9])", "\\1", x = i)
          })
          orig.data.values.in.blocks <- lapply(X = value.labels.full.var.list, FUN = function(i) {
            values <- sort(unique(unlist(orig.data.values[i], use.names = FALSE)))
            duplicated.values <- paste(values, values)
            return(duplicated.values)
          })
          duplicated.data.values.in.blocks <- mapply(FUN = c, value.labels.blocks, orig.data.values.in.blocks, SIMPLIFY = FALSE)
          value.labels.blocks <- lapply(X = duplicated.data.values.in.blocks, FUN = function(i) {
            i[!duplicated(word(string = i, start = 1, end = 1))]
          })
          value.labels.blocks <- lapply(X = value.labels.blocks, FUN = sort)
          list.value.LEVELS <- lapply(X = value.labels.blocks, FUN = function(element) {
            word(string = element, start = 1, end = 1)
          })
          list.value.LABELS <- lapply(X = value.labels.blocks, FUN = function(element) {
            make.unique(gsub(pattern = "^\\w*[[:punct:]]*\\w*\\s*|^\\w*\\s*", replacement = "", x = element))
          })
          if(length(indx.numeric) != 0) {
            value.labels.blocks <- value.labels.blocks[-indx.numeric]
            value.labels.full.var.list <- value.labels.full.var.list[-indx.numeric]
            previous.string <- previous.string[-indx.numeric]
            value.labels.var.names <- value.labels.var.names[-indx.numeric]
            list.value.LEVELS <- list.value.LEVELS[-indx.numeric]
            list.value.LABELS <- list.value.LABELS[-indx.numeric]
          }
          value.lab.statement.left <- lapply(1:length(previous.string), function(i){
            if(str_count(string = value.labels.var.names[[i]], pattern = "\\S+") == 1) {
              xx <- strsplit(value.labels.var.names, "")[[i]]
              xx <- paste("tmp[ , '", paste(value.labels.full.var.list[[i]], sep = "", collapse = "', '"), "']", sep = "")
            } else {
              xx <- strsplit(value.labels.var.names, "")[[i]]
              xx <- paste("tmp[ , c('", paste(value.labels.full.var.list[[i]], sep = "", collapse = "', '"), "')]", sep = "")
            }
          })
          value.lab.statement.right <- lapply(1:length(previous.string), function(i){
            if(str_count(string = value.labels.var.names[[i]], pattern = "\\S+") == 1) {
              xx <- strsplit(value.labels.var.names, "")[[i]]
              xx <- paste("factor(tmp[ , '" , paste(value.labels.full.var.list[[i]], sep = "", collapse = "', '"), "'], levels = c('", paste(list.value.LEVELS[[i]], sep = "", collapse = "', '"), "')", ", labels = c('", paste(list.value.LABELS[[i]], sep = "'", collapse = "', '"), "'))", sep = "")
            } else {
              xx <- strsplit(value.labels.var.names, "")[[i]]
              xx <- paste("lapply(tmp[ , c('" , paste(value.labels.full.var.list[[i]], sep = "", collapse = "', '"), "')], factor, levels = c('", paste(list.value.LEVELS[[i]], sep = "", collapse = "', '"), "')", ", labels = c('", paste(list.value.LABELS[[i]], sep = "'", collapse = "', '"), "'))", sep = "")
            }
          })
          value.labels.statements <- paste(value.lab.statement.left, "<-", value.lab.statement.right)
          if(txt.file.name == "INT_stui_2003_v2") {
            val.lab.statements.to.remove <- c("\\'ST17Q14\\'], levels = c\\(\\'1\\', \\'20301\\', \\'27601\\',",
                                              "\\'ST17Q15\\'], levels = c\\(\\'1', \\'20301\\', \\'2\\', \\'30001\\',",
                                              "\\'ST17Q16\\'], levels = c\\(\\'1\\', \\'2\\', \\'30001\\', \\'30002\\',")
            indx.statements.to.remove <- grep(pattern = paste(val.lab.statements.to.remove, collapse = "|"), x = value.labels.statements)
            value.labels.statements <- value.labels.statements[-indx.statements.to.remove]
          }
          eval(parse(text = value.labels.statements))
          indx.char.cols.to.numeric <- function(data) {
            indx.character.columns <- sapply(X = data, FUN = function(x) {
              grepl(pattern = "character", x = class(x))
            })
            indx.uppercase.columns <- sapply(X = data, FUN = function(x) {
              all(grepl(pattern = "[[:upper:]]{3}", x = x))
            })
            indx.character.columns <- indx.character.columns[-which(indx.uppercase.columns == TRUE)]
            indx.mixed.columns <- sapply(X = data[indx.character.columns], FUN = function(x){
              any(grepl(pattern = "[a-zA-Z]+[0-9]+|[0-9]+[a-zA-Z]+", x = x) == TRUE)
            })
            indx.sentences.columns <- sapply(data[indx.character.columns], function(x) {
              any(str_count(string = x, pattern = "\\S+") > 5)
            })
            indx.cols.with.levels <- sapply(data, function(i) {
              indx.not.NULL <- length(attr(i, "levels"))
            })
            indx.cols.with.levels <- names(indx.cols.with.levels[indx.cols.with.levels > 0])
            indx.all.levels.num.values <- sapply(data[indx.cols.with.levels], function(i) {
              any(grep(pattern = "[[:alpha:]]", x = attr(x = i, which = "levels")), na.rm = TRUE)
            })
            character.columns <- names(indx.character.columns[indx.character.columns == TRUE])
            mixed.columns <- names(indx.mixed.columns[indx.mixed.columns == TRUE])
            sentences.columns <- names(indx.sentences.columns[indx.sentences.columns == TRUE])
            numeric.columns <- names(indx.all.levels.num.values[indx.all.levels.num.values == FALSE])
            preserved.columns <- c(mixed.columns, sentences.columns)
            c(setdiff(character.columns, preserved.columns), numeric.columns)
          }
          indx.convert.to.num <- indx.char.cols.to.numeric(tmp)
          tmp[ , indx.convert.to.num] <- lapply(X = indx.convert.to.num, FUN = function(x) {
            as.numeric(as.character(tmp[ , x]))
          })
          character.columns <- names(which(sapply(tmp, function(i) {class(i) == "character"})))
          tmp[character.columns] <- lapply(tmp[character.columns], function(i) {
            if(all(grepl(pattern = "^[[:upper:]]{3}$", x = i)) == TRUE) {
              as.factor(i)
            } else {
              as.character(i)
            }
          })
          filter.factor.vars.with.missing <- function(data) {
            var.levels <- lapply(X = data, FUN = function(i) {
              var.levels <- levels(i)
            })
            length.of.0 <- sapply(X = var.levels, FUN = length)
            vars.with.missing.levels <- var.levels[length.of.0 != 0]
            return(vars.with.missing.levels)
          }
          vars.with.missings <- names(filter.factor.vars.with.missing(tmp))
          missing.patterns <- c("N/A",
                                "Invalid",
                                "Missing",
                                "Miss",
                                "Mis",
                                "Not reached",
                                "N/A, Yes: Not administered",
                                "N/A, N/A: Not administered",
                                "Not administered",
                                "Unreached",
                                "Ungraded",
                                "M/R",
                                "99999970",
                                "99999990",
                                "9999997",
                                "9999998",
                                "9999999")
          fac.missing.levels.extract <- function(data, patterns) {
            missing.values.indx <- lapply(X = data, FUN = function(i) {
              grep(pattern = paste(patterns, collapse = "|"), x = levels(i))
            })
            length.of.0 <- sapply(X = missing.values.indx, FUN = length)
            vars.with.missing.levels <- missing.values.indx[length.of.0 != 0]
            var.names.with.missing.levels <- names(vars.with.missing.levels)
            var.levels <- lapply(data[var.names.with.missing.levels], function(i) {
              number.of.levels <- nlevels(i)
              factor.levels <- levels(i)
              match.factor.levels <- vector(mode = "character", length = number.of.levels)
              names(match.factor.levels) <- factor.levels
              match.factor.levels[1:number.of.levels] <- 1:number.of.levels
              return(match.factor.levels)
            })
            extract <- lapply(var.levels, function(i) {
              indx.differences <- names(i) %in% patterns
              difference <- i[indx.differences == TRUE]
            })
            flatten <- lapply(X = extract, FUN = function(i) {names(unlist(i))})
            return(flatten)
          }
          missing.levels.list.factors <- fac.missing.levels.extract(data = tmp, patterns = missing.patterns)
          missings.part <- spss.syntax[grep(pattern = "mis[[:alpha:]]*\\sval[[:alpha:]]*\\s*", x = spss.syntax, ignore.case = TRUE)[1]:length(spss.syntax)]
          missing.keywords <- grep(pattern = "mis[[:alpha:]]*\\sval[[:alpha:]]*\\s*", x = missings.part, ignore.case = TRUE)
          ending.periods <- grep(pattern = "([[:digit:]]+|[[:alpha:]]+)\\s*\\)\\s*\\.$", x = missings.part)
          if(length(missing.keywords) == 1 && length(ending.periods) == 0) {
            ending.period <- grep(pattern = "\\.", x = missings.part)
            missings.part <- missings.part[missing.keywords:ending.period]
            missings.part <- missings.part[-grep(pattern = "save outfile", x = missings.part)]
            missings.part <- gsub(pattern = "^/", replacement = "missing values ", x = missings.part)
          } else {
            first.period.larger.than.missing <- sapply(missing.keywords, function(i) head(ending.periods[ending.periods >= i], 1))
            missings.part <- missings.part[unlist(Map(`:`, missing.keywords, first.period.larger.than.missing))]
            missings.part <- gsub(pattern = "^/", replacement = "missing values ", x = missings.part)
            missings.part <- gsub(pattern = "\\s+\\)", replacement = ")", x = missings.part)
            missings.part <- gsub(pattern = "\\(\\s+", replacement = "(", x = missings.part)
            missings.part <- gsub(pattern = "\\s+\\.", replacement = ".", x = missings.part)
          }
          closing.brackets.no.period <- grep(pattern = "\\)$", x = missings.part)
          if(length(closing.brackets.no.period) > 0) {
            missings.part[closing.brackets.no.period] <- paste0(missings.part[closing.brackets.no.period], ".")
          }
          if(all(substring(text = missings.part, first = nchar(x = missings.part), last = nchar(x = missings.part)) == ".") == FALSE) {
            periods <- grep(pattern = "\\.", x = missings.part)
            missing.statements.collapsed <- paste(missings.part, collapse = " ")
            missing.values.statements <- unlist(strsplit(missing.statements.collapsed, split = ".", fixed = TRUE))
            missing.values.statements <- toupper(gsub(pattern = "\\s*mis[[:alpha:]]* val[[:alpha:]]* ", replacement = "", x = grep(pattern = "mis[[:alpha:]]* val[[:alpha:]]*", x = missing.values.statements, ignore.case = TRUE, value = TRUE), ignore.case = TRUE))
          } else {
            missing.values.statements <- gsub(pattern = "\\.", replacement = "", x = missings.part)
            missing.values.statements <- toupper(gsub(pattern = "\\s*mis[[:alpha:]]* val[[:alpha:]]* ", replacement = "", x = grep(pattern = "mis[[:alpha:]]* val[[:alpha:]]*", x = missing.values.statements, ignore.case = TRUE, value = TRUE), ignore.case = TRUE))
          }
          if(length(grep(pattern = " TO ", x = missing.values.statements)) > 0) {
            statements.with.var.ranges <- grep(pattern = " TO ", missing.values.statements)
            var.ranges <- sapply(X = str_split(string = missing.values.statements[statements.with.var.ranges], pattern = "\\s\\("), function(i) {
              i[1]
            })
            missing.values <- sapply(X = str_split(string = missing.values.statements[statements.with.var.ranges], pattern = "\\s\\("), function(i) {
              paste("(", i[2], sep = "")
            })
            list.of.variables <- lapply(form.statements(var.ranges)[["ranges.var.list"]], function(i) {eval(parse(text = i))})
            formed.var.list.w.miss.values <- lapply(X = Map(c, list.of.variables, missing.values), FUN = function(i) {
              paste(i, collapse = " ")
            })
            missing.values.statements[statements.with.var.ranges] <- formed.var.list.w.miss.values
            missing.values.statements <- unlist(missing.values.statements)
          }
          missing.values.statements <- gsub(pattern = ", ", replacement = ",", x = missing.values.statements)
          missing.values.statements <- gsub(pattern = " THRU ", replacement = "THRU", x = missing.values.statements)
          miss.var.names <- sapply(as.list(missing.values.statements), function(i) {
            word(string = i, start = 1, end = str_count(string = i, pattern = "\\S+") - 1)
          })
          miss.val.lists <- sapply(as.list(missing.values.statements), function(i) {
            list.miss <- word(string = i, start = str_count(string = i, pattern = "\\S+"), end = str_count(string = i, pattern = "\\S+"))
            gsub(pattern = ",", replacement = ", ", x = list.miss)
            gsub(pattern = "THRU", replacement = " THRU ", x = list.miss)
          })
          split.var.names <- str_split(string = miss.var.names, pattern = " ")
          indx.TO.statements <- which(lapply(X = split.var.names, FUN = function(i) {
            "TO" %in% i
          }) == TRUE)
          split.var.names[indx.TO.statements] <- lapply(X = split.var.names[indx.TO.statements], FUN = paste, collapse = " ")
          missing.values.statements <- unlist(mapply(FUN = paste, split.var.names, as.list(miss.val.lists)))
          missing.values.statements <- gsub(pattern = "9997 THRU 9999", replacement = "9997, 9998, 9999", x = missing.values.statements)
          missing.values.statements <- gsub(pattern = "9997 THRU 9999, 996", replacement = "996, 9997, 9998, 9999", x = missing.values.statements)
          missing.values.statements <- gsub(pattern = "96 THRU 99", replacement = "96, 97, 98, 99", x = missing.values.statements)
          missing.values.statements <- gsub(pattern = "\\s+\\)", replacement = ")", x = missing.values.statements)
          missing.values.statements <- gsub(pattern = "\\(\\s+", replacement = "(", x = missing.values.statements)
          indx.end.var.names <- unlist(gregexpr(pattern = " \\(", text = missing.values.statements))
          var.names.missing.statements <- word(string = missing.values.statements, start = 1, end = indx.end.var.names, sep = "")
          var.names.missing.statements <- unlist(strsplit(x = var.names.missing.statements, split = " "))
          num.vars.with.missings.names <- var.names.missing.statements[is.na(match(var.names.missing.statements,vars.with.missings))]
          if(length(num.vars.with.missings.names) == 0) {
            missing.levels.list <- missing.levels.list.factors
          } else {
            num.vars.with.missings.indx <- is.element(el = word(string = missing.values.statements, start = 1, end = indx.end.var.names, sep = ""), set = num.vars.with.missings.names)
            num.vars.with.missings.raw <- missing.values.statements[num.vars.with.missings.indx == TRUE]
            indx.end.var.names.raw <- unlist(gregexpr(pattern = "\\(", text = num.vars.with.missings.raw)) + 1
            indx.end.var.label.statements.raw <- unlist(gregexpr(pattern = "\\)", text = num.vars.with.missings.raw))
            missing.levels.list.numeric <- lapply(X = strsplit(x = word(string = num.vars.with.missings.raw, start = indx.end.var.names.raw, end = indx.end.var.label.statements.raw, sep = ""), split = ",", fixed = TRUE), FUN = function(i) {
              x <- sort(i)
              y <- as.numeric(x)
              return(y)
            })
            names(missing.levels.list.numeric) <- num.vars.with.missings.names
            missing.levels.list <- c(missing.levels.list.factors, missing.levels.list.numeric)
          }
          assign.attributes <- function(data, attr.name, attr.list, to.na = c("yes", "no", "ignore")) {
            if(attr.name == "missings" & to.na == "yes") {
              common.vars <- intersect(names(data), names(attr.list))
              factor.vars.logical <- lapply(data[common.vars], function(i) {class(i) == "factor"})
              numeric.vars.logical <- lapply(data[common.vars], function(i) {class(i) == "numeric"})
              names.factor.vars <- names(factor.vars.logical[factor.vars.logical == TRUE])
              names.numeric.vars <- names(numeric.vars.logical[numeric.vars.logical == TRUE])
              missing.levels.factor <- attr.list[names.factor.vars]
              missing.values.numeric <- attr.list[names.numeric.vars]
              statements.factor.left <- paste(deparse(substitute(data)), "[['", names.factor.vars, "']] <- ", sep = "")
              statements.factor.right0 <- lapply(missing.levels.factor, FUN = function(i) {
                if(length(i) == 1) {
                  paste("c('", i, "')", sep = "")
                } else {
                  paste("c('", paste(i, collapse = "', '"), "')", sep = "")
                }
              })
              statements.factor.right <- paste("unknownToNA(x = ", deparse(substitute(data)), "[['", names(missing.levels.factor), "']], unknown = ", statements.factor.right0, ")", sep = "")
              statements.numeric.left <- paste(deparse(substitute(data)), "[['", names.numeric.vars, "']] <- ", sep = "")
              statements.numeric.right0 <- lapply(missing.values.numeric, FUN = function(i) {
                if(length(i) == 1) {
                  paste("c(", i, ")", sep = "")
                } else {
                  paste("c(", paste(i, collapse = ", "), ")", sep = "")
                }
              })
              statements.numeric.right <- paste("unknownToNA(x = ", deparse(substitute(data)), "[['", names(missing.values.numeric), "']], unknown = ", statements.numeric.right0, ")", sep = "")
              statements.factor <- paste(statements.factor.left, statements.factor.right, sep = "")
              statements.numeric <- paste(statements.numeric.left, statements.numeric.right, sep = "")
              statements <- c(statements.factor, statements.numeric)
              empty <- grep(pattern = "[['']]", x = statements, fixed = TRUE)
              statements <- if(length(empty) == 0) {
                statements
              } else {
                statements[-empty]
              }
            } else if(attr.name == "missings" & to.na == "no") {
              var.names <- names(attr.list)
              statements.left <- paste("attr(", deparse(substitute(data)), "[['", var.names, "']], '", attr.name, "') <- ", sep = "")
              statements.right <- lapply(X = attr.list, FUN = function(i) {
                if(is.character(i) == TRUE) {
                  paste("c('", paste(i, collapse = "' ,'"), "')", sep = "")
                } else if(is.numeric(i) == TRUE) {
                  paste("c(", paste(i, collapse = ", "), ")", sep = "")
                }
              })
              statements <- paste(statements.left, statements.right, sep = "")
            } else if(attr.name == "var.labels" & to.na == "ignore") {
              statements <- paste("attr(tmp[['", names(var.labels), "']], 'variable.label') <- '", var.labels, "'", sep = "")
            }
            return(statements)
          }
          if(missing.to.NA == TRUE) {
            eval(parse(text = assign.attributes(data = tmp, attr.name = "missings", attr.list = missing.levels.list, to.na = "yes")))
            tmp <- droplevels(tmp)
          } else if(missing.to.NA == FALSE) {
            tmp <- droplevels(tmp)
            classes.missing.levels.list <- lapply(missing.levels.list, class)
            fac.vars.missing.levels.list <- names(classes.missing.levels.list[classes.missing.levels.list == "character"])
            missing.strings.fac <- lapply(tmp[fac.vars.missing.levels.list], function(i) {
              grep(pattern = paste(missing.patterns, collapse = "|"), x = levels(i), value = TRUE)
            })
            missing.strings.fac <- missing.strings.fac[lapply(missing.strings.fac, length) != 0]
            num.vars.missing.levels.list <- names(classes.missing.levels.list[classes.missing.levels.list == "numeric"])
            if(length(num.vars.missing.levels.list) > 0) {
              missing.strings.num <- missing.levels.list[num.vars.missing.levels.list]
              unique.values <- lapply(X = tmp[num.vars.missing.levels.list], FUN = function(i) (
                unique(i)
              ))
              which.missing.are.present <- function(list.of.missings, unique.data.values) {
                list.of.missings[which(list.of.missings %in% unique.data.values)]
              }
              presence.missing.strings.num <- mapply(FUN = which.missing.are.present, list.of.missings = missing.strings.num, unique.data.values = unique.values, SIMPLIFY = FALSE)
              indx.presence <- sapply(presence.missing.strings.num, function(i) {
                length(i) > 0
              })
              missing.strings.num <- presence.missing.strings.num[indx.presence]
              missing.levels.list.new <- c(missing.strings.fac, missing.strings.num)
            } else {
              missing.levels.list.new <- missing.strings.fac
            }
            eval(parse(text = assign.attributes(data = tmp, attr.name = "missings", attr.list = missing.levels.list.new, to.na = "no")))
            no.missing.attr <- lapply(tmp, function(i) {
              is.null(attr(i, "missings"))
            })
            no.missing.attr <- names(which(no.missing.attr == TRUE))
            missing.attr.as.NA <- as.list(rep(NA, times = length(no.missing.attr)))
            names(missing.attr.as.NA) <- no.missing.attr
            NA.miss.assign.statements <- assign.attributes(data = tmp, attr.name = "missings", attr.list = missing.attr.as.NA, to.na = "no")
            NA.miss.assign.statements <- gsub(pattern = "NULL", replacement = "NA", x = NA.miss.assign.statements)
            eval(parse(text = NA.miss.assign.statements))
          }
          var.labels.start <- grep(pattern = "var[[:alpha:]]* lab[[:alpha:]]*", x = spss.syntax, ignore.case = TRUE) + 1
          var.labels.end <- var.labels.start + (length(var.names) - 1)
          tmp.var.labels <- spss.syntax[var.labels.start:var.labels.end]
          periods.objects <- which(tmp.var.labels == ".")
          if(length(periods.objects) > 0) {
            tmp.var.labels <- tmp.var.labels[-which(tmp.var.labels == ".")]
          }
          tmp.var.labels.names <- toupper(word(tmp.var.labels, start = 1, end = 1))
          tmp.var.labels.content <- gsub(pattern = "^\\w*\\s*", replacement = "", x = tmp.var.labels)
          var.labels <- as.list(structure(.Data = tmp.var.labels.content, names = tmp.var.labels.names))
          var.labels <- lapply(X = var.labels, function(i) {
            gsub(pattern = "[[:space:]]\\.$|\\.$", replacement = "", x = i)
          })
          eval(parse(text = assign.attributes(data = tmp, attr.name = "var.labels", attr.list = var.labels, to.na = "ignore")))
          attr(x = tmp, which = "study") <- "PISA"
          if(txt.file.name %in% c("intcogn_v4", "intscho", "intstud_math_v3", "intstud_read_v3", "intstud_scie_v3")) {
            attr(x = tmp, which = "cycle") <- 2000
          } else if(txt.file.name %in% c("INT_cogn_2003_v2", "INT_schi_2003", "INT_stui_2003_v2")) {
            attr(x = tmp, which = "cycle") <- 2003
          } else if(txt.file.name %in% c("INT_Cogn06_S_Dec07", "INT_Cogn06_T_Dec07", "INT_Par06_Dec07", "INT_Sch06_Dec07", "INT_Stu06_Dec07")) {
            attr(x = tmp, which = "cycle") <- 2006
          } else if(txt.file.name %in% c("INT_COG09_S_DEC11", "INT_COG09_TD_DEC11", "INT_PAR09_DEC11", "INT_SCQ09_Dec11", "INT_STQ09_DEC11")) {
            attr(x = tmp, which = "cycle") <- 2009
          } else if(txt.file.name %in% c("INT_COG12_DEC03", "INT_COG12_S_DEC03", "INT_PAQ12_DEC03", "INT_SCQ12_DEC03", "INT_STU12_DEC03")) {
            attr(x = tmp, which = "cycle") <- 2012
          }
          if(txt.file.name %in% c("INT_STU12_DEC03", "INT_STQ09_DEC11", "INT_Stu06_Dec07", "INT_Par06_Dec07", "INT_stui_2003_v2", "intstud_math_v3", "intstud_read_v3", "intstud_scie_v3")) {
            attr(x = tmp, which = "file.type") <- "std.bckg"
          } else if(txt.file.name %in% c("INT_COG12_DEC03", "INT_COG09_DEC10", "INT_Cogn06_T_Dec07", "intcogn_v4")){
            attr(x = tmp, which = "file.type") <- "std.cogn"
          } else if(txt.file.name %in% c("INT_COG12_S_DEC03", "INT_COG09_Scored_DEC10", "INT_Cogn06_S_Dec07", "INT_cogn_2003_v2")) {
            attr(x = tmp, which = "file.type") <- "std.cogn.scr"
          } else if(txt.file.name %in% c("INT_PAQ12_DEC03", "INT_PAQ09_DEC10")) {
            attr(x = tmp, which = "file.type") <- "std.par.bckg"
          } else if(txt.file.name %in% c("INT_SCQ12_DEC03", "INT_SCQ09_Dec10", "INT_Sch06_Dec07", "INT_schi_2003", "intscho")) {
            attr(x = tmp, which = "file.type") <- "sch.bckg"
          }
          setDT(x = tmp, key = "CNT")
          attr(x = tmp, which = "class") <- c("lsa.data", attr(x = tmp, which = "class"))
          assign(x = tolower(new.file), value = tmp)
          tmp <- NULL
          full.out.file.path <- file.path(out.folder, paste(txt.file.name, "RData", sep = "."))
          save(list = tolower(new.file), file = out.file, compress = FALSE)
          message("       (", count.success, "/", length(txt.files), ")  ", str_pad(string = sub(pattern = "\\.sps", replacement = ".txt", x = basename(inp.file), ignore.case = TRUE), width = 40, side = "right"), " converted in ", format(as.POSIXct("0001-01-01 00:00:00") + {proc.time() - ptm}[[3]] - 1, "%H:%M:%OS3"))
        },
        error = function(e) {
          message("")
        })
      }
      ptm <- proc.time()
      suppressWarnings(
        invisible(mapply(FUN = convert.PISApre15, inp.file = full.inp.file.path, out.file = full.out.file.path, new.file = root.file.names))
      )
      if(length(sps.files) > 0 & length(txt.files) > 0) {
        message("\n All ", count.success, " found files successfully converted in ", format(as.POSIXct("0001-01-01 00:00:00") + {proc.time() - ptm}[[3]] - 1, "%H:%M:%OS3"), "\n\n")
      }
      if(length(sps.files) > length(txt.files)) {
        message("\nWarning:\nSome of the control (\".sps\") files have no matching data (\".txt\") file to convert. These files are skipped.\nCheck the content of the input folder.\n\n")
      }
      if(length(txt.files) > length(sps.files)) {
        message("\nWarning:\nSome of the data (\".txt\") files to convert have no matching control (\".sps\") files. These files were skipped.\nCheck the content of the input folder.\n\n")
      }
    }
  }, interrupt = function(f) {
    message("\n\nInterrupted by the user. Not all files have been converted.")
  },
  error = function(e) {
    message("")
  })
}

Try the RALSA package in your browser

Any scripts or data that you put into this service are public.

RALSA documentation built on April 4, 2025, 2:39 a.m.