#' @title S4 class for the standard data.frame for a fixed-width file schema.
#'
#' @description Definition of the S4 class named \code{StfwfSchema} for the data.frame providing the
#' schema of the fixed-width file to read.
#'
#'
#' @slot df data.frame with 5 columns:
#' \itemize{
#'
#' \item variable: the name of the variable.
#' \item width: the number of positions which the values of this variable occupies in the file.
#' \item initialPos: initial position of the field whic hthe values of this variable occupies in
#' the file.
#' \item finalPos: final position of the field whic hthe values of this variable occupies in the
#' file.
#' \item type: type of the variable. It must be either \code{num} or \code{char}.
#' \item valueRegEx: regular expression for the values of this variable.
#' \item description: textual description of the variable.
#'
#' }
#'
#'
#' @examples
#' # An empty StfwfSchema object:
#' new(Class = 'StfwfSchema')
#'
#' # A trivial example:
#' df <- data.frame(variable = c('Turnover', 'Employees'),
#' width = c(9L, 3L),
#' initialPos = c(1, 10),
#' finalPos = c(9, 12),
#' type = rep('num', 2),
#' valueRegEx = c('[0-9]{0,9}', '[0-9]{0,3}'),
#' description = c('Turnover of the business unit',
#' 'Number of employees of the business unit'),
#' stringsAsFactors = FALSE)
#' new(Class = 'StfwfSchema', df = df)
#'
#' @importFrom qdapRegex is.regex
#'
#' @export
setClass(Class = "StfwfSchema",
representation = list(df = 'data.frame'),
prototype = list(df = data.frame(variable = character(0),
width = integer(0),
initialPos = integer(0),
finalPos = integer(0),
type = character(0),
valueRegEx = character(0),
description = character(0))),
validity = function(object){
df <- slot(object, 'df')
if (dim(df)[1] == 0) {
warning('[StfwfSchema:: validity StfwfSchema] The schema data.frame has 0 rows.\n')
}
# Column types
colClasses <- lapply(df, class)
if (colClasses$variable != 'character') {
stop('[StfwfSchema:: validity StfwfSchema] The class of column variable must be character.\n')
}
if (!colClasses$width %in% c('numeric', 'integer')) {
stop('[StfwfSchema:: validity StfwfSchema] The class of column width must be numeric or integer.\n')
}
if (!colClasses$initialPos %in% c('numeric', 'integer')) {
stop('[StfwfSchema:: validity StfwfSchema] The class of column initialPos must be numeric or integer.\n')
}
if (!colClasses$finalPos %in% c('numeric', 'integer')) {
stop('[StfwfSchema:: validity StfwfSchema] The class of column finalPos must be numeric or integer.\n')
}
if (colClasses$type != 'character') {
stop('[StfwfSchema:: validity StfwfSchema] The class of column type must be character.\n')
}
if (colClasses$valueRegEx != 'character') {
stop('[StfwfSchema:: validity StfwfSchema] The class of column valueRegEx must be character.\n')
}
if (colClasses$description != 'character') {
stop('[StfwfSchema:: validity StfwfSchema] The class of column description must be character.\n')
}
# Column names
if (!all(names(df) ==
c('variable', 'width', 'initialPos', 'finalPos',
'type', 'valueRegEx', 'description'))) {
stop('[StfwfSchema:: validity StfwfSchema] The schema data.frame has wrong column names. (Check also the order). \n')
}
# No duplicated variable names
variables <- df$variable
dupVars <- variables[duplicated(variables)]
if (length(dupVars) > 0){
stop(paste0('[StfwfSchema:: validity StfwfSchema] The following variables are duplicated: ',
paste0(dupVars, collapse = ', '), '.\n'))
}
# finPos >= iniPos
diffPos <- df$finalPos - df$initialPos
errorDiffPos <- df$variable[diffPos < 0]
if (length(errorDiffPos) > 0) {
stop(paste0('[StfwfSchema:: validity StfwfSchema] The following variables have incoherent positions:', paste0(errorDiffPos, collapse = ' , '), '.\n'))
}
# width = finPos - iniPos + 1
diff <- df$finalPos - df$initialPos + 1
errorwidthVar <- df$variable[diff != df$width]
if (length(errorwidthVar) > 0) {
stop(paste0('[StfwfSchema:: validity StfwfSchema] The following variables have incoherent widths and positions:', paste0(errorwidthVar, collapse = ' , '), '.\n'))
}
# sum(width) = finPos[final] - iniPos[initial] + 1
if (df$finalPos[dim(df)[1]] - df$initialPos[1] + 1 > sum(df$width)) {
warning('[StfwfSchema:: validity StfwfSchema] The sum of widths is not equal to the number of positions. There seems to be blank spaces in the schema.\n')
}
if (df$finalPos[dim(df)[1]] - df$initialPos[1] + 1 < sum(df$width)) {
stop('[StfwfSchema:: validity StfwfSchema] The sum of widths is not equal to the number of positions. There seems to be overlapping positions.\n')
}
# type num or char
invalidTypes <- which(!df$type %in% c('num', 'char', 'log'))
invalidTypes <- df$variable[invalidTypes]
if (length(invalidTypes) > 0) {
stop(paste0('[StfwfSchema:: validity StfwfSchema] Types must be char, num or log. The type of the following variables are invalid:\n',
paste0(invalidTypes, collapse = ', '), '.\n'))
}
# ascending order (by initialPos)
variables_origOrder <- df$variable
variables_ascOrder <- df$variable[order(df$initialPos)]
dif <- (variables_origOrder != variables_ascOrder)
if (sum(dif) > 0){
stop(paste0('[StfwfSchema:: validity StfwfSchema] the following variables are not in the correct order (ascending initial position): ', paste0(variables_origOrder[dif], collapse = ', ')))
}
# Is regex?
notValidRegex <- qdapRegex::is.regex(df$valueRegEx)
notRegExVar <- df$variable[!notValidRegex]
if (!all()) {
stop(paste0('[StfwfSchema:: validity StfwfSchema] The following variables have invalid regex:', paste0(notRegExVar, collapse = ' , '), '.\n'))
}
return(TRUE)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.