R/methods_class_dataObj.r

########################################
### methods only for class 'dataObj' ###
########################################
#' @aliases get.dataObj,dataObj,character-method
#' @rdname get.dataObj-method
setMethod(f="get.dataObj", signature=c("dataObj", "character"),
  definition=function(object, type) {
    if ( !type %in% c("rawData", "dimVarInd", "freqVarInd",
        "numVarInd", "weightVarInd", "sampWeightInd",
        "isMicroData", "numVarNames", "freqVarName", "varName") ) {
      stop("get.dataObj:: argument 'type' is not valid!\n")
    }
    if ( type == "rawData" ) {
      return(g_raw_data(object))
    }
    if ( type == "dimVarInd" ) {
      return(g_dimvar_ind(object))
    }
    if ( type == "freqVarInd" ) {
      return(g_freqvar_ind(object))
    }
    if ( type == "numVarInd" ) {
      return(g_numvar_ind(object))
    }
    if ( type == "weightVarInd" ) {
      return(g_weightvar_ind(object))
    }
    if ( type == "sampWeightInd" ) {
      return(g_sampweight_ind(object))
    }
    if ( type == "isMicroData" ) {
      return(g_is_microdata(object))
    }
    if ( type == 'numVarNames' ) {
      return(names(g_raw_data(object))[g_numvar_ind(object)])
    }
    if ( type == 'freqVarName' ) {
      return(names(g_raw_data(object))[g_freqvar_ind(object)])
    }
    if ( type == 'varName' ) {
      return(names(g_raw_data(object))[g_dimvar_ind(object)])
    }
  }
)

#' @aliases set.dataObj,dataObj,character,listOrNULL-method
#' @rdname set.dataObj-method
setMethod(f='set.dataObj', signature=c("dataObj", "character", "listOrNULL"),
  definition=function(object, type, input) {
    if ( !type %in% c("rawData") ) {
      stop("set.dataObj:: check argument 'type'!\n")
    }
    if ( type == "rawData" ) {
      s_raw_data(object) <- input[[1]]
    }
    validObject(object)
    return(object)
  }
)

#' @aliases init.dataObj,list-method
#' @rdname init.dataObj-method
setMethod(f = "init.dataObj", signature = c("list"), 
  definition=function(input) {
    freq <- N <- .N <- NULL
    inputData <- input$inputData
    dimVarInd <- input$dimVarInd
    freqVarInd <- input$freqVarInd
    numVarInd <- input$numVarInd
    weightInd <- input$weightInd
    sampWeightInd <- input$sampWeightInd
    
    if (is.null(sampWeightInd)) {
      inputData$tmpsamplingweights <- 1
      sampWeightInd <- ncol(inputData)
    }
    
    # w --> costs for csp
    wExists <- FALSE
    if (!is.null(weightInd)) {
      if (is.null(freqVarInd)) {
        wExists <- ifelse(weightInd %in% numVarInd, TRUE, FALSE)
      } else {
        wExists <- ifelse(weightInd == freqVarInd | weightInd %in% numVarInd, TRUE, FALSE)
      }
    }
    isMicroData <- FALSE

    ## aggregate data, use data.table
    datO <- data.table(inputData, key = colnames(inputData)[dimVarInd])
    rawData <- datO[, list(N = .N), by = key(datO)]
    N <- rawData$N
    rawData$N <- NULL

    if (any(N != 1)) {
      isMicroData <- TRUE
      rawData <- copy(datO)
      rawData <- rawData[, key(rawData), with = FALSE]
      
      if (is.null(freqVarInd)) {
        set(rawData, NULL, "freq", 1)
        freqVarInd <- which(colnames(rawData) == "freq")
      } else {
        f <- datO[, get(colnames(datO)[freqVarInd]), ]
        set(rawData, NULL, "freq", as.numeric(f))
      }
    } else {
      # data already aggregated
      if (is.null(freqVarInd)) {
        set(rawData, NULL, "freq", as.numeric(N))
        freqVarInd <- which(colnames(rawData) == "freq")
      } else {
        set(rawData, NULL, "freq", as.numeric(datO[[freqVarInd]]))
      }
    }
    freqVarInd <- which(colnames(rawData) == "freq")
    dimVarInd <- match(key(rawData), colnames(rawData))
    
    if (!isMicroData) {
      if (!is.null(input$sampWeightInd))
      message("a complete table is used as input: --> sampling weights are ignored.")
      datO[[sampWeightInd]] <- 1
    }

    # multiply frequencies by using sampling weights
    sw <- datO[[sampWeightInd]]
    set(rawData, NULL, colnames(datO)[sampWeightInd], sw)
    sampWeightInd <- ncol(rawData)
    set(rawData, NULL, "freq", as.numeric(sw * rawData$freq))
  
    ## numvars
    nr_keys <- length(key(datO))
    if (!is.null(numVarInd)) {
      c_start <- ncol(rawData)
      cols <- colnames(datO)[numVarInd]

      for (j in 1:length(cols)) {
        set(rawData, NULL, cols[j], as.numeric(sw * datO[[cols[j]]]))
      }
      
      #if (isMicroData) {
      #  for (j in 1:length(cols)) {
      #    set(rawData, NULL, cols[j], as.numeric(sw * datO[[cols[j]]]))
      #  }
      #} else {
      #  browser()
      #  xx <- datO[, lapply(.SD, sum), by = key(datO), .SDcols = cols]
      #  rawData <- merge(rawData, xx)
      #}
      numVarInd <- (c_start + 1):ncol(rawData)
    }
    
    ## weight var
    if (!is.null(weightInd)) {
      if (isMicroData == TRUE) {
        w <- datO[, get(colnames(datO)[weightInd])]
      } else {
        w <-
          datO[, list(w = sum(get(colnames(datO)[weightInd]))), by = key(datO)]$w
      }
      # we set this column only, if weightInd does not already exist in rawData
      # this could be the case, if a variable also used as numVar is specified 
      # as weightVar
      if (!wExists) {
        set(rawData, NULL, colnames(datO)[weightInd], as.numeric(w))
        weightInd <- ncol(rawData)
      } else {
        weightInd <- match(colnames(datO)[weightInd], names(rawData))
        if (is.na(weightInd)) {
          weightInd <- match("freq", names(rawData))
        }
      }
    }

    ## do not use factors
    cols <- colnames(rawData)[dimVarInd]
    rawData[,cols] <- rawData[, lapply(.SD, as.character), .SDcols=cols]
    rm(datO)

    setkeyv(rawData, colnames(rawData)[dimVarInd])

    out <- new("dataObj",
      rawData=rawData,
      dimVarInd=dimVarInd,
      freqVarInd=freqVarInd,
      numVarInd=numVarInd,
      weightVarInd=weightInd,
      sampWeightInd=sampWeightInd,
      isMicroData=isMicroData
    )
    return(out)
  }
)

setMethod(f="g_raw_data", signature=c("dataObj"), definition=function(object) {
  return(object@rawData)
})

setMethod(f="g_dimvar_ind", signature=c("dataObj"), definition=function(object) {
  return(object@dimVarInd)
})

setMethod(f="g_freqvar_ind", signature=c("dataObj"), definition=function(object) {
  return(object@freqVarInd)
})

setMethod(f="g_numvar_ind", signature=c("dataObj"), definition=function(object) {
  return(object@numVarInd)
})

setMethod(f="g_weightvar_ind", signature=c("dataObj"), definition=function(object) {
  return(object@weightVarInd)
})

setMethod(f="g_sampweight_ind", signature=c("dataObj"), definition=function(object) {
  return(object@sampWeightInd)
})

setMethod(f="g_is_microdata", signature=c("dataObj"), definition=function(object) {
  return(object@isMicroData)
})

setMethod(f="g_numvar_names", signature=c("dataObj"), definition=function(object) {
  return(names(g_raw_data(object))[g_numvar_ind(object)])
})

setMethod(f="g_freqvar_name", signature=c("dataObj"), definition=function(object) {
  return(names(g_raw_data(object))[g_freqvar_ind(object)])
})

setMethod(f="g_var_name", signature=c("dataObj"), definition=function(object) {
  return(names(g_raw_data(object))[g_dimvar_ind(object)])
})

setReplaceMethod(f="s_raw_data", signature=c("dataObj", "list"), definition=function(object, value) {
  object@rawData <- value[[1]]
  return(object)
})

Try the sdcTable package in your browser

Any scripts or data that you put into this service are public.

sdcTable documentation built on Aug. 11, 2023, 9:06 a.m.