# R/impute.R

# Defines functions impute

# preprocessr
#
# Function: impute()
#
# Description: A function to impute missing data in a data frame, matrix
# or vector
#
# Author: Peter Xenopoulos
#
#

# Impute (or drop) missing values in a vector, matrix or data frame.
#
# Arguments:
#   data   - a vector, matrix or data frame that may contain NA values.
#   method - one of:
#              "remove"  : drop NA elements (vector) or every row containing
#                          an NA (matrix / data frame);
#              "mean"    : replace each NA with the mean of the non-missing
#                          values (per column for matrices / data frames);
#              "median"  : same as "mean" but using the median;
#              "replace" : replace each NA with the value given in `opts`.
#   opts   - replacement value; only used (and required) when
#            method = "replace".
#
# Returns:
#   An object of the same kind as `data` with the NAs removed or filled in.
#   An unrecognised `method` yields NULL (invisibly) together with a warning.
impute <- function(data, method = "remove", opts) {

  # Anything without a `dim` attribute is treated as a plain vector. Testing
  # `is.numeric()` instead would misclassify numeric matrices as vectors.
  is_vector <- is.null(dim(data))

  # Method to remove values (or whole rows) containing NA
  if (method == "remove") {
    keep <- complete.cases(data)
    if (is_vector) {
      return(data[keep])
    }
    # drop = FALSE keeps single-column / single-row results tabular
    return(data[keep, , drop = FALSE])
  }

  # Pick the function that produces the fill value from a vector of values
  if (method == "mean") {
    fill_value <- function(values) mean(values, na.rm = TRUE)
  } else if (method == "median") {
    fill_value <- function(values) median(values, na.rm = TRUE)
  } else if (method == "replace") {
    if (missing(opts)) {
      stop("`opts` must be supplied when method = \"replace\"", call. = FALSE)
    }
    replacement <- opts
    fill_value <- function(values) replacement
  } else {
    # Method to use principal component regression is not implemented yet;
    # any unrecognised method keeps returning NULL, but no longer silently.
    warning("Unknown imputation method \"", method, "\"; returning NULL",
            call. = FALSE)
    return(invisible(NULL))
  }

  # If data is a vector
  if (is_vector) {
    na_mask <- is.na(data)
    if (any(na_mask)) {
      data[na_mask] <- fill_value(data)
    }
    return(data)
  }

  # If data is a matrix or data frame: impute column by column
  for (i in seq_len(ncol(data))) {
    na_mask <- is.na(data[, i])
    # Skip complete columns: nothing to fill, and it avoids computing a
    # statistic on columns (e.g. factors) that do not need one.
    if (any(na_mask)) {
      data[na_mask, i] <- fill_value(data[, i])
    }
  }

  data
}
# peterxeno/preprocessr documentation built on May 25, 2019, 2:10 a.m.