# preprocessr
#
# Function: impute()
#
# Description: A function to impute missing data in a data frame, matrix
# or vector
#
# Author: Peter Xenopoulos
#
#
# Impute (or drop) missing values in a vector, matrix or data frame.
#
# Arguments:
#   data   - an atomic vector, a matrix, or a data frame.
#   method - one of:
#              "remove"  : drop NA elements (vector) or rows containing any
#                          NA (matrix / data frame);
#              "mean"    : replace NAs with the mean of the non-missing
#                          values (per column for tabular data);
#              "median"  : same as "mean" but using the median;
#              "replace" : replace NAs with the value given in `opts`.
#   opts   - replacement value, required only when method = "replace".
#            Must have length 1 for matrices and data frames.
#
# Returns:
#   An object of the same kind as `data` with missing values removed or
#   filled in. For "mean"/"median" on tabular data, columns that are neither
#   numeric nor logical are left untouched. A column with no observed values
#   receives whatever mean()/median() return for empty input (NaN / NA).
#
# Errors:
#   Stops if `method` is not one of the supported names, if `opts` is
#   missing for method = "replace", or if "mean"/"median" is requested on a
#   non-numeric vector.
impute <- function(data, method = "remove", opts) {
  supported <- c("remove", "mean", "median", "replace")
  if (!is.character(method) || length(method) != 1L ||
      !(method %in% supported)) {
    stop("'method' must be one of: ", paste(supported, collapse = ", "),
         call. = FALSE)
  }
  if (method == "replace" && missing(opts)) {
    stop("'opts' must be supplied when method = \"replace\"", call. = FALSE)
  }

  # Decide on the shape of the input, not on its type: a numeric matrix is
  # "numeric" too, but it must be handled column by column.
  is_tabular <- length(dim(data)) == 2L

  # Method to remove values / rows containing NA
  if (method == "remove") {
    if (is_tabular) {
      # drop = FALSE keeps a matrix / data frame even if one row or one
      # column is left.
      return(data[complete.cases(data), , drop = FALSE])
    }
    return(data[!is.na(data)])
  }

  # Value used to fill the NAs of `x` under the requested method.
  fill_value <- function(x) {
    switch(method,
      mean    = mean(x, na.rm = TRUE),
      median  = median(x, na.rm = TRUE),
      replace = opts
    )
  }
  needs_numbers <- method != "replace"

  # If data is a vector (or anything that is not two-dimensional)
  if (!is_tabular) {
    if (needs_numbers && !(is.numeric(data) || is.logical(data))) {
      stop("method \"", method, "\" requires numeric data", call. = FALSE)
    }
    data[is.na(data)] <- fill_value(data)
    return(data)
  }

  # If data is a matrix or data frame
  if (method == "replace" && length(opts) != 1L) {
    stop("'opts' must have length 1 for matrix or data frame input",
         call. = FALSE)
  }
  for (i in seq_len(ncol(data))) {
    column <- if (is.data.frame(data)) data[[i]] else data[, i]
    missing_rows <- which(is.na(column))
    # Nothing to fill: skip, so we never index with an empty / NA subscript.
    if (length(missing_rows) == 0L) {
      next
    }
    # A mean or median is undefined for character / factor columns.
    if (needs_numbers && !(is.numeric(column) || is.logical(column))) {
      next
    }
    data[missing_rows, i] <- fill_value(column)
  }
  data
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.