R/getCharColDescription.R

getCharColDescription <- function(data){
  columns <- colnames(data)
  character_columns <- columns[sapply(data, class) == 'character']
  ncols_character <- length(character_columns)
  characterData <- as.data.frame(data[, character_columns])
  characterDataDescription <- data.frame(matrix(NA, ncol=4, nrow=ncols_character))
  colnames(characterDataDescription) <- c('Feature','MissingValuesCount','UniqueValuesCount','MostFrequentValue')
  characterDataDescription$Feature <- character_columns
  characterDataDescription$MissingValuesCount <- colSums(is.na(characterData))
  characterDataDescription$MostFrequentValue <- apply(characterData, 2, function(colData){
    ux <- unique(colData)
    tab <- tabulate(match(colData, ux))
    ux[tab == max(tab)]
  })
  characterDataDescription$UniqueValuesCount <- apply(characterData, 2, function(colData){
    length(unique(colData))
  })
  return(characterDataDescription)
}
nkdhruw/XploreR documentation built on June 24, 2019, 12:44 a.m.