# R/mutual_info.R

# Defines functions: mutual_info, encode_values

# Documented in: encode_values, mutual_info

encode_values <- function(values){
  #' Label-encode a categorical variable.
  #'
  #' Each element is replaced by the position of its level in
  #' `as.factor(values)`; missing values remain `NA`.
  #'
  #' @param values Vector of values to be encoded.
  #'
  #' @return Numeric vector of level codes, one per element of `values`.
  as_levels <- as.factor(values)
  # Strip the factor class to expose the underlying level codes.
  level_codes <- unclass(as_levels)
  as.numeric(level_codes)
}

mutual_info <- function(data){
  #' Calculate mutual information between pairs of variables in a data frame
  #'
  #' Non-numeric columns are label encoded with `encode_values()`. All columns
  #' are then discretized with `infotheo::discretize()` and every ordered pair
  #' of columns is scored with `infotheo::multiinformation()`.
  #'
  #' @param data Data frame (n by k) of observations
  #'
  #' @return Matrix (k by k) of mutual information, with rows and columns
  #'   named after the columns of `data`.  Diagonal represents total
  #'   information a variable has.
  copied <- data
  numeric_columns <- vapply(copied, is.numeric, logical(1))
  factor_columns <- names(copied)[!numeric_columns]

  for (name in factor_columns){
    # `[[` always yields the column as a vector; `[, name]` returns a
    # one-column tibble when `data` is a tibble.
    copied[[name]] <- encode_values(copied[[name]])
  }

  disc.df <- infotheo::discretize(copied)
  K <- ncol(copied)
  var_names <- colnames(copied)

  # seq_len() keeps the iteration empty when K is 0 (1:K would give c(1, 0)).
  info <- vapply(
    seq_len(K),
    function(i) vapply(
      seq_len(K),
      function(j) infotheo::multiinformation(disc.df[, c(i, j)]),
      numeric(1)
    ),
    numeric(K)
  )

  # With a single column the nested apply collapses to a bare vector, so
  # force the k by k shape before attaching dimension names.
  info <- matrix(info, nrow = K, ncol = K)
  rownames(info) <- var_names
  colnames(info) <- var_names

  info
}
# Stat-Cook/NURS.Data.Quality documentation built on Dec. 18, 2021, 2:09 p.m.