R/uniquePermutations.R

Defines functions uniquePermutations

Documented in uniquePermutations

#' Compute all possible property types given input meta-data in integer format
#'
#' This function accepts a data frame whose columns are filled with integers.
#' The order of these integers in each row matters (hence they are considered
#' permutations). It returns every permutation that can be formed by picking,
#' for each column, one of the distinct non-missing values observed in that
#' column.
#'
#' Please note: (1) NAs are NOT considered in the computation, (2) the
#' returned list of permutations may contain permutations that do not exist in
#' the input data frame, (3) the output list of integer vectors may not be
#' sorted.
#'
#' The permutations are generated with the first column varying slowest. Within
#' a column, values follow the order in which they first appear when all
#' columns of \code{df} are pooled column by column.
#'
#' @param df Data frame of property meta-data in integer format (e.g. bathroom
#'   and bedroom counts). NAs are allowed.
#' @return List of all possible permutations. Each element is an unnamed
#'   integer vector of length \code{ncol(df)}. An empty list is returned when
#'   \code{df} has no columns or when at least one column has no non-missing
#'   value.
#' @examples
#' uniquePermutations(data.frame(bed = c(1L, 2L, NA), bath = c(1L, 1L, 3L)))
#' @export

uniquePermutations <- function(df) {
  df <- as.data.frame(df)
  n_col <- ncol(df)
  if (n_col == 0L) {
    return(list())
  }

  ## Step 1: pool the distinct non-missing values of all columns into one bag.
  # The bag only fixes the order in which values are enumerated.
  columns <- lapply(df, as.integer)
  bag <- unique(unlist(columns, use.names = FALSE))
  bag <- bag[!is.na(bag)]

  ## Step 2: distinct non-missing values per column, listed in bag order.
  values_per_col <- unname(lapply(columns, function(x) bag[bag %in% x]))

  # A column without any usable value cannot contribute to a permutation.
  if (any(lengths(values_per_col) == 0L)) {
    return(list())
  }

  ## Step 3: Cartesian product of the per-column values.
  # expand.grid() varies its first factor fastest, so the columns are reversed
  # going in and coming out to make the first column vary slowest.
  grid <- expand.grid(
    rev(values_per_col),
    KEEP.OUT.ATTRS = FALSE,
    stringsAsFactors = FALSE
  )
  perm <- unname(as.matrix(grid[, rev(seq_len(n_col)), drop = FALSE]))

  ## Step 4: return one integer vector per permutation.
  lapply(seq_len(nrow(perm)), function(i) perm[i, ])
}
msxakk89/dat documentation built on Aug. 3, 2020, 6:39 p.m.