#' R6 class representing an information table.
#'
#' @description
#' An information table consists of the decision table and meta-data.
#'
#' @details
#' This class stores the decision table, and meta-data.
#' The decision table consists of an object identifier column, a decision column, and at least one additional attribute.
#' The meta-data consist of the attribute names, their types, and the alpha and beta values for similarity attributes.
#'
#' @export
InformationTable <- R6::R6Class(
classname = "InformationTable",
public = list(
#' @field decisionTable the set of examples
decisionTable = data.frame(),
#' @field metaData meta-data of the attributes, including their name and type, along with alpha and beta parameters for similarity variables
metaData = data.frame(),
### Derived fields ###
#' @field objects vector of object names
objects = NA,
#' @description
#' Create a new information table object.
#' @param decisionTable data frame containing the decision examples
#' @param metaData data frame containing the meta-data of the attributes. This parameter is optional, and if not provided, we assume all dominance attributes.
initialize = function(decisionTable, metaData = NA) {
# ERROR-CHECKS on the decision table:
stopifnot('data.frame' %in% class(decisionTable))
stopifnot(ncol(decisionTable) >= 3) # at least one attribute apart from object and decision
# ERROR-CHECKS on the meta-data:
if (all(is.na(metaData))) {
# if meta-data not provided, then set the types as follows:
# - first attribute to object,
# - last attribute to decision,
# - all other attributes to dominance
attributeCount = ncol(decisionTable)
metaData = data.frame(
name = names(decisionTable),
type = c('object', rep('dominance', attributeCount - 2), 'decision'),
alpha = rep(NA_real_, attributeCount),
beta = rep(NA_real_, attributeCount),
stringsAsFactors = FALSE
)
# Set "misc" type to any column with NAs:
metaData$type[map_lgl(decisionTable, ~ any(is.na(.)))] = 'misc'
}
stopifnot('data.frame' %in% class(metaData))
stopifnot(setequal(c('name', 'type', 'alpha', 'beta'), names(metaData))) # it should contain exactly the name, type, alpha and beta columns
stopifnot(setequal(names(decisionTable), metaData$name)) # need meta-data for all columns of the decision table
stopifnot(all(names(decisionTable) == metaData$name)) # need meta-data and columns of the decision table to be in the same order
metaData$type = factor(metaData$type,
levels = c('indiscernibility', 'similarity', 'dominance', 'misc', 'object', 'decision'),
ordered = FALSE)
objectColumn = which(metaData$type == 'object', arr.ind = TRUE)
stopifnot(length(objectColumn) == 1) # we expect exactly one object column
objects = decisionTable[[objectColumn]]
stopifnot(length(unique(objects)) == length(objects))
self$objects = objects
decisionColumn = which(metaData$type == 'decision', arr.ind = TRUE)
stopifnot(length(decisionColumn) == 1) # we expect exactly one decision column
if (!'factor' %in% class(decisionTable[[decisionColumn]])) {
decisionTable[[decisionColumn]] = factor(decisionTable[[decisionColumn]], ordered = T)
}
self$decisionTable = decisionTable
stopifnot(metaData %>%
filter(type == 'similarity') %>%
filter(is.na(alpha) | is.na(beta)) %>%
nrow() == 0
) # all similarity variables need the alpha and beta parameters provided
isFactor = map_lgl(decisionTable, ~ 'factor' %in% class(.))
stopifnot(all(!isFactor[metaData$type == 'similarity'])) # similarity variables cannot be factors
self$metaData = metaData
},
#' @description
#' Method to determine whether another information table is compatible with this one.
#' @param it the information table to compare to
isCompatible = function(it) {
return(class(it) == 'InformationTable' &&
it$metaData == self$metaData)
},
#' @description
#' Method to get the type of an attribute.
#' @param attribute the name of the attribute
#' @return the type
getType = function(attribute) {
return(self$metaData$type[self$metaData$name == attribute])
},
#' @description
#' Method for creating a new information table by removing examples of belonging to the specified objects.
#' @param objects the object to filter out
#' @return a new information table instance
removeObjects = function(objects) {
rowsToRemove = self$objects %in% objects
decisionTableFiltered = self$decisionTable[!rowsToRemove, ]
return(InformationTable$new(decisionTable = decisionTableFiltered, metaData = self$metaData))
},
#' @description
#' Method for calculating the downward class union.
#' @param class the decision class to compare to
#' @return the set of objects in the downward class union
downwardClassUnion = function(class) {
objectColumn = which(self$metaData$type == 'object', arr.ind = TRUE)
decisionColumn = which(self$metaData$type == 'decision', arr.ind = TRUE)
self$decisionTable[, c(objectColumn, decisionColumn)] %>%
filter(.[[2]] <= class) %>%
pull(1)
},
#' @description
#' Method for calculating the upward class union.
#' @param class the decision class to compare to
#' @return the set of objects in the upward class union
upwardClassUnion = function(class) {
objectColumn = which(self$metaData$type == 'object', arr.ind = TRUE)
decisionColumn = which(self$metaData$type == 'decision', arr.ind = TRUE)
self$decisionTable[, c(objectColumn, decisionColumn)] %>%
filter(.[[2]] >= class) %>%
pull(1)
},
#' @description
#' Method for encoding the decision column to the 1:N range, where N is the cardinality of this column.
#' @return the encoded decision column
encodeDecisionColumn = function() {
decisionColumn = which(self$metaData$type == 'decision', arr.ind = TRUE)
decisions = self$decisionTable[[decisionColumn]]
decisionCard = length(unique(decisions))
decisionIDs = 1:decisionCard
decisions = factor(decisions, labels = decisionIDs, levels = sort(unique(decisions)), ordered = TRUE)
return(decisions)
},
#' @description
#' Method for decoding 1:N-encoded decisions.
#' @param encoded a vector of encoded decisions
#' @return a vector with the decoded decisions
decodeDecisions = function(encoded) {
decisionColumnIndex = which(self$metaData$type == 'decision', arr.ind = TRUE)
decisionColumn = self$decisionTable[[decisionColumnIndex]]
uniqueDecisions = sort(unique(decisionColumn))
decoded = uniqueDecisions[encoded]
return(decoded)
},
#' @description
#' Method for calculating all downward- and upward class unions at once.
#' @return a pair of matrices for both class unions, where each row represents a class, and each column represents an object
classUnions = function() {
decisions = self$encodeDecisionColumn()
decisionCard = length(unique(decisions))
decisionIDs = 1:decisionCard
objectCount = length(self$objects)
upwardClassUnions = matrix(nrow = decisionCard, ncol = objectCount)
downwardClassUnions = upwardClassUnions
for (objectID in 1:objectCount) {
upwardClassUnions[decisionIDs, objectID] = decisions[objectID] >= decisionIDs
downwardClassUnions[decisionIDs, objectID] = decisions[objectID] <= decisionIDs
}
return(list(upward = upwardClassUnions, downward = downwardClassUnions))
},
#' @description
#' Method that partitions attribute set P into into sets of the same attribute type.
#' Only types relevant for the dominance relation are considered (indiscernibility, similarity, and dominance).
#' @param P the set of attributes to partition - vector of attribute names
#' @return a list of attribute sets
partitionAttributes = function(P) {
types = self$metaData %>%
filter(name %in% P) %>%
select(name, type)
P_ind = types %>% filter(type == "indiscernibility") %>% pull(name)
P_sim = types %>% filter(type == "similarity") %>% pull(name)
P_dom = types %>% filter(type == "dominance") %>% pull(name)
return(list(ind = P_ind, sim = P_sim, dom = P_dom))
},
#' Function to determine whether x dominates y on the mixed attribute set P.
#' @param x the left operand - object name
#' @param y the right operand - object name
#' @param P the set of attributes to test - vector of attribute names
#' @param compareSimilaritySwitched whether to test similarity with the parameters switched
#' @return whether x dominates y on attribute set P
dominates = function(x, y, P, compareSimilaritySwitched = FALSE) {
# ERROR-CHECKS:
stopifnot(length(x) == length(y), length(x) > 0)
stopifnot(x %in% self$objects, y %in% self$objects)
stopifnot(P %in% self$metaData$name)
# the subsets of the decision table, relevant for the object in x and y, respectively:
X = self$decisionTable[map_int(x, ~ which(. == self$objects)), ] %>% select(P)
Y = self$decisionTable[map_int(y, ~ which(. == self$objects)), ] %>% select(P)
# the partitioned set of attributes to consider:
P = self$partitionAttributes(P)
R_ind = map_dfc(P$ind, function(q) { X[[q]] == Y[[q]] }) %>% apply(FUN = all, MARGIN = 1)
if (compareSimilaritySwitched) {
R_sim = map_dfc(P$sim, ~ self$similar(Y, X, .)) %>% apply(FUN = all, MARGIN = 1)
} else {
R_sim = map_dfc(P$sim, ~ self$similar(X, Y, .)) %>% apply(FUN = all, MARGIN = 1)
}
R_dom = map_dfc(P$dom, function(q) { X[[q]] >= Y[[q]] }) %>% apply(FUN = all, MARGIN = 1)
if (length(R_ind) == 0) {
R_ind = rep(TRUE, nrow(X))
}
if (length(R_sim) == 0) {
R_sim = rep(TRUE, nrow(X))
}
if (length(R_dom) == 0) {
R_dom = rep(TRUE, nrow(X))
}
R_ind & R_sim & R_dom
},
#' @description
#' Method for calculating the P-dominated and P-dominating sets all at once.
#' @param P the set of attributes to test - vector of attribute names
dominatingAndDominatedSets = function(P) {
# ERROR-CHECKS:
stopifnot(P %in% self$metaData$name)
# the subsets of the decision table, relevant for the object in x and y, respectively:
X = self$decisionTable
Y = self$decisionTable
# the partitioned set of attributes to consider:
P = self$partitionAttributes(P)
# Determine indiscernibility
n = length(self$objects)
R_ind = matrix(rep(TRUE, n * n), nrow = n, ncol = n)
for (i in 1:n) {
for (j in i:n) {
result = all(X[i, P$ind] == Y[j, P$ind])
R_ind[i, j] = result
R_ind[j, i] = result
}
}
# Determine similarity
R_sim = matrix(rep(TRUE, n * n), nrow = n, ncol = n)
simAttributeIndex = map_int(P$sim, ~ which(self$metaData$name == ., arr.ind = T))
alpha = self$metaData$alpha[simAttributeIndex]
beta = self$metaData$beta[simAttributeIndex]
for (i in 1:n) {
for (j in 1:n) {
exampleX = X[i, simAttributeIndex]
exampleY = Y[j, simAttributeIndex]
# TODO: for factors convert them to integers:
# factorVariables = map_lgl(self$decisionTable, is.factor)
R_sim[i, j] = all(abs(exampleX - exampleY) <= alpha * exampleY + beta)
}
}
R_sim_switched = t(R_sim)
# Determine dominance
R_dominates = matrix(rep(TRUE, n * n), nrow = n, ncol = n)
for (i in 1:n) {
for (j in 1:n) {
R_dominates[i, j] = all(X[i, P$dom] >= Y[j, P$dom])
}
}
R_dominatedBy = t(R_dominates)
# Return the results:
return(list(
dominating_L = R_ind & R_sim_switched & R_dominatedBy,
dominating_U = R_ind & R_sim & R_dominatedBy,
dominated_L = R_ind & R_sim & R_dominates,
dominated_U = R_ind & R_sim_switched & R_dominates
))
},
#' Method to determine whether x is similar to y on attribute q.
#' @param x the left operand - a data frame
#' @param y the right operand - a data frame
#' @param q the attribute to test
#' @return whether x is similar to y on attribute q
similar = function(x, y, q) {
# ERROR-CHECKS:
stopifnot(nrow(x) == nrow(y), nrow(x) > 0, q %in% names(x), q %in% names(y))
exampleX = x[[q]]
exampleY = y[[q]]
attributeIndex = which(self$metaData$name == q)
alpha = self$metaData$alpha[attributeIndex]
beta = self$metaData$beta[attributeIndex]
abs(exampleX - exampleY) <= alpha * exampleY + beta
},
#' @description
#' This method calculates the dominating set of an object with respect to a criterion set.
#' @param x the object - object name
#' @param P the criterion set
#' @param compareSimilaritySwitched whether to test similarity with the parameters switched
#' @return the set of objects that dominate object x
dominatingSet = function(x, P, compareSimilaritySwitched = TRUE) {
stopifnot(x %in% self$objects)
d = map_lgl(self$objects, ~ self$dominates(., x, P, compareSimilaritySwitched = compareSimilaritySwitched))
self$objects[d]
},
#' @description
#' This method calculates the dominated set of an object with respect to a criterion set.
#' @param x the object - object name
#' @param P the criterion set
#' @param compareSimilaritySwitched whether to test similarity with the parameters switched
#' @return the set of objects that are dominated by object x
dominatedSet = function(x, P, compareSimilaritySwitched = FALSE) {
stopifnot(x %in% self$objects)
d = map_lgl(self$objects, ~ self$dominates(x, ., P, compareSimilaritySwitched = compareSimilaritySwitched))
self$objects[d]
},
#' @description
#' This method calculates the P-upper approximations of the upward class unions.
#' @param dominating_U the P-dominating sets (U) - matrix
#' @return the approximations for all classes in a boolean matrix from
upwardClassUnionUpperApproximation = function(dominating_U) {
upwardClassUnion = self$classUnions()$upward
approximations = matrix(nrow = nrow(upwardClassUnion), ncol = ncol(upwardClassUnion))
for (class in 1:nrow(upwardClassUnion)) {
result = dominating_U[upwardClassUnion[class,], ]
approximations[class, ] = if(!is.null(dim(result))) apply(result, MARGIN = 2, FUN = any) else result
}
return(approximations)
},
#' @description
#' This method calculates the P-lower approximations of the upward class unions.
#' @param downward_U the P-upper approximations of the downward class unions - matrix
#' @return the approximations for all classes in a boolean matrix from
upwardClassUnionLowerApproximation = function(downward_U) {
U = rep(TRUE, length(self$objects))
classCount = nrow(downward_U)
objectCount = ncol(downward_U)
approximations = matrix(nrow = classCount, ncol = objectCount)
approximations[1, ] = U
approximations[2:classCount, ] = !downward_U[1:(classCount - 1), ]
return(approximations)
},
#' @description
#' This method calculates the P-upper approximations of the downward class unions.
#' @param dominated_U the P-dominated sets (U) - matrix
#' @return the approximations for all classes in a boolean matrix from
downwardClassUnionUpperApproximation = function(dominated_U) {
downwardClassUnion = self$classUnions()$downward
approximations = matrix(nrow = nrow(downwardClassUnion), ncol = ncol(downwardClassUnion))
for (class in 1:nrow(downwardClassUnion)) {
result = dominated_U[downwardClassUnion[class,], ]
approximations[class, ] = if(!is.null(dim(result))) apply(result, MARGIN = 2, FUN = any) else result
}
return(approximations)
},
#' @description
#' This method calculates the P-lower approximations of the downward class unions.
#' @param upward_U the P-upper approximations of the upward class unions - matrix
#' @return the approximations for all classes in a boolean matrix from
downwardClassUnionLowerApproximation = function(upward_U) {
U = rep(TRUE, length(self$objects))
classCount = nrow(upward_U)
objectCount = ncol(upward_U)
approximations = matrix(nrow = classCount, ncol = objectCount)
approximations[classCount, ] = U
approximations[1:(classCount - 1), ] = !upward_U[2:classCount, ]
return(approximations)
},
#' @description
#' This method calculates the P-lower and P-upper approximations of class unions and boundary regions.
#' @param P the attribute set
#' @return a named list of the approximations
roughSets = function(P) {
dom = self$dominatingAndDominatedSets(P)
approx = list(
upward_U = self$upwardClassUnionUpperApproximation(dom$dominating_L),
upward_L = NA,
downward_U = self$downwardClassUnionUpperApproximation(dom$dominated_U),
downward_L = NA
)
approx$upward_L = self$upwardClassUnionLowerApproximation(approx$downward_U)
approx$downward_L = self$downwardClassUnionLowerApproximation(approx$upward_U)
return(approx)
},
#' @description
#' This method calculates the boundary regions of rough sets.
#' @param roughSets the class union approximations.
#' @return the upward and downward boundary regions
boundaryRegions = function(roughSets) {
return(list(
upward = roughSets$upward_U & !roughSets$upward_L,
downward = roughSets$downward_U & !roughSets$downward_L
))
},
#' @description
#' This method calculates the accuracy of the approximations of the class unions.
#' @param roughSets the approximations
#' @return a pair of vectors describing the accuracy of the downward and upward class union approximations
accuracyOfApproximation = function(roughSets) {
classCount = nrow(roughSets$upward_U)
acc = list(
upward = rep(NA_real_, classCount),
downward = rep(NA_real_, classCount)
)
for (class in 1:classCount) {
acc$upward[class] = sum(roughSets$upward_L[class, ]) / sum(roughSets$upward_U[class, ])
acc$downward[class] = sum(roughSets$downward_L[class, ]) / sum(roughSets$downward_U[class, ])
}
return(acc)
},
#' @description
#' This method calculates the quality of the approximations of the class unions.
#' @param boundaryRegions the boundary regions of the rough set
#' @return a number expressing the ratio of all P-correctly sorted actions to all actions in the decision table
qualityOfApproximation = function(boundaryRegions) {
incorrectlySorted = apply(boundaryRegions$downward, MARGIN = 2, FUN = any)
return(sum(!incorrectlySorted) / length(self$objects))
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.