R/RuleInduction.OtherFuncCollections.R
In RoughSets: Data Analysis Using Rough Set and Fuzzy Rough Set Theories

Documented in X.laplace X.rulesCounting X.ruleStrength

#############################################################################
#
#  This file is a part of the R package "RoughSets".
#
#  Author: Lala Septem Riza and Andrzej Janusz
#  Supervisors: Chris Cornelis, Francisco Herrera, Dominik Slezak and Jose Manuel Benitez
#  Copyright (c):
#       DiCITS Lab, Sci2s group, DECSAI, University of Granada and
#       Institute of Mathematics, University of Warsaw
#
#  This package is free software: you can redistribute it and/or modify it under
#  the terms of the GNU General Public License as published by the Free Software
#  Foundation, either version 2 of the License, or (at your option) any later version.
#
#  This package is distributed in the hope that it will be useful, but WITHOUT
#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
#  A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#############################################################################
## it is used to represent rules in order to get the standard format
# @param rules original form of rules
# @param type.method a type of method
# @param decision.table a decision table
# @param t.similarity a type of similarity equation
# @param t.tnorm a type of t-tnorm
std.rules <- function(rules, type.method = "RI.hybridFS.FRST", decision.table, t.similarity = NULL, t.tnorm = NULL){
	## get data
	objects <- decision.table
	desc.attrs <- attr(decision.table, "desc.attrs")
	nominal.att <- attr(decision.table, "nominal.attrs")
	decision.attr <- attr(decision.table, "decision.attr")
	consequent.attr <- names(desc.attrs)[decision.attr]

	if (any(type.method == c("RI.hybridFS.FRST", "RI.GFRS.FRST"))){
		all.attr = colnames(objects)

		if (nominal.att[length(nominal.att)] == TRUE){
			type.task <- c("classification")
		}
		else {
			type.task <- c("regression")
		}

		## check duplication attributes in rules for collecting their objects
		duplicated.pt <- duplicated(rules$attributes)
		new.obj <- c()
		ii <- 1

		## collect the objects that have the same attributes in the rules
		for (i in 1 : length(duplicated.pt)){
			if (duplicated.pt[i] == FALSE){
				new.obj <- append(new.obj, list(rules$objects[[i]]))
				ii <- ii + 1
			}
			else {
				new.obj[[ii - 1]] <- c(new.obj[[ii - 1]], rules$objects[[i]])
			}
		}

		if (type.method == "RI.hybridFS.FRST"){
			rules <- list(attributes = rules$attributes[which(duplicated.pt == FALSE)], objects = new.obj)
			attrs.rules <- unique(unlist(rules$attributes))
			rules <- ch.rules(rules, type.method = type.method, decision.table)

			## calculate variance and range of data
			attrs.p <- c(which(colnames(objects) %in% attrs.rules))
			res.varRange <- cal.var.range(objects, attrs.p, nominal.att)
			variance.data <- res.varRange$variance.data
			rownames(variance.data) <- NULL
			colnames(variance.data) <- colnames(objects)[attrs.p]
			range.data <- res.varRange$range.data
			colnames(range.data) <- colnames(objects)[attrs.p]

			mod <- list(rules = rules, type.model = "FRST", type.method = type.method, type.task = type.task,
						t.similarity = t.similarity, t.tnorm = t.tnorm, variance.data = variance.data, range.data = range.data,
						antecedent.attr = colnames(objects)[attrs.p], consequent.attr = consequent.attr, nominal.att = nominal.att[attrs.p])
			class.mod <- ObjectFactory(mod, classname = "RuleSetFRST")
			return(class.mod)
		}
		else if (type.method == "RI.GFRS.FRST"){
			rules <- list(attributes = rules$attributes[which(duplicated.pt == FALSE)], objects = new.obj,
			            threshold = rules$threshold[sort(unlist(new.obj))])
			attrs.rules <- unique(unlist(rules$attributes))
			rules <- ch.rules(rules, type.method = type.method, decision.table)

			## calculate variance and range of data
			attrs.p <- c(which(colnames(objects) %in% attrs.rules))
			res.varRange <- cal.var.range(objects, attrs.p, nominal.att)
			variance.data <- res.varRange$variance.data
			rownames(variance.data) <- NULL
			colnames(variance.data) <- colnames(objects)[attrs.p]
			range.data <- res.varRange$range.data
			colnames(range.data) <- colnames(objects)[attrs.p]

			mod <- list(rules = rules, type.model = "FRST", type.method = type.method,
			            type.task = type.task, t.similarity = t.similarity, t.tnorm = t.tnorm, variance.data = variance.data, range.data = range.data,
						antecedent.attr = colnames(objects)[attrs.p], consequent.attr = consequent.attr, nominal.att = nominal.att[attrs.p])
			class.mod <- ObjectFactory(mod, classname = "RuleSetFRST")
			return(class.mod)
		}
	}
}

## it is used to change rules in order to get the standard format
# @param rules original form of rules
# @param type.method a type of method
# @param decision.table
ch.rules <- function(rules, type.method = "RI.hybridFS.FRST", decision.table){
	tra.objects <- decision.table
	desc.attrs <- attr(decision.table, "desc.attrs")
	nominal.att <- attr(decision.table, "nominal.attrs")
	rule <- c()

	for (i in 1 : length(rules$attributes)){
		exit <- FALSE
		ii <- 1
		while (exit == FALSE) {
			## get value for antecedent and consequent of each rule
			anteVal <- tra.objects[rules$objects[[i]][ii], c(rules$attributes[[i]]), drop = FALSE]
			conqVal <- tra.objects[rules$objects[[i]][ii], ncol(tra.objects), drop = FALSE]
			if (is.null(rule)){
				tmpRule <- cbind(anteVal, conqVal)
				rownames(tmpRule) <- "value"
				rule <- list(tmpRule)
			}
			else {
				tmpRule <- cbind(anteVal, conqVal)
				rownames(tmpRule) <- "value"
				rule <- append(rule, list(tmpRule))
			}

			## checking stopping criteria
			if (ii == length(rules$objects[[i]])){
				exit <- TRUE
			}
			else {
				ii <- ii + 1
			}
		}
	}

	if (type.method == "RI.GFRS.FRST"){
		return(list(rules = rule, threshold = rules$threshold))
	}
	else {
		return(list(rules = rule))
	}
}

#an auxiliary function for computing laplace estimate of rule's confidence
laplaceEstimate <- function(rule, dataS, clsVec, uniqueCls, suppIdx = NULL) {
	if (is.null(suppIdx)) {
		if (length(rule$idx) > 1) {
			dataS <- do.call(paste, dataS[rule$idx])
		}
		else {
			dataS <- as.character(dataS[[rule$idx]])
		}

		tmpValues = paste(rule$values, collapse = " ")
		suppIdx = which(dataS == tmpValues)
	}

	clsFreqs = table(clsVec[suppIdx])
	maxIdx = which.max(clsFreqs)
	nOfCorrect = clsFreqs[maxIdx]

	rule$consequent = names(clsFreqs)[maxIdx]
	rule$support = as.integer(suppIdx)
	rule$laplace = (nOfCorrect + 1)/(length(suppIdx) + length(uniqueCls))
	return(rule)
}

# An auxiliary function that performs the best-first voting strategy.
#
# @title the best-first voting strategy function
# @param object a class "RuleSetRST".
# @param ruleValues values of rules.
# @param consequents values on the consequent part.
# @param majorityCls  a value representing majority class of decision attribute.
# @param ... other parameters.
# @return predicted values.
bestFirst <- function(object, ruleValues, consequents, majorityCls, ...) {
	matchIdx <- which(ruleValues == object)
	if (length(matchIdx) > 0) {
		prediction <- consequents[matchIdx[1]]
	}
	else {
		prediction <- majorityCls
	}

	return(prediction)
}

# An auxiliary function that performs the first-win voting strategy.
#
# @title the first-win voting strategy
# @param a data vector representing the object for which predictions are to be made
# @param object a class "RuleSetRST"
# @return a predicted value
firstWin <- function(object, ruleSet) {
  rulesCount = length(ruleSet)
  i = 1
  endFlag = FALSE
  prediction = NULL

  while(!endFlag) {
    if(all(object[ruleSet[[i]]$idx] == ruleSet[[i]]$values))  {
      endFlag = T
      prediction = ruleSet[[i]]$consequent
    } else  {
      if(i >= rulesCount) endFlag = TRUE
      i = i + 1
    }
  }

  if(is.null(prediction)) prediction = attr(ruleSet, "majorityCls")
  return(prediction)
}

# An auxiliary function that classifies a data object using custom voting strategy.
rulesVoting <- function(object, ruleSet, votingMethod = X.ruleStrength, ...) {

  rulesIdx = which(sapply(ruleSet,
                          function(rule, obj) all(obj[rule$idx] == rule$values),
                          object))

  prediction = NULL
  voteCounter = rep(0, length(attr(ruleSet, "uniqueCls")))
  names(voteCounter) = attr(ruleSet, "uniqueCls")

  if(length(rulesIdx) > 0) {
    for(i in rulesIdx) {
      voteCounter[ruleSet[[i]]$consequent] = voteCounter[ruleSet[[i]]$consequent] + votingMethod(ruleSet[[i]], ...)
    }
    tmpPreds = names(voteCounter)[which(voteCounter == max(voteCounter))]
    if(length(tmpPreds) == 1) {
      prediction = tmpPreds
    } else {
      prediction = names(attr(ruleSet, "clsProbs")[tmpPreds])[which.max(attr(ruleSet, "clsProbs")[tmpPreds])]
    }
  }

  if(is.null(prediction)) prediction = attr(ruleSet, "majorityCls")

  return(prediction)
}

#' A function returning a weight of rule's vote understood as strength of the rule.
#' It is defined as a product of a cardinality of a support of a rule and the length of this rule.
#'
#' @title Rule voting by strength of the rule
#' @author Andrzej Janusz
#'
#' @param rule a decision rule, i.e. element of a "RuleSetRST" object
#'
#' @return a numerical weight of the vote
#'
#' @seealso Other currently available voting methods are: \code{\link{X.laplace}}, \code{\link{X.rulesCounting}}.
#'
#' @export
X.ruleStrength <- function(rule) {
	return(length(rule$support) * length(rule$idx))
}

#' A function returning a weight of rule's vote understood as the Laplace estimate of its confidence.
#'
#' @title Rule voting by the Laplace estimate
#' @author Andrzej Janusz
#'
#' @param rule a decision rule, i.e. element of an "RuleSetRST" object
#'
#' @return a numerical weight of the vote
#'
#' @seealso Other currently available voting methods are: \code{\link{X.ruleStrength}}, \code{\link{X.rulesCounting}}.
#'
#' @export
X.laplace <- function(rule) {
	return(rule$laplace)
}

#' A function returning an equal vote's weight for every rule. It corresponds to voting by counting the
#' matching rules.
#'
#' @title Rule voting by counting matching rules
#' @author Andrzej Janusz
#'
#' @param rule a decision rule, i.e. element of an "RuleSetRST" object
#'
#' @return a numerical weight of the vote
#'
#' @seealso Other currently available voting methods are: \code{\link{X.ruleStrength}}, \code{\link{X.laplace}}.
#'
#' @export
X.rulesCounting = function(rule) {
	return(1)
}

# It is an auxiliary function for greedy covering the data with decision rules using CN2 algorithm.
findBestCN2Rule = function(dataSet, clsVec, uniqueCls, K = 10)  {
  descriptorsList = lapply(dataSet, function(x) sort(unique(x)))

  descriptorCandidates = list()
  for(i in 1:length(descriptorsList)) {
    descriptorCandidates = c(descriptorCandidates,
                             lapply(descriptorsList[[i]],
                                    function(v, x) return(list(idx = x, values = v)), i))
  }

  ruleCandidates = lapply(descriptorCandidates, laplaceEstimate, dataSet, clsVec, uniqueCls)

  endFlag = F
  ruleScores = sapply(ruleCandidates, function(x) return(x$laplace))
  bestIdx = which.max(ruleScores)
  bestRule = ruleCandidates[[bestIdx]]
  ruleCandidates = ruleCandidates[order(ruleScores, decreasing = T)[1:K]]

  while(!endFlag) {

    ruleCandidates = addDescriptorCN2(ruleCandidates, descriptorCandidates)
    ruleCandidates = lapply(ruleCandidates, laplaceEstimate, dataSet, clsVec, uniqueCls)
    ruleScores = sapply(ruleCandidates, function(x) return(x$laplace))

    if(bestRule$laplace < max(ruleScores)) {
      bestIdx = which.max(ruleScores)
      bestRule = ruleCandidates[[bestIdx]]
      ruleCandidates = ruleCandidates[order(ruleScores, decreasing = T)[1:K]]
		}
		else endFlag = T

  }
  return(bestRule)
}

# Adding a descriptor to a rule in CN2 algorithm
addDescriptorCN2 <- function(rulesList, descCandidates) {
  candidates = list()
  for(i in 1:length(rulesList)) {
    candidates = c(candidates,
                   lapply(descCandidates,
                          function(descriptor, rule) return(list(idx = c(rule$idx, descriptor$idx),
                                                                 values = c(as.character(rule$values), as.character(descriptor$values)))),
                          rulesList[[i]]))
  }

  return(candidates)
}

# Computation of a covering of a lower approximation of a concept by decision rules using the LEM2 algorithm.
# It is an auxiliary function.
computeLEM2covering <- function(concept, attributeValuePairs, decisionValues, uniqueCls) {

  if(length(concept) == 0) stop("Empty lower approximation of a decision class.")
  uncoveredConcept = concept
  rules = list()

  while(length(uncoveredConcept) > 0) {
    support = 1:length(decisionValues)
    rules[[length(rules) + 1]] = list(idx = integer(0), values = character(), support = support)
    tmpAttributeValuePairs = attributeValuePairs
    selectedAttributeValuePairs = list()

    while(!all(support %in% concept)) {
      tmpSupp = intersect(support, uncoveredConcept)
      correctLaplaceVec = sapply(tmpAttributeValuePairs,
                                 function(rule) {
                                   nOfCorrect = length(intersect(rule$support, tmpSupp));
                                   if(nOfCorrect > 0) laplace = nOfCorrect + rule$laplace  #(nOfCorrect + 1)/(length(tmpSupp) + length(uniqueCls))   #(length(rule$support) + length(uniqueCls))
                                   else laplace = 0;
                                   return(laplace)
                                 })
      tmpIdx = which.max(correctLaplaceVec)
      tmpRule = tmpAttributeValuePairs[[tmpIdx]]
      selectedAttributeValuePairs[[length(selectedAttributeValuePairs) + 1]] = tmpRule
      tmpAttributeValuePairs = tmpAttributeValuePairs[-tmpIdx]
      support = intersect(support, tmpRule$support)
    }
    rm(tmpSupp, tmpIdx, tmpRule, tmpAttributeValuePairs)

    toRmIdx = integer(0)
    if(length(selectedAttributeValuePairs) > 1) {
      for(i in 1:length(selectedAttributeValuePairs)) {
        suppList = lapply(selectedAttributeValuePairs[-c(toRmIdx, i)], function(x) x$support)
        if(length(suppList) > 0) {
          tmpSupport = Reduce(intersect, suppList)
        } else {
          tmpSupport = -1
        }
        if(all(tmpSupport %in% concept)) toRmIdx = c(toRmIdx, i)
      }
    }

    if(length(toRmIdx) > 0) {
      selectedAttributeValuePairs = selectedAttributeValuePairs[-toRmIdx]
      suppList = lapply(selectedAttributeValuePairs, function(x) x$support)
      support = Reduce(intersect, suppList)
    }

    idxVec = sapply(selectedAttributeValuePairs, function(x) x$idx)
    valuesVec = sapply(selectedAttributeValuePairs, function(x) x$values)
    rules[[length(rules)]]$support = support
    rules[[length(rules)]]$values = valuesVec
    rules[[length(rules)]]$idx = idxVec

    uncoveredConcept = setdiff(uncoveredConcept, support)
  }

  toRmIdx = integer(0)
  for(i in 1:length(rules)) {
    suppList = lapply(rules[-c(toRmIdx, i)], function(x) x$support)
    tmpSupport = Reduce(intersect, suppList)
    if(all(concept %in% tmpSupport)) toRmIdx = c(toRmIdx, i)
  }

  if(length(toRmIdx) > 0) {
    rules = rules[-toRmIdx]
  }

  return(rules)
}

# Computation of a covering of a lower approximation of a concept by decision rules using the AQ algorithm.
# It is an auxiliary function.
computeAQcovering <- function(concept, attributeValuePairs, dataTab, epsilon = 0.05, K = 2) {

  if(length(concept) == 0) stop("Empty lower approximation of a decision class.")
  uncoveredConcept = concept
  coverCounts = rep(0, length(concept))
  names(coverCounts) = as.character(concept)
  rules = list()

  while(length(uncoveredConcept) > 0) {
    seedIdx = sample(uncoveredConcept, 1)
    selectedAttributeValuePairs = mapply(function(avps, v) avps[[as.character(v)]],
                                         attributeValuePairs, dataTab[seedIdx,],
                                         SIMPLIFY = FALSE)

    suppList = lapply(selectedAttributeValuePairs, function(x) x$support)
    support = Reduce(intersect, suppList)
    rules[[length(rules) + 1]] = list(idx = integer(0), values = character(), support = support)

    attrOrdering = sample(1:length(selectedAttributeValuePairs))
    suppList = suppList[attrOrdering]
    selectedAttributeValuePairs = selectedAttributeValuePairs[attrOrdering]

    for(i in length(attrOrdering):1)  {
      tmpSupport = Reduce(intersect, suppList[-i])
      if(length(tmpSupport) > 0 && sum(tmpSupport %in% concept)/length(tmpSupport) >= 1-epsilon) {
        support = tmpSupport
        suppList = suppList[-i]
        selectedAttributeValuePairs = selectedAttributeValuePairs[-i]
      }
    }
    rm(tmpSupport, attrOrdering, suppList)

    idxVec = sapply(selectedAttributeValuePairs, function(x) x$idx)
    valuesVec = sapply(selectedAttributeValuePairs, function(x) x$values)
    rules[[length(rules)]]$support = support
    rules[[length(rules)]]$values = valuesVec
    rules[[length(rules)]]$idx = idxVec

    coveredConcept = intersect(support, concept)
    coverCounts[as.character(coveredConcept)] = coverCounts[as.character(coveredConcept)] + 1
    uncoveredConcept = concept[which(coverCounts < K)]
  }

  for(i in length(rules):1) {
    tmpCoveredConcept = intersect(concept, rules[[i]]$support)
    tmpCoverCounts = coverCounts
    tmpCoverCounts[as.character(tmpCoveredConcept)] = tmpCoverCounts[as.character(tmpCoveredConcept)] - 1
    if(all(tmpCoverCounts >= K)) {
      rules = rules[-i]
      coverCounts = tmpCoverCounts
    }
  }
  rm(tmpCoveredConcept, tmpCoverCounts)

  return(rules)
}
Any scripts or data that you put into this service are public.
RoughSets documentation built on May 29, 2024, 7:34 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
RoughSets
Data Analysis Using Rough Set and Fuzzy Rough Set Theories

R/RuleInduction.OtherFuncCollections.R
In RoughSets: Data Analysis Using Rough Set and Fuzzy Rough Set Theories

Defines functions computeAQcovering computeLEM2covering addDescriptorCN2 findBestCN2Rule X.rulesCounting X.laplace X.ruleStrength rulesVoting firstWin bestFirst laplaceEstimate ch.rules std.rules

Documented in X.laplace X.rulesCounting X.ruleStrength

Try the RoughSets package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RoughSets Data Analysis Using Rough Set and Fuzzy Rough Set Theories

R/RuleInduction.OtherFuncCollections.R In RoughSets: Data Analysis Using Rough Set and Fuzzy Rough Set Theories

Defines functions computeAQcovering computeLEM2covering addDescriptorCN2 findBestCN2Rule X.rulesCounting X.laplace X.ruleStrength rulesVoting firstWin bestFirst laplaceEstimate ch.rules std.rules

Documented in X.laplace X.rulesCounting X.ruleStrength

Try the RoughSets package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RoughSets
Data Analysis Using Rough Set and Fuzzy Rough Set Theories

R/RuleInduction.OtherFuncCollections.R
In RoughSets: Data Analysis Using Rough Set and Fuzzy Rough Set Theories