R/rsr.R

Defines functions rsr

Documented in rsr

###############################################################################################

# Function Description
#   Rating scale reduction
#   This package implements a rather sophisticated method published in (Koczkodaj et al., 2017) In
#   essence, it is a stepwise method fro maximizing the area under the area (AUC) of receiver operating
#   characteristic (ROC). In this description, data mining terminology will be used:
#       1.examples (observations in statistics),
#       2.variables in statistics,
#       3.class or decision attribute (decision variable may be used statistics).
#   The implemented algorithm (when reduced to its minimum) comes to using a loop for all attributes
#   (with the class excluded) to compute AUC. Subsequently, attributes are sorted in the descending
#   order by AUC. The attribute with the largest AUC is added to a subset of all attributes (evidently, it
#   cannot be empty since it is supposed to be the minimum subset S of all attributes with the maximum
#   AUC). We keep adding the next in line (according to AUC) attribute to the subset S checking AUC.
#   If it decreases, we stop the procedure. The above procedure can be described by the following
#   algorithm.
#   Algorithm:
#       1. compute AUC of all attributes excluding class
#       2. sort attributes by their AUC in the ascending order
#       3. select the attribute with the largest AUC to subset S
#       4. select the next attribute A with the largest AUC to subset S
#       5. if the AUC of the subset S is larger that AUC of the former AUC then go to 3
#   There are a lot of checking (e.g., if the dataset is not empty or full of replications) involved.

# Parameters
#   attribute: a matrix or data.frame containing attributes
#       D: the decision vector
#       plotRSR: If TRUE the ROC curve is ploted
#   method: the Stop reduction criteria: First Max of AUC or Global Max of AUC

# Return result
#   rsr.auc: total AUC of atrtibutes
#   rsr.label: attribute labels
#   summary: a summary table
###############################################################################################

rsr <- function(attribute, D, plotRSR = FALSE, method = c("Stop1Max", "StopGlobalMax")) {
    method <- match.arg(method)
	if (length(names(attribute)) == 0) {
        outlist <- list(message("names(attribute)==NULL Create attribute LABELS (e.g. using colnames)"))
    }
    
    if (nrow(attribute) != length(D)) {
        stop("Attributes and decision must have the same number of rows")
    } 
    else {
        # calculate AUC for every attribute
        start.auc <- sapply(1:ncol(attribute), function(i)
            roc(D, attribute[, i], plotROC = FALSE)$auc)
        
        # sort attributes according to AUCs in decreasing order
        s <- attribute[, sort.list(start.auc, decreasing = TRUE)]
        
        # compute running total AUC for attributes 1 to i
        ss <- sapply(1:ncol(s), function(i) 
            roc(D, rowSums(as.matrix(s[, 1:i])), plotROC = FALSE)$auc
        )
        
        # create reduced rating scale taking the criteria 'Stop first Max'
        auc1 <- c()

        if (method == "Stop1Max") 
            {
                i <- 1
                while (ss[i + 1] > ss[i]) {
                  auc1[i] <- ss[i]
                  i <- i + 1
                }
                auc.reduct <- c(auc1, ss[i])
            }  # create reduced rating scale taking the criteria 'Global Max'
        
        else {
            i = which.max(ss)
            auc.reduct <- ss[1:i]
        }
        
### create summary table
        if (length(auc.reduct) > 1) {
            tab <- data.frame(names(s)[1:length(auc.reduct)],
                              start.auc[1:length(auc.reduct)],
                              auc.reduct)
            colnames(tab) <- c("item", "AUC one variable", "AUC running total")
        }
        
        else {
            tab <- data.frame(names(s)[1:length(auc.reduct)], auc.reduct)
            colnames(tab) <- c("item", "AUC running total")
            }
        
### plot the ROC curve for reduced rating scale (sum of attributes in rsr)
        if (plotRSR == TRUE) {
            if (length(auc.reduct) == 1) {  # reduced to one attribute ??
                y <- s[, 1]
                p <- plot.roc(D, y, plotROC = TRUE)
                outlist <- list(rsr.auc = auc.reduct, rsr.label = names(s)[1], summary = tab, p)
            } 
            else {
                p <- roc(D, rowSums(s[, 1:i]))
            ### prepared data for plotting
                x <- 1 - p$specificities
                y <- p$sensitivities
                dfr <- data.frame(x, y)  
### plot ROC curve of total AUC of reduced rating scale
                pp <- ggplot(dfr, aes(x, y)) + geom_line(col = "red", size = 1) + geom_abline(intercept = 0, 
                  slope = 1, size = 0.5) + geom_polygon(fill = "gray") + theme_bw() + theme(panel.border = element_blank(), 
                  panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + 
                  ggtitle("ROC curve of total AUC of reduced rating scale") + labs(x = "1-specificities", 
                  y = "sensitivities") + theme(plot.title = element_text(hjust = 0.5))
                
                outlist <- list(rsr.auc = auc.reduct, rsr.label = names(s[1, 1:length(auc.reduct)]), 
                  summary = tab, pp)
            }

            } 
            else { if (length(auc.reduct) == 1) {  
						outlist <- list(rsr.auc = auc.reduct, rsr.label = names(s)[1], summary = tab)}
							else {
                outlist <- list(rsr.auc = auc.reduct, rsr.label = names(s[1, 1:length(auc.reduct)]), 
                  summary = tab)}}
    }
    
    class(outlist) = "RatingScaleReduction"
### the list includes ROC curve of total AUC of reduced rating scale, reduced rating scale label and result summary
    return(outlist)  
}

Try the RatingScaleReduction package in your browser

Any scripts or data that you put into this service are public.

RatingScaleReduction documentation built on Jan. 21, 2021, 5:06 p.m.