R/truthTable.R

truthTable <- function(data, outcome = "", neg.out = FALSE, exo.facs = c(""), 
                       n.cut = 1, incl.cut1 = 1, incl.cut0 = 1, complete = FALSE, 
                       show.cases = FALSE, sort.by = c(""), decreasing = TRUE, 
                       inf.test = c(""), use.letters = FALSE, ...) {
    
    metacall <- match.call()
    other.args <- list(...)
    via.pof <- "via.pof" %in% names(other.args)
    PRI <- "PRI" %in% names(other.args)
    
    # first turn "matrix" into "data.frame" 
    #---------------------------------------------------------------------------
    
    if (is.matrix(data)) {
        
        data <- as.data.frame(data)
    }
    
    # outcome
    #---------------------------------------------------------------------------
    
    original.outcome <- outcome
    
    if (outcome == "") {
     
        errmsg <- paste0("No outcome is specified.")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), call. = FALSE)
    }
    
    mv.outcome <- grepl("[{]", outcome)
    
    if (mv.outcome) {
     
        endo.fac <- substr(outcome, 1, regexpr("[{]", outcome)[1] - 1)
        endo.fac.level <- as.numeric(substr(outcome, regexpr("[{]", outcome)[1] + 1, 
                                                     regexpr("}"  , outcome)[1] - 1))
        if (!(endo.fac %in% colnames(data))) {
         
            errmsg <- paste0("The specified endogenous factor ", endo.fac, " does 
                              not exist in the data.")
            cat("\n")
            stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
                 call. = FALSE)
        }
        
        else if (!(endo.fac.level %in% unique(data[, endo.fac]))) {
         
            errmsg <- paste0("The specified endogenous factor ", endo.fac, " has 
                              no level {", endo.fac.level, "}.")
            cat("\n")
            stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
                 call. = FALSE)
        }
    }
    
    else {
     
        endo.fac <- outcome
        
        if (!(endo.fac %in% colnames(data))) {
     
            errmsg <- paste0("The name of the outcome is incorrect. The corresponding
                              endogenous factor ", endo.fac, " does not exist in the 
                              data.")
            cat("\n")
            stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
                 call. = FALSE)
        }
    }
    
    # exo.facs 
    #---------------------------------------------------------------------------
    
    # if no exogenous factors are specified, use all factors except the 
    # endogenous factor
    if (all(gsub("\\s", "", exo.facs, perl = TRUE) == "")) {
     
        exo.facs <- colnames(data)[-which(colnames(data) == endo.fac)]
    }
    
    # if there are at least two exogenous factors,...
    else if (length(exo.facs) > 1) {
     
        # and the endogenous factor is also in the set of exogenous factors
        if (endo.fac %in% exo.facs) {
      
            errmsg <- paste0("The endogenous factor ", endo.fac, " cannot 
                              simultaneously be an exogenous factor.")
            cat("\n")
            stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), call. = FALSE)
        }
     
        # if at least one exogenous factor is not in the data
        if (!all(exo.facs %in% colnames(data))) {
      
            f.notindata <- exo.facs[which(!(exo.facs %in% colnames(data)))]
            errmsg <- paste0("The following exogenous factors are not present in 
                              the data: ", f.notindata, ".")
            cat("\n")
            stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), call. = FALSE)
        }
    }
    
    # if there is only one exogenous factor,...
    else {
   
        errmsg <- paste0("At least two exogenous factors need to be specified.")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), call. = FALSE)
    }
    
    # specify the data under the factor frame
    #---------------------------------------------------------------------------
    
    data <- data[, c(exo.facs, endo.fac)]
    initial.data <- data
    
    if (mv.outcome) {
    
        data[, endo.fac] <- ifelse(data[, endo.fac] == endo.fac.level, 1, 0)
    }
     
    # check for problems in the data 
    #---------------------------------------------------------------------------  
    
    # missing values
    if (any(is.na(data))) {
       
        f.nas <- names(data)[apply(apply(data, 2, is.na), 2, any)]
        errmsg <- paste0("Included factors must not contain missing values. The 
                          following factors contain missing values: ", f.nas, ".")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), call. = FALSE)
    }  
        
    # constants
    constant <- apply(data[, exo.facs, drop = FALSE], 2, function (x) {
                      length(unique(x)) == 1})
     
    if (any(constant)) {
      
        errmsg <- paste0("The exogenous factor/s ", paste(exo.facs[constant], 
                         collapse = " and "), " is a/are constant/s. Please 
                         eliminate constants from the data before proceeding.")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
                immediate. = TRUE, call. = FALSE)
    }
     
    # duplicates
    dupl.ex.facs <- duplicated(t(data[, exo.facs]))
     
    if (any(dupl.ex.facs)) {
      
        errmsg <- paste0("The exogenous factor/s ", paste(exo.facs[dupl.ex.facs], 
                          collapse = " and "), " is a/are duplicate/s. Please 
                          eliminate duplicates from the data before proceeding.")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
             immediate. = TRUE, call. = FALSE)
    }
    
    # lower-case letters
    if (any(strsplit(endo.fac, "")[[1]] %in% letters)) {
     
        wrnmsg <- paste0("The label of the endogenous factor, '", endo.fac, "', 
                          has been transformed to upper case.")
        cat("\n")
        warning(paste(strwrap(wrnmsg, exdent = 7), collapse = "\n"), 
                immediate. = TRUE, call. = FALSE)
    }
    
    if (any(sapply(strsplit(exo.facs, ""), function (x) any(x %in% letters)))) {
     
        wrnmsg <- paste0("The label of at least one exogenous factor has been
                          transformed to upper case.")
        cat("\n")
        warning(paste(strwrap(wrnmsg, exdent = 7), collapse = "\n"), 
                immediate. = TRUE, call. = FALSE)
    }
   
    # make sure all relevant labels are uppercase
    exo.facs <- toupper(exo.facs)
    endo.fac <- toupper(endo.fac)
    colnames(data) <- toupper(colnames(data))
    colnames(initial.data) <- toupper(colnames(initial.data))
   
    # call the verification function for truth tables
    if (!via.pof) {
     
    verify.tt(data = data, neg.out = neg.out, exo.facs = exo.facs, 
              incl.cut1 = incl.cut1, incl.cut0 = incl.cut0, inf.test = inf.test)
    }
     
        
    # preliminaries
    #---------------------------------------------------------------------------
    
    # if user lowers only incl.cut1, make sure incl.cut0 is lowered accordingly
    if (incl.cut0 > incl.cut1) {
     
        incl.cut0 <- incl.cut1
    }
    
    # if 'neg.out = TRUE', negate the outcome
    if (neg.out) {
     
        data[, endo.fac] <- 1 - data[, endo.fac]
    }
    
    # number of exogenous factors and don't care code
    n.exo.facs <- length(exo.facs)
    dc.code <- -1
    
    # replace labels of exogenous factors with upper-case letters (in alphabetical
    # order) if they are not already labelled with upper-case letters (not necessarily
    # in alphabetical order)
    if (use.letters & sum(nchar(exo.facs)) != n.exo.facs) {
     
        exo.facs <- LETTERS[seq(n.exo.facs)]
        colnames(data)[seq(n.exo.facs)] <- exo.facs
    }
    
    # which exogenous factors are fuzzy-set/multi-value factors
    fs <- apply(data[, exo.facs], 2, function(x) any(x > 0 & x < 1))
    
    # which exogenous factors are multi-value factors (values such as 2.5 are
    # already caught in verify.tt)
    mv <- apply(data[, exo.facs], 2, function(x) any(x > 1))
    
    # check whether mv-factors contain unused levels (cs-factors with unused 
    # levels are constants and already caught above)
    ul.exo.facs <- apply(data[, exo.facs][mv], 2, function (x) {
                         length(unique(x)) < (max(x) + 1)}
    )
        
    if (any(ul.exo.facs)) {
     
        errmsg <- paste0("The multivalent exogenous factor/s ", 
                          paste(exo.facs[mv][ul.exo.facs], collapse = " and "), 
                          " contain/s one or more unused levels. Please reduce 
                          the number of levels before proceeding (multivalent 
                          factors require integers beginning with 0 at increments
                          of 1).")
        cat("\n")
        stop(paste(strwrap(errmsg, exdent = 7), collapse = "\n"), 
             immediate. = TRUE, call. = FALSE)
    }
    
    #
    #---------------------------------------------------------------------------
    for (i in seq(n.exo.facs)) {
     
         if (!fs[i]) {
      
             copy.cc <- data[, i]
      
             if (any(copy.cc < 0)) {
       
                 copy.cc[copy.cc < 0] <- max(copy.cc) + 1
                 data[, i] <- copy.cc
             }
         }
    }
    
    # collect number of levels for exogenous factors
    noflevels <- apply(data[, exo.facs, drop = FALSE], 2, max) + 1
    noflevels[fs] <- 2
    
    if (via.pof) {
         
         return(as.vector(noflevels))
    }
    
    # build the minterm matrix as the first part of the truth table     
    tt <- mintermMatrix(noflevels)
    
    # first element : first line is inclusion, second PRI, third number of cases
    # second element: tt line numbers of cases
    incl.cases <- .Call("truthTable", as.matrix(data[, exo.facs]), tt, 
                        as.numeric(fs), data[, endo.fac], package = "QCApro")
    
    colnames(incl.cases[[1]]) <- seq.int(ncol(incl.cases[[1]]))
    line.data <- incl.cases[[2]]
    freq.lines <- table(line.data)
    
    # which tt rows contain at least n.cut cases
    preserve <- incl.cases[[1]][3, ] >= n.cut
    
    # the inclusion scores for each tt row; NA rows do not contain any cases with
    # membership score above 0 
    incl.scores  <- incl.cases[[1]][1:2, ]
    incl.scores[is.nan(incl.scores)] <- NA
    
    # the output function values for each observed tt row, first positive and negative,
    # then contradictions
    out.values <- as.numeric(incl.scores[1, preserve] >= (incl.cut1 - .Machine$double.eps ^ 0.5))
    out.values[incl.scores[1, preserve] < incl.cut1 & incl.scores[1, preserve] >= (incl.cut0 - .Machine$double.eps ^ 0.5)] <- "C"
    names(out.values) <- colnames(incl.scores)[preserve]
    
    line.data[!line.data %in% colnames(incl.scores)[preserve]] <- 0
    
    # actually observed tt rows that count as unobserved due to n.cut
    excluded <- line.data == 0
    
    # the new line data without excluded cases
    line.data <- line.data[!excluded]
    
    data[!excluded, exo.facs] <- tt[line.data, ]
    
    if (any(excluded)) {
        
        excluded.data <- data[excluded, ]
        data <- data[!excluded, ]
    }
    
    
    if (complete) {
        
        line.tt <- seq_len(dim(tt)[1])
    }
    
    else {
        
        line.tt <- sort(unique(line.data))
        tt <- getRow(unname(noflevels), line.tt)
    }
    
    tt <- as.data.frame(tt)
    rownames(tt) <- line.tt
    
    if (complete) {

        tt <- tt[order(line.tt)[c(as.numeric(names(out.values)), 
                                  as.numeric(names(freq.lines)[freq.lines < n.cut]), 
                                  line.tt[!line.tt %in% c(as.numeric(names(out.values)), 
                                                          as.numeric(names(freq.lines)))
                                          ]
                                  )
                                ],
                 ]
    }
    
    tt <- cbind(tt, "?", 0, "-")
    colnames(tt) <- c(exo.facs, "OUT", "n", "incl")
    tt$OUT <- as.character(tt$OUT)
    tt$incl <- as.character(tt$incl)
    
    tt[seq_along(out.values), "OUT"] <- out.values
    tt[seq_along(freq.lines), "n"] <- freq.lines[match(rownames(tt)[seq(length(freq.lines))],
                                                       rownames(freq.lines))]
    
    incl.scores <- incl.scores[, which(colnames(incl.scores) %in% rownames(tt)), drop = FALSE]
    tt[colnames(incl.scores), "incl"] <- incl.scores[1, ]
    
    if (PRI) {
    
        tt <- cbind(tt, PRI = "-")
        tt$PRI <- as.character(tt$PRI)
        tt[colnames(incl.scores), "PRI"] <- incl.scores[2, ]
    }
    
    # collect cases
    #---------------------------------------------------------------------------
    cases <- sapply(line.tt, function(x) {
     
        paste(rownames(data)[which(line.data == x)], collapse = ",")
    })
    
    print(cases)
    cat("\n")
    stop("TEST")
    
    #
    #---------------------------------------------------------------------------
    for (i in seq(n.exo.facs)) {
        if (!fs[i]) {
            if (any(initial.data[, i] == dc.code)) {
                tt[, i][tt[, i] == max(tt[, i])] <- dc.code
                data[, i][data[, i] == max(data[, i])] <- dc.code
                noflevels[i] <- noflevels[i] - 1
            }
        }
    }
    
    # inference-statistical testing
    #---------------------------------------------------------------------------
    
    statistical.testing <- FALSE
    
    if (inf.test[1] == "binom") {
        
        statistical.testing <- TRUE
        
        if (length(inf.test) > 1) {
        
            alpha <- as.numeric(inf.test[2])
        }
        
        else {
         
            alpha <- 0.05
        }
        
        observed <- which(tt$OUT != "?")
        trials <- tt[observed, "n"]
        success <- trials * as.numeric(tt[observed, "incl"])
                
        tt <- cbind(tt, pval1 = "-", pval0 = "-")
        tt[, "pval1"] <- tt[, "pval0"] <- as.character(tt[, "pval1"])
        tt[observed, "OUT"] <- 0
        
        for (i in seq_along(observed)) {
             
             pval1 <- tt[observed[i], "pval1"] <- binom.test(success[i], trials[i], incl.cut1, "less")$p.value
             pval0 <- tt[observed[i], "pval0"] <- binom.test(success[i], trials[i], incl.cut0, "greater")$p.value
            
             if (pval1 > alpha) {
            
                 tt[i, "OUT"] <- 1
             }
            
             else if (pval1 < alpha & pval0 < alpha) {
            
                 tt[i, "OUT"] <- "C"
             }
        }
        tt[!observed, "pval1"] <- "-"
        tt[!observed, "pval0"] <- "-"
    }
    
    # add cases
    #---------------------------------------------------------------------------
    
    if (show.cases) {
        
        tt <- cbind(tt, cases)
    }
    
    
    # reorder the truth table by inclusion and/or number of cases
    #---------------------------------------------------------------------------
    if (any(sort.by != "")) {
     
        sort.args <- c("incl", "n")
     
        if (!all(is.na(args.match <- match(sort.by, sort.args)))) {
      
            sort.args <- sort.args[args.match[!is.na(args.match)]]
            consorder <- order(tt[, sort.args[1]], decreasing = decreasing)
      
            if (length(sort.args) == 2) {
       
                consorder <- order(tt[, sort.args[1]], tt[, sort.args[2]], 
                                   decreasing = decreasing)
            }
      
            tt <- tt[consorder, ]
            line.tt <- line.tt[consorder]
        }
    }
    
    # collect essentials
    #---------------------------------------------------------------------------
    
    x <- list(tt = tt, indexes = sort(unique(line.data)), noflevels = noflevels, 
              initial.data = initial.data, recoded.data = data, cases = cases, 
              outcome <- original.outcome, neg.out = neg.out, n.cut = n.cut, 
              incl.cut1 = incl.cut1, incl.cut0 = incl.cut0,
              inf.test = statistical.testing  
              )
    
    if (any(excluded)) {
       
        x$excluded.data <- excluded.data
    }
    
    x$tt$incl[is.na(x$tt$incl)] <- "-"
    x$tt$PRI[is.na(x$tt$PRI)] <- "-"
    
    PRI <- FALSE
    if ("PRI" %in% names(other.args)) {
        if (is.logical(other.args$PRI)) {
            PRI <- other.args$PRI[1]
        }
    }
    
    x$PRI <- PRI
    x$call <- metacall
    x$opts$use.letters <- use.letters
    
    return(structure(x, class = "tt"))
}
AlrikThiem/QCApro documentation built on May 5, 2019, 4:55 a.m.