R/calibrate.R
In QCA: Qualitative Comparative Analysis

# Copyright (c) 2016 - 2020, Adrian Dusa
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, in whole or in part, are permitted provided that the
# following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The names of its contributors may NOT be used to endorse or promote products
#       derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ADRIAN DUSA BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

`calibrate` <-
function (x, type = "fuzzy", method = "direct", thresholds = NA,
          logistic = TRUE, idm = 0.95, ecdf = FALSE, below = 1, above = 1, ...) {
    other.args <- list(...)
    funargs <- lapply(lapply(match.call(), deparse)[-1], function(x) gsub("\"|[[:space:]]", "", x))
    if (is.element("q", names(other.args))) {
        above <- other.args$q
    }
    if (is.element("p", names(other.args))) {
        below <- other.args$p
    }
    if (admisc::possibleNumeric(x)) {
        x <- admisc::asNumeric(x) 
    }
    else {
        if (grepl("[$]", funargs$x) & is.null(x)) {
            x <- unlist(strsplit(funargs$x, split = "\\$"))
            cat("\n")
            stop(simpleError(sprintf("There is no column \"%s\" in the dataframe %s.\n\n", x[2], x[1])))
        }
        cat("\n")
        stop(simpleError("The input is not numeric.\n\n"))
    }
    if (!is.element(type, c("crisp", "fuzzy"))) {
        cat("\n")
        stop(simpleError("Incorrect calibration type.\n\n"))
    }
    if (!is.element(method, c("direct", "indirect", "TFR"))) {
        cat("\n")
        stop(simpleError("Incorrect calibration method.\n\n"))
    }
    if (method != "TFR") {
        if(all(is.na(thresholds))) {
            cat("\n")
            stop(simpleError("Threshold value(s) not specified.\n\n"))
        }
        if (is.character(thresholds) & length(thresholds) == 1) {
            thresholds <- admisc::splitstr(thresholds)
        }
        if (admisc::possibleNumeric(thresholds)) {
            nmsths <- NULL
            if (!is.null(names(thresholds))) {
                nmsths <- names(thresholds)
            }
            thresholds <- admisc::asNumeric(thresholds)
            names(thresholds) <- nmsths
        }
        else {
            cat("\n")
            stop(simpleError("Thresholds must be numeric.\n\n"))
        }
    }
    if (type == "crisp") {
        if (any(thresholds < min(x) | thresholds > max(x))) {
            cat("\n")
            stop(simpleError("Threshold value(s) outside the range of x.\n\n"))
        }
        if (!is.null(names(thresholds))) {
            cat("\n")
            stop(simpleError("Named thresholds require fuzzy type calibration.\n\n"))
        }
        thresholds <- sort(thresholds)
        return(findInterval(x, thresholds))
    }
    else if (type == "fuzzy") {
        check.equal <- function(x, y) {
            check.vector <- as.logical(unlist(lapply(x, all.equal, y)))
            check.vector[is.na(check.vector)] <- FALSE
            return(check.vector)
        }
        lth <- length(thresholds)
        nth <- names(thresholds)
        if (method == "direct") {
            if (lth != 3 & lth != 6) {
                cat("\n")
                stop(simpleError("For fuzzy direct calibration, there should be either 3 or 6 thresholds\".\n\n"))
            }
            if (idm <= 0.5 | idm >= 1) {
                cat("\n")
                stop(simpleError("The inclusion degree of membership has to be bigger than 0.5 and less than 1.\n\n"))
            }
            if (lth == 3) {
                if (!is.null(names(thresholds))) {
                    if (length(unique(nth)) == sum(is.element(nth, c("e", "c", "i")))) {
                        thresholds <- thresholds[match(c("e", "c", "i"), nth)]
                    }
                }
                thresholds <- as.vector(thresholds)
                thEX <- thresholds[1]
                thCR <- thresholds[2]
                thIN <- thresholds[3]
                if (logistic) {
                    if (thresholds[1] > thresholds[3]) {
                        thEX <- thresholds[3]
                        thIN <- thresholds[1]
                    }
                    y <- (x < thCR) + 1
                    fs <- 1/(1 + exp(-((x - thCR) * (c(1, -1)[y]*log(idm/(1 - idm))/(c(thIN, thEX)[y] - thCR)))))
                    if (thresholds[1] > thresholds[3]) {
                        fs <- 1 - fs
                    }
                }
                else {
                    if (any(table(c(thEX, thCR, thIN)) > 1)) {
                        cat("\n")
                        warning(simpleWarning("Some thresholds equal, that should not be equal.\n\n"))
                    }
                    if (above <= 0 | below <= 0) {
                        cat("\n")
                        stop(simpleError("Arguments \"above\" and \"below\" should be positive.\n\n"))
                    }
                    increasing <- TRUE
                    if (thIN < thCR & thCR < thEX) {
                        increasing <- FALSE
                    }      
                    if (ecdf) {
                        ecdfx <- x[-which(x < min(thresholds))]
                        ecdfx <- ecdfx[-which(ecdfx > max(thresholds))]
                        Fn <- ecdf(ecdfx)
                    }
                    fs <- rep(NA, length(x))    
                    for (i in seq(length(x))) {
                        if (increasing) {
                            if (x[i] < thEX | check.equal(x[i], thEX)) {
                                fs[i] <- 0
                            }
                            else if (x[i] < thCR | check.equal(x[i], thCR)) {
                                fs[i] <- (((thEX - x[i])/(thEX - thCR))^below)/2
                                if (ecdf) {
                                    fs[i] <- (Fn(x[i])/Fn(thCR))/2
                                }
                            }
                            else if (x[i] < thIN | check.equal(x[i], thIN)) {
                                fs[i] <- 1 - (((thIN - x[i])/(thIN - thCR))^above)/2
                                if (ecdf) {
                                    fs[i] <- 1 - ((1 - Fn(x[i]))/(1 - Fn(thCR)))/2
                                }
                            }
                            else {
                                fs[i] <- 1
                            }
                        }
                        else {
                            if (x[i] < thIN | check.equal(x[i], thIN)) {
                                fs[i] <- 1
                            }
                            else if (x[i] < thCR | check.equal(x[i], thCR)) {
                                fs[i] <- 1 - (((thIN - x[i])/(thIN - thCR))^above)/2
                                if (ecdf) {
                                    fs[i] <- 1 - (Fn(x[i])/Fn(thCR))/2
                                }
                            }
                            else if (x[i] < thEX | check.equal(x[i], thEX)) {
                                fs[i] <- (((thEX - x[i])/(thEX - thCR))^below)/2
                                if (ecdf) {
                                    fs[i] <- ((1 - Fn(x[i]))/(1 - Fn(thCR)))/2
                                }
                            }
                            else {
                                fs[i] <- 0
                            }
                        }
                    }
                }
            }
            else { 
                if (!is.null(nth)) {
                    if (length(unique(nth)) == sum(is.element(nth, c("e1", "c1", "i1", "i2", "c2", "e2")))) {
                        thresholds <- thresholds[match(c("e1", "c1", "i1", "i2", "c2", "e2"), nth)]
                    }
                }
                thresholds <- as.vector(thresholds)
                thEX1 <- thresholds[1]
                thCR1 <- thresholds[2]
                thIN1 <- thresholds[3]
                thIN2 <- thresholds[4]
                thCR2 <- thresholds[5]
                thEX2 <- thresholds[6]
                if (thCR1 < min(thEX1, thIN1) | thCR1 > max(thEX1, thIN1)) {
                    cat("\n")
                    stop(simpleError("First crossover threshold not between first exclusion and inclusion thresholds.\n\n"))
                }
                if (thCR2 < min(thEX2, thIN2) | thCR2 > max(thEX2, thIN2)) {
                    cat("\n")
                    stop(simpleError("Second crossover threshold not between second exclusion and inclusion thresholds.\n\n"))
                }
                somequal <- FALSE
                if (any(table(c(thEX1, thCR1, thIN1)) > 1) | any(table(c(thIN2, thCR2, thEX2)) > 1) | thCR1 == thCR2) {
                    somequal <- TRUE
                }  
                increasing <- TRUE
                if (thIN1 < thCR1 & thCR1 < thEX1 & thEX1 <= thEX2 & thEX2 < thCR2 & thCR2 < thIN2) {
                    increasing <- FALSE
                }
                if (increasing) {
                    if (thEX1 == thEX2) {
                        somequal <- TRUE
                    }
                }
                else {
                    if (thIN1 == thIN2) {
                        somequal <- TRUE
                    }
                }
                if (somequal) {
                    cat("\n")
                    stop(simpleError("Some thresholds equal, that should not be equal.\n\n"))
                }
                if (above <= 0 | below <= 0) {
                    cat("\n")
                    stop(simpleError("Arguments \"above\" and \"below\" should be positive.\n\n"))
                }
                fs <- rep(NA, length(x))
                for (i in seq(length(x))) {
                    if (increasing) {
                        if (x[i] < thEX1 | check.equal(x[i], thEX1)) {
                            fs[i] <- 0
                        }
                        else if (x[i] < thCR1 | check.equal(x[i], thCR1)) {
                            fs[i] <- (((thEX1 - x[i])/(thEX1 - thCR1))^below)/2
                        }
                        else if (x[i] < thIN1) {
                            fs[i] <- 1 - (((thIN1 - x[i])/(thIN1 - thCR1))^above)/2
                        }
                        else if (x[i] < thIN2 | check.equal(x[i], thIN2)) {
                            fs[i] <- 1
                        }
                        else if (x[i] < thCR2 | check.equal(x[i], thCR2)) {
                            fs[i] <- 1 - (((thIN2 - x[i])/(thIN2 - thCR2))^above)/2
                        }
                        else if (x[i] < thEX2 | check.equal(x[i], thEX2)) {
                            fs[i] <- (((thEX2 - x[i])/(thEX2 - thCR2))^below)/2
                        }
                        else {
                            fs[i] <- 0
                        }
                    }
                    else {
                        if (x[i] < thIN1 | check.equal(x[i], thIN1)) {
                            fs[i] <- 1
                        }
                        else if (x[i] < thCR1 | check.equal(x[i], thCR1)) {
                            fs[i] <- 1 - (((thIN1 - x[i])/(thIN1 - thCR1))^above)/2
                        }
                        else if (x[i] < thEX1) {
                            fs[i] <- (((thEX1 - x[i])/(thEX1 - thCR1))^below)/2
                        }
                        else if (x[i] < thEX2 | check.equal(x[i], thEX2)) {
                            fs[i] <- 0
                        }
                        else if (x[i] < thCR2 | check.equal(x[i], thCR2)) {
                            fs[i] <- (((thEX2 - x[i])/(thEX2 - thCR2))^below)/2
                        }
                        else if (x[i] < thIN2 | check.equal(x[i], thIN2)) {
                            fs[i] <- 1 - (((thIN2 - x[i])/(thIN2 - thCR2))^above)/2
                        }
                        else {
                            fs[i] <- 1
                        }
                    }
                }
            }
            fs[fs < 0.0001] <- 0
            fs[fs > 0.9999] <- 1
            return(fs)
        }
        else if (method == "indirect") {
            thresholds <- sort(thresholds)
            values <- round(seq(0, 1, by = 1 / length(thresholds)), 3)
            y <- rep(0, length(x))
            for (i in seq(length(thresholds))) {
                y[x > thresholds[i]] = values[i + 1]
            }
            x[x == 0] <- 0.00001
            fracpol <- glm(y ~ log(x) + I(x^(1/2)) + I(x^1) + I(x^2), family = quasibinomial(logit))
            fs <- round(unname(predict(fracpol, type = "response")), 6)
            fs[fs < 0.0001] <- 0
            fs[fs > 0.9999] <- 1
            return(fs)
        }
        else if (method == "TFR") {
            E <- ecdf(x)
            return(pmax(0, (E(x) - E(1)) / (1 - E(1))))
        }
    }
}