R/occ_amount.R

Defines functions MinComp MaxComp Logit LogitInv FitOccAmount Example dBf InvDBf predict.occAmountMod Example PredictOccAmountCalib OptimizePodFar OptimizeCatLoss

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# authors: Alireza Hosseini, Reza Hosseini

######## Main functions for occurrence amount modelling
MinComp = function(x, y) {
    (x + y - abs(x - y))/2
}

MaxComp = function(x, y) {
    -MinComp(-x, -y)
}

Logit = function(x) {
    out = log(x/(1 - x))
    return(out)
}

LogitInv = function(x) {
    z = exp(x)
    if (z == Inf) {
        return(1)
    } else {
        return((z / (z + 1)))
    }
}

## this function fits an occurrence and amount model to inputs and returns the models
FitOccAmount = function(
    y,
    explanOcc,
    explanAmount,
    amountFamily=NULL,
    amountTransFcn=NULL,
    massPoint=0) {

    yBin = (y != massPoint)
    df = data.frame(explanOcc)
    occMod = glm(yBin ~ ., df, family="binomial")

    ind = which(y != massPoint)
    yNonzero = y[ind]

    explanAmount1 = explanAmount[ind, , drop=FALSE]
    df = data.frame(explanAmount1)

    if (!is.null(amountTransFcn)) {
        yNonzero = amountTransFcn(yNonzero)
    }

    if (is.null(amountFamily)) {
        amountFamily="gaussian"
    }

    #amountMod = glm(yPos~., df , family=amountFamily)
    amountText = paste(
        "amountMod = glm(yNonzero ~ ., df, family=", amountFamily, ")", sep="")
    eval(parse(text=amountText))

    out = list("occ"=occMod, "amount"=amountMod)
    class(out) = "occAmountMod"
    return(out)
}

Example = function() {
    n = 1000
    x1 = rnorm(n)
    x2 = rnorm(n)
    x3 = rnorm(n)
    explanOcc = cbind(x1, x2, x3)
    explanAmount = cbind(x1, x2, x3)
    y = x1 + x2 + 0*x3
    y = exp(y)
    y[x3 < 0] = 0
    res = FitOccAmount(y, explanOcc, explanAmount)
    summary(res[["occ"]])
    summary(res[["amount"]])
}

## Decible units
dBf = function(y) {
    10 * log(y, 10)
}

InvDBf = function(y) {
    10^(y/10)
}


##### prediction model
predict.occAmountMod = function(
    occAmountMod,
    explanOcc,
    explanAmount,
    amountTrans=NULL) {

    occMod = occAmountMod[["occ"]]
    amountMod = occAmountMod[["amount"]]
    explanOcc = data.frame(explanOcc)
    explanAmount = data.frame(explanAmount)

    names1 = names(occMod[["coefficients"]])[-1]
    names2 = names(explanOcc)

    if (length(names1) != length(names2)) {
        print(
            "WARNING: the number of predictors of the model
            and test data predictors
            do not match for OCC")
    }

    if (sum(names1 == names2) != max(length(names1), length(names2))) {
        print("WARNING: OCC names do not match")
    }


    names1 = names(amountMod[["coefficients"]])[-1]
    names2 = names(explanAmount)

    if (length(names1) != length(names2)) {
        print(
            "WARNING: the number of predictors of the
            model and test data predictors
            do not match for AMOUNT")
    }

    if (sum(names1 == names2) != max(length(names1), length(names2))) {
        print("WARNING: amount names do not match")
    }

    occPred = predict(occMod, newdata=data.frame(explanOcc))
    occPred = LogitInv(occPred)

    amountPred = predict(amountMod, newdata=data.frame(explanAmount))
    #amountPred = 10^(amountPred/10)
    if (!is.null(amountTrans)) {
        amountPred=amountTrans(amountPred)
    }

    out = list(occ=occPred, amount=amountPred, expected=occPred*amountPred)
    return(out)
}


Example = function() {
    n = 100
    x1 = sample(-10:10, n, replace=TRUE)
    x2 = abs(rnorm(n))
    x3 = abs(rnorm(n))

    explanOcc = cbind(x1,x2,x3)
    explanAmount = cbind(x1,x2,x3)

    y = x1 + x2 + 0*x3
    y[x3 < 1] = 0

    occAmountMod = FitOccAmount(y, explanOcc, explanAmount)
    summary(occAmountMod[["occ"]])
    summary(occAmountMod[["amount"]])

    res = predict(occAmountMod, explanOcc, explanAmount)
    plot(y, res[["expected"]])

    colnames(explanOcc) = c("z1", "z2", "z3")
    res = predict(occAmountMod, explanOcc, explanAmount)
    plot(y,res[["expected"]])

    ### simulation

    n = 10000
    x1 = sample(0:20, n, replace=TRUE)
    x2 = abs(rnorm(n))
    x3 = abs(rnorm(n))

    occMu = (1/2)*x1 + 0*x2 + 0*x3 - 10
    occProb = LogitInv(occMu)
    amount = 2*x1 + 0*x2 + x3

    for (i in 1:n) {
        occ[i] = rbinom(n=1, size=1, prob=occProb[i])
    }

    y = occ*amount

    plot(x1,y)

    obsData = data.frame(x1, x2, x3, occ=occ, amount=y)

    explanOcc = obsData[ , c("x1", "x2", "x3")]
    explanAmount = explanOcc

    mod = FitOccAmount(
        y, explanOcc, explanAmount, amountFamily=NULL,
        amountTransFcn=NULL, massPoint=0)
}

##### calibration algorithm. This algorithm calibrates the results of the prediction models
##### using probCutoff and amount.trans
## this is parabolic transformation
PredictOccAmountCalib = function(
    probPred,
    amountPred,
    probCutoff=1/2,
    amountPostTrans=c(0, 1, 0),
    expectation=FALSE) {

    if (is.null(amountPostTrans)) {
        amountPostTrans=c(0, 1, 0)
    }

    a1 = amountPostTrans[1]
    a2 = amountPostTrans[2]
    a3 = amountPostTrans[3]
    yBin = probPred > probCutoff
    yPred = yBin*(a1 + a2*amountPred + a3*amountPred^2)

    if (expectation) {
        yPred = probPred*(a1 + a2*amountPred + a3*amountPred^2)
    }
    return(yPred)
}


### This function returns the best possible min_pod given the
### farS with parabolic transformation
OptimizePodFar = function(
    yObs,
    probPred,
    amountPred,
    cuts,
    farThresh=rep(1,length(cuts)-1),
    podThresh=rep(0,length(cuts)-1),
    podInd=length(podThresh)) {

    probVec = seq(0, 1, by = 0.3)
    aGrid = seq(-10, 10, by = 2.5)
    bGrid = seq(-2, 2, by=0.25)
    cGrid = seq(-2, 2, by=0.25)

    search_grid = expand.grid(probVec, aGrid, bGrid, cGrid)
    dim(search_grid)
    optimal_trans =search_grid[1, ]
    value = 0
    n = dim(search_grid)[1]

    optimal_pod = rep(0, length(podThresh))
    optimal_far = rep(1, length(farThresh))

    for (i in 1:n) {
        s_g = search_grid[i, ]
        probCutoff = as.numeric(s_g[1])
        amountPostTrans = as.numeric(as.vector(c(s_g[2], s_g[3], s_g[4])))

        yPred = PredictOccAmountCalib(
            probPred, amountPred, probCutoff, amountPostTrans)

        #predict_occ_prob_amount
        #yPred2 = MinComp(yPred,60)
        #yPred = sqrt(yPred*yPred2)

        res = calc_podfar(yObs, yPred, cuts)

        pod1 = res[["pod"]]
        far1 = res[["far"]]

        min_pod = min(pod1[podInd])

        if (
            (min_pod > value) &
            (sum(far1 < farThresh) == length(far1)) &
            (sum(pod1 > podThresh) == length(pod1))) {
            optimal_trans=s_g
            optimal_pod = pod1
            optimal_far=far1
            value=min_pod
        }
        #print(optimal_trans)
    }
    optimal_trans
    pod1 = round(optimal_pod*100)
    far1 = round(optimal_far*100)
    out = list(pod=pod1, far=far1, trans=optimal_trans)
    return(out)
}


## optimizing categorical loss by transformations
OptimizeCatLoss = function(
    yObs,
    probPred,
    amountPred,
    cuts,
    catInd=(1:(length(cuts)-1)),
    upperBound=NULL,
    tol=NULL,
    percent_err=TRUE,
    expectation=NA) {
    #predict_occ_prob_amount
    if (!is.null(upperBound)) {
        for (i in 1:length(amountPred)) {
            if (amountPred[i] > upperBound) {
                amountPred[i] = (amountPred[i]*upperBound)^(1/2)
            }
        }
    }

    ## define tolerance vector across the board
    tolerance = rep(tol,length(cuts)-1)

    #probVec = seq(0.4, 0.6, by = 0.1)
    probVec = 0.5
    aGrid = seq(6,15,by = 2)
    bGrid = seq(0.7,1.5,by=0.2)
    cGrid = seq(0,0.4,by=0.2)
    #cGrid = 0

    search_grid = expand.grid(probVec,aGrid,bGrid,cGrid)
    dim(search_grid)
    optimal_trans = rep(NA,4)
    value = Inf
    N = dim(search_grid)[1]

    optimalCatLoss = rep(Inf, length(cuts))
    opt = NULL

    for (i in 1:N) {
        s_g = search_grid[i,]
        probCutoff = as.numeric(s_g[1])
        amountPostTrans = as.numeric(as.vector(c(s_g[2], s_g[3], s_g[4])))

        yPred = PredictOccAmountCalib(
            probPred=probPred,
            amountPred=amountPred,
            probCutoff=probCutoff,
            amountPostTrans=amountPostTrans,
            expectation=expectation)

        res = errConti_byLevel(
          y=yObs,
          yPred=yPred,
          cuts=cuts,
          tolerance=tolerance)

        err = res[[1]]
        if (percent_err) {
            err = res[[2]]
        }

        max_err = max(err[catInd])

        if (!is.na(max_err)) {
            if (max_err < value) {
                optimal_trans = s_g
                optimalCatLoss = err
                value=max_err;
                opt=res
            }
        }
    }

    out = list(err=optimalCatLoss, opt=opt, trans=optimal_trans)
    return(out)
}
Reza1317/funcly documentation built on Feb. 5, 2020, 4:06 a.m.