#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# authors: Alireza Hosseini, Reza Hosseini
######## Main functions for occurrence amount modelling
MinComp = function(x, y) {
(x + y - abs(x - y))/2
}
MaxComp = function(x, y) {
-MinComp(-x, -y)
}
Logit = function(x) {
out = log(x/(1 - x))
return(out)
}
LogitInv = function(x) {
z = exp(x)
if (z == Inf) {
return(1)
} else {
return((z / (z + 1)))
}
}
## this function fits an occurrence and amount model to inputs and returns the models
FitOccAmount = function(
y,
explanOcc,
explanAmount,
amountFamily=NULL,
amountTransFcn=NULL,
massPoint=0) {
yBin = (y != massPoint)
df = data.frame(explanOcc)
occMod = glm(yBin ~ ., df, family="binomial")
ind = which(y != massPoint)
yNonzero = y[ind]
explanAmount1 = explanAmount[ind, , drop=FALSE]
df = data.frame(explanAmount1)
if (!is.null(amountTransFcn)) {
yNonzero = amountTransFcn(yNonzero)
}
if (is.null(amountFamily)) {
amountFamily="gaussian"
}
#amountMod = glm(yPos~., df , family=amountFamily)
amountText = paste(
"amountMod = glm(yNonzero ~ ., df, family=", amountFamily, ")", sep="")
eval(parse(text=amountText))
out = list("occ"=occMod, "amount"=amountMod)
class(out) = "occAmountMod"
return(out)
}
Example = function() {
n = 1000
x1 = rnorm(n)
x2 = rnorm(n)
x3 = rnorm(n)
explanOcc = cbind(x1, x2, x3)
explanAmount = cbind(x1, x2, x3)
y = x1 + x2 + 0*x3
y = exp(y)
y[x3 < 0] = 0
res = FitOccAmount(y, explanOcc, explanAmount)
summary(res[["occ"]])
summary(res[["amount"]])
}
## Decible units
dBf = function(y) {
10 * log(y, 10)
}
InvDBf = function(y) {
10^(y/10)
}
##### prediction model
predict.occAmountMod = function(
occAmountMod,
explanOcc,
explanAmount,
amountTrans=NULL) {
occMod = occAmountMod[["occ"]]
amountMod = occAmountMod[["amount"]]
explanOcc = data.frame(explanOcc)
explanAmount = data.frame(explanAmount)
names1 = names(occMod[["coefficients"]])[-1]
names2 = names(explanOcc)
if (length(names1) != length(names2)) {
print(
"WARNING: the number of predictors of the model
and test data predictors
do not match for OCC")
}
if (sum(names1 == names2) != max(length(names1), length(names2))) {
print("WARNING: OCC names do not match")
}
names1 = names(amountMod[["coefficients"]])[-1]
names2 = names(explanAmount)
if (length(names1) != length(names2)) {
print(
"WARNING: the number of predictors of the
model and test data predictors
do not match for AMOUNT")
}
if (sum(names1 == names2) != max(length(names1), length(names2))) {
print("WARNING: amount names do not match")
}
occPred = predict(occMod, newdata=data.frame(explanOcc))
occPred = LogitInv(occPred)
amountPred = predict(amountMod, newdata=data.frame(explanAmount))
#amountPred = 10^(amountPred/10)
if (!is.null(amountTrans)) {
amountPred=amountTrans(amountPred)
}
out = list(occ=occPred, amount=amountPred, expected=occPred*amountPred)
return(out)
}
Example = function() {
n = 100
x1 = sample(-10:10, n, replace=TRUE)
x2 = abs(rnorm(n))
x3 = abs(rnorm(n))
explanOcc = cbind(x1,x2,x3)
explanAmount = cbind(x1,x2,x3)
y = x1 + x2 + 0*x3
y[x3 < 1] = 0
occAmountMod = FitOccAmount(y, explanOcc, explanAmount)
summary(occAmountMod[["occ"]])
summary(occAmountMod[["amount"]])
res = predict(occAmountMod, explanOcc, explanAmount)
plot(y, res[["expected"]])
colnames(explanOcc) = c("z1", "z2", "z3")
res = predict(occAmountMod, explanOcc, explanAmount)
plot(y,res[["expected"]])
### simulation
n = 10000
x1 = sample(0:20, n, replace=TRUE)
x2 = abs(rnorm(n))
x3 = abs(rnorm(n))
occMu = (1/2)*x1 + 0*x2 + 0*x3 - 10
occProb = LogitInv(occMu)
amount = 2*x1 + 0*x2 + x3
for (i in 1:n) {
occ[i] = rbinom(n=1, size=1, prob=occProb[i])
}
y = occ*amount
plot(x1,y)
obsData = data.frame(x1, x2, x3, occ=occ, amount=y)
explanOcc = obsData[ , c("x1", "x2", "x3")]
explanAmount = explanOcc
mod = FitOccAmount(
y, explanOcc, explanAmount, amountFamily=NULL,
amountTransFcn=NULL, massPoint=0)
}
##### calibration algorithm. This algorithm calibrates the results of the prediction models
##### using probCutoff and amount.trans
## this is parabolic transformation
PredictOccAmountCalib = function(
probPred,
amountPred,
probCutoff=1/2,
amountPostTrans=c(0, 1, 0),
expectation=FALSE) {
if (is.null(amountPostTrans)) {
amountPostTrans=c(0, 1, 0)
}
a1 = amountPostTrans[1]
a2 = amountPostTrans[2]
a3 = amountPostTrans[3]
yBin = probPred > probCutoff
yPred = yBin*(a1 + a2*amountPred + a3*amountPred^2)
if (expectation) {
yPred = probPred*(a1 + a2*amountPred + a3*amountPred^2)
}
return(yPred)
}
### This function returns the best possible min_pod given the
### farS with parabolic transformation
OptimizePodFar = function(
yObs,
probPred,
amountPred,
cuts,
farThresh=rep(1,length(cuts)-1),
podThresh=rep(0,length(cuts)-1),
podInd=length(podThresh)) {
probVec = seq(0, 1, by = 0.3)
aGrid = seq(-10, 10, by = 2.5)
bGrid = seq(-2, 2, by=0.25)
cGrid = seq(-2, 2, by=0.25)
search_grid = expand.grid(probVec, aGrid, bGrid, cGrid)
dim(search_grid)
optimal_trans =search_grid[1, ]
value = 0
n = dim(search_grid)[1]
optimal_pod = rep(0, length(podThresh))
optimal_far = rep(1, length(farThresh))
for (i in 1:n) {
s_g = search_grid[i, ]
probCutoff = as.numeric(s_g[1])
amountPostTrans = as.numeric(as.vector(c(s_g[2], s_g[3], s_g[4])))
yPred = PredictOccAmountCalib(
probPred, amountPred, probCutoff, amountPostTrans)
#predict_occ_prob_amount
#yPred2 = MinComp(yPred,60)
#yPred = sqrt(yPred*yPred2)
res = calc_podfar(yObs, yPred, cuts)
pod1 = res[["pod"]]
far1 = res[["far"]]
min_pod = min(pod1[podInd])
if (
(min_pod > value) &
(sum(far1 < farThresh) == length(far1)) &
(sum(pod1 > podThresh) == length(pod1))) {
optimal_trans=s_g
optimal_pod = pod1
optimal_far=far1
value=min_pod
}
#print(optimal_trans)
}
optimal_trans
pod1 = round(optimal_pod*100)
far1 = round(optimal_far*100)
out = list(pod=pod1, far=far1, trans=optimal_trans)
return(out)
}
## optimizing categorical loss by transformations
OptimizeCatLoss = function(
yObs,
probPred,
amountPred,
cuts,
catInd=(1:(length(cuts)-1)),
upperBound=NULL,
tol=NULL,
percent_err=TRUE,
expectation=NA) {
#predict_occ_prob_amount
if (!is.null(upperBound)) {
for (i in 1:length(amountPred)) {
if (amountPred[i] > upperBound) {
amountPred[i] = (amountPred[i]*upperBound)^(1/2)
}
}
}
## define tolerance vector across the board
tolerance = rep(tol,length(cuts)-1)
#probVec = seq(0.4, 0.6, by = 0.1)
probVec = 0.5
aGrid = seq(6,15,by = 2)
bGrid = seq(0.7,1.5,by=0.2)
cGrid = seq(0,0.4,by=0.2)
#cGrid = 0
search_grid = expand.grid(probVec,aGrid,bGrid,cGrid)
dim(search_grid)
optimal_trans = rep(NA,4)
value = Inf
N = dim(search_grid)[1]
optimalCatLoss = rep(Inf, length(cuts))
opt = NULL
for (i in 1:N) {
s_g = search_grid[i,]
probCutoff = as.numeric(s_g[1])
amountPostTrans = as.numeric(as.vector(c(s_g[2], s_g[3], s_g[4])))
yPred = PredictOccAmountCalib(
probPred=probPred,
amountPred=amountPred,
probCutoff=probCutoff,
amountPostTrans=amountPostTrans,
expectation=expectation)
res = errConti_byLevel(
y=yObs,
yPred=yPred,
cuts=cuts,
tolerance=tolerance)
err = res[[1]]
if (percent_err) {
err = res[[2]]
}
max_err = max(err[catInd])
if (!is.na(max_err)) {
if (max_err < value) {
optimal_trans = s_g
optimalCatLoss = err
value=max_err;
opt=res
}
}
}
out = list(err=optimalCatLoss, opt=opt, trans=optimal_trans)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.