R/error_assessment.R

Defines functions MCE Example Example MCEbinary MCEordinal Discretize ErrConti_byLevel Example MCEordinal_byLevel Example Example EvalModel EvalModel_binMatch EvalModel_ord Example Build_cutMatrix pod_conti FAR_conti FPR_conti Calc_podfar AssessFit_conti CategJointDist Example CategPredErr Example

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# authors: Alireza Hosseini, Reza Hosseini

MCE = function(y, yPred, levels, lossMatrix) {

  n = length(y)
	loss = 0
	for (i in 1:n) {
		y1 = y[i]
		y2 = yPred[i]
		k1 = which(levels == y1)
		k2 = which(levels == y2)
		loss = loss + lossMatrix[k1, k2]/n
	}
	return(loss)
}


# Example
Example = function() {
	levels = c('L', 'A', 'H', 'E')
	lossMatrix = matrix(1, 4, 4)
	diag(lossMatrix) = 0
	lossMatrix[2,1] = 100
	y = c('L', 'L', 'L', 'A', 'E', 'L', 'A', 'A')
	yPred = y
	yPred[3] = 'H'
	yPred[8] = 'L'

	MCE(y, yPred, levels, lossMatrix)
}

### this one penalizes mistakes in the upper tail more severely
lossMatrixHE = matrix(NA, 4, 4)
for (i in 1:4) {
	for (j in 1:4) {
		lossMatrixHE[i,j] = abs(i-j)
	}
}
#lossMatrixHE[4,] = lossMatrixHE[4,]^2


Example = function() {

	lossMatrixExtr = matrix(NA, 4, 4)
	for (i in 1:4) {
		for (j in 1:4)
		{
			lossMatrixExtr[i,j] = abs(i-j)
		}
	}
	lossMatrixExtr[4,] = lossMatrixExtr[4, ]^2
}

MCEbinary = function(y, yPred) {

  n = length(y)
  loss = 0
  for (i in 1:n) {
	  y1 = y[i]
	  y2 = yPred[i]
	  loss = loss + (y1!=y2)/n
  }
  return(loss)
}

MCEordinal = function(y, yPred, levels) {

	n = length(y)
	loss = 0
	for (i in 1:n) {
		y1 = y[i]
		y2 = yPred[i]
		k1 = which(levels == y1)
		k2 = which(levels == y2)
		loss = loss + abs(k1-k2)/n
	}
	return(loss)
}

#y[8] = "E"
#MCEordinal(y,yPred,levels)

#y[8] = "A"
#MCEordinal(y,yPred,levels)

## function to convert continuous to categorical with given cuts
Discretize = function(y, cuts, levels) {
	n = length(y)
	m = (length(cuts))
	if (is.na(levels[1])) {
		levels = 1:(m-1)
		levels = factor(levels)
	}
	out = rep(NA, n)

	for (i in 1:n) {
		y1 = y[i]
		for (j in 1:(m-1)) {
			if (y1 >= cuts[j] & y1 <= cuts[j+1]) {
        out[i]=levels[j]
      }
		}
	}
	return(out)
}

#y = rnorm(20)
#cuts = c(-Inf,-1,0,1,Inf)
#Discretize(y,cuts,levels=c(1,2,3,4))
#y = rnorm(100)
#cuts = c(-2,-1,0,1,2)
#yCat = Discretize(y,cuts,levels=c(1,2,3,4))



##### continuous error by category
ErrConti_byLevel = function(y, yPred, cuts, tolerance=NA) {

	m = length(cuts)
	levels = factor(1:(m-1))
	yCat = Discretize(y, cuts, levels)
	yCat = as.factor(yCat)
	levels.test = levels

	m = length(levels.test)
	if (is.na(tolerance[1])) {tolerance=rep(0, m)}
	loss_vec = rep(NA, m)
	percent_loss_vec = rep(NA, m)
	bias_vec = rep(NA, m)
	percent_bias_vec = rep(NA, m)
	for (j in 1:m) {
		ind = which(yCat == as.character(levels.test)[j])
		if (length(ind) > 0) {
			y_sub = y[ind]
			yPred_sub = yPred[ind]
			n = length(y_sub)
			loss = 0
			if (n>0) {
				loss = mean(abs(yPred_sub - y_sub))
				bias = mean(yPred_sub - y_sub)

				z_sub = abs(y_sub)
				z_sub = maxComp(z_sub, tolerance[j])
				loss2 = mean(abs((yPred_sub - y_sub)/(z_sub)))
				bias2 = mean((yPred_sub - y_sub)/(z_sub))
			}
			loss_vec[j] = loss
			percent_loss_vec[j] = loss2
			bias_vec[j] = bias
			percent_bias_vec[j] = bias2
		}
	}
	outList = list(
      loss=loss_vec, percent_loss=percent_loss_vec,
		  bias=bias_vec, percent_bias=percent_bias_vec)
	return(outList)
}


Example = function() {

  y = seq(0, 50, by=1)
	y = y + rnorm(y)
	yPred = y + rnorm(length(y))

	yPred[2] = 1000
	yPred[40] = 1000

	cuts = c(-Inf, 0, 5.2, 5.5, 10, 30, Inf)
	ErrConti_byLevel(y, yPred,cuts)

	ErrConti_byLevel(y, yPred, cuts, tolerance=rep(1, length(cuts)))
}

### ordinal loss assessment per category
MCEordinal_byLevel = function(y, yPred, levels, levels.test) {

	m = length(levels.test)
	loss_vec = rep(NA, m)
	for (j in 1:m) {
		ind = which(y == levels.test[j])
		if (length(ind) > 0) {
			y_sub = y[ind]
			yPred_sub = yPred[ind]
			n = length(y_sub)
			loss = 0
			if (n>0) {
				for (i in 1:n) {
					y1 = y_sub[i]
					y2 = yPred_sub[i]
					k1 = which(levels == y1)
					k2 = which(levels == y2)
					loss = loss + abs(k1-k2)/n
				}
			}
			loss_vec[j] = loss
		}
	}
	return(loss_vec)
}

Example = function() {

	y = c("L", "H", "H", "H", "L", "E", "E", "L", "VH", "VH")
	yPred = y
	yPred[1] = "VH"
	yPred[2:3] = 'VH'
	levels = c("L", "H", "VH", "E")
	levels.test = levels
	MCEordinal_byLevel(y, yPred, levels, levels.test)
}


# Example
Example = function() {

	levels = c('L', 'A', 'H', 'E')
	lossMatrix = matrix(1, 4, 4)
	diag(lossMatrix) = 0
	lossMatrix[2, 1] = 100
	y = c('L', 'L', 'L', 'A', 'E', 'L', 'A', 'A')
	yPred = y
	yPred[3] = 'H'
	yPred[8] = 'L'

	MCE(y, yPred, levels, lossMatrix)
}

EvalModel = function(
    y, yPred, cuts=10*log(c(0, 20, 30, 50, Inf), 10)) {

  if (length(y) != length(yPred)) {
		print("Warning! Length mismatch between y and yPred.")
	}
	ind1 = which(is.na(y)==FALSE)
	ind2 = which(is.na(yPred)==FALSE)
	ind = intersect(ind1, ind2)
	y = y[ind]
	yPred = yPred[ind]
	print('cor')
	print(signif(cor(yPred, y), 2))
	print('MSE')
	print(signif(sqrt(mean((y-yPred)^2)), 2))
	yCat = Discretize(y, cuts, levels=c(1, 2, 3, 4))
	yPred_cat = Discretize(yPred, cuts, levels=c(1, 2, 3, 4))
	print('MCEbinary')
	print(signif(MCEbinary(yCat, yPred_cat), 2))
	print('MCE ordinal')
	print(signif(MCEordinal(yCat, yPred_cat, levels=c(1, 2, 3, 4)), 2))
	print('MCE ordinal Non Homogeneous')
	print(signif(MCE(yCat, yPred_cat, levels=c(1, 2, 3, 4), lossMatrixExtr), 2))
}

EvalModel_binMatch = function(y, yPred) {

  ind1 = which(!is.na(y))
	ind2 = which(!is.na(yPred))
	ind = intersect(ind1, ind2)
	y = y[ind]
	yPred = yPred[ind]
	print('MCEbinary is')
	sum((yPred != y)) / length(y)
}

EvalModel_ord = function(
  y, yPred, cuts=c(0, 20, 30, 50, 70, Inf)) {

	ind1 = which(!is.na(y))
	ind2 = which(!is.na(yPred))
	ind = intersect(ind1, ind2)
	y = y[ind]
	yPred = yPred[ind]

  levels = (1:(length(cuts)-1))
	yCat = Discretize(y, cuts, levels)
	#print("yCat original")
	#print(yCat)
	yPred_cat = Discretize(yPred, cuts, levels)
	#print("yPred_cat original")
	#print(yPred_cat)
	levelsTest = (levels)
	print('MCE_ord is')
	#plot(yCat,yPred_cat)
	res = MCEordinal_byLevel(yCat, yPred_cat, levels, levelsTest)
	return(res)
}

Example= function() {
	y = runif(100, min=0, max=70)*4 + rnorm(1:100)
	yPred = y + rnorm(100)
	y = abs(y)
	yPred = abs(yPred)
	cuts = c(0, 20, 30, 50, 70,Inf)
	EvalModel_ord(y, yPred, cuts=c(0, 20, 30, 50, 70, Inf))
	plot(y, yPred)
}

Build_cutMatrix = function(cuts) {

  n = length(cuts)-1
	cutMat = matrix(NA, n, 2)
	for (i in 1:n) {
		cutMat[i, ] = c(cuts[i], cuts[i+1])
	}
	return(cutMat)
}

## "Probability of detection" (pod) or  "true positive rate" (TPR): Probability(yPred =i | Y =i)
pod_conti = function(y, yPred, cuts=cuts) {
	cutMat = Build_cutMatrix(cuts)
	n = dim(cutMat)[1]
	out = rep(NA, n)
	for (i in 1:n) {
		cuts = cutMat[i, ]
		m=cuts[1]
		M=cuts[2]
		ind = which(y >= m & y <= M)
		subs = yPred[ind]
		L1 = length(subs)
		out[i] = 1
		if (L1 > 0)
		{
			ind2 = which(subs >= m & subs <= M)
			L2 = length(ind2)
			out[i] = L2/L1
		}
	}
	return(out)
}


## false alarm rate (far) or false discovery rate(FDR):  P(Y= not i |  yPred = i)
FAR_conti = function(y, yPred, cuts=cuts) {

	cutMat = Build_cutMatrix(cuts)
	n = dim(cutMat)[1]
	out = rep(NA, n)
	for (i in 1:n) {
		out[i] = 0
		cuts = cutMat[i, ]
		m=cuts[1]
		M=cuts[2]
		ind = which(yPred>=m & yPred<=M)
		subs = y[ind]
		L1 = length(subs)
		if (L1 > 0) {
			ind2 = which(subs>=m & subs<=M)
			L2 = length(ind2)
			out[i] = 1 - L2/L1
		}
	}
	return(out)
}


## false positive rate:  P(yPred = i | Y = not i)
FPR_conti = function(y, yPred, cuts=cuts) {

  cutMat = Build_cutMatrix(cuts)
	n = dim(cutMat)[1]
	out = rep(NA, n)
	for (i in 1:n) {
		out[i] = 0
		cuts = cutMat[i, ]
		m=cuts[1]
		M=cuts[2]
		ind = which(y < m | y > M)
		subs = yPred[ind]
		L1 = length(subs)
		if (L1 > 0)
		{
			ind2 = which(subs >= m & subs <= M)
			L2 = length(ind2)
			out[i] = L2/L1
		}
	}
	return(out)
}



Calc_podfar = function(yObs, yPred, cuts) {
	pod = pod_conti(yObs, yPred, cuts=cuts)
	far = FAR_conti(yObs, yPred, cuts=cuts)
	outList = list(pod=pod, far=far)
	return(outList)
}



## assessing the fit
AssessFit_conti = function(
    yObs, yPred, minThresh=NA, q=NA, silent=FALSE) {

	y1 = yObs
	y2 = yPred
	y_nonzero = yObs[yObs != 0]
	mean_nonzero = mean(yObs)
	if (is.na(minThresh) & !is.na(q)) {
		minThresh = quantile(y_nonzero,q)
	}
	if (is.na(minThresh) & is.na(q)) {
		minThresh = 0
	}

	d = minThresh
	z1 = maxComp(abs(y1), d)
	x = abs(y1 - y2)
	x = mean(x)
	#x = sqrt(x)
	x = round(x,1)
	MAE = x

	x = abs(y1 - y2)/mean_nonzero
	x = mean(x)*100
	#x = sqrt(x)
	x = round(x, 1)
	PMAE = x

	x = abs(y1 - y2)/z1
	x = mean(x)*100
	#x = sqrt(x)
	x = round(x, 1)

	#print('NMAE (%)')
	#print(x)

	x = abs(y1 - y2)^2
	x = mean(x)
	x = sqrt(x)
	x = round(x, 1)

	#print('rmse')
	#print(x)

	x = abs(y1-y2)^2
	x = mean(x)
	x = sqrt(x)
	x = (x/mean(z1))*100
	x = round(x, 1)

	#print('rmse %')
	#print(x)

	x = (y1-y2)^2
	x = mean(x)
	x = sqrt(x)/mean_nonzero
	x = round(x, 1)*100

	#print('Prmse (%)')
	#print(x)

	corr = round(100*cor(y1, y2), 1)

	if (!silent) {
		print('MAE');
		print(MAE)
		print("average (typical number)");
		print(mean_nonzero)
		print('PMAE (%)');
		print(PMAE)
		print('corr')
		print(corr)
	}

	return(list(MAE=MAE, PMAE=PMAE, corr=corr))
}



### categorical distbn
CategJointDist = function(y, z) {
	ind1 = which(!is.na(y))
	ind2 = which(!is.na(z))
	ind = intersect(ind1, ind2)
	y = y[ind]
	z = z[ind]

	factors1 = levels(y)
	factors2 = levels(z)
	factors = c(factors1, factors2)
	factors = unique(factors)
	L = length(factors)
	P = matrix(NA, L, L)
	for (i in 1:L) {
		for (j in 1:L) {
			faci = factors[i]
			facj = factors[j]
			#print(c(fac_i,fac_j))
			n_ij = sum(y==faci & z==facj)
			#print(n_ij)
			P[i,j] = n_ij/length(y)
		}
	}
	return(list(factors=factors, distbn=P))
}

Example = function() {
	y = 1:10
	z = 2:11
	y = cut(y, c(0, 5, 10))
	z = cut(z, c(0, 5, 10))
	CategJointDist(y, z)
}


CategPredErr = function(y, yPred) {

  res = CategJointDist(y, yPred)
	factors = res[['factors']]
	L = length(factors)
	P = res[['distbn']]
	pod = rep(NA, length(factors))
	far = rep(NA, length(factors))
	FPR = rep(NA, length(factors))

	for (i in 1:L) {
		pod[i] = P[i, i]/(sum(P[i, ]))
		FPR[i] = (sum(P[ , i]) - P[i, i])/(1 - sum(P[i, ]))
		far[i] = (sum(P[ , i]) - P[i, i])/sum(P[, i])
	}
	return(list(pod=pod, FPR=FPR, far=far))
}


Example = function() {

	yObs = c("H", "H", "H", "T", "T")
	yPred = c("H", "T", "H", "H", "H")
	yObs = as.factor(yObs)
	yPred = as.factor(yPred)

	y = 8:17
	z = 1:10

	yObs = cut(y, c(0, 5, 10))
	yPred = cut(z, c(0, 5, 10))

	CategJointDist(yObs, yPred)

	CategPredErr(yObs, yPred)

}
Reza1317/funcly documentation built on Feb. 5, 2020, 4:06 a.m.