#' Naive Bayes Classifier
#'
#' A function of Naive Bayes Classifier for numeric only independents.
#'
#' @param fit a data.table class modelling dataset; only support numeric independents.
#' @param newdata a data.table class dataset with target variable; only support numeric independents.
#' @param threshold a positive numeric, usually small; if the variance of independent is 0, then use this value to replace it to avoid perfect division.
#'
#' @return a list of modeling result. It contains the following components:
#' predict: a prediction result table.
#'
#' @export
predict_naive_bn=function(fit,newdata,threshold){
#newdata must be data.table class WITH TARGET VARIABLE
#newdata=data
#threshold=0.01 # replace 0 sd
zero.prob=1e-4 # replace 0 prob
newdata=newdata[,!fit$target,with=F]
names.ind=names(newdata)
n=ncol(newdata)
level.target=fit$prior$target
prob=vector("list",length(level.target))
if(fit$method=="exp"){
for (i in 1:length(level.target)){
#i=2
prob.ind=vector("list",n)
prior.target=fit$prior$prob[i]
for (j in 1:length(names.ind)){
#j=17
tp=names.ind[j]
if (fit$overall.mean[[tp]]!=0) lbd=1/fit$overall.mean[[tp]] else
lbd=1/threshold
index=fit$group.mean[[fit$target]]==level.target[i]
if (fit$group.mean[[tp]][index]!=0) lbd.cond=1/fit$group.mean[[tp]][index] else
lbd.cond=1/threshold
test=dexp(newdata[[tp]],rate=lbd)
if (sum(test==0)==0) {
prob.ind[[j]]=dexp(newdata[[tp]],rate=lbd.cond)/test
}else {
test[test==0]=zero.prob
prob.ind[[j]]=dexp(newdata[[tp]],rate=lbd.cond)/test
}
}
# compute target prob for level.target[i]
prob.ind.matrix=do.call("rbind",prob.ind)
prob[[i]]=apply(prob.ind.matrix,2,prod)*prior.target
}
}else if (fit$method=="normal") {
for (i in 1:length(level.target)){
#i=1
prob.ind=vector("list",n)
prior.target=fit$prior$prob[i]
for (j in 1:length(names.ind)){
#j=1
tp=names.ind[j]
if (fit$overall.sd[[tp]]!=0) sd.overall=fit$overall.sd[[tp]] else
sd.overall=threshold
mean.overall=fit$overall.mean[[tp]]
index=fit$group.mean[[fit$target]]==level.target[i]
if (fit$group.sd[[tp]][index]!=0) sd.cond=fit$group.sd[[tp]][index] else
sd.cond=threshold
mean.cond=fit$group.mean[[tp]][index]
test=dnorm(newdata[[tp]],mean=mean.overall,sd=sd.overall)
if(sum(test==0)==0){
prob.ind[[j]]=dnorm(newdata[[tp]],mean=mean.cond,sd=sd.cond)/test
} else {
test[test==0]=zero.prob
prob.ind[[j]]=dnorm(newdata[[tp]],mean=mean.cond,sd=sd.cond)/test
}
}
# compute target prob for level.target[i]
prob.ind.matrix=do.call("rbind",prob.ind)
prob[[i]]=apply(prob.ind.matrix,2,prod)*prior.target
}
}else if (fit$method=="gamma"){
for (i in 1:length(level.target)){
#i=1
prob.ind=vector("list",n)
prior.target=fit$prior$prob[i]
for (j in 1:length(names.ind)){
#j=1
tp=names.ind[j]
if ((fit$overall.a[[tp]])^2!=0) a.overall=fit$overall.a[[tp]] else
a.overall=sqrt(threshold)
k.overall=fit$overall.k[[tp]]
index=fit$group.a[[fit$target]]==level.target[i]
if ((fit$group.a[[tp]][index])^2!=0) a.cond=fit$group.a[[tp]][index] else
a.cond=sqrt(threshold)
k.cond=fit$group.k[[tp]][index]
test=dgamma(newdata[[tp]],shape=k.overall,scale=a.overall)
if(sum(test==0)==0){
prob.ind[[j]]=dgamma(newdata[[tp]],shape=k.cond,scale=a.cond)/test
} else {
test[test==0]=zero.prob
prob.ind[[j]]=dgamma(newdata[[tp]],shape=k.cond,scale=a.cond)/test
}
}
# compute target prob for level.target[i]
prob.ind.matrix=do.call("rbind",prob.ind)
prob[[i]]=apply(prob.ind.matrix,2,prod)*prior.target
}
}
rm(prob.ind.matrix,prob.ind)
prob.final=do.call("rbind",prob)
rm(prob)
prob.final.sum=apply(prob.final,2,sum)
pred=data.frame(t(sweep(prob.final,2,prob.final.sum,"/")))
rm(prob.final)
names(pred)=level.target
index=prob.final.sum %in% c(0,Inf) | is.nan(prob.final.sum)
temp=data.frame(rep(fit$prior$prob[1],sum(index)),rep(fit$prior$prob[2],sum(index)))
names(temp)=level.target
pred[index,]=temp
return(list(predict=pred))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.