#' Predict outcome of new data
#'
#' You can use `Predict_naBa` with any data frame.
#'
#' @param prior Prior information. (output of Info_prior())
#' @param newdata with the same format as prior dataset. (Output of prep())
#' @param type Type of outcome you want: "Class" prediction or "Raw" probabilities.
#' @param eps A small number to specify an epsilon-range for Laplace smoothing; default=0.
#' @param threshold Replace cells value under epsilon range; default=0.001.
#' @return
#' `output`: If type="class", returns a vector of predicted classes ;
#' If type="raw", returns a matrix of conditional probabilities.
#' @export
#' @examples
#' data(mood)
#' x=mood[,1:5]
#' y=mood[,6]
#' prior=Info_prior(x,y) #Laplace=0 as default
#' newdata=mood[1:200,1:5]
#' myresult=predict_naBa(prior,newdata,"raw")
predict_naBa=function(prior,newdata, type = c("class", "raw"),eps=0,threshold=0.001){
newdata=as.data.frame(newdata)
## fix factor variables to be identical with training data
attribs <- match(prior$var_names, names(newdata))
ppd_data=data.frame(matrix(nrow=nrow(newdata),ncol=length(attribs)))
not_na=which(!is.na(attribs))
ppd_data[,not_na]=newdata[,attribs[not_na]]
colnames(ppd_data)=prior$var_names
ny=length(prior$apriori)
newdata=ppd_data
num_var=prior$numvar_names
cat_var=prior$catvar_names
if (length(num_var)==0){
newdata_cat=as.matrix(newdata[,cat_var])
prob.cat=prob_cat(length(prior$apriori),names(prior$apriori),newdata_cat,prior$tables[colnames(newdata_cat)])
prob.cat=lapply(prob.cat,function(x) {
apply(x,2,function(a){
a[a<=eps]=threshold
a=log(a)
return (a)})})
numerator=as.data.frame(lapply(prob.cat,rowSums))}
else if (length(cat_var)==0){
newdata_num=as.matrix(newdata[,num_var])
prob.num=prob_num(length(prior$apriori),names(prior$apriori),newdata_num,prior$tables[colnames(newdata_num)])
prob.num=lapply(prob.num,function(x) {
apply(x,2,function(a){
a[a==0]=1
a[a<=eps]=threshold
a=log(a)
return (a)})})
numerator=as.data.frame(lapply(prob.num,rowSums))}
else{
newdata_num=as.matrix(newdata[,num_var])
newdata_cat=as.matrix(newdata[,cat_var])
prob.cat=prob_cat(length(prior$apriori),names(prior$apriori),newdata_cat,prior$tables[colnames(newdata_cat)])
prob.num=prob_num(length(prior$apriori),names(prior$apriori),newdata_num,prior$tables[colnames(newdata_num)])
prob.cat=lapply(prob.cat,function(x) {
apply(x,2,function(a){
a[a<=eps]=threshold
a=log(a)
return (a)})})
prob.num=lapply(prob.num,function(x) {
apply(x,2,function(a){
a[a==0]=1
a[a<=eps]=threshold
a=log(a)
return (a)})})
foo <- function(x, y) {list(x+y)}
numerator=as.data.frame(mapply(foo, lapply(prob.cat,rowSums), lapply(prob.num,rowSums)))
}
numerator=numerator+t(matrix(log(prior$apriori),ny,nrow(newdata)))
output=sapply(1:ny,function(y){ 1/rowSums(exp(numerator - numerator[,y]))})
if (type == "class"){output=as.factor(names(prior$apriori)[apply(output, 1, which.max)])}
else {colnames(output)=names(prior$apriori)}
return (output)
}
#' @export
#' @rdname predict.naBa
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.