R/predict.GAMens.R

#' Predicts from a fitted GAMens object (i.e., GAMbag, GAMrsm or GAMens
#' classifier).
#'
#' Generates predictions (classes and class membership probabilities) for
#' observations in a dataframe using a GAMens object (i.e., GAMens, GAMrsm or
#' GAMbag classifier).
#'
#'
#' @param object fitted model object of \code{GAMens} class.
#' @param data data frame with observations to genenerate predictions for.
#' @param ...  further arguments passed to or from other methods.
#' @return An object of class \code{predict.GAMens}, which is a list with the
#' following components: \item{pred}{the class membership probabilities
#' generated by the ensemble classifier. } \item{class}{the classes predicted
#' by the ensemble classifier. } \item{conf}{the confusion matrix which
#' compares the real versus predicted class memberships, based on the
#' \code{class} object. Obtains value \code{NULL} if the testdata is unlabeled.
#' }
#' @author Koen W. De Bock \email{kdebock@@audencia.com}, Kristof Coussement
#' \email{K.Coussement@@ieseg.fr} and Dirk Van den Poel
#' \email{Dirk.VandenPoel@@ugent.be}
#' @seealso \code{\link{GAMens}}, \code{\link{GAMens.cv}}
#' @references De Bock, K.W. and Van den Poel, D. (2012):
#' "Reconciling Performance and Interpretability in Customer Churn Prediction Modeling
#' Using Ensemble Learning Based on Generalized Additive Models".
#' Expert Systems With Applications, Vol 39, 8, pp. 6816--6826.
#'
#' De Bock, K. W., Coussement, K. and Van den Poel, D. (2010):
#' "Ensemble Classification based on generalized additive models".
#' Computational Statistics & Data Analysis, Vol 54, 6, pp. 1535--1546.
#'
#' Breiman, L. (1996): "Bagging predictors". Machine Learning, Vol 24, 2, pp.
#' 123--140.
#'
#' Hastie, T. and Tibshirani, R. (1990): "Generalized Additive Models", Chapman
#' and Hall, London.
#'
#' Ho, T. K. (1998): "The random subspace method for constructing decision
#' forests". IEEE Transactions on Pattern Analysis and Machine Intelligence,
#' Vol 20, 8, pp. 832--844.
#' @keywords models classif
#' @export
#' @import gam caTools mlbench splines
#' @importFrom stats predict
#' @examples
#' ## Load data, mlbench library should be loaded!)
#' library(mlbench)
#' data(Sonar)
#' SonarSub<-Sonar[,c("V1","V2","V3","V4","V5","V6","Class")]
#'
#' ## Select indexes for training set observations
#' idx <- c(sample(1:97,60),sample(98:208,70))
#'
#' ## Train GAMrsm using all variables in Sonar dataset. Generate predictions
#' ## for test set observations.
#' Sonar.GAMrsm <- GAMens(Class~.,SonarSub[idx,], autoform=TRUE, iter=10,
#' bagging=FALSE, rsm=TRUE)
#' Sonar.GAMrsm.predict <- predict(Sonar.GAMrsm,SonarSub[-idx,])
#'
#'
#' ## Load data mlbench library should be loaded!)
#' library(mlbench)
#' data(Ionosphere)
#' IonosphereSub<-Ionosphere[,c("V1","V2","V3","V4","V5","V6","V7","V8","Class")]
#' Ionosphere_s <- IonosphereSub[order(IonosphereSub$Class),]
#'
#' ## Select indexes for training set observations
#' idx <- c(sample(1:97,60),sample(98:208,70))
#'
#'
#' ## Compare test set classification performance of GAMens, GAMrsm and
#' ## GAMbag ensembles, using using 4 nonparametric terms and 2 linear terms in the
#' ## Ionosphere dataset
#' Ionosphere.GAMens <- GAMens(Class~s(V3,4)+s(V4,4)+s(V5,3)+s(V6,5)+V7+V8,
#' IonosphereSub[idx,], autoform=FALSE, iter=10, bagging=TRUE, rsm=TRUE)
#'
#' Ionosphere.GAMens.predict <- predict(Ionosphere.GAMens,
#' IonosphereSub[-idx,])
#'
#' Ionosphere.GAMrsm <- GAMens(Class~s(V3,4)+s(V4,4)+s(V5,3)+s(V6,5)+V7+V8,
#' IonosphereSub[idx,], autoform=FALSE, iter=10, bagging=FALSE, rsm=TRUE)
#'
#' Ionosphere.GAMrsm.predict <- predict(Ionosphere.GAMrsm,
#' IonosphereSub[-idx,])
#'
#' Ionosphere.GAMbag <- GAMens(Class~s(V3,4)+s(V4,4)+s(V5,3)+s(V6,5)+V7+V8,
#' IonosphereSub[idx,], autoform=FALSE, iter=10, bagging=TRUE, rsm=FALSE)
#'
#' Ionosphere.GAMbag.predict <- predict(Ionosphere.GAMbag,
#' IonosphereSub[-idx,])
#'
#' ## Calculate AUCs(for function colAUC, load caTools library)
#' library(caTools)
#' GAMens.auc <- colAUC(Ionosphere.GAMens.predict[[1]],
#' IonosphereSub[-idx,"Class"]=="good", plotROC=FALSE)
#'
#' GAMrsm.auc <- colAUC(Ionosphere.GAMrsm.predict[[1]],
#' Ionosphere[-idx,"Class"]=="good", plotROC=FALSE)
#'
#' GAMbag.auc <- colAUC(Ionosphere.GAMbag.predict[[1]],
#' IonosphereSub[-idx,"Class"]=="good", plotROC=FALSE)
#'
#'
predict.GAMens <-
function(object, data,...)
{
	formula <- object[[2]]
	fusion <- object[[8]]
	depvar <- as.character(formula[[2]])
	iter<-object[[3]]
	n <- length(data[,1])
	cutoff <- 0.5
	errors <- 1 - object[[12]]
	target_classes_s <- unique(object$class)[order(unique(object$class))]

	predictions_p <- data.frame(cbind(1:nrow(data)))
	predictions_c <- data.frame(cbind(1:nrow(data)))
	for (m in 1:iter) {
		pred <- predict(object[[1]][[m]],data,type="response")
		pred2 <- as.data.frame(as.numeric(pred))
		predictions_p <- cbind(predictions_p,pred2)
		pred2 <- as.data.frame(cbind(as.numeric(pred) > cutoff)*1)
		predictions_c <- cbind(predictions_c,pred2)

		temp_pred_p <- predictions_p
		temp_pred_p[is.na(temp_pred_p)] <- 0
		if (m > 1) {temp_sums <- rowSums(temp_pred_p[,2:ncol(temp_pred_p)])
			temp_n <- rowSums(!is.na(predictions_p[,2:ncol(temp_pred_p)]))

		}
		else { temp_sums <- temp_pred_p[,2:ncol(temp_pred_p)]
			temp_n <- !is.na(predictions_p[,2:ncol(temp_pred_p)])*1
		}
	}

	temp_pred_p <- predictions_p
	temp_pred_p[is.na(temp_pred_p)] <- 0
	temp_pred_c <- predictions_c
	temp_pred_c[is.na(temp_pred_c)] <- 0
	temp_sums <- rowSums(temp_pred_p[,2:ncol(temp_pred_p)])
	temp_sums_cl <- rowSums(temp_pred_c[,2:ncol(temp_pred_p)])
	temp_n <- rowSums(!is.na(predictions_p[,2:ncol(temp_pred_p)]))

	if (fusion == "avgagg") {
		pred <- cbind(temp_sums / temp_n)
		class <- as.numeric(pred > cutoff)
	}else if (fusion == "w.avgagg") {
		temp_sums_weighted <- as.matrix(temp_pred_p[,2:ncol(temp_pred_p)]) %*% (1 - errors)
		temp_n_weighted <- as.matrix(!is.na(predictions_p[,2:ncol(temp_pred_p)])*1) %*% (1 - errors)
		pred <- temp_sums_weighted / temp_n_weighted
		class <- as.numeric(pred > cutoff)
	}else if (fusion == "majvote") {
		pred <- cbind(temp_sums_cl / temp_n)
		class <- as.numeric(pred > cutoff)
	}else if (fusion == "w.majvote") {
		temp_sums_weighted <- as.matrix(temp_pred_c[,2:ncol(temp_pred_c)]) %*% (1 - errors)
		temp_n_weighted <- as.matrix(!is.na(predictions_c[,2:ncol(temp_pred_c)])*1) %*% (1 - errors)
		pred <- temp_sums_weighted / temp_n_weighted
		class <- as.numeric(pred > cutoff)}
	class <-  cbind(gsub(1,target_classes_s[[2]],class,fixed=FALSE))
	class <-  cbind(gsub(0,target_classes_s[[1]],class,fixed=FALSE))
	if (match(depvar,names(data),nomatch=0)!=0) {
		conf <- table(class, data[,as.character(formula[[2]])], dnn=c("Predicted Class", "Observed Class"))
		} else { conf <- NULL }
	output <- list(pred=pred, class=class, conf=conf)
}

Try the GAMens package in your browser

Any scripts or data that you put into this service are public.

GAMens documentation built on May 2, 2019, 1:07 p.m.