#' @title Predict from a fitted machine learning model
#'
#' @description Compute predictions from a machine learning model fitted with
#' \code{\link{fdaML_train}}, using new functional (and, optionally, non-functional) predictors.
#'
#' @param obj An object of class \code{fdaModel} generated by \code{\link{fdaML_train}}.
#'
#' @param new_x A new functional predictor matrix.
#'
#' @param new_z A new non-functional predictor matrix.
#'
#' @param new_y A new response vector (required); it is used to assess the quality
#' of the predictions on the new data.
#'
#' @param verbose Whether to print a progress bar (\code{TRUE}) or not (\code{FALSE}).
#'
#' @return An object of class \code{fdaModelPred}: a list containing the predictions
#' given by the model in \code{obj}, together with the associated error and
#' classification summaries.
#'
#' @seealso
#' \code{\link{fdaML_train}}
#'
#' @examples
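#' \dontrun{
#' ## Illustrative sketch only: 'fit' is assumed to be an object of class
#' ## 'fdaModel' returned by fdaML_train(), and x_test, z_test, y_test are
#' ## assumed to hold held-out functional predictors, scalar covariates and
#' ## responses matching the training data.
#' pred <- fdaML_predict(fit, new_x = x_test, new_z = z_test, new_y = y_test)
#' pred$avgTestErr  # average test error across repetitions
#' }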
#'
#' @export
#'
fdaML_predict <- function(obj, new_x, new_z = NULL, new_y, verbose = TRUE){
if( !inherits(obj, "fdaModel") ){
stop("Input 'obj' should be of class 'fdaModel'.")
}
uniq <- sort(unique(new_y)); lu <- length(uniq)
# transform new data
new_y_out <- new_y
cenY <- mean(obj$y)
new_y <- scale(new_y, center = cenY, scale = FALSE)
new_XB <- new_x %*% t(t(obj$B) / colSums(obj$B)) # scaling of 'new_x' is done below as it is randomisation-dependent
# initialisations
newN <- length(new_y)
new_XBV <- ROC_to_plot_binom <- list()
new_y_pred <- resid_test <- matrix(NA, newN, obj$reps)
err_test <- rep(NA, obj$reps)
bias_test <- matrix(0, nrow=obj$reps, ncol=length(uniq)); colnames(bias_test) <- paste0("age",uniq)
alpha <- rep(NA, obj$reps)
beta <- matrix(NA, ncol(new_XB), obj$reps)
gamma <- if(is.null(new_z)){ NULL }else{ matrix(NA, obj$S, obj$reps) }
linear_predictor <- matrix(NA, newN, obj$reps)
AUC_opt <- matrix(NA, obj$reps, 3); colnames(AUC_opt) <- c("train","valid","test")
avgTestErr_avgTrainCut <- confu <- dc <- NA
classProbs <- classPreds <- list()
avgErr <- rep(NA, obj$reps)
err_stdev_out <- NULL
avg_err_avgcut <- NULL
# coefficients' positions
coef_pos <- matrix(NA,3,2); rownames(coef_pos) <- c('alpha', 'gamma', 'beta'); colnames(coef_pos) <- c('from', 'to')
if(obj$intercept && obj$S != 0){
coef_pos['alpha',] <- c(1,1)
coef_pos['gamma',] <- c(2,obj$S+1)
coef_pos['beta', ] <- c(obj$S+2,1+obj$S+obj$Q_opt)
}else if(obj$intercept && obj$S == 0){
coef_pos['alpha',] <- c(1,1)
coef_pos['gamma',] <- c(NA,NA)
coef_pos['beta', ] <- c(2,1+obj$Q_opt)
}else if(!obj$intercept && obj$S != 0){
coef_pos['alpha',] <- c(NA,NA)
coef_pos['gamma',] <- c(1,obj$S)
coef_pos['beta', ] <- c(obj$S+1,obj$S+obj$Q_opt)
}else if(!obj$intercept && obj$S == 0){
coef_pos['alpha',] <- c(NA,NA)
coef_pos['gamma',] <- c(NA,NA)
coef_pos['beta', ] <- c(1,obj$Q_opt)
}
if(verbose){
cat("","PREDICTING:", sep="\n")
pb = txtProgressBar(min = 0, max = obj$reps, initial = 0, style = 3)
}
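# loop over the training repetitions stored in 'obj': rebuild the projected design matrix for the new data and predict with each repetition's fitted model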
for(rr in 1:obj$reps){
new_XBV[[rr]] <- scale(new_XB, center = obj$cenX[[rr]], scale = FALSE) %*% obj$D_opt[[rr]]
if(!is.null(new_z)){
new_XBV[[rr]] <- cbind(new_z, new_XBV[[rr]])
}
if(obj$task == "regr"){
new_y_pred[,rr] <- fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]], optionsPred = list(intercept = obj$intercept, lam_cv_type=obj$lam_cv_type, task=obj$task, model=obj$model, fam=obj$family))
resid_test[,rr] <- new_y - new_y_pred[,rr]
err_test[rr] <- GET_rmsd(new_y, new_y_pred[,rr])
for(uu in 1:length(uniq)){# bias
id <- (new_y+cenY) == uniq[uu]
bias_test[rr,uu] <- mean(new_y_pred[id,rr]+cenY - uniq[uu])
}
err_out <- round(mean(err_test), 2)
}else if(obj$task == "clas"){
if(obj$family == "binomial"){
new_y_pred[,rr] <- fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]], optionsPred = list(intercept=obj$intercept, lam_cv_type=obj$lam_cv_type, task=obj$task, model=obj$model, fam=obj$family))
AUC_opt[rr,] <- c(NA, NA, GET_auc(y_pred = new_y_pred[,rr], y_true = new_y, f = obj$family))
ROC_to_plot_binom$pred_test[[rr]] <- new_y_pred[,rr]
ROC_to_plot_binom$labels[[rr]] <- new_y
}else if(obj$family == "multinomial"){
# get class probabilities by repetition (see also 'classProbs_byTrueLabel' below)
classProbs[[rr]] <- drop(fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]], optionsPred = list(lam_cv_type=obj$lam_cv_type, intercept=obj$intercept, fam=obj$family, predType="probs")))
# get class predictions
classPreds[[rr]] <- drop(fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]], optionsPred = list(lam_cv_type=obj$lam_cv_type, intercept=obj$intercept, fam=obj$family, predType="class")))
new_y_pred[,rr] <- as.numeric(classPreds[[rr]]) - 1
if(all(new_y_pred[,rr] != 0)){ stop("Class labels must start at 0.") }
# get misclassification rate
avgErr[rr] <- mean(new_y_out != new_y_pred[,rr])
}#family
}#task
if(verbose)
setTxtProgressBar(pb,rr)
}#rr
if(verbose){
cat("","", sep="\n")
}
# get class probabilities by true label for mGLM
classProbs_byTrueLabel <- list()
if(obj$family == "multinomial"){
for(q in 1:lu){
classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- 0
idx <- new_y_out == uniq[q]
for(r in 1:obj$reps){
classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- rbind(classProbs_byTrueLabel[[paste0("lvl",uniq[q])]], classProbs[[r]][idx,])
}
classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- classProbs_byTrueLabel[[paste0("lvl",uniq[q])]][-1,]
}
}
# compute error for GLMs
if(obj$model == "glm" & obj$family == "binomial"){
# get average cutoff from training data
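# 'prediction()' and 'performance()' are from the ROCR package; 'confusionMatrix()' further below is from caret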
pred <- prediction(obj$ROC_to_plot_binom$pred_test, obj$ROC_to_plot_binom$labels)
perf <- performance(pred, "tpr", "fpr")
err_perf <- performance(pred, measure = "err")
err_ind <- sapply(1:obj$reps, function(z){ which.min(err_perf@y.values[[z]]) })
if(any(err_ind == 1)){ # get rid of infinities in 'err_perf@x.values'
tt <- (1:obj$reps)[err_ind == 1]; for(l in 1:length(tt)){ if( is.infinite(slot(err_perf, "x.values")[[tt[l]]][1]) ){ err_ind[tt[l]] <- 2 }}
}
err_val <- sapply(1:obj$reps, function(z){ err_perf@y.values[[z]][err_ind[z]] })
err_cut <- sapply(1:obj$reps, function(z){ err_perf@x.values[[z]][err_ind[z]] })
avg_err <- mean(err_val)
avg_err_cut <- mean(err_cut)
avg_err_avgcut <- mean(as.numeric(c(obj$y_testpred_opt) > avg_err_cut) != obj$y[unlist(obj$id_test)])
# get error rate for testing data using average cutoff from training data
error_by_rep <- sapply(1:obj$reps, function(zz){ mean((new_y_pred[,zz] > avg_err_cut) != (new_y + attr(new_y, 'scaled:center'))) })
avgTestErr_avgTrainCut <- mean(error_by_rep)
stdevTestErr_avgTrainCut <- sd(error_by_rep)
#avgTestErr_avgTrainCut <- mean(as.numeric(c(new_y_pred) > avg_err_cut) != rep(new_y, obj$reps) + attr(new_y, 'scaled:center'))
# confusion matrix
confu <- confusionMatrix(data=factor(as.numeric(c(new_y_pred) > avg_err_cut)), reference = factor(rep(new_y, obj$reps) + attr(new_y, 'scaled:center')))$table
dc <- list(tnr = confu[1,1] / sum(confu[,1]),
tpr = confu[2,2] / sum(confu[,2]),
fnr = confu[1,2] / sum(confu[,2]),
fpr = confu[2,1] / sum(confu[,1]))
err_out <- avgTestErr_avgTrainCut
err_stdev_out <- stdevTestErr_avgTrainCut
}else if(obj$model == "glm" & obj$family == "multinomial"){
err_out <- mean(avgErr)
if(length(uniq) > 1){
dc <- confusionMatrix(data = factor(c(new_y_pred), levels = uniq), reference = factor(rep(new_y_out, obj$reps), levels = uniq))$table
#dc <- confu / matrix(rep(colSums(confu), lu), lu, lu, byrow=T)
}
}
ret <- structure( list(new_x = new_x,
new_y = new_y_out,
new_z = new_z,
new_XBV = new_XBV,
new_y_pred = new_y_pred,
id_test = obj$id_test,
task = obj$task,
model = obj$model,
family = obj$family,
reps = obj$reps,
uniq = uniq,
lu = lu,
cenY = cenY,
avg_err_avgcut = avg_err_avgcut,
err_test = err_test,
bias_test = bias_test,
ROC_to_plot_binom = ROC_to_plot_binom,
AUC_opt = AUC_opt,
avgTestErr = err_out,
stdevTestErr = err_stdev_out,
confu = confu,
errorBreakdown = dc,
classProbs_byRep = classProbs,
classProbs_byTrueLabel = classProbs_byTrueLabel,
classPreds = classPreds
), class="fdaModelPred")
return(ret)
}