# R/fdaML_predict.R

#' @title Predict from a machine learning model
#'
#' @description Predict from a machine learning model trained with \code{\link{fdaML_train}}.
#'
#' @param obj An object of class \code{fdaModel} generated by \code{\link{fdaML_train}}.
#'
#' @param new_x A new functional predictor matrix.
#'
#' @param new_z A new non-functional predictor matrix.
#'
#' @param new_y A new response vector, used to assess the quality of the predictions.
#'
#' @param verbose Whether to print a progress bar (\code{TRUE}) or not (\code{FALSE}).
#'
#' @details
#' The new functional predictors are projected onto the basis stored in \code{obj} and
#' transformed with the centring and components learned during training; predictions are
#' then produced for each of the training repetitions. For binomial GLMs, the test
#' classification error is computed with the average error-minimising cutoff estimated
#' from the training data.
#'
#' @return An object of class \code{fdaModelPred}, which is a list containing the
#' predictions given by the model in \code{obj}, together with prediction-quality measures.
#'
#' @seealso
#' \code{\link{fdaML_train}}
#'
#' @examples
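#' # Illustrative sketch only: the data objects and the call to fdaML_train()
#' # below are hypothetical placeholders rather than a worked dataset.
#' \dontrun{
#' fit  <- fdaML_train(x = x_train, y = y_train)
#' pred <- fdaML_predict(fit, new_x = x_test, new_y = y_test)
#' pred$avgTestErr
#' }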
#'
#' @importFrom ROCR prediction performance
#' @importFrom caret confusionMatrix
#' @importFrom stats sd
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
#'
fdaML_predict <- function(obj, new_x, new_z = NULL, new_y, verbose = TRUE){

  if( !inherits(obj, "fdaModel") ){
    stop("Input 'obj' should be of class 'fdaModel'.")
  }

  uniq <- sort(unique(new_y));  lu <- length(uniq)

  # transform new data
  new_y_out <- new_y
  cenY <- mean(obj$y)
  new_y <- scale(new_y, center = cenY, scale = FALSE)
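  # project the new functional predictors onto the column-normalised basis stored in the trained object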
  new_XB <- new_x %*% t(t(obj$B) / colSums(obj$B))  # scaling of 'new_x' is done below as it is randomisation-dependent

  # initialisations
  newN <- length(new_y)
  new_XBV <- ROC_to_plot_binom <- list()
  new_y_pred <- resid_test <- matrix(NA, newN, obj$reps)
  err_test <- rep(NA, obj$reps)
  bias_test <- matrix(0, nrow=obj$reps, ncol=length(uniq)); colnames(bias_test) <- paste0("age",uniq)
  alpha <- rep(NA, obj$reps)
  beta <- matrix(NA, ncol(new_XB), obj$reps)
  gamma <- if(is.null(new_z)){ NULL }else{ matrix(NA, obj$S, obj$reps) }
  linear_predictor <- matrix(NA, newN, obj$reps)
  AUC_opt <- matrix(NA, obj$reps, 3);   colnames(AUC_opt) <- c("train","valid","test")
  avgTestErr_avgTrainCut <- confu <- dc <- NA
  classProbs <- classPreds <- list()
  avgErr <- rep(NA, obj$reps)
  err_out <- NA   # overall test error; stays NA if not computed for the given task/model/family
  err_stdev_out <- NULL
  avg_err_avgcut <- NULL

  # coefficients' positions
  coef_pos <- matrix(NA,3,2); rownames(coef_pos) <- c('alpha', 'gamma', 'beta'); colnames(coef_pos) <- c('from', 'to')
  if(obj$intercept == TRUE && obj$S != 0){
    coef_pos['alpha',] <- c(1,1)
    coef_pos['gamma',] <- c(2, obj$S+1)
    coef_pos['beta', ] <- c(obj$S+2, 1+obj$S+obj$Q_opt)
  }else if(obj$intercept == TRUE && obj$S == 0){
    coef_pos['alpha',] <- c(1,1)
    coef_pos['gamma',] <- c(NA,NA)
    coef_pos['beta', ] <- c(2, 1+obj$Q_opt)
  }else if(obj$intercept == FALSE && obj$S != 0){
    coef_pos['alpha',] <- c(NA,NA)
    coef_pos['gamma',] <- c(1, obj$S)
    coef_pos['beta', ] <- c(obj$S+1, obj$S+obj$Q_opt)
  }else if(obj$intercept == FALSE && obj$S == 0){
    coef_pos['alpha',] <- c(NA,NA)
    coef_pos['gamma',] <- c(NA,NA)
    coef_pos['beta', ] <- c(1, obj$Q_opt)
  }

  if(verbose){
    cat("","PREDICTING:", sep="\n")
    pb <- txtProgressBar(min = 0, max = obj$reps, initial = 0, style = 3)
  }

  for(rr in 1:obj$reps){

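    # centre the basis-projected predictors with the training centres for this repetition and
    # project onto the optimal components retained during training; scalar covariates, if any, are prepended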
    new_XBV[[rr]] <- scale(new_XB, center = obj$cenX[[rr]], scale = FALSE) %*% obj$D_opt[[rr]]
    if(!is.null(new_z)){
      new_XBV[[rr]] <- cbind(new_z, new_XBV[[rr]])
    }

    if(obj$task == "regr"){

      new_y_pred[,rr] <- fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]],
                                       optionsPred = list(intercept = obj$intercept, lam_cv_type = obj$lam_cv_type,
                                                          task = obj$task, model = obj$model, fam = obj$family))
      resid_test[,rr] <- new_y - new_y_pred[,rr]
      err_test[rr] <- GET_rmsd(new_y, new_y_pred[,rr])
      for(uu in seq_along(uniq)){  # bias per unique response value
        id <- (new_y+cenY) == uniq[uu]
        bias_test[rr,uu] <- mean(new_y_pred[id,rr]+cenY - uniq[uu])
      }
      err_out <- round(mean(err_test), 2)

    }else if(obj$task == "clas"){

      if(obj$family == "binomial"){

        new_y_pred[,rr] <- fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]],
                                         optionsPred = list(intercept = obj$intercept, lam_cv_type = obj$lam_cv_type,
                                                            task = obj$task, model = obj$model, fam = obj$family))
        AUC_opt[rr,] <- c(NA, NA, GET_auc(y_pred = new_y_pred[,rr],  y_true = new_y,  f = obj$family))
        ROC_to_plot_binom$pred_test[[rr]] <- new_y_pred[,rr]
        ROC_to_plot_binom$labels[[rr]]    <- new_y

      }else if(obj$family == "multinomial"){

        # get class probabilities by repetition (see also 'classProbs_byTrueLabel' below)
        classProbs[[rr]] <- drop(fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]],
                                               optionsPred = list(lam_cv_type = obj$lam_cv_type, intercept = obj$intercept,
                                                                  fam = obj$family, predType = "probs")))
        # get class predictions
        classPreds[[rr]] <- drop(fdaPrediction(m = obj$m_opt[[rr]], newX = new_XBV[[rr]],
                                               optionsPred = list(lam_cv_type = obj$lam_cv_type, intercept = obj$intercept,
                                                                  fam = obj$family, predType = "class")))
        new_y_pred[,rr] <- as.numeric(classPreds[[rr]]) - 1
        # sanity check: predicted labels are expected to start at 0
        if(all(new_y_pred[,rr] != 0)){ stop("Class labels must start at 0.") }
        # misclassification rate for this repetition
        avgErr[rr] <- mean(new_y_out != new_y_pred[,rr])

      }#family
    }#task
    if(verbose)
      setTxtProgressBar(pb,rr)
  }#rr

  if(verbose){
    cat("","", sep="\n")
  }


  # get class probabilities by true label for mGLM
  classProbs_byTrueLabel <- list()
  if(obj$family == "multinomial"){
    for(q in 1:lu){
      classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- 0
      idx <- new_y_out == uniq[q]
      for(r in 1:obj$reps){
        classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- rbind(classProbs_byTrueLabel[[paste0("lvl",uniq[q])]], classProbs[[r]][idx,])
      }
      classProbs_byTrueLabel[[paste0("lvl",uniq[q])]] <- classProbs_byTrueLabel[[paste0("lvl",uniq[q])]][-1,]
    }
  }

  # compute error for GLMs
  if(obj$model == "glm" && obj$family == "binomial"){

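    # The cutoff is estimated with ROCR: for each training repetition, find the cutoff that minimises the
    # classification error on the held-out predictions stored in 'obj', then average the cutoffs across repetitions.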
    # get average cutoff from training data
    pred <- prediction(obj$ROC_to_plot_binom$pred_test, obj$ROC_to_plot_binom$labels)
    perf <- performance(pred, "tpr", "fpr")
    err_perf <- performance(pred, measure = "err")
    err_ind <- sapply(1:obj$reps, function(z){ which.min(err_perf@y.values[[z]]) })
    if(any(err_ind == 1)){   # if the minimum is at the first cutoff, skip it when that cutoff is infinite in 'err_perf@x.values'
      tt <- (1:obj$reps)[err_ind == 1]
      for(l in seq_along(tt)){
        if( is.infinite(slot(err_perf, "x.values")[[tt[l]]][1]) ){ err_ind[tt[l]] <- 2 }
      }
    }
    err_val <- sapply(1:obj$reps, function(z){ err_perf@y.values[[z]][err_ind[z]] })
    err_cut <- sapply(1:obj$reps, function(z){ err_perf@x.values[[z]][err_ind[z]] })
    avg_err <- mean(err_val)
    avg_err_cut <- mean(err_cut)
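    # training-side error when the single averaged cutoff is applied to the held-out predictions in 'obj'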
    avg_err_avgcut <- mean(as.numeric(c(obj$y_testpred_opt) > avg_err_cut) != obj$y[unlist(obj$id_test)])

    # get error rate for testing data using average cutoff from training data
    error_by_rep <- sapply(1:obj$reps, function(zz){ mean((new_y_pred[,zz] > avg_err_cut) != (new_y + attr(new_y, 'scaled:center'))) })
    avgTestErr_avgTrainCut <- mean(error_by_rep)
    stdevTestErr_avgTrainCut <- sd(error_by_rep)
    #avgTestErr_avgTrainCut <- mean(as.numeric(c(new_y_pred) > avg_err_cut) != rep(new_y, obj$reps) + attr(new_y, 'scaled:center'))

    # confusion matrix
    confu <- confusionMatrix(data      = factor(as.numeric(c(new_y_pred) > avg_err_cut)),
                             reference = factor(rep(new_y, obj$reps) + attr(new_y, 'scaled:center')))$table
    dc <- list(tnr = confu[1,1] / sum(confu[,1]),
               tpr = confu[2,2] / sum(confu[,2]),
               fnr = confu[1,2] / sum(confu[,2]),
               fpr = confu[2,1] / sum(confu[,1]))

    err_out <- avgTestErr_avgTrainCut
    err_stdev_out <- stdevTestErr_avgTrainCut

  }else if(obj$model == "glm" && obj$family == "multinomial"){

    err_out <- mean(avgErr)
    if(length(uniq) > 1){
      # caret::confusionMatrix expects factors with identical levels
      lvls <- sort(union(uniq, unique(c(new_y_pred))))
      dc <- confusionMatrix(data      = factor(c(new_y_pred), levels = lvls),
                            reference = factor(rep(new_y_out, obj$reps), levels = lvls))$table
      #dc <- confu / matrix(rep(colSums(confu), lu), lu, lu, byrow=T)
    }

  }

  ret <- structure( list(new_x = new_x,
                         new_y = new_y_out,
                         new_z = new_z,
                         new_XBV = new_XBV,
                         new_y_pred = new_y_pred,
                         id_test = obj$id_test,
                         task = obj$task,
                         model = obj$model,
                         family = obj$family,
                         reps = obj$reps,
                         uniq = uniq,
                         lu = lu,
                         cenY = cenY,
                         avg_err_avgcut = avg_err_avgcut,
                         err_test = err_test,
                         bias_test = bias_test,
                         ROC_to_plot_binom = ROC_to_plot_binom,
                         AUC_opt = AUC_opt,
                         avgTestErr = err_out,
                         stdevTestErr = err_stdev_out,
                         confu = confu,
                         errorBreakdown = dc,
                         classProbs_byRep = classProbs,
                         classProbs_byTrueLabel = classProbs_byTrueLabel,
                         classPreds = classPreds
  ), class="fdaModelPred")
  return(ret)
}