R/stacking_regressor.R

Defines functions stacking_regressor stacking_regression_wrapper plot_stacking_regression_results

Documented in stacking_regression_wrapper stacking_regressor

#' Function to create the stacking model
#'
#' This function fits a stacking regression model from the prediction of the previous one
#' @param pred_tr is a list containing the vector of predictions for the training set
#' @param pred_te is a list containing the vector of predictions for the test set
#' @param y_train is the vector containing the response values for the training
#' @param y_test is the vector containing the response values for the test
#' @return an list containing the forllowing objects,
#' \item{fit}{the fitted stacked model}
#' \item{yhat_test}{vector of prediction on the test set}
#' \item{yhat_train}{vector of prediction on the training set}
#' \item{Metrics}{Named vector with internal and external validation metrics}
#' @keywords stacked regressor
#' @export
#' @examples
#'
stacking_regressor = function(pred_tr = abs.alpha.power.res$pred_tr,
                              pred_te = abs.alpha.power.res$pred_te,
                              y_test = abs.alpha.power.res$y_test,
                              y_train = abs.alpha.power.res$y_train){
  stacked_train = c()
  stacked_test = c()

  for(i in 1:length(pred_tr)){
    stacked_train = cbind(stacked_train, pred_tr[[i]])
  }

  for(i in 1:length(pred_te)){
    stacked_test = cbind(stacked_test, pred_te[[i]])
  }

  stacked_train = as.data.frame(stacked_train)
  colnames(stacked_train) = paste("Mod",1:ncol(stacked_train),sep="")
  # stacked_train = cbind(stacked_train,y_train)

  stacked_test = as.data.frame(stacked_test)
  colnames(stacked_test) = paste("Mod",1:ncol(stacked_train),sep="")

  fit = lm(formula = y_train~., data = stacked_train)

  yhat_test = predict(fit, as.data.frame(stacked_test))
  yhat_train = predict(fit, as.data.frame(stacked_train))

  r2_test = R2_func(observed = y_test, predicted = yhat_test)
  r2_train = R2_func(observed = y_train, predicted = yhat_train)

  # plot(y_train, yhat_train)
  # points(y_test, yhat_test, col = "red")
  # abline(lm(yhat_train~y_train))

  mse_te = sum( { y_test  -  yhat_test }^2 )/length(yhat_test)
  mse_tr = sum( { y_train  -  yhat_train }^2 )/length(yhat_train)

  c(Q2,CCC,Q2F1,Q2F2,Q2F3,xxx) %<-% validation_metrics_linear(y_train,stacked_train,train.prop=0.9,  nPerm = 100)
  wp = williams_plot_linear(X_train = as.matrix(stacked_train), X_test = as.matrix(stacked_test), y_train, y_test, fit)
  c(rm2,dr2m)  %<-% ojha_validation_metrics_linear_model(X_train = as.matrix(stacked_train), X_test = as.matrix(stacked_test), y_train, y_test)
  c(trm1, k, trm2, k1, trm3)  %<-%  tropsha_metrics(predicted = yhat_test,observed = y_test)

  Metrics = c(nMD=0, nGE=0, MSE_tr = mse_tr, MSE_te = mse_te, R2_tr = r2_train, R2_te = r2_test, Q2 = Q2, CCC=CCC,
              Q2F1=Q2F1,Q2F2=Q2F2,Q2F3=Q2F3,rm2=rm2,dr2m=dr2m, trm1=trm1, k=k, trm2=trm2,
              k1=k1, trm3=trm3,R2Yscr=NA,Q2Yscr=NA, AD_train = wp$ADVal[1], AD_test = wp$ADVal[2])

  return(list(fit = fit, y_test=y_test,y_train=y_train,yhat_test=yhat_test,yhat_train=yhat_train,Metrics = Metrics,wp=wp))
}

#' Function to create a minimal stacking model
#'
#' This function fits a stacking regression model from the prediction of the previous one
#' @param pred_tr is a list containing the vector of predictions for the training set
#' @param pred_te is a list containing the vector of predictions for the test set
#' @param y_train is the vector containing the response values for the training
#' @param y_test is the vector containing the response values for the test
#' @return an list containing the forllowing objects,
#' \item{fit}{the fitted stacked model}
#' \item{yhat_test}{vector of prediction on the test set}
#' \item{yhat_train}{vector of prediction on the training set}
#' \item{Metrics}{Named vector with internal and external validation metrics}
#' @keywords stacked regressor
#' @export
#' @examples
#'
stacking_regression_wrapper = function(pred_tr = abs.alpha.power.res$pred_tr,
                                        pred_te = abs.alpha.power.res$pred_te,
                                        y_test = abs.alpha.power.res$y_test,
                                        y_train = abs.alpha.power.res$y_train){

  names(pred_tr) = paste("Mod",1:length(pred_tr),sep="")
  names(pred_te) = paste("Mod",1:length(pred_te),sep="")

  stack_res = stacking_regressor(pred_tr = pred_tr, pred_te = pred_te, y_test = y_test,y_train =y_train)

  SF = summary(stack_res$fit)
  ID= which(SF$coefficients[,4]<0.05)
  if(sum(names(ID) %in% names(pred_tr)) <= 1){
    return(list(stack_res=stack_res,stack_res_2=stack_res))
  }

  stack_res_2 = stacking_regressor(pred_tr = pred_tr[names(ID)], pred_te = pred_te[names(ID)],y_test = y_test,y_train =y_train)
  return(list(stack_res=stack_res,stack_res_2=stack_res_2))
}

plot_stacking_regression_results = function(stack_res,type.transformation, stack_lev,xlab,ylab, start_path){

  write.table(stack_res$Metrics,file = paste(start_path,type.transformation,"_stacking_level_",stack_lev,"_unfiltered_metrics.csv",sep=""), quote = FALSE, sep="\t")

  pdf(file = paste(start_path,type.transformation,"_stacking_level_",stack_lev,"_WP.pdf",sep=""))
  plot_wp(stack_res$wp$DTP)
  dev.off()

  pdf(file = paste(start_path,type.transformation,"_stacking_level_",stack_lev,"_experimental_predicted.pdf",sep=""))
  plot_pred_real(pred_train=stack_res$yhat_train,
                 pred_test=stack_res$yhat_test,
                 train_class = stack_res$y_train, test_class=stack_res$y_test, xlab, ylab)
  dev.off()

  PredVal = cbind(rbind(cbind(stack_res$yhat_train, stack_res$y_train, abs(stack_res$yhat_train - stack_res$y_train)),
                        cbind(stack_res$yhat_test, stack_res$y_test, abs(stack_res$yhat_test - stack_res$y_test))),
                  c(rep("Train", length(stack_res$y_train)),rep("Test", length(stack_res$y_test))))

  colnames(PredVal) = c("Predicted_HSB","Experimental_HSB","Resuidusl","Train/Test")


  write.table(PredVal,file = paste(start_path,type.transformation,"_stacking_level_",stack_lev,"_exp_vs_pred,csv",sep=""), quote = FALSE, sep="\t")


}
angy89/hyQSAR documentation built on Sept. 24, 2019, 7:31 a.m.