R/PLOT_MODEL.R
In SIMPLE.REGRESSION: OLS, Moderated, Logistic, and Count Regressions Made Simple

Documented in PLOT_MODEL

PLOT_MODEL <- function(model, 
                       IV_focal_1, IV_focal_1_values=NULL, 
                       IV_focal_2=NULL, IV_focal_2_values=NULL, 
                       IVs_nonfocal_values = NULL,
                       bootstrap=FALSE, N_sims=100, CI_level=95, 
                       xlim=NULL, xlab=NULL,
                       ylim=NULL, ylab=NULL,
                       title = NULL,
                       plot_save = FALSE, plot_save_type = 'png',
                       cols_user = NULL,
                       verbose=TRUE) {
  
  
  if (is.null(cols_user))
    cols_user <- c("blue", "red", 'black', 'cyan2', "blueviolet", 'limegreen', "yellow", 'mediumvioletred')
  # cols_user <- c("mediumvioletred", 'black', "blue", 'cyan2', "red", 'limegreen', "yellow", 'blueviolet')
  # cols_user <- c("ivory", 'black', "blue", 'cyan2', "red", 'limegreen', "yellow", 'blueviolet')
  
  
  # kind of model 
  if (inherits(model,"OLS_REGRESSION"))            kind = 'OLS'
  if (inherits(model,"MODERATED_REGRESSION"))      kind = 'MODERATED'
  if (inherits(model,"LOGISTIC_REGRESSION"))       kind = 'LOGISTIC'
  if (inherits(model,"COUNT_REGRESSION"))          kind = model$kind
  
  if (inherits(model,"MODERATED_REGRESSION"))   names(model)[5] <- 'modelMAIN'
  
  # # yhatR$se.fit is not available for 'zinfl_poisson' & 'zinfl_negbin' models 
  # # so can't do regular CIs, must bootstrap
  # bootstrap_flag <- FALSE
  # if (!bootstrap & model_type == 'ZINFL')  bootstrap <- bootstrap_flag <- TRUE
  
  modeldata <- model$modeldata
  
  
  # if (kind != 'OLS' & kind != 'LOGISTIC')  modeldata <- model$modeldata
  # 
  # if (kind == 'OLS' | kind == 'LOGISTIC')  modeldata <- model$modeldata_ORIG
  
  # if (kind != 'ZINFL' & kind != 'HURDLE')  modeldata <- model$modelMAIN$data
  
  # if (kind == 'ZINFL' | kind == 'HURDLE')  modeldata <- model$modeldata
  
  DV <- names(modeldata)[1]
  
  IVnames <- names(modeldata)[-1]
  
  
  
  #   if (kind != 'ZINFL' & kind != 'HURDLE') {
  #     
  #     modeldata <- model$modelMAIN$data
  #   
  #     DV <- names(modeldata)[1]
  #     
  #     IVnames <- names(modeldata)[-1]
  #   }
  #   
  #   if (kind == 'ZINFL' | kind == 'HURDLE') {
  #     
  #     modeldata <- model$modeldata
  #     
  #     
  #     
  #     
  #     
  #   
  #   # DV name
  #   if (kind == 'ZINFL' | kind == 'HURDLE') {
  #     DV <- names(attr(model$modelMAIN$terms$full, "dataClasses"))[1]
  #   } else { DV <- colnames(modeldata)[1] }
  #   
  #   
  #   names(attr(model$modelMAIN$terms, "dataClasses"))
  #   
  #   head(model$modelMAIN$data)
  #   
  #   
  #   
  #   # IV names
  #   if (kind == 'ZINFL' | kind == 'HURDLE') {
  #     IVnames <- attr(model$modelMAIN$terms$full, "term.labels")
  #   } else { IVnames <- attr(model$modelMAIN$terms, "term.labels") }
  # 
  #     
  #   # check if there is an offset variable & add it to IVnames
  #   if (kind == 'ZINFL' | kind == 'HURDLE') {
  #     
  #   # varnoms <- colnames(modeldata)
  #   # 
  #   # offset_any <- grepl( 'offset', varnoms, fixed = TRUE)
  #   # 
  #   # if (any(offset_any))  {
  #   #   offset_pos <- which(grepl( 'offset', varnoms, fixed = TRUE) == TRUE)
  #   #   # extract the real name of the offset variable
  #   #   offset_nom <- varnoms[offset_pos]
  #   #   IVnames <- c(IVnames, offset_nom)
  #   # }
  #   
  #   
  #   varnoms <- names(attr(model$modelMAIN$terms$full, "dataClasses"))
  #   offset_any <- grepl( 'offset', varnoms, fixed = TRUE)
  #   if (any(offset_any))  {
  #     offset_pos <- which(grepl( 'offset', varnoms, fixed = TRUE) == TRUE)
  #     # extract the real name of the offset variable
  #     offset_nom <- varnoms[offset_pos]
  #     offset_nom <- gsub("offset\\(", "", offset_nom)
  #     offset_nom <- gsub("\\)", "", offset_nom)
  #     IVnames <- c(IVnames, offset_nom)
  #     
  #     # sub the same name into modeldata
  #     modeldata_noms <- names(modeldata)
  #     offset_any <- grepl( 'offset', modeldata_noms, fixed = TRUE)
  #     offset_pos <- which(grepl( 'offset', varnoms, fixed = TRUE) == TRUE)
  #     colnames(modeldata)[offset_pos] <- offset_nom
  #   }
  # }
  
  
  
  
  # remove interaction terms i.e., that contain :
  IVnames <- IVnames[!grepl(':', IVnames)]
  
  # are any of the predictors factors?
  if (kind == 'ZINFL' | kind == 'HURDLE') { 
    list_xlevels <- model$modelMAIN$levels
  } else { list_xlevels <- model$modelMAIN$xlevels }
  
  
  
  # IV_focal_1_values
  
  # test if IV_focal_1 is in the IVnames
  if (!is.element(IV_focal_1, IVnames)) 
    message('\nThe IV_focal_1 variable does not appear in the model predictors. Perhaps there is a spelling error.\n')
  
  # if IV_focal_1 is a factor
  if (IV_focal_1 %in% names(list_xlevels)) {
    
    # if IV_focal_1 is a factor & IV_focal_1_values were NOT provided, use all levels of it
    if (is.null(IV_focal_1_values))  IV_focal_1_values <- unlist(list_xlevels[IV_focal_1])
    
    # if IV_focal_1 is a factor & IV_focal_1_values were provided, test if valid levels of it were provided
    if (!is.null(IV_focal_1_values) & !all(is.element(IV_focal_1_values,  unlist(list_xlevels[IV_focal_1])))) {
      
      eles_false <- which(is.element(IV_focal_1_values, unlist(list_xlevels[IV_focal_1])) == FALSE)
      
      invalid_levels <- IV_focal_1_values[eles_false]
      
      message('\nThe following specified levels of "', IV_focal_1 , '" do not exist in the model: ', invalid_levels)
      
      IV_focal_1_values <- IV_focal_1_values [! IV_focal_1_values %in% invalid_levels]
    }
  }
  
  # if IV_focal_1 is numeric & IV_focal_1_values were not provided
  if (is.null(IV_focal_1_values)) {
    
    IV_focal_1_range <- range(modeldata[IV_focal_1])
    
    IV_focal_1_values <- seq(IV_focal_1_range[1], IV_focal_1_range[2], length.out = 100)
  }
  
  
  
  # IV_focal_2_values
  
  # notice if IV_focal_2 is not provided but IV_focal_2_values are provided
  if (is.null(IV_focal_2) & !is.null(IV_focal_2_values)) 
    message('\nIV_focal_2_values were provided but without specifying the name of IV_focal_2.')
  
  if (!is.null(IV_focal_2)) {
    
    # test if IV_focal_2 is in the IVnames
    if (!is.element(IV_focal_2, IVnames)) 
      message('\nThe IV_focal_2 variable does not appear in the model predictors. Perhaps there is a spelling error.')
    
    # if IV_focal_2 is a factor
    if (IV_focal_2 %in% names(list_xlevels)) {
      
      # if IV_focal_2 is a factor & IV_focal_2_values were NOT provided, use all levels of it
      if (is.null(IV_focal_2_values))  IV_focal_2_values <- unlist(list_xlevels[IV_focal_2])
      
      # if IV_focal_2 is a factor & IV_focal_2_values were provided, test if valid levels of it were provided
      if (!is.null(IV_focal_2_values) & !all(is.element(IV_focal_2_values,  unlist(list_xlevels[IV_focal_2])))) {
        
        eles_false <- which(is.element(IV_focal_2_values, unlist(list_xlevels[IV_focal_2])) == FALSE)
        
        invalid_levels <- IV_focal_2_values[eles_false]
        
        message('\nThe following specified levels of "', IV_focal_2 , '" do not exist in the model: ', invalid_levels)
        
        IV_focal_2_values <- IV_focal_2_values [! IV_focal_2_values %in% invalid_levels]
      }
    }
    
    # if IV_focal_2 is numeric & IV_focal_2_values were not provided -- have to use just a few values
    if (is.null(IV_focal_2_values)) {
      
      IV_focal_2_mn <- mean(unlist(modeldata[IV_focal_2]))
      
      IV_focal_2_sd <- sd(unlist(modeldata[IV_focal_2]))
      
      IV_focal_2_values <- c((IV_focal_2_mn - IV_focal_2_sd), IV_focal_2_mn, (IV_focal_2_mn + IV_focal_2_sd))
    }
  }
  
  
  
  # non focal IV values
  
  IVs_nonfocal <- setdiff(IVnames, c(IV_focal_1, IV_focal_2))
  
  # remove interaction terms i.e., that contain :
  IVs_nonfocal <- IVs_nonfocal[!grepl(':', IVs_nonfocal)]
  
  IVs_nonfocal_values_list <- c()
  
  # when there are factors, cycle through the IVs_nonfocal, use mean if continuous, use baseline if categorical
  if (length(IVs_nonfocal) > 0) {
    for (lupe in 1:length(IVs_nonfocal)) {
      
      if (IVs_nonfocal[lupe] %in% names(list_xlevels)) {
        
        IVs_nonfocal_values_list[[IVs_nonfocal[lupe]]] <- sort(list_xlevels[[IVs_nonfocal[lupe]]])[1]
        
      } else { IVs_nonfocal_values_list[[IVs_nonfocal[lupe]]] <- mean(modeldata[,IVs_nonfocal[lupe]]) }
      
    }
    
    # if IVs_nonfocal_values were provided by user, substitute them into IVs_nonfocal_values_list
    # IVs_nonfocal_values = list( EDUC = 5)
    if (!is.null(IVs_nonfocal_values)) {
      
      # first check to make sure the names of the IVs_nonfocal_values are in the data
      wrongnoms <- setdiff( names(IVs_nonfocal_values), names(IVs_nonfocal_values_list) )
      if (!identical(wrongnoms, character(0))) 
        message('\n\nOne or more names on IVs_nonfocal_values does not exist in the model data. Expect errors.\n')
      
      for (lupe in 1:length(IVs_nonfocal_values))
        IVs_nonfocal_values_list[names(IVs_nonfocal_values)[lupe]] <- IVs_nonfocal_values[lupe]
    }
  }
  
  IVs_nonfocal_values_df <- data.frame(cbind(lapply(IVs_nonfocal_values_list, 
                                                    function(y) if(is.numeric(y)) round(y, 2) else y)) )
  colnames(IVs_nonfocal_values_df) <- NULL
  
  
  
  # testdata for predict
  testdata <- IVs_nonfocal_values_list
  
  testdata[[IV_focal_1]] <- IV_focal_1_values
  
  if (!is.null(IV_focal_2)) testdata[[IV_focal_2]] <- IV_focal_2_values
  
  testdata <- do.call(expand.grid, testdata)
  
  # making sure that the order of the variables for testdata is the same as for model$modelMAIN
  testdata <- testdata[IVnames]
  
  head(testdata)
  
  # getting the predicted values & CIs
  testdata <- cbind(testdata, predict_boc(modelMAIN=model$modelMAIN, modeldata=modeldata,
                                          newdata=testdata, CI_level=CI_level, bootstrap=bootstrap, 
                                          kind=kind, family=model$family))
  
  head(testdata)
  
  
  # are CIs available?
  if (anyNA(testdata)) { CIs <- FALSE } else {CIs <- TRUE}
  
  
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  
  if (plot_save == TRUE) {
    
    plot_title = title
    
    if (kind != 'ZINFL' & kind != 'HURDLE') {height=7; width=9}
    if (kind  == 'ZINFL' | kind == 'HURDLE')  {height=9; width=7}
    
    if (is.null(plot_save_type))  plot_save_type = 'png'
    
    if (plot_save_type == 'bitmap')
      bitmap(paste("Figure - ",plot_title,".bitmap",sep=""), height=height, width=width, units='in', res=1200, pointsize=12)
    
    if (plot_save_type == 'tiff')
      tiff(paste("Figure - ",plot_title,".tiff",sep=""), height=height, width=width, units='in', res=1200, pointsize=12)
    
    if (plot_save_type == 'png')
      png(paste("Figure - ",plot_title,".png",sep=""), height=height, width=width, units='in', res=1200, pointsize=12)
    
    if (plot_save_type == 'jpeg')
      jpeg(paste("Figure - ",plot_title,".jpeg",sep=""), height=height, width=width, units='in', res=1200, pointsize=12)
    
    if (plot_save_type == 'bmp')
      bmp(paste("Figure - ",plot_title,".bmp",sep=""), height=height, width=width, units='in', res=1200, pointsize=12)
    
    # if (kind != 'ZINFL' & kind != 'HURDLE')
    #   par(mfrow=c(1,1), pty="m", mar=c(3,2,3,2) + 2.6)
  }
  
  
  
  if (kind == 'OLS' | kind == 'MODERATED' | kind == 'LOGISTIC' | 
      kind == 'POISSON' | kind == 'NEGBIN') {
    
    par(mfrow=c(1,1), pty="m", mar=c(3,2,3,2) + 2.6)
    
    # renaming yhat
    found <- match(colnames(testdata), 'yhat', nomatch = 0)
    DV_predicted <- paste(DV, '_predicted', sep='')
    colnames(testdata)[colnames(testdata) %in% 'yhat'] <- DV_predicted
    
    head(testdata)
    
    if (is.null(xlim) & is.numeric((testdata[,IV_focal_1])))  
      xlim <- c(min(testdata[,IV_focal_1]), max(testdata[,IV_focal_1]))
    
    if (is.null(xlab))  xlab <- IV_focal_1
    
    if (is.null(ylim)) {
      
      if (kind == 'OLS' | kind == 'MODERATED')  
        # ylim <- c(min(testdata$ci_lb), max(testdata$ci_ub))
        ylim <- range( pretty(testdata$ci_lb), pretty(testdata$ci_ub))
      
      if (kind == 'LOGISTIC')  ylim <- c(0, 1)
      
      if (kind == 'POISSON' | kind == 'NEGBIN')  {
        
        if (CIs) { ylim <- range(pretty(c(0, max(testdata$ci_ub))))   # c(0, max(testdata$ci_ub))
        } else {   ylim <- range(pretty(c(0, max(modeldata[DV]))))   # c(0, max(modeldata[DV]))} 
        }
      }
      # if (kind != 'LOGISTIC')  ylim[2] <- ylim[2] + (ylim[2] * .05)
    }
    
    if (is.null(ylab)) {
      
      if (kind == 'OLS' | kind == 'MODERATED')  ylab <- DV
      
      if (kind == 'LOGISTIC')  ylab <- paste("Probability of ", DV)
      
      if (kind == 'POISSON' | kind == 'NEGBIN')     ylab <- DV
    }
    
    if (is.null(title)) {
      
      if (kind == 'OLS' | kind == 'MODERATED') title <- paste('OLS regression prediction of', DV)
      if (kind == 'LOGISTIC')  title <- paste('Logistic regression prediction of', DV)
      if (kind == 'POISSON' | kind == 'NEGBIN') title <- paste('Count regression prediction of', DV)
    }  
    
    plotfun(testdata=testdata, list_xlevels=list_xlevels, DV_predicted=DV_predicted, 
            CIs=CIs, kind=kind, cols_user=cols_user,
            xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, title=title, 
            IV_focal_1=IV_focal_1, IV_focal_1_values=IV_focal_1_values, 
            IV_focal_2=IV_focal_2, IV_focal_2_values=IV_focal_2_values)
  }
  
  
  if (kind == 'ZINFL' | kind == 'HURDLE') {
    
    # par(mfrow = c(2, 1),  mar = c(5,6,4,12))  #, xpd= NA )  # oma = c(0,4,0,4) )#, mar = c(2,2,1,1)).  , xpd=TRUE
    # par(mfrow=c(1,1), pty="m", mar=c(3,2,3,2) + 2.6)
    
    par(mfrow = c(2, 1),  oma = c(0,4,0,6) )  #, mar = c(2,2,1,1))
    
    testdata_ORIG <- testdata
    
    
    if (is.null(xlim) & !is.factor(testdata[,IV_focal_1]))  
      xlim <- c(min(testdata[,IV_focal_1]), max(testdata[,IV_focal_1]))
    
    if (is.factor(testdata[,IV_focal_1]))  xlim <- NULL
    
    if (is.null(xlab))  xlab <- IV_focal_1
    
    
    # zero data
    # remove the count data
    testdata <- testdata_ORIG[,!grepl("count", colnames(testdata_ORIG))]
    # remove "_zero" from column names
    names(testdata) <- gsub(pattern = "_zero", replacement = "", x = names(testdata))
    
    # reversing yhat for hurdle models, to make DV = the probability of crossing the hurdle
    if (kind == 'HURDLE') testdata$yhat <- 1 - testdata$yhat
    
    # renaming yhat
    found <- match(colnames(testdata), 'yhat', nomatch = 0)
    DV_predicted <- paste(DV, '_predicted', sep='')
    colnames(testdata)[colnames(testdata) %in% 'yhat'] <- DV_predicted
    
    ylim <- c(0, 1)
    
    ylab <- 'Probability'
    
    if (kind == 'ZINFL')  title <- paste(DV, ':\nProbability of excess zeroes', sep='')
    # if (kind == 'HURDLE') title <- paste(DV, ':\nProbability of zero', sep='')
    if (kind == 'HURDLE') title <- paste(DV, ':\nProb. of hurdle cross', sep='')
    
    
    plotfun(testdata=testdata, list_xlevels=list_xlevels, DV_predicted=DV_predicted, 
            CIs=CIs, kind=kind, cols_user=cols_user,
            xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, title=title, 
            IV_focal_1=IV_focal_1, IV_focal_1_values=IV_focal_1_values, 
            IV_focal_2=IV_focal_2, IV_focal_2_values=IV_focal_2_values)
    
    
    # count data
    # remove the zero data
    testdata <- testdata_ORIG[,!grepl("zero", colnames(testdata_ORIG))]
    # remove "_count" from column names
    names(testdata) = gsub(pattern = "_count", replacement = "", x = names(testdata))
    
    # renaming yhat
    found <- match(colnames(testdata), 'yhat', nomatch = 0)
    DV_predicted <- paste(DV, '_predicted', sep='')
    colnames(testdata)[colnames(testdata) %in% 'yhat'] <- DV_predicted
    
    if (CIs) { ylim <- c(0, max(testdata$ci_ub))
    } else {   ylim <- c(0, max(modeldata[DV]))} 
    
    ylab <- DV
    
    title <- paste(DV, ':\nExpected counts', sep='')
    
    plotfun(testdata=testdata, list_xlevels=list_xlevels, DV_predicted=DV_predicted, 
            CIs=CIs, kind=kind, cols_user=cols_user,
            xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, title=title, 
            IV_focal_1=IV_focal_1, IV_focal_1_values=IV_focal_1_values, 
            IV_focal_2=IV_focal_2, IV_focal_2_values=IV_focal_2_values)
    
    testdata <- testdata_ORIG
  }
  
  
  if (plot_save == TRUE)  dev.off()
  
  
  if (verbose) {
    
    message('\nThe DV is "', DV, '"')
    
    # if IV_focal_1 is a factor vs numeric
    if (IV_focal_1 %in% names(list_xlevels)) { IV_focal_1_type <- 'a factor'
    } else { IV_focal_1_type <- 'numeric' }
    
    message('\nThe focal, varying IV is "', IV_focal_1, '", and it is ', IV_focal_1_type)
    
    if (!is.null(IV_focal_2)) {
      # if IV_focal_2 is a factor vs numeric
      if (IV_focal_2 %in% names(list_xlevels)) { IV_focal_2_type <- 'a factor'
      } else { IV_focal_2_type <- 'numeric' }
      
      message('\nThe second varying IV (moderator) is "', IV_focal_2, '," and it is ', IV_focal_2_type)
      message('\nThe levels of "', IV_focal_2, '" are:')
      if (is.numeric(IV_focal_2_values))  print(round(IV_focal_2_values,3))
      if (!is.numeric(IV_focal_2_values)) {
        rownames(IV_focal_2_values) <- NULL
        # print(unname(IV_focal_2_values), row.names = F)
        print(  unname(as.data.frame(IV_focal_2_values)), quote = FALSE, row.names = FALSE)
      }
    }
    
    # if (!is.null(IVs_nonfocal)) {
    if (length(IVs_nonfocal) > 0) {
      message('\nThe constant values for the other predictors are:')
      print(IVs_nonfocal_values_df, print.gap=4)
    }
    
    message('\nThe confidence interval percentage is: ', CI_level)
    
    message('\nBootstrapped confidence intervals: ', bootstrap)
    
    if (bootstrap) {
      # if (bootstrap_flag) {
      #   message('\nConventional CIs cannot be computed for zero-inflated models at this time.')
      #   message('Bootrapped CIs were computed instead.\n')
      # }
      message('\nThe number of bootstrap simulations: ', N_sims)
    }
    
    message('\nThe top rows of the output data matrix:\n')
    print(head(round_boc(testdata, 3), n=6), print.gap=4)
    
    message('\nThe bottom rows of the output data matrix:\n')
    print(tail(round_boc(testdata, 3), n=6), print.gap=4)
  }
  
  return(invisible(testdata)) 
}