# R/write_batch_file.R

#' Create a GUIDE batch input file
#'
#' Writes the batch input file that drives a GUIDE run to `i_fpath[["in"]]`.
#' The file is started from scratch on every call (the first line is written
#' without `append`) and every following line is appended through `catret()`.
#' Each line holds one answer followed by a parenthesised reminder of the
#' available choices.
#'
#' @param i_singleTree Logical scalar. `TRUE` writes the configuration of a
#'   single regression tree, `FALSE` the configuration of a tree ensemble.
#' @param i_treeType Character scalar naming the tree type. Values handled for
#'   single trees: `"LS-constant"`, `"LS-linear"`, `"LS-bestPolynomial"`,
#'   `"LS-multilinear"`, `"LS-simpleANCOVA"` (alias `"LS-ANCOVA"`),
#'   `"LMS-constant"`, `"LMS-multilinear"`, `"LMS-bestSimpleLinear"`,
#'   `"Q1-Constant"`, `"Q2-Constant"`, `"Q1-multiLinear"`, `"Q2-multiLinear"`,
#'   `"Q1-bestPolynomial"`, `"Q2-bestPolynomial"`, `"Longitudinal-Lowess"` and
#'   `"Longitudinal-Spline"`. Values handled for ensembles: `"Bagging"`,
#'   `"RF-random"` and `"RF-nonRandom"`.
#' @param i_treeOptions List of tree options. Depending on the tree type the
#'   following elements are read: `truncmeth`, `prune`, `missregval`, `nbcv`,
#'   `cvtype`, `seval`, `search`, `splitfrac`, `maxsplits`, `minnbnodes`,
#'   `quantile1` and `quantile2`. `splitfrac`, `maxsplits` and `minnbnodes`
#'   may be `NULL` or `NA`, in which case the GUIDE default is requested.
#' @param i_fpath List of file paths. Element `in` is the batch file that is
#'   written. Elements `out`, `desc`, `tex`, `fn`, `sfv`, `rnam`, `rcoef` and
#'   `R` must all be character strings (they are all passed to `basename()`,
#'   whatever the tree type); only their base names are written to the file.
#' @param VERBOSE Logical scalar. If `TRUE`, a one-line description of the
#'   selected case is printed for the tree types that define one.
#' @return The path of the batch file (`i_fpath[["in"]]`), invisibly.
#' @keywords GUIDE, input, batch file
#' @export
#'

write_batch_file <- function(i_singleTree, i_treeType, i_treeOptions, i_fpath, VERBOSE=FALSE) {

  # Only the base names of the auxiliary files are written to the batch file
  v_fname_out   <- basename(i_fpath$out)
  v_fname_desc  <- basename(i_fpath$desc)
  v_fname_tex   <- basename(i_fpath$tex)
  v_fname_fn    <- basename(i_fpath$fn)
  v_fname_sfv   <- basename(i_fpath$sfv)
  v_fname_rnam  <- basename(i_fpath$rnam)
  v_fname_rcoef <- basename(i_fpath$rcoef)
  v_fname_R     <- basename(i_fpath$R)

  # Batch file being written
  v_in <- i_fpath[["in"]]

  # The simple ANCOVA model is selected with "LS-simpleANCOVA"; "LS-ANCOVA" is
  # accepted as an alias so that the model section and the footer always agree.
  v_ancova <- c("LS-simpleANCOVA", "LS-ANCOVA")

  # Colour line shared by several tree types
  v_colour_line <- " 6          (1=white, 2=lightgray, 3=gray, 4=darkgray, 5=black, 6=yellow, 7=red, 8=blue, 9=green, 10=magenta, 11=cyan)"

  #------ Local helpers -----------------------------------------------------------------------
  # Append one line to the batch file
  put <- function(i_line) catret(i_line, file=v_in, append=TRUE)

  # Append a quoted file name followed by its label
  put_file <- function(i_fname, i_label) put(paste0("\"", i_fname, "\"", "  ", i_label))

  # Print the selected case when VERBOSE is on
  say <- function(i_case) {
    if (VERBOSE) cat(paste0("  - batch input file (case: ", i_case, ") ", v_in, "\n"))
  }

  # TRUE when an optional setting was not provided
  is_unset <- function(i_value) is.null(i_value) || is.na(i_value)

  # Stepwise selection limits (default values)
  put_stepwise_limits <- function() {
    put("     0      (max. number of variables to be selected; 0=max. possible)")
    put("  4.00      (f-to-enter)")
    put("  3.99      (f-to-delete)")
  }

  # Cross-validation settings taken from i_treeOptions
  put_cv_options <- function() {
    put(paste0("        ", i_treeOptions$nbcv, "  (number of cross-validations)"))
    put(paste0(" ", i_treeOptions$cvtype, "          (1=mean-based CV tree, 2=median-based CV tree)"))
    put(paste0("     ", format(i_treeOptions$seval, nsmall=3), "  (SE number for pruning)"))
  }

  # Max. number of split levels and min. node size; the GUIDE default is
  # requested when the corresponding option is NULL or NA
  put_size_limits <- function() {
    if (is_unset(i_treeOptions$maxsplits)) {
      put(" 1          (1=default max. number of split levels, 2=specify no. in next line)")
    } else {
      put(" 2          (1=default max. number of split levels, 2=specify no. in next line)")
      put(paste0("        ", i_treeOptions$maxsplits, "   (max. no. split levels)"))
    }
    if (is_unset(i_treeOptions$minnbnodes)) {
      put(" 1          (1=default min. node size, 2=specify min. value in next line)")
    } else {
      put(" 2          (1=default min. node size, 2=specify min. value in next line)")
      put(paste0("        ", i_treeOptions$minnbnodes, "  (min. node sample size)"))
    }
  }

  # LaTeX output settings common to all single trees (colour line excluded)
  put_latex_options <- function() {
    put(" 1          (1=write latex, 2=skip latex)")
    put_file(v_fname_tex, "(latex file name)")
    put(" 1          (1=vertical tree, 2=sideways tree)")
    put(" 1          (1=include node numbers, 2=exclude)")
    put(" 1          (1=number all nodes, 2=only terminal nodes)")
  }

  #------ Tree header -----------------------------------------------------------------------
  # First line is written without append so that an existing file is replaced
  catret("GUIDE       (do not edit this file unless you know what you are doing)", file=v_in)
  put("  25.4      (version of GUIDE that generated this file)")
  put(" 1          (1=model fitting, 2=importance or DIF scoring, 3=data conversion)")
  put_file(v_fname_out, "(name of output file)")

  # Single trees
  if (i_singleTree) {
    # Tree type
    put(" 1          (1=one tree, 2=ensemble)")
    put(" 2          (1=classification, 2=regression, 3=propensity score grouping)")

    #------ Linear trees -------------------------------------------------------------------------------
    if (substr(i_treeType,1,2) == "LS" || substr(i_treeType,1,3) == "LMS") {
      put(" 1          (1=linear, 2=quantile, 3=Poisson, 4=hazard, 5=multiresponse or itemresponse, 6=longitudinal with T variables)")

      #------ Least squares ----------------------------------------------------------------------------
      if (substr(i_treeType,1,2) == "LS") {
        put(" 1          (1=least squares, 2=least median of squares)")

        if (i_treeType == "LS-constant") {
          say("Single tree > Regression > Linear > Least squares > Constant")
          put(" 3          (0=stepwise linear, 1=multiple linear, 2=best polynomial, 3=constant, 4=steptwise simple ANCOVA)")
        }
        if (i_treeType == "LS-linear") {  #==== default LS ====
          say("Single tree > Regression > Linear > Least squares > Stepwise linear")
          put(" 0          (0=stepwise linear, 1=multiple linear, 2=best polynomial, 3=constant, 4=steptwise simple ANCOVA)")

          # Specific options (all set to default values, instead of modifying the value directly create your own tree type)
          put(" 1          (1=forward+backward, 2=forward, 3=all subsets)")
          put_stepwise_limits()
          # Choose a truncation method for predicted values
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=none, 1=node range, 2=+10% node range, 3=global range)"))
        }
        if (i_treeType == "LS-bestPolynomial") {
          say("Single tree > Regression > Linear > Least squares > Best polynomial")
          put(" 2          (0=stepwise linear, 1=multiple linear, 2=best polynomial, 3=constant, 4=steptwise simple ANCOVA)")

          # Specific options
          put(" 1          (1=drop insignificant powers, 2=keep all powers)")
          put(" 0.050      (significance level)")
          # Choose a truncation method for predicted values
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=none, 1=node range, 2=+10% node range, 3=global range, 4=2-sided Winsorization))"))
        }
        if (i_treeType == "LS-multilinear") {
          say("Single tree > Regression > Linear > Least squares > Multiple linear")
          put(" 1          (0=stepwise linear, 1=multiple linear, 2=best polynomial, 3=constant, 4=steptwise simple ANCOVA)")

          # Specific options
          put(" 1          (1=intercept included, 2=intercept excluded)")
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=no truncation, 1=node range, 2=+10% node range, 3=global range)"))
        }
        if (i_treeType %in% v_ancova) {
          say("Single tree > Regression > Linear > Least squares > Stepwise simple ANCOVA")
          put(" 4          (0=stepwise linear, 1=multiple linear, 2=best polynomial, 3=constant, 4=steptwise simple ANCOVA)")

          # Specific options
          # The number of linear predictors permitted at each node may be restricted or unrestricted,
          # subject to the standard F-to-enter and F-to-remove thresholds of 4.0 (Miller, 2002).
          put_stepwise_limits()

          # Choose a truncation method for predicted values
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=none, 1=node range, 2=+10% node range, 3=global range, 4=2-sided Winsorization))"))
        }
      }

      #------ Least median of squares ------------------------------------------------------------------
      if (substr(i_treeType,1,3) == "LMS") {
        put(" 2          (1=least squares, 2=least median of squares)")

        if (i_treeType == "LMS-constant") {
          put(" 3          (1=multiple linear, 2=simple linear, 3=constant)")
        }
        if (i_treeType == "LMS-multilinear") {
          put(" 1          (1=multiple linear, 2=simple linear, 3=constant)")

          # Specific options (all set to default values, instead of modifying the value directly create your own tree type)
          # Choose a truncation method for predicted values
          # To avoid large prediction errors caused by extrapolation, truncate all predicted values so that they lie
          # within the range of the training sample data values in their nodes.
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=none, 1=node range, 2=+10% node range, 3=global range)"))
        }
        if (i_treeType == "LMS-bestSimpleLinear") { #==== default LMS ====
          put(" 2          (1=multiple linear, 2=simple linear, 3=constant)")

          # Specific options (all set to default values, instead of modifying the value directly create your own tree type)
          # Choose a truncation method for predicted values
          # To avoid large prediction errors caused by extrapolation, truncate all predicted values so that they lie
          # within the range of the training sample data values in their nodes.
          put(paste0(" ", i_treeOptions$truncmeth, "          (0=none, 1=node range, 2=+10% node range, 3=global range, 4=2-sided Winsorization)"))
        }
      }

      #------ Single LS/LMS tree footer ---------------------------------------------------------------
      # (all set to default values, all files are generated)
      put(" 1          (1=interaction tests, 2=skip them)")

      # CV options
      put(paste0(" ", i_treeOptions$prune, "          (1=prune by CV, 2=no pruning)"))
      put_file(v_fname_desc, "(name of data description file)")

      # Missing regressor values and cross-validation settings (pruned trees only)
      if (i_treeOptions$prune == 1) {
        if (i_treeType %in% c("LS-linear", "LS-multilinear", "LS-bestPolynomial", v_ancova,
                              "LMS-multilinear", "LMS-bestSimpleLinear")) {
          put(paste0(" ", i_treeOptions$missregval, "          (missing regressor values: 1=separate models, 2=impute with means, 3=constant model)"))
        }
        put_cv_options()
      }

      # Split point selection method for numerical variables
      if (i_treeType %in% c("LS-constant", "LS-multilinear")) {
        put(paste0(" ", i_treeOptions$search, "          (1=split point from quantiles, 2=use exhaustive search)"))
      }

      # Fraction of cases for splitting
      if (i_treeType %in% c("LS-linear", "LS-bestPolynomial", v_ancova,
                            "LMS-multilinear", "LMS-bestSimpleLinear", "LMS-constant")) {
        if (is_unset(i_treeOptions$splitfrac)) {
          put(" 1          (1=accept default splitting fraction, 2=change it)")
        } else {
          put(" 2          (1=accept default splitting fraction, 2=change it)")
          put(paste0("  ", format(i_treeOptions$splitfrac, nsmall=4), "     (frac, where #splits = max(9,fract*n), with n = #cases in node)"))
        }
      }

      # Max. number of splits and min. node size
      put_size_limits()

      # LaTex file options
      put_latex_options()
      if (i_treeType %in% c("LS-linear", "LS-multilinear", "LS-constant",
                            "LMS-multilinear", "LMS-constant")) {
        put(v_colour_line)
      }
      if (i_treeType %in% c("LS-bestPolynomial", v_ancova,
                            "LMS-bestSimpleLinear")) {
        put("1          (1=red-green-blue,2=magenta-yellow-green,3=yellow,4=green,5=magenta,6=cyan,7=purple,8=lightgray,9=white)")
      }

      # Fit and split variables information file
      put(" 2          (1=no storage, 2=store fit and split variables, 3=store split variables and values)")
      put_file(v_fname_sfv, "(split variable file name)")

      # Regressor name file
      put(" 2          (1=do not save, 2=save regressor names in a file)")
      put_file(v_fname_rnam, "(regressor names file)")

      # Regression coefficients file
      if (i_treeType %in% c("LMS-bestSimpleLinear")) {
        put(" 2          (1=do not save, 2=save regression coefs in separate file)")
        put_file(v_fname_rcoef, "(regression coefficient file)")
      }

      # Fitted values and node IDs file
      put(" 2          (1=do not save individual fitted values and node IDs, 2=save in a file)")
      put_file(v_fname_fn, "(file name for individual fitted values and node IDs)")

      # Terminal node IDs for important scoring file
      if (i_treeType %in% c("LMS-bestSimpleLinear")) {
        put(" 1          (1=do not save terminal node IDs for importance scoring in a file, 2=save them)")
      }

      # R file
      put(" 2          (1=do not write R function, 2=write R function)")
      put_file(v_fname_R, "(R code file)")

    }

    #------ Quantile regression trees ------------------------------------------------------------------
    if (substr(i_treeType,1,1) == "Q") {
      put(" 2          (1=linear, 2=quantile, 3=Poisson, 4=proportional Hazards, 5=multiresponse or itemresponse, 6=longitudinal with T variables)")

      # Quantile probabilities (one or two lines)
      put_one_quantile <- function() {
        put(paste0(" ", i_treeOptions$quantile1, "     (quantile)"))
      }
      put_two_quantiles <- function() {
        put(paste0(" ", i_treeOptions$quantile1, "     (1st quantile probability)"))
        put(paste0(" ", i_treeOptions$quantile2, "     (2nd quantile probability)"))
      }

      if (i_treeType == "Q1-Constant") {
        say(paste0("Single tree > Regression > Quantile > Constant (1 quantile: ", i_treeOptions$quantile1, ")"))
        put(" 3          (1=multiple linear, 2=polynomial, 3=constant)")
        put(" 1          (1=one quantile, 2=two quantiles)")
        put_one_quantile()
      }
      if (i_treeType == "Q2-Constant") {
        say(paste0("Single tree > Regression > Quantile > Constant (2 quantiles: ", i_treeOptions$quantile1, ", ", i_treeOptions$quantile2, ")"))
        put(" 3          (1=multiple linear, 2=polynomial, 3=constant)")
        # Two quantile probabilities follow, so the answer must be 2
        put(" 2          (1=one quantile, 2=two quantiles)")
        put_two_quantiles()
      }
      if (i_treeType == "Q1-multiLinear") {
        say(paste0("Single tree > Regression > Quantile > Multiple linear (1 quantile: ", i_treeOptions$quantile1, ")"))
        put(" 2          (1=multiple linear, 2=polynomial, 3=constant)")
        put(" 1          (highest degree of polynomial model)")
        put(" 1          (1=one quantile, 2=two quantiles)")
        put_one_quantile()
      }
      if (i_treeType == "Q2-multiLinear") {
        say(paste0("Single tree > Regression > Quantile > Multiple linear (2 quantiles: ", i_treeOptions$quantile1, ", ", i_treeOptions$quantile2, ")"))
        put(" 2          (1=multiple linear, 2=polynomial, 3=constant)")
        put(" 1          (highest degree of polynomial model)")
        put(" 2          (1=one quantile, 2=two quantiles)")
        put_two_quantiles()
      }
      if (i_treeType == "Q1-bestPolynomial") {
        say(paste0("Single tree > Regression > Quantile > Best polynomial (1 quantile: ", i_treeOptions$quantile1, ")"))
        put(" 2          (1=multiple linear, 2=polynomial, 3=constant)")
        put(" 1          (highest degree of polynomial model)")
        # NOTE(review): unlike the other quantile cases, the quantile-count line is
        # disabled here - confirm against GUIDE that this is intended.
        #put(" 1          (1=one quantile, 2=two quantiles)")
        put_one_quantile()
      }
      if (i_treeType == "Q2-bestPolynomial") {
        say(paste0("Single tree > Regression > Quantile > Best polynomial (2 quantiles: ", i_treeOptions$quantile1, ", ", i_treeOptions$quantile2, ")"))
        put(" 2          (1=multiple linear, 2=polynomial, 3=constant)")
        put(" 1          (highest degree of polynomial model)")
        put(" 2          (1=one quantile, 2=two quantiles)")
        put_two_quantiles()
      }

      #------ Single Q tree footer ---------------------------------------------------------------
      # (all set to default values, i_treeOptions is not used here)
      put(" 1          (1=interaction tests, 2=skip them)")
      put(" 1          (1=prune by CV, 2=no pruning)")
      put_file(v_fname_desc, "(name of data description file)")
      put(" 2          (missing values: 1=separate models, 2=impute with means, 3=constant model)")
      put("        10  (number of cross-validations)")
      put(" 1          (1=mean-based CV tree, 2=median-based CV tree)")
      put("     0.500  (SE number for pruning)")
      put(" 2          (1=split point from quantiles, 2=use exhaustive search)")
      put(" 1          (1=default max number of split levels, 2=specify no. in next line)")
      put(" 1          (1=default min node size, 2=specify node size in next line)")
      put_latex_options()
      put(v_colour_line)
      put(" 1          (1=no storage, 2=store fit and split variables, 3=store split variables and values)")

      # Regression coefficients file (non-constant models only)
      if (i_treeType %in% c("Q1-multiLinear","Q2-multiLinear","Q1-bestPolynomial","Q2-bestPolynomial")) {
        put(" 2          (1=do not save, 2=save regression coefs in separate file)")
        put_file(v_fname_rcoef, "(regression coefficient file)")
      } else {
        put(" 1          (1=do not save, 2=save regression coefs in separate file)")
      }

      put(" 2          (1=do not save individual fitted values and node IDs, 2=save in a file)")
      put_file(v_fname_fn, "(file name for individual fitted values and node IDs)")

      put(" 1          (1=do not save terminal node IDs for importance scoring in a file, 2=save them)")

      put(" 1          (1=do not write R function, 2=write R function)")

    }


    #------ Longitudinal data trees -------------------------------------------------------------------------------
    if (strsplit(i_treeType, "-", fixed=TRUE)[[1]][1] == "Longitudinal") {
      put(" 6          (1=linear, 2=quantile, 3=Poisson, 4=hazard, 5=multiresponse or itemresponse, 6=longitudinal with T variables)")

      if (i_treeType == "Longitudinal-Lowess") {
        say("Single tree > Regression > Longitudinal > Lowess smoothing")
        put(" 1          (1=lowess, 2=smoothing spline)")
      }
      if (i_treeType == "Longitudinal-Spline") {
        say("Single tree > Regression > Longitudinal > Spline smoothing")
        put(" 2          (1=lowess, 2=smoothing spline)")
      }

      #------ Single longitudinal tree footer ---------------------------------------------------------------
      # (all set to default values, all files are generated)
      put(" 1          (1=interaction tests, 2=skip them)")
      put(paste0(" ", i_treeOptions$prune, "          (1=prune by CV, 2=no pruning)"))
      put_file(v_fname_desc, "(name of data description file)")

      # Size groups (customized groups are not supported yet)
      eqsizegroups <- TRUE
      if (eqsizegroups) {
        put(" 1          (1=roughly equal-sized groups, 2=customized groups)")
        put("   3        (number of roughly equal-sized groups)")
        put("  31        (number of interpolating points for prediction)")
      } else {
        # "You should have a file containing group time boundaries with one boundary point per line and number of lines = (nb_group -1)
        put(" 2          (1=roughly equal-sized groups, 2=customized groups)")
        stop("Not yet implemented")
      }

      # !! In the interactive mode of GUIDE, this is never asked. Probably because otherwise the tree does not yield meaningful results
      put(" 2          (1=use all obs, 2=use obs with complete D values)")

      if (i_treeOptions$prune == 1) {
        put_cv_options()
      }

      put(paste0(" ", i_treeOptions$search, "          (1=split point from quantiles, 2=use exhaustive search)"))

      # Max. number of splits and min. node size
      put_size_limits()

      # LaTex file options
      put_latex_options()
      put(v_colour_line)

      # Fit and split variables information file
      put(" 2          (1=no storage, 2=store fit and split variables, 3=store split variables and values)")
      put_file(v_fname_sfv, "(split variable file name)")

      # Save individual fitted values and node IDs
      put(" 2          (1=do not save individual fitted values and node IDs, 2=save in a file)")
      put_file(v_fname_fn, "(file name for terminal node IDs)")

      # Fitted values at terminal nodes (file name is fixed)
      v_fname_fntn <- "tnode.txt"
      put(" 2          (1=do not save fitted values at terminal nodes, 2=save fitted values)")
      put_file(v_fname_fntn, "(file name for node fitted values)")

      # R file
      put(" 2          (1=do not write R function, 2=write R function)")
      put_file(v_fname_R, "(R code file)")

    }


    # Tree ensemble
  } else {
    # Tree type
    put(" 2          (1=one tree, 2=ensemble)")

    if (i_treeType == "Bagging") {
      put(" 1          (1=Bagging, 2=rForest)")
      put(" 2          (1=classification, 2=regression, 3=propensity score grouping)")

      put(" 1          (1=interaction tests, 2=skip them)")
      put(" 1          (1=prune by CV, 2=no pruning)")
      put_file(v_fname_desc, "(name of data description file)")

      put(" 2          (1=accept default number of trees, 2=change)")
      put(" 100        (no. trees)")
      put("         5  (number of cross-validations)")
      put(" 1          (1=mean-based CV tree, 2=median-based CV tree)")
      put("     0.500  (SE number for pruning)")

    }
    # The two random forest variants differ only in how missing values are split
    if (i_treeType %in% c("RF-random", "RF-nonRandom")) {
      put(" 2          (1=Bagging, 2=rForest)")
      if (i_treeType == "RF-random") {
        put(" 1          (1=Random splits of missing values, 2=Non-random splits of missing values)")
      } else {
        put(" 2          (1=Random splits of missing values, 2=Non-random splits of missing values)")
      }
      put(" 2          (1=classification, 2=regression, 3=propensity score grouping)")

      put(" 2          (1=interaction tests, 2=skip them)")
      put_file(v_fname_desc, "(name of data description file)")

      put(" 2          (1=accept default number of trees, 2=change)")
      put(" 100        (no. trees)")
      put(" 2          (1=accept default number of variables for splitting, 2=change it)")
      put(" 3          (number of variables used for splitting)")
    }

    #------ Ensemble footer (fixed values, i_treeOptions is not used here) ---------------------------
    put(" 1          (1=split point from quantiles, 2=use exhaustive search)")
    put(" 2          (1=accept default splitting fraction, 2=change it)")
    put(" 0.67432E-01 (frac, where #splits = max(9,fract*n), with n = #cases in node)")
    put(" 2          (1=default max. number of split levels, 2=specify no. in next line)")
    put(" 7          (max. no. split levels)")
    put(" 2          (1=default min. node size, 2=specify min. value in next line)")
    put(" 14         (min. node sample size)")
    put_file(v_fname_fn, "(file name for predicted values)")
  }

  invisible(v_in)
}
# jhilaire/guidr documentation built on May 7, 2019, 10:47 p.m.