R/tdmBigLoop.r

Defines functions tdmBigLoop bigLoopStep populateEnvT saveEnvT saveSRFinfo removeTmpfiles2 prepareParallelExec

Documented in tdmBigLoop

#require(SPOT);

######################################################################################
# tdmBigLoop:
#
#' Tuning and unbiased evaluation in a big loop.
#'
#' For each configuration object \code{.conf} in \code{tdm$runList} call all tuning algorithms (SPOT, CMA-ES 
#' or other) specified in \code{tdm$tuneMethod} (via function \code{\link{tdmDispatchTuner}}).
#' After each tuning process perform a run of \code{tdm$unbiasedFunc} 
#' (usually \code{\link{unbiasedRun}}). \cr
#' Each of these experiments is repeated \code{tdm$nExperim} times. Thus we have for each tripel \cr
#'    \tabular{ll}{
#'        \tab \code{   (confName,nExp,theTuner)} \cr
#'    }
#' a tuning result. The ranges of the triple elements are: \cr
#'    \tabular{ll}{
#'        \tab \code{   confName in tdm$runList} \cr
#'        \tab \code{   nExp in 1,...,tdm$nExperim} \cr
#'        \tab \code{   theTuner in tdm$tuneMethod} \cr
#'    }
#' 
#Details:
#' \code{tdm} refers to \code{envT$tdm}.
#'
#' The available tuning algorithms (tuners) are 
#'      \itemize{
#'      \item{\code{\link{spotTuner}}:  Call \code{\link[SPOT]{spot}}.   }
#'      \item{\code{\link{lhdTuner}}:  Perform a parameter tuning using a Latin hypercube design (LHD) 
#'            for obtaining best design points. LHD is performed by configuring SPOT 
#'            in such a way that all the budget is used for the initial design (usually LHD). }
#'      \item{\code{\link{cma_jTuner}}:  Perform a parameter tuning by CMA-ES, using the *Java* 
#'            implementation by Niko Hansen through the interface package \code{\link[rCMA]{rCMA-package}}.    }
#'      \item{\code{\link{cmaesTuner}}:  Perform a parameter tuning by CMA-ES, using the *R*-implementation 
#'            (package \code{\link[cmaes]{cma_es}} by Olaf Mersmann) 
#'            (deprecated, use \code{\link{cma_jTuner}} instead).  }
#'      \item{\code{\link{bfgsTuner}}:   Perform a parameter tuning by Broyden, Fletcher, Goldfarb and Shanno (BFGS) method.
#'            The L-BFGS-B version allowing box constraints is used.  }
# --- powellTuner removed because package powell was archived by CRAN
#      \item{\code{\link{powellTuner}}:  Perform a parameter tuning by Powell's UObyQA algorithm 
#            (unconstrained optimization by quadratic approximation), see package \code{\link[powell]{powell}}).   } 
#'      }
#'
#' @param envT      an environment containing on input at least the element \code{tdm} (a list with general settings for TDMR, 
#'                   see \code{\link{tdmDefaultsFill}}), which has at least the elements  
#'     \describe{
#'     \item{\code{tdm$runList}}{ vector of configuration names \code{.conf} }
#     \item{\code{tdm$spotList}}{ \code{[NULL]} vector of \code{.conf} filenames for which spot tuning is done. 
#                             If \code{NULL}, then \code{spotList=runList} }
#'     }
# @param spotStep  \code{["auto"]} which step of SPOT to execute (either \code{"auto"} or \code{"rep"}).
#' @param dataObj   \code{[NULL]} optional object of class \code{\link{TDMdata}} (the same for all runs in big loop). 
#'      If it is \code{NULL}, it will be constructed here with the help of \code{\link{tdmReadAndSplit}}.
#'      Then it can be different for each configuration object in the big loop.
#'
#' @return environment \code{envT}, containing  the results
#'      \item{res}{ data frame with results from last tuning (one line for each call of \code{tdmStart*})} 
#'      \item{bst}{ data frame with the best-so-far results from last tuning (one line collected after each (SPO) step)}
#'      \item{resGrid}{  list with data frames \code{res} from all tuning runs. Use \cr
#'            \code{envT$getRes(envT,confFile,nExp,theTuner)}  \cr
#'        to retrieve a specific \code{res}. }
#'      \item{bstGrid}{  list with data frames \code{bst} from all tuning runs. Use \cr
#'            \code{envT$getBst(envT,confFile,nExp,theTuner)}  \cr
#'        to retrieve a specific \code{bst}. }
#'      \item{theFinals}{ data frame with one line for each triple \code{(confFile,nExp,tuner)}, each line contains summary
#'        information about the tuning run in the form: \cr
#'            \code{confFile tuner nExp [params] NRUN NEVAL RGain.bst RGain.* sdR.*} \cr
#'        where \code{[params]} is written depending on \code{tdm$withParams}. \cr
#'        \code{NRUN} is the number of unbiased evaluation runs. \cr
#'        \code{NEVAL} is the number of function evaluations (model builds) during tuning. \cr
#'        \code{RGain} denotes the relative gain on a certain data set: the actual gain achieved with the model 
#'        divided by the maximum gain possible for the current cost matrix and the current data set. This is for classification
#'        tasks, in the case of regression each \code{RGain.*} is replaced by \code{RMAE.*}, the relative mean absolute error. \cr
#'        Each 'sdR.' denotes the standard deviation of the preceeding RGain or RMAE. \cr
#'        RGain.bst is the best result during tuning obtained on the training-validation data. RGain.avg is the average result 
#'        during tuning. The following pairs {RGain.* sdR.*} are the results of one or several unbiased evaluations on the test data
#'        where '*' takes as many values as there are elements in \code{tdm$umode} (the possible values are explained in 
#'        \code{\link{unbiasedRun}}).  
#'        }
#'      \item{result}{ object of class \code{\link{TDMclassifier}} or \code{\link{TDMregressor}}. This is a list with results from \code{tdm$mainFunc} 
#'          as called in the last unbiased evaluation using the best parameters found during tuning. 
#'          Use \code{\link[=print.TDMclassifier]{print}(envT$result)} to get more info on such an object of class \code{\link{TDMclassifier}}.  }
#'      \item{tunerVal}{ an object with the return value from the last tuning process. For every tuner, this is the list 
#'          \code{spotConfig}, containing the SPOT settings plus the TDMR settings in elements \code{opts} and \code{tdm}. Every tuner 
#'          extends this list by \code{tunerVal$alg.currentResult} and \code{tunerVal$alg.currentBest}, see \code{\link{tdmDispatchTuner}}.
#'          In addition, each tuning method might add specific elements to the list, see the description of each tuner. }
#'   Environment \code{envT} contains further elements, but they are only relevant for the internal operation of 
#'   \code{tdmBigLoop} and its subfunctions.
#'
#' @note Side effects:
#---- now deleted, we act always as if tdm$fileMode==FALSE ---------------- 
#   Irrespective of the value of \code{tdm$fileMode}, 
#     \itemize{
#         \item 
#--------------------------------------------------------------------------
#'   A compressed version of \code{envT } is saved to file \code{tdm$filenameEnvT} 
#'   (default: \code{<runList[1]>.RData}) in directory \code{tdm$path}. If \code{tdm$path==NULL}
#'   use the current directory.\cr
#       }
#'   If \code{tdm$U.saveModel==TRUE}, then \code{envT$result$lastRes$lastModel} (the last 
#'   trained model) will be saved to \code{tdm$filenameEnvT}. 
#'   The default is \code{tdm$U.saveModel==TRUE}. If \code{tdm$U.saveModel==FALSE} 
#'   then smaller \code{.RData} files will result.
#'
#---- now deleted, we act always as if tdm$fileMode==FALSE ---------------- 
#   If \code{tdm$fileMode==TRUE}, more files are written relative to  the directory of the \code{.conf} file:
#     \itemize{
#         \item \code{envT$theFinals } is written to file \code{tdm$finalFile} 
#         \item \code{envT$theFinals } is appended to \code{tdm$experFile}
#      }                                                                                                           
#   If \code{tdm$finalFile==NULL}, then it is set to \code{sub(".conf",".fin",runList[1])}.  \cr
#   If \code{tdm$experFile==NULL}, then nothing is appended to any experiment file.
#--------------------------------------------------------------------------
#'
#' Example usages of function \code{tdmBigLoop} are shown in \cr
#' \tabular{ll}{
#'    \tab \code{   demo(demo03sonar)} \cr 
#'    \tab \code{   demo(demo03sonar_B)} \cr
#'    \tab \code{   demo(demo04cpu)} \cr
#' }
#' where the corresponding R-sources are in directory \code{demo}.
#'
#' @examples
#' #*# This demo shows a complete tuned data mining process (level 3 of TDMR) where 
#' #*# the data mining task is the classification task SONAR (from UCI repository, 
#' #*# http://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+%28Sonar,+Mines+vs.+Rocks%29).
#' #*# The data mining process is in main_sonar.r, which calls tdmClassifyLoop and tdmClassify
#' #*# with Random Forest as the prediction model. 
#' #*# The three parameter to be tuned are CUTOFF1, CLASSWT2 and XPERC, as specified 
#' #*# in controlSC() (control_sonar.r). The tuner used here is LHD.  
#' #*# Tuning runs are rather short, to make the example run quickly. 
#' #*# Do not expect good numeric results. 
#' #*# See demo/demo03sonar_B.r for a somewhat longer tuning run, with two tuners SPOT and LHD.
#'
#'   ## path is the dir with data and main_*.r file:
#'   path <- paste(find.package("TDMR"), "demo02sonar",sep="/");
#'   #path <- paste("../../inst", "demo02sonar",sep="/");
#' 
#'   ## control settings for TDMR
#'   tdm <- list( mainFunc="main_sonar"
#'              , runList = c("sonar_04.conf")
#'              , umode="CV"              # { "CV" | "RSUB" | "TST" | "SP_T" }
#'              , tuneMethod = c("lhd")
#'              , filenameEnvT="exBigLoop.RData"   # file to save environment envT 
#'              , nrun=1, nfold=2         # repeats and CV-folds for the unbiased runs
#'              , nExperim=1
#'              , optsVerbosity = 0       # the verbosity for the unbiased runs
#'              );
#'   source(paste(path,"main_sonar.r",sep="/"));    # main_sonar, readTrnSonar
#' \dontshow{
#'   #*# This demo is for "R CMD check" (runs in half the time) 
#'   source(paste(path,"control_sonar_check.r",sep="/")); # controlDM, controlSC
#' }
#' \donttest{
#'   #*# This demo is for example and help (more meaningful, a bit higher budget)
#'   source(paste(path,"control_sonar.r",sep="/")); # controlDM, controlSC
#' }
#'   
#'   
#'   ctrlSC <- controlSC();
#'   ctrlSC$opts <- controlDM();
#'   
#'   # construct envT from settings given in tdm & sCList
#'   envT <- tdmEnvTMakeNew(tdm,sCList=list(ctrlSC));
#'   dataObj <- tdmReadTaskData(envT,envT$tdm);
#'   envT <- tdmBigLoop(envT,dataObj=dataObj);     # start the big tuning loop 
#' \dontshow{
#'   #*# This demo is for "R CMD check" (to remove the output files) 
#'   unlink(paste(envT$tdm$path,"exBigLoop.RData",sep="/"));
#' }
#' @seealso   \code{\link{tdmDispatchTuner}}, \code{\link{unbiasedRun}}
#' @author Wolfgang Konen (\email{wolfgang.konen@@th-koeln.de}), THK, Patrick Koch
#' @export
######################################################################################
tdmBigLoop <- function(envT,dataObj=NULL) {
                            # The environment envT is passed by reference into the inner functions
                            # which means that it can be used a) to transport information back from
                            # those inner functions and b) to transport information to and back even 
                            # for functions like tdmStartOther which are not allowed to have envT in 
                            # their argument list
  tdm <- envT$tdm;
  tdm <- tdmMapDesLoad(tdm);
  spotStep="auto";   # --- deprecated, only to have a value for spotStep (no longer used)
  
  if (is.null(envT$getBst)) envT <- tdmEnvTAddGetters(envT);
  # just in case the user loaded envT via load("myFile.Rdata") and not tdmEnvTLoad("myFile.Rdata") 
      
  #if (!is.null(tdm$mainFile)) source(tdm$mainFile);           # deprecated (sourcing main_TASK should be done in caller of tdmBigLoop)
  nTuner <- length(tdm$tuneMethod);
  nRunList <- length(tdm$runList);

	# depending on tdm$parallelCPUs, the following lines execute the code either in parallel 
	# or sequential: For each value of indVec the function bigLoopStep is 
	# called. The vectors tuneVec, expeVec, confVec contain for each element of indVec the
	# corresponding value of tuner, experiment number and configuration, resp.
	# In case of parallel execution, parSapply will only return after the last parallel job has finished.
	#
	indVec <- 1:(tdm$nExperim*nTuner*nRunList);
	tuneVec <- rep(tdm$tuneMethod,tdm$nExperim*nRunList);
	expeVec <- rep(sort(rep(1:tdm$nExperim,nTuner)),nRunList);
	confVec <- sort(rep(tdm$runList,tdm$nExperim*nTuner));
	if (length(spotStep)>1) stop("Only a scalar string allowed for spotStep");
	if (length(tdm$umode)>1) stop("Only a scalar string allowed for tdm$umode");
	if (length(indVec)==1 & tdm$parallelCPUs>1) {
	  warning("There is only one job (length(indVec)==1) --> parallelization would not run, so we set tdm$ParallelCPUs=1");
	  tdm$parallelCPUs=1;
  }

  if (tdm$parallelCPUs>1) {
    cl = prepareParallelExec(tdm);
    sappResult <- parallel::parSapply(cl, indVec, bigLoopStep, tuneVec,expeVec,confVec,tdm$spotList,spotStep,dataObj,envT,tdm);
    parallel::stopCluster(cl);
  } else {  
		sappResult <- sapply(indVec, bigLoopStep, tuneVec,expeVec,confVec,tdm$spotList,spotStep,dataObj,envT,tdm);
  }
  # populate envT with the results returned in matrix sappResult:
	envT <- populateEnvT(sappResult,envT,tdm,spotStep);
  
  if (TRUE) { # (spotStep == "auto") {
    saveEnvT(envT,tdm$runList,saveModel=tdm$U.saveModel);
    saveSRFinfo(envT);
  } 

  if (nrow(envT$theFinals)>0) {
    print(envT$theFinals);
  } else {
    cat("Note: No rows in data frame envT$theFinals\n");
  }

  envT;
} # function tdmBigLoop

      ######################################################################################
  		#------------------------------------------------------------------------------------------------------
      #  bigLoopStep: helper function for tdmBigLoop, called via sapply or parSapply:
      #  (ind is an index where confFile varies slowest, nExp varies 2nd-slowest and theTuner varies fastest)
      bigLoopStep <- function(ind,tuneVec,expeVec,confVec,spotList,spotStep,dataObj,envT,tdm) {
        #browser()
        if (tdm$parallelCPUs>1) library(TDMR);
        theTuner = tuneVec[ind];
        nExp = expeVec[ind];
        confFile = confVec[ind];
        nConf = which(confFile==envT$runList);
        #i <- (nConf-1) %% length(spotStep) + 1; # i is an index which cyclically re-uses entries      
        #                                        # from vector spotStep, if it is shorter than runList
        #--- deprecated, now spotStep is only a string, "auto" or "rep" or "report"
        
        print(c(ind,confFile,nExp,theTuner));
        envT$spotConfig <- sC <- envT$sCList[[nConf]]; 
        envT$theTuner <- theTuner;
        envT$nExp <- envT$spotConfig$nExp <- nExp;
        envT$bst <- NULL;
        envT$res <- NULL; 
        
        # --- DEPRECATED --- 
        # The variable spotStep is no longer relevant, it should be always "auto"
        # if (spotStep=="rep" | spotStep=="report" | !(confFile %in% spotList)) {
        #   envT$bst = envT$getBst(envT,confFile,nExp,theTuner);
        #   envT$res = envT$getRes(envT,confFile,nExp,theTuner);
        # }

        # if (tdm$fileMode) {  
        #   if (is.null(tdm$finalFile)) tdm$finalFile = sub(".conf",".fin",confFile);
        #   tFinalFile <- ifelse(tdm$nExperim>1, sprintf("%s-e%02d%s",sub(".fin","",tdm$finalFile,fixed=TRUE),nExp,".fin"), tdm$finalFile);
        #   # i.e. if tdm$finalFile="cpu.fin", then tFinalFile="cpu-e02.fin" for nExp=2
        #   if (file.exists(tFinalFile) & theTuner==tuneVec[1] & confFile==envT$runList[1]) file.remove(tFinalFile);
        # } 
        
        # NEW 09/2012: always operate SPOT with spot.fileMode=FALSE (take everything from  envT$spotConfig)
        envT$spotConfig$spot.fileMode=FALSE;
        
        #
        # --- DEPRECATED --- This is no longer the preferred place to read the data. The preferred 
        # place is a cmd dataObj <- tdmReadAndSplit(opts,tdm) in the user's startup-file; prior to 
        # entering tdmBigLoop and passing dataObj into tdmBigLoop.
        # We allow tdmBigLoop's argument dataObj to be NULL only for the rare case where different 
        # conf objects require different dataObj's so that the reading should be done here inside
        # bigLoopStep. But this is only for downward compatibily and should be normally avoided. 
        #
        if (is.null(dataObj)) dataObj <- tdmReadAndSplit(sC$opts,tdm,nExp);
        # (tdmReadAndSplit splits the data into test data and train/vali data.)
        if (!is.null(dataObj)) envT$spotConfig$opts$TST.COL = dataObj$TST.COL;    # this column has to be subtracted in main_* from the input variables

        ptm <- proc.time();
        # --- COMMENTED OUT --- spotList is no longer used 
        # if (confFile %in% spotList) {
            # this is for the case spotStep=="rep":
            envT$spotConfig$alg.currentResult <- envT$res;
            envT$spotConfig$alg.currentBest <- envT$bst;	
            writeLines(sprintf("\n*** Starting TUNER %s on task %s, experiment %d ***",theTuner,confFile,nExp),con=stderr());
            tdmDispatchTuner(theTuner,confFile,spotStep,tdm,envT,dataObj);
            # If spotStep=="auto" then tdmDispatchTuner runs the tuning process, puts its results in envT$bst, envT$res 
            # and returns the (extended) spotConfig in envT$tunerVal.
            # If spotStep=="rep" then tdmDispatchTuner expects that envT$bst, envT$res contain already the right data frames 
            # and it runs just the reporting step of SPOT.
        # } 
        # else {    # i.e. !(confFile %in% spotList)
        #     # we assume that envT$bst and envT$res contain already the right data frames (filled in from a previous setting)
        # }    
        
        #
        # now we have envT$bst and envT$res filled in any case (either from tdmDispatchTuner or from a previous setting)
        #
        if (is.null(envT$bst)) stop("No contents for data frame envT$bst");
        if (is.null(envT$res)) stop("No contents for data frame envT$res");
        
        if (is.null(tdm$timeMode)) stop("tdm$timeMode is not set (NULL). Consider 'tdm <- tdmDefaultsFill(tdm)' to set all defaults");
        time.txt = c("Proc", "System", "Elapsed");
        envT$time.TRN=(proc.time()-ptm)[tdm$timeMode]; opts2=list(); opts2$VERBOSE=1;  
        cat1(opts2,paste(time.txt[tdm$timeMode], "time for tuning with tdmDispatchTuner:",envT$time.TRN,"sec\n"));     

        ptm <- proc.time();
        envT$finals <- NULL;
        if (tdm$nrun>0) {
          for (umode in tdm$umode) {
            writeLines(sprintf("\n*** Starting %s for %s with umode= %s ***",tdm$unbiasedFunc,confFile,tdm$umode),con=stderr());
            #browser()
            envT <- eval(call(tdm$unbiasedFunc,confFile,envT,dataObj,umode=umode,withParams=envT$wP,tdm=tdm))
          }
          time.TST=(proc.time()-ptm)[tdm$timeMode];   
          cat1(opts2,paste(time.txt[tdm$timeMode], "time for",tdm$unbiasedFunc,":",time.TST,"sec\n"));   
          envT$finals <- cbind(envT$finals,Time.TST=time.TST);  

          # transport back opts$srf (data frame with SRF info in case opts$SRF.calc==TRUE)
          envT$sCList[[nConf]]$opts$srf = envT$result$lastRes$opts$srf

        #   if (tdm$fileMode) {  
        #     #removeTmpfiles2(confFile);
        #     if (!file.exists(dirname(tFinalFile))) {
        #       success = dir.create(dirname(tFinalFile));     
        #       if (!success) stop(sprintf("Could not create dirname(tFinalFile)=%s",dirname(tFinalFile)));
        #     }
        # 		colNames = ifelse(file.exists(tFinalFile),FALSE,TRUE);
        # 		write.table(envT$finals
        # 				, file = tFinalFile
        # 				, col.names= colNames
        # 				, row.names= FALSE
        # 				, append = !colNames
        # 				, sep = " ",
        # 				, quote = FALSE
        # 				, eol = "\n"
        # 		);
        # 		if (!is.null(tdm$experFile)) {
        #       if (!file.exists(dirname(tdm$experFile))) {
        #         success = dir.create(dirname(tdm$experFile));     
        #         if (!success) stop(sprintf("Could not create dirname(tdm$experFile)=%s",dirname(tdm$experFile)));
        #       }
        #   		colNames = ifelse(file.exists(tdm$experFile),FALSE,TRUE);
        #   		write.table(envT$finals       # multiple execution of same experiment (nExperim>1 in script_all.R)
        #   				, file = tdm$experFile
        #   				, col.names= colNames
        #   				, row.names= FALSE
        #   				, append = !colNames
        #   				, sep = " ",
        #   				, quote = FALSE
        #   				, eol = "\n"
        #   		);
        # 		}
        #   } # if(tdm$fileMode)
          flush.console(); 
        } # if(tdm$nrun>0)
         
        # we return a *list* with the most important elements from environment envT and not envT itself, because 
        # parSapply (parallel execution) can not deal with environments as return values.
        list( tunerVal=envT$tunerVal    # last tuning result: spotConfig with extensions       
             ,bst=envT$bst              # last tuning result       
             ,res=envT$res              # last tuning result  
             ,result=envT$result        # last tuning result
             ,roi=envT$tunerVal$alg.roi      
             ,theFinals=envT$finals   
             );      
  		} # end of function bigLoopStep
  		#------------------------------------------------------------------------------------------------------

######################################################################################
# helper fct for tdmBigLoop: 
#     Populate the global envT after parallel execution  with 'parallel' (parSapply with results in sappResult).
#     For simplicity we use it also after sequential execution (sapply with results in sappResult). 
populateEnvT <- function(sappResult,envT,tdm,spotStep) {
  
  check_names <- function(names_finals,theFinals,strTheFinals) {
    if (!all(names_finals %in% names(envT$theFinals))) {
      stop(sprintf("Not all names_finals = %s are present as columns in %s = \n  %s",
                   paste(names_finals,collapse=","),strTheFinals,
                   paste(names(theFinals),collapse=",")))
    }
  }
  
  # if (spotStep=="auto") {
    nGrid = length(envT$bstGrid);
    for (ind in 1:(tdm$nExperim*length(tdm$tuneMethod)*length(envT$runList))) {
      nGrid = nGrid+1;
      envT$bstGrid[[nGrid]] <- as.data.frame(sappResult["bst",ind][[1]]);
      envT$resGrid[[nGrid]] <- as.data.frame(sappResult["res",ind][[1]]);   
      envT$roiGrid[[nGrid]] <- as.data.frame(sappResult["roi",ind][[1]]);   
      envT$theFinals <- rbind(envT$theFinals,as.data.frame(sappResult["theFinals",ind][[1]]));
      # "[[1]]" is necessary to avoid prefix "theFinals." in the header names 
    }
    if (!is.null(envT$theFinals)) if(nrow(envT$theFinals)>0)
      rownames(envT$theFinals) <- (1:nrow(envT$theFinals));
    envT$bst <- as.data.frame(sappResult["bst",ncol(sappResult)][[1]]);
    envT$res <- as.data.frame(sappResult["res",ncol(sappResult)][[1]]);
    envT$tunerVal <- sappResult["tunerVal",ncol(sappResult)][[1]];    # last tuning result
    envT$result <- sappResult["result",ncol(sappResult)][[1]];        # last tuning result
  # }
  # else {     # i.e. if spotStep == "rep" or == "report"
  #   nGrid = nrow(envT$theFinals);
  #   if (ncol(sappResult) > nGrid) stop("There are more columns in sappResult than rows in envT$theFinals");
  #   for (ind in 1:ncol(sappResult)) {
  #     #theFinals <- sappResult["theFinals",ind][[1]]
  #     #names_finals <-  paste(c(Opts(envT$result)$rgain.string,"sdR"),tdm$umode,sep=".");
  #     #check_names(names_finals,theFinals,"theFinals");
  #     #check_names(names_finals,envT$theFinals,"envT$theFinals");
  #     #envT$theFinals[ind,names_finals] <- theFinals[names_finals];
  #     #envT$theFinals[ind,"Time.TST"] <- theFinals["Time.TST"];
  #     envT$theFinals[ind,] <- sappResult["theFinals",ind][[1]]
  #     names(envT$theFinals) <- names(sappResult["theFinals",ind][[1]])
  #   }
  #   envT$result <- sappResult["result",ncol(sappResult)][[1]];    # last result from unbiased run training
  # }
  envT;
}

######################################################################################
# helper fct for tdmBigLoop and tdmTuneIt: 
#     Save a small version of environment envT (passed in as thisEnvT) on filenameEnvT
#     (if NULL, use <runList[1]>.RData).
#     If savePredictions is TRUE, the elements envT$result$predictions, envT$result$predProbList, 
#     envT$result$lastRes$predProb are saved. The default is savePredictions==FALSE.
#     If saveModel is TRUE, the element envT$result$lastRes$lastModel is saved. 
#     The default is saveModel=TRUE.
saveEnvT <- function(thisEnvT,runList,savePredictions=FALSE,saveModel=TRUE) {
      envT = list() # new.env(); #            # when saving thisEnvT, we copy the relevant elements to a *list* envT
                                              # and skip the function elements (getBst,getInd,getRes) in envT, because 
                                              # they would also save *their* environment which tends to make the .RData file rather big
      class(envT) = c("TDMenvir")
      filenameEnvT = thisEnvT$tdm$filenameEnvT;
      filepath = thisEnvT$tdm$path;
      if (is.null(filenameEnvT)) stop(sprintf("tdm$filenameEnvT is NULL. Consider %s",
                                              "using 'tdm <- tdmDefaultsFill(tdm)'"));
        {
      }
      for (ele in setdiff(ls(thisEnvT),c("getBst","getInd","getRes"))) 
        eval(parse(text=paste("envT$",ele," = thisEnvT$",ele,sep="")));
                                              # for the save on .RData file we delete also some potentially voluminous elements 
      envT$result$lastRes$d_train=NULL;       # from the local envT, we want only res and bst objects + describing vars ...
      envT$result$lastRes$d_test=NULL;
      envT$result$lastRes$d_dis=NULL;
      envT$result$lastRes$lastProbs=NULL;
      if (!saveModel) envT$result$lastRes$lastModel=NULL;
      envT$result$dset=NULL;
      envT$result$TST=NULL;
      if (!savePredictions) {
        envT$result$predictions=NULL;     
        envT$result$predProbList=NULL;  
        envT$result$lastRes$predProb=NULL;   
      }
      envT$tunerVal$seq.modelFit=NULL;        # this can be quite big in the case of spotPredictRandomForest
      envT$spotConfig=NULL;     # versions of spotConfig are contained in envT$sCList[[i]] and envT$tunerVal
      envT$theTuner=NULL;
      envT$nExp=NULL;           # see envT$tdm$nExperim for number of experiments
      #browser()
      
      fname <- paste(filepath,filenameEnvT,sep="/");
      cat("Saving envT to ", fname, "\n");
      save(envT,file=fname);              
                        # ... but we leave thisEnvT (which is envT in tdmBigLoop) untouched
                        #     and thus return the full envT environment to the caller of tdmBigLoop
}

######################################################################################
# helper fct for tdmBigLoop:
#     Save the info for SRF (sorted Random Forest) importance on file(s) in case opts$SRF.calc==TRUE
#     (Nothing is done in case opts$SRF.calc==FALSE.)
saveSRFinfo <- function(envT) {
  # function saveSRF saves the field opts$srf (a list with as many data frames as there are response variables)
  # in case opts$SRF.calc==TRUE to RData file opts$srfFile (filename set by function addSRF in tdmEnvTMakeNew.r)
  saveSRF <- function(opts) {
      if (opts$SRF.calc==TRUE) {
        cat("Saving sorted RF importance info to file", opts$srfFile, ".\n")
        srf = opts$srf;
        save(file=opts$srfFile,srf);
      }
  }
  k=0;
  for (confFile in envT$runList) {
    k=k+1;
 	  saveSRF(envT$sCList[[k]]$opts);
  }
}

######################################################################################
# helper fct for tdmBigLoop 
removeTmpfiles2 <- function(confFile) {
      #for (suf in c("apd","aroi","bst","conf","des","res","roi"))  {    # old version (removeTmpfiles)
      for (suf in c("aroi","bst","des","res"))  {
        tFile <- paste(strsplit(confFile,"conf"),suf,sep="");
        if (file.exists(tFile)) file.remove(tFile);
      }
}
   
######################################################################################
# helper function for tdmBigLoop, only needed in case tdmParallelCPUs>1 
# 
# set up everything for parallel execution
#
prepareParallelExec <- function(tdm) 
{
    # require(parallel);     # now via direct call 'parallel::' 

    cl <- parallel::makeCluster(tdm$parallelCPUs)
    parallel::clusterExport(cl,c("tdm"))
    
    if (!exists(".Random.seed")) set.seed(42);
    parallel::clusterExport(cl,".Random.seed");
    parallel::clusterExport(cl,c(tdm$mainFunc,tdm$parallelFuncs));
    if (is.null(tdm$tdmPath)) {
        cat("Export installed library TDMR to parallel cluster\n");
        #library(TDMR);     # this is now done in function bigLoopStep
    } else {
        stop("For tdm$parallelCPUs>1 it is required to use the *library* version of TDMR. Consider to set tdm$tdmPath=NULL.")
    }
    cl;
} # prepareParallelExec()
  

Try the TDMR package in your browser

Any scripts or data that you put into this service are public.

TDMR documentation built on March 3, 2020, 1:06 a.m.