plumber.R

getSafe<- function(x, i, default, is.valid=function(x){T}) {
  i <- match(i, names(x))
  if (is.na(i)) {
    default
  } else if(!is.valid(x[[i]])) {
    default
  }else{
    x[[i]]
  }
}
tryDo<-function(exec=function(){},end=function(){}){
  tryCatch(exec,warning=function(w){},error=function(e){},finnaly=end)
}

getFileGridFS <- function(grid,fileID){
  t <- tempfile()
  out <- grid$read(paste0("id:", fileID),t, progress = FALSE)
  return(t)
}
getFileIDByObjectID<- function(col,obid){
  return(col$find(queryByID(obid),'{"file":1,"_id":0}')$file)
}


MultipartModel2GridFS <- function(req,grid){
  form <- Rook::Multipart$parse(req)
  if(!grepl(".RDS",form$file$filename))
    stop("Input file is not a valid .RDS file.")
  else{
    getTokenValidation(form)
    analysis <-getModelValidation(form$file$filename)
    if(!analysis$Valid){
      error(analysis$Message)
    }else{
      upload <-grid$write(form$file$tempfile,form$file$filename)
      return(upload$id)
    }
  }

}
Json2Config <- function(req,userid){
  form <- jsonlite::fromJSON(req$postBody)
  obid<-OBID()
  analysis<-getConfigValidation(form)
  if(!analysis$Valid){
    error(analysis$Message)
  }else{
    ids<-list("_id" = list("$oid" = jsonlite::unbox(obid)),"user" = list("$oid" = jsonlite::unbox(userid)))
    append(ids,form)
  }
}
getConfig <- function(configid){
  .GlobalEnv$configs$find(queryByID(configid))
}


classNumber <- function(x){
  return(inherits(x,'numeric') || inherits(x,'integer'))
}

getModelValidation <-function(file){
  tryCatch({
    out<-list()
    testit::assert('Could not load the model from .RDS file',!testit::has_error({model<-readRDS(file)}))

    #is one of the valid models
    testit::assert('model is not a valid train, caretEnsemble or caretStack object.',{isCaretLegacy(model) || isCaretStack(model) || isCaretEnsemble(model)})

    #is a regression model
    testit::assert('model is not a valid Regression model.',validRegressionModel(model))

    out$Valid<-T
    out$Message<-NULL
    return(out)
  },
  error=function(e){
    out$Valid <- F
    out$Message <- as.character(e)
    return(out)
  })
}
getModelSummary <-function(file){
  props<-NULL
  specs<-NULL
  tryDo({model<-readRDS(file)})
  tryDo({props<-getModelProperties(model)})
  tryDo({specs<-getModelSpecs(model)})
  return(list("Properties"=props,"Specifications"=specs))
}
validRegressionModel<-function(model){
  if(isCaretLegacy(model)){
    isRegression(model)
  }else if(isCaretStack(model) || isCaretEnsemble(model)){
    validRegressionSE(model)
  }
}
validRegressionSE<-function(model){
  res<-unique(lapply(model$models,isRegression))
  if(length(res)!=1){
    F
  }else{
    res[[1]]
  }
}
isRegression <- function(model){
  model$modelType == 'Regression'
}
getModelProperties <-function(model){
  lst <- list()
  if(isCaretLegacy(model)){
    tryDo({lst<-extractProperties(model)})
    tryDo({lst$meta <- 'Caret Legacy Model'})
  }else if(isCaretStack(model)){
    tryDo({lst<-getSEProperties(model)})
    tryDo({lst$stack <- extractProperties(model$ens_model)})
    tryDo({lst$error <- model$error})
    tryDo({lst$meta <- 'Caret Stack Model'})
  }else if(isCaretEnsemble(model)){
    tryDo({lst<-getSEProperties(model)})
    tryDo({lst$ensemble <- extractProperties(model$ens_model)})
    tryDo({lst$error <- model$error})
    lst$meta <- 'Caret Ensemble Model'
  }
  return(lst)
}
extractProperties <- function(model){
  lst<-list()
  tryDo({lst$method <- model$method})
  tryDo({lst$type <- model$modelType})
  tryDo({lst$results <- dplyr::inner_join(model$results,model$bestTune)})
  tryDo({lst$metric <- model$metric})
  tryDo({lst$maximize <- model$maximize})
  tryDo({lst$control <- model$control})
  tryDo({lst$control$index <- NULL})
  tryDo({lst$control$indexOut <- NULL})
  tryDo({lst$control$seeds <- NULL})
  tryDo({lst$preprocess <- names(model$preProcess$method)})
  tryDo({lst$performance <- model$perfNames})
  tryDo({lst$ylimits <- model$yLimits})
  return(lst)
}
getSEProperties <-function(model){
  lapply(model$models,extractProperties)
}
getModelSpecs <- function(model){
  if(isCaretLegacy(model)){
    model$finalModel$xNames
  }else if(isCaretStack(model) || isCaretEnsemble(model)){
    getSESpecs(model)
  }
}
getSESpecs<-function(model){
  listaf<-lapply(model$models, getSafe, i='finalModel')
  listax<-lapply(listaf,getSafe,i='xNames')
  res <- unique(listax)
  if(length(res)==1){
    return(unlist(res))
  }else{
    return(NULL)
  }
}
isCaretLegacy <- function(model){
  inherits(model,"train")
}
isCaretStack<- function(model){
  inherits(model,"caretStack") && !inherits(model,'caretEnsemble')
}
isCaretEnsemble <- function(model){
  inherits(model,"caretEnsemble")
}

is01 <- function(x){
  x>0 && x<0
}
ispos <- function(x){
  x>0
}
isvar <- function(x){
  length(x)==1
}
getConfigValidation <-function(c){
  tryCatch({
    out<-list()
    testit::assert('"legacy" parameter must be a logical.', {is.logical(c$legacy)})
    testit::assert('"stack" parameter must be a logical.', {is.logical(c$stack)})
    testit::assert('"ensemble" parameter must be a logical.', {is.logical(c$ensemble)})
    testit::assert('Multiple meta types of models selected.', {sum(c$legacy,c$stack,c$ensemble) ==1})
    testit::assert('Invalid submodels selection.',{
      (length(c$methods)>0 && all(c$methods %in% names(.GlobalEnv$BSSEModels)))
    })

    if(c$legacy){
      # essemble asssertions
      testit::assert('Multiple submodels selected for legacy model.', {isvar(c$methods)})
    }else if(c$ensemble){
      testit::assert('Select at least 2 submodels for building a greedy ensemble model.', {length(c$methods)>1},{all(c$method%in%names(.GlobalEnv$BSSEModels))})

      testit::assert('"ensemble_metric" parameter must be a single valid metric character.',{is.var(c$ensemble_metric)},{c$ensemble_metric%in%.GlobalEnv$BSSEMetrics})
      testit::assert('"ensemble_maximize" parameter must be a valid logical.',{isvar(c$ensemble_maximize)},{is.logical(c$ensemble_maximize)})

      #ensemble train control
      testit::assert('"ensemble_cv_method" parameter must be a valid cross validation character.',{isvar(c$ensemble_cv_method)},{all(c$ensemble_cv_method%in%.GlobalEnv$BSSECrossvalidation)})
      testit::assert('"ensemble_cv_method" parameter = "none" requires the ensemble_tune_length parameter to be = 1.',{!(c$ensemble_cv_method=="none" && c$ensemble_tune_length !=1)})
      testit::assert('"ensemble_number" parameter must be a valid positive integer.',{isvar(c$ensemble_number)},{is.integer(c$ensemble_number)},{ispos(c$ensemble_number)})
      testit::assert('"ensemble_repeats" parameter must be a valid positive integer.',{isvar(c$ensemble_repeats)},{is.integer(c$ensemble_repeats)},{ispos(c$ensemble_repeats)})
      testit::assert('"ensemble_p_locv" parameter must be a between 0-1.',{isvar(c$ensemble_p_locv)},{is01(c$ensemble_p_locv)})
      testit::assert('"ensemble_search" parameter must be a valid (grid,random) character.',{isvar(c$ensemble_search)},{all(c$ensemble_search%in%c('grid','random'))})

      #ensemble slices
      testit::assert('"ensemble_initial_window" parameter must be NA or positive integer.',{is.var(c$ensemble_initial_window)},{is.null(c$ensemble_initial_window) ||  is.integer(c$ensemble_initial_window) && ispos(c$ensemble_initial_window)})
      testit::assert('"ensemble_horizon" parameter must be a valid positive integer.',{isvar(horizon)},{is.integer(c$ensemble_horizon)},{ispos(c$ensemble_horizon)})
      testit::assert('"ensemble_fixed_window" parameter must be a valid logical.',{isvar(c$ensemble_fixed_window)},{is.logical(c$ensemble_fixed_window)})
      testit::assert('"ensemble_skip" parameter must be a 0 or valid integer.',{isvar(c$ensemble_skip)},{c$ensemble_skip==0 || (is.integer(c$ensemble_skip)&&ispos(c$ensemble_skip))})

      #ensemble adaptive
      testit::assert('"ensemble_adaptive_method" parameter must be a valid (gls,BT) character.',{isvar(c$ensemble_adaptive_method)},{all(c$ensemble_adaptive_method%in% c('gls','BT'))})
      testit::assert('"ensemble_adaptive_alpha" parameter must be a between 0-1.',{isvar(c$ensemble_adaptive_alpha)},{is01(c$ensemble_adaptive_alpha)})
      testit::assert('"ensemble_adaptive_complete" parameter must be a valid logical.',{isvar(c$ensemble_adaptive_complete)},{is.logical(c$ensemble_adaptive_complete)})
      testit::assert('"ensemble_adaptive_min" parameter must be a valid positive integer.',{isvar(c$ensemble_adaptive_min)},{is.integer(c$ensemble_adaptive_min)},{ispos(c$ensemble_adaptive_min)})

    }else if(c$stack){
      testit::assert('Select at least 2 submodels for building a stack model.', {length(c$methods)>1},{all(c$method%in%names(.GlobalEnv$BSSEModels))})
      testit::assert('Stack model require one valid ensemble method parameter.',{isvar(c$ensemble_method)},{all(c$ensemble_method%in%names(.GlobalEnv$BSSEModels))})

      testit::assert('"ensemble_metric" parameter must be a single valid metric character.',{is.var(c$ensemble_metric)},{c$ensemble_metric%in%.GlobalEnv$BSSEMetrics})
      testit::assert('"ensemble_maximize" parameter must be a valid logical.',{isvar(c$ensemble_maximize)},{is.logical(c$ensemble_maximize)})

      #ensemble train control
      testit::assert('"ensemble_cv_method" parameter must be a valid cross validation character.',{isvar(c$ensemble_cv_method)},{all(c$ensemble_cv_method%in%.GlobalEnv$BSSECrossvalidation)})
      testit::assert('"ensemble_cv_method" parameter = "none" requires the ensemble_tune_length parameter to be = 1.',{!(c$ensemble_cv_method=="none" && c$ensemble_tune_length !=1)})
      testit::assert('"ensemble_number" parameter must be a valid positive integer.',{isvar(c$ensemble_number)},{is.integer(c$ensemble_number)},{ispos(c$ensemble_number)})
      testit::assert('"ensemble_repeats" parameter must be a valid positive integer.',{isvar(c$ensemble_repeats)},{is.integer(c$ensemble_repeats)},{ispos(c$ensemble_repeats)})
      testit::assert('"ensemble_p_locv" parameter must be a between 0-1.',{isvar(c$ensemble_p_locv)},{is01(c$ensemble_p_locv)})
      testit::assert('"ensemble_search" parameter must be a valid (grid,random) character.',{isvar(c$ensemble_search)},{all(c$ensemble_search%in%c('grid','random'))})

      #ensemble slices
      testit::assert('"ensemble_initial_window" parameter must be NA or positive integer.',{is.var(c$ensemble_initial_window)},{is.null(c$ensemble_initial_window) ||  is.integer(c$ensemble_initial_window) && ispos(c$ensemble_initial_window)})
      testit::assert('"ensemble_horizon" parameter must be a valid positive integer.',{isvar(horizon)},{is.integer(c$ensemble_horizon)},{ispos(c$ensemble_horizon)})
      testit::assert('"ensemble_fixed_window" parameter must be a valid logical.',{isvar(c$ensemble_fixed_window)},{is.logical(c$ensemble_fixed_window)})
      testit::assert('"ensemble_skip" parameter must be a 0 or valid integer.',{isvar(c$ensemble_skip)},{c$ensemble_skip==0 || (is.integer(c$ensemble_skip)&&ispos(c$ensemble_skip))})

      #ensemble adaptive
      testit::assert('"ensemble_adaptive_method" parameter must be a valid (gls,BT) character.',{isvar(c$ensemble_adaptive_method)},{all(c$ensemble_adaptive_method%in% c('gls','BT'))})
      testit::assert('"ensemble_adaptive_alpha" parameter must be a between 0-1.',{isvar(c$ensemble_adaptive_alpha)},{is01(c$ensemble_adaptive_alpha)})
      testit::assert('"ensemble_adaptive_complete" parameter must be a valid logical.',{isvar(c$ensemble_adaptive_complete)},{is.logical(c$ensemble_adaptive_complete)})
      testit::assert('"ensemble_adaptive_min" parameter must be a valid positive integer.',{isvar(c$ensemble_adaptive_min)},{is.integer(c$ensemble_adaptive_min)},{ispos(c$ensemble_adaptive_min)})

    }

    #general assertions
    #Train
    testit::assert('"metric" parameter must be a single valid metric character.',{is.var(c$metric)},{c$metric%in%.GlobalEnv$BSSEMetrics})
    testit::assert('"maximize" parameter must be a valid logical.',{isvar(c$maximize)},{is.logical(c$maximize)})
    testit::assert('"preprocess_methods" parameter must be a NULL or valid preprocessing characters.',{(is.var(c$preprocessing_methods) && is.null(c$preprocessing_methods)) || all(c$preprocessing_methods%in%.GlobalEnv$BSSEPreprocessing)})
    testit::assert('"tune_length" parameter must be a valid positive integer.',{isvar(c$tune_length)},{is.integer(c$tune_length)},{ispos(c$tune_length)})

    #preprocessing
    testit::assert('"thresh" parameter must be a between 0-1.',{isvar(c$thresh)},{is01(c$thresh)})
    testit::assert('"pca_comp" parameter must be NA or positive integer.',{is.var(c$pca_comp)},{is.null(c$pca_com) ||  is.integer(c$pca_comp) && ispos(c$pca_comp)})
    testit::assert('"k" parameter must be a valid positive integer.',{isvar(c$k)},{is.integer(c$k)},{ispos(c$k)})
    testit::assert('"fudge" parameter must be a between 0-1.',{isvar(c$fudge)},{is01(c$fudge)})
    testit::assert('"num_unique" parameter must be a valid positive integer.',{isvar(c$num_unique)},{is.integer(c$num_unique)},{ispos(c$num_unique)})
    testit::assert('"freq_cut" parameter must be a valid positive integer.',{isvar(c$freq_cut)},{is.integer(c$freq_cut)},{ispos(c$freq_cut)})
    testit::assert('"unique_cut" parameter must be a valid positive integer.',{isvar(c$unique_cut)},{is.integer(c$unique_cut)},{ispos(c$unique_cut)})
    testit::assert('"cut_off" parameter must be a between 0-1.',{isvar(c$cut_off)},{is01(c$cut_off)})
    testit::assert('"range_bounds" must be a numerical tuple.',{length(c$range_bounds)==2},{classNumber(c$range_bounds[1])&&classNumber(c$range_bounds[2])})

    #ica
    testit::assert('"ica_comp" parameter must be a valid positive integer.',{isvar(c$ica_comp)},{is.integer(c$ica_comp)},{ispos(c$ica_comp)})
    testit::assert('"ica_alg_type" parameter must be a valid ica algorithm type character (parallel,deflation).',{isvar(c$ica_alg_type)},{c$ica_alg_type%in%c('parallel','deflation')})
    testit::assert('"ica_fun" parameter must be a valid ica approximation function character (logcosh,exp).',{isvar(c$ica_fun)},{c$ica_fun%in%c('logcosh','exp')})
    testit::assert('"ica_alpha" parameter must be between [1,2]',{isvar(c$ica_alpha)},{classNumber(c$ica_alpha)},{c$ica_alpha>=1 && c$ica_alpha<=2})
    testit::assert('"ica_row_norm" parameter must be a logical',{isvar(c$ica_row_norm)},{is.logical(c$ica_row_norm)})
    testit::assert('"ica_maxit" parameter must be a valid positive integer.',{isvar(c$ica_maxit)},{is.integer(c$ica_maxit)},{ispos(c$ica_maxit)})
    testit::assert('"ica_tol" parameter must be a between 0-1.',{isvar(c$ica_tol)},{is01(c$ica_tol)})

    #train control
    testit::assert('"cv_method" parameter must be a valid cross validation character.',{isvar(c$cv_method)},{all(c$cv_method%in%.GlobalEnv$BSSECrossvalidation)})
    testit::assert('"cv_method" parameter = "none" requires the tune_length parameter to be = 1.',{!(c$cv_method=="none" && c$tune_length !=1)})
    testit::assert('"number" parameter must be a valid positive integer.',{isvar(c$number)},{is.integer(c$number)},{ispos(c$number)})
    testit::assert('"repeats" parameter must be a valid positive integer.',{isvar(c$repeats)},{is.integer(c$repeats)},{ispos(c$repeats)})
    testit::assert('"p_locv" parameter must be a between 0-1.',{isvar(c$p_locv)},{is01(c$p_locv)})
    testit::assert('"search" parameter must be a valid (grid,random) character.',{isvar(c$search)},{all(c$search%in%c('grid','random'))})

    #slices
    testit::assert('"initial_window" parameter must be NA or positive integer.',{is.var(c$initial_window)},{is.null(c$initial_window) ||  is.integer(c$initial_window) && ispos(c$initial_window)})
    testit::assert('"horizon" parameter must be a valid positive integer.',{isvar(horizon)},{is.integer(c$horizon)},{ispos(c$horizon)})
    testit::assert('"fixed_window" parameter must be a valid logical.',{isvar(c$fixed_window)},{is.logical(c$fixed_window)})
    testit::assert('"skip" parameter must be a 0 or valid integer.',{isvar(c$skip)},{c$skip==0 || (is.integer(c$skip)&&ispos(c$skip))})

    #adaptive
    testit::assert('"adaptive_method" parameter must be a valid (gls,BT) character.',{isvar(c$adaptive_method)},{all(c$adaptive_method%in% c('gls','BT'))})
    testit::assert('"adaptive_alpha" parameter must be a between 0-1.',{isvar(c$adaptive_alpha)},{is01(c$adaptive_alpha)})
    testit::assert('"adaptive_complete" parameter must be a valid logical.',{isvar(c$adaptive_complete)},{is.logical(c$adaptive_complete)})
    testit::assert('"adaptive_min" parameter must be a valid positive integer.',{isvar(c$adaptive_min)},{is.integer(c$adaptive_min)},{ispos(c$adaptive_min)})

    #others
    testit::assert('"p" parameter must be a between 0-1.',{isvar(c$p)},{is01(c$p)})
    testit::assert('"seed" parameter must be a valid positive integer.',{isvar(c$seed)},{is.integer(c$seed)},{ispos(c$seed)})
    testit::assert('"parallel" parameter must be a valid logical.',{isvar(c$parallel)},{is.logical(c$parallel)})
    testit::assert('"cores" parameter must be a valid positive integer.',{isvar(c$cores)},{is.integer(c$cores)},{ispos(c$cores)})

    out$Valid<-T
    out$Message<-NULL
    return(out)
  },
  error=function(e){
    out$Valid <- F
    out$Message <- as.character(e)
    return(out)
  })
}

buildModel<-function(X,Y,config){
  if(config$legacy){
    buildLegacy(X,Y,config)
  }else if(config$ensemble){
    buildEnsemble(X,Y,config)
  }else if(config$stack){
    buildStack(X,Y,config)
  }else{
    error('Failed to detect which meta type of model to build.')
  }
}
buildLegacy<-function(X,Y,config){
  model<-NULL
  tryCatch({

    if(config$parallel){
      cl <- parallel::makePSOCKcluster(config$cores)
      doParallel::registerDoParallel(cl)
    }

    set.seed(config$seed)

    idx_train <- caret::createDataPartition(X[2],times=1,p=config$p,list=FALSE)
    idx_test <- (1:nrow(X))[-idx_train]

    control <- caret::trainControl(
      method = config$cv_method,
      number = config$number,
      repeats = config$repeats,
      p = config$p_locv,
      search = config$search,
      initialWindow = config$initial_window,
      horizon = config$horizon,
      fixedWindow = config$fixed_window,
      skip= config$skip,
      verboseIter = F,
      returnData = T,
      returnResamp = "all",
      savePredictions = T,
      classProbs = F,
      summaryFunction = RegressionAbsoluteSummary,
      selectionFunction = config$selection,
      preProcOptions = getPreProcessOptions(config),
      sampling=config$sampling,
      timingSamples =0,
      indexFinal = idx_test,
      adaptive = list('min'=config$adaptive_min,'alpha'=config$adaptive_alpha,'method'=config$adaptive_method,'complete'=config$adaptive_complete),
      trim=F,
      allowParallel = config$parallel)


    model<-train(x = getPredictors(X,config),
          y = Y,
          method = config$methods,
          preProcess = getPreprocessMethods(config),
          metrics = config$metric,
          maximise = config$maximize,
          trControl = control,
          tuneLength =  config$tune_length,
          na.action = na.omit)


  },finally = {
    if(config$parallel){
      parallel::stopCluster(cl)
    }
    })
  return(model)
}
buildStack<-function(X,Y,config){
  model<-NULL
  tryCatch({

    if(config$parallel){
      cl <- parallel::makePSOCKcluster(config$cores)
      doParallel::registerDoParallel(cl)
    }

    set.seed(config$seed)

    idx_train <- caret::createDataPartition(X[2],times=1,p=config$p,list=FALSE)
    idx_test <- (1:nrow(X))[-idx_train]

    submodel_control <- caret::trainControl(
      method = config$cv_method,
      number = config$number,
      repeats = config$repeats,
      p = config$p_locv,
      search = config$search,
      initialWindow = config$initial_window,
      horizon = config$horizon,
      fixedWindow = config$fixed_window,
      skip= config$skip,
      verboseIter = F,
      returnData = T,
      returnResamp = "all",
      savePredictions = T,
      classProbs = F,
      summaryFunction = RegressionAbsoluteSummary,
      selectionFunction = config$selection_function,
      preProcOptions = getPreProcessOptions(config),
      sampling=config$sampling,
      timingSamples =0,
      indexFinal = idx_test,
      adaptive = list('min'=config$adaptive_min,'alpha'=config$adaptive_alpha,'method'=config$adaptive_method,'complete'=config$adaptive_complete),
      trim=F,
      allowParallel = config$parallel
    )

    ensemble_list <- caretEnsemble::caretList(
      x=getPredictors(X,config),y=Y,
      trControl=sub_control,
      tuneList = getEnsembleModelSpecsList(config),
      continue_on_fail = F
    )

    ensemble_control<-caret::trainControl(
      method = config$ensemble_cv_method,
      number = config$ensemble_number,
      repeats = config$ensemble_repeats,
      p = config$ensemble_p_locv,
      search = config$ensemble_search,
      initialWindow = config$ensemble_initial_window,
      horizon = config$ensemble_horizon,
      fixedWindow = config$ensemble_fixed_window,
      skip= config$ensemble_skip,
      verboseIter = F,
      returnData = T,
      returnResamp = "all",
      savePredictions = T,
      classProbs = F,
      summaryFunction = RegressionAbsoluteSummary,
      selectionFunction = config$ensemble_selection_function,
      sampling=config$sampling,
      timingSamples =0,
      adaptive = list('min'=config$ensemble_adaptive_min,'alpha'=config$ensemble_adaptive_alpha,'method'=config$ensemble_adaptive_method,'complete'=config$ensemble_adaptive_complete),
      trim=F,
      allowParallel = config$parallel
    )

    model <- caretEnsemble::caretStack(
      ensemble_list,
      method=config$ensemble_method,
      metric=config$ensemble_metric,
      maximize=config$ensemble_maximize,
      trControl=ensemble_control
    )


  },finally = {
    if(config$parallel){
      parallel::stopCluster(cl)
    }
  })
}
buildEnsemble<-function(X,Y,config){
  model<-NULL
  tryCatch({

    if(config$parallel){
      cl <- parallel::makePSOCKcluster(config$cores)
      doParallel::registerDoParallel(cl)
    }

    set.seed(config$seed)

    idx_train <- caret::createDataPartition(X[2],times=1,p=config$p,list=FALSE)
    idx_test <- (1:nrow(X))[-idx_train]

    submodel_control <- caret::trainControl(
      method = config$cv_method,
      number = config$number,
      repeats = config$repeats,
      p = config$p_locv,
      search = config$search,
      initialWindow = config$initial_window,
      horizon = config$horizon,
      fixedWindow = config$fixed_window,
      skip= config$skip,
      verboseIter = F,
      returnData = T,
      returnResamp = "all",
      savePredictions = T,
      classProbs = F,
      summaryFunction = RegressionAbsoluteSummary,
      selectionFunction = config$selection_function,
      preProcOptions = getPreProcessOptions(config),
      sampling=config$sampling,
      timingSamples =0,
      indexFinal = idx_test,
      adaptive = list('min'=config$adaptive_min,'alpha'=config$adaptive_alpha,'method'=config$adaptive_method,'complete'=config$adaptive_complete),
      trim=F,
      allowParallel = config$parallel
    )

    ensemble_list <- caretEnsemble::caretList(
      x=getPredictors(X,config),y=Y,
      trControl=sub_control,
      tuneList = getEnsembleModelSpecsList(config),
      continue_on_fail = F
    )

    ensemble_control<-caret::trainControl(
      method = config$ensemble_cv_method,
      number = config$ensemble_number,
      repeats = config$ensemble_repeats,
      p = config$ensemble_p_locv,
      search = config$ensemble_search,
      initialWindow = config$ensemble_initial_window,
      horizon = config$ensemble_horizon,
      fixedWindow = config$ensemble_fixed_window,
      skip= config$ensemble_skip,
      verboseIter = F,
      returnData = T,
      returnResamp = "all",
      savePredictions = T,
      classProbs = F,
      summaryFunction = RegressionAbsoluteSummary,
      selectionFunction = config$ensemble_selection_function,
      sampling=config$sampling,
      timingSamples =0,
      adaptive = list('min'=config$ensemble_adaptive_min,'alpha'=config$ensemble_adaptive_alpha,'method'=config$ensemble_adaptive_method,'complete'=config$ensemble_adaptive_complete),
      trim=F,
      allowParallel = config$parallel
    )

    model <- caretEnsemble::caretEnsemble(
      ensemble_list,
      metric=config$ensemble_metric,
      maximize=config$ensemble_maximize,
      trControl=ensemble_control
    )


  },finally = {
    if(config$parallel){
      parallel::stopCluster(cl)
    }
  })
}

getPreprocessMethods<-function(config){
  pre<-config$preprocess_methods
  if(length(pre)==0 || is.na(pre) || pre=="" || is.null(pre)){
    return(NULL)
  }else{
    return(pre)
  }
}
getPreProcessOptions<-function(config){

  pre<-list()
  pre$thresh <- config$thresh
  pre$pcaComp <- config$pca_comp
  pre$k <- config$k
  pre$knnSummary <- mean
  pre$fudge <- config$fudge
  pre$numUnique <- config$num_unique
  pre$freqCut <- config$freq_cut
  pre$uniqueCut <- config$unique_cut
  pre$cutoff <- config$cut_off
  pre$rangeBounds <- config$range_bounds
  pre$n.comp <- config$ica_comp
  pre$alg.typ <- config$ica_alg_type
  pre$fun <- config$ica_fun
  pre$alpha <- config$ica_alpha
  pre$method <- config$ica_method
  pre$row.norm <- config$ica_row_norm
  pre$maxit <- config$ica_maxit
  pre$tol <- config$ica_tol

  return(pre)

  # pre$thresh = 0.95
  # pre$pcaComp = NULL
  # pre$na.remove = TRUE
  # pre$k = 5
  # pre$knnSummary = mean
  # pre$outcome = NULL
  # pre$fudge = 0.2
  # pre$numUnique = 3
  # pre$verbose = FALSE
  # pre$freqCut = 95/5
  # pre$uniqueCut = 10
  # pre$cutoff = 0.9
  # pre$rangeBounds = c(0, 1)
  # pre$n.comp
  # pre$alg.typ = c("parallel","deflation")
  # pre$fun = c("logcosh","exp")
  # pre$alpha = 1.0
  # pre$method = c("R","C")
  # pre$row.norm = FALSE
  # pre$maxit =
  # pre$tol = 1e-04
  # pre$verbose = FALSE
  # pre$w.init = NULL

}
getPredictors <- function(X,config){
  if(config$use_time){
    return(X[-2,drop=F])
  }else{
    return(X[-(1:2),drop=F])
  }
}
tryMetric <- function(obs,pred,fun){
  metric<-NA
  tryDo({metric<-fun(obs,pred)})
  return(metric)
}
RegressionAbsoluteSummary <- function(data, lev = NULL, model = NULL){
  pred <- data[,"pred",drop = T]
  obs  <- data[,"obs", drop = T]
  prob_stats <- c(
  tryMetric(obs,pred,Metrics::ae),
  tryMetric(obs,pred,Metrics::ape),
  tryMetric(obs,pred,Metrics::bias),
  tryMetric(obs,pred,Metrics::mae),
  tryMetric(obs,pred,Metrics::mape),
  tryMetric(obs,pred,Metrics::mase),
  tryMetric(obs,pred,Metrics::mdae),
  tryMetric(obs,pred,Metrics::mse),
  tryMetric(obs,pred,Metrics::msle),
  tryMetric(obs,pred,Metrics::percent_bias),
  tryMetric(obs,pred,Metrics::rae),
  tryMetric(obs,pred,Metrics::rmse),
  tryMetric(obs,pred,Metrics::rmsle),
  tryMetric(obs,pred,Metrics::rrse),
  tryMetric(obs,pred,Metrics::rse),
  tryMetric(obs,pred,Metrics::se),
  tryMetric(obs,pred,Metrics::sle),
  tryMetric(obs,pred,Metrics::smape),
  tryMetric(obs,pred,Metrics::sse))
  names(prob_stats) <- c(
    "ae",
    "ape",
    "bias",
    "mae",
    "mape",
    "mase",
    "mdae",
    "mse",
    "msle",
    "percent_bias",
    "rae",
    "rmse",
    "rmsle",
    "rrse",
    "rse",
    "se",
    "sle",
    "smape",
    "sse"
  );
  }
getEnsembleModelSpec<-function(x,config){
  caretEnsembleModelSpec(method = x, preProcess = getPreprocessMethods(config),
                         metrics = config$metric,
                         maximise = config$maximize,
                         tuneLength = config$tune_length,
                         na.action = na.omit)
}
getEnsembleModelSpecsList <- function(config){

  modelnames <- config$methods
  toreturn<-lapply(modelnames,getEnsembleModelSpec,config)
  names(toreturn)<-modelnames
  return(toreturn)

}

#-------- Routes -------- #

# -----FILTERS ------ #


# -- AVAILABLE -- #


#* Get list of available configs for a user
#* @post /configs/available
function(req,res){
  body<-jsonlite::fromJSON(req$postBody)
  query<-queryByUserID(body$userid)
  fields<-'{"_id":1}'
  return(list(ids=.GlobalEnv$configs$find(query,fields)))
}

#* Get list of available models for a user
#* @post /models/available
function(userid){
  body<-jsonlite::fromJSON(req$postBody)
  query<-queryByUserID(body$userid)
  fields<-'{"_id":1}'
  return(list(ids=.GlobalEnv$models$find(query,fields)))
}



#* Loads configuration into Configs collection of BSSEmsembleR (as JSON object?)
#* @param userid userid corresponding to the owner of the config
#* @post /configs/load
function(req,userid){
  lista<-Json2Config(req,userid)
  .GlobalEnv$configs$insert(jsonlite::toJSON(lista))
  return(lista$'_id'$'$oid')
}

#* Loads model file in BSSEmsembler
#* @param userid userid corresponding to the owner of the model
#* @post /models/load
function(req){
  fileid <- MultipartModel2GridFS(req,.GlobalEnv$gridFS)
  saveUserFileID(.GlobalEnv$models,userid,fileid)
}

# -- MODEL -- #

#* Create a new model based on a configuration and dataset ids
#* @param userid corresponding to the user owning the new model.
#* @param datasetid corresponding to the dataset which provide data for the model.
#* @param configid corresponding to the dataset which configures the model.
#* @get /models/new
function(userid,datasetid,configid){

  fileid <- getFileIDByObjectID(datasetid)
  file<- getFileGridFS(.GlobalEnv$gridFS,fileID)

  #Loading (X,Y,config)
  load(file)
  unlink(file)
  config<-getConfig(configid)

  #from config decide which type of model to build
  model <- buildModel(X,Y,config)

  #Saving the model
  temp<-tempfile()
  saveRDS(model,temp)
  upload <-grid$write(temp,paste0('Build_',datasetid,'_',configid))
  saveUserFileID(.GlobalEnv$models,userid,upload$id)
  return(upload$id)

}

# -- PREDICT -- #

#TODO://takes 1 dataset and activatesit and allow user to send vector of doubles and get predictions

# -- INFO -- #



#* Gets config information in BSSEmsembler
#* @param configsid corresponding to the config document which the information will be retrieved
#* @get /configs/info
function(configid){
  return(getConfig(configid))
}

#* Gets model information in BSSEmsembler
#* @param modelid corresponding to the model which the information will be retrieved
#* @get /models/info
function(modelid){
  sum <- NULL
  val <- NULL
  pls <-NULL #TODO: CREATE PLOTS FOR THE DATASET LOADED

  fileid <- getFileIDByObjectID(.GlobalEnv$models,modelid)
  file <- getFileGridFS(.GlobalEnv$gridFS, fileid)
  tryDo({sum<-getModelSummary(file)})
  tryDo(val<-getModelValidation(file))
  #TODO:   tryDo(pls<-getDatasetPlots(file))
  unlink(file)
  return(list('Summary'=sum,'Validation'=val,'Plots'=pls))
}
AndreGuerra123/BSSEmsembleR documentation built on May 24, 2019, 2:35 p.m.