inst/doc/training_non_default_regression_models.R

## ---- include = FALSE---------------------------------------------------------
# Chunk defaults for the vignette: `collapse = TRUE` and "#>" as the prefix
# for printed output.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- packages_installation, eval = FALSE-------------------------------------
#  if(!require(devtools)){
#  	install.packages("devtools")
#  }
#  if(!require(infinityFlow)){
#  	library(devtools)
#  	install_github("ebecht/infinityFlow")
#  }

## ---- preparation-------------------------------------------------------------
library(infinityFlow)

# Load the example objects used throughout this script.
data(steady_state_lung)
data(steady_state_lung_annotation)
data(steady_state_lung_backbone_specification)

# All inputs and outputs live below a scratch folder inside the session's
# temporary directory. Each path is defined once and reused.
dir <- file.path(tempdir(), "infinity_flow_example")
input_dir <- file.path(dir, "fcs")
path_to_fcs <- input_dir
path_to_output <- file.path(dir, "output")
path_to_intermediary_results <- file.path(dir, "tmp")
backbone_selection_file <- file.path(dir, "backbone_selection_file.csv")

# Export the example flowSet before the CSV: nothing here creates `dir`
# explicitly, so write.csv() presumably relies on write.flowSet() having
# created the folder tree -- keep this order (TODO confirm).
write.flowSet(steady_state_lung, outdir = input_dir)
write.csv(
  steady_state_lung_backbone_specification,
  file = backbone_selection_file,
  row.names = FALSE
)

# Target and isotype columns of the annotation, named by its row names.
targets <- setNames(
  steady_state_lung_annotation$Infinity_target,
  rownames(steady_state_lung_annotation)
)
isotypes <- setNames(
  steady_state_lung_annotation$Infinity_isotype,
  rownames(steady_state_lung_annotation)
)

# Downsampling sizes and number of cores handed to infinity_flow() later on.
input_events_downsampling <- 1000
prediction_events_downsampling <- 500
cores <- 1L

## ---- fitter_functions--------------------------------------------------------
# Print the names of the objects in the attached infinityFlow package that
# match "fitter_".
print(ls("package:infinityFlow", pattern = "fitter_"))

## ---- dependencies------------------------------------------------------------
       # Optional packages attached for the non-default models used in this
       # script.
       optional_dependencies <- c("glmnetUtils", "e1071")

       # Find the packages that cannot be loaded. requireNamespace() is used
       # instead of scanning installed.packages(), which reads metadata for
       # every installed package and is documented as unsuitable for checking
       # whether a given package is available or usable.
       unmet_dependencies <- optional_dependencies[
           !vapply(optional_dependencies, requireNamespace, logical(1), quietly = TRUE)
       ]
       if (length(unmet_dependencies) > 0) {
           install.packages(unmet_dependencies)
       }

       # Attach every optional package; library() errors if one is still
       # missing after the installation attempt.
       for (pkg in optional_dependencies) {
           library(pkg, character.only = TRUE)
       }

## ---- regression_functions----------------------------------------------------
# Models to train, one fitter function per entry; the names are the labels
# attached to each model.
regression_functions <- list(
    # Gradient boosted trees (XGBoost)
    XGBoost = fitter_xgboost,
    # Support vector machine
    SVM = fitter_svm,
    # L1-penalized 2nd degree polynomial model
    LASSO2 = fitter_glmnet,
    # Linear model
    LM = fitter_linear
)

## ---- extra_args_regression_params--------------------------------------------
# Count of rows in the backbone selection file whose `type` column equals
# "backbone". Only the disabled neural-network example below refers to it.
backbone_size <- table(read.csv(backbone_selection_file)[, "type"])["backbone"]

# Hyperparameters, one list per model. Entries are matched BY POSITION with
# `regression_functions` (XGBoost, SVM, LASSO2, LM), so the order here must
# follow the order there.
extra_args_regression_params <- list(
    ## 1st element -> XGBoost (fitter_xgboost). See ?xgboost for which parameters can be passed through this list
    list(nrounds = 500, eta = 0.05),

    # ## Disabled example, NOT counted as an element: arguments for a neural network (fitter_nn), passed through keras::fit. See https://keras.rstudio.com/articles/tutorial_basic_regression.html
    # ## Enabling it requires adding the matching fitter at the same position in `regression_functions`.
    # list(
    #         object = { ## Specifies the network's architecture, loss function and optimization method
    #             model = keras_model_sequential()
    #             model %>%
    #                 layer_dense(units = backbone_size, activation = "relu", input_shape = backbone_size) %>%
    #                 layer_dense(units = backbone_size, activation = "relu", input_shape = backbone_size) %>%
    #                 layer_dense(units = 1, activation = "linear")
    #             model %>%
    #                 compile(loss = "mean_squared_error", optimizer = optimizer_sgd(lr = 0.005))
    #             serialize_model(model)
    #         },
    #         epochs = 1000, ## Number of maximum training epochs. The training is however stopped early if the loss on the validation set does not improve for 20 epochs. This early stopping is hardcoded in fitter_nn.
    #         validation_split = 0.2, ## Fraction of the training data used to monitor validation loss
    #         verbose = 0,
    #         batch_size = 128 ## Size of the minibatches for training.
    # ),

    # 2nd element -> SVM (fitter_svm). See help(svm, "e1071") for possible arguments
    list(type = "nu-regression", cost = 8, nu = 0.5, kernel = "radial"),

    # 3rd element -> LASSO2 (fitter_glmnet). This should contain a mandatory argument `degree` which specifies the degree of the polynomial model (1 for linear, 2 for quadratic etc...). Here we use degree = 2 corresponding to our LASSO2 model. Other arguments are passed to getS3method("cv.glmnet", "formula")
    list(alpha = 1, nfolds = 10, degree = 2),

    # 4th element -> LM (fitter_linear). This only accepts a degree argument specifying the degree of the polynomial model. Here we use degree = 1 corresponding to a linear model.
    list(degree = 1)
)

## ---- pipeline execution, eval = TRUE-----------------------------------------
# Every model needs exactly one list of hyperparameters; stop before the run
# starts if the two lists differ in length.
if (!identical(length(regression_functions), length(extra_args_regression_params))) {
  stop("Number of models and number of lists of hyperparameters mismatch")
}

# Run the pipeline with the models and hyperparameters defined above and keep
# its return value for inspection.
imputed_data <- infinity_flow(
  # Input and output locations
  path_to_fcs = path_to_fcs,
  path_to_output = path_to_output,
  path_to_intermediary_results = path_to_intermediary_results,
  backbone_selection_file = backbone_selection_file,
  # Annotation of the input files
  annotation = targets,
  isotype = isotypes,
  # Models and their hyperparameters
  regression_functions = regression_functions,
  extra_args_regression_params = extra_args_regression_params,
  # Downsampling, parallelism and logging
  input_events_downsampling = input_events_downsampling,
  prediction_events_downsampling = prediction_events_downsampling,
  cores = cores,
  verbose = TRUE
)

## ---- output------------------------------------------------------------------
   # Show the first two rows of the `bgc` element of the pipeline's result.
   print(imputed_data$bgc[seq_len(2), ])

## ---- cores-------------------------------------------------------------------
# Value passed as the `cores` argument of infinity_flow(); an integer literal.
cores <- 1L

## ---- eval = FALSE------------------------------------------------------------
#  optional_dependencies <- c("keras", "tensorflow")
#  unmet_dependencies <- setdiff(optional_dependencies, rownames(installed.packages()))
#  if(length(unmet_dependencies) > 0){
#       install.packages(unmet_dependencies)
#  }
#  for(pkg in optional_dependencies){
#      library(pkg, character.only = TRUE)
#  }
#  
#  invisible(eval(try(keras_model_sequential()))) ## avoids conflicts with flowCore...
#  
#  if(!is_keras_available()){
#       install_keras() ## Install keras using the R interface - can take a while
#  }
#  
#  if (!requireNamespace("BiocManager", quietly = TRUE)){
#      install.packages("BiocManager")
#  }
#  BiocManager::install("infinityFlow")
#  
#  library(infinityFlow)
#  
#  data(steady_state_lung)
#  data(steady_state_lung_annotation)
#  data(steady_state_lung_backbone_specification)
#  
#  dir <- file.path(tempdir(), "infinity_flow_example")
#  input_dir <- file.path(dir, "fcs")
#  write.flowSet(steady_state_lung, outdir = input_dir)
#  
#  write.csv(steady_state_lung_backbone_specification, file = file.path(dir, "backbone_selection_file.csv"), row.names = FALSE)
#  
#  path_to_fcs <- file.path(dir, "fcs")
#  path_to_output <- file.path(dir, "output")
#  path_to_intermediary_results <- file.path(dir, "tmp")
#  backbone_selection_file <- file.path(dir, "backbone_selection_file.csv")
#  
#  targets <- steady_state_lung_annotation$Infinity_target
#  names(targets) <- rownames(steady_state_lung_annotation)
#  isotypes <- steady_state_lung_annotation$Infinity_isotype
#  names(isotypes) <- rownames(steady_state_lung_annotation)
#  
#  input_events_downsampling <- 1000
#  prediction_events_downsampling <- 500
#  
#  ## Passed to fitter_nn, e.g. neural networks through keras::fit. See https://keras.rstudio.com/articles/tutorial_basic_regression.html
#  regression_functions <- list(NN = fitter_nn)
#  
#  backbone_size <- table(read.csv(backbone_selection_file)[,"type"])["backbone"]
#  extra_args_regression_params <- list(
#          list(
#  		object = { ## Specifies the network's architecture, loss function and optimization method
#  		model = keras_model_sequential()
#  		model %>%
#  		layer_dense(units = backbone_size, activation = "relu", input_shape = backbone_size) %>%
#  		layer_dense(units = backbone_size, activation = "relu", input_shape = backbone_size) %>%
#  		layer_dense(units = 1, activation = "linear")
#  		model %>%
#  		compile(loss = "mean_squared_error", optimizer = optimizer_sgd(lr = 0.005))
#  		serialize_model(model)
#  		},
#  		epochs = 1000, ## Number of maximum training epochs. The training is however stopped early if the loss on the validation set does not improve for 20 epochs. This early stopping is hardcoded in fitter_nn.
#  		validation_split = 0.2, ## Fraction of the training data used to monitor validation loss
#  		verbose = 0,
#  		batch_size = 128 ## Size of the minibatches for training.
#  	)
#  )
#  
#  imputed_data <- infinity_flow(
#  	regression_functions = regression_functions,
#  	extra_args_regression_params = extra_args_regression_params,
#  	path_to_fcs = path_to_fcs,
#  	path_to_output = path_to_output,
#  	path_to_intermediary_results = path_to_intermediary_results,
#  	backbone_selection_file = backbone_selection_file,
#  	annotation = targets,
#  	isotype = isotypes,
#  	input_events_downsampling = input_events_downsampling,
#  	prediction_events_downsampling = prediction_events_downsampling,
#  	verbose = TRUE,
#  	cores = 1L
#  )

## -----------------------------------------------------------------------------
# Report the R version, platform and attached/loaded package versions used to
# run this script, for reproducibility.
sessionInfo()

Try the infinityFlow package in your browser

Any scripts or data that you put into this service are public.

infinityFlow documentation built on Nov. 8, 2020, 8:25 p.m.