Nothing
# This file is part of the R package "aifeducation".
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as published by
# the Free Software Foundation.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
# Parameters' descriptions ----
#' @keywords internal
paramDesc.ml_framework <- function() {
" `string` Framework to use for training and inference.
* `ml_framework = \"tensorflow\"`: for 'tensorflow'.
* `ml_framework = \"pytorch\"`: for 'pytorch'. "
}
#' @keywords internal
paramDesc.sustain_track <- function() {
" `bool` If `TRUE` energy consumption is tracked during training via the python library codecarbon. "
}
#' @keywords internal
paramDesc.sustain_iso_code <- function() {
" `string` ISO code (Alpha-3-Code) for the country. This variable must be set if sustainability should be tracked. A
list can be found on Wikipedia: <https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes>. "
}
#' @keywords internal
paramDesc.sustain_region <- function() {
" `string` Region within a country. Only available for USA and Canada. See the documentation of codecarbon for more
information <https://mlco2.github.io/codecarbon/parameters.html>. "
}
#' @keywords internal
paramDesc.sustain_interval <- function() " `integer` Interval in seconds for measuring power usage. "
#' @keywords internal
paramDesc.trace <- function() {
" `bool` `TRUE` if information about the progress should be printed to the console. "
}
#' @keywords internal
paramDesc.pytorch_safetensors <- function() {
" `bool` Only relevant for pytorch models.
* `TRUE`: a 'pytorch' model is saved in safetensors format.
* `FALSE` (or 'safetensors' is not available): model is saved in the standard pytorch format (.bin). "
}
#' @keywords internal
paramDesc.log_dir <- function() " Path to the directory where the log files should be saved. "
#' @keywords internal
paramDesc.log_write_interval <- function() {
" `int` Time in seconds determining the interval in which the logger should try to update the log files. Only relevant
if `log_dir` is not `NULL`. "
}
#' @keywords internal
paramDesc.text_dataset <- function() " Object of class [LargeDataSetForText]. "
# Create ----
#' @keywords internal
paramDesc.model_dir <- function() " `string` Path to the directory where the model should be saved. "
#' @keywords internal
paramDesc.vocab_size <- function() " `int` Size of the vocabulary. "
#' @keywords internal
paramDesc.max_position_embeddings <- function() {
" `int` Number of maximum position embeddings. This parameter also determines the maximum length of a sequence which
can be processed with the model. "
}
#' @keywords internal
<- function() {
" `int` Number of neurons in each layer. This parameter determines the dimensionality of the resulting text
embedding. "
}
#' @keywords internal
<- function() " `string` Name of the activation function. "
#' @keywords internal
<- function() " `double` Ratio of dropout. "
#' @keywords internal
paramDesc.attention_probs_dropout_prob <- function() " `double` Ratio of dropout for attention probabilities. "
#' @keywords internal
paramDesc.intermediate_size <- function() {
" `int` Number of neurons in the intermediate layer of the attention mechanism. "
}
#' @keywords internal
paramDesc.num_attention_heads <- function() " `int` Number of attention heads. "
# Dependent parameters ----
#' @keywords internal
paramDesc.vocab_do_lower_case <- function() " `bool` `TRUE` if all words/tokens should be lower case. "
#' @keywords internal
<- function() " `int` Number of hidden layers. "
#' @keywords internal
<- function() {
" `int` Number of neurons in the final layer. This parameter determines the dimensionality of the resulting text
embedding. "
}
#' @keywords internal
paramDesc.block_sizes <- function() " `vector` of `int` determining the number and sizes of each block. "
#' @keywords internal
paramDesc.num_decoder_layers <- function() " `int` Number of decoding layers. "
#' @keywords internal
paramDesc.activation_dropout <- function() {
" `float` Dropout probability between the layers of the feed-forward blocks. "
}
#' @keywords internal
paramDesc.pooling_type <- function() {
" `string` Type of pooling.
* `\"mean\"` for pooling with mean.
* `\"max\"` for pooling with maximum values. "
}
#' @keywords internal
paramDesc.add_prefix_space <- function() {
" `bool` `TRUE` if an additional space should be inserted to the leading words. "
}
#' @keywords internal
paramDesc.trim_offsets <- function() " `bool` `TRUE` trims the whitespaces from the produced offsets. "
#' @keywords internal
paramDesc.attention_window <- function() {
" `int` Size of the window around each token for attention mechanism in every layer. "
}
# Train ----
#' @keywords internal
paramDesc.output_dir <- function() {
" `string` Path to the directory where the final model should be saved. If the directory does not exist, it will be
created. "
}
#' @keywords internal
paramDesc.model_dir_path <- function() " `string` Path to the directory where the original model is stored. "
#' @keywords internal
paramDesc.p_mask <- function() " `double` Ratio that determines the number of words/tokens used for masking. "
#' @keywords internal
paramDesc.whole_word <- function() {
" `bool`
* `TRUE`: whole word masking should be applied.
* `FALSE`: token masking is used. "
}
#' @keywords internal
paramDesc.val_size <- function() " `double` Ratio that determines the amount of token chunks used for validation. "
#' @keywords internal
paramDesc.n_epoch <- function() " `int` Number of epochs for training. "
#' @keywords internal
paramDesc.batch_size <- function() " `int` Size of batches. "
#' @keywords internal
paramDesc.chunk_size <- function() " `int` Size of every chunk for training. "
#' @keywords internal
paramDesc.min_seq_len <- function() {
" `int` Only relevant if `full_sequences_only = FALSE`. Value determines the minimal sequence length included in
training process. "
}
#' @keywords internal
paramDesc.full_sequences_only <- function() {
" `bool` `TRUE` for using only chunks with a sequence length equal to `chunk_size`. "
}
#' @keywords internal
paramDesc.learning_rate <- function() " `double` Learning rate for adam optimizer. "
#' @keywords internal
paramDesc.n_workers <- function() " `int` Number of workers. Only relevant if `ml_framework = \"tensorflow\"`. "
#' @keywords internal
paramDesc.multi_process <- function() {
" `bool` `TRUE` if multiple processes should be activated. Only relevant if `ml_framework = \"tensorflow\"`. "
}
#' @keywords internal
paramDesc.keras_trace <- function() {
" `int`
* `keras_trace = 0`: does not print any information about the training process from keras on the console.
* `keras_trace = 1`: prints a progress bar.
* `keras_trace = 2`: prints one line of information for every epoch. Only relevant if `ml_framework = \"tensorflow\"`.
"
}
#' @keywords internal
paramDesc.pytorch_trace <- function() {
" `int`
* `pytorch_trace = 0`: does not print any information about the training process from pytorch on the console.
* `pytorch_trace = 1`: prints a progress bar. "
}
# Transformer types ----
#' @keywords internal
get_allowed_transformer_types <- function(in_quotation_marks = FALSE) {
res_str <- ""
if (in_quotation_marks) {
for (i in 1:length(AIFETrType)) {
tr_name <- names(AIFETrType)[i]
if (i != 1) res_str <- paste0(res_str, ", ")
res_str <- paste0(res_str, "'", tr_name, "'")
}
} else {
res_str <- paste(unname(AIFETrType), collapse = ", ")
}
return(res_str)
}
#' @keywords internal
get_tr_types_list_decsription <- function() {
list_description <- ""
for (i in 1:length(AIFETrType)) {
tr_name <- names(AIFETrType)[i]
list_element <- paste0("* `", tr_name, "` = '", tr_name, "'")
list_description <- paste0(list_description, "\n", list_element)
}
return(list_description)
}
# For deprecated functions only------------------------------------------------
#' @keywords internal
paramDesc.raw_texts <- function() {
return(" `vector` containing the raw texts for training.")
}
#' @keywords internal
paramDesc.vocab_raw_texts <- function() {
return("`vector` containing the raw texts for creating the vocabulary.")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.