# R/obj_TEClassifierRegular.R

# This file is part of the R package "aifeducation".
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as published by
# the Free Software Foundation.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>

#' @title Text embedding classifier with a neural net
#' @description Classifier based on neural nets with 'pytorch'.
#'
#' This class is **deprecated**. Please use an object of class [TEClassifierSequential] instead.
#'
#' @return Objects of this class are used for assigning texts to classes/categories. For the creation and training of a
#'   classifier, an object of class [EmbeddedText] or [LargeDataSetForTextEmbeddings] on the one hand and a [factor] on
#'   the other hand are necessary.
#'
#'   The object of class [EmbeddedText] or [LargeDataSetForTextEmbeddings] contains the numerical text representations
#'   (text embeddings) of the raw texts generated by an object of class [TextEmbeddingModel]. To support large data
#'   sets, it is recommended to use [LargeDataSetForTextEmbeddings] instead of [EmbeddedText].
#'
#'   The `factor` contains the classes/categories for every text. Missing values (unlabeled cases) are supported and can
#'   be used for pseudo labeling.
#'
#'   For predictions, an object of class [EmbeddedText] or [LargeDataSetForTextEmbeddings] must be used that was
#'   created with the same [TextEmbeddingModel] as used for training.
#'
#' @family Classification
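#' @details The examples section below contains a minimal configuration sketch. It assumes two
#'   hypothetical objects that are not created in this file: `embedded_texts` (an [EmbeddedText]
#'   or [LargeDataSetForTextEmbeddings]) and `labels` (a [factor] holding the classes of the
#'   labeled cases).
#' @examples
#' \dontrun{
#' # Illustrative sketch only; `embedded_texts` and `labels` are placeholders.
#' classifier <- TEClassifierRegular$new()
#' classifier$configure(
#'   name = "example_classifier",
#'   label = "Example classifier",
#'   text_embeddings = embedded_texts,
#'   target_levels = levels(labels)
#' )
#' }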
#' @export
TEClassifierRegular <- R6::R6Class(
  classname = "TEClassifierRegular",
  inherit = TEClassifiersBasedOnRegular,
  public = list(
    #' @description Creating a new instance of this class.
    #' @return Returns an object of class [TEClassifierRegular] which is ready for configuration.
    initialize = function() {
      message("TEClassifierRegular is deprecated. Please use TEClassifierSequential.")
    },
    # New-----------------------------------------------------------------------
    #' @description Configuring a new instance of this class.
    #' @param name `r get_param_doc_desc("name")`
    #' @param label `r get_param_doc_desc("label")`
    #' @param text_embeddings `r get_param_doc_desc("text_embeddings")`
    #' @param feature_extractor `r get_param_doc_desc("feature_extractor")`
    #' @param bias `r get_param_doc_desc("bias")`
    #' @param target_levels `r get_param_doc_desc("target_levels")`
    #' @param dense_layers `r get_param_doc_desc("dense_layers")`
    #' @param dense_size `r get_param_doc_desc("dense_size")`
    #' @param rec_layers `r get_param_doc_desc("rec_layers")`
    #' @param rec_size `r get_param_doc_desc("rec_size")`
    #' @param rec_type `r get_param_doc_desc("rec_type")`
    #' @param rec_bidirectional `r get_param_doc_desc("rec_bidirectional")`
    #' @param attention_type `r get_param_doc_desc("attention_type")`
    #' @param self_attention_heads `r get_param_doc_desc("self_attention_heads")`
    #' @param repeat_encoder `r get_param_doc_desc("repeat_encoder")`
    #' @param intermediate_size `r get_param_doc_desc("intermediate_size")`
    #' @param add_pos_embedding `r get_param_doc_desc("add_pos_embedding")`
    #' @param act_fct `r get_param_doc_desc("act_fct")`
    #' @param parametrizations `r get_param_doc_desc("parametrizations")`
    #' @param encoder_dropout `r get_param_doc_desc("encoder_dropout")`
    #' @param dense_dropout `r get_param_doc_desc("dense_dropout")`
    #' @param rec_dropout `r get_param_doc_desc("rec_dropout")`
    #' @note This model requires `pad_value=0`. If this condition is not met, the
    #' padding value is switched automatically.
    #' @return Returns an object of class [TEClassifierRegular] which is ready for training.
    configure = function(name = NULL,
                         label = NULL,
                         text_embeddings = NULL,
                         feature_extractor = NULL,
                         target_levels = NULL,
                         bias = TRUE,
                         dense_size = 4L,
                         dense_layers = 0L,
                         rec_size = 4L,
                         rec_layers = 2L,
                         rec_type = "GRU",
                         rec_bidirectional = FALSE,
                         self_attention_heads = 0L,
                         intermediate_size = NULL,
                         attention_type = "Fourier",
                         add_pos_embedding = TRUE,
                         act_fct = "ELU",
                         parametrizations = "None",
                         rec_dropout = 0.1,
                         repeat_encoder = 1L,
                         dense_dropout = 0.4,
                         encoder_dropout = 0.1) {
      private$do_configuration(args = get_called_args(n = 1L))
    }
  ),
  # Private---------------------------------------------------------------------
  private = list(
    #--------------------------------------------------------------------------
    create_reset_model = function() {
      private$check_config_for_TRUE()

      private$load_reload_python_scripts()

      private$model <- py$TextEmbeddingClassifier_PT(
        features = as.integer(private$model_config$features),
        times = as.integer(private$model_config$times),
        bias = private$model_config$bias,
        dense_size = as.integer(private$model_config$dense_size),
        dense_layers = as.integer(private$model_config$dense_layers),
        rec_size = as.integer(private$model_config$rec_size),
        rec_layers = as.integer(private$model_config$rec_layers),
        rec_type = private$model_config$rec_type,
        rec_bidirectional = private$model_config$rec_bidirectional,
        intermediate_size = as.integer(private$model_config$intermediate_size),
        attention_type = private$model_config$attention_type,
        repeat_encoder = as.integer(private$model_config$repeat_encoder),
        dense_dropout = private$model_config$dense_dropout,
        rec_dropout = private$model_config$rec_dropout,
        encoder_dropout = private$model_config$encoder_dropout,
        pad_value = private$text_embedding_model$pad_value,
        add_pos_embedding = private$model_config$add_pos_embedding,
        self_attention_heads = as.integer(private$model_config$self_attention_heads),
        target_levels = private$model_config$target_levels,
        act_fct = private$model_config$act_fct,
        parametrizations = private$model_config$parametrizations
      )
    },
    #--------------------------------------------------------------------------
    load_reload_python_scripts = function() {
      super$load_reload_python_scripts()
      load_py_scripts("pytorch_old_scripts.py")
    },
    #--------------------------------------------------------------------------
    check_param_combinations_configuration = function() {
      if (private$model_config$dense_layers > 0L) {
        if (private$model_config$dense_size < 1L) {
          stop("Dense layers added. Size for dense layers must be at least 1.")
        }
      }

      if (private$model_config$rec_layers > 0L) {
        if (private$model_config$rec_size < 1L) {
          stop("Recurrent  layers added. Size for recurrent layers must be at least 1.")
        }
      }

      if (private$model_config$repeat_encoder > 0L &&
        private$model_config$attention_type == "MultiHead" &&
        private$model_config$self_attention_heads <= 0L) {
        stop("attention_type is set to 'MultiHead'. This requires self_attention_heads >= 1.")
      }

      if (private$model_config$rec_layers != 0L && private$model_config$self_attention_heads > 0L) {
        if (private$model_config$features %% 2L != 0L) {
          stop("The number of features of the TextEmbeddingModel is not a multiple of 2.")
        }
      }

      if (private$model_config$rec_layers == 1L && private$model_config$rec_dropout > 0.0) {
        print_message(
          msg = "Dropout for recurrent layers requires at least two layers. Setting rec_dropout to 0.0.",
          trace = TRUE
        )
        private$model_config$rec_dropout <- 0.0
      }
    },
    #--------------------------------------------------------------------------
    adjust_configuration = function() {
      if (is.null(private$model_config$intermediate_size)) {
        if (private$model_config$attention_type == "Fourier" & private$model_config$rec_layers > 0L) {
          private$model_config$intermediate_size <- 2L * private$model_config$rec_size
        } else if (private$model_config$attention_type == "Fourier" & private$model_config$rec_layers == 0L) {
          private$model_config$intermediate_size <- 2L * private$model_config$features
        } else if (
          private$model_config$attention_type == "MultiHead" &
            private$model_config$rec_layers > 0L &
            private$model_config$self_attention_heads > 0L
        ) {
          private$model_config$intermediate_size <- 2L * private$model_config$features
        } else if (
          private$model_config$attention_type == "MultiHead" &
            private$model_config$rec_layers == 0L &
            private$model_config$self_attention_heads > 0L
        ) {
          private$model_config$intermediate_size <- 2L * private$model_config$features
        } else {
          private$model_config$intermediate_size <- NULL
        }
      }

      if (private$model_config$rec_layers <= 1L) {
        private$model_config$rec_dropout <- 0.0
      }
      if (private$model_config$rec_layers <= 0L) {
        private$model_config$rec_size <- 0L
      }

      if (private$model_config$dense_layers <= 1L) {
        private$model_config$dense_dropout <- 0.0
      }
      if (private$model_config$dense_layers <= 0L) {
        private$model_config$dense_size <- 0L
      }
    }
  )
)

# Add Classifier to central index
TEClassifiers_class_names <- append(x = TEClassifiers_class_names, values = "TEClassifierRegular")
