R/ABA_script.R

Defines functions generate_length_aba create_is_aba generate_codes generate_sequences count_events

Documented in count_events create_is_aba generate_codes generate_length_aba generate_sequences

#' @title Count the Number of Occurrences of Each Event in a Sequence
#'
#' @description This function counts the number of occurrences of each unique event in a sequence.
#' The result is a dataframe with two columns: ID and Frequency.
#'
#' @param event_vector A numeric vector representing a sequence of events.
#'
#' @return A dataframe with two columns: ID and Frequency, showing the number of occurrences of each event.
#'
#' @examples
#' speaker_no <- c(3, 2, 3, 1, 4, 2, 4, 1, 4, 3, 2, 3)
#' count_events(speaker_no)
#'
#' @export
count_events <- function(event_vector) {
  # 'table' counts the frequency of each unique value in 'event_vector'
  # The result is stored in 'event_counts'
  event_counts <- table(event_vector)

  # Convert the 'event_counts' to a data frame
  event_counts_df <- as.data.frame(table(event_vector))

  # Rename the columns to 'ID' and 'Frequency'
  colnames(event_counts_df) <- c("ID", "Frequency")

  # The function returns 'event_counts_df'
  return(event_counts_df)
}
#' @title Generate Sequences of a Given Length from a Numeric Vector
#'
#' @description This function generates sequences of a given length from a numeric vector.
#'
#' @param event_vector A numeric vector representing a sequence of events.
#' @param sequence_length An integer representing the length of sequences to generate. Currenlty only supported with sequence lengths of 3
#'
#' @return A dataframe containing the sequences and their ID.
#'
#' @examples
#' speaker_no <- c(3, 2, 3, 1, 4, 2, 4, 1, 4, 3, 2, 3)
#' generate_sequences(speaker_no, 3)
#'
#' @export
generate_sequences <- function(event_vector, sequence_length) {
  # Initialize an empty list to store sequences
  sequences <- list()

  # Loop through 'event_vector'
  for (i in 1:(length(event_vector) - sequence_length + 1)) {
    # Extract the sequence starting at position 'i' and of length 'sequence_length'
    sequence <- event_vector[i:(i + sequence_length - 1)]

    # Add the sequence to the 'sequences' list
    sequences[[i]] <- sequence
  }

  # Return the 'sequences' list
  return(sequences)
}
#' @title Generate Codes for Sequences Based on Certain Rules
#'
#' @description This function generates one of four possible codes for sequences: AAA, ABA, ABB, ABC.
#'
#' @param sequences A dataframe containing the input sequences.
#'
#' @return A dataframe of sequences with their corresponding codes.
#'
#' @examples
#' speaker_no <- c(3, 2, 3, 1, 4, 2, 4, 1, 4, 3, 2, 3)
#' sequences_df <- generate_sequences(speaker_no, 3)
#' generate_codes(sequences_df)
#'
#' @export
generate_codes <- function(sequences) {
  # Initialize an empty data frame to store the sequence ID and its corresponding code
  codes_df <- data.frame("Sequence_ID" = character(), "Code" = character())

  # Loop through each sequence in 'sequences'
  for (i in 1:length(sequences)) {
    sequence <- sequences[[i]]

    # Initialize an empty character vector to store the code
    code <- character(length(sequence))

    # Always code the first element as A
    code[1] <- "A"

    # Code the second element
    if (sequence[2] == sequence[1]) {
      code[2] <- "A"
    } else {
      code[2] <- "B"
    }

    # Code the third element
    if (sequence[3] == sequence[1]) {
      code[3] <- "A"
    } else if (sequence[3] == sequence[2]) {
      code[3] <- "B"
    } else {
      code[3] <- "C"
    }

    # Convert the sequence and the code to a single string
    sequence_id <- paste(sequence, collapse = "")
    code <- paste(code, collapse = "")

    # Add the sequence ID and its corresponding code to 'codes_df'
    codes_df <- rbind(codes_df, data.frame("Sequence_ID" = sequence_id, "Code" = code))
  }

  # Return 'codes_df'
  return(codes_df)
}
#' @title Create a Dummy Variable Indicating Whether a Code Represents 'ABA' (1) or not (0).
#'
#' @description This function creates a dummy variable indicating whether a code represents 'ABA'.
#'
#' @param codes_df A dataframe of binary codes generated by the generate_codes function.
#'
#' @return A dataframe of codes with an additional column indicating whether the code represents 'ABA'.
#'
#' @examples
#' speaker_no <- c(3, 2, 3, 1, 4, 2, 4, 1, 4, 3, 2, 3)
#' sequences_df <- generate_sequences(speaker_no, 3)
#' codes_df <- generate_codes(sequences_df)
#' create_is_aba(codes_df)
#'
#' @export
create_is_aba <- function(codes_df) {
  # Create a new column 'Is_ABA' that is 1 if 'Code' is "ABA", and 0 otherwise
  codes_df$Is_ABA <- as.integer(codes_df$Code == "ABA")

  # Return 'codes_df' with the new column
  return(codes_df)
}
#' @title Generate Length of Consecutive 'ABA'-Coded Sequences
#'
#' @description This function calculates the length of consecutive 'ABA'-coded sequences
#' that share common elements in their ID. It assigns NA to non-'ABA' codes.
#'
#' @param codes_df A dataframe of codes generated by the generate_codes function
#' and processed by the create_is_aba function.
#'
#' @return A dataframe of codes with an additional column representing the length of 'ABA' sequences.
#'
#' @examples
#' speaker_no <- c(3, 2, 3, 1, 4, 2, 4, 1, 4, 3, 2, 3)
#' sequences <- generate_sequences(speaker_no, 3)
#' codes <- generate_codes(sequences)
#' aba <- create_is_aba(codes)
#' length_aba <- generate_length_aba(aba)
#'
#' @export
generate_length_aba <- function(codes_df) {
  codes_df$Length_ABA <- NA  # Initialize Length_ABA column with NA

  # Start from the first row
  prev_length <- 0
  for (i in 1:nrow(codes_df)) {
    if (codes_df$Code[i] == "ABA") {  # If the code is ABA
      if (i == 1) {  # If it's the first row
        prev_length <- 1
      } else if (codes_df$Code[i - 1] == "ABA") {  # If the previous code was also ABA
        # If at least two digits are shared between this sequence and the previous one
        if (length(intersect(strsplit(as.character(codes_df$Sequence_ID[i]), "")[[1]],
                             strsplit(as.character(codes_df$Sequence_ID[i - 1]), "")[[1]])) >= 2) {
          prev_length <- prev_length + 1
        } else {  # If fewer than two digits are shared
          prev_length <- 1
        }
      } else {  # If the previous code was not ABA
        prev_length <- 1
      }
      codes_df$Length_ABA[i] <- prev_length
    }
  }

  return(codes_df)
}

Try the abasequence package in your browser

Any scripts or data that you put into this service are public.

abasequence documentation built on July 26, 2023, 5:43 p.m.