R/S03_Utilities.R

Defines functions update_package_version templates section create_vector create_table_of_contents strip_value percent_that_sums_to_100 not_NA squish replace_string format_numbers align_strings runs_in_sequence print_table over new_limits lin group_index find_increment `every<-` every empty_list data_first add_to_list replace_cases recode match_rows match_and_reorder assign_by_interval save_png_figure copy_from_source source_R_scripts path_to_file load_R_object load_package make_file_name find_file_name date_and_time take_x_by_y term_prep term_coding_effect term_coding_dummy term_coding term_combo term_new pull_id has_NA duplicate_wide_to_long column_by_other column

Documented in add_to_list align_strings assign_by_interval column column_by_other copy_from_source create_table_of_contents create_vector data_first date_and_time duplicate_wide_to_long empty_list every find_file_name find_increment format_numbers group_index has_NA lin load_package load_R_object make_file_name match_and_reorder match_rows new_limits not_NA over path_to_file percent_that_sums_to_100 print_table pull_id recode replace_cases replace_string runs_in_sequence save_png_figure section source_R_scripts squish strip_value take_x_by_y templates term_coding term_coding_dummy term_coding_effect term_combo term_new term_prep update_package_version

# Utilities
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2024-09-03

# Table of contents
# 1) Functions for data frames and matrices
#   1.1) column
#   1.2) column_by_other
#   1.3) duplicate_wide_to_long
#   1.4) has_NA
#   1.5) pull_id
#   1.6) take_x_by_y
# 2) Functions for files
#   2.1) date_and_time
#   2.2) find_file_name
#   2.3) make_file_name
#   2.4) load_package
#   2.5) load_R_object
#   2.6) path_to_file
#   2.7) source_R_scripts
#   2.8) copy_from_source
#   2.9) save_png_figure
# 3) Functions for matching and assignment
#   3.1) assign_by_interval
#   3.2) match_and_reorder
#   3.3) match_rows
#   3.4) recode
#   3.5) replace_cases
# 4) Functions for miscellaneous
#   4.1) add_to_list
#   4.2) data_first
#   4.3) empty_list
#   4.4) every
#     4.4.1) `every`
#     4.4.2) `every<-`
#   4.5) find_increment
#   4.6) group_index
#   4.7) lin
#   4.8) new_limits
#   4.9) over
#   4.10) print_table
#   4.11) runs_in_sequence
# 5) Functions for strings
#   5.1) align_strings
#   5.2) format_numbers
#   5.3) replace_string
#   5.4) squish
# 6) Functions for vectors
#   6.1) not_NA
#   6.2) percent_that_sums_to_100
#   6.3) strip_value
# 7) Functions for writing code
#   7.1) create_table_of_contents
#   7.2) create_vector
#   7.3) section
#   7.4) templates
#   7.5) update_package_version


# TO DO
# - Add Custom tests for file/folder functions
# - Add unit tests for functions

#### 1) Functions for data frames and matrices ####

#### 1.1) column ####
#' Extract Column Names Meeting Inclusion/Exclusion Criteria
#'
#' A function that matches or excludes column names in a
#' data frame based on user-supplied sub-strings.
#'
#' @param dtf A data frame.
#' @param ... Character strings with the sub-strings to match
#'   (or exclude) against the column names in \code{dtf}.
#'   If an entry starts with either \code{!}, \code{~}, or
#'   \code{-}, any columns containing the substring will be
#'   excluded. Otherwise, the function will locate
#'   all column names containing all inputted sub-strings.
#'
#' @author Kevin Potter
#'
#' @return A vector of column names meeting the inclusion
#' and exclusion criteria.
#'
#' @examples
#' # Create a data frame
#' dtf <- data.frame(
#'   IDS.INT.Subject = rep( 1:4, each = 2 ),
#'   SSS.CHR.Group = rep( c( 'A', 'A', 'B', 'B' ), each = 2 ),
#'   SSS.INT.Group = rep( c( 1, 1, 2, 2 ), each = 2 ),
#'   SSS.LGC.Group_A = rep( c( T, T, F, F ), each = 2 ),
#'   SSS.CHR.Time_point = rep( c( 'Pre', 'Post' ), 4 ),
#'   SSS.INT.Time_point = rep( 0:1, 4 ),
#'   OUT.DBL.Scores = rnorm( 8 )
#' )
#'
#' #' # All variables containing 'SSS'
#' column( dtf, 'SSS' )
#'
#' # All variables containing both 'SSS' and 'CHR'
#' column( dtf, 'SSS', 'CHR' )
#'
#' # Variables containing 'SSS' but not 'CHR'
#' column( dtf, 'SSS', '~CHR' )
#'
#' @export

column <- function( dtf, ... ) {

  args <- list(...)
  n_args <- length( args )

  include <- rep( '', n_args )
  exclude <- rep( '', n_args )
  inc_i <- 1
  inc_e <- 1
  for ( i in 1:n_args ) {
    txt <- as.character( args[[i]] )
    if ( grepl( '!', txt, fixed = T ) |
         grepl( '~', txt, fixed = T ) |
         grepl( '-', txt, fixed = T ) ) {
      txt <- gsub( '!', '', txt, fixed = T )
      txt <- gsub( '~', '', txt, fixed = T )
      txt <- gsub( '-', '', txt, fixed = T )
      exclude[inc_e] <- txt
      inc_e <- inc_e + 1
    } else {
      include[inc_i] <- txt
      inc_i <- inc_i + 1
    }
  }

  if ( all( include == '' ) ) {
    include = NULL
  } else {
    include <- include[ include != '' ]
  }
  if ( all( exclude == '' ) ) {
    exclude = NULL
  } else {
    exclude <- exclude[ exclude != '' ]
  }

  clm <- colnames( dtf )
  K <- length( clm )

  if ( !is.null( include ) ) {
    each_include <- sapply( include, function(x) {
      grepl( x, clm, fixed = T )
    } )
  } else {
    each_include = cbind( rep( T, K ) )
  }


  if ( !is.null( exclude ) ) {
    each_exclude <- sapply( exclude, function(x) {
      grepl( x, clm, fixed = T )
    } )
  } else {
    each_exclude = cbind( rep( F, K ) )
  }

  entries =
    rowSums( each_include ) == length( include ) &
    !( rowSums( each_exclude ) > 0 )

  return( clm[ entries ] )
}

#### 1.2) column_by_other ####
#' Unique Values of one Column by Another
#'
#' A function that takes two columns in a data frame
#' and reports the unique values of one column
#' associated with the unique values of the other
#' column.
#'
#' @param dtf A data frame.
#' @param col1 The first column (non-standard evaluation
#'   possible).
#' @param col2 The second column (non-standard evaluation
#'   possible).
#'
#' @author Kevin Potter
#'
#' @return A data frame with the unique values
#' of the first column associated with the
#' unique values of the second.
#'
#' @examples
#' # Define a data frame
#' dtf <- data.frame(
#'   A = c( 1, 1, 2, 2, 3, 3 ),
#'   B = c( 'A', 'A', 'B', 'B', 'C', 'D' )
#' )
#'
#' # Values of column 'B' by values of column 'A'
#' column_by_other( dtf, A, B )
#'
#' @export

column_by_other <- function( dtf, col1, col2 ) {

  # Non-standard evaluation
  V <- as.character( substitute( col1 ) )
  L <- as.character( substitute( col2 ) )

  dtf$Cur_values = dtf[[ V ]]
  dtf$Cur_labels = dtf[[ L ]]

  val <- sort( unique( dtf$Cur_values ) )
  val <- val[ !is.na( val ) ]

  lbl <- lapply(
    val,
    function(x) unique( dtf$Cur_labels[ dtf$Cur_values %in% x ] )
  )

  n <- sum( sapply( lbl, length ) )
  out <- data.frame(
    V = rep( NA, n ),
    L = rep( NA, n )
  )
  colnames( out ) <- c( V, L )

  inc <- 0
  for ( i in 1:length( val ) ) {

    index <- 1:length( lbl[[i]] )
    out[ index + inc, 1 ] <- val[i]
    out[ index + inc, 2 ] <- lbl[[i]]

    inc <- max(index + inc)
  }

  return( out )
}

#### 1.3) duplicate_wide_to_long ####
#' Duplicate Values from Wide-Form to Long-Form Data
#'
#' Function to duplicate values in column \code{x}
#' from a wide-form (one row per case) data set \code{wf}
#' based on a shared column \code{y} with a long-form
#' (multiple rows per case) data set \code{lf}.
#'
#' @param wf A wide-form data frame.
#' @param lf A long-form data frame.
#' @param x The column in \code{wf} with values to
#'   duplicate (non-standard evaluation possible).
#' @param y The column in both \code{wf} and \code{lf}
#'   over which to repeat values over (non-standard
#'   evaluation possible).
#' @param default The value to substitute if
#'   no cases for \code{x} based on \code{y}
#'   are found to duplicate.
#'
#' @return A vector matching in length to the number
#' of rows of \code{lf} with the values of \code{x}
#' repeated for each unique case of \code{y}.
#'
#' @examples
#' # Example wide-form data-frame
#' wf <- data.frame(
#'   ID = 1:3,
#'   Value = 4:6
#' )
#'
#' # Example long-form data-frame
#' lf <- data.frame(
#'   ID = rep( 1:3, each = 3),
#'   Value = NA
#' )
#'
#' # Duplicate values from 'wf' based
#' # on shared # column 'ID'
#' lf$Value <- duplicate_wide_to_long( wf, lf, x = Value, y = ID )
#' print( lf )
#'
#' @export

duplicate_wide_to_long <- function( wf, lf, x, y, default = NA ) {

  # Non-standard evaluation
  X <- as.character(substitute(x))
  Y <- as.character(substitute(y))

  N <- nrow( lf )

  out <- sapply( 1:N, function(j) {

    val <- default

    if ( !is.na( lf[[ Y ]][j] ) ) {

      match_to_wf <-
        wf[[ Y ]] %in% lf[[ Y ]][j] &
        !is.na( wf[[ Y ]] )

      if ( any( match_to_wf ) ) {

        val <- wf[[ X ]][ match_to_wf ][1]

      }

    }

    return( val )
  } )

  return( out )
}

#### 1.4) has_NA ####
#' Identify NA Values by Row
#'
#' Identifies rows in a matrix or data frame
#' that contain any (or all) NA values.
#'
#' @param x A matrix or data frame.
#' @param any Logical; if \code{TRUE} check
#'   if any observation in the row is a \code{NA}
#'   value, else checks if all observations are
#'   \code{NA}.
#'
#' @return A logical vector with values of
#' \code{TRUE} for rows with any \code{NA}
#' values (or rows where all values are \code{NA}
#' when \code{any = FALSE}).
#'
#' @examples
#' x <- matrix(rnorm(9), 3, 3)
#' x[2, 3] <- NA
#' has_NA(x)
#' x <- data.frame(A = c(1, 2, NA), B = 0)
#' has_NA(x)
#'
#' x <- matrix(rnorm(9), 3, 3)
#' x[2, ] <- NA
#' x[3, 1] <- NA
#' has_NA(x, any = FALSE)
#' @export

has_NA <- function(x, any = TRUE) {

  # Initialize output
  out <- NULL

  # If input is matrix or data frame
  if (is.matrix(x) |
      is.data.frame(x)) {
    # Check whether NA values are present in
    # any of the rows
    if (any) {
      out <- apply(x, 1, function(r) any(is.na(r)))
    } else {
      out <- apply(x, 1, function(r) all(is.na(r)))
    }
  } else {
    # Throw an error message
    string <- "Input should be a matrix or data frame"
    stop(string, call. = FALSE)
  }

  return(out)
}

#### 1.5) pull_id ####
#' Extract Subject IDs From a Data-Frame
#'
#' Function to extract the unique values for
#' subject identifiers in a data frame,
#' assuming a common variable name
#' (e.g., \code{IDS.CHR.Subject} or \code{ID}).
#'
#' @param dtf A data frame.
#' @param subject Logical; if \code{TRUE}
#'   looks for common variable names for
#'   subject identifiers (IDS.CHR.Subject,
#'   ID, etc.). If \code{FALSE} looks for
#'   common variable names for screening
#'   identifiers (IDS.CHR.Screen, etc.).
#' @param id A user-defined variable name
#'   if the column with subject identifiers
#'   is not part of the commonly-used labels.
#'
#' @return A vector of values.
#'
#' @examples
#' dtf <- data.frame( ID = rep( 1:3, each = 3 ), X = rnorm(9) )
#' pull_id( dtf )
#'
#' @export

pull_id <- function( dtf, subject = TRUE, id = NULL ) {

  clm <- colnames( dtf )

  if ( is.null( id ) ) {

    if ( subject ) {
      # Check for standard variable names for subject IDs
      standard_id_forms <- c(
        'IDS.CHR.Subject',
        'IDS.INT.Subject',
        'IDS.CHR.AT.Subject',
        'IDS.INT.AT.Subject',
        'ID',
        'study_id',
        'id',
        'studyid'
      )
    } else {
      # Check for standard variable names for subject IDs
      standard_id_forms <- c(
        'IDS.CHR.Screen',
        'IDS.INT.Screen',
        'ID'
      )
    }

    any_match <- standard_id_forms %in% clm

    if ( !any( any_match ) ) {
      stop(
        'No pre-defined variable name for ID found'
      )
    } else {
      id <- standard_id_forms[any_match][1]
    }

  }

  # Extract unique IDs
  out <- unique( dtf[[ id ]] )

  return( out )
}

#### 1.6) term ####

#### 1.6.1) term_new ####
#' Template for Creating New Term
#'
#' Function to help format input to the
#' [arfpam::term_prep] function. Initializes
#' the sublist with the parameters for creating
#' a new term based on an existing variable in a
#' data frame.
#'
#' @param label An optional label for the new term.
#' @param coding A list with the parameters for
#'   recoding the base variable (see [arfpam::term_coding]).
#' @param transformation A character string with an
#'   R expression for transforming the base variable.
#' @param range A vector with the range for the term.
#' @param scale Either a logical value, or a named
#'   vector of the form \code{c(m = ..., sd = ...)}
#'   for standardizing the term.
#' @param order A character vector that can be used
#'   to specify the order in which coding (\code{'c'}),
#'   transformations (\code{'t'}), and scaling (\code{'s'})
#'   are done.
#'
#' @returns A list.
#'
#' @export

term_new <- function( label = '',
                      coding = NULL,
                      transformation = NULL,
                      range = NULL,
                      scale = NULL,
                      order = NULL ) {

  output <- list()
  if ( !is.null( label ) )
    output$label <- label
  if ( !is.null( coding ) )
    output$coding <- coding
  if ( !is.null( transformation ) )
    output$transformation <- transformation
  if ( !is.null( range ) )
    output$range <- range
  if ( !is.null( scale ) )
    output$scale <- scale
  if ( !is.null( order ) )
    output$order <- order

  return( output )
}

#### 1.6.2) term_combo ####
#' Template for Combining Terms
#'
#' Function to help format input to the
#' [arfpam::term_prep] function. Initializes
#' the sublist with the parameters for creating
#' a new term by combining other existing
#' terms (e.g., an interaction).
#'
#' @param combine A character vector, the names
#'   of the terms/variables to combine. Providing a
#'   named vector
#' @param label An optional label for the new term.
#' @param transformation An optional character string
#'   specifying an R expression indicating how terms
#'   should be combined. Variables are matched to the
#'   named elements of \code{combine}. If not provided
#'   defaults to multiplying all terms together (i.e.,
#'   an interaction).
#' @param scale Either a logical value, or a named
#'   vector of the form \code{c(m = ..., sd = ...)}
#'   for standardizing the term.
#'
#' @returns A list.
#'
#' @export

term_combo <- function( combine,
                        label = '',
                        transformation = NULL,
                        scale = NULL ) {

  output <- list(
    combine = combine
  )
  if ( !is.null( label ) )
    output$label <- label
  if ( !is.null( transformation ) )
    output$transformation <- transformation
  if ( !is.null( scale ) )
    output$scale <- scale
  if ( !is.null( order ) )
    output$order <- order

  return( output )
}

#### 1.6.3) term_coding ####
#' Parameters for Recoding Variables
#'
#' Function to help format input to the
#' [arfpam::term_prep] function. Initializes
#' a list with the specific parameters to be
#' passed to the [arfpam::recode] function
#' when creating a new term.
#'
#' @param values A vector or list of values. Must
#'   equal in length to \code{codes}.
#' @param codes A vector of values.
#' @param default. A value.
#' @param type A character string, either \code{'replace'},
#'   \code{'linear'}, or \code{'interval'}. If not provided
#'   function attempts to determine from input to \code{values}.
#' @param categories A vector of values, all cases
#'   to be coded as 1 (dummy coding and effect coding).
#' @param referent A vector of values, all cases
#'   to be coded as -1 (effect coding).
#'
#' @returns A list.
#'
#' @name term_coding
#'
#' @export

term_coding <- function( values,
                         codes = NULL,
                         default = 0,
                         type = NULL ) {

  output <- list(
    values = values,
    codes = codes,
    default = default,
    type = type
  )

  return( output )
}

#### 1.6.3.1) term_coding_dummy ####
#' @rdname term_coding
#' @export

term_coding_dummy <- function( categories ) {

  output <- term_coding(
    values = list( categories ),
    codes = 1,
    default = 0,
    type = 'replace'
  )

  return( output )
}

#### 1.6.3.2) term_coding_effect ####
#' @rdname term_coding
#' @export

term_coding_effect <- function( categories,
                                referent ) {

  output <- term_coding(
    values = list( categories, referent ),
    codes = c( 1, -1 ),
    default = 0,
    type = 'replace'
  )

  return( output )
}

#### 1.6.4) term_prep ####
#' Create New Terms for Data
#'
#' Function that takes a list specifying base variables and
#' new terms to create and updates a data frame appropriately.
#' Allows for easy pre-processing of multiple data frames in
#' the same manner for analyses (e.g., when prepping training
#' and validation sets for data).
#'
#' @param x A data frame.
#' @param settings A named list, in the form:
#'   \code{list(column = list( new1 = list(...), new2 = list(...)))},
#'   where \code{column} is a pre-existing variable in \code{x},
#'   and \code{new1}, \code{new2}, etc. are new terms to be created
#'   using variable \code{column}. If new terms are to be
#'   combined (e.g., interaction effects), provide a list with
#'   (1) the element \code{'new'} for the terms to create and
#'   (2) the element \code{'combo'} for the terms to combine
#'   (see example).
#' @param output A character string, the type of output to return,
#'   where \code{'x'} (the default) returns an updated data frame,
#'   \code{'settings'} returns an updated list, and \code{'both'}
#'   returns a list with both the data frame and list of settings.
#'
#' @returns Either a data frame, a list of settings, or a list with
#' both the data frame and list of settings.
#'
#' @examples
#' data("mtcars")
#' #' Split into two sets of data
#' x1 <- mtcars[ seq( 1, 32, 2), ] # Odd
#' x2 <- mtcars[ seq( 1, 32, 2), ] # Even
#'
#' lst <- list(
#'   new = list(
#'     mpg = list(
#'       outcome = term_new(
#'         label = 'Miles per gallon'
#'       )
#'     ),
#'     hp = list(
#'       log_hp = term_new(
#'         label = 'Log(Horsepower)',
#'         transformation = 'log(x)',
#'         scale = TRUE,
#'         order = c( 't', 's' )
#'       )
#'     ),
#'     vs = list(
#'       vs_0v1 = term_new(
#'         label = 'Engine: V-shaped vs. straight',
#'         coding = term_coding_effect( 1, 0 ),
#'         scale = TRUE,
#'         order = c( 'c', 's' )
#'       )
#'     ),
#'     am = list(
#'       am_0v1 = term_new(
#'         label = 'Transmission: Automatic vs. manual',
#'         coding = term_coding_effect( 1, 0 ),
#'         scale = TRUE,
#'         order = c( 'c', 's' )
#'       )
#'     )
#'   ),
#'   combo = list(
#'     vs_x_am = term_combo(
#'       combine = c( vs = 'vs_0v1', am = 'am_0v1' ),
#'       transformation = 'vs*am',
#'       scale = TRUE
#'     )
#'   )
#' )
#' # Add info on mean/SD from 'x1' data
#' lst <- x1 |> term_prep( lst, output = 'settings' )
#' # Update 'x1' and 'x2'
#' x1 <- x1 |> term_prep( lst )
#' x2 <- x2 |> term_prep( lst )
#'
#' # Fit 'x1' data
#' fit <- lm( outcome ~ log_hp + vs_0v1 + am_0v1 + vs_x_am, data = x1)
#' # Predict 'x2' data
#' predict( fit, newdata = x2 )
#' @export

term_prep <- function( x,
                       settings,
                       output = 'x' ) {

  #### 1.6.4.1) Setup ####

  # Separate lists for new terms and combinations
  if ( all( names(settings) %in% c( 'new', 'combo' ) ) ) {

    settings_combo <- settings$combo
    settings <- settings$new

    # Close 'Separate lists for new terms and combinations'
  } else {

    settings_combo <- NULL

    # Close else for 'Separate lists for new terms and combinations'
  }


  # Function to apply transformation based on char. string
  fun_transform <- function(
    x,
    chr_expr ) {

    num_output <-
      eval( parse(text = chr_expr ) )

    return( num_output )
  }

  # Function to apply transformation based on
  # char. string for multiple variables
  fun_transform_combo <- function(
    lst,
    chr_expr ) {

    # Update expression
    for ( i in seq_along(lst) ) {

      chr_expr <- gsub(
        names(lst)[i], paste0( 'lst$', names(lst)[i] ),
        chr_expr, fixed = TRUE
      )

      # Close 'Update expression'
    }

    num_output <-
      eval( parse(text = chr_expr ) )

    return( num_output )
  }

  #### 1.6.4.2) New terms ####

  # Loop over variables
  for ( v in seq_along(settings ) ) {

    chr_variable <- names( settings )[v]

    chr_new_columns <- names( settings[[v]] )
    lst_new <- lapply(
      seq_along(chr_new_columns), function(n) {
        rep( NA, nrow(x) )
      }
    )
    names(lst_new) <- chr_new_columns

    # Loop over new columns
    for ( n in seq_along(chr_new_columns) ) {

      # Initialize new variable
      lst_new[[n]] <- x[[ chr_variable ]]

      # Default order
      chr_order <- c( 'c', 't', 's' )
      # User-supplied order
      if ( 'order' %in% names( settings[[v]][[n]] ) ) {

        chr_order <- settings[[v]][[n]]$order
        chr_order[
          settings[[v]][[n]]$order %in% c(
            'coding', 'c', '1'
          )
        ] <- 'c'
        chr_order[
          settings[[v]][[n]]$order %in% c(
            'transformation', 'transform', 't', '2'
          )
        ] <- 't'
        chr_order[
          settings[[v]][[n]]$order %in% c(
            'scale', 'standardize', 's', '3'
          )
        ] <- 's'

        # Close 'User-supplied order'
      }

      # Loop over preparations to run
      for ( p in seq_along(chr_order) ) {

        # Prepare: Coding
        if ( chr_order[p] == 'c' ) {

          # Recode
          if ( 'coding' %in% names( settings[[v]][[n]] ) ) {

            lst_codes <- settings[[v]][[n]]$coding

            lst_new[[n]] <- arfpam::recode(
              lst_new[[n]],
              values = lst_codes$values,
              codes = lst_codes$codes,
              default = lst_codes$default,
              type = lst_codes$type
            )

            # Close 'Recode'
          }

          # Close 'Prepare: Coding'
        }

        # Prepare: Transformation
        if ( chr_order[p] == 't' ) {

          # Transformation
          if ( 'transformation' %in% names( settings[[v]][[n]] ) ) {

            chr_expr <- settings[[v]][[n]]$transformation

            lst_new[[n]] <- fun_transform( x = lst_new[[n]], chr_expr )

            # Close 'Transformation'
          }

          # Close 'Prepare: Transformation'
        }

        # Prepare: Scale
        if (chr_order[p] == 's' ) {

          # Standardize
          if ( 'scale' %in% names( settings[[v]][[n]] ) ) {

            vec_scale <- settings[[v]][[n]]$scale

            # Use mean/SD from current data
            if ( length(vec_scale) == 1 & is.logical(vec_scale) ) {

              vec_scale <- c(
                m = mean( lst_new[[n]], na.rm = T ),
                sd = sd( lst_new[[n]], na.rm = T )
              )
              settings[[v]][[n]]$scale <- vec_scale

              # Close 'Use mean/SD from current data'
            }

            # Use pre-specified mean/SD
            if ( all( names(vec_scale) %in% c( 'm', 'sd' ) ) ) {

              lst_new[[n]] <-
                ( lst_new[[n]] - vec_scale['m'] ) / vec_scale['sd']

              # Close 'Use pre-specified mean/SD'
            }

            # Close 'Standardize'
          }

          # Close 'Prepare: Scale'
        }

        # Close 'Loop over preparations to run'
      }

      # Close 'Loop over new columns'
    }

    x <- cbind( x, data.frame( lst_new ) )

    # Close 'Loop over variables'
  }

  #### 1.6.4.3) Combinations ####

  # If terms are to be combined
  if ( !is.null(settings_combo) ) {


    chr_new_columns <- names( settings_combo )
    lst_new <- lapply(
      seq_along(chr_new_columns), function(n) {
        rep( NA, nrow(x) )
      }
    )
    names(lst_new) <- chr_new_columns

    # Loop over variables
    for ( n in seq_along(settings_combo) ) {

      # Extract terms/variables to combine
      chr_combine <- settings_combo[[n]]$combine

      # List of inputs for transformation function
      lst_inputs <- lapply(
        seq_along(chr_combine), function(v) {
          x[[ chr_combine[v] ]]
        }
      )

      # Default labels for transformation expression
      if ( is.null( names(chr_combine ) ) ) {

        names( lst_inputs ) <- paste0( 'x', seq_along(chr_combine) )

        # Close 'Default labels for transformation expression'
      } else {

        names( lst_inputs ) <- names( chr_combine )

        # Close else for 'Default labels for transformation expression'
      }

      # Custom transformation
      if ( 'transformation' %in% names( settings_combo[[n]] ) ) {

        chr_expr <- settings_combo[[n]]$transformation

        # Close 'Custom transformation'
      } else {

        chr_expr <- paste( names(lst_inputs), collapse = '*' )

        # Close 'Custom transformation'
      }

      lst_new[[n]] <- fun_transform_combo(
        lst_inputs, chr_expr
      )

      # Standardize
      if ( 'scale' %in% names( settings_combo[[n]] ) ) {

        vec_scale <- settings_combo[[n]]$scale

        # Use mean/SD from current data
        if ( length(vec_scale) == 1 & is.logical(vec_scale) ) {

          vec_scale <- c(
            m = mean( lst_new[[n]], na.rm = T ),
            sd = sd( lst_new[[n]], na.rm = T )
          )
          settings_combo[[n]]$scale <- vec_scale

          # Close 'Use mean/SD from current data'
        }

        # Use pre-specified mean/SD
        if ( all( names(vec_scale) %in% c( 'm', 'sd' ) ) ) {

          lst_new[[n]] <-
            ( lst_new[[n]] - vec_scale['m'] ) / vec_scale['sd']

          # Close 'Use pre-specified mean/SD'
        }

        # Close 'Standardize'
      }

      # Close 'Loop over variables'
    }

    x <- cbind( x, data.frame( lst_new ) )

    # Close 'If terms are to be combined'
  }

  # Return list with settings
  if (output %in% c( 'settings', 'both' ) ) {

    # Merge lists
    if ( !is.null(settings_combo) ) {

      settings <- list(
        new = settings,
        combo = settings_combo
      )

      # Close 'Merge lists'
    }

    if (output %in% 'both') return( list(x = x, settings = settings) )

    return(settings)

    # Close 'Return list with settings'
  }

  return( x )
}

#### 1.7) take_x_by_y ####
#' Repeat Column Values by Unique Cases in Another Column
#'
#' Function to duplicate values in column \code{x} by
#' the unique cases in column \code{y}. Useful, for
#' example, in extracting and duplicating a subject's
#' baseline values across all time points in a long-form
#' data frame for a longitudinal study.
#'
#' @param lfd A long-form data frame.
#' @param x The column with values to duplicate
#'   (non-standard evaluation possible).
#' @param y The column to repeat values over
#'   (non-standard evaluation possible).
#' @param extra A logical vector matching in
#'   length to the number of rows of \code{lfd}
#'   specifying additional cases to match by
#'   when isolating the values of \code{x} to
#'   repeat.
#' @param default The value to substitute if
#'   no cases for \code{x} based on \code{extra}
#'   are found to duplicate.
#' @param per_row Logical; if \code{FALSE} returns
#'   the associated value of \code{x} for each
#'   unique value of \code{y}, otherwise returns
#'   a value of \code{x} per each frow of \code{lfd}.
#'
#' @return A vector matching in length to the number
#' of rows of \code{lfd} (for \code{per_row = TRUE}) or
#' to the number of unique values of \code{y} with the
#' values of \code{x} repeated for each unique case of
#' \code{y}.
#'
#' @examples
#' # Example long-form data frame
#' lfd <- data.frame(
#'   ID = rep( 1:3, each = 3 ),
#'   Value = 1:9,
#'   Time = rep( 0:2, 3 )
#' )
#'
#' # Repeat first value of Y for each value of X
#' i <- lfd$Time == 0
#' take_x_by_y( lfd, Value, ID, extra = i )
#' # Repeat last value of Y for each value of X
#' i <- lfd$Time == 2
#' take_x_by_y( lfd, Value, ID, extra = i )
#' # Per unique case of ID
#' take_x_by_y( lfd, Value, ID, extra = i, per_row = FALSE )
#'
#' @export

take_x_by_y <- function( lfd, x, y,
                         extra = NULL,
                         default = NA,
                         per_row = TRUE ) {

  # Non-standard evaluation
  X <- as.character(substitute(x))
  Y <- as.character(substitute(y))

  N <- nrow( lfd )

  if ( is.null( extra ) ) {
    extra <- rep( TRUE, N )
  }

  out <- sapply(
    1:N, function(i) {

      specific_index <-
        !is.na( lfd[[ Y ]] ) &
        lfd[[ Y ]] %in% lfd[[ Y ]][i] &
        extra

      if ( sum( specific_index ) > 1 ) {
        warning(
          paste0(
            "Case ", lfd[[ Y ]][i], "\n",
            "   More than one unique value detected"
          )
        )
      }

      if ( any( specific_index ) ) {
        return( lfd[[ X ]][ specific_index ][1] )
      } else {
        return( default )
      }
    } )

  if ( per_row ) {
    return( out )
  } else {

    unq_Y <- unique( lfd[[Y]] )
    unq_Y <- unq_Y[ !is.na(unq_Y) ]
    out_by_y <- sapply( unq_Y, function(y) {
      return( unique( out[ lfd[[Y]] %in% y] ) )
    } )
    names( out_by_y ) <- unq_Y

    return( out_by_y )

  }

}


#### 2) Functions for files ####

#### 2.1) date_and_time ####
#' Formatted Date and Time for File Names
#'
#' Convenience function to generate a
#' nicely formatted character string
#' with the date and time, typically of
#' the form: YYYY_MM_DD-HH_MM to include
#' as part of a file name. Can convert
#' the character string back into a
#' date and time if needed.
#'
#' @param value A character string to convert
#'   back into a date-time object.
#' @param frmt A character string specifying
#'   the format for the date and time object
#'   (see \code{\link[base:strptime]{as.POSIXct}}).
#'
#' @return Either 1) a character string with the
#' date and time, to include in a file name, or
#' 2) a date-time object.
#'
#' @examples
#' string <- date_and_time()
#' string
#' # Convert back to date and time object
#' format( date_and_time( string ), '%Y-%m-%d %H:%M' )
#'
#' @export

date_and_time <- function( value = NULL, frmt = '%Y_%m_%d-%H_%M' ) {

  if ( is.null( value ) ) {
    return( format( Sys.time(), format = frmt ) )
  } else {
    return( as.POSIXct( value, format = frmt ) )
  }

}

#### 2.2) find_file_name ####
#' Check if a File Name can be Found
#'
#' Checks if a file name can be found
#' in a folder via partial string matching.
#' Multiple types of output are supported.
#'
#' @param string A character string, used
#'   for partial string matching.
#' @param output The type of output to return.
#'   Options include...
#'   \itemize{
#'     \item \code{'index'};
#'     \item \code{'name'};
#'     \item \code{'logical'};
#'     \item \code{'vector'}.
#'   }
#' @param full Logical; if \code{TRUE} returns the
#'   full path.
#' @param ... Additional arguments to the
#'   \code{\link[base:list.files]{dir}} function.
#'
#' @return Either...
#'   \itemize{
#'     \item A single logical value, \code{TRUE} if
#'           any matching file names are found
#'           (\code{'logical'}).
#'     \item A logical vector, \code{TRUE} for all
#'           matching files in the vector of file
#'           names (\code{'vector'}).
#'     \item An integer vector giving the position of
#'           any matches in the vector of file names
#'           (\code{'index'}).
#'     \item A character vector with any matching
#'           file names, otherwise \code{NULL}
#'           (\code{'name'}).
#'   }
#'
#' @examples
#' # Go to folder with html files for help pages
#' setwd(find.package("arfpam")[1])
#' setwd("html")
#'
#' # Find help page for function 'every'
#' find_file_name("every")
#' find_file_name("every", output = "index")
#' find_file_name("every", output = "logical")
#' find_file_name("every", output = "vector")
#'
#' @export

find_file_name <- function(string,
                           output = "name",
                           full = FALSE,
                           ...) {

  # All files and folders present
  # in working directory
  all_files <- dir(...)

  # Determine if file name is present
  # in list of files/folders
  check <- grepl(string, all_files, fixed = T)

  # Logical output
  if (output %in% c("Logical", "logical", "L", "l")) {

    return(any(check))

    # Close 'Logical output'
  }

  # Vector output
  if (output %in% c("Vector", "vector", "vec", "V", "v")) {

    return(check)

    # Close 'Vector output'
  }

  # Index output
  if (output %in% c("Index", "index", "I", "i")) {

    return(which(check))

    # Close 'Index output'
  }

  # Name output
  if (output %in% c("Name", "name", "N", "n")) {

    # If file found
    if (any(check)) {

      # If returning full path
      if (full) {

        add_path <- ''

        lst_arg <- list(...)

        # Check if path argument provided
        if ( length(lst_arg) > 0 ) {

          # Update additional path
          if ( !is.null(lst_arg$path) ) {

            add_path <- paste0( lst_arg$path, '/' )

            # Close 'Update additional path'
          }

          # Close 'Check if path argument provided'
        }

        full_path <- paste0(
          getwd(), '/',
          add_path,
          all_files[check]
        )
        return(full_path)

        # Close 'If returning full path'
      } else {

        return(all_files[check])

        # Close else for 'If returning full path'
      }

      # Close 'If file found'
    } else {

      return(NULL)

      # Close else for 'If file found'
    }

    # Close 'Name output'
  }

}

#### 2.3) make_file_name ####
#' Create Formatted File Name
#'
#' Creates a file name using the standardized
#' format 'PROJECT-SXX-Description-YYYY_MM_DD-HH_MM.ext'
#' where 'PROJECT' is a abbreviation for a project name,
#' and 'SXX' is a letter followed by a number indicating
#' the script or analysis index. Both variables can
#' be automatically specified from existing
#' environmental variables.
#'
#' @param description A character string, a human-readable
#'   description of the file (with words typically separated
#'   by \code{'_'}).
#' @param extension A character string, the file extension
#'   (e.g., \code{'R'}, \code{'RData'}) - the period is
#'   added automatically.
#' @param project A character string, the abbreviation for the
#'   project name (can be inferred from environmental variables).
#' @param script A character string, typically a one-letter
#'   abbreviation followed by a two-digit number indicating
#'   the script or analysis index.
#' @param date_time A character string giving the date and time
#'   (usually in the format \code{'YYYY_MM_DD-HH_MM'}) - if
#'   not provided uses the current date and time instead.
#' @param remove Logical; if \code{TRUE} removes any existing
#'   files in the working directory with the same
#'   project, script, description, and extension values.
#' @param path A character string specifying the subfolder(s)
#'   for the file.
#' @param env_variables A character vector specifying the
#'   name of the environmental variables with the
#'   project and script abbreviations.
#'
#' @return A character string.
#'
#' @examples
#'
#' make_file_name( 'Example', 'R' )
#' make_file_name( 'Example_2', 'RData', project = 'EX', script = 'S01' )
#'
#' @export

make_file_name <- function(description,
                           extension,
                           project = NULL,
                           script = NULL,
                           date_time = NULL,
                           remove = FALSE,
                           path = '.',
                           env_variables =
                             c( 'ABBR_PROJECT', 'ABBR_SCRIPT' )) {

  if ( is.null(project) ) {

    # Check for environmental variable
    project <- Sys.getenv( env_variables[1] )
    if ( project == "" ) project <- NULL

  }

  if ( is.null(script) ) {

    # Check for environmental variable
    script <- Sys.getenv( env_variables[2] )
    if ( script == "" ) script <- NULL

  }

  if ( !is.null( project ) ) {
    chr_project <- paste0( project, '-' )
  } else {
    chr_project <- ''
  }

  if ( !is.null( script ) ) {
    chr_script <- paste0( script, '-' )
  } else {
    chr_script <- ''
  }

  if ( is.null( date_time ) ) {
    chr_dat <- paste0( '-', arfpam::date_and_time() )
  } else {
    chr_dat <- paste0( '-', date_time )
  }

  chr_desc <- description
  chr_ext <- paste0( '.', extension )

  chr_file <- paste0(
    chr_project,
    chr_script,
    chr_desc,
    chr_dat,
    chr_ext
  )

  if ( remove ) {

    chr_search <- paste0(
      chr_project,
      chr_script,
      chr_desc
    )

    lgc_match <-
      grepl( chr_search, dir( path ), fixed = TRUE ) &
      grepl( chr_ext, dir( path ), fixed = TRUE )

    if ( any(lgc_match) ) {

      lgc_removed <- file.remove(
        paste0(
          path, '/', dir( path )[lgc_match]
        )
      )

    }

  }

  if ( path == '.' ) {
    return(chr_file)
  } else {
    return( paste0( path, '/', chr_file ) )
  }

}

#### 2.4) load_package ####
#' Convenience Function to Load in R Packages
#'
#' Convenience function to load in a set of R packages -
#' can be used to first install missing packages before
#' loading them.
#'
#' @param pkg A character vector of package names to
#'   load - by default loads the current package
#'   in addition to the package
#'   \code{\link[dplyr:dplyr-package]{dplyr}}.
#' @param install Logical; if \code{TRUE} attempts
#'   to install missing packages.
#' @param quietly Logical; if \code{TRUE} loads
#'   in packages with no console messages (including
#'   warnings and/or error messages).
#'
#' @return Loads in a set of R packages (see
#' \code{\link[base]{library}}).
#'
#' @export

load_package <- function( pkg = c( 'dplyr', 'arfpam' ),
                          install = FALSE,
                          quietly = FALSE ) {

  # Packages on Github repo for 'rettopwnivek'
  rettopwnivek_packages <- c(
    'arfpam',
    'extbrms',
    'extofficer',
    'ffcar',
    'pathdiagrams'
  )

  # Number of packages to load in
  K <- length( pkg )

  # Loop over packages
  for ( k in 1:K ) {

    # If specified install missing packages
    if ( install ) {

      already_installed <- installed.packages()

      # If package hs not been installed
      if ( !pkg[k] %in% rownames( already_installed ) ) {

        # If package is on Github repo
        if ( pkg[k] %in% rettopwnivek_packages ) {

          devtools::install_github(
            paste0( 'rettopwnivek/', pkg[k] )
          )

          # Close 'If package is on Github repo'
        } else {

          install.packages( pkg[k] )

          # Close else for 'If package is on Github repo'
        }

        # Close 'If package hs not been installed'
      }

      # Close 'If specified install missing packages'
    }

    library( pkg[k], character.only = TRUE, quietly = quietly )

    # Close 'Loop over packages'
  }

}

#### 2.5) load_R_object ####
#' Load in R Objects From .RData Files
#'
#' A convenience function that, given a
#' path to a .RData file, will load in
#' a specified R object contained within
#' the file.
#'
#' @param path_to_rdata A character string,
#'   a path to a .RData file to be passed
#'   on to the \code{\link[base]{load}}
#' @param which_object The variable name for
#'   the R object to return from the set of
#'   all variables loaded in from the .RData
#'   file. Uses non-standard evaluation.
#'
#' @author Kevin Potter
#'
#' @export

load_R_object <- function( path_to_rdata, which_object ) {

  # Load in .RData file
  load( path_to_rdata )

  # Return specified object loaded in from .RData file
  return( eval( substitute( which_object ) ) )
}

#### 2.6) path_to_file ####
#' Returns File/Folder Paths
#'
#' Returns an absolute file or folder path.
#' Folder paths can be extracted from a
#' pre-specified environmental variable.
#'
#' @param file_name A character string, a
#'   partial match to the file of interest.
#' @param env_var A character string, the name for
#'   the environment variable.
#' @param path A character string, a relative or
#'   absolute path to a folder.
#' @param latest Logical; if \code{TRUE} returns only
#'   the latest version of a file whose name contains
#'   a date.
#'
#' @return A character string.
#'
#' @export

path_to_file <- function( file_name = NULL,
                          env_var = NULL,
                          path = NULL,
                          latest = TRUE ) {

  if ( !is.null( env_var ) ) {
    path = Sys.getenv( env_var )
    if ( path == '' ) {
      stop( 'Environmental variable for path not found' )
    }
  }

  if ( is.null( path ) ) {
    path <- getwd()
  }

  if ( !is.null( file_name ) ) {

    x <- arfpam::find_file_name(
      file_name, output = 'name',
      path = path
    )

    if ( length( x ) == 0 ) {
      stop( 'File not found' )
    }

    if ( latest ) {
      return( paste0( path, '/', sort( x )[ length(x) ] ))
    } else {
      return( paste0( path, '/', sort( x ) ))
    }

  } else {
    return( path )
  }

}

#### 2.7) source_R_scripts ####
#' Source in Multiple R Scripts in a Folder
#'
#' A convenience function that loops through
#' and reads in code in .R files stored in a
#' folder located in the current working directory.
#'
#' @param files_to_include A vector of either...
#'   \itemize{
#'     \item Numeric indices denoting which files
#'       to include;
#'     \item A character string matching the initial
#'        set of letters across all relevant files (e.g., if
#'        all scripts of interest start with the letter 'S');
#'     \item A character vector with the full file names
#'       for the files to include.
#'   }
#' @param path The folder name with the scripts to source.
#'
#' @author Kevin Potter
#'
#' @export

source_R_scripts = function( files_to_include = NULL,
                             path = 'R' ) {

  # Folders to load
  all_files <- dir(
    path = path
  )

  # Identify R scripts

  f <- function( x ) {
    grepl( x, all_files, fixed = T )
  }
  # Files must have extension .R
  r_files <-
    ( f( '.R' ) | f( '.r' ) ) &
    # Exclude R data files
    !( f( '.RData' ) | f( '.rdata' ) |
         f( '.rda' ) |
         # Exclue R markdown files
         f( '.Rmd' ) | f( '.rmd' )
    )

  # Isolate .R files
  if ( any( r_files ) ) {
    all_files <- all_files[ r_files ]
  } else {
    stop( 'No .R files found' )
  }

  # Check if subset of files should be included
  if ( !is.null( files_to_include ) ) {

    # If numeric indices were provided
    if ( is.numeric( files_to_include ) ) {
      files_to_source <- all_files[ files_to_include ]
    }

    # If a character vector was provided
    if ( is.character( files_to_include ) ) {

      # If a single character string with no '.R' extension
      # was provided
      if ( length( files_to_include ) == 1 &
           !any( grepl( '.R', files_to_include, fixed = T ) ) ) {

        n_letters <- nchar( files_to_include )

        to_check <- substr( all_files, start = 1, stop = n_letters )

        files_to_source <- all_files[
          files_to_include %in% to_check
        ]

      } else {
        # Exact matching to file names
        files_to_source <- all_files[ all_files %in% files_to_include ]
      }

    }
  } else {
    # Otherwise take all files in folder
    files_to_source <- all_files
  }

  # Source in all specified R scripts
  if ( length( files_to_source ) > 0 ) {
    sapply( 1:length( files_to_source ), function(i) {
      source( paste0( path, "/", files_to_source[i] ) )
    } )
  } else {
    stop( 'No files found matching given criteria' )
  }

}

#### 2.8) copy_from_source ####
#' Copy Files From Source Folder
#'
#' Function to copy files from a subfolder in a
#' source folder to a new subfolder in a user-defined
#' source folder in the current directory.
#'
#' @param source_path A character string, the
#'   absolute path to the source folder.
#' @param destination_path A character string,
#'   the path to the folder to which files should
#'   be copied - if blank, uses the current
#'   working directory.
#' @param source_subfolder An optional character
#'   string, the full or partial name of a
#'   subfolder in source location with the files
#'   to copy.
#' @param environment_var An optional character
#'   string, the environmental variable with the
#'   path to the source folder.
#'
#' @returns As a side effect copies files to a to
#' the specified destination folder.
#'
#' @export

copy_from_source <- function( source_path = '',
                              destination_path = '',
                              source_subfolder = '',
                              environment_var = 'FOLDER_SOURCE' ) {

  # Path to source folder from environmental variable
  if ( source_path == '' ) {

    source_path <- Sys.getenv(
      environment_var
    )

    # Close 'Path to source folder from environmental variable'
  }

  # If a subfolder is specified
  if ( source_subfolder != '' ) {

    subfolders <- dir(
      path = source_path
    )
    source_subfolder <- subfolders[
      grepl(
        source_subfolder,
        subfolders,
        fixed = TRUE
      )
    ][1]

    source_path <- paste0(
      source_path, '/', source_subfolder
    )

    # Close 'If a subfolder is specified'
  }

  # List files in source folder
  files_and_folders_to_copy <- list.files(
    path = source_path,
    recursive = TRUE,
    include.dirs = TRUE
  )

  # By default use current working directory
  if ( destination_path == '' ) {

    destination_path <- getwd()

    # Close 'By default use current working directory'
  }

  new_path_for_copied_files_and_folders <- paste0(
    destination_path, '/', files_and_folders_to_copy
  )

  # Check if a subfolder is present
  lgc_is_directory <- !grepl(
    '.', files_and_folders_to_copy, fixed = TRUE
  )

  # If subfolder is present
  if ( any( lgc_is_directory ) ) {

    # Create subfolder in new location
    lgc_success <- sapply(
      new_path_for_copied_files_and_folders[lgc_is_directory],
      function( chr_folder ) {
        dir.create(
          chr_folder,
          recursive = TRUE
        )
      }
    )

    # Close 'If subfolder is present'
  }

  # Copy files to local machine
  lgc_success <- file.copy(
    from = paste0(
      source_path, '/',
      files_and_folders_to_copy[!lgc_is_directory]
    ),
    to = new_path_for_copied_files_and_folders[!lgc_is_directory]
  )

  # Error and warning messages
  if ( any(lgc_success) ) {

    if ( !all(lgc_success) ) {
      warning( 'Some files or folders were not copied' )
    }

    # Close 'Error and warning messages'
  } else {

    stop( 'Failed to copy files' )

    # Close else for 'Error and warning messages'
  }

}

#### 2.9) save_png_figure ####
#' Create and save PNG file
#'
#' Function to create and save a PNG file
#' given a plotting function to run.
#'
#' @param plotting_fun A function, with the
#'   creation of a figure as a side effect.
#' @param file_name A character string, the
#'   file name for the PNG file - must end
#'   in '.png'.
#' @param figure_dim A numeric vector of two
#'   values, the width and height of the
#'   figure in inches.
#' @param res A integer value, the resolution
#'   for the PNG file.
#' @param ... Additional arguments to pass to
#'   \code{plotting_fun}.
#'
#' @returns Output from \code{plotting_fun}, if any.
#'
#' @export

save_png_figure <- function(
    plotting_fun,
    file_name,
    figure_dim = c( 5, 5 ),
    res = 300,
    ... ) {

  png(
    filename = file_name,
    width = figure_dim[1],
    height = figure_dim[2],
    units = 'in',
    res = res
  )

  obj_output <- plotting_fun(
    ...
  )

  dev.off()

  # Pass through output
  if ( !is.null( obj_output ) ) {

    return( obj_output )

    # Close 'Pass through output'
  }

}

#### 3) Functions for matching and assignment ####

#### 3.1) assign_by_interval ####
#' Assign Values by Cases Within Intervals
#'
#' Function that assigns user-specified values
#' for when a numeric variable falls within
#' intervals defined by a set of
#'
#' @param x A numeric vector of values.
#' @param breakpoints A numeric vector of values, the
#'   breakpoints for the intervals. By default, the
#'   lowest and highest breakpoints are set to
#'   \code{-Inf} and \code{Inf}, so only the intervening
#'   points need to be specified (this behavior can be changed).
#' @param values A vector of values to assign for all cases
#'   within a given interval.
#' @param include A character vector with two elements,
#'   either \code{'>'} or \code{'>='} and \code{'<'} or
#'   \code{'<='}.
#' @param ends An optional vector specifying the lowest and
#'   and highest breakpoints. Can be set to \code{NA} to
#'   prevent adding to \code{breakpoints}.
#' @param default The default value to use for cases that
#'   are not within any intervals.
#'
#' @return A vector of values.
#'
#' @examples
#' # Default l > x <= u
#' x <- 1:6
#' assign_by_interval( x, c( 2, 4 ) )
#'
#' # Can quickly define splits
#' x <- c( 1, 1, 1, 1, 2, 2, 10 )
#' # Mean split
#' assign_by_interval( x, mean(x) )
#' # Median split
#' assign_by_interval( x, median(x) )
#' # Custom values
#' assign_by_interval( x, mean(x), values = c( 'Below', 'Above' ) )
#'
#' # Custom conditions and bounds
#' x <- 1:6
#' assign_by_interval(
#'   x, c( 1, 2, 4, 6 ), include = c('>=', '<' ), ends = NULL
#' )
#' # Can change default value for when nothing in range
#' assign_by_interval(x, 6, ends = c( 2, NA ), default = -1 )
#'
#' @export

assign_by_interval <- function(
    x,
    breakpoints,
    values = NULL,
    include = c( '>', '<=' ),
    ends = c( -Inf, Inf ),
    default = NA ) {

  # If limits for breakpoints given
  if ( !is.null( ends ) ) {

    lgc_misspecified <- TRUE

    # If only lower limit given
    if ( length( ends ) == 1 ) {

      if ( !is.na( ends[1] ) ) {
        breakpoints <- c( ends[1], breakpoints )
      }

      lgc_misspecified <- FALSE

      # Close 'If only lower limit given'
    }

    # If lower and upper limit given
    if ( length( ends ) == 2 ) {

      if ( !is.na( ends[1] ) ) {
        breakpoints <- c( ends[1], breakpoints )
      }

      if ( !is.na( ends[2] ) ) {
        breakpoints <- c( breakpoints, ends[2] )
      }

      lgc_misspecified <- FALSE

      # Close 'If lower and upper limit given'
    }

    if ( lgc_misspecified ) {

      chr_error <- paste0(
        "Argument 'ends' must be a vector of one or two values ",
        "giving the lower and upper limits for the breakpoints ",
        "respectively"
      )
      stop( chr_error )

    }

    # Close 'If limits for breakpoints given'
  }

  # Ensure no duplicate cut-offs
  breakpoints <- unique( breakpoints )

  # Number of bins
  bins <- length( breakpoints ) - 1
  # Number of observations
  n <- length(x)

  # By default assign integers for values
  if ( is.null( values ) ) {

    values <- 1:bins

    # Close 'By default assign integers for values'
  }

  # Check number of bins and values
  if ( length(values) != bins ) {

    chr_error <- paste0(
      "Must specify set of replacement values equal to ",
      "number of intervals (i.e., for B breakpoints there are B - 1 values)"
    )
    stop( chr_error )

    # Close 'Check number of bins and values'
  }

  # Shorthand for conditional statements
  if ( length( include ) == 1 ) {

    include <- switch(
      include,
      `<` = c( '>=', '<' ),
      `>` = c( '>', '<=' ),
      `>=` = c( '>=', '<=' ),
      `<=` = c( '>=', '<=' )
    )

    # Close 'Shorthand for conditional statements'
  }

  # Check conditional statements
  if ( !include[1] %in% c( '>', '>=' ) |
       !include[2] %in% c( '<', '<=' ) ) {

    chr_error <- paste0(
      "Argument 'include' must be 2-element vector with either ",
      "'>' or '>=' and '<' or '<='"
    )
    stop( chr_error )

    # Close 'Check conditional statements'
  }

  # Initialize output
  y <- rep( default, n )

  # Loop over bins
  for ( b in 1:bins ) {

    limits <- breakpoints[0:1 + b]

    if ( all( include == c( '>', '<' ) ) ) {
      lgc_inside <-
        !is.na(x) & x > limits[1] & x < limits[2]
    }

    if ( all( include == c( '>=', '<' ) ) ) {
      lgc_inside <-
        !is.na(x) & x >= limits[1] & x < limits[2]
    }

    if ( all( include == c( '>', '<=' ) ) ) {
      lgc_inside <-
        !is.na(x) & x > limits[1] & x <= limits[2]
    }

    if ( all( include == c( '>=', '<=' ) ) ) {
      lgc_inside <-
        !is.na(x) & x >= limits[1] & x <= limits[2]
    }

    y[lgc_inside] <- values[b]

    # Close 'Loop over intervals'
  }

  return(y)
}

#### 3.2) match_and_reorder ####
#' Match and Reorder Vectors
#'
#' Function to match a vector of values against
#' another vector and reorder the original
#' vector based on the matching indices.
#'
#' @param x A vector.
#' @param values A vector containing the values to match
#'   against in \code{x}.
#' @param y An optional vector matching in length to \code{x}
#'   to reorder - otherwise the function returns the indices
#'   for reordering.
#'
#' @return Either a vector of indices for reordering or the
#' reordered output from the input \code{y}.
#'
#' @examples
#' x <- rep( LETTERS[1:3], 3 )
#' values <- c( 'C', 'B', 'A' )
#' y <- rep( 1:3, 3 )
#' match_and_reorder( x, values, y )
#'
#' set.seed( 111 ) # For reproducibility
#' # Example data frame
#' dtf_example <- data.frame(
#'   X = rep( 1:4, each = 2 ),
#'   Y = round( rnorm( 8 ), 2 )
#' )
#' # Resample with replacement from 'X'
#' shuffled_x <- sample( 1:4, size = 4, replace = TRUE )
#' # Create a reordered data frame based on the resampled values
#' dtf_shuffled <- data.frame(
#'   X = match_and_reorder( dtf_example$X, shuffled_x, dtf_example$X ),
#'   Y = match_and_reorder( dtf_example$X, shuffled_x, dtf_example$Y )
#' )
#'
#' @export

match_and_reorder <- function( x, values, y = NULL ) {

  new_index <- lapply(
    values, function(v) {
      which( x %in% v )
    } ) |> unlist()

  if ( is.null( y ) ) {
    return( new_index )
  } else {

    if ( length(y) != length(x) ) {
      stop( "Length of 'x' and 'y' must match" )
    }

    return( y[ new_index ] )
  }

}

#### 3.3) match_rows ####
#' Match Rows Across Two Data Frames
#'
#' Function that returns an index indicating
#' the row in one data frame that matches the
#' row in another data frame.
#'
#' @param dtf_to A data frame with the rows to match over.
#' @param dtf_from A data frame with the rows to compared against.
#'
#' @returns A list equal in length to the number of rows in
#' \code{dtf_to} with the row indices from \code{dtf_from} that
#' matches a given row in \code{dtf_to}. If no matches are found
#' returns \code{NA}.
#'
#' @examples
#' dtf_example_1 <- data.frame( c( LETTERS[3:1], 'Z' ), V2 = c( 3:1, 26 ) )
#' dtf_example_2 <- data.frame( LETTERS[1:6], V2 = 1:6 )
#' match_rows( dtf_example_1, dtf_example_2 )
#'
#' @export

match_rows <- function(
    dtf_to,
    dtf_from ) {

  N_rows_from <- nrow( dtf_from )
  N_rows_to <- nrow( dtf_to )

  lst_index <- lapply(
    1:N_rows_to, function(r_to) {

      lgc_matches <- sapply( 1:N_rows_from, function (r_from) {
        all( all.equal(
          dtf_to[r_to, ],
          dtf_from[r_from, ],
          check.attributes = FALSE
        ) %in% TRUE )
      } )

      if ( any( lgc_matches ) ) {
        return( which( lgc_matches ) )
      } else {
        return( NA )
      }

    }

  )

  return( lst_index )
}

#### 3.4) recode ####
#' Recode a Variable
#'
#' Function to flexibly recode values of a variable. Can
#' match values and replace with new codes, assign codes
#' based on intervals for a continuous variable, or code
#' a linear trend given a start and end point.
#'
#' @param x A vector.
#' @param values Either a vector of values to
#'   replace, or a list of vectors for the sets of values
#'   to replace. Values of the form \code{'(x,y)'} or
#'   \code{'[x,y]'} result in assignment based on an interval
#'   from x to y, while values of the form \code{'x,y'}
#'   result in a linear trend ranging from 0 to 1 over the
#'   range x to y.
#' @param codes A vector of values, either a single
#' value or a vector matching in length with \code{to_replace}.
#' @param default The default value to return as output
#'   for all cases that are not recoded.
#' @param type A character string, either \code{'replace'},
#'   \code{'linear'}, or \code{'interval'}. If not provided
#'   function attempts to determine from input to \code{values}.
#'
#' @return A vector.
#'
#' @examples
#' # Assigning by matching categories
#' x <- rep( LETTERS[1:3], each = 3 )
#' recode( x, list( 'A', c('B', 'C') ) )
#' recode( x, c( 'A', 'B', 'C' ), c( -1, 0, 1 ) )
#'
#' # Assigning by intervals
#' x <- seq( -1.5, 1.5, .5 )
#' cbind(x, recode( x, c( '(-Inf,0]', '(0,Inf]' ) ) )
#'
#' # Linear trend
#' cbind(x, recode( x, '-1,1' ) )
#'
#' @export

recode <- function( x,
                    values,
                    codes = NULL,
                    default = 0,
                    type = NULL ) {

  # Initialize output
  output <- rep( default, length(x) )

  # Convert vector to list
  if ( !is.list(values) ) {

    values <- as.list(values)

    # Close 'Convert vector to list'
  }

  # Default codes
  if ( is.null(codes) ) {

    codes <- seq_along(values)

    # Close 'Default codes'
  }

  # Specify type
  if ( is.null(type) ) {

    # Default type of recoding
    type <- 'replace'

    # Linear/Interval need only one value
    if ( length( values[[1]] ) == 1 ) {

      # Set type appropriately
      if ( grepl( ',', values[[1]], fixed = TRUE ) ) {

        type <- 'linear'

        # Close 'Set type appropriately'
      }

      # Set type appropriately
      if ( grepl( '(', values[[1]], fixed = TRUE ) |
           grepl( '[', values[[1]], fixed = TRUE ) ) {

        type <- 'interval'

        # Close 'Set type appropriately'
      }

      # Close 'Linear/Interval need only one value'
    }

    # Close 'Specify type'
  }

  # Replace values
  if ( type == 'replace' ) {

    # Loop over values
    for ( v in seq_along(values) ) {

      lgc_subset <-
        x %in% values[[v]]
      output[lgc_subset] <- codes[v]

      # Close 'Loop over values'
    }

    # Close 'Replace values'
  }

  # Replace intervals
  if ( type == 'interval' ) {

    # Loop over values
    for ( v in seq_along(values) ) {

      chr_expr <- values[[v]]
      chr_expr <-
        gsub( '(', 'x > ', chr_expr, fixed = TRUE )
      chr_expr <-
        gsub( '[', 'x >= ', chr_expr, fixed = TRUE )
      chr_expr <-
        gsub( ')', ' > x', chr_expr, fixed = TRUE )
      chr_expr <-
        gsub( ']', ' >= x', chr_expr, fixed = TRUE )
      chr_expr <-
        gsub( ',', ' & ', chr_expr, fixed = TRUE )

      lgc_subset <- eval( parse(text = chr_expr) )

      output[lgc_subset] <- codes[v]

      # Close 'Loop over values'
    }

    # Close 'Replace intervals'
  }

  # Linear trend
  if ( type == 'linear' ) {

    num_start_stop <- strsplit(
      values[[1]], split = ',', fixed = TRUE
    )[[1]] |> as.numeric()
    output <- (x - num_start_stop[1])/diff(num_start_stop)
    output[
      output < 0
    ] <- 0
    output[
      output > 1
    ] <- 1

    # Close 'Linear trend'
  }

  return(output)
}

#### 3.5) replace_cases ####
#' Replace Cases
#'
#' Function that matches cases in a vector and
#' replaces them with user-specified values.
#' Robust to NA values.
#'
#' @param x A vector.
#' @param to_replace Either a vector of values to
#'   replace, or a list of vectors for the sets of values
#'   to replace.
#' @param replace_with A vector of values, either a single
#' value or a vector matching in length with \code{to_replace}.
#'
#' @return A vector.
#'
#' @examples
#' # Example vector
#' x <- rep( LETTERS[1:4], each = 3 )
#' # Replace all cases
#' replace_cases( x, c( 'A', 'B', 'C', 'D' ), 1:4 )
#' # Replace some cases and use default value for others
#' replace_cases( x, c( 'A', 'B', 'C' ), 1:3 )
#' # Replace combinations of cases
#' replace_cases( x, list( c( 'A', 'B' ), c( 'C', 'D' ) ), 1:2 )
#'
#' # Robust to NA values
#' x <- c( 1, 1, 2, 2, NA, NA )
#' replace_cases( x, c( 1, 2, NA ), c( 'A', 'B', '' ) )
#'
#' @export

replace_cases <- function( x, to_replace, replace_with, default = NA ) {

  N <- length(x)
  out <- rep( default, N )

  K <- length( to_replace )

  if ( length( replace_with ) == 1 ) {
    replace_with <- rep( replace_with, K )
  }

  if ( length( replace_with ) != length( to_replace ) ) {
    chr_error <- paste0(
      "Argument 'replace_with' must be either single value or ",
      "match in length with argument 'to_replace'."
    )

    stop( chr_error )
  }

  is_list <- is.list( to_replace )

  for ( k in 1:K ) {

    if ( is_list ) {

      if ( any( is.na( to_replace[[k]] ) ) ) {
        cases_that_match <- is.na(x)
      } else {
        cases_that_match <- x %in% to_replace[[k]]
      }

    } else {

      if ( is.na( to_replace[k] ) ) {
        cases_that_match <- is.na(x)
      } else {
        cases_that_match <- x %in% to_replace[k]
      }

    }

    out[ cases_that_match ] <- replace_with[k]

  }

  return( out )
}

#### 4) Functions for miscellaneous ####

#### 4.1) add_to_list ####
#' Add Element to Existing List
#'
#' A function that adds a new element to a pre-existing
#' list. Allows greater compatibility with the pipe operator.
#'
#' @param l A list.
#' @param name A character string, the name of the new
#'   element being added.
#' @param e An R object, the new element to add to
#'   the list.
#'
#' @returns A list with an additional element \code{e}
#'   with the specified name.
#'
#' @examples
#' l <- list() |>
#'   add_to_list( 'A', 1:3 ) |>
#'   add_to_list( 'B', list( C = 1, D = 2 ) )
#' print(l)
#' @export

add_to_list <- function(l, name, e) {

  l[[ length(l) + 1]] <- e
  names(l)[ length(l) ] <- name

  return(l)
}

#### 4.2) data_first ####
#' Adapt Functions to Take Data Argument First
#'
#' A function that ensures the first argument is
#' always for 'data' - allows greater compatibility
#' with the pipe operator.
#'
#' @param data_ob An R object to pass to a given
#'   function's \code{data} argument.
#' @param fun_to_apply An R function that has a
#'   \code{data} argument (e.g., [stats::lm]).
#' @param ... Additional arguments for the given function.
#'
#' @returns The associated output for the \code{fun_to_apply}
#' function.
#'
#' @examples
#' data( 'mtcars' )
#' lm_fit <- data_first( mtcars, lm, formula = mpg ~ cyl )
#' print( lm_fit )
#'
#' @export

data_first <- function( data_obj, fun_to_apply, ... ) {

  return(
    fun_to_apply( ..., data = data_obj )
  )

}

#### 4.3) empty_list ####
#' Create an Empty List
#'
#' Creates a list with a specified
#' number of empty slots.
#'
#' @param size The number of slots.
#' @param labels An optional character
#'   vector (whose length equals
#'   \code{size}) with labels for the
#'   slots.
#'
#' @return A list.
#'
#' @examples
#' # An empty list with 3 slots
#' empty_list(3)
#'
#' # An empty list with labels
#' empty_list( 3, paste0( "S0", 1:3 ) )
#' @export

empty_list <- function(size, labels = NULL) {

  lst <- vector( "list", size )

  if (!is.null(labels)) {
    if (length(labels) == length(lst)) {
      names(lst) <- labels
    } else {
      warning("Vector of labels does not match length of list")
    }
  }

  return(lst)
}

#### 4.4) every ####
#' Sequence of Values in Regular Increments
#'
#' Extracts a sequence of values from a
#' vector in regular increments.
#'
#' @param x A vector of values.
#' @param step The size of the increment between
#'   indices in the sequence.
#' @param start The index at which to start the
#'   sequence.
#' @param value A vector of new values to assign
#'   to \code{x} at the sequence of indices.
#'
#' @return A vector of values extracted from \code{x}.
#'
#' @name every
#'
#' @examples
#' # Extract every other value
#' # at odd positions
#' every(1:10)
#' # Extract every other value
#' # at even positions
#' every(1:10, , 2) # Note double commas
#'
#' # Extract every 3rd value starting
#' # from 6th position
#' every(1:12, 3, 6)
#'
#' # Replace values at even
#' # positions with 0
#' x <- 1:10
#' every(x, , 2) <- 0
#' x
NULL

#### 4.4.1) `every` ####
#' @rdname every
#' @export

every <- function(x, step = 2, start = 1) {
  return(x[seq(start, length(x), step)])
}

#### 4.4.2) `every<-` ####
#' @rdname every
#' @export

`every<-` <- function(x, value, step = 2, start = 1) {
  x[seq(start, length(x), step)] <- value

  return(x)
}

#### 4.5) find_increment ####
#' Find Increment Over Range of Values
#'
#' Given a range of values and a desired divisor,
#' determines the rounded increment to use.
#' Useful, for example, to determine the
#' equally-spaced intervals to use for
#' a figure's axes.
#'
#' @param x A numeric vector of values.
#' @param n An integer, the divisor. If not specified,
#'   uses the number of standard deviations instead.
#'
#' @return A named numeric value, the rounded increment
#'   to iterate over the specified number of times,
#'   with the estimated place to round to as a name.
#'
#' @examples
#' x <- rnorm(100)
#' find_increment(x)
#' find_increment(x, 6)
#'
#' @export

find_increment <- function(x,
                           n = NULL ) {

  r <- range(x, na.rm = TRUE)
  i <- diff(r)

  if ( is.null(n) ) {
    s <- sd(x, na.rm = TRUE)
    n <- round( i/s )
  }

  b10 <- round( log( i/n, base = 10 ) )

  if ( b10 < 0 ) {
    inc <- round( i/n, abs(b10) )
  }

  if ( b10 == 0 ) {
    inc <- round( i/n )
  }

  if ( b10 > 0 ) {
    d <- 10^b10
    inc <- round( (i/n)/d )*d
  }

  names(inc) <- b10

  return(inc)
}

#### 4.6) group_index ####
#' Create Index Over Groupings
#'
#' Create a numeric index over the unique levels of a
#' variable or a set of variables.
#'
#' @param ... Vectors of equal length.
#' @param levels A list with the order of the unique levels for
#'   each input vector (indices assigned from first to last level).
#'
#' @returns An integer vector from 1 to the number of unique levels.
#'
#' @examples
#' # Convert to numeric index
#' group_index( rep( LETTERS[3:1], each = 3 ) )
#'
#' # Can control assignment of indices
#' group_index(
#'   rep( LETTERS[3:1], each = 3 ), levels = list( c( 'C', 'B', 'A' ) )
#' )
#'
#' # Can create single index over all
#' # unique combinations of multiple variables
#' group_index( rep( LETTERS[1:3], each = 3 ), rep( 1:3, 3 ) )
#'
#' @export

group_index <- function( ..., levels = NULL ) {

  # Extract inputs
  lst_arg <- list(...)

  # Number of inputs
  n_arg <- length( lst_arg )

  # Length of each input
  l <- sapply(
    1:n_arg, function(i) length( lst_arg[[i]] )
  )

  # Lengths must be equal
  if ( !all( l %in% l[1] ) ) {

    stop( 'Vectors must be of equal length' )

    # Close 'Lengths must be equal'
  }

  mat_index <- matrix( NA, l[1], n_arg )

  # Loop over inputs
  for ( i in 1:n_arg ) {

    # If levels not specified
    if ( is.null(levels) ) {

      mat_index[, i] <- as.numeric( as.factor( lst_arg[[i]] ) )

      # Close 'If levels not specified'
    } else {

      mat_index[, i] <- as.numeric(
        factor( lst_arg[[i]], levels = levels[[i]] )
      )

      # Close else for 'If levels not specified'
    }

    # Close 'Loop over inputs'
  }

  # Create one collapsed index over all unique combinations
  collapsed_index <- apply(
    mat_index, 1, function(x) {
      paste( x, collapse = '.' )
    }
  )

  return(
    as.numeric( as.factor( collapsed_index ) )
  )

}

#### 4.7) lin ####
#' Create Linear Sequence of Evenly Spaced Values
#'
#' Generates a sequence of evenly spaced
#' intervals between a lower and upper limit.
#'
#' @param start The starting value.
#' @param end The final value.
#' @param n_intervals The number of evenly spaced intervals.
#'
#' @return A numeric vector.
#'
#' @examples
#' # Five evenly spaced intervals from 0 to 1
#' lin(0, 1, 5)
#' @export

lin <- function(start, end, n_intervals) {
  return(seq(start, end, length.out = n_intervals))
}

#### 4.8) new_limits ####
#' Rescale Values to Have New Limits
#'
#' A function that rescales a vector of
#' values to have a new minimum and maximum.
#'
#' @param x A vector of numeric values.
#' @param lower The new lower limit or minimum value.
#' @param upper The new upper limit or maximum value.
#' @param ... Additional parameters for the
#'   \code{\link[base:max]{min}} and
#'   \code{\link[base:max]{max}} functions.
#'
#' @return A vector of rescaled values.
#'
#' @examples
#' x <- 1:3
#' new_limits( x, 0, 1 )
#'
#' @export

new_limits <- function( x, lower, upper, ... ) {

  mn <- min(x, ... )
  mx <- max( x, ... )

  x_unit <- (x - mn) / (mx - mn)

  out <- x_unit*(upper-lower) + lower

  return( out )
}

#### 4.9) over ####
#' A Sequence Adjusted by an Iterator
#'
#' A function that takes a sequence of values
#' and scales it by an iterator. Useful when
#' extracting a set of multiple values within
#' a loop.
#'
#' @param x A sequence of values (converted to integers).
#' @param iter An iterator value, typically the
#'   variable defined within a loop.
#' @param per An optional value specifying the
#'   increment by which to adjust the range
#'   (defaults to the maximum value of \code{x}).
#' @param adj An optional value specifying
#'   any adjustments to the iterator.
#'
#' @return A vector of values.
#'
#' @examples
#' # Pull 3 values per iteration
#' y <- 1:9
#' for (i in 1:3) {
#'   print(y[over(1:3, i)])
#' }
#'
#' # Pull first 2 of each 3 values per iteration
#' # using the 'per' argument
#' for (i in 1:3) {
#'   print(y[over(1:2, i, per = 3)])
#' }
#'
#' # Pull last of 3 values per iteration
#' # using the 'per' argument
#' for (i in 1:3) {
#'   print(y[over(3, i, per = 3)])
#' }
#'
#' # Pull 2 values for final 2 sets using the
#' # 'adj' argument
#' y <- 1:8
#' for (i in 1:2) {
#'   print(y[over(1:2, i, adj = 1)])
#' }
#' @export

over <- function(x, iter,
                 per = NULL,
                 adj = -1) {

  # Ensure inputs are integers
  x <- as.integer(round(x))

  if (is.null(per)) {
    per <- max(x)
  }

  return(x + per * (iter + adj))
}

#### 4.10) print_table ####
#' Print a Nicely Formatted Table
#'
#' Given a data frame, prints to console
#' a formatted table of the results.
#'
#' @param tbl A formatted data frame.
#' @param return Logical; if \code{TRUE}, returns
#'   the character vector with each formatted
#'   row.
#'
#' @return If \code{return} is \code{TRUE} returns the
#' vector of character strings with the formatted output
#' for each row of the table.
#'
#' @examples
#' data("mtcars")
#' tbl <- aggregate(mtcars$mpg, mtcars[, c("cyl", "vs")], mean)
#' tbl$x <- round(tbl$x, 1)
#' colnames(tbl) <- c("# of cylinders", "Engine type", "Miles per gallon")
#' print_table(tbl)
#' @export

print_table <- function(tbl, return = F) {

  # Initialize output
  out <- matrix(" ", nrow(tbl) + 1, ncol(tbl))

  column_names <- colnames(tbl)

  to_remove <- c(
    'PRD',
    'OUT',
    'COL'
  )

  # Loop over elements to remove
  for ( i in seq_along( to_remove ) ) {

    column_names <- gsub(
      to_remove[i], '', column_names
    )

    # Close 'Loop over elements to remove'
  }

  mat_find_replace <- rbind(
    c( 'OPP', '(' ), # 1
    c( 'CLP', ')' ), # 2
    c( 'OPB', '[' ), # 3
    c( 'CLB', ']' ), # 4
    c( 'HPH', '-' ), # 5
    c( 'CLN', ':' ), # 6
    c( 'SMC', ';' ), # 7
    c( 'PPE', '|' ), # 8
    c( 'PLS', '+' ), # 9
    c( 'EQL', '=' ), # 10
    c( 'SLS', '/' ), # 11
    c( 'HSH', '#' ), # 12
    c( 'PRC', '%' ), # 13
    c( 'AMP', '&' ), # 14
    c( 'LSS', '<' ), # 15
    c( 'GRT', '>' ), # 16
    c( 'LTE', '\U2264' ), # 17
    c( 'GTE', '\U2265' ), # 18
    c( 'SP2', '\U00B2' ), # 19
    c( 'ZZ', ' ' ) # 20
  )

  for ( i in 1:nrow( mat_find_replace ) ) {
    column_names <- gsub(
      mat_find_replace[i, 1], mat_find_replace[i, 2],
      column_names, fixed = TRUE
    )
  }

  #< Loop over columns
  for (i in 1:ncol(tbl)) {

    # Determine maximum number of characters for elements
    # in the table's column (including the column name)
    nc <- max(c(
      sapply(as.character(tbl[[i]]), nchar),
      nchar(column_names[i])
    ))

    #<| Loop over rows
    for (j in 1:(nrow(tbl) + 1)) {

      if (j > 1) {
        # Elements in column
        val <- as.character(tbl[[i]])[j - 1]
      } else {
        # Column name
        val <- column_names[i]
      }

      # Current number of characters
      cur_nc <- nchar(val)

      #<|< Pad with empty spaces
      if (cur_nc < nc) {
        val <- paste(paste(rep(" ", nc - cur_nc), collapse = ""),
                     val,
                     sep = ""
        )
        out[j, i] <- val
        #>|> Close conditional 'Pad with empty spaces'
      } else {
        out[j, i] <- val
        #>|> Close conditional 'Pad with empty spaces'
      }

      #|> Close loop 'Loop over rows'
    }

    #> Close loop 'Loop over columns'
  }

  # Convert to vector of strings
  output <- apply(out, 1, paste, collapse = " | ")
  output <- sapply(output, function(x) paste(x, "|"))

  if (!return) {
    for (i in 1:length(output)) {
      cat(c(output[i], "\n"))
    }
  } else {
    return(output)
  }
}

#### 4.11) runs_in_sequence ####
#' Determine Runs in a Sequence
#'
#' Given a sequence of values of which
#' a subset are dubbed 'hits', determine
#' the number of runs of hits and the
#' start and end of each run of hits.
#'
#' @param x A vector of values.
#' @param codes_for_run A vector of the values
#'   in \code{x} indicating a hit.
#'
#' @return A list with a) the total number of runs, and
#' b) a matrix with a column for the start position of
#' each run and a column for the end position of each
#' run.
#'
#' @examples
#' # Generate a sequence of zeros and ones
#' x <- rbinom( 10, 1, .5 )
#' print(x)
#' # Determine runs of ones
#' runs_in_sequence( x )
#'
#' @export

runs_in_sequence <- function( x, codes_for_hit = 1 ) {

  # Number of observations
  n <- length( x )

  # Initialize variables to track
  # start and end of each run
  run_start = rep( NA, n )
  run_end = rep( NA, n )

  # Indicator for start of each run
  new_run <- FALSE

  # Convert to FALSE/TRUE
  positive_cases <- x %in% codes_for_hit

  inc <- 0 # Variable for indexing starts of run

  # Loop over observations
  for ( i in 1:n ) {

    # Check if a run is happening
    is_true <- positive_cases[i]

    # If not 1st observation
    if ( i > 1 ) {

      # Check if a new run is occuring
      if ( is_true & (positive_cases[i-1] != is_true) ) {
        new_run <- TRUE
        inc <- inc + 1
      }

    } else {
      if ( is_true ) {
        new_run <- TRUE
        inc <- inc + 1
      }
    }

    if ( new_run ) {
      # If start of new run
      # record position
      run_start[inc] <- i
      run_end[inc] <- i
    } else {
      # if run is continuing
      if ( is_true ) {
        run_end[inc] <- i
      }

    }

    # Reset tracker for new runs
    new_run <- FALSE
  }

  # Remove missing cases
  run_start <- run_start[ !is.na( run_start ) ]
  run_end <- run_end[ !is.na( run_end ) ]

  n_runs <- length( run_start )

  out <- list(
    n_runs = n_runs,
    sequences = cbind( Start = run_start, End = run_end )
  )

  return( out )
}

#### 5) Functions for strings ####

#### 5.1) align_strings ####
#' Pad Strings to be the Same Length
#'
#' Function that pads strings to be the same
#' length, with padding applied either to the
#' left or right-hand side.
#'
#' @param strings A character vector.
#' @param left Logical; if \code{TRUE}
#'   left-aligns strings, otherwise
#'   right-aligns strings.
#'
#' @return A character vector.
#'
#' @examples
#' # Strings of unequal length
#' x <- c( "A", "BB", "CCC" )
#'
#' # Left-aligned
#' s <- align_strings( x )
#' message( paste0( '|', s, '|\n' ) )
#'
#' # Right-aligned
#' s <- align_strings( x, FALSE )
#' message( paste0( '|', s, '|\n' ) )
#'
#' @export

align_strings <- function( strings, left = TRUE ) {

  nc <- nchar( strings )
  mx <- max( nc )

  padding <- sapply( mx - nc, function(v) {
    if ( v > 0 ) {
      return( squish( rep( " ", v ) ) )
    } else {
      return( "" )
    }
  } )

  if ( left ) {
    return( paste0( strings, padding ) )
  } else {
    return( paste0( padding, strings ) )
  }

}

#### 5.2) format_numbers ####
#' Pad Numeric Values to be the Same Length
#'
#' Function that pads numeric values to be
#' the same length by adding spaces to the
#' left-hand side and trailing zeros after
#' the decimal place.
#'
#' @param x A numeric vector.
#'
#' @return A character vector.
#'
#' @examples
#' # Decimal values
#' message( paste0( format_numbers( round( rnorm( 3 ), 2 ) ), "\n" ) )
#' # Whole numbers
#' message( paste0( format_numbers( rbinom( 3, 100, .1 ) ), "\n" ) )
#'
#' @export

format_numbers <- function( x ) {

  xchr <- as.character( x )

  any_decimals <- any( grepl( ".", xchr, fixed = TRUE ) )

  if ( any_decimals ) {

    lhs <- sapply(
      xchr, function(s) {
        strsplit(
          s, split = ".", fixed = TRUE
        )[[1]][1]
      }
    )

    rhs <- sapply(
      xchr, function(s) {
        strsplit(
          s, split = ".", fixed = TRUE
        )[[1]][2]
      }
    )

    if ( any( is.na( rhs ) ) ) {
      rhs[ is.na( rhs ) ] <- "0"
    }

    nc <- nchar( lhs )
    mx <- max( nc )

    padding <- sapply( mx - nc, function(v) {
      if ( v > 0 ) {
        squish( rep( " ", v ) )
      } else {
        return( "" )
      }
    } )

    lhs <- paste0( padding, lhs )

    nc <- nchar( rhs )
    mx <- max( nc )

    padding <- sapply( mx - nc, function(v) {
      if ( v > 0 ) {
        squish( rep( "0", v ) )
      } else {
        return( "" )
      }
    } )

    rhs <- paste0( rhs, padding )

    return( paste0( lhs, ".", rhs ) )

  } else {

    nc <- nchar( xchr )
    mx <- max( nc )

    padding <- sapply( mx - nc, function(v) {
      if ( v > 0 ) {
        squish( rep( " ", v ) )
      } else {
        return( "" )
      }
    } )

    lhs <- paste0( padding, xchr )

    return( lhs )

  }

}

#### 5.3) replace_string ####
#' Replace String Contents
#'
#' Function that replaces a specified pattern found
#' within a string (or vector of strings) with
#' a user-specified pattern.
#'
#' @param s A character vector.
#' @param to_replace A character vector, the patterns to
#'   match and replace within each string.
#' @param replace_with An optional character vector,
#'   either of length one or of matching length to
#'   \code{to_replace}, the patterns to substitute.
#'
#' @return A character string.
#'
#' @examples
#' # Example string
#' x <- c( 'AA', 'AB', 'AC', 'DD' )
#' # Remove the letter 'A'
#' replace_string( x, 'A' )
#' # Replace the letter 'A' with '1'
#' replace_string( x, 'A', '1' )
#' # Replace multiple letters
#' replace_string( x, c( 'B', 'C' ), c( '1', '2' ) )
#'
#' @export

replace_string <- function( s, to_replace, replace_with = '' ) {

  if ( length( replace_with ) == 1 ) {
    replace_with <- rep( replace_with, length( to_replace ) )
  }


  n_cases <- length( to_replace )
  out <- s

  for ( i in 1:n_cases ) {
    out <- gsub( to_replace[i], replace_with[i], out, fixed = TRUE )
  }

  return( out )
}

#### 5.4) squish ####
#' Collapse a Character Vector
#'
#' Function that combines elements of a
#' character vector into a single string.
#'
#' @param chr_vec A character vector.
#' @param between The value to use as spacing between
#'   each element in \code{chr_vec}.
#'
#' @return A character string.
#'
#' @examples
#' # Collapse a character vector
#' print( squish( c( "A", "B", "C" ) ) )
#'
#' # Collapse a characcter vector with custom spacing
#' print( squish( c( "1", "2", "3" ), " + " ) )
#'
#' @export

squish <- function( chr_vec, between = "" ) {

  return( paste( chr_vec, collapse = between ) )

}

#### 6) Functions for vectors ####

#### 6.1) not_NA ####
#' Return a Logical Vector With NA Values set to FALSE
#'
#' Function that takes a logical vector and ensures
#' any \code{NA} values are set to \code{FALSE}.
#'
#' @param x A logical vector.
#'
#' @return A logical vector with \code{NA} cases
#' set to \code{FALSE}.
#'
#' @examples
#' not_NA( c(TRUE, FALSE, NA) )
#'
#' @export

not_NA <- function( lgc ) {

  return( lgc & !is.na(lgc) )

}

#### 6.2) percent_that_sums_to_100 ####
#' Compute Percentage That Sums to 100%
#'
#' Function that uses the largest remainder method to
#' ensure that a set of percentages sum to 100%
#' even in the presence of rounding error.
#'
#' @param x A vector of proportions or frequencies
#'   that are suppose to sum to 100%.
#' @param digits The number of digits to round to.
#' @param freq Logical; if \code{TRUE} assumes \code{x}
#'   is a vector of frequencies, otherwise assumes
#'   \code{x} is a vector of proportions.
#'
#' @return A vector of percentages that sum to 100%.
#'
#' @examples
#' x <- c( 9990, 5, 5 )
#' # Convert to percentage and round
#' p <- round( 100*x/sum(x), 1 )
#' # No longer sums to 100% due to rounding error
#' print( sum(p) )
#' # Adjust percentages using the
#' # largest remainder method so
#' # they sum to 100%
#' print( percent_that_sums_to_100( p/100 ) )
#' # Works with frequencies as well
#' print( percent_that_sums_to_100( x, freq = TRUE ) )
#'
#' @export

percent_that_sums_to_100 <- function( x, digits = 1, freq = FALSE ) {

  # Convert to proportion
  if ( freq ) {

    p <- x / sum(x)

    # Close 'Convert to proportion'
  } else {

    p <- x

    # Close else for 'Convert to proportion'
  }

  # Number of frequencies
  n <- length(x)

  # Implement largest remainder method
  # so percentages sum to 100
  Nz <- 100 * 10^digits
  lrm <- p * Nz
  r <- Nz - sum( floor( lrm ) )
  a <- rep( 0, n )
  a[ 1:r ] <- 1
  o <- order( lrm - floor( lrm ), decreasing = TRUE )
  lrm[o] <- floor( lrm[o] ) + a[o]

  return( 100*(lrm / Nz) )
}

#### 6.3) strip_value ####
#' Strip Vector of a Value
#'
#' Removes a specified value from a vector.
#'
#' @param x A vector of values.
#' @param value The value to remove (defaults
#'   to \code{NA})
#'
#' @return A vector of values sans the one removed.
#'
#' @examples
#' x <- c( 1, 2, NA, 3 )
#' print( strip_value(x) )
#'
#' x <- c( 'Hello', '', 'world' )
#' print( strip_value(x, '') )
#'
#' @export

strip_value <- function( x, value = NA ) {

  # If removing NA
  if ( is.na( value ) ) {

    out <- x[ !is.na(x) ]

    # Close 'If removing NA'
  } else {

    out <- x[ !( x %in% value ) ]

    # Close else for 'If removing NA'
  }

  return( out )
}

#### 7) Functions for writing code ####

#### 7.1) create_table_of_contents ####
#' Create a Table of Contents
#'
#' Given a path to an R script, creates a
#' table of contents by searching for
#' section headers of the form:
#' \code{#### X) Label ####} where \code{X} is a
#' numbering scheme (e.g.,1, 2.1, 3.2.1)
#' with periods denoting nested sub-sections.
#'
#' @param file_path An absolute path to the
#'   R script.
#'
#' @return Outputs a message to the R console
#' window with the table of contents.
#'
#' @export

create_table_of_contents <- function( file_path ) {

  script_code <- scan(
    file = file_path,
    what = 'character', sep = '\n',
    quiet = TRUE
  )

  four_asterisks <- sapply(
    script_code,
    function(x) grepl( '####', x, fixed = T )
  )

  section_headers <- script_code[ four_asterisks ]

  section_headers <-
    gsub( '#### ', '', section_headers, fixed = TRUE )
  section_headers <-
    gsub( ' ####', '', section_headers, fixed = TRUE )

  pull_num <- function( s ) {
    out <- strsplit( s, split = ')', fixed = T )[[1]][1]
    out <- gsub( " ", "", out, fixed = T )
    out <- paste0( out, ')' )

    n_periods <- sum(
      grepl(
        ".", strsplit( out, split = "" )[[1]], fixed = T
      )
    )
    if ( n_periods > 0 ) {
      out <- paste0(
        paste( rep( "  ", n_periods ), collapse = "" ),
        out
      )
    }

    return( out )
  }
  section_labels <- sapply(
    section_headers,
    function(s) strsplit( s, split = ')', fixed = T )[[1]][2]
  )

  out <- paste0(
    '# ',
    sapply( section_headers, pull_num ),
    section_labels,
    '\n'
  )
  out <- c( '# Table of contents\n', out )

  message( out )
}

#### 7.2) create_vector ####
#' Create Code for Vector
#'
#' Function to extract unique elements of
#' a vector and output example code to the
#' console.
#'
#' @param x A vector of elements.
#' @param inner A character vector with [1] the
#'   string to add before each element and [2]
#'   the string to add after each element.
#' @param outer A character with [1] the
#'   string to add before all elements and
#'   [2] the string to add after all elements.
#'
#' @returns Example code in the console.
#'
#' @examples
#' create_vector( LETTERS[1:3] )
#'
#' @export

create_vector <- function( x,
                           inner = c( '  "', '",\n' ),
                           outer = c( 'c(\n', ')' ),
                           na.rm = TRUE ) {

  if ( na.rm ) {
    x <- x[ !is.na(x) ]
  }

  u <- sort( unique( x ) )

  out <- paste0( inner[1], u, inner[2] )
  if ( inner[2] == '",\n' ) {
    out[ length(out) ] <- gsub(
      '",\n', '"\n', out[ length(out) ], fixed = TRUE
    )
  }
  cat( c( outer[1], out, outer[2] ) )
}

#### 7.3) section ####
#' Section Numbers for Tracking Progress
#'
#' Output section numbers to the console via
#' a call to \code{\link[base]{message}}.
#' Useful for tracking progress when running
#' a script and for debugging where errors
#' occur.
#'
#' @param x A character string, such as
#'   '1)' or '2.3)' or of a similar form.
#' @param run Logical; if TRUE, a message is
#'   generated.
#' @param spacing A character string, the
#'   character to use to determine the
#'   number of indents to use to
#'   identify section hierarchies.
#' @param end A character string, the
#'   final symbol to attach to the end of
#'   a section header.
#'
#' @examples
#'
#' section("1")
#' # Periods indent output by 2 spaces
#' section("1.1")
#' section("1.1.1")
#' @export

section <- function(x, run = TRUE,
                    spacing = ".",
                    end = ")") {
  n_spacers <- sum(
    grepl(
      spacing,
      strsplit(x, split = "", fixed = TRUE)[[1]],
      fixed = TRUE
    )
  )

  if (grepl(")", x, fixed = TRUE)) end <- ""

  indent <- ""
  if (n_spacers > 0) {
    indent <- paste(rep("  ", n_spacers), collapse = "")
  }

  if (run) {
    message(paste0(indent, x, end))
  }
}

#### 7.4) templates ####
#' Create Templates for Annotations and Code
#'
#' Outputs a template of common annotations
#' and code segments to the console for
#' easy copying and pasting.
#'
#' @param type The type of template to return,
#'   with options for...
#'   \itemize{
#'     \item 'Function (function documentation);
#'     \item 'Header' (header for a R script);
#'     \item 'Package' (package version);
#'     \item 'Progress' (progress bar for a loop);
#'     \item 'Loop' (a \code{for} loop statement);
#'     \item 'Conditional' (a \code{if} statement);
#'     \item 'Roxygen2' (function documentation using Roxygen2);
#'     \item 'recode' (values for the 'dplyr'
#'       function \code{\link[dplyr]{recode}}).
#'   }
#' @param val An optional character vector to be used
#'   with \code{type = 'recode'}.
#'
#' @examples
#' # List of possible inputs to argument
#' # 'type' for each template
#' templates()
#'
#' # Function documentation
#' templates("Function")
#'
#' # Header for R script
#' templates("Header")
#'
#' # Package version
#' templates("Package", "stats")
#'
#' # Progress bar for loop
#' templates("Progress")
#'
#' # Loop
#' templates("Loop")
#'
#' # If ... else statement
#' templates("Conditional")
#'
#' # Roxygen2 function documentation
#' templates("roxygen")
#'
#' # HTML internal links
#' templates("html_links")
#'
#' # Example table for specifying
#' # nomenclature and a glossary
#' # in a .Rmd file
#' templates("rmd_glossary")
#'
#' @export

templates <- function(type = NULL, val = NULL) {

  types <- list(
    function_documentation = c(
      "Function", "function",
      "Func", "func", "Fun", "fun",
      "FD", "fd",
      "Arguments", "arguments",
      "Arg", "arg",
      "1"
    ),
    script_header = c(
      "Header", "header",
      "Head", "head",
      "Script", "script",
      "SD", "sd",
      "2"
    ),
    package_versions = c(
      'Package version',
      'package version',
      'Package',
      'package',
      '3'
    ),
    progress_bar = c(
      "Progress", "progress",
      "Prog", "prog",
      "5"
    ),
    loop = c(
      "Loop", "loop",
      "for", "6"
    ),
    conditional = c(
      "Conditional", "conditional",
      "if", "7"
    ),
    roxygen_documentation = c(
      'Roxygen2', 'Roxygen', 'roxygen', 'roxygen2',
      'roxy',
      '8'
    ),
    recode = c(
      "recode", "9"
    ),
    html_links = c(
      "HTML links", "html links",
      "HTML link", "html link",
      "Internal links", "internal links",
      "Internal link", "internal link",
      "10"
    ),
    rmd_glossary = c(
      "Rmd glossary", "rmd glossary",
      "Nomenclature and glossary",
      "nomenclature and glossary",
      "nom-gloss",
      "11"
    )
    # plot_function = c()
  )

  if (is.null(type)) {

    message( 'Available template options:' )

    for (i in 1:length(types)) {
      message( paste0( '  - ', types[[i]][1] ) )
    }
    type <- ''
  }

  if (type %in% types$function_documentation) {
    string <- paste0(
      "# Title \n",
      "# \n",
      "# ... \n",
      "# \n",
      "# @param 'obj_x' An R object. \n",
      "# \n",
      "# @details \n",
      "# Prerequisites: \n",
      "#   * The R package '?' (version ?) \n",
      "# \n",
      "# @returns ... \n",
      "# \n",
      "# @examples \n",
      "# Forthcoming \n"
    )

    message(string)
  }

  if (type %in% types$script_header) {
    author <- getOption("arfpam.author")
    email <- getOption("arfpam.email")

    if (is.null(author)) {
      author <- "Kevin Potter"
    }
    if (is.null(email)) {
      email <- "kevin.w.potter@gmail.com"
    }

    string <- paste0(
      "# Title\n",
      "# Written by ", author, "\n",
      "# email: ", email, "\n",
      "# Please email me directly if you \n",
      "# have any questions or comments\n",
      "# Last updated ", Sys.Date(), "\n",
      "\n",
      "# Table of contents\n",
      "# 1)"
    )

    message(string)
  }

  if (type %in% types$package_version) {

    string <- paste0(
      "#   * The R package '",
      val,
      "' (version ",
      installed.packages()[val, 'Version'],
      ")"
    )

    message(string)
  }

  if (type %in% types$progress_bar) {
    string <- paste0(
      "int_cases <- 10\n",
      "# Create a progress bar using a base R function\n",
      "obj_pb <- txtProgressBar(\n",
      "  min = 1, max = int_cases, style = 3\n",
      ")\n",
      "\n",
      "# Loop over cases\n",
      "for (i in 1:n_cases) {\n",
      "  # Update the progress bar\n",
      "  setTxtProgressBar(obj_pb, i)\n",
      "  # Close 'Loop over cases'\n",
      "}\n",
      "close(obj_pb)\n"
    )

    message(string)
  }

  if (type %in% types$loop) {
    string <- paste0(
      "# Descriptor\n",
      "for (i in 1:n) {\n",
      "  # Do something\n",
      "  # Close 'Descriptor'\n",
      "}"
    )

    message(string)
  }

  if (type %in% types$conditional) {
    string <- paste0(
      "# Descriptor\n",
      "if (value %in% values) {\n",
      "  # Do something\n",
      "  # Close 'Descriptor'\n",
      "} else {\n",
      "  # Do something else\n",
      "  # Close else for 'Descriptor'\n",
      "}"
    )

    message(string)
  }

  if (type %in% types$roxygen_documentation) {
    string <- paste0(
      "#' Title\n",
      "#' \n",
      "#' Description.\n",
      "#' \n",
      "#' @param x ...\n",
      "#' \n",
      "#' @details\n",
      "#' \n",
      "#' @returns Output.\n",
      "#' \n",
      "#' @examples\n",
      "#' # Examples\n",
      "#' \n",
      "#' @export\n",
      "\n"
    )

    message(string)
  }

  if (type %in% types$html_links) {

    string <- paste0(
      '<a name="SXX"></a>\n',
      '<a href="#SXX">LINK</a>\n',
      '<a name="SXX-PXX"></a>\n',
      '<a href="#SXX-PXX">LINK</a>\n',
      '<a href="#TOC">&#129145;</a> <a href="#END">&#129147;</a>\n'
    )

    message(string)
  }

  if (type %in% types$rmd_glossary) {

    string <- paste0(
      '### 1. Nomenclature and glossary\n\n',
      '```{r S01-nom-gloss,echo=FALSE}\n',
      'nom_gloss <- rbind(\n',
      '  c("Example term", "ET", "Notes on term" )\n',
      ')\n',
      'colnames( nom_gloss ) <- "C" %p% 1:3 \n',
      'nom_gloss <- data.frame( nom_gloss )\n\n',
      'th <- create_header( nom_gloss )\n',
      'th$colA <- c(\n',
      '  "Term",\n',
      '  "Abbreviation",\n',
      '  "Comments"\n',
      ')\n\n',
      'ft <- create_ft( nom_gloss, th, alternate_col = "grey95" )\n',
      'ft\n',
      '```\n'
    )

    message(string)
  }

  if (type %in% types$recode & !is.null( val ) ) {

    string <- paste0(
      "`", val, "` = '',\n"
    )
    string[ length( string ) ] <-
      gsub( ",", "", string[ length( string ) ], fixed = TRUE )
    string <- c(
      "recode(\n  x,\n",
      paste0( "  ", string ),
      ")"
    )
    message( paste( string, collapse = "" ) )
  }

}

#### 7.5) update_package_version ####
#' Update Package Versions in Script Documentation
#'
#' Function to update the documentation in an R
#' script with a package's most up-to-date version
#' number.
#'
#' @param file_name A character string, the file
#'   name for the R script to update.
#' @param prior_text_package A character string, the
#'   text preceding the package name.
#' @param prior_text_version A character string, the
#'   text preceding the package's version number.
#' @param post_text_version A character string, the
#'   text following the package's version number
#'   (can just be the initial few characters).
#' @param overwrite Logical; if \code{TRUE} updates
#'   the original R script by overwriting its content.
#' @param display Logical; if \code{TRUE} displays
#'   the updated R script content to the console window.
#'
#' @returns Either as a side effect overwrites the original
#' R script content or displays the updated content to the
#' console window.
#'
#' @export

update_package_version <- function(
    file_name,
    prior_text_package = '#   * The package ',
    prior_text_version = ' (version ',
    post_text_version = ')',
    overwrite = TRUE,
    display = FALSE ) {

  script_content <- scan(
    file = file_name,
    what = character(),
    sep = '\n',
    blank.lines.skip = FALSE
  )

  package_versions <- sapply(
    seq_along( script_content ), function(r) {

      lgc_match <- grepl(
        prior_text_package, script_content[r], fixed = TRUE
      )

      # If match to package version terms
      if ( lgc_match ) {

        package_name <- strsplit(
          script_content[r], split = prior_text_version, fixed = TRUE
        )[[1]][1]
        package_name <- gsub(
          prior_text_package, '', package_name, fixed = TRUE
        )
        package_name <- gsub(
          "'", "", package_name, fixed = TRUE
        )

        # print(package_name)

        version_number <- installed.packages()[package_name, 'Version']

        return(version_number)

        # Close 'If match to package version terms'
      } else {

        return( '' )

        # Close else for 'If match to package version terms'
      }

    }
  )

  # If any matches
  if ( any( package_versions != '' ) ) {

    index <- which( package_versions != '' )

    # Loop over matches
    for ( i in seq_along( index ) ) {

      line_parts <- strsplit(
        script_content[ index[i] ],
        split = prior_text_version,
        fixed = TRUE
      )[[1]]
      line_end <- strsplit(
        line_parts[2],
        split = post_text_version,
        fixed = TRUE
      )[[1]][2]
      if ( is.na(line_end) ) line_end <- ''

      new_term <- c(
        line_parts[1],
        prior_text_version,
        package_versions[ index[i] ],
        post_text_version,
        line_end
      ) |> paste( collapse = '' )

      script_content[ index[i] ] <- new_term

      # Close 'Loop over matches'
    }

    # Close 'If any matches'
  }

  # Update original script
  if ( overwrite ) {

    write(
      script_content,
      file = file_name,
      sep = '\n',
      append = FALSE
    )

    message( paste0( 'Lines updated: ', length(index) ) )

    # Close 'Update original script'
  }

  # Display updated script in console
  if ( display ) {

    lst_nuisance <- sapply( script_content, message )

    return( invisible() )

    # Close 'Display updated script in console'
  }

}
rettopnivek/arfpam documentation built on Oct. 20, 2024, 7:24 p.m.