R/last_left.R

Defines functions last_left

Documented in last_left

# wrapper function for df and vectors -------------------------------------

#' @export last_left
#' @title get the last n words of a string
#' @description This function returns the last n words of a string (by default, five).
#' This can, for example, be helpful for extracting words from the left context column
#' of a concordance data frame.
#' @param x A vector or data frame.
#' @param column If x is a data frame, the number or name of the column. The name
#' may be given quoted or unquoted. A variable holding the column name or number
#' is also accepted; if an unquoted name matches both a column of x and a
#' variable, the column takes precedence. If omitted, the first of the columns
#' "Left", "Left_context", "Left_Context", "left_context" that exists in x is
#' used. Ignored if x is not a data frame.
#' @param n number of words to return. Default is 5.
#' @param omit_punctuation If TRUE (the default), strings consisting exclusively of
#' non-alphanumeric characters will be omitted.
#' @return A vector containing the last n words of each vector element. For
#' input of length zero, an empty character vector is returned.
last_left <- function(x, column, n = 5, omit_punctuation = TRUE) {

  if (is.data.frame(x)) {

    if (missing(column)) {
      # No column given: fall back to the conventional names of the left
      # context column, in order of preference.
      candidates <- c("Left", "Left_context", "Left_Context", "left_context")
      found <- candidates[candidates %in% colnames(x)]
      if (length(found) == 0L) {
        stop("`column` is missing and none of the default column names (",
             paste(candidates, collapse = ", "), ") exists in `x`.",
             call. = FALSE)
      }
      key <- found[1L]
    } else {
      # Capture the unevaluated argument so that bare column names work.
      column_expr <- substitute(column)

      if (is.symbol(column_expr) &&
          as.character(column_expr) %in% colnames(x)) {
        # Bare name of an existing column: use it without evaluating it.
        key <- as.character(column_expr)
      } else {
        # Anything else (string or number literal, a variable holding the
        # name/number, or another expression) is evaluated normally.
        key <- tryCatch(
          column,
          error = function(e) {
            stop("Could not resolve `column`: ",
                 paste(deparse(column_expr), collapse = " "),
                 " is neither a column of `x` nor an evaluable expression (",
                 conditionMessage(e), ").",
                 call. = FALSE)
          }
        )
      }
    }

    if (!(is.character(key) || is.numeric(key)) ||
        length(key) != 1L || is.na(key)) {
      stop("`column` must be a single column name or column number.",
           call. = FALSE)
    }

    # `[[` silently returns NULL for unknown names, so check explicitly.
    if (is.character(key) && !key %in% colnames(x)) {
      stop("Column \"", key, "\" does not exist in `x`.", call. = FALSE)
    }

    # `[[` returns a plain vector for data frames and tibbles alike,
    # whereas `x[, j]` would keep a tibble as a one-column tibble.
    vec <- x[[key]]

  } else {
    vec <- x
  }

  # Nothing to process; avoids iterating over the bogus index 1:0.
  # NOTE(review): assumes .last_left() yields character output -- confirm.
  if (length(vec) == 0L) {
    return(character(0))
  }

  # get last left for every element
  sapply(seq_along(vec), function(i) {
    .last_left(vec[i], n = n, omit_punctuation = omit_punctuation)
  })

}
hartmast/concordances documentation built on April 19, 2023, 9:39 p.m.