R/in_string_sort.R

Defines functions in_string_sort

Documented in in_string_sort

#' Appropriately Sort Numerical Vectors with Prepended Characters
#'
#' If a number is preceded by a character (as in L2), calling sort on these numbers will fall foul of the '10 before 2' issue. This script breaks the strings down into subsets of equal-length elements and then reorders.
#' IMPORTANT to note is that this breaks down if the text surrounding the numbers differs in length.
#' @param string The vector string (in the case of a vector) or table-like object column name or column index to be sorted.
#' @param type One of 'vector' or 'table'. Entering anything else returns an error. If one enters 'table', the 'table' argument must also be satisfied.
#' @param table Should be left blank if type = 'vector' or the data frame, matrix, tibble, etc. for sorting if type = 'table'.
#' @param cores If the sorting is to be parallelised, enter the number of cores here. Defaults to one.
#' @keywords sort order table
#' @export
#' @import parallel
#' @examples
#' unordered <- paste0('t', c(10, 100, 2, 20, 1))
#' sort(unordered)
#' [1] "t1"   "t10"  "t100" "t2"   "t20"
#' in_string_sort(string = unordered, type = 'vector')
#' [1] "t1"   "t2"   "t10"  "t20"  "t100"
#'
#' unordered.mat <- matrix(data = c(1:20, unordered), ncol = 5)
#' unordered.mat
#'      [,1] [,2] [,3] [,4] [,5]
#' [1,] "1"  "6"  "11" "16" "t10"
#' [2,] "2"  "7"  "12" "17" "t100"
#' [3,] "3"  "8"  "13" "18" "t2"
#' [4,] "4"  "9"  "14" "19" "t20"
#' [5,] "5"  "10" "15" "20" "t1"
#'
#' in_string_sort(string = 5, type = 'table', table = unordered.mat)
#'      [,1] [,2] [,3] [,4] [,5]
#' [1,] "5"  "10" "15" "20" "t1"
#' [2,] "3"  "8"  "13" "18" "t2"
#' [3,] "1"  "6"  "11" "16" "t10"
#' [4,] "4"  "9"  "14" "19" "t20"
#' [5,] "2"  "7"  "12" "17" "t100"

in_string_sort <- function(string, type = c('vector', 'table'), table = NULL, cores = 1) {

  type <- match.arg(arg = type)
  # Restricts selection to just the above types (should use this more often, actually).

  switch(type,
         vector = if ( !is.vector(string) ) { stop("If there is no table provided, 'string' must be an atomic vector.") },
         table = {

           if ( is.null(table) ) { stop('If sorting a table, the table must be provided as an argument.') }
           # Ought to be obvious.

           if ( nrow(table) <= 1 ) { stop("Can't sort something with only one value.") }
           # Weird that people would even use this format, except for maybe binding columns.

           if ( ncol(table) <= 1 ) { stop("A table of one or fewer columns should be treated as a vector.") }
           # Weird that people would even use this format, except for maybe binding columns.

           if ( length(string) == 1 ) {
           # Presumably, a string of length one denotes a column if one wants to sort it (and its made it past the dimension catches).

             switch(class(string),
                    numeric = if ( !is_in(string, 1:ncol(table)) ) { stop("The (length one) string provided doesn't seem to be an index.") },
                    character = if ( !is_in(string, colnames(table)) ) { stop("The (character) string provided doesn't seem to be a column name.") })
             # Check whether the column name/index exists in the table.

           } else { stop('If sorting a table, the string argument MUST be a column name or index.') }

           })
           # A whole lot of error catching here.


  switch(type,
         vector = chars <- nchar(string),
         table = chars <- table[, string] %>% disown %>% nchar)
  # Slightly different approaches. Switch can only handle vectors of length one, so need to trick it.


  if ( !all_unique(chars) ) {

    sorted <-
      mclapply(mc.cores = cores,
               X = sort(unique(chars)),
               FUN = function(n) {

                 switch(type,
                        vector = {
                          equals(nchar(string), n) %>%
                            which %>%
                            { string[.] } %>%
                            sort },
                        table = {
                          equals(nchar(disown(table[, string])), n) %>%
                            which %>%
                            { table[., ] } %>%
                            { if ( is.null(dim(.)) ) { . } else { .[order(disown(.[, string])), ] } } })
                 # SUCH a good function.

             })

    if ( type == 'vector' ) { sorted %<>% { do.call('c', .) } }
    if ( type == 'table' ) { sorted %<>% { do.call('rbind', .) } }

  } else { sorted <- sort(string) }
  # Normal sort works in the case of even-numbered character arguments.

  return(sorted)

}
danjamesadams/Dantools documentation built on Aug. 24, 2019, 6:15 p.m.