R/utilities.R

Defines functions extract_number first_upper_case all_title_case all_lower_case all_upper_case

Documented in all_lower_case all_title_case all_upper_case extract_number first_upper_case

#' @title Utilities for handling with numbers and strings
#' @name utils_num_str
#' @param .data A data frame
#' @param ... The argument depends on the function used.
#'  * For `round_cols()` `...` are the variables to round. If no
#'  variable is informed, all the numeric variables from `data` are used.
#'  * For `all_lower_case()`, `all_upper_case()`,
#'   `all_title_case()`, `stract_number()`, `stract_string()`,
#'   `remove_strings()`, and `tidy_strings()` `...` are the
#'   variables to apply the function. If no variable is informed, the function
#'   will be applied to all non-numeric variables in `.data`.
#' @param digits The number of significant figures.
#' @param pattern A string to be matched. Regular Expression Syntax is also
#'   allowed.
#' @param replacement A string for replacement.
#' @param ignore_case If `FALSE` (default), the pattern matching is case
#'   sensitive and if `TRUE`, case is ignored during matching.
#' @param sep A character string to separate the terms. Defaults to "_".
#' @description
#' `r badge('stable')`
#'
#' * `all_lower_case()`: Translate all non-numeric strings of a data frame
#' to lower case.
#' * `all_upper_case()`: Translate all non-numeric strings of a data frame
#' to upper case.
#' * `all_title_case()`: Translate all non-numeric strings of a data frame
#' to title case.
#' * `first_upper_case`: Translate the first word of a string to upper
#' case.
#' * `extract_number()`: Extract the number(s) of a string.
#' * `extract_string()`: Extract all strings, ignoring case.
#' * `find_text_in_num()`: Find text characters in a numeric sequence and
#' return the row index.
#' * `has_text_in_num()`: Inspect columns looking for text in numeric
#' sequence and return a warning if text is found.
#' * `remove_space()`: Remove all blank spaces of a string.
#' * `remove_strings()`: Remove all strings of a variable.
#' * `replace_number()`: Replace numbers with a replacement.
#' * `replace_string()`: Replace all strings with a replacement, ignoring
#' case.
#' * `round_cols()`: Round a selected column or a whole data frame to
#' significant figures.
#' * `tidy_strings()`: Tidy up characters strings, non-numeric columns, or
#' any selected columns in a data frame by putting all word in upper case,
#' replacing any space, tabulation, punctuation characters by `'_'`, and
#' putting `'_'` between lower and upper case. Suppose that `str =
#' c("Env1", "env 1", "env.1")` (which by definition should represent a unique
#' level in plant breeding trials, e.g., environment 1) is subjected to
#' `tidy_strings(str)`: the result will be then `c("ENV_1", "ENV_1",
#' "ENV_1")`. See Examples section for more examples.
#' @md
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @examples
#' \donttest{
#' library(metan)
#'
#' ################ Rounding numbers ###############
#' # All numeric columns
#' round_cols(data_ge2, digits = 1)
#'
#' # Round specific columns
#' round_cols(data_ge2, EP, digits = 1)
#'
#' ########### Extract or replace numbers ##########
#' # Extract numbers
#' extract_number(data_ge, GEN)

#' # Replace numbers
#' replace_number(data_ge, GEN)
#' replace_number(data_ge,
#'                GEN,
#'                pattern = 1,
#'                replacement = "_one")
#'
#' ########## Extract, replace or remove strings ##########
#' # Extract strings
#' extract_string(data_ge, GEN)

#'
#' # Replace strings
#' replace_string(data_ge, GEN)
#' replace_string(data_ge,
#'                GEN,
#'                pattern = "G",
#'                replacement = "GENOTYPE_")
#'
#' # Remove strings
#' remove_strings(data_ge)
#' remove_strings(data_ge, ENV)
#'
#'
#' ############ Find text in numeric sequences ###########
#' mixed_text <- data.frame(data_ge)
#' mixed_text[2, 4] <- "2..503"
#' mixed_text[3, 4] <- "3.2o75"
#' find_text_in_num(mixed_text, GY)
#'
#' ############# upper, lower and title cases ############
#'gen_text <- c("This is the first string.", "this is the second one")
#'all_lower_case(gen_text)
#'all_upper_case(gen_text)
#'all_title_case(gen_text)
#'first_upper_case(gen_text)
#'
#'# A whole data frame
#'all_lower_case(data_ge)
#'
#'
#' ############### Tidy up messy text string ##############
#' messy_env <- c("ENV 1", "Env   1", "Env1", "env1", "Env.1", "Env_1")
#' tidy_strings(messy_env)
#'
#' messy_gen <- c("GEN1", "gen 2", "Gen.3", "gen-4", "Gen_5", "GEN_6")
#' tidy_strings(messy_gen)
#'
#' messy_int <- c("EnvGen", "Env_Gen", "env gen", "Env Gen", "ENV.GEN", "ENV_GEN")
#' tidy_strings(messy_int)
#'
#' library(tibble)
#' # Or a whole data frame
#' df <- tibble(Env = messy_env,
#'              gen = messy_gen,
#'              Env_GEN = interaction(Env, gen),
#'              y = rnorm(6, 300, 10))
#' df
#' tidy_strings(df)
#' }
#' @export
all_upper_case <- function(.data, ...){
  helper_case(.data, toupper, ...)
}
#' @name utils_num_str
#' @export
all_lower_case <- function(.data, ...){
  helper_case(.data, tolower, ...)
}
#' @name utils_num_str
#' @export
all_title_case <- function(.data, ...){
  helper_case(.data, to_title, ...)
}
#' @name utils_num_str
#' @export
first_upper_case <- function(.data, ...){
  helper_case(.data, first_upper, ...)
}
#' @name utils_num_str
#' @export
#'
extract_number <- function(.data,
                           ...,
                           pattern = NULL){
  if(missing(pattern)){
    pattern <- "[^0-9.-]+"
  }
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    if(missing(...)){
      results <-
        mutate(.data, across(where(~!is.numeric(.)), gsub, pattern = pattern, replacement = "")) %>%
        as_numeric(everything())
    } else{
      results <-
        mutate(.data, across(c(...), gsub, pattern = pattern, replacement = "")) %>%
        as_numeric(...)
    }
    return(results)
  } else{
    return(as.numeric(gsub("[^0-9.-]+", "", .data)))
  }
}
#' @name utils_num_str
#' @export
extract_string <- function(.data,
                           ...,
                           pattern = NULL){
  if(missing(pattern)){
    pattern <- "[^A-z.-]+"
  }
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    if(missing(...)){
      results <-
        mutate(.data, across(where(~!is.numeric(.)), gsub, pattern = pattern, replacement = ""))
    } else{
      results <-
        mutate(.data, across(c(...), gsub, pattern = pattern, replacement = ""))
    }
    return(results)
  } else{
    return(as.character(gsub("[^A-z.-]+", "", .data)))
  }
}
#' @name utils_num_str
#' @export
find_text_in_num <- function(.data, ...){
  help_text <- function(x){
    which(is.na(suppressWarnings(as.numeric(x))))
  }
  if(!missing(...)){
    .data <- select_cols(.data, ...)
    apply(.data, 2, help_text)

  } else{
    paste0("Line(s): ", paste0(help_text(.data), collapse = ","))
  }
}

#' @name utils_num_str
#' @export
has_text_in_num <- function(.data){
  if(any(sapply(sapply(.data, find_text_in_num), length)> 0)){
    var_nam <- names(which(sapply(sapply(.data, find_text_in_num), length)>0))
    warning("Non-numeric variable(s) ", paste(var_nam, collapse = ", "),
            " will not be selected.\nUse 'find_text_in_num()' to see rows with non-numeric characteres.", call. = FALSE)
  }
}
#' @name utils_num_str
#' @export
remove_space <- function(.data, ...){
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    if(missing(...)){
      results <-
        mutate(.data,
               across(where(~!is.numeric(.x)), gsub, pattern = "[[:space:]]", replacement = ""))
    } else{
      results <-
        mutate(.data,
               across(c(...), gsub, pattern = "[[:space:]]", replacement = ""))
    }

    return(results)
  } else{
    return(gsub(pattern = "[[:space:]]", replacement = "", .data))
  }
}

#' @name utils_num_str
#' @export
remove_strings <- function(.data, ...){
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    if(missing(...)){
      results <-
        mutate(.data, across(everything(), gsub, pattern = "[^0-9.-]", replacement = "")) %>%
        as_numeric(everything())
    } else{
      results <-
        mutate(.data, across(c(...), gsub, pattern = "[^0-9.-]", replacement = "")) %>%
        as_numeric(...)
    }
    return(results)
  } else{
    return(gsub(pattern = "[^0-9.-]", replacement = "", .data)) %>%
      remove_space() %>%
      as.numeric()
  }
}
#' @name utils_num_str
#' @export
replace_number <- function(.data,
                           ...,
                           pattern = NULL,
                           replacement = "",
                           ignore_case = FALSE){
  if(missing(pattern)){
    pattern <- "[0-9]"
  } else{
    pattern <- pattern
  }
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    results <-
      .data %>%
      mutate(across(c(...), ~gsub(pattern, replacement, ., ignore.case = ignore_case)))
    return(results)
  } else{
    return(gsub(pattern, replacement, .data, ignore.case = ignore_case))
  }
}
#' @name utils_num_str
#' @export
replace_string <- function(.data,
                           ...,
                           pattern = NULL,
                           replacement = "",
                           ignore_case = FALSE){
  if(missing(pattern)){
    pattern <- "[A-z]"
  } else {
    pattern <- pattern
  }
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    results <-
      .data %>%
      mutate(across(c(...), ~gsub(pattern, replacement, ., ignore.case = ignore_case)))
    return(results)
  } else{
    return(gsub(pattern, replacement, .data, ignore.case = ignore_case))
  }
}

#' @name utils_num_str
#' @export
#' @importFrom dplyr transmute
round_cols <- function(.data, ...,  digits = 2){
  is_mat <- is.matrix(.data)
  if(is_mat ==  TRUE){
    .data <-
      .data %>%
      as.data.frame() %>%
      rownames_to_column()
  }
  rn_test <- has_rownames(.data)
  if(rn_test == TRUE){
    rnames <- rownames(.data)
  }
  if (missing(...)){
    .data %<>% mutate(across(where(is.numeric), round, digits = digits))
  } else{
    .data %<>% mutate(across(c(...), round, digits = digits))
  }
  if(rn_test == TRUE){
    rownames(.data) <- rnames
  }
  if(is_mat ==  TRUE){
    .data <-
      .data %>%
      column_to_rownames() %>%
      as.matrix()
  }
  return(.data)
}
#' @name utils_num_str
#' @export
tidy_strings <- function(.data, ..., sep = "_"){
  fstr <- function(x){
    str <- toupper(gsub("(?<=[a-z0-9])(?=[A-Z])|[[:space:][:punct:]]+", sep, x, perl = TRUE))
    str <- gsub("(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])", sep, str, perl = TRUE)
    return(str)
  }
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    if(missing(...)){
      results <-
        mutate(.data, across(where(~!is.numeric(.x)),  fstr))
    } else{
      results <-
        mutate(.data, across(c(...), fstr))
    }
    return(results)
  } else{
    return(fstr(.data))
  }
}



#' @title Utilities for handling with rows and columns
#' @name utils_rows_cols
#' @description
#' `r badge('stable')`
#'
#' * `add_cols()`: Add one or more columns to an existing data frame. If
#' specified `.before` or `.after` columns does not exist, columns are
#' appended at the end of the data. Return a data frame with all the original
#' columns in `.data` plus the columns declared in `...`. In
#' `add_cols()` columns in `.data` are available for the expressions.
#' So, it is possible to add a column based on existing data.
#' * `add_rows()`: Add one or more rows to an existing data frame. If
#' specified `.before` or `.after` rows does not exist, rows are
#' appended at the end of the data. Return a data frame with all the original
#' rows in `.data` plus the rows declared in `...` argument.
#' * `add_row_id()`: Add a column with the row id as the first column in
#' `.data`.
#' * `add_prefix()` and `add_suffix()` add prefixes and suffixes,
#' respectively, in variable names selected in `...` argument.
#' * `all_pairs()`: Get all the possible pairs between the levels of a
#' factor.
#' * `colnames_to_lower()`: Translate all column names to lower case.
#' * `colnames_to_upper()`: Translate all column names to upper case.
#' * `colnames_to_title()`: Translate all column names to title case.
#' * `column_exists()`: Checks if a column exists in a data frame. Return a
#' logical value.
#' * `columns_to_first()`: Move columns to first positions in `.data`.
#' * `columns_to_last()`: Move columns to last positions in `.data`.
#' * `columns_to_rownames()`: Move a column of `.data` to its row
#' names.
#' * `rownames_to_column()`: Move the row names of `.data` to a new
#' column.
#' * `remove_rownames()`: Remove the row names of `.data`.
#' * `concatenate()`: Concatenate columns of a data frame. If `drop =
#' TRUE` then the existing variables are dropped. If `pull = TRUE` then the
#' concatenated variable is pull out to a vector. This is specially useful when
#' using `concatenate` to add columns to a data frame with `add_cols()`.
#' * `get_levels()`: Get the levels of a factor variable.
#' * `get_levels_comb()`: Get the combination of the levels of a factor.
#' * `get_level_size()`: Get the size of each level of a factor variable.
#' * `remove_cols()`: Remove one or more columns from a data frame.
#' * `remove_rows()`: Remove one or more rows from a data frame.
#' * `reorder_cols()`: Reorder columns in a data frame.
#' * `select_cols()`: Select one or more columns from a data frame.
#' * `select_first_col()`: Select first variable, possibly with an offset.
#' * `select_last_col()`: Select last variable, possibly with an offset.
#' * `select_numeric_cols()`: Select all the numeric columns of a data
#' frame.
#' * `select_non_numeric_cols()`: Select all the non-numeric columns of a
#' data frame.
#' * `select_rows()`: Select one or more rows from a data frame.
#' * `tidy_colnames()`: Tidy up column names with
#' [tidy_strings()].
#'
#'
#' @param .data A data frame
#' @param ... The argument depends on the function used.
#' * For `add_cols()` and `add_rows()` is name-value pairs. All values
#' must have one element for each row in `.data` when using
#' `add_cols()` or one element for each column in `.data` when using
#' `add_rows()`. Values of length 1 will be recycled when using
#' `add_cols()`.
#'
#' * For `remove_cols()` and `select_cols()`,  `...` is the
#' column name or column index of the variable(s) to be dropped.
#'
#' * For `add_prefix()` and `add_suffix()`,  `...` is the column
#' name to add the prefix or suffix, respectively. Select helpers are allowed.
#'
#' * For `columns_to_first()` and `columns_to_last()`,  `...` is
#' the column name or column index of the variable(s) to be moved to first or
#' last in `.data`.
#'
#' * For `remove_rows()` and `select_rows()`, `...` is an integer
#' row value.
#'
#' * For `concatenate()`, `...` is the unquoted variable names to be
#' concatenated.
#'
#' * For `get_levels()`, `get_level_comb()`, and
#' `get_level_size()` `...` is the unquoted variable names to get the
#' levels, levels combinations and levels size, respectively.
#'
#' @param .before,.after For `add_cols()`, `concatenate()`, and
#'   `reorder_cols()`, one-based column index or column name where to add
#'   the new columns, default: .after last column. For `add_rows()`,
#'   one-based row index where to add the new rows, default: .after last row.
#' @param prefix,suffix The prefix and suffix used in `add_prefix()` and
#'   `add_suffix()`, respectively.
#' @param new_var The name of the new variable containing the concatenated
#'   values. Defaults to `new_var`.
#' @param var Name of column to use for rownames.
#' @param sep The separator to appear when using `concatenate()`,
#'   `add_prefix()`, or `add_suffix()`. Defaults to to `"_"`.
#' @param drop Logical argument. If `TRUE` keeps the new variable
#'   `new_var` and drops the existing ones. Defaults to `FALSE`.
#' @param pull Logical argument. If `TRUE`, returns the last column (on the
#'   assumption that's the column you've created most recently), as a vector.
#' @param cols A quoted variable name to check if it exists in `.data`.
#' @param levels The levels of a factor or a numeric vector.
#' @param offset Set it to *n* to select the *n*th variable from the
#'   end (for `select_last_col()`) of from the begin (for
#'   `select_first_col()`)
#' @md
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @importFrom  tibble add_row
#' @importFrom dplyr relocate across
#' @export
#' @examples
#' \donttest{
#' library(metan)
#'
#' ################# Adding columns #################
#' # Variables x and y .after last column
#' data_ge %>%
#'   add_cols(x = 10,
#'            y = 30)
#' # Variables x and y .before the variable GEN
#' data_ge %>%
#'   add_cols(x = 10,
#'            y = 30,
#'            .before = GEN)
#'
#' # Creating a new variable based on the existing ones.
#' data_ge %>%
#'   add_cols(GY2 = GY^2,
#'            GY2_HM = GY2 + HM,
#'            .after = GY)
#'
#' ############### Reordering columns ###############
#' reorder_cols(data_ge2, NKR, .before = ENV)
#' reorder_cols(data_ge2, where(is.factor), .after = last_col())
#'
#' ######## Selecting and removing columns ##########
#' select_cols(data_ge2, GEN, REP)
#' remove_cols(data_ge2, GEN, REP)
#'
#' ########## Selecting and removing rows ###########
#' select_rows(data_ge2, 2:3)
#' remove_rows(data_ge2, 2:3)
#'
#' ########### Concatenating columns ################
#' concatenate(data_ge, ENV, GEN, REP)
#' concatenate(data_ge, ENV, GEN, REP, drop = TRUE)
#'
#' # Combine with add_cols() and replace_string()
#'data_ge2 %>%
#'  add_cols(ENV_GEN = concatenate(., ENV, GEN, pull = TRUE),
#'           .after = GEN) %>%
#'  replace_string(ENV_GEN,
#'                 pattern = "H",
#'                 replacement = "HYB_")
#'
#' # Use prefixes and suffixes
#' concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#'
#' # Use prefixes and suffixes (the ear traits EH, EP, EL, and ED)
#' add_prefix(data_ge2, PH, EH, EP, EL, prefix = "EAR")
#' add_suffix(data_ge2, PH, EH, EP, EL, suffix = "EAR", sep = ".")
#'
#' # Use prefixes and suffixes (colnames)
#' concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#'
#'
#' ########### formating column names ###############
#' # Creating data with messy column names
#' df <- head(data_ge, 3)
#' colnames(df) <- c("Env", "gen", "Rep", "GY", "hm")
#' df
#' colnames_to_lower(df)
#' colnames_to_upper(df)
#' colnames_to_title(df)
#'
#'
#' ################### Adding rows ##################
#' data_ge %>%
#'   add_rows(GY = 10.3,
#'            HM = 100.11,
#'            .after = 1)
#'
#' ########## checking if a column exists ###########
#' column_exists(data_g, "GEN")
#'
#' ####### get the levels, level combinations and size of levels ########
#' get_levels(data_g, GEN)
#' get_levels_comb(data_ge, ENV, GEN)
#' get_level_size(data_g, GEN)
#'
#' ############## all possible pairs ################
#' all_pairs(data_g, GEN)
#'
#' ########## select numeric variables only #########
#' select_numeric_cols(data_g)
#' select_non_numeric_cols(data_g)
#' }
add_cols <- function(.data, ..., .before = NULL, .after = NULL){
  results <- mutate(.data, ...)
  pos_dots <- (ncol(results)) - ((ncol(results) - ncol(.data)) - 1)
  if (!missing(.before) | !missing(.after)){
    results <- reorder_cols(results,
                            all_of(pos_dots):ncol(results),
                            .before = {{.before}},
                            .after = {{.after}})
  }
  return(as_tibble(results, rownames = NA))
}
#' @name utils_rows_cols
#' @export
add_rows <- function(.data, ..., .before = NULL, .after = NULL){
  if(is.character(.before)){
    if(!(.before %in% rownames(.data))){
      .before <- NULL
    }
  }
  if(is.character(.after)){
    if(!(.after %in% rownames(.data))){
      .after <- NULL
    }
  }
  add_row(.data, ..., .before = .before, .after = .after)
}
#' @name utils_rows_cols
#' @export
add_row_id <- function(.data, var = "row_id"){
  if(!has_class(.data, "data.frame")){
    stop("The object '",  match.call()[[".data"]], "' must be a data frame.")
  }
  df <- .data
  add_cols(df, {{var}} := seq_len(nrow(df)), .before = 1)
}
#' @name utils_rows_cols
#' @export
all_pairs <- function(.data, levels){
  levels <-
    get_levels(.data, {{levels}})
  combn(levels, 2) %>%
    t() %>%
    as.data.frame()
}
#' @name utils_rows_cols
#' @export
#' @importFrom dplyr rename_with
add_prefix <- function(.data, ..., prefix, sep = "_"){
  rename_with(.data, ~paste(prefix, ., sep = sep), c(...))
}
#' @name utils_rows_cols
#' @export
add_suffix <- function(.data, ..., suffix, sep = "_"){
  rename_with(.data, ~paste(., suffix, sep = sep), c(...))
}
#' @name utils_rows_cols
#' @export
colnames_to_lower <- function(.data){
  colnames(.data) <- all_lower_case(colnames(.data))
  return(.data)
}
#' @name utils_rows_cols
#' @export
colnames_to_upper <- function(.data){
  colnames(.data) <- all_upper_case(colnames(.data))
  return(.data)
}
#' @name utils_rows_cols
#' @export
colnames_to_title <- function(.data){
  colnames(.data) <- all_title_case(colnames(.data))
  return(.data)
}
#' @name utils_rows_cols
#' @export
column_to_first <-function(.data, ...){
  select_cols(.data, ..., everything())
}
#' @name utils_rows_cols
#' @export
column_to_last <-function(.data, ...){
  select_cols(.data, -c(!!!quos(...)), everything())
}
#' @name utils_rows_cols
#' @export
column_to_rownames <- function(.data, var = "rowname"){
  df <-
    as.data.frame(.data) %>%
    remove_rownames()
  if(!var %in% colnames(df)){
    stop("Variable '", var, "' not in data.", call. = FALSE)
  }
  rownames(df) <- df[[var]]
  df[[var]] <- NULL
  df
}
#' @name utils_rows_cols
#' @export
rownames_to_column <-  function(.data, var = "rowname"){
  col_names <- colnames(.data)
  if (var %in% col_names) {
    stop("Column `", var, "` already exists in `.data`.")
  }
  .data[, var] <- rownames(.data)
  rownames(.data) <- NULL
  .data[, c(var, setdiff(col_names, var))]
}
#' @name utils_rows_cols
#' @export
remove_rownames <- function(.data, ...){
  rownames(.data) <- NULL
  .data
}
#' @name utils_rows_cols
#' @export
column_exists <-function(.data, cols){
  if(length(setdiff(cols, colnames(.data))) != 0){
    FALSE
  } else{
    TRUE
  }
}
#' @name utils_rows_cols
#' @export
concatenate <- function(.data,
                        ...,
                        prefix = NULL,
                        suffix = NULL,
                        new_var = new_var,
                        sep = "_",
                        drop = FALSE,
                        pull = FALSE,
                        .before = NULL,
                        .after  = NULL){

  if (drop == FALSE){
    conc <- select(.data, ...)
    results <- mutate(.data,
                      {{new_var}} := paste(prefix,
                                           ifelse(is.null(prefix), "", sep),
                                           apply(conc, 1, paste, collapse = sep),
                                           ifelse(is.null(suffix), "", sep),
                                           suffix, sep = ""))
    if (pull == TRUE){
      results <- pull(results)
    }
    if (!missing(.before) | !missing(.after)){
      results <- reorder_cols(results, {{new_var}}, .before = {{.before}}, .after = {{.after}})
    }
  } else{
    conc <- select(.data, ...)
    results <- transmute(.data,
                         {{new_var}} := paste(prefix,
                                              ifelse(is.null(prefix), "", sep),
                                              apply(conc, 1, paste, collapse = sep),
                                              ifelse(is.null(suffix), "", sep),
                                              suffix, sep = ""))
    if (pull == TRUE){
      results <- pull(results)
    }
  }
  return(results)
}
#' @name utils_rows_cols
#' @export
get_levels <- function(.data, ...){
  if(missing(...)){
    df <-
      select(.data, everything()) %>% select_non_numeric_cols()
  } else{
    df <- select(.data, ...) %>% select_non_numeric_cols()
  }
  results <-
    df %>%
    as_factor(everything()) %>%
    map(levels)
  if(length(results) == 1){
    return(results[[1]])
  } else{
    return(results)
  }
}
#' @name utils_rows_cols
#' @export
get_levels_comb <- function(.data, ...){
  if(missing(...)){
    df <-
      select(.data, everything()) %>% select_non_numeric_cols()
  } else{
    df <- select(.data, ...) %>% select_non_numeric_cols()
  }
  df %>%
    as_factor() %>%
    map(levels) %>%
    expand.grid() %>%
    as_tibble()
}
#' @name utils_rows_cols
#' @importFrom dplyr count
#' @export
get_level_size <- function(.data, ...){
  return(n_by(.data, ...))
}
#' @name utils_rows_cols
#' @importFrom rlang quos
#' @export
reorder_cols <- function(.data, ..., .before = NULL, .after = NULL){
  return(relocate(.data, ..., .before = {{.before}}, .after = {{.after}}))
}
#' @name utils_rows_cols
#' @export
remove_cols <- function(.data, ...){
  select(.data, -c(!!!quos(...)))
}
#' @name utils_rows_cols
#' @export
remove_rows <- function(.data, ...){
  slice(.data, -c(!!!quos(...)))
}
#' @name utils_rows_cols
#' @export
select_first_col <- function(.data, offset = NULL){
  if(!missing(offset) && offset == 0){
    stop("`offset` must be greater than the 0", call. = FALSE)
  }
  offset <- ifelse(missing(offset), ncol(.data) - 1, ncol(.data) - offset)
  select(.data, last_col(offset = offset))
}
#' @name utils_rows_cols
#' @export
select_last_col <- function(.data, offset = NULL){
  offset <- ifelse(missing(offset), 0, offset)
  select(.data, last_col(offset = offset))
}
#' @name utils_rows_cols
#' @export
select_numeric_cols <- function(.data){
  if(is_grouped_df(.data)){
    .data <- ungroup(.data)
  }
  select_if(.data, is.numeric)
}
#' @name utils_rows_cols
#' @export
select_non_numeric_cols <- function(.data){
  if(is_grouped_df(.data)){
    .data <- ungroup(.data)
  }
  select_if(.data, ~!is.numeric(.x))
}
#' @name utils_rows_cols
#' @export
select_cols <- function(.data, ...){
  select(.data, ...)
}
#' @name utils_rows_cols
#' @export
select_rows <- function(.data, ...){
  slice(.data, ...)
}
#' @name utils_rows_cols
#' @export
tidy_colnames <- function(.data, sep = "_"){
  colnames(.data) <- tidy_strings(colnames(.data), sep = sep)
  .data
}





#' @title Useful functions for computing descriptive statistics
#' @name utils_stats
#' @description
#' `r badge('stable')`
#'
#' * **The following functions compute descriptive statistics by levels of
#' a factor or combination of factors quickly.**
#'    - `cv_by()` For computing coefficient of variation.
#'    - `max_by()` For computing maximum values.
#'    - `mean_by()` For computing arithmetic means.
#'    - `min_by()` For compuing minimum values.
#'    - `n_by()` For getting the length.
#'    - `sd_by()` For computing sample standard deviation.
#'    - `var_by()` For computing sample variance.
#'    - `sem_by()` For computing standard error of the mean.
#'
#' * **Useful functions for descriptive statistics. All of them work
#' naturally with `\%>\%`, handle grouped data and multiple variables (all
#' numeric variables from `.data` by default).**
#'    - `av_dev()` computes the average absolute deviation.
#'    - `ci_mean_t()` computes the t-interval for the mean.
#'    - `ci_mean_z()` computes the z-interval for the mean.
#'    - `cv()` computes the coefficient of variation.
#'    - `freq_table()` Computes a frequency table for either numeric and
#'    categorical/discrete data. For numeric data, it is possible to define the
#'    number of classes to be generated.
#'    - `hmean(), gmean()` computes the harmonic and geometric means,
#' respectively. The harmonic mean is the reciprocal of the arithmetic mean of
#' the reciprocals. The geometric mean is the *n*th root of *n*
#' products.
#'    - `kurt()` computes the kurtosis like used in SAS and SPSS.
#'    - `range_data()` Computes the range of the values.
#'    - `n_valid()` The valid (not `NA`) length of a data.
#'    - `n_unique()` Number of unique values.
#'    - `n_missing()` Number of missing values.
#'    - `row_col_mean(), row_col_sum()` Adds a row with the mean/sum of
#'    each variable and a column with the the mean/sum for each row of the data.
#'    - `sd_amo(), sd_pop()` Computes sample and populational standard
#' deviation, respectively.
#'    - `sem()` computes the standard error of the mean.
#'    - `skew()` computes the skewness like used in SAS and SPSS.
#'    - `ave_dev()` computes the average of the absolute deviations.
#'    - `sum_dev()` computes the sum of the absolute deviations.
#'    - `sum_sq()` computes the sum of the squared values.
#'    - `sum_sq_dev()` computes the sum of the squared deviations.
#'    - `var_amo(), var_pop()` computes sample and populational variance.
#'
#' [desc_stat()] is wrapper function around the above ones and can be
#' used to compute quickly all these statistics at once.
#'
#' @details  The function `freq_table()` computes a frequency table for either
#'   numerical or categorical variables. If a variable is categorical or
#'   discrete (integer values), the number of classes will be the number of
#'   levels that the variable contains.
#'
#'If a variable (say, data) is continuous, the number of classes (k) is given by
#'the square root of the number of samples (n) if `n =< 100` or `5 * log10(n)`
#'if `n > 100`.
#'
#'The amplitude (\mjseqn{A}) of the data is used to define the size of the class (\mjseqn{c}),
#'given by
#'
#' \loadmathjax
#' \mjsdeqn{c = \frac{A}{n - 1}}
#'
#' The lower limit of the first class (LL1) is given by min(data) - c / 2. The
#' upper limit is given by LL1 + c. The limits of the other classes are given in
#' the same way. After the creation of the classes, the absolute and relative
#' frequencies within each class are computed.
#'
#' @param .data A data frame or a numeric vector.
#' @param ... The argument depends on the function used.
#' * For `*_by` functions, `...` is one or more categorical variables
#'  for grouping the data. Then the statistic required will be computed for all
#'  numeric variables in the data. If no variables are informed in `...`,
#'  the statistic will be computed ignoring all non-numeric variables in
#'  `.data`.
#' * For the other statistics, `...` is a comma-separated of unquoted
#'  variable names to compute the statistics. If no variables are informed in n
#'  `...`, the statistic will be computed for all numeric variables in
#'  `.data`.
#' @param .vars Used to select variables in the `*_by()` functions. One or more
#'   unquoted expressions separated by commas. Variable names can be used as if
#'   they were positions in the data frame, so expressions like `x:y` can be
#'   used to select a range of variables. Defaults to `NULL` (all numeric
#'   variables are analyzed)..
#' @param na.rm If `FALSE`, the default, missing values are removed with a
#'   warning. If `TRUE`, missing values are silently removed.
#' @param var The variable to compute the frequency table. See `Details` for
#'   more details.
#' @param k The number of classes to be created. See `Details` for
#'   more details.
#' @param digits The number of significant figures to show. Defaults to 2.
#' @param table A frequency table computed with [freq_table()].
#' @param xlab,ylab The `x` and `y` labels.
#' @param fill,color The color to fill the bars and color the border of the bar,
#'   respectively.
#' @param ygrid Shows a grid line on the `y` axis? Defaults to `TRUE`.
#' freq_hist <- function(table,
#' @param level The confidence level for the confidence interval of the mean.
#'   Defaults to 0.95.
#' @return
#'  * Functions `*_by()` returns a `tbl_df` with the computed statistics by
#'  each level of the factor(s) declared in `...`.
#'  * All other functions return a named integer if the input is a data frame
#'  or a numeric value if the input is a numeric vector.
#'  * `freq_table()` Returns a list with the frequency table and the breaks used
#'  for class definition. These breaks can be used to construct an histogram of
#'  the variable.
#' @md
#' @references Ferreira, Daniel Furtado. 2009. Estatistica Basica. 2 ed. Vicosa,
#'   MG: UFLA.
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @examples
#' \donttest{
#' library(metan)
#' # means of all numeric variables by ENV
#' mean_by(data_ge2, GEN, ENV)
#'
#' # Coefficient of variation for all numeric variables
#' # by GEN and ENV
#' cv_by(data_ge2, GEN, ENV)
#'
#' # Skewness of a numeric vector
#' set.seed(1)
#' nvec <- rnorm(200, 10, 1)
#' skew(nvec)
#'
#' # Confidence interval 0.95 for the mean
#' # All numeric variables
#' # Grouped by levels of ENV
#' data_ge2 %>%
#'   group_by(ENV) %>%
#'   ci_mean_t()
#'
#' # standard error of the mean
#' # Variable PH and EH
#' sem(data_ge2, PH, EH)
#'
#' # Frequency table for variable NR
#' data_ge2 %>%
#'   freq_table(NR)
#'}
#'
#' @export
av_dev <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sum(abs(df - mean(df, na.rm = na.rm)), na.rm = na.rm) / length(which(!is.na(df)))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
ci_mean_t <- function(.data, ..., na.rm = FALSE, level = 0.95) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    qt((0.5 + level/2), (length(which(!is.na(df))) - 1)) * sd(df, na.rm = na.rm)/sqrt(length(which(!is.na(df))))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
ci_mean_z <- function(.data, ..., na.rm = FALSE, level = 0.95) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    qnorm((0.5 + level/2)) * sd(df, na.rm = na.rm)/sqrt(length(which(!is.na(df))))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
cv <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sd(df, na.rm = na.rm)/mean(df, na.rm = na.rm) * 100
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
freq_table <- function(.data, var, k = NULL, digits = 3){
  var <- rlang::ensym(var)
  if(is_grouped_df(.data)){
    res <-
      metan::doo(.data,
                 ~freq_table(., {{var}}, k = k, digits = digits))
    freqs <-
      res %>%
      mutate(freqs = map(data, ~.x %>% .[["freqs"]])) %>%
      unnest(freqs) %>%
      remove_cols(data)
    breaks <-
      res %>%
      mutate(freqs = map(data, ~.x %>% .[["breaks"]])) %>%
      remove_cols(data)
    return(list(freqs = freqs,
                breaks = breaks))

  } else{
    # function to create a frequence table with continuous variable
    # adapted from https://bendeivide.github.io/book-epaec/book-epaec.pdf
    freq_quant <- function(data, k = NULL, digits = digits){
      # the number of observations
      n <- length(data)

      # check the number of classes
      if(is.null(k)){
        if (n > 100) {
          k <- round(5 * log10(n), 0)
        } else{
          k <- round(sqrt(n), 0)
        }
      } else{
        k <- k
      }
      # data range
      rang <- range(data)
      # amplitude
      A <- diff(rang)
      # the size of the class
      c <- round(A / (k - 1), digits = digits)

      # lower and upper limit of the first class
      LI1 <- min(rang) - c / 2
      vi <- c(LI1,     rep(0, k - 1))
      vs <- c(LI1 + c, rep(0, k - 1))

      # build the other classes
      for (i in 2:k) {
        vi[i] <- vi[i - 1] + c
        vs[i] <- vs[i - 1] + c
      }
      vi <- round(vi, digits = digits)
      vs <- round(vs, digits = digits)
      # Find the frequency of each class
      freq <- function(x, vi, vs, k) {
        freq <- rep(0, k)
        for (i in 1:(k - 1)) {
          freq[i] <- length(x[x >= vi[i] & x < vs[i]])
        }
        freq[k] <- length(x[x >= vi[k] & x <= vs[k]])
        return(freq)
      }

      # absolute frequency
      fi <- freq(data, vi, vs, k)
      # check if any class is empty
      if(any(fi == 0)){
        warning("An empty class is not advised. Try to reduce the number of classes with the `k` argument", call. = FALSE)
      }
      # building the classes
      classe <- paste(vi, "|--- ", vs)
      classe[k] <- paste(vi[k], "|---|", vs[k])
      freqs <-
        data.frame(class = classe,
                   abs_freq = fi) %>%
        mutate(abs_freq_ac = cumsum(abs_freq),
               rel_freq = abs_freq / sum(abs_freq),
               rel_freq_ac = cumsum(rel_freq))
      freqs[nrow(freqs) + 1, ] <- c("Total", sum(freqs[, 2]), sum(freqs[, 2]), 1, 1)
      freqs <-
        freqs %>%
        as_numeric(2:5) %>%
        round_cols(digits = digits)

      breaks <- sort(c(vi, vs))
      return(
        structure(
          list(freqs = freqs,
               LL = vi,
               UL = vs,
               vartype = "continuous"),
          class = "freq_table"
        )
      )
    }

    #check the class of the variable
    class_data <- .data %>% pull({{var}}) %>% class()
    # if variable is discrete or categorical
    if(class_data %in% c("character", "factor", "integer")){
      df <-
        .data %>%
        count({{var}}) %>%
        as_character(1) %>%
        mutate(abs_freq = n,
               abs_freq_ac = cumsum(abs_freq),
               rel_freq = abs_freq / sum(abs_freq),
               rel_freq_ac = cumsum(rel_freq)) %>%
        remove_cols(n) %>%
        as.data.frame() %>%
        round_cols(digits = digits)
      df[nrow(df) + 1, ] <- c("Total", sum(df[, 2]), sum(df[, 2]), 1, 1)
      df <- df %>% as_numeric(2:5)
      return(
        structure(
          list(freqs = df,
               vartype = "categorical"),
          class = "freq_table"
        )
      )
    }
    # if variable is numeric
    if(class_data == "numeric"){
      data <- .data %>% pull({{var}})
      # apply the function freq_quant in the numeric vector
      freq_quant(data, k = k, digits = digits)
    }
  }
}

#' @name utils_stats
#' @export
#' @importFrom graphics axis barplot grid plot.new plot.window rect title
freq_hist <- function(table,
                      xlab = NULL,
                      ylab = NULL,
                      fill = "gray",
                      color = "black",
                      ygrid = TRUE) {
  if (!inherits(table, "freq_table")){
    stop("Class of object `table` is not valid. Please use `freq_table()` to create a valid object.")
  }
  if (table$vartype == "categorical") {

    classes <- as.character(table$freqs[[1]])
    classes <- classes[-length(classes)]
    freqs <- table$freqs[[2]]
    freqs <- freqs[-length(freqs)]

    if (is.null(xlab)) {
      xlab <- gettext("Groups")
    }
    if (is.null(ylab)) {
      ylab <- gettext("Frequency")
    }
    barplot(freqs ~ classes, xaxt = "n", xlab = "", ylab = "")

    if(isTRUE(ygrid)){
      grid(nx = NA, ny = NULL, col = "gray")
      opar <- par(new = TRUE)
      on.exit(par(opar))
    }
    barplot(freqs ~ classes,
            xlab = xlab,
            ylab = ylab)
  }
  if (table$vartype == "continuous") {
    xvar1 <- table$LL
    xvar2 <- table$UL
    freqs <- table$freqs$abs_freq
    yvar <- freqs[-length(freqs)]
    # Limiares
    xlim <- c(min(xvar1), max(xvar2))
    ylim <- c(0, 1.2 * max(yvar))

    # Area de plotagem
    plot.new()
    plot.window(xlim, ylim)

    # Labels
    if (is.null(xlab)) {
      xlab <- gettext("Classes")
    }
    if (is.null(ylab)) {
      ylab <- gettext("Frequency")
    }

    title(xlab = xlab, ylab = ylab)

    if(isTRUE(ygrid)){
      grid(nx = NA, ny = NULL, col = "gray")
      opar <- par(new = TRUE)
    }

    rect(xvar1,
         0,
         xvar2,
         yvar,
         col = fill,
         border = color)
    xvar <- c(xvar1, max(xvar2))
    axis(1, at = xvar)
    axis(2)
  }
}
#' @name utils_stats
#' @export
hmean <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    1 / mean(1 / df, na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
gmean <- function(.data, ..., na.rm = FALSE){
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    exp(sum(log(df[df > 0]), na.rm = na.rm) / length(df))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
kurt <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    n <- length(which(!is.na(df)))
    tmp <- sum((df - mean(df, na.rm = na.rm))^4, na.rm = na.rm)/(var(df, na.rm = na.rm))^2
    n * (n + 1) * tmp/((n - 1) * (n - 2) * (n - 3)) - 3 * (n - 1)^2/((n - 2) * (n - 3))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
n_missing <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    sum(is.na(df))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(everything(), funct)) %>%
        ungroup()
    } else{
      .data %>%
        summarise(across(c(...), funct))
    }
  }
}
#' @name utils_stats
#' @export
n_unique <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    length(unique(df))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(everything(), funct)) %>%
        ungroup()
    } else{
      .data %>%
        summarise(across(c(...), funct))
    }
  }
}
#' @name utils_stats
#' @export
n_valid <- function(.data, ..., na.rm = FALSE){
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    length(which(!is.na(df)))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
pseudo_sigma <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    IQR(df, na.rm = na.rm) / 1.35
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
range_data <- function(.data, ..., na.rm = FALSE){
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    max(df, na.rm = na.rm) - min(df, na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
row_col_mean <- function(.data, na.rm = FALSE) {
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(!any(sapply(.data, is.numeric))){
    stop("All columns in '.data' must be numeric")
  }
  mat <- as.matrix(.data)

  if(is.null(rownames(mat))){
    row_names <- 1:nrow(mat)
  } else{
    row_names <- rownames(mat)
  }
  row_mean <- rowMeans(mat, na.rm = na.rm)
  col_mean <- colMeans(mat, na.rm = na.rm)
  cmeans <- suppressWarnings(cbind(mat,  row_mean) %>% rbind(col_mean))
  rownames(cmeans) <- c(row_names, "col_mean")
  cmeans[nrow(cmeans), ncol(cmeans)] <- mean(mat, na.rm = na.rm)
  return(cmeans)
}
#' @name utils_stats
#' @export
row_col_sum <- function(.data, na.rm = FALSE) {
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(!any(sapply(.data, is.numeric))){
    stop("All columns in '.data' must be numeric")
  }
  mat <- as.matrix(.data)
  if(is.null(rownames(mat))){
    row_names <- 1:nrow(mat)
  } else{
    row_names <- rownames(mat)
  }
  row_sum <- rowSums(mat, na.rm = na.rm)
  col_sum <- colSums(mat, na.rm = na.rm)
  cmeans <- suppressWarnings(cbind(mat,  row_sum) %>% rbind(col_sum))
  rownames(cmeans) <- c(row_names, "col_sum")
  cmeans[nrow(cmeans), ncol(cmeans)] <- sum(mat, na.rm = na.rm)
  return(cmeans)
}
#' @name utils_stats
#' @export
sd_amo <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sqrt(sum((df - mean(df, na.rm = na.rm))^2, na.rm = na.rm) / (length(which(!is.na(df))) - 1))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
sd_pop <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sqrt(sum((df - mean(df, na.rm = na.rm))^2, na.rm = na.rm) / length(which(!is.na(df))))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
sem <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sd(df, na.rm = na.rm) / sqrt(length(which(!is.na(df))))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
skew <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    n <- length(which(!is.na(df)))
    sum((df - mean(df, na.rm = na.rm))^3, na.rm = na.rm)/sd(df, na.rm = na.rm)^3 * n / ((n - 1) * (n - 2))
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
sum_dev <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sum(abs(df - mean(df, na.rm = na.rm)), na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
ave_dev <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    mean(abs(df - mean(df, na.rm = na.rm)), na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
sum_sq_dev <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sum((df - mean(df, na.rm = na.rm))^2, na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}

#' @name utils_stats
#' @export
sum_sq <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sum(df ^ 2, na.rm = na.rm)
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
var_pop <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sd_pop(df, na.rm = na.rm)^2
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}
#' @name utils_stats
#' @export
var_amo <- function(.data, ..., na.rm = FALSE) {
  funct <- function(df){
    if(na.rm == FALSE & has_na(df)){
      warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
      message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
      na.rm <- TRUE
    }
    sd_amo(df, na.rm = na.rm)^2
  }
  if(is.null(nrow(.data))){
    funct(.data)
  } else{
    if(missing(...)){
      .data %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    } else{
      .data %>%
        select_cols(group_vars(.), ...) %>%
        summarise(across(where(is.numeric), funct)) %>%
        ungroup()
    }
  }
}



# main statistics, possible by one or more factors
#' @name utils_stats
#' @export
cv_by <- function(.data,
                  ...,
                  .vars = NULL,
                  na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), cv, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, cv, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}
#' @name utils_stats
#' @export
max_by <- function(.data,
                   ...,
                   .vars = NULL,
                   na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), max, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, max, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}

#' @name utils_stats
#' @export
min_by <- function(.data,
                   ...,
                   .vars = NULL,
                   na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), min, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, min, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}

#' @name utils_stats
#' @export
means_by <- function(.data,
                     ...,
                     .vars = NULL,
                     na.rm = FALSE){
  deprecated_warning("1.17.0", "metan::mean_by()", "metan::mean_by()")
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), mean, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, mean, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}
#' @name utils_stats
#' @export
mean_by <- function(.data,
                    ...,
                    .vars = NULL,
                    na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), mean, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, mean, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}


#' @name utils_stats
#' @export
n_by <- function(.data,
                 ...,
                 .vars = NULL,
                 na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(everything(), ~sum(!is.na(.))), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, ~sum(!is.na(.))), .groups = "drop") %>%
      ungroup()
  }

}
#' @name utils_stats
#' @export
sd_by <- function(.data,
                  ...,
                  .vars = NULL,
                  na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), sd, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, sd, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}
#' @name utils_stats
#' @export
var_by <- function(.data,
                   ...,
                   .vars = NULL,
                   na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), var, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, var, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}
#' @name utils_stats
#' @export
sem_by <- function(.data,
                   ...,
                   .vars = NULL,
                   na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), sem, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, sem, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}
#' @name utils_stats
#' @export
sum_by <- function(.data,
                   ...,
                   .vars = NULL,
                   na.rm = FALSE){
  if(na.rm == FALSE & has_na(.data)){
    warning("NA values removed to compute the function. Use 'na.rm = TRUE' to suppress this warning.", call. = FALSE)
    message("To remove rows with NA use `remove_rows_na()'. \nTo remove columns with NA use `remove_cols_na()'.")
    na.rm <- TRUE
  }
  if(missing(.vars)){
    group_by(.data, ...) %>%
      summarise(across(where(is.numeric), sum, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  } else{
    group_by(.data, ...) %>%
      summarise(across({{.vars}}, sum, na.rm = na.rm), .groups = "drop") %>%
      ungroup()
  }
}





#' @title Utilities for handling with matrices
#' @description
#' `r badge('stable')`
#'
#'  These functions help users to make upper, lower, or symmetric matrices
#'  easily.
#'
#' @name utils_mat
#' @param x A matrix to apply the function. It must be a symmetric (square)
#'   matrix in `make_upper_tri()` and `make_lower_tri()` or a
#'   triangular matrix in `make_sym()`. `tidy_sym()` accepts both
#'   symmetrical or triangular matrices.
#' @param lower A square matrix to fill the lower diagonal of the new matrix.
#' @param upper A square matrix to fill the upper diagonal of the new matrix.
#' @param diag What show in the diagonal of the matrix. Default to `NA`.
#' @param make The triangular to built. Default is `"upper"`. In this case,
#'   a symmetric matrix will be built based on the values of a lower triangular
#'   matrix.
#' @param keep_diag Keep diagonal values in the tidy data frame? Defaults to
#'   `TRUE`.
#' @details
#' * `make_upper_tri()` makes an upper triangular matrix using a symmetric
#' matrix.
#' * `make_lower_tri()` makes a lower triangular matrix using a symmetric
#' matrix.
#' * `make_sym()` makes a lower triangular matrix using a symmetric matrix.
#' * `tidy_sym()` transform a symmetric matrix into tidy data frame.
#' @md
#' @return An upper, lower, or symmetric matrix, or a tidy data frame.
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @export
#' @examples
#' \donttest{
#' library(metan)
#' m <- cor(select_cols(data_ge2, 5:10))
#' make_upper_tri(m)
#' make_lower_tri(m)
#' make_lower_tri(m) %>%
#' make_sym(diag = 0)
#' tidy_sym(m)
#' tidy_sym(make_lower_tri(m))
#'
#' }
#'
make_upper_tri<-function(x, diag = NA){
  x[lower.tri(x)] <- NA
  diag(x) <- diag
  return(x)
}
#' @name utils_mat
#' @export
make_lower_tri<-function(x, diag = NA){
  x[upper.tri(x)] <- NA
  diag(x) <- diag
  return(x)
}
#' @name utils_mat
#' @export
make_lower_upper<-function(lower, upper, diag = NA){
  if(any(rownames(upper) == rownames(lower)) == FALSE){
    stop("Row names of 'upper' and 'lower' don't match.")
  }
  if(any(colnames(upper) == colnames(lower)) == FALSE){
    stop("Column names of 'upper' and 'lower' don't match.")
  }
  if (nrow(lower) != ncol(lower)) {
    stop("lower matrix must be square")
  }
  if (nrow(upper) != ncol(upper)) {
    stop("upper matrix must be square")
  }
  if (nrow(lower) != ncol(upper)) {
    stop("lower and upper matrices must have the same dimensions")
  }
  result <- matrix(NA, nrow = nrow(upper), ncol = ncol(upper))
  result[lower.tri(result)] <- t(lower)[lower.tri(t(lower))]
  result[upper.tri(result)] <- t(upper)[upper.tri(t(upper))]
  diag(result) <- diag
  rownames(result) <- colnames(result) <- rownames(lower)
  return(result)
}
#' @name utils_mat
#' @export
make_sym <- function(x, make = "upper", diag = NA) {
  if(make == "upper"){
    x[upper.tri(x)] <- t(x)[upper.tri(x)]
    diag(x) <- diag
  }
  if (make == "lower"){
    x[lower.tri(x)] <- t(x)[lower.tri(x)]
    diag(x) <- diag
  }
  return(x)
}
#' @name utils_mat
#' @export
tidy_sym <- function(x, keep_diag = TRUE){
  res <-
    x %>%
    as_tibble(rownames = "group1") %>%
    pivot_longer(names_to = "group2",
                 values_to = "value",
                 -group1) %>%
    remove_rows_na(verbose = FALSE) %>%
    arrange(group1)
  if(keep_diag == FALSE){
    res %<>%  remove_rows(which(res[1] == res[2]))
  }
  return(res)
}
#'Alternative to dplyr::do for doing anything
#'
#'
#' Provides an alternative to the `dplyr:do()` using `nest()`,
#' `mutate()` and `map()` to apply a function to a grouped data frame.
#'
#'  If the applied function returns a data frame, then the output will be
#'  automatically unnested. Otherwise, the output includes the grouping
#'  variables and a column named "data" , which is a "list-columns" containing
#'  the results for group combinations.
#'
#'@param .data a (grouped) data frame
#'@param .fun A function, formula, or atomic vector.
#'@param ... Additional arguments passed on to `.fun`
#'@param unnest Logical argument defaults to `TRUE` to control if results
#'  of `.fun` should be unnested. Valid only if the result is of class
#'  `data.frame` or `tbl_df`.
#' @export
#'@return a data frame
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#'@importFrom purrr map
#'@importFrom tidyr nest unnest
#'@examples
#'\donttest{
#'library(metan)
#'# Head the first two lines of each environment
#'data_ge2 %>%
#'  group_by(ENV) %>%
#'  doo(~head(., 2))
#'
#' # Genotype analysis for each environment using 'gafem()'
#' # variable PH
#' data_ge2 %>%
#'   group_by(ENV) %>%
#'   doo(~gafem(., GEN, REP, PH, verbose = FALSE))
#'}
doo <- function(.data, .fun, ..., unnest = TRUE){
  if(is_grouped_df(.data)){
    out <- nest(.data)
  } else{
    out <- nest(.data, data = everything())
  }
  out %<>%
    ungroup() %>%
    mutate(data = purrr::map(.data$data, droplevels)) %>%
    mutate(data = purrr::map(.data$data, .fun, ...))
  if(has_class(out$data[[1]], c("tbl_df", "data.frame")) & unnest == TRUE){
    out <- unnest(out, cols = c(data))
  }
  if(is_grouped_df(.data)){
    out <- arrange(out, !!!syms(group_vars(.data)))
  }
  return(out)
}

#' @title Utilities for handling with classes
#' @name utils_class
#' @param x An object
#' @param class The class to add or remove
#' @details
#' * `add_class()`: add a class to the object `x` keeping all the other class(es).
#' * `has_class()`: Check if a class exists in object `x` and returns a logical value.
#' * `set_class()`: set a class to the object `x`.
#' * `remove_class()`: remove a class from the object `x`.
#' @md
#' @return The object `x` with the class added or removed.
#' @export
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#'@examples
#'\donttest{
#'library(metan)
#'df <-
#' data_ge2 %>%
#' add_class("my_class")
#'class(df)
#'has_class(df, "my_class")
#'remove_class(df, "my_class") %>% class()
#'set_class(df, "data_frame") %>% class()
#'}
#'
add_class <- function(x, class){
  class(x) <- unique(c(class(x), class))
  return(x)
}
#' @name utils_class
#' @export
has_class <- function(x, class){
  any(class(x)  %in%  class)
}
#' @name utils_class
#' @export
remove_class <- function(x, class){
  if(!class %in% class(x)){
    stop("Class not found in object ", match.call()[["x"]], call. = FALSE)
  }
  class(x) <- setdiff(class(x), class)
  return(x)
}
#' @name utils_class
#' @export
set_class <- function(x, class){
  class(x) <- class
  return(x)
}

#' Generate significance stars from p-values
#'
#' Generate significance stars from p-values using R's standard definitions.
#'
#' Mapping from p_value ranges to symbols:
#' * **0 - 0.0001**: '****'
#' * **0.0001 - 0.001**: '***'
#' * **0.001 - 0.01**: '**'
#' * **0.01 - 0.05**: '*'
#' * **0.05 - 1.0**: 'ns'
#' @md
#' @param p_value A numeric vector of p-values
#'
#' @return A character vector containing the same number of elements as p-value,
#'   with an attribute "legend" providing the conversion pattern.
#' @export
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @examples
#'\donttest{
#' p_vals <- c(0.01, 0.043, 0.1, 0.0023, 0.000012)
#' stars_pval(p_vals)
#' }
#'
stars_pval <- function(p_value){
  unclass(
    symnum(
      p_value,
      corr = FALSE,
      na = FALSE,
      cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05,  1),
      symbols = c("****", "***", "**", "*",  "ns")
    )
  )
}



#' Check if a data set is balanced
#'
#' Check if a data set coming from multi-environment trials is balanced, i.e.,
#' all genotypes are in all environments.
#' @param .data The dataset containing the columns related to Environments,
#'   Genotypes, replication/block and response variable(s).
#' @param env The name of the column that contains the levels of the
#'   environments.
#' @param gen The name of the column that contains the levels of the genotypes.
#' @param resp The response variable.
#' @return A logical value
#' @export
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @examples
#'\donttest{
#' unb <- data_ge %>%
#'         remove_rows(1:3) %>%
#'         droplevels()
#' is_balanced_trial(data_ge, ENV, GEN, GY)
#' is_balanced_trial(unb, ENV, GEN, GY)
#' }
#'

is_balanced_trial <- function(.data, env, gen, resp){
  mat <-
    .data %>%
    make_mat({{env}}, {{gen}}, {{resp}})
  if(has_na(mat) == TRUE){
    return(FALSE)
  } else{
    return(TRUE)
  }
}



#' Utilities for data Copy-Pasta
#' @name utils_data
#' @description
#' `r badge('stable')`
#'
#' These functions allows interacting with the system clipboard. It is possible
#' read from the clipboard or write a data frame or matrix to the clipboard.
#' * `clip_read()` read data from the clipboard.
#' * `clip_write()` write data to the clipboard.
#'
#' @param .data The data that should be copied to the clipboard. Only data frames and matrices are allowed
#' @param header If the copied data has a header row for dataFrame, defaults to
#'   `TRUE`.
#' @param sep The separator which should be used in the copied output.
#' @param row_names Decides if the output should keep row names or not, defaults
#'   to `FALSE`.
#' @param col_names Decides if the output should keep column names or not,
#'   defaults to `TRUE`.
#' @md
#' @param ... Further arguments to be passed to [utils::read.table()].
#' @export
#' @importFrom utils read.table write.table
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @return Nothing
#'
clip_read <- function(header = TRUE, sep = "\t", ...){
  os <- get_os()
  if(os == "windows"){
    df <- read.table("clipboard", sep = sep, header = header, ...)
  }
  if(os == "osx"){
    df <- read.table(pipe("pbpaste"), sep = sep, header = header, ...)
  }
  if(os == "linux"){
    if (!file.exists(Sys.which("xclip")[1L])){
      stop("Cannot find xclip. Try installing it with 'sudo apt-get install xclip'")
    }
    df <- read.table(pipe("xclip -selection clipboard -o", open = "r"), ...)
  }
  return(as_tibble(df))
}
#' @name utils_data
#' @export
clip_write <- function(.data,
                       sep = "\t",
                       row_names = FALSE,
                       col_names = TRUE, ...){
  os <- get_os()
  if(os == "windows"){
    if(!has_class(.data , c("data.frame", "matrix"))){
      stop("Only data frames/tibbles/matrices allowed.")
    }
    write.table(.data,
                "clipboard",
                sep = sep,
                row.names = row_names,
                col.names = col_names,
                ...)
    message("Object '", match.call()[".data"], "' copied to the clipboard")
  }
  if(os == "osx"){
    if(!has_class(.data , c("data.frame", "matrix"))){
      stop("Only data frames/tibbles/matrices allowed.")
    }
    write.table(.data,
                file = pipe("pbcopy"),
                sep = sep,
                row.names = row_names,
                col.names = col_names,
                ...)
    message("Object '", match.call()[".data"], "' copied to the clipboard")
  }
  if(os == "linux"){
    if(!has_class(.data , c("data.frame", "matrix"))){
      stop("Only data frames/tibbles/matrices allowed.")
    }
    if (!file.exists(Sys.which("xclip")[1L])){
      stop("Cannot find xclip. Try installing it with 'sudo apt-get install xclip'")
    }
    write.table(.data,
                file = pipe("xclip -selection clipboard -i", open="w"),
                sep = sep,
                row.names = row_names,
                col.names = col_names,
                ...)
  }
}



### For internal use only ##
# Check labels
check_labels <- function(.data){
  if(any(sapply(.data, grepl, pattern = ":"))){
    stop("Using ':' in genotype or environment labels is not allowed. Use '_' instead.\ne.g., replace_string(data, ENV, pattern = ':', replacement = '_', new_var = ENV)", call. = FALSE)
  }
}
# Case
helper_case <- function(.data, fun, ...){
  if (has_class(.data, c("data.frame","tbl_df", "data.table"))){
    .data <- as.data.frame(.data)
    if(!missing(...)){
      mutate(.data, across(c(...), fun)) %>%
        as_tibble(rownames = NA)
    } else{
      mutate(.data, across(where(~!is.numeric(.x)), fun)) %>%
        as_tibble(rownames = NA)
    }
  } else{
    fun(.data)
  }
}
first_upper <- function(x){
  x <- tolower(x)
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  return(x)
}
to_title <- function(x){
  gsub("(^|[[:space:]])([[:alpha:]])", "\\1\\U\\2",
       all_lower_case(x),
       perl = TRUE)
}


# Get the OS
get_os <- function(){
  sysinf <- Sys.info()
  if (!is.null(sysinf)){
    os <- sysinf['sysname']
    if (os == 'Darwin')
      os <- "osx"
  } else {
    os <- .Platform$OS.type
    if (grepl("^darwin", R.version$os)){
      os <- "osx"
    }
    if (grepl("linux-gnu", R.version$os)){
      os <- "linux"
    }
  }
  return(all_lower_case(os))
}
# Coord radar for mtsi and mgidi indexes
coord_radar <- function (theta = "x", start = 0, direction = 1) {
  theta <- match.arg(theta, c("x", "y"))
  r <- ifelse(theta == "x", "y", "x")
  ggproto("CoordRadar", CoordPolar, theta = theta, r = r, start = start,
          direction = sign(direction),
          is_linear = function(coord) TRUE)
}
# check for data frames in lists
has_df_in_list <- function(x){
  !any(
    is.na(
      map(x, class) %>%
        lapply(function(x){
          match('data.frame', x)
        })
    )
  )
}
# check if all are dataframes
all_df_in_list <- function(x){
  !all(
    is.na(
      map(x, class) %>%
        lapply(function(x){
          match('data.frame', x)
        })
    )
  )
}
# convert seconds to a hh:mm:ss format.
sec_to_hms <- function(t){
  paste(formatC(t %/% (60*60) %% 24, width = 2, format = "d", flag = "0"),
        formatC(t %/% 60 %% 60, width = 2, format = "d", flag = "0"),
        formatC(t %% 60, width = 2, format = "d", flag = "0"),
        sep = ":"
  )
}
# Embed a lifecycle badge in documentation
# Adapted from
badge <- function(stage){
  stages <- c("experimental", "stable", "superseded", "deprecated")
  if(!stage %in% stages){
    stop("'stage' must be one of the ", stages)
  }
  url <- paste0("https://lifecycle.r-lib.org/articles/stages.html#",
                stage)
  html <- sprintf("\\href{%s}{\\figure{%s}{options: alt='[%s]'}}",
                  url, file.path(sprintf("lifecycle-%s.svg", stage)),
                  first_upper_case(stage))
  text <- sprintf("\\strong{[%s]}", first_upper_case(stage))
  sprintf("\\ifelse{html}{%s}{%s}", html, text)
}


# Tools for deprecating functions and arguments
build_msg <- function(when, what, with, message){
  if(!grepl("\\(", what) && !grepl("\\)", what)){
    stop("must have function call syntax")
  }
  get_arg <- function(x){
    gsub("[\\(\\)]", "", regmatches(x, gregexpr("\\(.*?\\)", x))[[1]])
  }
  pkg <- ifelse(grepl("::", what), sub("::.*", "", what), ".")
  funct <- paste(sub("\\(.*", "", sub(".*::", "", what)), "()", sep = "")
  arg <- ifelse(!grepl("\\()\\b", what), get_arg(what) %>% remove_space(), NA)
  if(!is.na(arg)){
    msge <- paste0("Argument `", arg, "` of `", funct, "` is deprecated as of ", pkg, " ", when, ".")
    if(!is.null(with)){
      if(!grepl("\\(", with) && !grepl("\\)", with)){
        stop("'with' must have function call syntax", call. = FALSE)
      }
      pkg_with <- ifelse(grepl("::", with), sub("::.*", "", with), ".")
      funct_with <- paste(sub("\\(.*", "", sub(".*::", "", with)), "()", sep = "")
      with_mesg <-
        case_when(pkg_with == pkg & funct_with ==  funct ~ paste0("Please use the `", get_arg(with), "` argument instead."),
                  pkg_with == pkg & funct_with !=  funct ~ paste0("Please use the `", get_arg(with), "` argument of `", funct_with, "` instead." ),
                  pkg_with != pkg  & pkg_with == "." ~ paste0("Please use the `", get_arg(with), "` argument of `", funct_with, "` instead."),
                  pkg_with != pkg ~ paste0("Please use the `", get_arg(with), "` argument of `", paste0(pkg_with, "::", funct_with), "` instead."))
      msge <- paste0(msge, "\n", with_mesg)
    }
    msge <- ifelse(is.null(message), msge, paste0(msge, "\n", message))
  } else{
    msge <-  paste0("`",funct, "` is deprecated as of ", pkg, " ", when)
    if(!is.null(with)){
      if(!grepl("\\(", with) && !grepl("\\)", with)){
        stop("'with' must have function call syntax", call. = FALSE)
      }
      pkg_with <- ifelse(grepl("::", with), sub("::.*", "", with), NA)
      funct_with <- paste(sub("\\(.*", "", sub(".*::", "", with)), "()", sep = "")
      with_mesg <-
        case_when(pkg_with == pkg ~ paste0("Please use `", funct_with, "` instead"),
                  pkg_with != pkg ~ paste0("Please use the function `", funct_with, "` of the `",pkg_with, "` package."))
      with_mesg <- ifelse(is.null(message), with_mesg, paste(message))
      msge <- paste(msge, "\n", with_mesg, sep = "")
    }
    msge <- ifelse(is.null(message), msge, paste0(msge, "\n", message))
  }
  return(msge)
}
deprecated_warning <- function(when, what, with = NULL, message = NULL){
  warning(build_msg(when, what, with, message), call. = FALSE)
}
deprecated_error <- function(when, what, with = NULL, message = NULL){
  stop(build_msg(when, what, with, message), call. = FALSE)
}
deprecated <- function(){
  "missing_arg"
}
is_present <- function(x){
  x != "missing_arg"
}




#' Set and get the Working Directory quicky
#' @description
#' `r badge('experimental')`
#' * [get_wd_here()] gets the working directory to the path of the current script.
#' * [set_wd_here()] sets the working directory to the path of the current script.
#' * [open_wd_here()] Open the File Explorer at the directory path of the current script.
#' * [open_wd()] Open the File Explorer at the current working directory.
#'
#' @param path Path components below the project root. Defaults to `NULL`. This means that
#'   the directory will be set to the path of the file. If the path doesn't exist, the
#'   user will be asked if he wants to create such a folder.
#' @return
#' * [get_wd_here()] returns a full-path directory name.
#' * [get_wd_here()] returns a message showing the current working directory.
#' * [open_wd_here()] Opens the File Explorer of the path returned by `get_wd_here()`.
#' @export
#' @importFrom utils install.packages menu
#' @name utils_wd
#' @examples
#'
#' \dontrun{
#' get_wd_here()
#' set_wd_here()
#' open_wd_here()
#' }
set_wd_here <- function(path = NULL){
  if(!requireNamespace("rstudioapi", quietly = TRUE)) {
    if(interactive() == TRUE){
      inst <-
        switch(menu(c("Yes", "No"), title = "Package {rstudioapi} required but not installed.\nDo you want to install it now?"),
               "yes", "no")
      if(inst == "yes"){
        install.packages("rstudioapi", quiet = TRUE)
      } else{
        message("To use `set_wd_here()`, first install {rstudioapi}.")
      }
    }
  } else{
    dir_path <- dirname(rstudioapi::documentPath())
    if(!is.null(path)){
      dir_path <- paste0(dir_path, "/", path)
    }
    d <- try(setwd(dir_path), TRUE)
    if(inherits(d, "try-error")){
      cat(paste0("Cannot change working directory to '", dir_path, "'."))
      done <- readline(prompt = "Do you want to create this folder now? (y/n) ")
      if(done == "y"){
        dir.create(dir_path)
        message("Directory '", dir_path, "' created.")
        setwd(dir_path)
        message("Working directory set to '", dir_path, "'")
      }
    } else{
      message("Working directory set to '", dir_path, "'")
    }
  }
}

#' @export
#' @name utils_wd
get_wd_here <- function(path = NULL){
  if(!requireNamespace("rstudioapi", quietly = TRUE)) {
    if(interactive() == TRUE){
      inst <-
        switch(menu(c("Yes", "No"), title = "Package {rstudioapi} required but not installed.\nDo you want to install it now?"),
               "yes", "no")
      if(inst == "yes"){
        install.packages("rstudioapi", quiet = TRUE)
      } else{
        message("To use `get_wd_here()`, first install {rstudioapi}.")
      }
    }
  } else{
    dir_path <- dirname(rstudioapi::documentPath())
    if(!is.null(path)){
      dir_path <- paste0(dir_path, "/", path)
    }
    dir_path
  }
}
#' @export
#' @name utils_wd
open_wd_here <- function(path = get_wd_here()){
  if(!requireNamespace("utils", quietly = TRUE)) {
    if(interactive() == TRUE){
      inst <-
        switch(menu(c("Yes", "No"), title = "Package {utils} required but not installed.\nDo you want to install it now?"),
               "yes", "no")
      if(inst == "yes"){
        install.packages("utils", quiet = TRUE)
      } else{
        message("To use `open_wd_here()`, first install {utils}.")
      }
    }
  } else{
    utils::browseURL(url = path)
  }
}

#' @export
#' @name utils_wd
open_wd <- function(path = getwd()){
  if(!requireNamespace("utils", quietly = TRUE)) {
    if(interactive() == TRUE){
      inst <-
        switch(menu(c("Yes", "No"), title = "Package {utils} required but not installed.\nDo you want to install it now?"),
               "yes", "no")
      if(inst == "yes"){
        install.packages("utils", quiet = TRUE)
      } else{
        message("To use `open_wd()`, first install {utils}.")
      }
    }
  } else{
    utils::browseURL(url = path)
  }
}
TiagoOlivoto/WAASB documentation built on April 30, 2024, 6:15 p.m.