R/group_dt.R

Defines functions group_dt

Documented in group_dt

#' @title Data manipulation within groups
#' @description Carry out data manipulation within specified groups.
#' @param .data A data.frame
#' @param by Variables to group by,unquoted name of grouping variable of list of unquoted names of grouping variables.
#' @param ... Any data manipulation arguments that could be implemented on a data.frame.
#' @return data.table
#' @details If you want to use \code{summarise_dt} and \code{mutate_dt} in
#' \code{group_dt}, it is better to use the "by" parameter in those functions,
#' that would be much faster because you don't have to use \code{.SD} (which takes
#' extra time to copy).
#' @references https://stackoverflow.com/questions/36802385/use-by-each-row-for-data-table
#' @examples
#' iris %>% group_dt(by = Species,slice_dt(1:2))
#' iris %>% group_dt(Species,filter_dt(Sepal.Length == max(Sepal.Length)))
#' iris %>% group_dt(Species,summarise_dt(new = max(Sepal.Length)))
#'
#' # you can pipe in the `group_dt`
#' iris %>% group_dt(Species,
#'                   mutate_dt(max= max(Sepal.Length)) %>%
#'                     summarise_dt(sum=sum(Sepal.Length)))
#'
#' # for users familiar with data.table, you can work on .SD directly
#' # following codes get the first and last row from each group
#' iris %>%
#'   group_dt(
#'     by = Species,
#'     rbind(.SD[1],.SD[.N])
#'   )
#'
#' #' # for summarise_dt, you can use "by" to calculate within the group
#' mtcars %>%
#'   summarise_dt(
#'    disp = mean(disp),
#'    hp = mean(hp),
#'    by = cyl
#' )
#'
#'   # but you could also, of course, use group_dt
#'  mtcars %>%
#'    group_dt(by =.(vs,am),
#'      summarise_dt(avg = mean(mpg)))
#'
#'   # and list of variables could also be used
#'  mtcars %>%
#'    group_dt(by =list(vs,am),
#'             summarise_dt(avg = mean(mpg)))
#'
#' # examples for `rowwise_dt`
#' df <- data.table(x = 1:2, y = 3:4, z = 4:5)
#'
#' df %>% mutate_dt(m = mean(c(x, y, z)))
#'
#' df %>% rowwise_dt(
#'   mutate_dt(m = mean(c(x, y, z)))
#' )


#' @rdname group_dt
#' @export

group_dt = function(.data,by = NULL,...){
  #dt = as_dt(.data)
  dt = as.data.table(.data)

  by = substitute(by)
  deparse(by) -> by_deparse
  if(by_deparse == "NULL") stop("Please provide the group(s).")
  else if(!str_detect(by_deparse,"^\\.|^list\\("))
    by_deparse %>%
      str_c(".(",.,")") -> by_deparse

  substitute(list(...)) %>%
    deparse() %>%
    str_c(collapse = "") %>%
    str_squish() %>%
    str_extract("\\(.+\\)") %>%
    str_sub(2,-2) -> dot_string

  if(str_detect(dot_string,"\\.SD"))
    to_eval = "dt[,{dot_string},by = {by_deparse}]"
  else
    dot_string %>%
    strsplit("%>%") %>%
    unlist() %>%
    str_squish() %>%
    lapply(dot_convert) %>%
    str_c("[,",.,",","by = {by_deparse}]") %>%
    str_c(collapse = "") %>%
    str_c("dt",.) -> to_eval

  eval(parse(text = str_glue(to_eval)))
}


#' @rdname group_dt
#' @export
rowwise_dt = function(.data,...){
  dt = as_dt(.data)
  dt %>%
    group_dt(
      by = row.names(dt),
      ...
    ) %>%
    select_dt(-row.names)
}



dot_convert = function(string){
  if(str_detect(string,",\\s*\\.\\s*,"))
    str_replace(string,",\\s*\\.\\s*,",",.SD,") -> string
  else if(str_detect(string,",s*\\.s*\\)"))
    str_replace(string,",s*\\.s*\\)",",.SD\\)") -> string
  else str_replace(string,"\\(","\\(.SD,") -> string
  string
}
hope-data-science/tidyfst documentation built on Sept. 23, 2024, 8:05 p.m.