R/cross_fun.R

Defines functions combine_functions cro_spearman cro_pearson cro_median cro_sum cro_mean_sd_n cro_mean cro_fun_df make_dataframe_with_row_labels make_function_for_cro make_function_for_cro_df format_table elementary_cro_fun_df cro_fun cross_spearman cross_pearson cross_median cross_sum cross_mean_sd_n cross_mean cross_fun_df cross_fun

Documented in combine_functions cro_fun cro_fun_df cro_mean cro_mean_sd_n cro_median cro_pearson cro_spearman cross_fun cross_fun_df cross_mean cross_mean_sd_n cross_median cross_pearson cross_spearman cross_sum cro_sum

#' Cross-tabulation with custom summary function.
#' 
#' \itemize{
#' \item{\code{cross_mean}, \code{cross_sum}, \code{cross_median}}{ calculate 
#' mean/sum/median by groups. NA's are always omitted.}
#' \item{\code{cross_mean_sd_n}}{ calculates mean, standard deviation and N
#' simultaneously. Mainly intended for usage with \link{significance_means}.}
#' \item{\code{cross_pearson}, \code{cross_spearman}}{ calculate correlation of 
#' first variable in each data.frame in \code{cell_vars} with other variables. 
#' NA's are removed pairwise.}
#' \item{\code{cross_fun}, \code{cross_fun_df}}{ return table with custom summary 
#' statistics defined by \code{fun} argument. NA's treatment depends on your 
#' \code{fun} behavior. To use weight you should have formal \code{weight} 
#' argument in \code{fun} and some logic for its processing inside. Several 
#' functions with weight support are provided - see \link{w_mean}. 
#' \code{cross_fun} applies \code{fun} on each variable in \code{cell_vars} 
#' separately, \code{cross_fun_df} gives to \code{fun} each data.frame in 
#' \code{cell_vars} as a whole. So \code{cross_fun(iris[, -5], iris$Species, fun =
#' mean)} gives the same result as \code{cross_fun_df(iris[, -5], iris$Species, 
#' fun = colMeans)}. For \code{cross_fun_df} names of \code{cell_vars} will 
#' converted to labels if they are available before the \code{fun} will be applied. 
#' Generally it is recommended that \code{fun} will always return object of the 
#' same form. Row names/vector names of \code{fun} result will appear in the row
#' labels of the table and column names/names of list will appear in the column 
#' labels. If your \code{fun} returns data.frame/matrix/list with element named
#' 'row_labels' then this element will be used as row labels. And it will have
#' precedence over rownames.}
#' \item{\code{cross_*}}{ are evaluate their arguments
#' in the context of the first argument \code{data}.}
#' \item{\code{cro_*}}{ functions use standard evaluation, e. g 'cro(mtcars$am, mtcars$vs)'.}
#' \item{\code{combine_functions}}{ is auxiliary function for combining several 
#' functions into one function for usage with \code{cro_fun}/\code{cro_fun_df}.
#' Names of arguments will be used as statistic labels. By default, results of
#' each function are combined with \link{c}. But you can provide your own method
#' function with \code{method} argument. It will be applied as in the expression
#' \code{do.call(method, list_of_functions_results)}. Particular useful method
#' is \code{list}. When it used then statistic labels will appear in the column
#' labels. See examples. Also you may be interested in \code{data.frame}, 
#' \code{rbind}, \code{cbind} methods.}}
#' 
#' @param cell_vars vector/data.frame/list. Variables on which summary function
#'   will be computed. 
#' @param col_vars vector/data.frame/list. Variables which breaks table by
#'   columns. Use \link{mrset}/\link{mdset} for multiple-response variables.
#' @param row_vars vector/data.frame/list. Variables which breaks table by rows.
#'   Use \link{mrset}/\link{mdset} for multiple-response variables.
#' @param weight numeric vector. Optional cases weights. Cases with NA's,
#'   negative and zero weights are removed before calculations.
#' @param subgroup logical vector. You can specify subgroup on which table will be computed. 
#' @param fun custom summary function. Generally it is recommended that 
#'   \code{fun} will always return object of the same form. Rownames/vector 
#'   names of \code{fun} result will appear in the row labels of the table and 
#'   column names/names of list will appear in the column labels. To use weight 
#'   you should have formal \code{weight} argument in \code{fun} and some logic 
#'   for its processing inside. For \code{cro_fun_df} \code{fun} will receive 
#'   \link[data.table]{data.table} with all names converted to variable labels
#'   (if labels exists). So it is not recommended to rely on original variables
#'   names in your \code{fun}.
#' @param data data.frame in which context all other arguments will be evaluated
#'   (for \code{cross_*}).
#' @param ... further arguments for \code{fun}  in 
#'   \code{cross_fun}/\code{cross_fun_df} or functions for \code{combine_functions}.
#'   Ignored in \code{cross_fun}/\code{cross_fun_df} if \code{unsafe} is TRUE.
#' @param weighted_valid_n logical. Should we show weighted valid N in
#'   \code{cro_mean_sd_n}? By default it is FALSE.
#' @param labels character vector of length 3. Labels for mean, standard
#'   deviation and valid N in \code{cro_mean_sd_n}.
#' @param method function which will combine results of multiple functions in
#'   \code{combine_functions}. It will be applied as in the expression 
#'   \code{do.call(method, list_of_functions_results)}. By default it is
#'   \code{c}.
#' @param unsafe logical/character If not FALSE than \code{fun} will be 
#'   evaluated as is. It can lead to significant increase in the performance. 
#'   But there are some limitations. For \code{cross_fun} it means that your 
#'   function \code{fun} should return vector. If length of this vector is
#'   greater than one than you should provide with \code{unsafe} argument vector
#'   of unique labels for each element of this vector. There will be no attempts
#'   to automatically make labels for the results of \code{fun}. For 
#'   \code{cross_fun_df} your function should return vector or list/data.frame
#'   (optionally with 'row_labels' element - statistic labels). If \code{unsafe}
#'   is TRUE or not logical then further arguments (\code{...}) for \code{fun}
#'   will be ignored.
#'  
#'
#' @return object of class 'etable'. Basically it's a data.frame but class
#'   is needed for custom methods.
#' @seealso \link{tables}, \link{fre}, \link{cross_cases}.
#'
#' @examples
#' data(mtcars)
#' mtcars = apply_labels(mtcars,
#'                       mpg = "Miles/(US) gallon",
#'                       cyl = "Number of cylinders",
#'                       disp = "Displacement (cu.in.)",
#'                       hp = "Gross horsepower",
#'                       drat = "Rear axle ratio",
#'                       wt = "Weight (1000 lbs)",
#'                       qsec = "1/4 mile time",
#'                       vs = "Engine",
#'                       vs = c("V-engine" = 0,
#'                              "Straight engine" = 1),
#'                       am = "Transmission",
#'                       am = c("Automatic" = 0,
#'                              "Manual"=1),
#'                       gear = "Number of forward gears",
#'                       carb = "Number of carburetors"
#' )
#' 
#' 
#' # Simple example - there is special shortcut for it - 'cross_mean'
#' cross_fun(mtcars, 
#'           list(mpg, disp, hp, wt, qsec), 
#'           col_vars = list(total(), am), 
#'           row_vars = vs, 
#'           fun = mean)
#' 
#' 
#' 
#' # The same example with 'subgroup'
#' cross_fun(mtcars, 
#'        list(mpg, disp, hp, wt, qsec), 
#'        col_vars = list(total(), am), 
#'        row_vars = vs, 
#'        subgroup = vs == 0, 
#'        fun = mean)
#'                                 
#' # 'combine_functions' usage  
#' cross_fun(mtcars, 
#'           list(mpg, disp, hp, wt, qsec), 
#'           col_vars = list(total(), am), 
#'           row_vars = vs, 
#'           fun = combine_functions(Mean = mean, 
#'                                   'Std. dev.' = sd,
#'                                   'Valid N' = valid_n)
#' )
#' 
#' # 'combine_functions' usage - statistic labels in columns
#' cross_fun(mtcars, 
#'           list(mpg, disp, hp, wt, qsec), 
#'           col_vars = list(total(), am), 
#'           row_vars = vs, 
#'           fun = combine_functions(Mean = mean, 
#'                                   'Std. dev.' = sd,
#'                                   'Valid N' = valid_n,
#'                                   method = list
#'                                   )
#' )
#' 
#' # 'summary' function
#' cross_fun(mtcars, 
#'           list(mpg, disp, hp, wt, qsec), 
#'           col_vars = list(total(), am), 
#'           row_vars = list(total(), vs), 
#'           fun = summary
#' ) 
#'                           
#' # comparison 'cross_fun' and 'cross_fun_df'
#' cross_fun(mtcars,
#'           data.frame(mpg, disp, hp, wt, qsec), 
#'           col_vars = am,
#'           fun = mean
#' )
#' 
#' 
#' # same result
#' cross_fun_df(mtcars,
#'              data.frame(mpg, disp, hp, wt, qsec), 
#'              col_vars = am, 
#'              fun = colMeans
#'              )
#' 
#' # usage for 'cross_fun_df' which is not possible for 'cross_fun'
#' # linear regression by groups
#' cross_fun_df(mtcars,
#'              data.frame(mpg, disp, hp, wt, qsec), 
#'              col_vars = am,
#'              fun = function(x){
#'                  frm = reformulate(".", response = as.name(names(x)[1]))
#'                  model = lm(frm, data = x)
#'                  cbind('Coef.' = coef(model), 
#'                        confint(model)
#'                  )
#'              } 
#' )
#' @export
cross_fun = function(data,
                     cell_vars, 
                     col_vars = total(), 
                     row_vars = total(label = ""),
                     weight = NULL,
                     subgroup = NULL,
                     fun, 
                     ...,
                     unsafe = FALSE){
    expr = substitute(cro_fun(
        cell_vars = cell_vars, 
        col_vars = col_vars, 
        row_vars = row_vars,
        weight = weight,
        subgroup = subgroup,
        fun = fun, 
        ...,
        unsafe = unsafe)
    )
    calculate_internal(data, expr = expr, parent = parent.frame())
}

###########################

#' @export
#' @rdname cross_fun
cross_fun_df = function(data,
                        cell_vars, 
                        col_vars = total(), 
                        row_vars = total(label = ""),
                        weight = NULL,
                        subgroup = NULL,
                        fun, 
                        ...,
                        unsafe = FALSE){
    expr = substitute(cro_fun_df(
        cell_vars = cell_vars, 
        col_vars = col_vars, 
        row_vars = row_vars,
        weight = weight,
        subgroup = subgroup,
        fun = fun, 
        ...,
        unsafe = unsafe)
    )
    calculate_internal(data, expr = expr, parent = parent.frame())    
}

###########################

#' @export
#' @rdname cross_fun
cross_mean = function(data,
                      cell_vars, 
                      col_vars = total(), 
                      row_vars = total(label = ""),
                      weight = NULL,
                      subgroup = NULL
){
    expr = substitute(
        cro_mean(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup)
    )
    calculate_internal(data, expr = expr, parent = parent.frame())      
}

#########################

#' @export
#' @rdname cross_fun
cross_mean_sd_n = function(data,
                           cell_vars, 
                           col_vars = total(), 
                           row_vars = total(label = ""),
                           weight = NULL,
                           subgroup = NULL,
                           weighted_valid_n = FALSE,
                           labels = NULL
                           
){
    expr = substitute(
        cro_mean_sd_n(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup,
            weighted_valid_n =  weighted_valid_n,
            labels = labels)
    )
    calculate_internal(data, expr = expr, parent = parent.frame()) 
}

###########################

#' @export
#' @rdname cross_fun
cross_sum = function(data,
                     cell_vars, 
                     col_vars = total(), 
                     row_vars = total(label = ""),
                     weight = NULL,
                     subgroup = NULL
){
    expr = substitute(
        cro_sum(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup)
    )
    calculate_internal(data, expr = expr, parent = parent.frame())      
}    

###########################

#' @export
#' @rdname cross_fun
cross_median = function(data,
                        cell_vars, 
                        col_vars = total(), 
                        row_vars = total(label = ""),
                        weight = NULL,
                        subgroup = NULL
){
    expr = substitute(
        cro_median(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup)
    )
    calculate_internal(data, expr = expr, parent = parent.frame())      
}

############################

#' @export
#' @rdname cross_fun
cross_pearson = function(data,
                         cell_vars, 
                         col_vars = total(), 
                         row_vars = total(label = ""),
                         weight = NULL,
                         subgroup = NULL
){
    expr = substitute(
        cro_pearson(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup)
    )
    calculate_internal(data, expr = expr, parent = parent.frame()) 
}

############################

#' @export
#' @rdname cross_fun
cross_spearman = function(data,
                          cell_vars, 
                          col_vars = total(), 
                          row_vars = total(label = ""),
                          weight = NULL,
                          subgroup = NULL
){
    expr = substitute(
        cro_spearman(
            cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars,
            weight = weight,
            subgroup = subgroup)
    )
    calculate_internal(data, expr = expr, parent = parent.frame()) 
}

############################
#' @export
#' @rdname cross_fun
cro_fun = function(cell_vars, 
                   col_vars = total(), 
                   row_vars = total(label = ""),
                   weight = NULL,
                   subgroup = NULL,
                   fun, 
                   ...,
                   unsafe = FALSE){
    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_dataframe(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    fun = match.fun(fun)
    if(!is.null(weight)){
        stopif(!("weight" %in% names(formals(fun))),
               "`weight` is provided but `fun` doesn't have formal `weight` argument.")
    }
    
    
    row_vars = flat_list(multiples_to_single_columns_with_dummy_encoding(row_vars), flat_df = TRUE)
    col_vars = flat_list(multiples_to_single_columns_with_dummy_encoding(col_vars), flat_df = TRUE)
    stopif(!is.null(subgroup) && !is.logical(subgroup), "'subgroup' should be logical.")
    
    check_sizes("'cro_fun'", cell_vars, row_vars, col_vars, weight, subgroup)
    
    if(is.logical(unsafe) && !unsafe){
        cell_vars = make_labels_from_names(cell_vars)
        fun = make_function_for_cro(fun, ..., need_weight = !is.null(weight))
    } else {
        cell_vars = unlab(names2labels(cell_vars))
    }
    
    res = lapply(row_vars, function(each_row_var){
        all_col_vars = lapply(col_vars, function(each_col_var){
            dtable = elementary_cro_fun_df(cell_var = cell_vars,
                                           row_var = each_row_var, 
                                           col_var = each_col_var, 
                                           weight = weight,
                                           subgroup = subgroup,
                                           fun = fun,
                                           use_lapply = unsafe
            )    
            row_var_lab = var_lab(dtable[["..row_var__"]])
            col_var_lab = var_lab(dtable[["..col_var__"]])
            ### make rectangular table  
            res = long_datatable_to_table(dtable, rows = c("..row_var__", "row_labels"), 
                                          columns = "..col_var__", 
                                          values = colnames(dtable) %d% c("..row_var__", "row_labels", "..col_var__")
            )
            format_table(res, 
                         row_var_lab = row_var_lab, 
                         col_var_lab = col_var_lab)  
        })
        Reduce(merge, all_col_vars)
    })
    
    res = do.call(add_rows, res)
    rownames(res) = NULL
    remove_unnecessary_splitters_from_table(res)
}



### compute statistics for single row_var and single col_var
elementary_cro_fun_df = function(cell_var, 
                                 col_var, 
                                 weight,
                                 fun,
                                 row_var,
                          subgroup, 
                          use_lapply = FALSE
                          ){
    # to pass CRAN check
    ..weight__ = NULL
    row_labels = NULL
    ### calculate vector of valid cases
    if(is.null(subgroup)){
      valid = valid(col_var) & valid(row_var)
    } else {
      valid = valid(col_var) & valid(row_var) & subgroup & !is.na(subgroup)
    }
    max_nrow = max(NROW(cell_var), NROW(col_var), NROW(row_var), NROW(weight))
    
    ## if any of vars is zero-length then we made all vars zero-length
    min_nrow = min(NROW(cell_var), NROW(col_var), NROW(row_var))
    if(any(min_nrow==0)) max_nrow = 0
    
    ##### prepare weight #####
    if(!is.null(weight)){
        weight = set_negative_and_na_to_zero(weight)
        weight = recycle_if_single_row(weight, max_nrow)
        valid = valid & (weight>0)
        weight = weight[valid]
    }
    

    ### recycle variables of length 1

    cell_var = recycle_if_single_row(cell_var, max_nrow)
    col_var = recycle_if_single_row(col_var, max_nrow)
    row_var = recycle_if_single_row(row_var, max_nrow)
    
    ### drop non-valid cases 
    if(!all(valid, na.rm = TRUE) || length(valid)==0){
        cell_var = universal_subset(cell_var, valid, drop = FALSE)
        col_var = universal_subset(col_var, valid)
        row_var = universal_subset(row_var, valid, drop = FALSE)
    } else {
        col_var = drop(col_var)
    }
    ###################

    ### pack data.table #####
    if(NCOL(cell_var)>1){
        colnames(cell_var) =  make_items_unique(colnames(cell_var))
    } 
    
    if(is.null(weight)){
        raw_data = data.table(..row_var__ = row_var, ..col_var__ = col_var, cell_var)
    } else {
        raw_data = data.table(..row_var__ = row_var, ..col_var__ = col_var, ..weight__ = weight, cell_var)
    }

    
    # statistics
    by_string = c("..row_var__", "..col_var__")
    if(is.logical(use_lapply) && !use_lapply){
        if(is.null(weight)){
            dtable = raw_data[ , fun(.SD), by = by_string]
        } else {
            dtable = raw_data[ , fun(.SD, weight = ..weight__), by = by_string, .SDcols = -"..weight__"]
        }
    } else {
        if(is.null(weight)){
            dtable = raw_data[ , lapply(.SD, fun), by = by_string]
        } else {
            dtable = raw_data[ , lapply(.SD[, -"..weight__"], fun, weight = ..weight__), by = by_string]
        }
        dtable = melt(dtable, id.vars = c("..row_var__", "..col_var__"), 
                      variable.name = "row_labels",
                      na.rm = FALSE, variable.factor = TRUE, 
                      value.factor = FALSE, 
                      verbose = FALSE)
        if(!isTRUE(use_lapply)){
            labels_length = length(use_lapply)
            stopif(labels_length==0 || (NROW(dtable) %% labels_length)!=0, 
                   "'cro_fun'/'cro_fun_df' - length of labels provided with 'unsafe' is incorrect: ", 
                   labels_length)
            stopif(anyDuplicated(use_lapply),  "'cro_fun'/'cro_fun_df' - duplicated labels provided with 'unsafe': ", 
                   paste(unique(use_lapply[duplicated(use_lapply)])), collapse = ", ")
            if(NROW(dtable)==0 && length(levels(dtable[["row_labels"]]))>0) {
              # fix bug with unneseccary line: 
              #            cro_mean_sd_n(1:4, total(), row_vars = set_val_lab(rep(NA, 4), c(a=1, b =2, c= 3, d= 4)))
              new_levels = expand.grid(use_lapply, levels(dtable[["row_labels"]]))
              levels(dtable[["row_labels"]])  = paste(new_levels[[2]], new_levels[[1]], sep = "|")
            } else {
              dtable[ , row_labels := paste(row_labels, use_lapply, sep = "|")]
            }
        }
        
    }    
    if(("row_labels" %in% colnames(dtable)) && !is.factor(dtable[["row_labels"]])){ 
        dtable[ , row_labels := fctr(row_labels, levels = unique(row_labels),
                          prepend_var_lab = FALSE)]
    }
    dtable
}    

    

    
########
  
format_table = function(wide_datable, row_var_lab, col_var_lab){  
    # to pass CRAN check
    row_labels = NULL
    ..row_var__ = NULL

    wide_datable[ , row_labels  := as.character(row_labels)] 
    wide_datable[ , row_labels  := paste0(..row_var__, "|", row_labels)]  
    wide_datable[["..row_var__"]] = NULL
    
    wide_datable[, row_labels := paste0(row_var_lab, "|", row_labels)]
    colnames(wide_datable)[-1] = paste0(col_var_lab, "|", colnames(wide_datable)[-1]) 

    wide_datable[ , row_labels := remove_unnecessary_splitters(row_labels)] 
    wide_datable[ , row_labels := make_items_unique(row_labels)] 
    colnames(wide_datable) = remove_unnecessary_splitters(colnames(wide_datable)) 
    wide_datable = as.sheet(wide_datable)
    class(wide_datable) = union("etable", class(wide_datable))
    wide_datable
}

#######
    
make_function_for_cro_df = function(fun, ..., need_weight = TRUE){
    # to pass CRAN check
    row_labels = NULL
    
    force(fun)
    force(need_weight)
    if(need_weight){
        function(x, weight = weight){
            res = fun(x, ..., weight = weight)
            res = make_dataframe_with_row_labels(res)
            # we need convert to factor to keep all row_labels as levels in case of aggregation 
            # of data with zero rows
            if(nrow(x)==0){
                res$row_labels = fctr(res[["row_labels"]], levels = unique(res[["row_labels"]]),
                                                 prepend_var_lab = FALSE)
            }
            res
        }
    } else {
        function(x){
            res = fun(x, ...)
            res = make_dataframe_with_row_labels(res)
            # we need convert to factor to keep all row_labels as levels in case of aggregation 
            # of data with zero rows
            if(nrow(x)==0){
                res$row_labels = fctr(res[["row_labels"]], levels = unique(res[["row_labels"]]),
                                      prepend_var_lab = FALSE)
            }
            res
        }        
    }
}

###############
make_function_for_cro = function(fun, ..., need_weight = TRUE){
    # to pass CRAN check
    row_labels = NULL
    force(fun)
    force(need_weight)
    if(need_weight){
        function(x, weight = weight){
            res = lapply(x, function(column) {
                varlab = var_lab(column)
                each_res = fun(unvr(column), ..., weight = weight)
                each_res = make_dataframe_with_row_labels(each_res)
                if(!is.null(varlab)){
                    each_res[["row_labels"]] = paste0(varlab, "|", each_res[["row_labels"]]) 
                }
                each_res
            })
            res = as.sheet(rbindlist(res, use.names = TRUE, fill = TRUE))
            # we need convert to factor to keep all row_labels as levels in case of aggregation 
            # of data with zero rows
            res$row_labels = make_items_unique(res[["row_labels"]])
            if(nrow(x)==0){
                res$row_labels = fctr(res[["row_labels"]], levels = unique(res[["row_labels"]]),
                                      prepend_var_lab = FALSE)
            }
            res
        }
    } else {
        function(x){
            res = lapply(x, function(column) {
                varlab = var_lab(column)
                each_res = fun(unvr(column), ...)
                each_res = make_dataframe_with_row_labels(each_res)
                if(!is.null(varlab)){
                    each_res[["row_labels"]] = paste0(varlab, "|", each_res[["row_labels"]])
                }
                each_res
            })
            res = as.sheet(rbindlist(res, use.names = TRUE, fill = TRUE))
            # we need convert to factor to keep all row_labels as levels in case of aggregation 
            # of data with zero rows
            res$row_labels = make_items_unique(res[["row_labels"]])
            if(nrow(x)==0){
                res$row_labels = fctr(res[["row_labels"]], levels = unique(res[["row_labels"]]),
                                      prepend_var_lab = FALSE)
            }
            res
        }        
    }
}

#############
make_dataframe_with_row_labels = function(res){
    if(is.table(res)){
        ### block for 'summary' and 'table'
        dm_names = dimnames(res)
        if(is.null(dm_names)){
            dm_names[[1]] = names(res)
        }
        new_df = matrix(NA, nrow= NROW(res), ncol = NCOL(res))
        new_df[] = res
        new_df = as.sheet(new_df)
        rownames(new_df) = NULL
        
        row_labels = dm_names[[1]]
        if(length(dm_names)>1){
            new_df = setNames(new_df, dm_names[[2]])
        } else {
            new_df = setNames(new_df, rep("|", NCOL(new_df)))
        }
        res = new_df
    } else {
        ####
        if(is.matrix(res) || is_list(res)) res = as.sheet(res)
        ####
        if(is.data.frame(res)) {
            if("row_labels" %in% names(res)){
                res[["row_labels"]] = make_items_unique(res[["row_labels"]])
            } else {
                row_labels = rownames(res)
                if(!is.null(row_labels) && length(row_labels)==1 && row_labels[1]==1){
                    row_labels = ""
                }
                res[["row_labels"]] = make_items_unique(row_labels)
            }
            return(res)
        } else {
        #### usual vectors and unknowns    
            row_labels = names(res)
            res = setNames(sheet(res), rep("|", NCOL(res)))
        } 
    }
    
    #######
    if(is.null(row_labels)){
        if(nrow(res)>1){
            row_labels = seq_len(nrow(res)) 
        } else {
            row_labels = rep("", nrow(res)) # for empty results   
        }
    } 
    row_labels = make_items_unique(as.character(row_labels))
    res[["row_labels"]] = row_labels
    res
}




#######

#' @export
#' @rdname cross_fun
cro_fun_df = function(cell_vars, 
                      col_vars = total(), 
                      row_vars = total(label = ""),
                      weight = NULL,
                      subgroup = NULL,
                      fun, 
                      ...,
                      unsafe = FALSE){
    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))

    cell_vars = test_for_null_and_make_list(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    fun = match.fun(fun)
    if(!is.null(weight)){
        stopif(!("weight" %in% names(formals(fun))),
               "`weight` is provided but `fun` doesn't have formal `weight` argument.")
    }
    if(is.logical(unsafe) && !unsafe) {
        fun = make_function_for_cro_df(fun, ..., need_weight = !is.null(weight))
    }
    cell_vars = make_labels_from_names(cell_vars)
    cell_vars = flat_list(cell_vars, flat_df = FALSE)
    row_vars = flat_list(multiples_to_single_columns_with_dummy_encoding(row_vars), flat_df = TRUE)
    col_vars = flat_list(multiples_to_single_columns_with_dummy_encoding(col_vars), flat_df = TRUE)
    
    stopif(!is.null(subgroup) && !is.logical(subgroup), "'subgroup' should be logical.")
    check_sizes("'cro_fun_df'", cell_vars, row_vars, col_vars, weight, subgroup)
    
    res = lapply(row_vars, function(each_row_var){
        all_cell_vars = lapply(cell_vars, function(each_cell_var){
            all_col_vars = lapply(col_vars, function(each_col_var){
                dtable = elementary_cro_fun_df(cell_var = names2labels(each_cell_var),
                                      row_var = each_row_var, 
                                      col_var = each_col_var, 
                                      weight = weight,
                                      subgroup = subgroup,
                                      fun = fun
                )    
                row_var_lab = var_lab(dtable[["..row_var__"]])
                col_var_lab = var_lab(dtable[["..col_var__"]])
                ### make rectangular table  
                res = long_datatable_to_table(dtable, rows = c("..row_var__", "row_labels"), 
                                              columns = "..col_var__", 
                                              values = colnames(dtable) %d% c("..row_var__", "row_labels", "..col_var__")
                )
                format_table(res, 
                             row_var_lab = row_var_lab, 
                             col_var_lab = col_var_lab)
            })
            Reduce(merge, all_col_vars)
        })
        do.call(add_rows, all_cell_vars) 
        
    })
    res = do.call(add_rows, res)
    rownames(res) = NULL
    remove_unnecessary_splitters_from_table(res)
}




######################################################


#' @export
#' @rdname cross_fun
cro_mean = function(cell_vars, 
                    col_vars = total(), 
                    row_vars = total(label = ""),
                    weight = NULL,
                    subgroup = NULL
){
    
    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_dataframe(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    cro_fun(cell_vars = cell_vars, 
            col_vars = col_vars, 
            row_vars = row_vars, 
            weight = weight,
            subgroup = subgroup,
            # here we heavy rely on the fact that weight already checked on NA and negative values
            fun = function(x, weight = NULL) matrixStats::weightedMean(x, w = weight, na.rm = TRUE),
            unsafe = TRUE
    )
}



#' @export
#' @rdname cross_fun
cro_mean_sd_n = function(cell_vars, 
                         col_vars = total(), 
                         row_vars = total(label = ""),
                         weight = NULL,
                         subgroup = NULL,
                         weighted_valid_n = FALSE,
                         labels = NULL
                         
){
  
  str_cell_vars = expr_to_character(substitute(cell_vars))
  str_row_vars = expr_to_character(substitute(row_vars))
  str_col_vars = expr_to_character(substitute(col_vars))
  
  cell_vars = test_for_null_and_make_dataframe(cell_vars, str_cell_vars)
  row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
  col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
  if(is.null(labels)) {
    labels = c("Mean", "Std. dev.", 
               ifelse(weighted_valid_n, "Valid N", "Unw. valid N"))    
  } else {
    stopif(anyDuplicated(labels), "'cro_mean_sd_n' - all 'labels' should be unduplicated.")
    stopif(length(labels)!=3, "'cro_mean_sd_n' - 'labels' should be vector of three elements.")
  }
  if(weighted_valid_n){
    # here we heavy rely on the fact that weight already checked on NA and negative values
    fun = function(x, weight = NULL) {
      if(is.logical(x)){
        x = as.numeric(x)
      }
      c(
        {
          res = matrixStats::weightedMean(x, w = weight, na.rm = TRUE)
          if(is.nan(res)) res = NA
          res
        },
        weight_helper(matrixStats::weightedSd)(x, weight, na.rm = TRUE), 
        {
          validn = if(is.null(weight)) sum(!is.na(x)) else sum(weight[!is.na(x)], na.rm = TRUE)
          if(!is.na(validn) && validn==0) validn = NA
          validn
        }
      )
    }
  } else {
    fun = function(x, weight = NULL) {
      if(is.logical(x)){
        x = as.numeric(x)
      }
      c(
        {
          res = matrixStats::weightedMean(x, w = weight, na.rm = TRUE)
          if(is.nan(res)) res = NA
          res
        },
        weight_helper(matrixStats::weightedSd)(x, weight, na.rm = TRUE),
        {
          validn = sum(!is.na(x))
          if(!is.na(validn) && validn==0) validn = NA
          validn
        }
      )
    }
  }
  cro_fun(cell_vars = cell_vars, 
          col_vars = col_vars, 
          row_vars = row_vars, 
          weight = weight,
          subgroup = subgroup,
          fun = fun,
          unsafe = labels
  )
}


#' @export
#' @rdname cross_fun
cro_sum = function(cell_vars, 
                   col_vars = total(), 
                   row_vars = total(label = ""),
                   weight = NULL,
                   subgroup = NULL
){

    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_dataframe(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    cro_fun(cell_vars = cell_vars, 
                         col_vars = col_vars, 
                         row_vars = row_vars, 
                         weight = weight,
                         subgroup = subgroup,
                         fun = if(is.null(weight)){
                             function(x) sum(x, na.rm = TRUE)
                         } else {
                             function(x, weight = NULL) sum(x*weight, na.rm = TRUE)
                         },
                         unsafe = TRUE
    )
}

#' @export
#' @rdname cross_fun
cro_median = function(cell_vars, 
                      col_vars = total(), 
                      row_vars = total(label = ""),
                      weight = NULL,
                      subgroup = NULL
){

    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_dataframe(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    cro_fun(cell_vars = cell_vars, 
                         col_vars = col_vars, 
                         row_vars = row_vars, 
                         weight = weight,
                         subgroup = subgroup,
                         fun = function(x, weight = NULL){
                             matrixStats::weightedMedian(x = x, 
                                                         w = weight, 
                                                         na.rm = TRUE, 
                                                         interpolate = TRUE, 
                                                         ties = "weighted")
                         },
                         unsafe = TRUE
    )
}


###############################################
###############################################
###############################################

#' @export
#' @rdname cross_fun
cro_pearson = function(cell_vars, 
                         col_vars = total(), 
                         row_vars = total(label = ""),
                         weight = NULL,
                         subgroup = NULL
){
    fun = function(x, weight = NULL){
        w_pearson(x, weight = weight)[ , 1]
    }    
    
    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_list(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    cro_fun_df(cell_vars = cell_vars, 
               col_vars = col_vars, 
               row_vars = row_vars, 
               weight = weight,
               subgroup = subgroup,
               fun = fun
    )
}


#' @export
#' @rdname cross_fun
cro_spearman = function(cell_vars, 
                           col_vars = total(), 
                           row_vars = total(label = ""),
                           weight = NULL,
                           subgroup = NULL
){
    fun = function(x, weight = NULL){
        w_spearman(x, weight = weight)[ , 1]
    }    
    
    str_cell_vars = expr_to_character(substitute(cell_vars))
    str_row_vars = expr_to_character(substitute(row_vars))
    str_col_vars = expr_to_character(substitute(col_vars))
    
    cell_vars = test_for_null_and_make_list(cell_vars, str_cell_vars)
    row_vars = test_for_null_and_make_list(row_vars, str_row_vars)
    col_vars = test_for_null_and_make_list(col_vars, str_col_vars)
    
    cro_fun_df(cell_vars = cell_vars, 
               col_vars = col_vars, 
               row_vars = row_vars, 
               weight = weight,
               subgroup = subgroup,
               fun = fun
    )
}

##########################



#' @export
#' @rdname cross_fun
combine_functions = function(..., method = c){
    method = match.fun(method)
    possible_names = unlist(lapply(as.list(substitute(list(...)))[-1], expr_to_character))
    args = list(...)
    arg_names =names(args)
    if(is.null(arg_names)) {
        names(args) = possible_names
    } else {
        names(args)[arg_names==""] = possible_names[arg_names==""]
    }  
    function(x, weight = NULL){
        if(!is.null(weight)){
            for(each in seq_along(args)){
                stopif(!("weight" %in% names(formals(args[[each]]))),
                       paste0("`weight` is provided but function`", names(args)[each],
                              "` doesn't have formal `weight` argument."))
            }    
            res = lapply(args, function(f) f(x, weight = weight))
        } else {
            res = lapply(args, function(f) f(x))    
        }
        
        do.call(method, res)
    }
}
gdemin/expss documentation built on April 13, 2024, 2:32 p.m.