R/visualization.R

Defines functions plot_box plot_lines plot_distr getFplot_distr setFplot_distr

Documented in getFplot_distr plot_box plot_distr plot_lines setFplot_distr

#----------------------------------------------#
# Author: Laurent Berge
# Date creation: Mon Sep 30 10:09:06 2019
# ~: user-visible visualization functions
#----------------------------------------------#

# load("data/us_pub_econ.RData")
# roxygen2::roxygenise(roclets = "rd")



#' Sets the defaults of plot_distr
#'
#' The default values of most arguments of \code{\link[fplot]{plot_distr}} can be set with \code{setFplot_distr}.
#'
#' @inheritParams plot_distr
#'
#' @param reset Logical scalar, default is \code{FALSE}. Whether the defaults should be reset.
#'
#' @seealso
#' \code{\link[fplot]{plot_distr}}, \code{\link[fplot]{pdf_fit}}, \code{\link[fplot]{fit.off}}.
#'
#' @examples
#'
#' # Changing the default color set for plot_distr only
#' my_col = c("#36688D", "#F3CD05", "#F49F05", "#F18904", "#BDA589")
#'
#' setFplot_distr(col = my_col, mod.method = "split", border = NA)
#'
#' plot_distr(~ Petal.Length | Species, iris)
#'
#' # Back to normal
#' setFplot_distr(reset = TRUE)
#'
#' plot_distr(~ Petal.Length | Species, iris)
#'
#'
#'
setFplot_distr = function(sorted, log, top, yaxis.num, col, border = "black", mod.method, within, total, at_5, labels.tilted, other, cumul = FALSE, centered = TRUE, weight.fun, int.categorical, dict = NULL, mod.title = TRUE, labels.angle, cex.axis, trunc = 20, trunc.method = "auto", reset = FALSE){
    # Function to set the defaults of plot_distr
    fm_distr = formals(plot_distr)
    arg_list = names(fm_distr)
    # arg_no_default = c("fml", "data", "moderator", "weight", "nbins", "bin.size", "legend_options", "yaxis.show", "mod.select", "plot", "sep", "...")
    # m = fm_distr[!names(fm_distr) %in% arg_no_default]
    # cat(gsub(" = ,", ",", paste0(names(m), " = ", sapply(m, deparse), collapse = ", ")))


    check_arg("logical scalar", sorted, log, yaxis.num, within, total, labels.tilted, other, cumul)
    check_arg("logical scalar", centered, int.categorical, reset)

    check_arg_plus(top, "match(frac, nb, none, FALSE)")
    if(!missing(top) && top == "FALSE") top = "none"

    check_arg(col, "vector")
    check_arg(border, "NULL NA | vector(character, integer) NA OK")

    check_arg_plus(mod.method, "match(side, split, stack)")
    check_arg_plus(at_5, "match(roman, line, FALSE)")

    check_arg(weight.fun, "function arg(1,)")

    check_arg(dict, "null named character vector | logical scalar")
    check_arg(mod.title, "scalar(character, logical)")

    check_arg(labels.angle, "numeric scalar GE{10} LE{90}")
    check_arg(cex.axis, "numeric scalar GT{0}")
    check_arg(trunc, "integer scalar GT{5}")
    check_arg_plus(trunc.method, "match(auto, mid, right)")

    # Getting the existing defaults
    opts = getOption("fplot_distr")

    if(is.null(opts)){
        opts = list()
    } else if(!is.list(opts)){
        warning("Wrong formatting of option 'fplot_distr', all options are reset.")
        opts = list()
    } else if(reset){
        opts = list()
    }

    # Saving the default values
    mc = match.call()
    args_default = intersect(names(mc), arg_list)

    # NOTA: we don't allow delayed evaluation => all arguments must have hard values
    for(v in args_default){
        opts[[v]] = eval(as.name(v))
    }

    options(fplot_distr = opts)

}

#' @rdname setFplot_distr
getFplot_distr = function(){
    opts = getOption("fplot_distr")
    if(!is.list(opts)){
        warning("Wrong formatting of option 'fplot_distr', all options are reset.")
        opts = list()
        options(fplot_distr = opts)
    }
    opts
}




#' Plot distributions, possibly conditional
#'
#' This function plots distributions of items (a bit like an histogram) which can be easily conditioned over.
#'
#' @param fml A formula or a vector. If a formula, it must be of the type: \code{weights ~ var | moderator}. If there are no moderator nor weights, you can use directly a vector, or use a one-sided formula \code{fml = ~var}. You can use multiple variables as weights, if so, you cannot use moderators at the same time. See examples.
#' @param data A data.frame: data set containing the variables in the formula.
#' @param moderator Optional, only if argument \code{fml} is a vector. A vector of moderators.
#' @param weight Optional, only if argument \code{fml} is a vector. A vector of (positive) weights.
#' @param sorted Logical: should the first elements displayed be the most frequent? By default this is the case except for numeric values put to log or to integers.
#' @param log Logical, only used when the data is numeric. If \code{TRUE}, then the data is put to logarithm beforehand. By default numeric values are put to log if the log variation exceeds 3.
#' @param col A vector of colors, default is close to paired. You can also use \dQuote{set1} or \dQuote{paired}.
#' @param border Outer color of the bars. Defaults is \code{"black"}. Use \code{NA} to remove the borders.
#' @param nbins Maximum number of items displayed. The default depends on the number of moderator cases. When there is no moderator, the default is 15, augmented to 20 if there are less than 20 cases.
#' @param bin.size Only used for numeric values. If provided, it creates bins of observations of size \code{bin.size}. It creates bins by default for numeric non-integer data.
#' @param legend_options A list. Other options to be passed to \code{legend} which concerns the legend for the moderator.
#' @param yaxis.show Whether the y-axis should be displayed, default is \code{TRUE}.
#' @param yaxis.num Whether the y-axis should display regular numbers instead of frequencies in percentage points. By default it shows numbers only when the data is weighted with a different function than the sum. For conditionnal distributions, a numeric y-axis can be displayed only when \code{mod.method = "sideTotal"}, \code{mod.method = "splitTotal"} or \code{mod.method = "stack"}, since for the within distributions it does not make sense (because the data is rescaled for each moderator).
#' @param mod.select Which moderators to select. By default the top 3 moderators in terms of frequency (or in terms of weight value if there's a weight) are displayed. If provided, it must be a vector of moderator values whose length cannot be greater than 5. Alternatively, you can put an integer between 1 and 5. This argument also accepts regular expressions.
#' @param mod.NA Logical, default is \code{FALSE}. If \code{TRUE}, and if the moderator contains \code{NA} values, all \code{NA} values from the moderator will be treated as a regular case: allows to display the distribution for missing values.
#' @param mod.method A character scalar: either i) \dQuote{split}, the default for categorical data, ii) \dQuote{side}, the default for data in logarithmic form or numeric data, or iii) \dQuote{stack}. This is only used when there is more than one moderator. If \code{"split"}: there is one separate histogram for each moderator case. If \code{"side"}: moderators are represented side by side for each value of the variable. If \code{"stack"}: the bars of the moderators are stacked onto each other, the bar heights representing the distribution in the total population. You can use the other arguments \code{within} and \code{total} to say whether the distributions should be within each moderator or over the total distribution.
#' @param within Logical, default is missing. Whether the distributions should be scaled to reflect the distribution within each moderator value. By default it is \code{TRUE} if \code{mod.method} is different from \code{"stack"}.
#' @param total Logical, default is missing. Whether the distributions should be scaled to reflect the total distribution (and not the distribution within each moderator value). By default it is \code{TRUE} only if \code{mod.method="stack"}.
#' @param labels.tilted Whether there should be tilted labels. Default is \code{FALSE} except when the data is split by moderators (see \code{mod.method}).
#' @param labels.angle Only if the labels of the x-axis are tilted. The angle of the tilt.
#' @param other Logical. Should there be a last column counting for the observations not displayed? Default is \code{TRUE} except when the data is split.
#' @param plot Logical, default is \code{TRUE}. If \code{FALSE} nothing is plotted, only the data is returned.
#' @param sep Positive number. The separation space between the bars. The scale depends on the type of graph.
#' @param centered Logical, default is \code{TRUE}. For numeric data only and when \code{sorted=FALSE}, whether the histogram should be centered on the mode.
#' @param weight.fun A function, by default it is \code{sum}. Aggregate function to be applied to the weight with respect to variable and the moderator. See examples.
#' @param at_5 Equal to \code{FALSE}, \code{"roman"} or \code{"line"}. When plotting categorical variables, adds a small Roman number under every 5 bars (\code{at_5 = "roman"}), or draws a thick axis line every 5 bars (\code{at_5 = "line"}). Helps to get the rank of the bars. The default depends on the type of data -- Not implemented when there is a moderator.
#' @param dict A dictionnary to rename the variables names in the axes and legend. Should be a named vector. By default it s the value of \code{getFplot_dict()}, which you can set with the function \code{\link[fplot]{setFplot_dict}}.
#' @param mod.title Character scalar. The title of the legend in case there is a moderator. You can set it to \code{TRUE} (the default) to display the moderator name. To display no title, set it to \code{NULL} or \code{FALSE}.
#' @param cex.axis Cex value to be passed to biased labels. By defaults, it finds automatically the right value.
#' @param int.categorical Logical. Whether integers should be treated as categorical variables. By default they are treated as categorical only when their range is small (i.e. smaller than 1000).
#' @param trunc If the main variable is a character, its values are truncaded to \code{trunc} characters. Default is 20. You can set the truncation method with the argument \code{trunc.method}.
#' @param trunc.method If the elements of the x-axis need to be truncated, this is the truncation method. It can be "auto", "right" or "mid".
#' @param cumul Logical, default is \code{FALSE}. If \code{TRUE}, then the cumulative distribution is plotted.
#' @param top What to display on the top of the bars. Can be equal to "frac" (for shares), "nb" or "none". The default depends on the type of the plot. To disable it you can also set it to \code{FALSE} or the empty string.
#' @param ... Other elements to be passed to plot.
#'
#' @details
#' Most default values can be modified with the function \code{\link[fplot]{setFplot_distr}}.
#'
#' @author Laurent Berge
#'
#' @seealso
#' To plot temporal evolutions: \code{\link[fplot]{plot_lines}}. For boxplot: \code{\link[fplot]{plot_box}}. To export graphs: \code{\link[fplot]{pdf_fit}}, \code{\link[fplot]{png_fit}}, \code{\link[fplot]{fit.off}}.
#'
#'
#' @examples
#'
#' # Data on publications from U.S. institutions
#' data(us_pub_econ)
#'
#' # 0) Let's set a dictionary for a better display of variables
#' setFplot_dict(c(institution = "U.S. Institution", jnl_top_25p = "Top 25% Pub.",
#'                 jnl_top_5p = "Top 5% Pub.", Frequency = "Publications"))
#'
#' # 1) Let's plot the distribution of publications by institutions:
#' plot_distr(~institution, us_pub_econ)
#'
#' # When there is only the variable, you can use a vector instead:
#' plot_distr(us_pub_econ$institution)
#'
#' # 2) Now the production of institution weighted by journal quality
#' plot_distr(jnl_top_5p ~ institution, us_pub_econ)
#'
#' # You can plot several variables:
#' plot_distr(1 + jnl_top_25p + jnl_top_5p ~ institution, us_pub_econ)
#'
#' # 3) Let's plot the journal distribution for the top 3 institutions
#'
#' # We can get the data from the previous graph
#' graph_data = plot_distr(jnl_top_5p ~ institution, us_pub_econ, plot = FALSE)
#' # And then select the top universities
#' top3_instit = graph_data$x[1:3]
#' top5_instit = graph_data$x[1:5] # we'll use it later
#'
#' # Now the distribution of journals
#' plot_distr(~ journal | institution, us_pub_econ[institution %in% top3_instit])
#' # Alternatively, you can use the argument mod.select:
#' plot_distr(~ journal | institution, us_pub_econ, mod.select = top3_instit)
#'
#' # 3') Same graph as before with "other" column, 5 institutions
#' plot_distr(~ journal | institution, us_pub_econ,
#'            mod.select = top5_instit, other = TRUE)
#'
#' #
#' # Example with continuous data
#' #
#'
#' # regular histogram
#' plot_distr(iris$Sepal.Length)
#'
#' # now splitting by species:
#' plot_distr(~ Sepal.Length | Species, iris)
#'
#' # idem but the three distr. are separated:
#' plot_distr(~ Sepal.Length | Species, iris, mod.method = "split")
#'
#' # Now the three are stacked
#' plot_distr(~ Sepal.Length | Species, iris, mod.method = "stack")
#'
#'
#'
plot_distr = function(fml, data, moderator, weight, sorted, log, nbins, bin.size, legend_options=list(), top, yaxis.show=TRUE, yaxis.num, col, border = "black", mod.method, within, total, mod.select, mod.NA = FALSE, at_5, labels.tilted, other, cumul = FALSE, plot = TRUE, sep, centered = TRUE, weight.fun, int.categorical, dict = NULL, mod.title = TRUE, labels.angle, cex.axis, trunc = 20, trunc.method = "auto", ...){
    # This function plots frequencies

    # DT VARS
    total_moderator = x_nb = isOther = otherValue = nb_new = share = share_top = moderator_nb = xleft = xright = ybottom = xleft_real = xright_real = x_num = mid_point = NULL
    ytop_new = ybottom_new = share_top_cum = value_cum = ytop_cum = value = NULL

    check_arg(fml, "formula | vector mbt")

    # save full formula
    fml_in = fml

    # New Structure 2020/ARPIL:
    # model.method is now split in two:
    # - mod.method, equal to "side", "split", "stack"
    # - within and total: TRUE/FALSE
    # - all values are missing
    check_arg_plus(mod.method, "null match(side, split, stack)")

    check_arg_plus(top, "null match(frac, nb, none, FALSE)")
    if(!missnull(top) && top == "FALSE") top = "none"

    check_arg_plus(at_5, "NULL match(roman, lines, FALSE)")

    check_arg(moderator, "null vector na ok")
    check_arg(weight, "null numeric vector na ok")

    check_arg(sep, "null numeric scalar GE{0}")
    check_arg(nbins, "null integer scalar GT{0}")
    check_arg(col, "vector(integer, character) NA OK")
    check_arg(bin.size, "null numeric scalar GT{0}")
    check_arg(dict, "null named character vector | logical scalar")
    check_arg(mod.title, "scalar(character, logical)")
    check_arg(labels.angle, "numeric scalar")

    check_arg("null logical scalar", sorted, log, labels.tilted, centered, other, within, total, int.categorical)
    check_arg("logical scalar", yaxis.show, yaxis.num, cumul, mod.NA, plot)

    mc = match.call()

    # Argument whose names have changed

    dots = list(...)
    args_deprec = c("maxBins", "addOther", "onTop", "maxFirst", "toLog")
    if(any(args_deprec %in% names(dots))){
        if("maxBins" %in% names(dots)){
            if(is.null(getOption("fplot_warn_maxBins"))){
                warning("Argument 'maxBins' has been renamed. Please use 'nbins' instead.")
                options(fplot_warn_maxBins = TRUE)
            }
            nbins = dots$maxBins
        }
        if("addOther" %in% names(dots)){
            if(is.null(getOption("fplot_warn_addOther"))){
                warning("Argument 'addOther' has been renamed. Please use 'other' instead.")
                options(fplot_warn_addOther = TRUE)
            }
            other = dots$addOther
        }
        if("onTop" %in% names(dots)){
            if(is.null(getOption("fplot_warn_onTop"))){
                warning("Argument 'onTop' has been renamed. Please use 'top' instead.")
                options(fplot_warn_onTop = TRUE)
            }
            top = dots$onTop
        }
        if("maxFirst" %in% names(dots)){
            if(is.null(getOption("fplot_warn_maxFirst"))){
                warning("Argument 'maxFirst' has been renamed. Please use 'sorted' instead.")
                options(fplot_warn_maxFirst = TRUE)
            }
            sorted = dots$maxFirst
        }
        if("toLog" %in% names(dots)){
            if(is.null(getOption("fplot_warn_toLog"))){
                warning("Argument 'toLog' has been renamed. Please use 'log' instead.")
                options(fplot_warn_toLog = TRUE)
            }
            log = dots$toLog
        }
    }

    set_defaults("fplot_distr")

    # validate_dots(valid_args = c(args_deprec, names(par()), formalArgs(plot.default)), stop = TRUE)

    #
    # Extracting x and the moderator ####
    #

    USE_WEIGHT = MOD_IS_WEIGHT = FALSE
    x_name = ylab = weight_name = moderator_name = ""
    if("formula" %in% class(fml_in)){

        check_arg(data, "data.frame mbt", .message = "If you provide a formula, a data.frame must be given in the argument 'data'.")

        # we check the variables data are there
        check_arg(fml, "formula left(,1) right(,2) var(data)", .data = data, .message = "Argument 'fml' must be a formula of the type weights ~ value | moderator ('weights' and 'moderator' are optional).")

        # Creation of x and the condition
        fml = fml_in

        # 2020/APRIL
        # New Structure:
        # weight_1 + weight_2 + etc ~ value | moderator
        # You can use one sided formulas: ~value

        info = extract_pipe(fml_in)

        fml = info$fml
        lhs_fml = info$lhs_fml
        pipe_fml = info$pipe_fml

        # OLD
        # x = eval(fml[[2]], data)
        # moderator = eval(fml[[3]], data)
        # weight = extract_df(pipe_fml, data)

        # NEW
        x = eval(fml[[3]], data)
        moderator = eval(info$pipe, data)
        weight = extract_df(lhs_fml, data)


        if(length(x) == 1 || length(all.vars(fml[[3]])) == 0){
            stop("The formula must be of the type 'weight(s) ~ variable | moderator', and there must always be the 'variable' element, which is currently missing.")
        }

        if(info$lhs_fml == ~1){
            weight = rep(1, length(x))
        } else if(length(weight) > 1) {
            MOD_IS_WEIGHT = TRUE
            USE_WEIGHT = TRUE
            if(length(moderator) > 1){
                stop("You cannot use a moderator and multiple weights at the same time.")
                # Later: implement a par mf_row (or sorts) to allow the plotting of multi weights + multi moderators
            }

            if(missing(mod.title)) mod.title = FALSE

            # check
            for(i in 1:length(weight)){
                if(is.logical(weight[[i]])){
                    weight[[i]] = weight[[i]] * 1
                } else if(!is.numeric(weight[[i]])){
                    stop("All weights must be numeric, but variable '", names(weight)[i], "' isn't.")
                }
            }

            # We need to stack the data
            n = length(weight[[1]])
            all_mod_names = dict_apply(names(weight), dict)

            # we create a factor to keep the user's order
            moderator = factor(rep(all_mod_names, each = n), levels = all_mod_names)
            x = rep(x, length(weight))
            weight = unlist(weight)

            if(missing(mod.title)){
                mod.title = "Variables"
            }

            moderator_name = ""
            weight_name = paste(enumerate_items(all_mod_names))

        } else {
            USE_WEIGHT = TRUE
            weight = unlist(weight)
            # weight_name = gsub("^.*\\| *", "", deparse(fml_in[[3]]))
            weight_name = deparse(lhs_fml[[2]])
            check_value_plus(weight, "numeric conv vector na ok", .message = "The 'weight' must be a numeric vector.")
        }

        if(length(moderator) <= 1){
            moderator = rep(1, length(x))
        }

        # other info
        # x_name = deparse(fml[[2]])
        # if(!MOD_IS_WEIGHT) moderator_name = deparse(fml[[3]])
        x_name = deparse(fml[[3]])
        if(!MOD_IS_WEIGHT) moderator_name = deparse(info$pipe_fml[[2]])
    } else {
        x = fml_in

        # default no name when no formula => otherwise ugly when exporting
        x_name = clean_name(deparse(substitute(fml)))

        # We check the vector
        if(!checkVector(x)){

            if(length(class(x)) == 1 && class(x) == "table"){
                x = as.vector(x)
            } else {
                if(length(x) == 0){
                    reason = "Currently, the length of fml is 0."
                } else {
                    reason = paste0("Currently, fml is of class ", enumerate_items(class(x)), ".")
                }

                stop("Wrong argument in fml. It must be either a formula, either a vector. ", reason)
            }

        }

        # moderator
        if(!missnull(moderator)){
            if(length(x) != length(moderator)){
                stop("The arguments 'x' and 'moderator' must be of the same length.")
            }
            moderator_name = clean_name(deparse(substitute(moderator)))
        } else {
            moderator = rep(1, length(x))
        }

        # weight
        if(!missnull(weight)){
            check_arg(weight, "numeric vector len(data)", .data = x)
            USE_WEIGHT = TRUE
            weight_name = clean_name(deparse(substitute(weight)))
        } else {
            weight = rep(1, length(x))
        }
    }

    #
    # Parameters setting ####
    #

    if(is.logical(x)){
        x = as.factor(x)
    }

    # Renaming: dict
    x_name = dict_apply(x_name, dict)
    moderator_name = dict_apply(moderator_name, dict)
    weight_name = dict_apply(weight_name, dict)

    # Dropping NAs
    quiNA_x = is.na(x)
    quiNA_mod = is.na(moderator)
    quiNA_weight = is.na(weight)
    if(mod.NA){
        quiNA = quiNA_x | quiNA_weight
        total_NA = sum(quiNA | quiNA_mod)
        if(is.factor(moderator)){
            moderator = addNA(moderator)
        } else {
            # The following will coerce moderator into character
            moderator[is.na(moderator)] = "NA"
        }
    } else {
        quiNA = quiNA_x | quiNA_mod | quiNA_weight
        total_NA = sum(quiNA)
    }

    if(total_NA > 0){
        nb_na = c(sum(quiNA_x), sum(quiNA_mod), sum(quiNA_weight))
        msg_na = paste0(c("x: ", "moderator: ", "weight: "), nb_na)
        message("NOTE: ", total_NA, " observations with NAs (", enumerate_items(msg_na[nb_na>0]), ")")
        x = x[!quiNA]
        moderator = moderator[!quiNA]
        weight = weight[!quiNA]
    }

    # case of empty strings
    if(is.character(x)){
        qui_empty = x == ""
        if(any(qui_empty)){
            x[qui_empty] = "[empty]"
        }
    }

    # Dealing with the moderator
    if(is.factor(moderator)){
        moderator_unik = levels(moderator[drop = TRUE])
        if(anyNA(moderator_unik)){
            # This is such a pain in the neck...
            moderator_unik[is.na(moderator_unik)] = "NA"
            moderator = factor(unclass(moderator), labels = moderator_unik)
        }
    } else {
        moderator_unik = sunique(moderator)
    }

    if(cumul){
        if(!missnull(other) && other == TRUE) message("Argument 'other = TRUE' is ignored since the cumulative distribution is asked for.")
        other = FALSE
    }

    moderator_cases = length(moderator_unik)
    isLegend = FALSE
    USE_MOD = FALSE

    if(moderator_cases > 1){
        USE_MOD = TRUE
        isLegend = TRUE

        n_select = NA
        do_recreate = FALSE
        if(missing(mod.select)){
            if(moderator_cases <= 5){
                # FINE!
            } else {
                # We select the top 3
                n_select = 3
            }
        } else if(length(mod.select) > 5){
            stop("The argument 'mod.select' (currently of length ", length(mod.select), ") cannot be of a length greater than 5!")
        } else if(length(mod.select) == 1 && is.numeric(mod.select) && mod.select %% 1 == 0){
            # Possibly a number
            if(mod.select %in% 1:5){
                n_select = mod.select
            } else {
                stop("Argument 'mod.select' does not have a valid value: if it is an integer, its value must lie between 1 and 5.")
            }
        } else {
            # mod.select must contain moderator values

            # 1) checking the problems
            mod_pblm = setdiff(mod.select, moderator_unik)
            if(length(mod_pblm) > 0){
                # We recreate mod.select

                if(length(mod_pblm) != length(mod.select) && any(quiNA)){
                    # means some are OK
                    suggestion = " Maybe because it has only NA values?"
                } else {
                    suggestion = " It must be either a moderator value, or a regular expression to select moderator values."
                }

                # Maybe they're regular expressions => we keep the order desired by the user
                mod.ok = c()
                for(r in mod.select){
                    if(r %in% moderator_unik){
                        mod.ok = c(mod.ok, r)
                    } else {
                        qui = grepl(r, moderator_unik)
                        n_qui = sum(qui)
                        if(n_qui == 0){
                            stop("In the argument 'mod.select', the value ", r, " could not be found in the moderator variable.", suggestion)
                        } else if(n_qui > 5){
                            stop("In the argument 'mod.select', the value ", r, " leads to more than 5 moderator values to be selected (e.g. ", enumerate_items(moderator_unik[qui], nmax = 10), ".")
                        }

                        mod.ok = c(mod.ok, moderator_unik[qui])
                    }
                }

                mod.ok = unique(mod.ok)
                if(length(mod.ok) > 5){
                    stop("Argument mod.select leads to ", length(mod.ok), " moderator values to be selected. But the maximum is 5.")
                }

                mod.select = mod.ok

            }

            # 2) selection
            do_recreate = TRUE

        }

        # AUTOMATIC selection
        if(!is.na(n_select) && moderator_cases > n_select){

            # We proceed with the "automatic" selection

            if(USE_WEIGHT){
                info_mod = data.table(moderator = moderator, w = weight)
                info_mod = info_mod[, list(value = sum(w)), by = moderator]
                info_method = "the value of the weight."
            } else {
                info_mod = data.table(moderator = moderator)
                info_mod = info_mod[, list(value = .N), by = moderator]
                info_method = "frequency."
            }

            # We inform on the method for the choice
            message(ifelse(n_select == 1, "The moderator was", paste0("The ", n_select, " moderators were")), " chosen based on ", info_method)

            info_mod = info_mod[order(-value)]
            mod.select = info_mod[1:n_select, moderator]

            do_recreate = TRUE

            if(n_select == 1){
                USE_MOD = isLegend = FALSE
            }

        }

        if(mod.NA){
            if(missing(mod.select)){
                do_recreate = FALSE
            } else {
                mod.select = unique(c(mod.select, "NA"))
                do_recreate = TRUE
            }
        }

        if(do_recreate){
            # We recreate the variables

            qui_select = which(moderator %in% mod.select)
            # we recreate all the values
            x = x[qui_select]
            moderator = moderator[qui_select]
            weight = weight[qui_select]

            # Re-Dealing with the moderator
            if(is.factor(moderator)){
                moderator_unik = levels(moderator[drop = TRUE])
            } else {
                moderator_unik = sunique(moderator)
            }

            moderator_cases = length(moderator_unik)
        }

    }

    # if(moderator_cases > 5) {
    #     stop("The number of cases of the moderator cannot be greater than 5!")
    # } else if(moderator_cases > 1){
    #     USE_MOD = TRUE
    #     # we put the legend only if there is a condition
    #     isLegend = TRUE
    # }

    # mod.title
    if(isLegend){
        if(isTRUE(mod.title)){
            mod.title = moderator_name
        } else if(isFALSE(mod.title)){
            mod.title = NULL
        }
    }


    # separation of the x cases:
    if(missnull(sep)){
        sep = 0.20 + 0.15 * (moderator_cases-1)
    }

    # Setting up the data
    # Finding out whether log or not
    isNum = is.numeric(x) && !USE_WEIGHT # no sense to weight "real" numeric data

    # Default of log and sorted
    delayLogChecking = FALSE
    if(missnull(log)){
        if(!isNum || any(x < 0)){
            log = FALSE
        } else {
            log = FALSE
            if(diff(range(base::log(x[x>0]))) >= 3 && max(x) > 100){
                delayLogChecking = TRUE
            }
        }
    } else if(log && !isNum){
        warning("Since 'x' is not numeric, argument 'log' is ignored.")
        log = FALSE
    }

    isInteger = NA
    if(missnull(sorted)){
        if(!isNum){
            sorted = TRUE
        } else if(log){
            sorted = FALSE
        } else {
            isInteger = all(x %% 1 == 0)
            if(isInteger){
                sorted = FALSE
            } else {
                sorted = FALSE
            }
        }
    }

    IS_MAXBIN_USER = TRUE
    if(missnull(nbins)){
        IS_MAXBIN_USER = FALSE
        bin_fit_all = c(20, 11, 8, 6, 5)
        bin_max_all = c(15, 9, 7, 5, 4)

        nb = ifelse(!missnull(mod.method) && mod.method == "stack", 1, moderator_cases)

        nbins = bin_fit_all[nb]
        if(length(unique(x)) > nbins){
            nbins = bin_max_all[nb]
        }
    }

    if(log){
        # Regle: la valeur d'une colonne est: superieur ou egal a la valeur gauche et strictement inferieurs a la valeur droite
        x = pmax(floor(base::log(x + 1e-6)), -1)
    }

    #
    # Binning ####
    #

    numAxis = FALSE
    binned_data = FALSE
    maxBins_old = nbins # used with delayLogChecking
    if(isNum && !log){
        if(!missnull(bin.size)){
            if(!missnull(int.categorical) && int.categorical && all(x %% 1 == 0)){
                isInteger = TRUE
            } else {
                x = (x %/% bin.size) * bin.size
                binned_data = TRUE
                if(!sorted){
                    numAxis = TRUE
                    if(IS_MAXBIN_USER == FALSE){
                        # Since it's a numeric axis, we put a LOT of bins
                        # It is a maximum anyway
                        nbins = 50
                    }
                }
            }

        } else {
            if(is.na(isInteger)){
                isInteger = all(x %% 1 == 0)
            }

            goNum = TRUE
            if(!missnull(int.categorical)){
                # we force integers as categorical
                if(int.categorical == TRUE) goNum = FALSE
                # if int.categorical == FALSE, then we force it as numeric
            } else if(isInteger && diff(range(x)) < 1000){
                # for integers with small range, we don't consider them
                # as numeric
                goNum = FALSE
            }

            if(goNum){

                if(!sorted){
                    numAxis = TRUE
                }

                if(IS_MAXBIN_USER == FALSE){
                    # we reset the number of bins: specific to the numeric case
                    nbins = bin_max_all[1]
                }

                binned_data = TRUE

                # we try to find a "good" bin size
                x_min = min(x)
                x_max = max(x)
                bin.size = signif((x_max - x_min) / min(max(nbins - 1, 10), 15), 1)
                x_tmp = (x %/% bin.size) * bin.size
                tx = ttable(x_tmp)
                if(sum(tx/sum(tx) > 0.03) < 6){
                    # Condition: nber of bins with more than 3% is lower than 6

                    if(delayLogChecking){
                        # Would it be better looking in logarithmic form?
                        x_ln = pmax(floor(base::log(x + 1e-6)), -1)
                        tx_ln = ttable(x_ln)
                        if(sum(tx_ln/sum(tx_ln) > 0.05) >= 5){
                            # nber of signif bins greater than 5
                            log = TRUE
                            x = x_ln
                            numAxis = FALSE
                            binned_data = FALSE
                            nbins = maxBins_old
                        }

                    }
                }

                if(!log){
                    if(sum(tx/sum(tx) > 0.01) < 6){
                        # Condition: nber of bins with more than 1% is lower than 6
                        # More complex binning:
                        # We try to find the "optimal" number of bins

                        n_bins = nbins
                        q001 = quantile(x, seq(0, 1, by = 1 / 40))
                        n_q = length(q001)

                        s_vect = c()
                        score_total_vect = c()
                        score_range_vect = c()
                        score_fullness_vect = c()

                        for(i in 1:20){

                            if(i == 1){
                                s = signif(diff(range(x)) / n_bins, 1)
                            } else {
                                s = signif(s / 1.5, 1)
                            }

                            value_range = integer(n_bins)
                            for(k in 1:n_q){
                                s_start = q001[k]
                                value_range[k] = sum(q001 >= s_start & q001 <= s_start + n_bins*s)
                                if(value_range[k] == length(q001)) break
                            }
                            s_start = q001[which.max(value_range)]

                            missing_range = 1 - sum(q001 >= s_start & q001 <= s_start + n_bins*s)/n_q
                            score_fullness = length(unique(na.omit(cut(q001, seq(s_start, by = s, length.out = n_bins + 1), include.lowest = TRUE))))/ n_bins

                            # Save
                            s_vect[i] = s
                            score_total_vect[i] = score_fullness/2 - 2*missing_range

                            if(missing_range > 0.4) break
                        }

                        bin.size = s_vect[which.max(score_total_vect)]
                        x = (x %/% bin.size) * bin.size
                    } else {
                        x = x_tmp
                    }

                }
            }

        }

    }

    #
    # Default values for mod.method
    #

    # Splitting decision
    if(missnull(mod.method)){
        if(log || numAxis){
            mod.method = "side"
        } else {
            mod.method = "split"
        }
    }

    if(!missnull(total)){
        if(!missnull(within)){
            stop("You cannot provide the values of both arguments 'within' and 'total' at the same time.")
        }
        # we only use the variable 'within' in the code
        within = !total
    } else if(!missnull(within) && within && mod.method == "stack"){
        stop("You cannot use within=TRUE when mod.method='stack'.")
    } else if(missnull(within)){
        within = TRUE
    }

    DO_STACK = FALSE
    if(mod.method == "stack"){

        if(cumul == TRUE){
            stop("You cannot use the argument cumul=TRUE with mod.method='stack'.")
            # I should implement it later, it might be useful in some situations (non overlapping cases)
        }

        DO_STACK = TRUE
        mod.method = "side"
        within = FALSE
    }

    DO_SPLIT = FALSE
    checkNotTilted = FALSE
    delayLabelsTilted = missnull(labels.tilted)
    if(moderator_cases > 1 && mod.method == "split"){
        DO_SPLIT = TRUE
        if(missnull(labels.tilted)){
            labels.tilted = TRUE
            checkNotTilted = TRUE
        }

        # We don't add the other column only when the data is split
        # and it is not a numeric axis or log axis
        if(missnull(other) && !log && !numAxis){
            other = FALSE
        }
    }

    if(missnull(other)){
        other = TRUE
    }

    # yaxis.num
    if(missnull(yaxis.num)){
        if(USE_WEIGHT == FALSE || missing(weight.fun) || (moderator_cases > 1 && within)){
            yaxis.num = FALSE
        } else {
            yaxis.num = TRUE
        }
    } else if(yaxis.num == TRUE && moderator_cases > 1 && within && !MOD_IS_WEIGHT){
        stop("Argument yaxis.num: You cannot have a numeric y-axis when the 'within' distributions among moderators should be reported (because the data is rescaled as frequencies first). Use total=TRUE (or within=FALSE) to display a numeric y-axis.")
    }

    checkForTilting = FALSE
    if(missnull(labels.tilted)){
        labels.tilted = FALSE
        # We check if it's better for tilting only in the regular case
        checkForTilting = TRUE
        # NOTE that we later update the value of checkForTilting
        # because we don't want to do it all the time (but we first need more information)
        # just search the next occurrence of checkForTilting
    }

    # The color
    if(missnull(col)){
        if(moderator_cases == 1){
            col = "#1F78B4"
        } else if(DO_SPLIT){
            col = "set1"
        } else {
            col = c("#1F78B4", "#A6CEE3", "#33A02C", "#B2DF8A", "#E31A1C", "#FB9A99", "#FF7F00", "#FDBF6F")
        }
    }

    if(length(col) == 1 && is.character(col)){
        # col_match = try(match.arg(tolower(col), c("set1", "paired")), silent = TRUE)
        # if(col_match == "set1"){
        #     col = c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
        # } else if(col_match == "paired"){
        #     col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00")
        # }
        check_value_plus(col, "match(set1, paired) | character scalar")
        if(col == "set1"){
            col = c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
        } else if(col == "paired"){
            col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00")
        }
    }

    numLabel = FALSE
    if(moderator_cases >= 2 && numAxis){

        if(DO_SPLIT){
            numLabel = TRUE
        }

        sep = 0
    }

    # for table elements
    if(is.table(x)){
        x = as.vector(x)
    }

    X_FACTOR = FALSE
    if(is.factor(x)){
        X_FACTOR = TRUE
        # we keep the order of x when factors
        x = x[drop = TRUE]
        x_fact_names = levels(x)
        x = unclass(x)
        # => we rename x later
    }

    #
    # Computing data_freq ####
    #

    ADD_OTHER = ADD_OTHER_LEFT = CUT_BAR = FALSE

    if(DO_SPLIT == FALSE){

        if(USE_WEIGHT){
            raw_data = data.table(x = x, weight = weight, moderator = moderator)
            if(missnull(weight.fun)){
                data_freq = raw_data[, list(value = sum(weight)), keyby = list(x, moderator)]
            } else {
                if(!is.function(weight.fun)){
                    stop("Argument 'weight.fun' must be a function.")
                } else if(length(weight.fun(head(weight))) != 1){
                    stop("Function 'weight.fun' must be return a value of length one.")
                }

                data_freq = raw_data[, list(value = weight.fun(weight)), keyby = list(x, moderator)]
            }

        } else {
            raw_data = data.table(x = x, moderator = moderator)
            data_freq = raw_data[, list(value = .N), keyby = list(x, moderator)]
        }

        if(USE_MOD && mod.method == "side" && within){
            data_freq[, "total_moderator" := list(sum(value)), by = moderator]
            data_freq[, "share" := list(value/total_moderator*100)]
        } else {
            total = sum(data_freq$value)
            data_freq[, "share" := list(value/total*100)]
        }

        # We balance the panel for log data
        if(log && sorted == FALSE){
            x_all = min(data_freq$x):max(data_freq$x)
            data_all = data.table(expand.grid(x = x_all, moderator = moderator_unik))
            tmp = merge(data_all, data_freq, all.x = TRUE, by = c("x", "moderator"))
            tmp[is.na(tmp)] = 0
            data_freq = tmp

        }


        # The sorting
        if(sorted){

            if(USE_MOD){
                data_freq[, "total_x" := list(sum(value)), by = x]
                # data_freq = data_freq[order(-total_x, moderator)]
                # setorderv(data_freq, c("total_x", "moderator", "x"), c(-1, 1, 1))
                setorderv(data_freq, c("total_x", "x"), c(-1, 1))
            } else {
                # data_freq = data_freq[order(-value)]
                setorderv(data_freq, "value", -1)
            }
            data_freq[, "x_nb" := list(rleid(x))]

        } else {
            data_freq = data_freq[order(x, moderator)]
            data_freq[, x_nb := quickUnclassFactor(x)]
        }

        # the "other" column
        data_freq[, isOther := FALSE]
        data_freq[, otherValue := ""]


        # if we center on the mode
        if(centered && any(data_freq$x_nb > nbins) && isNum){
            # we center the distribution
            data_freq[, nb_new := x_nb - which.max(value)]
            data_freq[, nb_new := nb_new + max(min(ceiling(nbins/2), abs(min(nb_new)) + 1), nbins - max(nb_new))]
            data_freq[, x_nb := nb_new]
            data_freq[, nb_new := NULL]

            if(other && sorted == FALSE && any(data_freq$x_nb <= 0)){
                if(!any(data_freq$x_nb < 0)){
                    # we just add that bin
                    data_freq[, x_nb := x_nb + 1]
                    nbins = nbins + 1
                } else {
                    ADD_OTHER_LEFT = TRUE
                    # We create a bin on the left
                    data_other_left = data_freq[x_nb <= 0]
                    data_other_left = data_other_left[, list(value = sum(value), share_top = sum(share)), by = moderator]
                    tmp = as.data.table(expand.grid(moderator = data_other_left$moderator))
                    other_col = merge(data_other_left, tmp)
                    other_col[is.na(other_col)] = 0
                    other_col[, x_nb := 0]
                    # we limit the size of this last column
                    share_max = max(data_freq[x_nb %in% 1:nbins, share])
                    # r = 1 + rank(other_col$value, ties.method = "max") / moderator_cases / 10
                    # other_col[, share := pmin(share_top, r * share_max)]
                    other_col[, share := pmin(share_top, 1.1 * share_max)]

                    if(any(other_col$share < other_col$share_top)){
                        CUT_BAR = TRUE
                    }

                    # we merge the information
                    data_freq = rbindDS(other_col, data_freq[x_nb > 0])
                    qui_NA = is.na(data_freq$share_top)
                    data_freq$share_top[qui_NA] = data_freq$share[qui_NA]

                    # Other text => depends on the type of values
                    data_freq[x_nb == 0, "isOther" := TRUE]
                    if(log){
                        x_first = ceiling(exp(data_freq[x_nb == 1, x][1]))
                        otherTextLeft = substitute(phantom()<x_val, list(x_val = formatAxisValue(x_first)))
                        data_freq[x_nb == 0, "otherValue" := paste("<", formatAxisValue(x_first))]
                    } else {
                        if(binned_data == FALSE){
                            x_other = data_freq[x_nb == 1, x][1]
                            otherTextLeft = substitute(phantom()<x_val, list(x_val = formatAxisValue(x_other)))
                            data_freq[x_nb == 0, "otherValue" := paste("<", formatAxisValue(x_other))]
                        } else {
                            # we update the value
                            x_other = data_freq[x_nb == 1, x][1]
                            data_freq[x_nb == 0, x := x_other - bin.size]
                            otherTextLeft = substitute(phantom()<x_other, list(x_other = formatAxisValue(x_other)))
                            data_freq[x_nb == 0, "otherValue" := paste("<", formatAxisValue(x_other))]
                        }

                    }
                }
            } else {
                # We clean negative numbers
                data_freq = data_freq[x_nb > 0]
            }

        }

        # The "other" column
        if(other && max(data_freq$x_nb) == nbins + 1){
            # we just add the bin
            nbins = nbins + 1
            data_freq = data_freq[x_nb <= nbins]
        } else if(other && any(data_freq$x_nb > nbins)){
            # we create the other colum as the sum of the rest
            ADD_OTHER = TRUE
            data_other = data_freq[x_nb > nbins]
            data_other = data_other[, list(value = sum(value), share_top = sum(share)), by = moderator]
            tmp = as.data.table(expand.grid(moderator = data_other$moderator))
            other_col = merge(data_other, tmp)
            other_col[is.na(other_col)] = 0
            other_col[, x_nb := nbins + 1]
            # we limit the size of this last column
            share_max = max(data_freq[x_nb %in% 1:nbins, share])

            # r = 1 + rank(other_col$value, ties.method = "max") / moderator_cases / 10
            # other_col[, share := pmin(share_top, r * share_max)]
            other_col[, share := pmin(share_top, 1.1 * share_max)]

            if(any(other_col$share < other_col$share_top)){
                CUT_BAR = TRUE
            }

            # we merge the information
            data_freq = rbindDS(data_freq[x_nb <= nbins], other_col)
            qui_NA = is.na(data_freq$share_top)
            data_freq$share_top[qui_NA] = data_freq$share[qui_NA]

            # Other text => depends on the type of values
            data_freq[x_nb == nbins + 1, "isOther" := TRUE]
            if(!isNum || sorted){
                otherText = "Other"
                data_freq[x_nb == nbins + 1, "otherValue" := "Other"]
            } else {
                if(log){
                    x_last = round(exp(data_freq[x_nb == nbins, x] + 1))
                    otherText = substitute(phantom()>x_val, list(x_val = formatAxisValue(x_last)))
                    data_freq[x_nb == nbins + 1, "otherValue" := paste(">", formatAxisValue(x_last))]
                } else {
                    if(binned_data == FALSE){
                        x_value = data_freq[x_nb == nbins, x]
                        otherText = substitute(phantom()>x_val, list(x_val = formatAxisValue(x_value)))
                        data_freq[x_nb == nbins + 1, "otherValue" := paste(">", formatAxisValue(x_value))]
                    } else {
                        x_other = data_freq[x_nb == nbins, x][1] + bin.size
                        data_freq[x_nb == nbins + 1, x := x_other]
                        otherText = substitute(phantom()>=x_other, list(x_other = formatAxisValue(x_other)))
                        data_freq[x_nb == nbins + 1, "otherValue" := paste(">", formatAxisValue(x_other))]
                    }

                }
            }

        } else {
            data_freq = data_freq[x_nb <= nbins]
        }



        if(!"share_top" %in% names(data_freq)){
            data_freq[, share_top := share]
        }

        x_nb_all = sort(unique(data_freq$x_nb))
        x_cases = length(x_nb_all)

        # We add the 0s
        if(nrow(data_freq) != x_cases*moderator_cases){
            # we need to add the 0s
            data_all = data.table(expand.grid(x_nb = x_nb_all, moderator = moderator_unik))
            setkey(data_all, x_nb, moderator)
            setkey(data_freq, x_nb, moderator)

            tmp = merge(data_all, data_freq, all.x = TRUE)
            tmp[, c("x", "isOther", "otherValue") := NULL]

            # Merging additional info
            base2merge = unique(data_freq[, list(x, x_nb, isOther, otherValue)])
            quoi = merge(tmp, base2merge, by = "x_nb")
            tmp = quoi[order(x_nb, moderator)]

            # setting to 0
            tmp[is.na(tmp)] = 0
            data_freq = tmp
        }


        # moderator_nb
        if(is.factor(moderator)){
            data_freq[, moderator_nb := as.numeric(unclass(moderator[drop = TRUE]))]
        } else {
            data_freq[, moderator_nb := quickUnclassFactor(moderator)]
        }

        # The coordinates
        if(numAxis){
            data_freq[, xleft := x + bin.size/moderator_cases * (moderator_nb - 1)]
            data_freq[, xright := x + bin.size/moderator_cases * moderator_nb]
        } else {
            data_freq[, xleft := (x_nb-1)*(moderator_cases+sep) + moderator_nb - 1]
            data_freq[, xright := (x_nb-1)*(moderator_cases+sep) + moderator_nb]
        }

        xlim = range_plus(c(data_freq$xleft, data_freq$xright), 5)

        data_freq[, ybottom := 0]
        data_freq[, ytop := share]

        if(X_FACTOR){
            # we reintroduce the values of x
            # Beware: "Other" can be either 0 (when side/split) // or NA (without moderator)

            x_value = rep(NA, nrow(data_freq))
            if(anyNA(data_freq$x)){
                qui_ok = !is.na(data_freq$x)
            } else {
                qui_ok = !data_freq$x == 0
            }

            x_value[qui_ok] = x_fact_names[data_freq$x[qui_ok]]
            data_freq[, x := x_value]
        }

    } else if(DO_SPLIT == TRUE){

        #
        # With SPLIT ####
        #

        # We apply a split to the data
        info = list()
        x_add = 0

        for(i in seq_along(moderator_unik)){
            # we apply plot_distr to subsets

            qui = moderator == moderator_unik[i]

            if(USE_WEIGHT){
                data_freq = plot_distr(x[qui], moderator = moderator[qui], weight = weight[qui], sorted = sorted, nbins = nbins, log = FALSE, other = other, plot = FALSE, bin.size = bin.size, int.categorical = !numAxis)
            } else {
                data_freq = plot_distr(x[qui], moderator = moderator[qui], sorted = sorted, nbins = nbins, log = FALSE, other = other, plot = FALSE, bin.size = bin.size, int.categorical = !numAxis)
            }

            # updating the information

            if(numAxis){
                # We add the value of a bin, BUT start when the last stops

                # we save the "real" values
                data_freq[, xleft_real := xleft]
                data_freq[, xright_real := xright]

                # Now we update xleft/xright
                min_left = min(data_freq$xleft)
                x_add_next = x_add + max(data_freq$xright) - min_left
                data_freq[, xleft := xleft - min_left + x_add]
                data_freq[, xright := xright - min_left + x_add]

                x_add = x_add_next + bin.size
            } else {
                min_left = min(data_freq$xleft)
                x_add_next = x_add + max(data_freq$xright) - min_left
                data_freq[, xleft := xleft - min_left + x_add]
                data_freq[, xright := xright - min_left + x_add]

                x_add = x_add_next + 0.7
            }

            data_freq[, moderator_nb := i]

            info[[i]] = data_freq
        }

        info_all = rbindlist(info, use.names=TRUE)

        if(mod.method == "split" && within == FALSE){
            # SPLIT TOTAL
            total = sum(weight)
            info_all[, share := value/total*100]
            info_all[, share_top := share]
            info_all[, ytop := share]
        }

        if(binned_data && other){
            # we format the data properly
            info_all$x = formatAxisValue(info_all$x)
        }

        if(X_FACTOR){
            # we reintroduce the values of x
            info_all[, x := x_fact_names[x]]
        }

        # managing "other"
        # info_all[, isOther := is.na(x)]

        if(any(info_all$isOther)){
            # bounding bar size
            share_max = max(info_all[isOther == FALSE, share])
            info_all[isOther == TRUE, share := pmin(share_top, 1.1 * share_max)]
            info_all[, ytop := share]

            if(any(info_all$share < info_all$share_top)){
                CUT_BAR = TRUE
            }

            info_all$x = as.character(info_all$x)
            info_all$x[is.na(info_all$x)] = "Other"
        }

        data_freq = info_all

        # Some information
        xlim = range_plus(c(data_freq$xleft, data_freq$xright), 5)
    }

    #
    # Graph setting ####
    #


    if(DO_STACK == TRUE){
        # we reformat the data
        # return(data_freq)
        new_coords = data_freq[isOther == FALSE, list(x_nb, xleft, xright, ybottom, ytop)]

        new_coords[, c("xleft_new", "xright_new") := list(min(xleft), max(xright)), by = x_nb]
        new_coords[, ytop_new := cumsum(ytop), by = x_nb]
        new_coords[, ybottom_new := shift(ytop_new, n = 1, fill = 0), by = x_nb]

        data_freq[isOther == FALSE, c("xleft", "xright", "ybottom", "ytop") := list(new_coords$xleft_new, new_coords$xright_new, new_coords$ybottom_new, new_coords$ytop_new)]

        if(any(data_freq$isOther)){
            # new values top
            max_y = max(data_freq$ytop)
            data_freq[isOther == TRUE, ytop := pmin(1.1 * max_y, share_top)]
        }
        # browser()
    }

    # Cumulative
    if(cumul == TRUE){
        data_freq[, share_top_cum := cumsum(share_top), by = moderator]
        data_freq[, value_cum := cumsum(share), by = moderator]
        data_freq[, ytop_cum := cumsum(ytop), by = moderator]

        data_freq[, share_top := share_top_cum]
        data_freq[, value := value_cum]
        data_freq[, ytop := ytop_cum]
    }

    if(plot == FALSE){
        return(data_freq)
    }

    # Plot preparation

    # default for top
    if(missnull(top)){
        if(cumul){
            top = "frac"
        } else if(numAxis){
            if(any(data_freq$isOther)){
                top = "frac"
            } else {
                top = "none"
            }
        } else if(moderator_cases > 1 && mod.method == "split"){
            # when neck to neck => much easier to compare shares
            # otherwise it is confusing
            top = "frac"
        } else {
            # if coef of variation of x is low => better information is the number
            values = data_freq[share_top <= share, ytop]
            cv = sd(values) / mean(values)
            if(cv > 0.7){
                top = "frac"
            } else {
                top = "nb"
            }
        }
    }

    if(is.null(dots$ylim)){
        ylim = c(0, max(data_freq$ytop))
        ymax_grid = ylim[2]

        # hauteur_caractere_legend = 0
        total_height = 0
        if(isLegend){
            info_legend = legendFit(legend = moderator_unik, plot = FALSE, title = mod.title)
            total_height = info_legend$total_height
        }

        hauteur_caractere_top = 0
        if(top != "none"){

            # We create the information to display top

            if(DO_STACK == FALSE){
                # Normal case
                info_top = data_freq[, list(x_mid = (xleft+xright)/2, ytop, value, share_top)]
            } else if(DO_STACK == TRUE){
                info_top = data_freq[isOther == FALSE, list(x_mid = mean((xleft+xright)/2), ytop = max(ytop), value = sum(value), share_top = sum(share_top)), by = x_nb]
                info_top_other = data_freq[isOther == TRUE, list(x_mid = (xleft+xright)/2, ytop, value, share_top)]
            }


            # We find the optimal cex for the top numbers
            if(top == "nb"){
                top.value2display = formatAxisValue(info_top$value)
            } else {
                top.value2display = paste0(mysignif(info_top$share_top, d = 2, r = 0), "%")
                # We truncate => otherwise ugly
                # top.value2display = gsub("0.0.+", "0.0%", top.value2display)
                # Only one digit after "0."
                top.value2display = gsub("(?<=\\.[[:digit:]]).+", "%", top.value2display, perl = TRUE)
            }

            #
            # NOTE on R behavior with new windows (x11) that are resized
            #
            # The following code computes the optimal width of the number/fractions to be displayed on the top
            # of the bars.
            # However, when we use x11() and then resize the window, R is unable to find out the actual window size
            # Instead we are only able to access the original size.
            # Unfortunately, the drawbacks of the following solutions are too important:
            #   - plotting a "clean" plot, so that R knows the actual size of the window.
            #       * Drawback: if the user calls for several plots in the same window => we're screwed
            #   - making a "smart guess" by assuming that the new window will be larger
            #       * drawback: if it is not the case, it will be ugly (better too small than too big)

            # Getting the width of the bars in inches
            unitary_width = par("pin")[1] / usr_width(xlim)
            if(numAxis){
                tdx = table(data_freq$xright - data_freq$xleft)
                bar_w = as.numeric(names(tdx)[which.max(tdx)])
                unitary_width = unitary_width * bar_w
            }

            # When DO_STACK==TRUE and numAxis==FALSE:
            #   the width is equal to the nber of moderators
            unitary_width_top = unitary_width * (1 + (moderator_cases - 1) * DO_STACK * (!numAxis))

            top.cex = 1
            minCex = 0.5
            v_max = top.value2display[which.max(strwidth(top.value2display, units = "in"))]
            while(top.cex > minCex && strwidth(v_max, units = "in", cex = top.cex) > unitary_width_top*0.9){
                top.cex = top.cex * 0.95
            }

            # the height we need to add to the plot
            hauteur_caractere_top = strheight("W", "in", cex = top.cex) * 1.35 + strwidth("W", "in", cex = 1) * top.cex

            # If DO_STACK==TRUE: the "other" columns have another width
            if(DO_STACK == TRUE && nrow(info_top_other) > 0){

                if(top == "nb"){
                    top.value2display_other = formatAxisValue(info_top_other$value)
                } else {
                    top.value2display_other = paste0(mysignif(info_top_other$share_top, d = 2, r = 0), "%")
                }

                top.cex_other = 1
                minCex = 0.5
                v_max = top.value2display_other[which.max(strwidth(top.value2display_other, units = "in"))]
                while(top.cex_other > minCex && strwidth(v_max, units = "in", cex = top.cex_other) > unitary_width*0.9){
                    top.cex_other = top.cex_other * 0.95
                }

                hauteur_caractere_top_other = strheight("W", "in", cex = top.cex_other) * 1.35 + strwidth("W", "in", cex = 1) * top.cex_other

                # We are careful to add only the necessary height to the plot
                hauteur_caractere_top_usr = hauteur_caractere_top / par("pin")[2] * diff(ylim)
                hauteur_caractere_top_other_usr = hauteur_caractere_top_other / par("pin")[2] * diff(ylim)
                height_normal = max(info_top$ytop) + hauteur_caractere_top_usr
                height_other = max(info_top_other$ytop) + hauteur_caractere_top_other_usr

                hauteur_caractere_top = (max(height_normal, height_other) - ylim[2]) * par("pin")[2] / diff(ylim)
            }

        }

        # We transform the character heights into user format (produit en croix)
        add_usr = (hauteur_caractere_top + total_height) / par("pin")[2] * diff(ylim)
        ylim[2] = ylim[2] + add_usr

        dots$ylim = ylim
    }

    dots$xlim = xlim
    dots$axes = FALSE
    dots$x = dots$y = dots$col = 0

    # ylab
    prefix = ""
    if(cumul) prefix = "Cumulative "
    if(moderator_cases > 1){

        if(nchar(moderator_name) == 0){
            # text = switch(mod.method, sideWithin = paste0("% Within"), sideTotal = "% Total", splitWithin = paste0("% Within"), splitTotal = "% Total")
            text = ifelse(within, "% Within", "% Total")
        } else {
            # text = switch(mod.method, sideWithin = paste0("% Within (", moderator_name, ")"), sideTotal = "% Total", splitWithin = paste0("% Within (", moderator_name, ")"), splitTotal = "% Total")
            text = ifelse(within, paste0("% Within ", moderator_name, ""), "% Total")
        }


        if(USE_WEIGHT){
            if(yaxis.num == TRUE){
                ylab = paste0(prefix, weight_name)
            } else {
                if(MOD_IS_WEIGHT){
                    ylab = paste0("Distribution of ", weight_name)
                } else {
                    ylab = paste0("Distribution of ", weight_name, " (", prefix, text, ")")
                }
            }
        } else {
            if(yaxis.num == TRUE){
                ylab = paste0(prefix, "Frequency")
            } else {
                ylab = paste0(prefix, text)
            }
        }
    } else if(USE_WEIGHT){
        ylab = paste0(prefix, "Distribution of ", weight_name)
    } else if(cumul){
        ylab = "Cumulative %"
    }

    # xlab

    # user providing xlab?
    noXlab = is.null(dots$xlab)
    noSub = is.null(dots$sub)
    # if(!noXlab || isNum){
    #     # if data is numeric, we will want the x-label, even if tilted
    #     line.max = 1 + 1
    # } else if(!noSub){
    #     line.max = 2 + 1
    # } else {
    #     line.max = 3 + 1
    # }
    line.max = 3 + 1

    # Update of checkForTilting
    if(checkForTilting){
        # Only for the standard case
        checkForTilting = !numAxis & !log
    }

    # if(checkForTilting){
    #     # "at risk" of tilting => we don't show the name
    #     listDefault(dots, "xlab", "")
    # } else if(labels.tilted){
    #     # we don't show the name of the variable if lab is tilted
    #     # except if the data is numeric
    #
    #     if(isNum){
    #         listDefault(dots, "xlab", x_name)
    #     } else {
    #         listDefault(dots, "xlab", "")
    #     }
    #
    #     # listDefault(dots, "sub", x_name)
    # } else {
    #     listDefault(dots, "xlab", x_name)
    # }

    if("xlab" %in% names(dots)){
        xlab = dots$xlab
    } else {
        xlab = x_name
    }
    dots$xlab = ""

    if("sub" %in% names(dots)){
        sub = dots$sub
    } else {
        sub = ""
    }
    dots$sub = ""

    #
    # Margins setting ####
    #

    if(yaxis.show){
        # we get the "nice" points display
        # y_points = axis(2, lwd=0, col.axis = 0)
        y_points = pretty(c(data_freq$ytop, 0))

        # We don't go above 100%
        y_points = y_points[y_points <= 100]

        if(yaxis.num){
            # x1 = data_freq[x_nb == 1][1, ]
            x1 = data_freq[ytop > 0][1, ]
            y_labels = formatAxisValue(y_points / x1$ytop * x1$value)
        } else {
            y_labels = paste0(y_points, "%")
        }
    } else {
        y_labels = ""
    }

    if("ylab" %in% names(dots)){
        ylab.resize = FALSE # we don't resize user-provided ylab
        ylab = dots$ylab
    } else {
        ylab.resize = TRUE
    }
    dots$ylab = ""


    # listDefault(dots, "ylab", ylab)
    listDefault(dots, "yaxs", "i")

    mleft = find_margins_left(ylab, y_labels, ylab.resize = ylab.resize)
    # finding the bottom margin is a pain in the neck!!!
    mbot = find_margins_bottom(xlab = xlab, sub = sub, data_freq = data_freq, log = log, isNum = isNum, numLabel = numLabel, numAxis = numAxis, nbins = nbins, DO_SPLIT = DO_SPLIT, ADD_OTHER = ADD_OTHER, ADD_OTHER_LEFT = ADD_OTHER_LEFT, sorted = sorted, labels.tilted = labels.tilted, delayLabelsTilted = delayLabelsTilted, checkForTilting = checkForTilting, checkNotTilted = checkNotTilted, noSub = noSub, binned_data = binned_data, line.max = line.max, trunc = trunc, trunc.method = trunc.method, cex.axis = cex.axis, labels.angle = labels.angle, at_5 = at_5, xlim = xlim)

    if(mleft$total_width > par("mai")[2] || mbot$total_height > par("mai")[1]){
        new_mai = par("mai")

        if(mleft$total_width > par("mai")[2]){
            new_mai[2] = mleft$total_width + 0.05
        }

        if(mbot$total_height > par("mai")[1]){
            new_mai[1] = mbot$total_height + 0.05
        }

        op = par(mai = new_mai)
        on.exit(par(op))
    }

    do.call("plot", dots)

    # ylab
    title(ylab = mleft$ylab, line = mleft$ylab.line)

    hgrid(ymax = ymax_grid)

    rect(xleft = data_freq$xleft, ybottom = data_freq$ybottom, xright = data_freq$xright, ytop = data_freq$ytop, col = col[data_freq$moderator_nb], border = border)

    #
    # "cutting" the bar (if other is very big)
    #

    if(CUT_BAR){
        data_other = data_freq[share_top > share]
        ytop = data_other$ytop[1]
        x_span = (data_other$xright - data_other$xleft)[1] / diff(get_x_lim()) * diff(get_y_lim())
        y_span =  ytop * .03
        y_all = ytop - 2:1*x_span
        for(i in 1:nrow(data_other)){
            shade_area(y_all, y_all - y_span, x = c(data_other$xleft[i], data_other$xright[i]), col = "white", border = "white")
        }
    }

    #
    # x-labels ####
    #

    # Information on plot
    at_info = data_freq[, list(mid_point = (max(xright) + min(xleft)) / 2), by = list(x_nb, x)]
    myat = at_info$mid_point

    if(log){

        if(DO_SPLIT){

            data_freq_valid = data_freq[isOther == FALSE, ]
            data_freq_valid[, x_num := as.numeric(x)]
            x_all = data_freq_valid$x_num
            exp_value = ceiling(exp(x_all))
            exp_value[x_all == -1] = 0

            if(sorted){

                if(delayLabelsTilted){
                    # better display
                    labels.tilted = TRUE
                }

                myat = data_freq_valid[, (xleft+xright)/2]

                exp_value_right = ceiling(exp(x_all + 1))

                # Formatting
                exp_value_format = formatAxisValue(exp_value)
                exp_value_right_format = formatAxisValue(exp_value_right)
                label_displayed = paste0("[", exp_value_format, "; ", exp_value_right_format, ")")

                # finding the location
                if(labels.tilted){
                    info_tilt = xaxis_biased(at = myat, line.max = line.max, labels = label_displayed)
                } else {
                    xaxis_labels(at = myat, labels = label_displayed)
                }

            } else {
                # we draw the axes with nice display

                moreLine = any(data_freq$isOther) * .25

                # Displaying the ticks "all at once" (including the last one)
                mysep = (data_freq$xleft[2] - data_freq$xright[1]) / 2

                exp_value_right = ceiling(exp(x_all + 1))

                # on the right
                myat = data_freq_valid$xright + mysep

                # We add the first tick on the left
                data_first = data_freq_valid[x_nb == 1]
                myat = c(data_first$xleft - mysep, myat)
                # exp_value = c(ceiling(exp(data_first$x_num - 1)), exp_value)
                first_val = ceiling(exp(data_first$x_num - 1))
                first_val[data_first$x_num == -1] = 0
                exp_value = c(first_val, exp_value_right)

                exp_value_format = formatAxisValue(exp_value)

                # tick location

                # 1) the ticks
                axis(1, at = myat, labels = NA, lwd.ticks = 1, lwd = 0, line = moreLine)

                # 2) The labels
                # Tilted labels not implemented for this axis
                if(delayLabelsTilted){
                    if(strwidth(paste0(exp_value_format, collapse = "  ")) / diff(get_x_lim()) > 0.9){
                        labels.tilted = TRUE
                    } else {
                        labels.tilted = FALSE
                    }

                }

                if(labels.tilted){
                    xaxis_biased(at = myat, labels = exp_value_format, yadj = 2, angle = 25)
                } else {
                    axis(1, at = myat, labels = exp_value_format, line = moreLine, lwd = 0)
                }


                # for extra ticking when info is ambiguous
                tck_location = par("usr")[3] - strheight("W")*81/100
                qui = which(exp_value < 10)
                if(length(qui) > 0){
                    # tick location
                    axis(2, at = tck_location - moreLine, pos = myat[qui], labels = NA, tcl = 0.15, xpd = TRUE)
                }

            }

            # Now the "other" ticks
            if(any(data_freq$isOther)){
                data_freq_other = data_freq[isOther == TRUE]

                if(sorted){
                    text2show = rep("Other", nrow(data_freq_other))
                    my_font = 1
                } else {
                    text2show = rep(">", nrow(data_freq_other))
                    text2show[grepl("<", data_freq_other$otherValue)] = "<"
                    my_font = 2
                }

                myat = data_freq_other[, (xleft + xright)/2]

                if(sorted && labels.tilted){
                    xaxis_biased(at = myat, line.max = line.max, labels = text2show, angle = info_tilt$angle, cex = info_tilt$cex)
                } else {
                    axis(1, at = myat, labels = text2show, lwd = 0, line = -0.8, font = my_font)
                }


            }

        } else {
            #
            # formatting of the values
            #

            x_unik = at_info[x_nb %in% 1:nbins, x]
            x_cases = length(x_unik)
            myat = at_info[x_nb %in% 1:nbins, mid_point]
            exp_value = ceiling(exp(x_unik))
            exp_value[x_unik == -1] = 0

            if(is.unsorted(x_unik) || x_cases == 1 || any(diff(x_unik) != 1)){
                exp_value_right = ceiling(exp(x_unik + 1))

                # Formatting
                exp_value_format = substr(exp_value, 1, 7)
                exp_value_right_format = substr(exp_value_right, 1, 7)

                # finding the location
                location = xaxis_labels(at = myat, labels = paste0("[", exp_value_format, "; ", exp_value_right_format, "["), only.params = TRUE)

                # drawing
                for(i in 1:length(x_unik)){

                    value = substitute(group("[",list(x1, x2),")"), list(x1 = formatAxisValue(exp_value[i]), x2 = formatAxisValue(exp_value_right[i])))

                    axis(1, at = myat[i], lwd = 0, labels = value, cex = location$cex, line = 1 + location$line[i])
                }


            } else {
                # we draw the axes with nice display

                moreLine = (ADD_OTHER || ADD_OTHER_LEFT) * .25

                tck_location = par("usr")[3] - strheight("W")*81/100

                # Displaying the ticks "all at once" (including the last one)
                val = c(exp_value, ceiling(exp(tail(x_unik, 1) + 1)))
                exp_value_format = formatAxisValue(val)


                # tick location
                loc = (1:length(val)-1)*(moderator_cases+sep) - sep/2
                axis(1, at = loc, labels = exp_value_format, line = moreLine, lwd.ticks = 1, lwd = 0)

                for(i in 1:x_cases){
                    # tick location
                    loc = (i-1)*(moderator_cases+sep) - sep/2
                    # Ticks showing "strictly" inferior
                    if(x_unik[1] < 2){
                        axis(2, at = tck_location - moreLine, pos = loc, labels = NA, tcl = 0.15, xpd = TRUE)
                    }

                }

            }

            # We find the right cex to fit the "other" group
            unitary_width = par("pin")[1] / usr_width(xlim)
            too_large = function(text, the_cex, the_shift) strwidth(text, units = "in", cex = the_cex)/2 - the_shift*unitary_width > unitary_width*(1/2+sep/2*1.5)

            if(ADD_OTHER){

                axis(1, at = at_info[x_nb == nbins + 1, mid_point], line = -0.8, labels = ">", lwd = 0, font = 2)

            }

            if(ADD_OTHER_LEFT){

                axis(1, at = at_info[x_nb == 0, mid_point], line = -0.8, labels = "<", lwd = 0, font = 2)

            }
        }

    } else if(numLabel){
        # moderator > 1 + split + numeric axis

        current_lim = get_x_lim()

        for(i in 1:moderator_cases){
            x_sub = data_freq[moderator == moderator_unik[i]]

            real_range = c(min(x_sub$xleft_real), max(x_sub$xright_real))
            x_show = pretty(real_range, 3)
            x_show = x_show[x_show >= real_range[1] & x_show <= real_range[2]]
            current_range = c(min(x_sub$xleft), max(x_sub$xright))
            x_show_current = to01(x_show, real_range) * diff(current_range) + min(current_range)
            axis(1, x_show_current, formatAxisValue(x_show), lwd = 0, lwd.ticks = 1)
            axis(1, current_range, NA, lwd = 1, lwd.ticks = 0)
        }

        # We add the bin information
        if(noSub){
            title(sub = substitute(paste("Bin size", phantom()==b), list(b = formatAxisValue(bin.size))), cex.sub = 0.9, line = mbot$sub.line)
            sub = ""
        }

        # now the Other
        if(any(data_freq$isOther)){
            data_freq_other = data_freq[isOther == TRUE]

            # we get the right cex
            w = data_freq_other[1, xright - xleft]
            max_lab = data_freq_other$otherValue[which.max(strwidth(data_freq_other$otherValue))]

            minCex = 0.7
            myCex = 1
            while(myCex > minCex && strwidth(max_lab, cex = myCex) > w){
                myCex = myCex * 0.95
            }

            axis(1, at = data_freq_other[, (xleft + xright)/2], labels = data_freq_other$otherValue, lwd = 0, line = -0.8, cex.axis = myCex)
        }


    } else if(numAxis){

        # We add the bin information
        if(noSub){
            title(sub = substitute(paste("Bin size", phantom()==b), list(b = formatAxisValue(bin.size))), cex.sub = 0.9, line = mbot$sub.line)
            sub = ""
        }

        myBox(1)

        axis_at = axis(1, lwd = 0, labels = NA)
        if(ADD_OTHER){
            # the axis without last tick
            at = axis_at[1:(length(axis_at) - 1)]

            if(abs(bin.size) > 1e4){
                axis(1, at, formatAxisValue(at))
            } else {
                axis(1, at)
            }


            # the "other" text
            axis(1, at = at_info[x_nb == nbins + 1, mid_point], line = -1, labels = otherText, lwd = 0)
        } else {
            if(abs(bin.size) > 1e4){
                axis(1, axis_at, formatAxisValue(axis_at))
            } else {
                axis(1, axis_at)
            }
        }

        if(ADD_OTHER_LEFT){
            # the "other" text
            axis(1, at = at_info[x_nb == 0, mid_point], line = -1, labels = otherTextLeft, lwd = 0)
        }

        # ticks at the right place
        if(moderator_cases > 1){
            # we add the rectangles to replicate a histogram
            # axis(1, at = data_freq[moderator_nb == 1, xleft], labels = NA, lwd = 0, lwd.ticks = 1, tcl = -0.45)
            rect_info = data_freq[, list(xleft = min(xleft), xright = max(xright), ytop = max(ytop)), by = x_nb]
            rect(xleft = rect_info$xleft, ybottom = 0, xright = rect_info$xright, ytop = rect_info$ytop, density = 0, lty = 2)
        }

    } else if(DO_SPLIT){
        # we need to display all xs

        # We add the bin information => specific case: max first + numeric data
        if(binned_data && noSub){
            title(sub = substitute(paste("Bin size", phantom()==b), list(b = formatAxisValue(bin.size))), cex.sub = 0.9, line = mbot$sub.line)
            sub = ""
        }

        data_freq[, mid_point := (xleft + xright) / 2]
        myLabels = data_freq$x
        myAt = data_freq$mid_point


        if(checkNotTilted){
            # If very short labels => we don't tilt them // allows to reintroduce xlab

            axis_info = xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method, only.params = TRUE)
            if(length(unique(axis_info$line)) == 1){
                labels.tilted = FALSE
            } else {
                labels.tilted = TRUE
            }
        }

        if(labels.tilted){
            xaxis_biased(at = myAt, labels = myLabels, angle=labels.angle, cex = cex.axis, trunc = trunc, trunc.method = trunc.method, line.max = line.max)
        } else {
            xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method)
        }


    } else if(isNum){
        # we can have the "other" column both left and right

        x_unik = at_info[x_nb %in% 1:nbins, x]
        myLabels = x_unik
        myAt = at_info[x_nb %in% 1:nbins, mid_point]

        info_axis = NULL
        if(labels.tilted == FALSE && mean(diff(x_unik)) == 1){
            # This is a "normal" axis
            # everything number follows, this is fine

            # # we still add the xlab we removed before (we could not anticipate the result)
            # if(checkForTilting){
            #     if(noXlab) title(xlab = x_name)
            # }

            axis(1, myAt, labels = myLabels)
        } else {
            if(checkForTilting){
                # If normal axis does not fit => tilt
                axis_info = xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method, only.params = TRUE)
                if(axis_info$failed){
                    labels.tilted = TRUE
                } else {
                    labels.tilted = FALSE
                    # # we add the label only if the user didn't provide it before
                    # if(noXlab) title(xlab = x_name)
                }
            }

            if(labels.tilted){
                info_axis = xaxis_biased(at = myAt, labels = myLabels, angle=labels.angle, cex = cex.axis, trunc = trunc, trunc.method = trunc.method, line.max = line.max)
            } else {
                info_axis = xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method, line.max = line.max)
            }
        }


        if(ADD_OTHER){
            # the "other" text

            if(is.null(info_axis)){
                axis(1, at = at_info[x_nb == nbins + 1, mid_point] + strwidth("> "), line = -0.85, labels = otherText, lwd = 0, cex.axis = 1)
            } else if(labels.tilted){
                xaxis_biased(at = at_info[x_nb == nbins + 1, mid_point], labels = otherText, angle = info_axis$angle, cex = info_axis$cex, trunc = trunc, trunc.method = trunc.method)
            } else {
                all_lines = sort(unique(info_axis$line))
                my_line = min(info_axis$line[info_axis$line != tail(info_axis$line, 1)])
                axis(1, at = at_info[x_nb == nbins + 1, mid_point] + strwidth("> ", cex = info_axis$cex), line = my_line, labels = otherText, lwd = 0, cex.axis = info_axis$cex)
            }
        }

        if(ADD_OTHER_LEFT){
            # the "other" text on the left

            if(is.null(info_axis)){
                axis(1, at = at_info[x_nb == 0, mid_point] - strwidth("< "), line = -0.85, labels = otherTextLeft, lwd = 0)
            } else if(labels.tilted){
                xaxis_biased(at = at_info[x_nb == 0, mid_point], labels = otherTextLeft, angle = info_axis$angle, cex = info_axis$cex, trunc = trunc, trunc.method = trunc.method)
            } else {
                my_line = min(info_axis$line[info_axis$line != info_axis$line[1]])
                axis(1, at = at_info[x_nb == 0, mid_point] - strwidth("< ", cex = info_axis$cex), line = my_line, labels = otherTextLeft, lwd = 0, cex.axis = info_axis$cex)
            }
        }

    } else {

        if(ADD_OTHER){
            nbins = nbins + 1
            at_info$x[nbins] = otherText
        }

        x_unik = at_info[x_nb %in% 1:nbins, x]
        myLabels = x_unik
        myAt = at_info[x_nb %in% 1:nbins, mid_point]

        if(checkForTilting){
            # If normal axis does not fit => tilt
            axis_info = xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method, only.params = TRUE)

            if(axis_info$failed){
                labels.tilted = TRUE
            } else {
                labels.tilted = FALSE
                # # we add the label only if the user didn't provide it before
                # if(noXlab) title(xlab = x_name)
            }
        }

        # We also add ticks every 5/10 bins to help counting
        if(missnull(at_5)){
            at_5 = ifelse(max(at_info$x_nb) > 10, TRUE, FALSE)
            if(at_5) {
                at_5 = ifelse(labels.tilted, "line", "roman")
            }
        } else {
            at_5 = at_5[1]
        }

        if(at_5 == "roman"){
            roman_dict = toupper(c("v", "x", "xv", "xx", "xxv", "xxx", "xxxv", "xL", "xLv", "L", "Lv", "Lx", "lxv", "lxx", "lxxv", "lxxx", "lxxxv", "xc", "xcv", "c"))
            names(roman_dict) = (1:20) * 5
        }

        if(at_5 != "FALSE"){
            qui = which(at_info$x_nb %% 5 == 0)
            # qui = qui[qui != nrow(at_info)]

            if(length(at_info)){
                # quoi data_freq[qui]
                for(i in qui){
                    if(at_5 == "roman"){
                        axis(1, at = at_info[i, mid_point], lwd = 0, labels = roman_dict[as.character(i)], line = -1.3, cex.axis = 0.6)
                    } else {
                        axis(1, at = data_freq[i, list(xleft, xright)], lwd = 3, lwd.ticks = 0, labels = NA)
                    }
                }


            }
        }

        if(labels.tilted){
            info_axis = xaxis_biased(at = myAt, labels = myLabels, angle=labels.angle, cex = cex.axis, trunc = trunc, trunc.method = trunc.method, line.max = line.max, line.min = 0.45 * (at_5 == "roman"))
        } else {
            info_axis = xaxis_labels(at = myAt, labels = myLabels, trunc = trunc, trunc.method = trunc.method, line.min = 0.25 * (at_5 == "roman"))
        }

    }

    #
    # yaxis and topping ####
    #

    if(xlab != ""){
        title(xlab = xlab, line = mbot$xlab.line)
    }

    if(sub != ""){
        title(sub = sub, line = mbot$sub.line)
    }


    if(yaxis.show){
        # we get the "nice" points display
        y_points = axis(2, lwd=0, col.axis = 0)

        # We don't go above 100%
        y_points = y_points[y_points <= 100]

        if(yaxis.num){
            # x1 = data_freq[x_nb == 1][1, ]
            x1 = data_freq[ytop > 0][1, ]
            y_labels = formatAxisValue(y_points / x1$ytop * x1$value)
        } else {
            y_labels = paste0(y_points, "%")
        }

        # we find the proper width
        if("ylab" %in% names(dots)){
            ylab = dots$ylab
        }

        if(nchar(ylab) == 0){
            myWidth = strwidth("7775%")
        } else {
            myWidth = strwidth("75%")
        }

        # We find the right cex
        minCex = 0.7
        myCex = 1
        y_labels_max = y_labels[which.max(strwidth(y_labels))]
        while(strwidth(y_labels_max, cex = myCex) > myWidth && myCex >= minCex){
            myCex = 0.95 * myCex
        }

        axis(2, at=y_points, labels = y_labels, las = 2, cex.axis = myCex)
    }

    # Legend if needed
    if(isLegend){
        #
        # finding the cex of the legend => depends on the longuest character
        #

        # legendFit("top", moderator_unik, bg="white", bty="o", box.col="white", fill=col[1:moderator_cases])

        legendFit("top", moderator_unik, fill=col[1:moderator_cases], title = mod.title, title_out = TRUE, trunc = info_legend$trunc)
    }

    # On top of the bars:

    if(top != "none"){

        qui = strwidth(top.value2display, units = "in", cex = top.cex) <= unitary_width_top*1.5 | nchar(top.value2display) <= 3

        # we don't display the 0s only
        qui = qui & info_top$value > 0

        # if(top.cex < 0.95 && top == "frac"){
        #     # we don't display the small numbers
        #     max_height = max(info_top$ytop)
        #     qui = qui & (info_top$ytop > max_height*0.05 | info_top$share_top >= 1)
        # }

        if(any(qui)){
            # text(data_freq[, (xleft+xright)/2][qui], data_freq$ytop[qui], label = top.value2display[qui], pos = 3, cex = top.cex, offset = 0.5 * top.cex * 0.9)
            text(info_top$x_mid[qui], info_top$ytop[qui], label = top.value2display[qui], pos = 3, cex = top.cex, offset = 0.5 * top.cex * 0.9)
        }

        if(DO_STACK && nrow(info_top_other) > 0){
            text(info_top_other$x_mid[qui], info_top_other$ytop[qui], label = top.value2display_other[qui], pos = 3, cex = top.cex_other, offset = 0.5 * top.cex_other * 0.9)
        }

    }

    return(invisible(data_freq))
}





#' Display means conditionnally on some other values
#'
#' The typical use of this function is to represents trends of average along some categorical variable.
#'
#' @inheritParams plot_distr
#'
#' @param fml A formula of the type \code{variable ~ time | moderator}. Note that the moderator is optional. Can also be a vector representing the elements of the variable If a formula is provided, then you must add the argument \sQuote{data}. You can use multiple variables. If so, you cannot use a moderator at the same time.
#' @param data Data frame containing the variables of the formula. Used only if the argument \sQuote{fml} is a formula.
#' @param time Only if argument \sQuote{fml} is a vector. It should be the vector of \sQuote{time} identifiers to average over.
#' @param moderator Only if argument \sQuote{fml} is a vector. It should be a vector of conditional values to average over. This is an optional parameter.
#' @param fun Function to apply when aggregating the values on the time variable. Default is \code{mean}.
#' @param mod.select Which moderators to select. By default the top 5 moderators in terms of frequency (or in terms of the value of fun in case of identical frequencies) are displayed. If provided, it must be a vector of moderator values whose length cannot be greater than 10. Alternatively, you can put an integer between 1 and 10.
#' @param smoothing_window Default is 0. The number of time periods to average over. Note that if it is provided the new value for each period is the average of the current period and the \code{smoothing_window} time periods before and after.
#' @param col The colors. Either a vector or a keyword (\dQuote{Set1} or \dQuote{paired}). By default those are the \dQuote{Set1} colors colorBrewer. This argument is used only if there is a moderator.
#' @param lty The line types, in the case there are more than one moderator. By default it is equal to 1 (ie no difference between moderators).
#' @param pch The form types of the points, in the case there are more than one moderator. By default it is equal to \8code{c(19, 17, 15, 8, 5, 4, 3, 1)}.
#' @param pt.cex Default to 2. The \code{cex} of the points.
#' @param lwd Default to 2. The width of the lines.
#' @param legend_options A list containing additional parameters for the function \code{\link[graphics]{legend}} -- only concerns the moderator. Note that you can set the additionnal arguments \code{trunc} and \code{trunc.method} which relates to the number of characters to show and the truncation method. By default the algorithm truncates automatically when needed.
#' @param ... Other arguments to be passed to the function \code{plot}.
#'
#' @author Laurent Berge
#'
#'
#' @examples
#'
#'
#' data(airquality)
#'
#' plot_lines(Ozone ~ Day, airquality)
#'
#' plot_lines(Ozone ~ Day | Month, airquality)
#'
#' plot_lines(Ozone ~ Month | cut(Day, 8), airquality)
#'
#'
plot_lines = function(fml, data, time, moderator, mod.select, mod.NA = TRUE, smoothing_window = 0, fun, col = "set1", lty = 1, pch = c(19, 17, 15, 8, 5, 4, 3, 1),  legend_options = list(), pt.cex=2, lwd=2, dict = NULL, mod.title = TRUE, ...){
    # This functions plots the means of x wrt the id
    # we can also add a moderator

    # DT VARS
    moderator_cases = xleft = xright = ybottom = ytop = value = NULL

    check_arg(fml, "formula right(,2) | vector mbt")

    # old params
    style = "line"
    outCol = "black"
    # style = match.arg(style)

    mc = match.call()

    fml_in = fml

    check_arg(smoothing_window, "integer scalar GE{0}")

    check_arg(mod.NA, "logical scalar")

    check_arg(dict, "null named character vector | logical scalar")

    # The color
    if(length(col) == 1){
        check_value_plus(col, "match(set1, paired) | scalar")
        if(col == "set1"){
            col = c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
        } else if(col == "paired"){
            col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00")
        }
    }

    #
    # Extracting the variables ####
    #

    moderator_name = ""
    if("formula" %in% class(fml_in)){
        # Control of the formula

        check_arg(data, "data.frame mbt", .message = "If you provide a formula, a data.frame must be given in the argument 'data'.")

        # if(missing(data) || !is.data.frame(data)){
        #     postfix = ifelse(!is.data.frame(data), paste0(" Currently it is of class ", enumerate_items(class(data))), "")
        #
        #     stop("If you provide a formula, a data.frame must be given in the argument 'data'.", postfix)
        # }

        vars = all.vars(fml_in)
        if(any(!vars %in% names(data))){
            stop("The variable", enumerate_items(setdiff(vars, names(data)), "s.is")," not in the data set (", deparse(mc$data), ").")
        }

        # Creation of x and the condition
        if(!length(fml_in) == 3){
            stop("The formula must be of the type 'var ~ time' or 'var ~ time | moderator'.")
        }

        info = extract_pipe(fml_in)

        lhs_fml = info$lhs_fml
        fml = info$fml
        pipe = info$pipe

        x = extract_df(lhs_fml, data)
        time = eval(fml[[3]], data)
        moderator = eval(pipe, data)

        # other info
        MULTI_VAR = FALSE
        if(length(x) > 1){
            # multiple variables to plot
            MULTI_VAR = TRUE

            if(length(moderator) > 1){
                stop("You cannot use a moderator and multiple variables at the same time. If you intended to use one variable only, wrap it into I(): I(", deparse(lhs_fml[[2]]), ").")
            }

            # check
            for(i in 1:length(x)){
                if(is.logical(x[[i]])){
                    x[[i]] = x[[i]] * 1
                } else if(!is.numeric(x[[i]])){
                    stop("All variables must be numeric, but variable '", names(x)[i], "' isn't.")
                }
            }

            if("Frequency" %in% names(x) && missing(fun)){
                fun = sum
                fun_name = "sum"
            } else if(missing(fun)){
                fun_name = "mean"
            } else {
                fun_name = deparse(substitute(fun))
            }

            # We need to stack the data
            n = length(x[[1]])
            all_x_names = dict_apply(names(x), dict)

            # x_name = paste0(fun_name, "(Variables)")
            x_name = substitute(paste(f, phantom(1), group("(", italic(Variables), ")")), list(f = fun_name))

            moderator = factor(rep(all_x_names, each = n), levels = all_x_names)
            time = rep(time, length(x))
            x = unlist(x)

            if(missing(mod.title)){
                mod.title = "Variables"
            }

            moderator_name = ""
        } else {
            x = unlist(x)
            if(is.logical(x)) x = x * 1
            x_name = deparse(fml[[2]])
            if(!is.numeric(x)) stop("The variable to plot must be numeric, currently '", x_name, "' isn't.")
            if(x_name == "1"){
                x_name = "Frequency"
                fun = sum
            }
        }
        # if(length(x) == 1){
        #     # this is a shortcut to say that we want to display the frequency
        #     fun = sum
        #     x_name = "Frequency"
        #     x = rep(1, length(time))
        # } else {
        #     x_name = deparse(fml[[2]])
        # }

        if(is.null(moderator)){
            moderator = rep(1, length(x))
        } else if(MULTI_VAR == FALSE){
            moderator_name = gsub("^.*\\| *", "", deparse(fml_in[[3]]))
        }

        time_name = deparse(fml[[3]])

    } else {

        # Wow, the next two lines replace 10 lines of code, and error-handling is much better done
        check_arg(time, "vector len(data) mbt", .data = fml)
        check_arg(moderator, "safe NULL vector len(data)", .data = fml)

        x = fml_in

        if(!missnull(moderator)){
            moderator_name = deparse(substitute(moderator))
        } else {
            moderator = rep(1, length(x))
        }

        x_name = deparse(substitute(fml_in))
        time_name = deparse(substitute(time))
        # 14 lignes

        # if(missing(time)){
        #     stop("You must provide the argument 'time'.")
        # } else {
        #     x = fml_in
        #
        #     if(length(x) != length(time)){
        #         stop("The arguments 'x' and 'time' must be of the same length.")
        #     }
        #
        #     if(!missnull(moderator)){
        #         if(length(x) != length(moderator)){
        #             stop("If provided, the argument 'moderator' must be of the same length of 'x'.")
        #         }
        #         moderator_name = deparse(substitute(moderator))
        #     } else {
        #         moderator = rep(1, length(x))
        #     }
        #
        #     # other info
        #     x_name = deparse(substitute(fml_in))
        #     time_name = deparse(substitute(time))
        # }
        # 22 lignes
    }

    # Naming
    if(!is.call(x_name)) x_name = dict_apply(x_name, dict)

    time_name = dict_apply(time_name, dict)
    moderator_name = dict_apply(moderator_name, dict)

    # Dropping NAs
    quiNA_x = is.na(x)
    quiNA_time = is.na(time)
    quiNA_mod = is.na(moderator)

    if(mod.NA){
        quiNA = quiNA_x | quiNA_time
        total_NA = sum(quiNA | quiNA_mod)
        if(is.factor(moderator)){
            moderator = addNA(moderator)
        } else {
            # The following will coerce moderator into character
            moderator[is.na(moderator)] = "NA"
        }
    } else {
        quiNA = quiNA_x | quiNA_time | quiNA_mod
        total_NA = sum(quiNA)
    }

    if(total_NA > 0){
        nb_na = c(sum(quiNA_x), sum(quiNA_mod), sum(quiNA_time))
        msg_na = paste0(c("x: ", "moderator: ", "time: "), nb_na)
        message("NOTE: ", total_NA, " observations with NAs (", enumerate_items(msg_na[nb_na>0]), ")")
        x = x[!quiNA]
        moderator = moderator[!quiNA]
        time = time[!quiNA]
    }

    # We first check the function is valid
    if(missing(fun)){
        fun = mean
    } else {
        if(!is.function(fun)){
            stop("Argument 'fun' must be a function. A the moment its class is ", class(fun)[1], ".")
        }

        test = try(fun(head(x, 5)), silent = TRUE)
        if("try-error" %in% class(test)){
            stop("Evaluation of the function '", deparse_long(mc$fun), "' leads to an error\n", test)
        } else if(length(test) > 1){
            stop("The function in argument 'fun' MUST be an aggregating function: that is returning something of length 1. This is not the case currently.")
        }

    }

    n_moderator = length(unique(moderator))
    if(is.factor(moderator)){
        moderator_unik = levels(moderator[drop = TRUE])
        if(anyNA(moderator_unik)){
            # This is such a pain in the neck...
            moderator_unik[is.na(moderator_unik)] = "NA"
            moderator = factor(unclass(moderator), labels = moderator_unik)
        }
    } else {
        moderator_unik = sunique(as.character(moderator))
    }

    if(n_moderator == 1){
        isLegend = FALSE
    } else {
        isLegend = TRUE
    }

    # mod.select
    if(n_moderator > 1){

        n_select = NA
        do_recreate = FALSE
        if(missing(mod.select)){
            if(n_moderator <= 8){
                # FINE!
            } else {
                # We select the top 5
                n_select = 5
            }
        } else if(length(mod.select) > 10){
            stop("The argument 'mod.select' (currently of length ", length(mod.select), ") cannot be of a length greater than 10.")
        } else if(length(mod.select) == 1 && is.numeric(mod.select) && mod.select %% 1 == 0){
            # Possibly a number
            if(mod.select %in% 1:10){
                n_select = mod.select
            } else {
                stop("Argument 'mod.select' does not have a valid value: if it is an integer, its value must lie between 1 and 10.")
            }
        } else {
            # mod.select must contain moderator values

            # 1) checking the problems
            mod_pblm = setdiff(mod.select, moderator_unik)
            if(length(mod_pblm) > 0){
                # We recreate mod.select

                if(length(mod_pblm) != length(mod.select) && any(quiNA)){
                    # means some are OK
                    suggestion = " Maybe because it has only NA values?"
                } else {
                    suggestion = " It must be either a moderator value, or a regular expression to select moderator values."
                }

                # Maybe they're regular expressions => we keep the order desired by the user
                mod.ok = c()
                for(r in mod.select){
                    if(r %in% moderator_unik){
                        mod.ok = c(mod.ok, r)
                    } else {
                        qui = grepl(r, moderator_unik)
                        n_qui = sum(qui)
                        if(n_qui == 0){
                            stop("In the argument 'mod.select', the value ", r, " could not be found in the moderator variable.", suggestion)
                        } else if(n_qui > 10){
                            stop("In the argument 'mod.select', the value ", r, " leads to more than 10 moderator values to be selected (e.g. ", enumerate_items(moderator_unik[qui], nmax = 15), ".")
                        }

                        mod.ok = c(mod.ok, moderator_unik[qui])
                    }
                }

                mod.ok = unique(mod.ok)
                if(length(mod.ok) > 10){
                    stop("Argument mod.select leads to ", length(mod.ok), " moderator values to be selected. But the maximum is 10.")
                }

                mod.select = mod.ok

            }

            # 2) selection
            do_recreate = TRUE

        }

        # AUTOMATIC selection
        if(!is.na(n_select) && n_moderator > n_select){

            # We proceed with the "automatic" selection

            info_mod = data.table(moderator = moderator)
            info_mod = info_mod[, list(value = .N), by = moderator]
            if(var(info_mod$value) == 0){
                # same freq for all
                info_mod = data.table(x = x, moderator = moderator)
                info_mod = info_mod[, list(value = fun(x)), by = moderator]
                info_method = paste0("the max of ", deparse(mc$fun)[1], ".")
            } else {
                info_method = "frequency."
            }

            # We inform on the method for the choice
            message(ifelse(n_select == 1, "The moderator was", paste0("The ", n_select, " moderators were")), " chosen based on ", info_method)

            info_mod = info_mod[order(-value)]
            mod.select = info_mod[1:n_select, moderator]

            do_recreate = TRUE

            if(n_select == 1){
                isLegend = FALSE
            }

        }

        if(mod.NA){
            if(missing(mod.select)){
                do_recreate = FALSE
            } else {
                mod.select = unique(c(mod.select, "NA"))
                do_recreate = TRUE
            }
        }

        if(do_recreate){
            # We recreate the variables

            qui_select = which(moderator %in% mod.select)
            # we recreate all the values
            x = x[qui_select]
            moderator = moderator[qui_select]
            time = time[qui_select]

            # Re-Dealing with the moderator
            if(is.factor(moderator)){
                moderator_unik = levels(moderator[drop = TRUE])
            } else {
                moderator_unik = sunique(moderator)
            }

            n_moderator = length(moderator_unik)
        }

    }

    #
    # Aggregation
    #

    quoi = data.table(x=x, time=time, moderator=moderator)
    base_agg = quoi[!is.na(x), list(x = fun(x)), by = list(time, moderator)]

    #
    # Preparation
    #

    dots = list(...)

    # ylim
    if(!is.null(dots$ylim)){
        ylim = dots$ylim
    } else if(style == "line"){
        ylim = range(base_agg[!is.na(moderator), x])
    } else if(style == "bar"){
        rx = range(base_agg[!is.na(moderator), x])
        ylim = c(min(0, rx[1]), max(0, rx[2]))
    }

    ymax = Inf
    if(isLegend){
        ymax = ylim[2]

        if(isTRUE(mod.title)){
            mod.title = moderator_name
        } else if(isFALSE(mod.title)){
            mod.title = NULL
        }

        info_legend = legendFit(legend = moderator_unik, title = mod.title, plot = FALSE)
        legend_height = info_legend$total_height
        ylim[2] = ylim[2] + (legend_height + 0.25) / par("pin")[2] * diff(ylim)
    }

    listDefault(dots, "ylim", ylim)
    is_ylab = "ylab" %in% names(dots)
    my_ylab = ifelse(is_ylab, dots$ylab, "")
    dots$ylab = ""
    # listDefault(dots, "ylab", x_name)
    listDefault(dots, "xlab", time_name)
    dots$type = "n"

    dots$y = rep(1, length(unique(base_agg$time)))
    makeAxes = FALSE
    if(is.numeric(base_agg$time)){
        dots$x = sunique(base_agg$time)
    } else {
        # for character vectors, we need to recreate the xaxis
        if(is.null(dots$axes) || dots$axes){
            makeAxes = TRUE
        }
        dots$axes = FALSE
        if(is.factor(base_agg$time)){
            time_unik = levels(unique(base_agg$time)[, drop=TRUE])
            dict_time = 1:length(time_unik)
            names(dict_time) = time_unik
            dots$x = 1:length(time_unik)
            base_agg[, time := dict_time[as.character(time)]]
        } else {
            time_unik = sunique(base_agg$time)
            dots$x = quickUnclassFactor(time_unik)
            base_agg[, time := quickUnclassFactor(time)]
        }
    }



    if(style == "bar"){
        total_width = min(diff(sort(unique(base_agg$time))))
        bar_width = total_width / (n_moderator + 1)

        if(is.null(dots$xlim)){
            total_width = min(diff(sort(unique(base_agg$time))))
            dots$xlim = c(min(base_agg$time) - total_width * 0.5 + bar_width/2, max(base_agg$time) + total_width * 0.5 - bar_width/2)
        }
    } else {
        if(smoothing_window > 0){
            quoi = sort(dots$x)

            if(length(quoi) < 2*smoothing_window + 2){
                stop("To smooth with smoothing_window=", smoothing_window, " you need at least ", 2*smoothing_window + 2, " periods. Yet at the moment there is only ", length(quoi), " periods.")
            }

            dots$xlim = quoi[c(smoothing_window + 1, length(quoi) - smoothing_window)]
        }
    }

    do.call("plot", dots)
    dots$axes = NULL # not a graphical parameter afterwards

    # ylab: because of problems with calls
    if(is_ylab){
        title(ylab = my_ylab)
    } else {
        title(ylab = x_name)
    }


    if(style == "bar"){
        hgrid(ymax = ymax)
    } else {
        hgrid(ymax = ymax)
        vgrid(ymax = ymax)
        # grid(col = "darkgray")
    }

    if(makeAxes){
        box()
        axis(2)
        axis(1, dots$x, time_unik)
    }

    #
    # The plot_line
    #

    all_col = col[1 + (0:(n_moderator-1))%%length(col)]
    all_lty = lty[1 + (0:(n_moderator-1))%%length(lty)]
    all_pch = pch[1 + (0:(n_moderator-1))%%length(pch)]

    if(style == "line"){
        for(i in 1:n_moderator){
            dots$col = all_col[i]
            dots$lty = all_lty[i]
            dots$pch = all_pch[i]
            dots$cex = pt.cex
            dots$add = TRUE
            dots$smoothing_window = smoothing_window
            dots$x = base_agg[moderator == moderator_unik[i], time]
            dots$y = base_agg[moderator == moderator_unik[i], x]

            do.call("plot_line", dots)
        }
    } else if(style == "bar"){
        # Style is barplot
        base_agg[, moderator_cases := quickUnclassFactor(moderator)]

        base_agg[, xleft := time - total_width/2 + bar_width/2 + bar_width*(moderator_cases - 1)]
        base_agg[, xright := xleft + bar_width]
        base_agg[, ybottom := pmin(0, x)]
        base_agg[, ytop := pmax(0, x)]

        # the call to rect
        all_outcol = outCol[1 + (0:(n_moderator-1))%%length(outCol)]

        dots$xleft = base_agg$xleft
        dots$ybottom = base_agg$ybottom
        dots$xright = base_agg$xright
        dots$ytop = base_agg$ytop

        if(n_moderator > 1){
            dots$col = all_col[base_agg$moderator_cases]
            dots$border = all_outcol[base_agg$moderator_cases]
        } else {
            dots$col = col
            dots$border = outCol
        }


        dots = dots[names(dots) %in% names(formals(rect))]
        do.call("rect", dots)
    }


    #
    # Legend
    #

    if(n_moderator > 1 && isLegend){
        if(!is.list(legend_options)){
            stop("Argument'legend_options' must be a list.")
        }

        # mandatory
        legend_options$legend = moderator_unik
        if(style == "line"){
            legend_options$col = all_col
            legend_options$lty = all_lty
            legend_options$pch = all_pch
            legend_options$pt.cex = pmin(pt.cex, 1.5)
            legend_options$lwd = lwd
        } else if(style == "bar"){
            legend_options$fill = all_col
        }

        legend_options$title = mod.title
        legend_options$trunc = info_legend$trunc


        # options
        listDefault(legend_options, "x", "top")

        # the call
        do.call("legendFit", legend_options)
    }

    invisible(base_agg)
}


#' Boxplots with possibly moderators
#'
#' This function allows to draw a boxplot, with possibly separating different moderators.
#'
#' @inheritParams plot_distr
#'
#' @param fml A numeric vector or a formula of the type: \code{vars ~ moderator_1 | moderator_2}. Note that if a formula is provided then the argument \sQuote{data} must be provided. You can plot several variables, if you don't want a moderator, use 1 instead: e.g. \code{plot_box(Petal.Width +Petal.Length ~ 1, iris)}. You can plot all numeric variables from a data set using \code{"."}: \code{plot_box(. ~ 1, iris)}.
#' @param data A data.frame/data.table containing the relevant information.
#' @param case When argument fml is a vector, this argument can receive a vector of cases.
#' @param moderator When argument fml is a vector, this argument can receive a vector of moderators.
#' @param inCol A vector of colors that will be used for within the boxes.
#' @param outCol The color of the outer box. Default is black.
#' @param pch The patch of the outliers. Default is 18.
#' @param addLegend Default is \code{TRUE}. Should a legend be added at the top of the graph is there is more than one moderator?
#' @param lwd The width of the lines making the boxes. Default is 2.
#' @param outlier Default is \code{TRUE}. Should the outliers be displayed?
#' @param dict_case A named character vector. If provided, it changes the values of the variable \sQuote{case} to the ones contained in the vector \code{dict_case}. Example: I want to change my variable named "a" to "Australia" and "b" to "Brazil", then I used \code{dict=c(a="Australia",b="Brazil")}.
#' @param dict_moderator A named character vector. If provided, it changes the values of the variable \sQuote{moderator} to the ones contained in the vector \code{dict_moderator}. Example: I want to change my variable named "a" to "Australia" and "b" to "Brazil", then I used \code{dict=c(a="Australia",b="Brazil")}.
#' @param order_case Character vector. This element is used if the user wants the \sQuote{case} values to be ordered in a certain way. This should be a regular expression (see \code{\link[base]{regex}} help for more info). There can be more than one regular expression. The variables satisfying the first regular expression will be placed first, then the order follows the sequence of regular expressions.
#' @param order_moderator Character vector. This element is used if the user wants the \sQuote{moderator} values to be ordered in a certain way. This should be a regular expression (see \code{\link[base]{regex}} help for more info). There can be more than one regular expression. The variables satisfying the first regular expression will be placed first, then the order follows the sequence of regular expressions.
#' @param addMean Whether to add the average for each boxplot. Default is true.
#' @param mean.col The color of the mean. Default is darkred.
#' @param mean.cex The cex of the mean, default is 2.
#' @param mean.pch The patch of the mean, default is 18.
#' @param line.max Option for the x-axis, how far should the labels go. Default is 1 for normal labels, 2 for tilted labels.
#' @param density The density of lines within the boxes. By default it is equal to -1, which means the boxes are filled with color.
#' @param lty The type of lines for the border of the boxes. Default is 1 (solid line).
#' @param ... Other parameters to be passed to \code{plot}.
#'
#' @author Laurent Berge
#'
#' @return
#' Invisibly returns the coordinates of the x-axis.
#'
#' @examples
#'
#' # Simple iris boxplot
#' plot(1:10)
#'
#' # All numeric variables
#' plot_box(. ~ 1, iris)
#'
#' # All numeric variable / splitting by species
#' plot_box(. ~ Species, iris)
#'
#' # idem but with renaming
#' plot_box(. ~ Species, iris, dict = c(Species="Iris species",
#'          setosa="SETOSA", Petal.Width="Width (Petal)"))
#'
#' # Now using two moderators
#' base = iris
#' base$period = sample(1:4, 150, TRUE)
#'
#' plot_box(Petal.Length ~ period | Species, base)
#'
#'
#'
#'
#'
plot_box = function(fml, data, case, moderator, inCol, outCol = "black", density = -1, lty = 1, pch = 18, addLegend = TRUE,  legend_options = list(), lwd=2, outlier, dict = NULL, dict_case, dict_moderator, order_case, order_moderator, addMean, mean.col = "darkred", mean.pch = 18, mean.cex = 2, mod.title = TRUE, labels.tilted, trunc = 20, trunc.method = "auto", line.max, ...){

    # DT VARS USED
    case_nb = moderator_nb = span = q3 = q1 = min_whisker = y_min = max_whisker = y_max = NULL

    fml_in = fml

    mc = match.call()

    # Controls

    check_arg("logical scalar", addLegend, outlier, addMean, labels.tilted)
    check_arg(order_moderator, order_case, "character vector", .message = "Argument '__ARG__' must be a vector of regular expressions.")

    check_arg(mod.title, "scalar(character, logical)")
    check_arg(trunc, "integer scalar GE{5}")

    check_arg(dict, "null named character vector | logical scalar")

    #
    # Extracting the information
    #

    moderator_name = ""
    if("formula" %in% class(fml_in)){

        # if(missing(data) || !is.data.frame(data)){
        #     postfix = ifelse(!is.data.frame(data), paste0(" Currently it is of class ", enumerate_items(class(data)), "."), "")
        #
        #     stop("If you provide a formula, a data.frame must be given in the argument 'data'.", postfix)
        # }
        #
        # vars = all.vars(fml_in)
        # if(any(!vars %in% names(data))){
        #     stop("The variable", enumerate_items(setdiff(vars, names(data)), "s.is")," not in the data set (", deparse_long(mc$data), ").")
        # }
        #
        # # Creation of x and the condition
        # if(!length(fml_in) == 3){
        #     stop("The formula must be of the type 'var ~ case' or 'var ~ case | moderator'.")
        # }

        check_arg(data, "data.frame mbt", .message = "If you provide a formula, a data.frame must be given in the argument 'data'.")

        # we check the variables data are there
        check_arg(fml, "ts formula left(1) right(,2)", .message = "Argument 'fml' must be a formula of the type variables ~ moderator_1 | moderator_2 ('moderator_1' [you can replace it by 1] and 'moderator_2' are optional).")

        # "." is a special variable
        vars = setdiff(all.vars(fml_in), ".")
        if(any(!vars %in% names(data))){
            stop("The variable", enumerate_items(setdiff(vars, names(data)), "s.is")," not in the data set (", deparse_long(mc$data), ").")
        }

        info = extract_pipe(fml_in)
        fml = extract_pipe(fml_in)$fml
        pipe = extract_pipe(fml_in)$pipe

        if(deparse(fml[[2]]) == "."){
            x_all = data
            qui_num = sapply(x_all, function(z) is.numeric(z) || is.logical(z))
            if(!any(qui_num)){
                stop("Only numeric variables can be displayed, and none were found in the data set.")
            }
            x_all = x_all[qui_num]
        } else {
            # x = eval(fml[[2]], data)
            x_all = extract_df(info$lhs_fml, data)
            qui_num = sapply(x_all, function(z) is.numeric(z) || is.logical(z))
            if(any(!qui_num)){
                stop("Only numeric variable can be displayed, the variable", enumerate_items(names(x_all), "s.quote.is"), " not numeric.")
            }
        }

        case = eval(fml[[3]], data)
        moderator = eval(pipe, data)

        mod_is_set = FALSE
        if(length(x_all) == 1){
            x = x_all[[1]]
            x_name = names(x_all)

            if(length(case) > 1){
                case_name = deparse(fml[[3]])
            }

        } else {
            if(is.data.frame(x_all)){
                n = nrow(x_all)
                K = ncol(x_all)
            } else {
                n = length(x_all[[1]])
                K = length(x_all)
            }

            if(length(case) == 1){
                # No case => the variables become the case
                case = rep(names(x_all), each = n)
            } else if(length(moderator) > 1){
                stop("When plotting multiple variables, you cannot use a second moderator. This is possible only when plotting kust one variable.")
            } else {
                # case: the variables // moderator: the cases
                mod_is_set = TRUE
                moderator_name = deparse(fml[[3]])
                moderator = rep(case, K)
                case = rep(names(x_all), each = n)
            }

            x = unlist(x_all)
            x_name = ""
            case_name = ""
        }

        if(is.null(moderator)){
            moderator = rep(1, length(x))
        } else if(mod_is_set == FALSE){
            moderator_name = gsub("^.*\\| *", "", deparse(fml_in[[3]]))
        }

        # other info
        # x_name = deparse(fml[[2]])
        # case_name = deparse(fml[[3]])

    } else {
        if(missing(case)){
            stop("You must provide the argument 'case'.")
        } else {
            x = fml_in

            if(length(x) != length(case)){
                stop("The arguments 'x' and 'case' must be of the same length.")
            }

            if(!missing(moderator) && !is.null(moderator)){
                if(length(x) != length(moderator)){
                    stop("If provided, the argument 'moderator' must be of the same length of 'x'.")
                }
                moderator_name = clean_name(deparse(substitute(moderator)))
            } else {
                moderator = rep(1, length(x))
            }

            # other info
            x_name = clean_name(deparse(substitute(fml)))
            case_name = clean_name(deparse(substitute(case)))
        }
    }

    # Dropping NAs
    quiNA_x = is.na(x)
    quiNA_mod = is.na(moderator)
    quiNA_case = is.na(case)
    quiNA = quiNA_x | quiNA_mod | quiNA_case
    if(any(quiNA)){
        nb_na = c(sum(quiNA_x), sum(quiNA_mod), sum(quiNA_case))
        msg_na = paste0(c("x: ", "moderator: ", "case: "), nb_na)
        message("NOTE: ", sum(quiNA), " observations with NAs (", enumerate_items(msg_na[nb_na>0]), ").")
        x = x[!quiNA]
        moderator = moderator[!quiNA]
        case = case[!quiNA]
    }

    # Renaming: dict
    x_name = dict_apply(x_name, dict)
    case_name = dict_apply(case_name, dict)
    moderator_name = dict_apply(moderator_name, dict)


    #
    # Aggregation
    #

    mod_num_logical = is.logical(moderator) || is.numeric(moderator) # used later
    moderator = as.character(moderator)

    CASE_FACTOR = FALSE
    if(is.factor(case)){
        CASE_FACTOR = TRUE
        case_unik = levels(case[, drop = TRUE])
        case = unclass(case[, drop = TRUE])
    }


    quoi = data.table(x=as.numeric(x), case=case, moderator=moderator)

    delayAddMean = FALSE
    if(missnull(addMean)){
        addMean = TRUE
        delayAddMean = TRUE
    }

    if(addMean){
        base_agg = quoi[, list(y_min = min(x), q1 = quantile(x, 0.25), m = median(x), q3 = quantile(x, 0.75), y_max = max(x), avg = mean(x)), keyby = list(case, moderator)]
    } else {
        base_agg = quoi[, list(y_min = min(x), q1 = quantile(x, 0.25), m = median(x), q3 = quantile(x, 0.75), y_max = max(x)), keyby = list(case, moderator)]
    }


    #
    # the coordinates
    #

    # putting case to number and formatting it
    if(!missnull(order_case)){
        case_unik = sunique(case)
        for(var2order in rev(order_case)){
            who = grepl(var2order, case_unik)
            case_unik = c(case_unik[who], case_unik[!who])
        }
        base_agg[, case_nb := dict2number(case_unik, case)]
    } else {
        base_agg[, case_nb := quickUnclassFactor(case)]
        if(CASE_FACTOR == FALSE){
            what = unique(base_agg[, list(case, case_nb)])
            what = what[order(case_nb)]
            case_unik = what$case
        }
    }

    aliasCase = dict_apply(case_unik, dict)
    if(!missing(dict_case) && !is.null(dict_case)){
        if(!is.character(dict_case) || is.null(names(dict_case))) stop("the arg. 'dict_case' must be a named character vector.")

        qui = which(case_unik %in% names(dict_case))
        aliasCase[qui] = dict_case[case_unik[qui]]
    }

    # putting moderator to number and formatting it
    if(!missnull(order_moderator)){
        moderator_unik = sunique(moderator)
        for(var2order in rev(order_moderator)){
            who = grepl(var2order, moderator_unik)
            moderator_unik = c(moderator_unik[who], moderator_unik[!who])
        }
        base_agg[, moderator_nb := dict2number(moderator_unik, moderator)]
    } else {
        base_agg[, moderator_nb := quickUnclassFactor(moderator)]
        moderator_unik = sunique(moderator)
    }

    aliasModerator = dict_apply(moderator_unik, dict)
    if(!missing(dict_moderator) && !is.null(dict_moderator)){
        if(!is.character(dict_moderator)|| is.null(names(dict_moderator))) stop("The argument 'dict_moderator' must be a named character vector.")

        qui = which(moderator_unik %in% names(dict_moderator))
        aliasModerator[qui] = dict_moderator[moderator_unik[qui]]
    }

    n_case = length(case_unik)
    n_moderator = length(moderator_unik)

    # The colors
    if(missnull(inCol)){
        if(n_moderator == 1){
            inCol = "#1F78B4"
        } else {
            inCol = c("#1F78B4", "#33A02C", "#E31A1C", "#FF7F00")
        }
    }

    # separation between the different cases:
    sep = 0.30 + 0.15 * (n_moderator-1)

    base_agg[, x := (case_nb-1)*(n_moderator+sep) + moderator_nb]

    # Information on the length of the whiskers
    base_agg[, span := 1.5 * (q3 - q1)]
    base_agg[, min_whisker := pmax(y_min, q1 - span)]
    base_agg[, max_whisker := pmin(y_max, q3 + span)]

    # the legend
    isLegend = FALSE
    if(addLegend && n_moderator > 1){
        isLegend = TRUE
    }

    # mod.title
    # if(isLegend){
    #     if(missing(mod.title)){
    #         if(mod_num_logical){
    #             mod.title = moderator_name
    #         } else {
    #             mod.title = NULL
    #         }
    #     } else if(isTRUE(mod.title)){
    #         mod.title = moderator_name
    #     } else if(isFALSE(mod.title)){
    #         mod.title = NULL
    #     }
    # }
    if(isTRUE(mod.title)){
        mod.title = moderator_name
    } else if(isFALSE(mod.title)){
        mod.title = NULL
    }


    #
    # Preparation
    #

    dots = list(...)

    # default for outlier
    if(missnull(outlier)){
        outlier = FALSE
        if(diff(range(x)) < 2 * diff(range(c(base_agg$min_whisker, base_agg$max_whisker)))){
            outlier = TRUE
        }
    }

    # default for addMean
    if(delayAddMean){
        addMean = !outlier
    }

    # ylim

    if(outlier){
        ylim = range(quoi$x)
    } else {
        ylim = c(min(base_agg$min_whisker), max(base_agg$max_whisker))
    }

    ymax = Inf
    if(isLegend){
        ymax = ylim[2]
        info_legend = legendFit(legend = aliasModerator, plot = FALSE, title = mod.title)
        # hauteur_caractere = strheight("W", "in", cex = info_legend$cex)
        # ylim[2] = ylim[2] + 2.5*hauteur_caractere / par("pin")[2] * diff(ylim)
        total_height = info_legend$total_height
        ylim[2] = ylim[2] + total_height / par("pin")[2] * diff(ylim)
    }

    if(missnull(labels.tilted)){
        if(sum(nchar(aliasCase)) * strwidth("w", "in") > 2 * par("pin")[1]){
            labels.tilted = TRUE
            case_name = ""
        } else {
            labels.tilted = FALSE
        }
    } else if(labels.tilted){
        case_name = ""
    }


    listDefault(dots, "ylim", ylim)
    listDefault(dots, "ylab", x_name)
    listDefault(dots, "xlab", case_name)
    dots$type = "n"

    isAxes = TRUE
    if(!is.null(dots$axes)){
        isAxes = dots$axes
    }
    dots$axes = FALSE

    dots$x = dots$y = 1

    # xlim
    dots$xlim = range(base_agg$x) + c(-0.5, 0.5)

    do.call("plot", dots)

    if(isAxes){
        box()

        las = dots$las
        axis(2, las = las)

        at_labels = 1 + (n_moderator-1)/2 + (0:(n_case-1))*(n_moderator+sep)
        # axis(1, at = at_labels, labels = aliasCase, lwd = 0)

        if(labels.tilted){
            if(missnull(line.max)) line.max = 2 + 1
            xaxis_biased(at = at_labels, labels = aliasCase, trunc = trunc, trunc.method = trunc.method, line.max=line.max)
        } else {
            if(missnull(line.max)) line.max = 1 + 1
            xaxis_labels(at = at_labels, labels = aliasCase, trunc = trunc, trunc.method = trunc.method, line.max=line.max)
        }

    }

    hgrid(ymax = ymax)

    #
    # The plot_line
    #

    if(n_moderator == 1){
        all_inCol = inCol[1 + (0:(n_case-1))%%length(inCol)]
        all_outCol = outCol[1 + (0:(n_case-1))%%length(outCol)]
        all_densities = density[1 + (0:(n_case-1))%%length(density)]
    } else {
        all_inCol = inCol[1 + (0:(n_moderator-1))%%length(inCol)]
        all_outCol = outCol[1 + (0:(n_moderator-1))%%length(outCol)]
        all_densities = density[1 + (0:(n_moderator-1))%%length(density)]
    }


    for(i in 1:n_moderator){
        quoi = base_agg[moderator == moderator_unik[i]]

        if(n_moderator == 1){
            box_single(x = quoi$x, quoi$y_min, quoi$q1, quoi$m, quoi$q3, quoi$y_max, width = 1, inCol = all_inCol, lwd = lwd, outCol = all_outCol, density = all_densities)
        } else {
            box_single(x = quoi$x, quoi$y_min, quoi$q1, quoi$m, quoi$q3, quoi$y_max, width = 1, inCol = all_inCol[i], lwd = lwd, outCol = all_outCol[i], density = all_densities[i])
        }


        # now the outliers
        if(outlier){
            for(j in 1:n_case){
                if(CASE_FACTOR){
                    # values have been unclassed
                    quoi_case = quoi[case == j]
                    y = x[moderator == moderator_unik[i] & case == j]
                } else {
                    quoi_case = quoi[case == case_unik[j]]
                    y = x[moderator == moderator_unik[i] & case == case_unik[j]]
                }

                qui_outlier = y > quoi_case$max_whisker | y <  quoi_case$min_whisker
                if(any(qui_outlier)){
                    points(rep(quoi_case$x, sum(qui_outlier)), y[qui_outlier], pch=pch)
                }
            }
        }

        if(addMean){
            quoi = base_agg[moderator == moderator_unik[i]]
            points(quoi$x, quoi$avg, col = mean.col, pch = mean.pch, cex = mean.cex)
        }
    }

    #
    # Legend
    #

    if(isLegend){
        if(!is.list(legend_options)){
            stop("Argument'legend_options' must be a list.")
        }

        # mandatory
        legend_options$legend = aliasModerator
        legend_options$fill = all_inCol
        legend_options$title = mod.title

        # options
        listDefault(legend_options, "x", "top")

        # the call
        do.call("legendFit", legend_options)
    }

    invisible(1 + (n_moderator-1)/2 + (0:(n_case-1))*(n_moderator+sep))
}

Try the fplot package in your browser

Any scripts or data that you put into this service are public.

fplot documentation built on July 1, 2020, 6:30 p.m.