R/superbPlot.R

Defines functions unfactor meanLocalCorrelation gA unitaryAlpha meanCorrelation mycor superbPlot

Documented in superbPlot

######################################################################################
#' @title summary plot of any statistics with adjusted error bars.
#'
#' @md
#'
#' @description The function ``suberbPlot()`` plots standard error or confidence interval for various  
#'      descriptive statistics under various designs, sampling schemes, population size and purposes,
#'      according to the ``suberb`` framework. See \insertCite{cgh21}{superb} for more.
#'
#' @param data Dataframe in wide format
#'
#' @param BSFactors The name of the columns containing the between-subject factor(s)
#' @param WSFactors The name of the within-subject factor(s)
#' @param WSDesign the within-subject design if not a full factorial design (default "fullfactorial")
#' @param variables The dependent variable(s) as strings
#'
#' @param statistic The summary statistic function to use as a string
#' @param errorbar The function that computes the error bar. Should be "CI" or "SE" or 
#'      any function name if you defined a custom function. Default to "CI"
#' @param gamma The coverage factor; necessary when ``errorbar == "CI"``. Default is 0.95.
#' @param factorOrder Order of factors as shown in the graph (in that order: x axis,
#'       groups, horizontal panels, vertical panels)
#'
#' @param adjustments List of adjustments as described below.
#'      Default is ``adjustments = list(purpose = "single", popSize = Inf, decorrelation = "none",
#'              samplingDesign = "SRS")``
#' @param clusterColumn used in conjunction with samplingDesign = "CRS", indicates which column 
#'    contains the cluster membership
#'
#' @param showPlot Defaults to TRUE. Set to FALSE if you want the output to be the summary 
#'     statistics and intervals.
#' @param plotStyle The type of object to plot on the graph. See full list below.
#'      Defaults to "bar".
#'
#' @param preprocessfct  is a transform (or vector of) to be performed first on data matrix of each group
#' @param postprocessfct is a transform (or vector of)
#'
#' @param ...  In addition to the parameters above, superbPlot also accept a number of 
#'  optional arguments that will be transmitted to the plotting function, such as
#'  pointParams (a list of g`lot2 parameters to input inside geoms; see ?geom_bar2) and
#'  errorbarParams (a list of ggplot2 parameters for geom_errorbar; see ?geom_errorbar)
#'
#'
#' @return a plot with the correct error bars or a table of those summary statistics.
#'         The plot is a ggplot2 object with can be modified with additional declarations.
#'
#'
#' @details The possible adjustements are the following
#' * popsize: Size of the population under study. Defaults to Inf
#' * purpose: The purpose of the comparisons. Defaults to "single". 
#'      Can be "single", "difference", or "tryon".
#' * decorrelation: Decorrelation method for repeated measure designs. 
#'      Chooses among the methods "CM", "LM", "CA", "UA", "LDr" (with r an integer) or "none". Defaults to 
#'      "none". "CA" is correlation-adjusted \insertCite{c19}{superb};
#'      "UA" is based on the unitary Alpha method (derived from the Cronbach alpha;
#'      see \insertCite{lc22}{superb}).
#'      "LDr" is local decorrelation (useful for long time series with autoregressive 
#'      correlation structures; see \insertCite{cppf24}{superb});
#'     .
#' * samplingDesign: Sampling method to obtain the sample. implemented 
#'          sampling is "SRS" (Simple Randomize Sampling) and "CRS" (Cluster-Randomized Sampling).
#'
#' In version 0.97.15, the layouts for plots are the following:
#' * "bar" Shows the summary statistics with bars and error bars;
#' * "line" Shows the summary statistics with lines connecting the conditions over the first factor;
#' * "point" Shows the summary statistics with isolated points
#' * "pointjitter" Shows the summary statistics along with jittered points depicting the raw data;
#' * "pointjitterviolin" Also adds violin plots to the previous layout
#' * "pointindividualline" Connects the raw data with line along the first factor (which should be a repeated-measure factor)
#' * "raincloud" Illustrates the distribution with a cloud (half_violin_plot) and jittered dots next to it. Looks better when coordinates are flipped ``+coord_flip()``.
#' * "corset" Illustrates two repeated-measures with individual lines and clouds
#' * "boxplot" Illustrates the limits, the quartiles and the median using a box
#' @md
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' ######################################################################
#'
#' # Basic example using a built-in dataframe as data. 
#' # By default, the mean is computed and the error bar are 95% confidence intervals
#' superbPlot(ToothGrowth, BSFactors = c("dose", "supp"), 
#'   variables = "len") 
#'
#' # Note that function superb() does the same with formula:
#' superb( len ~ dose + supp, ToothGrowth )
#'
#' # Example changing the summary statistics to the median and
#' # the error bar to 80% confidence intervals
#' superbPlot(ToothGrowth, BSFactors = c("dose", "supp"), 
#'   variables = "len", statistic = "median", errorbar = "CI", gamma = .80) 
#'
#' # Example introducing adjustments for pairwise comparisons 
#' # and assuming that the whole population is limited to 200 persons
#' superbPlot(ToothGrowth, BSFactors = c("dose", "supp"), 
#'   variables = "len",  
#'   adjustments = list( purpose = "difference", popSize = 200) )
#'
#' # This example adds ggplot directives to the plot produced
#' library(ggplot2)
#' superbPlot(ToothGrowth, BSFactors = c("dose", "supp"), 
#'   variables = "len") + 
#' xlab("Dose") + ylab("Tooth Growth") +
#' theme_bw()
#'
#' ######################################################################
#'
#' # The following examples are based on repeated measures
#' library(gridExtra)
#' options(superb.feedback = 'none') # shut down 'warnings' and 'design' interpretation messages
#' 
#' # A simple example: The sleep data
#' # The sleep data are paired data showing the additional time of sleep with 
#' # the soporific drugn #1 (("group = 1") and with the soporific drug #2 ("group = 2"). 
#' # There is 10 participants with two measurements.
#' 
#' # sleep is available in long format so we transform it to the in wide format:
#' sleep2 <- reshape(sleep, direction = "wide", idvar = "ID", timevar = "group")
#' sleep2
#' 
#' # Makes the plots first without decorrelation:
#' superbPlot(sleep2, 
#'   WSFactors = "Times(2)", 
#'   variables = c("extra.1", "extra.2")
#' )
#' # As seen the error bar are very long. Lets take into consideration correlation...
#' # ...  with decorrelation (technique Correlation-adjusted CA):
#' superbPlot(sleep2, 
#'   WSFactors = "Times(2)", 
#'   variables = c("extra.1", "extra.2"), 
#'   # only difference:
#'   adjustments = list(purpose = "difference", decorrelation = "CA")
#' )
#' # The error bars shortened as the correlation is substantial (r = .795).
#' 
#' 
#' ######################################################################
#' 
#' # Another example: The Orange data
#' data(Orange)
#' # Use the Orange example, but let's define shorter column names...
#' names(Orange) <- c("Tree","age","circ")
#' # ... and turn the data into a wide format using superbToWide:
#' Orange.wide <- superbToWide(Orange, id = "Tree", WSFactors = "age", variable = "circ") 
#'
#' # This example contains 5 trees whose diameter (in mm) has been measured at various age (in days):
#' Orange.wide
#'
#' # Makes the plots first without decorrelation:
#' p1 <- superbPlot( Orange.wide, WSFactors = "age(7)",
#'   variables = c("circ.118","circ.484","circ.664","circ.1004","circ.1231","circ.1372","circ.1582"),
#'   adjustments = list(purpose = "difference", decorrelation = "none")
#' ) + 
#'   xlab("Age level") + ylab("Trunk diameter (mm)") +
#'   coord_cartesian( ylim = c(0,250) ) + labs(title="''Standalone'' confidence intervals")
#' # ... and then with decorrelation (technique Correlation-adjusted CA):
#' p2 <- superbPlot( Orange.wide, WSFactors = "age(7)",
#'   variables = c("circ.118","circ.484","circ.664","circ.1004","circ.1231","circ.1372","circ.1582"),
#'   adjustments = list(purpose = "difference", decorrelation = "CA")
#' ) + 
#'   xlab("Age level") + ylab("Trunk diameter (mm)") +
#'   coord_cartesian( ylim = c(0,250) ) + labs(title="Decorrelated confidence intervals")
#'
#' # You can present both plots side-by-side
#' grid.arrange(p1, p2, ncol=2)
#'
#' ######################################################################
#' 
######################################################################################
#'
#' @export superbPlot
#' @importFrom lsr wideToLong
#' @importFrom plyr ddply
#' @import ggplot2
#' @importFrom utils capture.output
#
######################################################################################


superbPlot <- function(data, 
    BSFactors     = NULL,            # vector of the between-subject factor columns
    WSFactors     = NULL,            # vector of the names of the within-subject factors
    WSDesign      = "fullfactorial", # or ws levels of each variable if not a full factorial ws design
    factorOrder   = NULL,            # order of the factors for plots
    variables,                       # dependent variable name(s)
    statistic     = "mean",          # descriptive statistics
    errorbar      = "CI",            # content of the error bars
    gamma         = 0.95,            # coverage if confidence intervals
    adjustments   = list(
        purpose        = "single",   # is "single" or "difference"
        popSize        = Inf,        # is Inf or a specific positive integer
        decorrelation  = "none",     # is "CM", "LM", "CA", "UA", or "none"
        samplingDesign = "SRS"       # is "SRS" or "CRS" (in which case use clusterColumn)
    ),
    showPlot      = TRUE,            # show a plot or else summary statistics
    plotStyle     = "bar",           # type of plot (so far, bar, line, point, pointjitter and pointjitterviolin
    preprocessfct = NULL,            # run preprocessing on the matrix
    postprocessfct= NULL,            # run post-processing on the matrix
    clusterColumn = "",              # if samplineScheme = CRS
    ...
    # the following are optional list of graphic directives...
    # errorbarParams,                # merged into ggplot/geom_errorbar
    # pointParams,                   # merged into ggplot/geom_point
    # lineParams,                    # merged into ggplot/geom_line
    # barParams,                     # merged into ggplot/geom_bar
    # etc.
) {

    ##############################################################################
    # STEP 1: Input validation
    ##############################################################################
    # 1.0: is the data actually data!
	data <- as.data.frame(data) # coerce to data.frame if tibble or compatible
    if(!(is.data.frame(data)))
            stop("superb::ERROR: data is not a data.frame or similar data structure. Exiting...")

    # 1.1: missing adjustements
    if(is.null(adjustments$purpose))        {adjustments$purpose        <- "single"}
    if(is.null(adjustments$popSize))        {adjustments$popSize        <- Inf}
    if(is.null(adjustments$decorrelation))  {adjustments$decorrelation  <- "none"}
    if(is.null(adjustments$samplingDesign)) {adjustments$samplingDesign <- "SRS"}

    # 1.2: unknown adjustments listed
    if (!all(names(adjustments) %in% c("purpose","popSize","decorrelation","samplingDesign")))
            stop("superb::ERROR: one of the adjustment is unknown. Exiting...")

    # 1.3: invalid choice in a list of possible choices
    if (is.character(adjustments$popSize)||!(all(adjustments$popSize >0)))  
            stop("superb::ERROR: popSize should be a positive number or Inf (or a list of these). Exiting...")
    if (!(adjustments$purpose %in% c("single","difference","tryon"))) 
            stop("superb::ERROR: Invalid purpose. Did you mean 'difference'? Exiting...")
    if (!(substr(adjustments$decorrelation,1,2) %in% c("no","CM","LM","CA","UA","LD"))) 
            stop("superb::ERROR: Invalid decorrelation method. Did you mean 'CM'? Exiting...")
    if (substr(adjustments$decorrelation,1,2) == "LD") {
            radius <- suppressWarnings(as.integer(substr(adjustments$decorrelation, 3, 100)))
            if ((is.na(radius))||(radius<1))
                stop("superb::ERROR: radius given to LD not an integer or is smaller than 1. Exiting...")
    }
    if (!(adjustments$samplingDesign %in% c("SRS","CRS"))) 
            stop("superb::ERROR: Invalid samplingDesign. Did you mean 'SRS'? Exiting...")

    # 1.4a: innapropriate choice for between-subject specifications
    bsLevels <- dim(unique(data[BSFactors]))[1]
    if (!(length(adjustments$popSize) %in% c(1,bsLevels))) 
            stop("superb::ERROR: popSize is a list whose length does not match the number of groups. Exiting...")
    
    # 1.4b: invalid within-subject factors
    if (any(unlist(gregexpr("\\w\\((\\d+)\\)", WSFactors))== -1))
            stop("superb::ERROR: One of the repeated-measure factor not properly formed 'name(nlevel)'. Exiting...")
    wsMissing <- "DummyWithinSubjectFactor"
    wsLevels <- c(1)
    if (is.null(WSFactors)) {
        WSFactors <- wsMissing
    } else {
        for (i in 1:length(WSFactors)) {
            wsLevels[i] <- as.integer(unlist(strsplit(WSFactors[i], '[()]'))[2]) 
            WSFactors[i] <-            unlist(strsplit(WSFactors[i], '[()]'))[1]
        }
    }
    wslevel <- prod(wsLevels)

    # 1.4c: setting factorOrder in case it is null
    if(is.null(factorOrder))  {
        factorOrder <- c(WSFactors, BSFactors)
        if (('design' %in% getOption("superb.feedback") ) & (length(factorOrder[factorOrder != wsMissing])) > 1)  
                message(paste("superb::FYI: The variables will be plotted in that order: ",
                          paste(factorOrder[factorOrder != wsMissing],collapse=", "),
                          " (use factorOrder to change).", sep=""))
    }

    # 1.4d: checking WSFactors based on WSDesign
    if (all(WSDesign == "fullfactorial")) {
        if (!(length(variables) == wslevel)) 
                stop("superb::ERROR: The number of levels of the within-subject level(s) does not match the number of variables. Exiting...")
        if ((wslevel == 1)&&(!(adjustments$decorrelation == "none"))) 
                stop("superb::ERROR: Decorrelation is not to be used when there is no within-subject factors. Exiting...")
    } else {
        if (!is.list(WSDesign) )
                    stop("superb::ERROR: the WSdesign is not 'fullfactorial' (default) or a list. Exiting...")
        if (length(WSDesign) != length(variables))
                    stop("superb::ERROR: the WSDesign list is not of the same length as the variable vector. Exiting...")
        if (!all(lapply(WSDesign, length)==length(WSFactors)))
                    stop("superb::ERROR: the WSDesign does not contain vectors of factor levels, one per factor. Exiting...")
    }

    # 1.5: invalid column names where column names must be listed
    if (!(all(variables %in% names(data)))) 
            stop("superb::ERROR: One of the variable column is not found in data. Exiting...")
    if (!(all(BSFactors %in% names(data)))) 
            stop("superb::ERROR: One of the BSFactors column is not found in data. Exiting...")

    # 1.6: invalid inputs
    if (length(factorOrder[factorOrder != wsMissing] ) > 4)
            stop("superb::ERROR: Too many factors named on factorOrder. Maximum 4. Exiting...")
    if (length(factorOrder[factorOrder != wsMissing]) < length(WSFactors[WSFactors != wsMissing]) + length(BSFactors)) 
            stop("superb::ERROR: Too few factors named on factorOrder. Exiting...")
    if ((gamma[1] <0)||(gamma[1]>1))
            stop("superb::ERROR: gamma is not within 0 and 1. Exiting...")
    if (!is.logical(showPlot))
            stop("superb::ERROR: showPlot must be TRUE or FALSE. Exiting...")

    # 1.7: align levels and corresponding variables
    weird        <-"+!+" # to make sure that these characters are not in the column names
    if (all(WSDesign == "fullfactorial")) {
        combinaisons <- expand.grid(lapply(wsLevels,seq))
    } else {
        combinaisons <- data.frame(matrix(as.integer(unlist(WSDesign)),nrow=length(WSDesign), byrow = TRUE))
    }
    newnames     <- paste("DV", apply(combinaisons,1,paste,collapse=weird) ,sep=weird)
    design       <- cbind(combinaisons, variables, newnames)
    colnames(design)[1:length(WSFactors)] <- WSFactors
    colnames(design)[length(WSFactors)+1] <- "variable"
    colnames(design)[length(WSFactors)+2] <- "newvars"
    if ( (length(wsLevels)>1) & ('design' %in% getOption("superb.feedback") ) ) {
        message("superb::FYI: Here is how the within-subject variables are understood:")
        temp = paste0(capture.output(print(design[,c(WSFactors, "variable") ], row.names=FALSE)),collapse="\n")
        message(temp) 
    }

    # 1.8: invalid functions 
    widthfct <- paste(errorbar, statistic, sep = ".")
    if (errorbar == "none") { # create a fake function
        eval(parse(text=paste(widthfct, "<-function(X) 0",sep="")), envir = globalenv())
    }
    if ( !(is.stat.function(statistic)) )
            stop("superb::ERROR: The function ", statistic, " is not a known descriptive statistic function. Exiting...")
    if ( !(is.errorbar.function(widthfct)) )
            stop("superb::ERROR: The function ", widthfct, " is not a known function for error bars. Exiting...")
    pltfct <- paste("superbPlot", plotStyle, sep = ".")
    if ( !(is.superbPlot.function(pltfct)) )
            stop("superb::ERROR: The function ", pltfct, " is not a known function for making plots with superbPlot. Exiting...")

    # 1.9: if cluster randomized sampling, check that column cluster is set
    if (adjustments$samplingDesign == "CRS") {
        # make sure that column cluster is defined.
        if(!(clusterColumn %in% names(data))) 
            stop("superb::ERROR: With samplingDesign = \"CRS\", you must specify a valid column with ClusterColumn. Exiting...")
    }

    # 1.10: is there missing data in the scores?
    if (any(is.na(data[,variables])) ) {
        message("superb::WARNING: There are misssing data in your dependent variable(s).\n\tsuperb may show empty summaries... consult help(measuresWithMissingData)")
    }

    # We're clear to go!
    runDebug("superb.1", "End of Step 1: Input validation", 
        c("measure2","design2","BSFactors2","WSFactors2","wsLevels2","wslevel2","factorOrder2","adjustments2"), 
        list(variables, design, BSFactors, WSFactors, wsLevels, wslevel, factorOrder, adjustments) )



    ##############################################################################
    # STEP 2: Decorrelate repeated-measure variables if needed; apply transforms
    ##############################################################################

    complement <- function(x, U) {U[is.na(pmatch(U,x))]}
    x <- c(BSFactors, variables)
    U <- names(data)

    # keep a copy before transforming the data
    # 2022.11.17: sort the columns, bsfactors & wsvariables first
    data.untransformed <- data[ c(x, complement(x,U)) ]
    data.transformed   <- data[ c(x, complement(x,U)) ]

    # We do this step for each group and only on columns with repeated measures.
    if (adjustments$decorrelation == "CM" || adjustments$decorrelation == "LM") {
        data.transformed <- plyr::ddply(data.transformed, .fun = twoStepTransform, .variables= BSFactors, variables)    
    }
    # is LM (pooled standard error) needed?
    if (adjustments$decorrelation == "LM") {
        data.transformed <- plyr::ddply(data.transformed, .fun = poolSDTransform, .variables= BSFactors, variables) 
    }
    # other custom pre-processing of the data matrix per group
    if (!is.null(preprocessfct)) {
        for (fct in preprocessfct)
            data.transformed <- plyr::ddply(data.transformed, .fun = fct, .variables= BSFactors, variables) 
    }
    # other custom post-processing of the data matrix per group
    if (!is.null(postprocessfct)) {
        for (fct in postprocessfct)
            data.transformed <- plyr::ddply(data.transformed, .fun = fct, .variables= BSFactors, variables) 
    }
    
    runDebug("superb.2", "End of Step 2: Data post decorrelation", 
        c("data.transformed2", "newnames2"), list(data.transformed, newnames) )


    ##############################################################################
    # STEP 3: Put data into long format for conveniency
    ##############################################################################

    # replace variable names with names based on design...
    temp <- paste("^", variables, "$", collapse="|", sep="")
    colnames(data.untransformed)[grep(temp, names(data.untransformed))] = newnames
    colnames(data.transformed)[grep(temp, names(data.transformed))] = newnames

    if (exists("id", where = data.untransformed)) {
        #print("Nothing to do, participants identifier exists")
    } else {
        #print("Adding id manually")
        data.untransformed$id = 1:(dim(data.untransformed)[1])
    }
    
    # set data to long format using lsr (Navarro, 2015)
    data.untransformed.long <- suppressWarnings(lsr::wideToLong(data.untransformed, within = WSFactors, sep = weird))
    data.transformed.long   <- suppressWarnings(lsr::wideToLong(data.transformed, within = WSFactors, sep = weird))


    # New May 11th, 2022, version 0.95.1
    as.numeric.factor <- function(x) {strtoi(x)}

    data.untransformed.long[WSFactors] <- mapply(
        as.numeric.factor, data.untransformed.long[WSFactors])
    data.transformed.long[WSFactors] <- mapply(
        as.numeric.factor, data.transformed.long[WSFactors])


    # if there was no within-subject factor, a dummy had been added
    if (WSFactors[1]  == wsMissing) {
        # removing all traces of the dummy
        data.transformed.long[[wsMissing]]   <- NULL # remove the column 
        data.untransformed.long[[wsMissing]] <- NULL # remove the column 
        WSFactors    <- NULL # remove the dummy factor
        factorOrder <- factorOrder[ factorOrder != wsMissing]
    }

    # if the function has an initializer, run it on the long-format data
    if (has.init.function(statistic)) {
        iname = paste("init",statistic, sep=".")
        if (!("none" %in% getOption("superb.feedback")))
            message("superb::FYI: Running initializer ", iname)
        do.call(iname, list(data.untransformed.long) )
    }

    runDebug("superb.3", "End of Step 3: Reformat data frame into long format", 
        c("data.transformed.long2","factorOrder3"), list(data.transformed.long,factorOrder) )



    ##############################################################################
    # STEP 4: Get summary statistics (center, and lowerwidth + upperwidth)
    ##############################################################################

    aggregatefct <- function(subsetOfData) { 
        params1 <- list( subsetOfData$DV  )
        if (is.gamma.required(widthfct)) {
            paramsV <- list( subsetOfData$DV, gamma = gamma )
        } else {
            paramsV <- list( subsetOfData$DV  )
        }
        center <- do.call(statistic, params1)
        limits <- do.call(widthfct, paramsV)
        if (is.interval.function(widthfct)) {
            lowerwidth <- ( min(limits) - center)
            upperwidth <- ( max(limits) - center ) 
        } else {
            lowerwidth <- -limits
            upperwidth <- +limits
        }
        return( c(center=center, lowerwidth=lowerwidth, upperwidth=upperwidth) )
    }

    ### put into FACTORs
    summaryStatistics <- plyr::ddply( data.transformed.long, .fun = aggregatefct, .variables = factorOrder ) 

    summaryStatistics[factorOrder]       <- lapply(summaryStatistics[factorOrder], as.factor)
    data.untransformed.long[factorOrder] <- lapply(data.untransformed.long[factorOrder], as.factor)

    runDebug("superb.4", "End of Step 4: Statistics obtained", 
        c("summaryStatistics2"), list( summaryStatistics) )

    ##############################################################################
    # STEP 5: Get all the adjustments
    ##############################################################################

    # 5.1: Adjust for population size if not infinite 
    nadj <- if (min(adjustments$popSize) != Inf) {
        # Ns the number of subjects per group
        Ns  <- plyr::ddply(data, .fun = dim, .variables = BSFactors )$V1
        # the Ns must be expanded for each repeated measures
        Ns  <- rep(Ns, length(variables))
        sqrt(1 - Ns / adjustments$popSize )        
    } else {1}

    # 5.2: Adjust for purpose if "difference" or "tryon"
    padj <- if (adjustments$purpose == "difference") { 
        sqrt(2) 
    } else if  (adjustments$purpose == "tryon") {
        # extension of Tryon, 2001, for heterogeneous variances in between-group design
        Ns  <- plyr::ddply(data.transformed.long, .fun = dim, .variables = c(WSFactors,BSFactors) )$V1
        sds <- suppressWarnings(plyr::ddply(data.transformed.long, .fun = colSDs, .variables = c(WSFactors,BSFactors) )$DV)
        es <- sqrt(sum(sds^2/Ns)) / sum(sqrt(sds^2/Ns))
        # the es must be expanded for each repeated measures
        es  <- rep(es, length(variables))
        2 * es * sqrt(length(Ns)) / sqrt(2) # 2 because contrary to Tryon, 2001, superb does not want to avoid overlap
    } else {1}

    # 5.3: Adjust for cluster-randomized sampling
    sadj <- if (adjustments$samplingDesign == "CRS") {
        ICCs <- plyr::ddply(data, .fun = ShroutFleissICC1, .variables = BSFactors, clusterColumn, variables )
        KNSs <- plyr::ddply(data, .fun = getKNs, .variables = BSFactors, clusterColumn )
        ICCs <- ICCs[-1:-length(BSFactors)] # drop BSFactors columns
        KNSs <- KNSs[-1:-length(BSFactors)] # drop BSFactors columns

        ICCsKNNs <- cbind(ICCs, KNSs)
        lambdas  <- apply(ICCsKNNs, 1, CousineauLaurencelleLambda) # one lambda per group
        # the lambdas must be expanded for each repeated measures
        lambdas  <- rep(lambdas, length(variables))
        ICCs     <- ICCs$V1 # downcast to a vector for latter display
        lambdas
    } else {1}

    # 5.4: Adjust for correlation if decorrelation == "CA", "UA", ou "LDr"
    radj <- if (adjustments$decorrelation == "CA") {
        rs <- plyr::ddply(data, .fun = meanCorrelation, .variables = BSFactors, cols = variables)$V1
        # the rs must be expanded for each repeated measures
        rs  <- rep(rs, length(variables))
        sqrt(1- rs)
    } else if (adjustments$decorrelation == "UA") {
        rs <- plyr::ddply(data, .fun = unitaryAlpha, .variables = BSFactors, cols = variables)$V1
        # the rs must be expanded for each repeated measures
        rs  <- rep(rs, length(variables))
        sqrt(1- rs)
    } else if (substr(adjustments$decorrelation,1,2) == "LD") {
        rs <- plyr::ddply(data, .fun = meanLocalCorrelation, .variables = BSFactors, cols = variables, w = radius)
#        rs <- unlist(rs[,!is.na(rs)])
        rs <- suppressWarnings(as.numeric(unlist(rs))[!is.na(as.numeric(unlist(rs)))])
        # the rs is a vector containing one rLD for each measurement
        sqrt(1- rs)
    } else {1}

    # All done: apply the corrections to all the widths
    summaryStatistics$lowerwidth <- nadj*padj*sadj*radj*summaryStatistics$lowerwidth
    summaryStatistics$upperwidth <- nadj*padj*sadj*radj*summaryStatistics$upperwidth

    runDebug("superb.5", "End of Step 5: Getting adjustments", 
        c("nadj2","padj2","sadj2","radj2","summaryStatistics3"), list(nadj,padj,sadj,radj,summaryStatistics) )


    ##############################################################################
    # STEP 6: Issue feedback information
    ##############################################################################

    if (('warnings' %in% getOption("superb.feedback") ) | ('all' %in% getOption("superb.feedback")) )  {
        # 6.1: if option is difference, test heterogeneity of variances
        if ((adjustments$purpose == "difference") & (!is.null(BSFactors)) ) {
            crit <- data[,BSFactors]
            ps   <- c()
            for (var in variables ) {
                out <- split(data[,var], crit )
                # 2024.05.30: exlude empty conditions
                out <- out[lengths(out)>1]
                p   <- stats::bartlett.test(out)$p.value
                ps  <- c(ps, p)
            }
            if (any(p < .01)) 
                message("superb::ADVICE: Some of the groups' variances are heterogeneous. Consider using purpose=\"tryon\"." )
        }

        # 6.2: if deccorrelate is CA: show rbar, test Winer
        if ((adjustments$decorrelation == "CA")||(adjustments$decorrelation == "UA")||(substr(adjustments$decorrelation,1,2) == "LD")) {
            message(paste("superb::FYI: The average correlation per group is ", paste(unique(sprintf("%.4f",mean(round(rs,4)))), collapse=" ")) )

            winers <- suppressWarnings(plyr::ddply(data, .fun = "WinerCompoundSymmetryTest", .variables= BSFactors, variables)) 
            winers <- winers[,length(winers)]
            if (any(winers<.05, na.rm = TRUE))
                message("superb::ADVICE: Some of the groups' data are not compound symmetric. Consider using CM." )
        }
        
        # 6.3: if decorrelate is CM or LM: show epsilon, test Winer and Mauchly
        if (adjustments$decorrelation %in% c("CM","LM")) {
            epsGG <- suppressWarnings(plyr::ddply(data, .fun = "HyunhFeldtEpsilon", .variables= BSFactors, variables)) 
            epsGG <- epsGG[,length(epsGG)]
            message(paste("superb::FYI: The HyunhFeldtEpsilon measure of sphericity per group are ", paste(sprintf("%.3f",round(epsGG, 4)), collapse=" ")) )

            winers <- suppressWarnings(plyr::ddply(data, .fun = "WinerCompoundSymmetryTest", .variables= BSFactors, variables) )
            winers <- winers[,length(winers)]
            if (all(winers>.05, na.rm = TRUE))
                message("superb::FYI: All the groups' data are compound symmetric. Consider using CA or UA." )

            mauchlys <- plyr::ddply(data, .fun = "MauchlySphericityTest", .variables= BSFactors, variables) 
            mauchlys <- mauchlys[,length(mauchlys)]
            if (any(mauchlys<.05, na.rm = TRUE))
                message("superb::FYI: Some of the groups' data are not spherical. Use error bars with caution." )
        }
        
        # 6.4: if samplingDesign is CRS: print ICC, check that more than 8 clusters
        if (adjustments$samplingDesign == "CRS") {
            message(paste("superb::FYI: The ICC1 per group are ", paste(sprintf("%.3f",round(ICCs,3)), collapse=" ")) )
        }

        # 6.5: if objective is tryon: print tryon adjustment
        if (adjustments$purpose == "tryon") {
            message(paste("superb::FYI: The tryon adjustments per measures are ", paste("Measure ", 1:length(padj), ": ", sprintf("%.4f",round(padj,4)), sep="",collapse=", "), ", all compared to 1.4142.") )
        }
    }
    
    
    ##############################################################################
    # ALL DONE! Output the plot(s) or the summary data
    ##############################################################################

    runDebug("beforeplot", "Kit for testing plotting function", c("ss","factorOrder2","dl"),list(summaryStatistics, factorOrder, data.untransformed.long) )

    if (showPlot == TRUE) {
        # get the grouping factor
        groupingfactor = if(!is.na(factorOrder[2])) {factorOrder[2]} else { NULL}

        # get the facet factor(s)
        facets <- factorOrder[3:4][!is.na(factorOrder[3:4])]
        facets <- paste( c(facets, ".",".")[1:2], collapse="~")

        # produce the plot
        plot <- do.call( pltfct, list(
                    summarydata    = summaryStatistics,
                    xfactor        = factorOrder[1],
                    groupingfactor = groupingfactor,
                    addfactors     = facets,
                    rawdata        = data.untransformed.long,
                    ...
        ))
        return(plot)
    } else {
        # returns a list with summary statistsics as is and rawdata
        return( list(summaryStatistics = summaryStatistics, rawData = data.untransformed.long))
    }


    ##############################################################################
    # FINISHED! End of function superbPlot
    ##############################################################################
}


#################################################################################
# Statistics functions: colSDs; meanCorrelation; unitaryAlpha
#################################################################################

mycor <- function(X) {
    # a wrapper for cor() that checks if there are constant columns...
    rs <- suppressWarnings( cor(X, use = "pairwise.complete.obs") )
    if (any(is.na(rs))) {
        message("superb::FYI: Some of the measurements are constant. Use CM or use error bars with caution." )
        rs[is.na(rs)] <- 1
    }
    rs
}

meanCorrelation <- function(X, cols) {
    # the mean pair-wise correlations from many columns of the dataframe X
    rs   <- mycor( X[cols] )
    rbar <- mean(rs[upper.tri(rs)])
    rbar
}

unitaryAlpha <- function(X, cols) {
	m <- as.matrix(X[,cols])

	k <- dim(m)[2]
	V <- var(apply(m, 1, FUN=sum))
	S <- sum(apply(m, 2, FUN=var))
	(V-S)/((k-1)*S)
}

colSDs = function (x) {
    # the equivalent of colMeans for standard deviations
    if (is.vector(x))          sd(x)
    else if (is.matrix(x))     apply(x, 2, sd)
    else if (is.data.frame(x)) apply(x, 2, sd)
    else "what the fuck??"
}


##################################################################   
# Functions for local decorrelation
##################################################################   

# The gaussian kernel
gA <- function(w, mu, dim) {
    s1 <- exp(-((1:dim)-mu)^2/(2*w^2))/(w*sqrt(2*pi))
    s1[mu] <- 0 # Is a gaussian kernel with a hole a donut?
    s1 / sum(s1)
}

meanLocalCorrelation <- function(X, cols, w) {
    mat <- mycor( X[cols] )
    nrw <- dim(mat)[2]
    sapply( 1:nrw, 
            \(i) sum(mat[i,] * gA(w,i,nrw) )
    )
}
# that simple!


unfactor <- function( col ) {
    unfactored <- suppressWarnings(as.numeric(as.character( col )))
    if (all(is.na( unfactored ))) {
        col <- as.numeric( col )
    } else {
        col <- unfactored
    }
    col
}

##################################################################   
# End of suberbPlot.
##################################################################   
dcousin3/superb documentation built on Oct. 29, 2024, 5:28 p.m.