R/estimate_magnitude.R

Defines functions estimate_magnitude.jamovi estimate_magnitude.data.frame estimate_magnitude.vector estimate_magnitude.summary estimate_magnitude.base estimate_magnitude

Documented in estimate_magnitude

#' Estimate a magnitude.
#'
#' @description
#' Returns effect sizes appropriate for estimating properties from a
#' quantitative variable.
#'
#' @param data For raw data - a dataframe or tibble
#' @param outcome_variable For raw data - The column name of the outcome
#'   variable, or a vector of numeric data
#' @param mean For summary data - A numeric
#' @param sd For summary data - A numeric > 0
#' @param n For summary data - An integer > 0
#' @param outcome_variable_name Optional friendly name for the outcome variable.
#'   Defaults to 'My outcome variable' or the outcome variable column name if a
#'   data frame is passed.
#' @param conf_level The confidence level for the confidence interval.  Given in
#'   decimal form.  Defaults to 0.95.
#' @param save_raw_data For raw data; defaults to TRUE; set to FALSE to save
#'   memory by not returning raw data in estimate object
#'
#'
#' @return Returnsobject of class esci_estimate
#'
#'
#' @examples
#' # From Raw Data ------------------------------------
#' # Just pass in the data source, grouping column, and outcome column.
#' # You can pass these in by position, skipping the labels:
#'
#' # Note... not sure if PlantGrowth dataset meets assumptions for this analysis
#' estimate_magnitude(
#'  datasets::PlantGrowth,
#'  weight
#' )
#'
#' @export
estimate_magnitude <- function(
  data = NULL,
  outcome_variable = NULL,
  mean = NULL,
  sd = NULL,
  n = NULL,
  outcome_variable_name = "My outcome variable",
  conf_level = 0.95,
  save_raw_data = TRUE
) {

  analysis_type <- "Undefined"


  # Check to see if summary data has been passed
  if (!is.null(mean)) {

    # Summary data is passed, so check to make sure raw data not included
    if(!is.null(data))  stop(
      "You have passed summary statistics,
      so don't pass the 'data' parameter used for raw data.")
    if(!is.null(outcome_variable)) stop(
      "You have passed summary statistics,
      so don't pass the 'outcome_variable' parameter used for raw data.")

    # Looks good, we can pass on to summary data
    analysis_type <- "summary"

  } else {
    # Raw data has been passed, first sure summary data is not passed
    if(!is.null(mean))  stop(
      "You have passed raw data,
      so don't pass the 'mean' parameter used for summary data.")
    if(!is.null(sd))  stop(
      "You have passed raw data,
      so don't pass the 'sd' parameter used for summary data.")
    if(!is.null(n))  stop(
      "You have passed raw data,
      so don't pass the 'n' parameter used for summary data.")

    # Now we have to figure out what type of raw data:
    #   could be tidy column names, string column names, or vectors
    # We check to see if we have a tidy column name by trying to evaluate it
    is_column_name <- try(outcome_variable, silent = TRUE)

    if(class(is_column_name) == "try-error") {
      # Column names have been passed, check if need to be quoted up

      outcome_variable_enquo <- rlang::enquo(outcome_variable)
      outcome_variable_quoname <- try(
        eval(rlang::as_name(outcome_variable_enquo)), silent = TRUE
      )
      if (class(outcome_variable_quoname) != "try-error") {
        # This only succeeds if outcome_variable was passed unquoted
        # Reset outcome_variable to be fully quoted
        outcome_variable <- outcome_variable_quoname
      }

      # Ready to be analyzed as a list of string column names
      analysis_type <- "data.frame"

    } else if (class(outcome_variable) == "numeric") {
      # At this stage, we know that y was not a tidy column name,
      #  so it should be either a vector of raw data (class = numeric)
      #  or a vector of column names passed as strings
      analysis_type <- "vector"
    } else if (class(outcome_variable) == "character") {
      # Ok, must have been string column names
      if (length(outcome_variable) == 1) {
        analysis_type <- "data.frame"
      } else {
        analysis_type <- "jamovi"
      }
    }
  }

  # At this point, we've figured out the type of data passed
  #  so we can dispatch

  # I put all the dispatches here, at the end, to make it easier to
  #   update in case the underlying function parameters change

  if(analysis_type == "data.frame") {
    return(
      estimate_magnitude.data.frame(
        data = data,
        outcome_variable = outcome_variable,
        conf_level = conf_level,
        save_raw_data = save_raw_data
      )

      # estimate_magnitude.data.frame(
      #   data = data,
      #   outcome_variable = make.names(outcome_variable),
      #   conf_level = conf_level,
      #   save_raw_data = save_raw_data
      # )
    )
  } else if (analysis_type == "jamovi") {
    return(
      estimate_magnitude.jamovi(
        data = data,
        outcome_variables = outcome_variable,
        conf_level = conf_level,
        save_raw_data = save_raw_data
      )
    )


  } else if (analysis_type == "summary") {
    return(
      estimate_magnitude.summary(
        mean = mean,
        sd = sd,
        n = n,
        outcome_variable_name = outcome_variable_name,
        conf_level = conf_level
      )
    )
  } else if (analysis_type == "vector") {
    if (outcome_variable_name == "My outcome variable") {
      outcome_variable_name <- deparse(substitute(outcome_variable))
    }

    return(
      estimate_magnitude.vector(
        outcome_variable = outcome_variable,
        outcome_variable_name = outcome_variable_name,
        conf_level = conf_level,
        save_raw_data = save_raw_data
        )
    )
  }

  stop("Something went wrong dispatching this function")

}


# Handles construction of the effect_sizes and standardized_effect_sizes tables
estimate_magnitude.base <- function(
  overview_table,
  outcome_variable_name,
  conf_level
) {

  # Input checks -------------------------
  # This is the base function for generating an estimated contrast
  # It expects:
  # outcome_variable_name - non-zero length character
  # conf_level should be a numeric value > 0 and <1

  # Should already be checked
  # mean, sd, and n should already be checked to be numerics
  #  with sd > 0 and n an integer > 0

  # Check variable names
  esci_assert_type(outcome_variable_name, "is.character")

  # Check conf.level
  esci_assert_type(conf_level, "is.numeric")
  esci_assert_range(
    conf_level,
    lower = 0,
    upper = 1,
    lower_inclusive = FALSE,
    upper_inclusive = FALSE
  )

  # Prepare esci_estimate object that will be returned-------------------------
  estimate <- list()
  estimate$properties <- list(
    outcome_variable_name = outcome_variable_name,
    grouping_variable_name = NULL,
    contrast = NULL,
    conf_level = conf_level
  )
  estimate$es_mean_properties <- list(
    effect_size_name = "M",
    effect_size_name_html = "<i>M</i>",
    effect_size_category = "simple",
    effect_size_precision = "magnitude",
    conf_level = conf_level,
    error_distribution = "t_dist"
  )
  class(estimate) <- "esci_estimate"
  estimate$overview <- overview_table

  estimate$es_mean <- data.frame(
    "outcome_variable_name" = overview_table$outcome_variable_name,
    "effect" = overview_table$outcome_variable_name,
    "effect_size" = overview_table$mean,
    "LL" = overview_table$mean_LL,
    "UL" = overview_table$mean_UL,
    "SE" = overview_table$mean_SE,
    "df" = overview_table$df
  )

  alpha <- 1 - conf_level
  estimate$es_mean$tcrit <- qt(p = alpha * 2, df = estimate$es_mean$df, lower.tail = FALSE)
  estimate$es_mean$ta_LL <- estimate$es_mean$effect_size - (estimate$es_mean$SE * estimate$es_mean$tcrit)
  estimate$es_mean$ta_UL <- estimate$es_mean$effect_size + (estimate$es_mean$SE * estimate$es_mean$tcrit)
  estimate$es_mean$tcrit <- NULL


  if (!is.null(overview_table$median)) {
    estimate$es_median <- data.frame(
      "outcome_variable_name" = overview_table$outcome_variable_name,
      "effect" = overview_table$outcome_variable_name,
      "effect_size" = overview_table$median,
      "LL" = overview_table$median_LL,
      "UL" = overview_table$median_UL,
      "SE" = overview_table$median_SE,
      "df" = overview_table$df
    )
    estimate$es_median_properties <- list(
      effect_size_name = "Mdn",
      effect_size_name_html = "<i>Mdn</i>",
      effect_size_category = "simple",
      effect_size_precision = "magnitude",
      conf_level = conf_level,
      error_distribution = "t_dist"
    )
  }



  return(estimate)

}


estimate_magnitude.summary <- function(
  mean,
  sd,
  n,
  outcome_variable_name = "My outcome variable",
  conf_level = 0.95
){


  # Input checks      ---------------------------------------------------------
  # This function expects:
  # mean - numeric data
  # sds  - numeric, >0
  # ns - numeric integer > 1

  # The base function will check:
  #  conf_level is >0 and <1
  #  outcome_variable_name - optional, non-zero length character

  # Check mean
  esci_assert_type(mean, "is.numeric")
  # Check sd
  esci_assert_type(sd, "is.numeric")
  esci_assert_range(
    sd,
    lower = 0,
    lower_inclusive = FALSE
  )
  # Check n
  esci_assert_type(n, "is.numeric")
  esci_assert_type(n, "is.numeric")
  esci_assert_type(n, "is.whole.number")
  esci_assert_range(
    n,
    lower = 2,
    lower_inclusive = TRUE
  )


  # Do analysis ------------------------------------
  overview_table <- overview.summary(
    means = mean,
    sds = sd,
    ns = n,
    outcome_variable_name = outcome_variable_name,
    conf_level = conf_level,
    assume_equal_variance = FALSE
  )

  estimate <- estimate_magnitude.base(
      overview_table = overview_table,
      outcome_variable_name = outcome_variable_name,
      conf_level = conf_level
  )


  estimate$properties$data_type <- "summary"
  estimate$properties$data_source <- NULL

  return(estimate)

}


estimate_magnitude.vector <- function(
  outcome_variable,
  outcome_variable_name = "My outcome variable",
  conf_level = 0.95,
  save_raw_data = TRUE
) {

  # Input checks --------------------------------------------------------------
  # This function expects:
  #  outcome_variable to be a vector of numeric data:
  #      with > 2 valid rows
  #  save_raw_data is a logical, TRUE or FALSE


  # Check outcome variable
  esci_assert_type(outcome_variable, "is.numeric")
  row_report <- esci_assert_vector_valid_length(
    outcome_variable,
    lower = 2,
    lower_inclusive = FALSE,
    na.invalid = FALSE
  )

  # Check save_raw_data
  esci_assert_type(save_raw_data, "is.logical")


  # Do the analysis --------------------------------------------------
  # Create overview -- which will gracefully deal with missing and n=0 or n=1
  all_overview <- overview.vector(
    outcome_variable = outcome_variable,
    outcome_variable_name = outcome_variable_name,
    conf_level = conf_level,
    assume_equal_variance = FALSE
  )

  # From the overview function, get just the valid groups
  no_miss_overview <- all_overview[row.names(all_overview) != "missing", ]
  overview <- no_miss_overview[no_miss_overview$n > 1, ]

  # Dispatch only valid groups to base function
  estimate <- estimate_magnitude.base(
    overview_table = overview,
    outcome_variable_name = outcome_variable_name,
    conf_level = conf_level
  )

  estimate$overview <- all_overview
  estimate$properties$data_type <- "vector"
  estimate$properties$data_source <- NULL

  # 2 alpha CI for median
  mdn_2a <- statpsych::ci.median1(
    alpha = (1 - conf_level)*2,
    y =outcome_variable[!is.na(outcome_variable)]
  )

  estimate$es_median$ta_LL <- mdn_2a[1, "LL"]
  estimate$es_median$ta_UL <- mdn_2a[1, "UL"]

  # Store raw data -----------------------------------------------
  if (save_raw_data) {
    estimate$raw_data <- data.frame(
      grouping_variable = outcome_variable_name,
      outcome_variable = outcome_variable
    )
  }

  return(estimate)
}


estimate_magnitude.data.frame <- function(
  data,
  outcome_variable,
  conf_level = 0.95,
  save_raw_data = TRUE
) {


  # Input Checks -------------------------------------------------------------
  # This function expects:
  #   data to be a data frame
  #   outcome_variable to be a numeric column in data, with more than 2 rows

  esci_assert_type(data, "is.data.frame")

  # Validate this outcome variable
  esci_assert_valid_column_name(data, outcome_variable)
  esci_assert_column_type(data, outcome_variable, "is.numeric")
  esci_assert_column_has_valid_rows(
    data,
    outcome_variable,
    lower = 2,
    na.rm = TRUE
  )

  # Now pass along to the .vector version of this function
  estimate <- estimate_magnitude.vector(
    outcome_variable = data[[outcome_variable]],
    outcome_variable_name = outcome_variable,
    conf_level = conf_level,
    save_raw_data = save_raw_data
  )

  estimate$properties$data_type <- "data.frame"
  estimate$properties$data_source <- deparse(substitute(data))

  return(estimate)

}


estimate_magnitude.jamovi <- function(
  data,
  outcome_variables,
  conf_level = 0.95,
  save_raw_data = TRUE
) {

  res <- list()

  # Cycle through the list of columns;
  #  for each call estimate_magnitude.data-frame, which handles 1 column
  for (outcome_variable in outcome_variables) {

    # Now pass along to the .vector version of this function
    res[[outcome_variable]] <- estimate_magnitude.data.frame(
      data = data,
      outcome_variable = outcome_variable,
      conf_level = conf_level,
      save_raw_data = save_raw_data
    )

  }

  res <- esci_estimate_consolidate(res)
  class(res) <- "esci_estimate"

  return(res)

}
rcalinjageman/esci4 documentation built on May 18, 2023, 4:01 a.m.