R/class-plotdata-line.R

Defines functions lineplot lineplot.dt validateLinePD newLinePD

Documented in lineplot lineplot.dt

newLinePD <- function(.dt = data.table::data.table(),
                         variables = veupathUtils::VariableMetadataList(),
                         viewport = list('xMin' = NULL,
                                         'xMax' = NULL),
                         binWidth,
                         value = character(),
                         errorBars = logical(),
                         overlayValues = veupathUtils::BinList(),
                         sampleSizes = logical(),
                         completeCases = logical(),
                         evilMode = character(),
                         numeratorValues = character(),
                         denominatorValues = character(),
                         verbose = logical(),
                         ...,
                         class = character()) {

  .pd <- newPlotdata(.dt = .dt,
                     variables = variables,
                     overlayValues = overlayValues,
                     sampleSizes = sampleSizes,
                     completeCases = completeCases,
                     inferredVarAxis = 'y',
                     evilMode = evilMode,
                     verbose = verbose,
                     class = "lineplot")

  attr <- attributes(.pd)
  variables <- attr$variables

  x <- veupathUtils::findColNamesFromPlotRef(variables, 'xAxis')
  xType <- veupathUtils::findDataTypesFromPlotRef(variables, 'xAxis')
  y <- veupathUtils::findColNamesFromPlotRef(variables, 'yAxis')
  yType <- veupathUtils::findDataTypesFromPlotRef(variables, 'yAxis')
  group <- veupathUtils::findColNamesFromPlotRef(variables, 'overlay')
  panel <- findPanelColName(veupathUtils::findVariableSpecFromPlotRef(variables, 'facet1'), 
                            veupathUtils::findVariableSpecFromPlotRef(variables, 'facet2'))

  if (yType == 'STRING') {
    if (is.null(numeratorValues)) {
      stop("Numerator values must be specified for categorical y-axes.")
    }
    if (is.null(denominatorValues)) {
      denominatorValues <- unique(.pd[[y]])
    }
    #validate num and denom values actually exist as part of the y values
    # Removing validation for numerator as part of fixing #175. If the numerator is not in the data, we get all 0s.
    # validateValues(numeratorValues, .pd[[y]])
    validateValues(denominatorValues, .pd[[y]])
    veupathUtils::logWithTime('Numerator and denominator values have been validated.', verbose)

    if (value != 'proportion') { stop('`value` parameter must be `proportion` for categorical y-axes.') }
  } else {
    if (!!length(c(numeratorValues,denominatorValues))) {
      warning("Numerator and/ or denominator values supplied for non-categorical y-axis. These will be ignored.")
    }

    if (value %ni% c('mean', 'median', 'geometricMean')) { stop('`value` parameter must be `mean`, `geometricMean` or `median` for numeric or date y-axes.')}
  } 

  # if no binWidth is provided, find one. if the user doesnt want binning they can set binWidth to 0
  # if someone complains about that well add a boolean param to indicate if binning is desired
  if (xType != 'STRING') {
    if (!length(.pd[[x]])) {
      binWidth <- 0
      binSlider <- list('min'=jsonlite::unbox(NA), 'max'=jsonlite::unbox(NA), 'step'=jsonlite::unbox(NA))
      binSpec <- list('type'=jsonlite::unbox('binWidth'), 'value'=jsonlite::unbox(NA))
      viewport <- list('xMin'=0, 'xMax'=-Inf)
      attr$viewport <- list('xMin'=jsonlite::unbox(""), 'xMax'=jsonlite::unbox(""))
      veupathUtils::logWithTime('No complete cases found.', verbose)
    } else {  
      # think we need to take viewport as input, even if we dont want semantic zoom
      # for consistent bins across the annotated range, we need a consistent range/ bin start
      if (is.null(viewport)) {
        viewport <- findViewport(.pd[[x]], xType)
        veupathUtils::logWithTime('Determined default viewport.', verbose)
      } else {
        viewport <- validateViewport(viewport, xType, verbose)
      }
      attr$viewport <- lapply(viewport, as.character)
      attr$viewport <- lapply(attr$viewport, jsonlite::unbox)
  
      if (is.null(binWidth)) {
        # if we want semantic zoom, then use xVP here instead, see histogram as ex
        binWidth <- findBinWidth(.pd[[x]])
        veupathUtils::logWithTime('Determined ideal bin width.', verbose)
      }
      binSlider <- findBinSliderValues(.pd[[x]], xType, binWidth, 'binWidth')
  
      if (xType %in% c('NUMBER', 'INTEGER')) {
        binSpec <- list('type'=jsonlite::unbox('binWidth'), 'value'=jsonlite::unbox(binWidth))
      } else {
        numericBinWidth <- as.numeric(gsub("[^0-9.-]", "", binWidth))
        if (is.na(numericBinWidth)) { numericBinWidth <- 1 }
        unit <- veupathUtils::trim(gsub("^[[:digit:]].", "", binWidth))
        binSpec <- list('type'=jsonlite::unbox('binWidth'), 'value'=jsonlite::unbox(numericBinWidth), 'units'=jsonlite::unbox(unit))
      }
      veupathUtils::logWithTime('Determined bin width slider min, max and step values.', verbose)
    }
    attr$binSlider <- binSlider
    attr$binSpec <- binSpec
  } else {
    if (!is.null(binWidth)) {
      warning("X-axis must be a continuous number or date in order to be binned. Ignoring `binWidth`.")
    }
    if (!is.null(viewport)) {
      warning("X-axis must be a continuous number or date to apply a viewport range. Ignoring `viewport`.")
    }
  }
  
  # TODO unit tests for ordinal x-axis
  if (value == 'mean') {
    
    mean <- binMean(.pd, x, y, group, panel, NULL, binWidth, viewport, errorBars, xType)
    data.table::setnames(mean, c('binLabel', 'value'), c('seriesX', 'seriesY'))
    .pd <- mean
    veupathUtils::logWithTime('Mean calculated per X-axis value.', verbose)

  } else if (value == 'median') {

    median <- binMedian(.pd, x, y, group, panel, NULL, binWidth, viewport, errorBars, xType)
    data.table::setnames(median, c('binLabel', 'value'), c('seriesX', 'seriesY'))
    .pd <- median
    veupathUtils::logWithTime('Median calculated per X-axis value.', verbose)

  } else if (value == 'geometricMean') {

    mean <- binGeometricMean(.pd, x, y, group, panel, NULL, binWidth, viewport, errorBars, xType)
    data.table::setnames(mean, c('binLabel', 'value'), c('seriesX', 'seriesY'))
    .pd <- mean
    veupathUtils::logWithTime('Geometric mean calculated per X-axis value.', verbose)
  
  } else if (value == 'proportion') {

    proportion <- binCategoryProportion(.pd, x, y, group, panel, NULL, binWidth, viewport, errorBars, numeratorValues, denominatorValues, xType)
    data.table::setnames(proportion, c('binLabel', 'value'), c('seriesX', 'seriesY'))
    .pd <- proportion
    veupathUtils::logWithTime('Y-axis category proportions calculated per X-axis value.', verbose)

  }

  .pd$seriesY <- lapply(.pd$seriesY, as.character)
  if (class(.pd$seriesY) != 'list') .pd$seriesY <- list(list(.pd$seriesY))

  attr$names <- names(.pd)

  veupathUtils::setAttrFromList(.pd, attr)

  return(.pd)
}

validateLinePD <- function(.line, verbose) {
  variables <- attr(.line, 'variables')
  xShape <- veupathUtils::findDataShapesFromPlotRef(variables, 'xAxis')
  if (!xShape %in% c('CONTINUOUS','ORDINAL')) {
    stop('The independent axis must be continuous or ordinal for lineplot.')
  }
  veupathUtils::logWithTime('Line plot request has been validated!', verbose)

  return(.line)
}

#' Line Plot as data.table
#'
#' This function returns a data.table of  
#' plot-ready data with one row per group (per panel). Columns 
#' 'seriesX' and 'seriesY' contain the raw data for the 
#' line plot. Column 'group' and 'panel' specify the group the 
#' series data belongs to.
#' 
#' @section Evil Mode:
#' An `evilMode` exists. It will do the following: \cr
#' - when `strataVariables` it will return 'no data' as a regular value for strata vars but will discard such cases for the axes vars. \cr
#' - when `allVariables` it will return 'no data' as a regular value for all variables. \cr
#' - when `noVariables` it will do the sensible thing and return complete cases only. \cr
#' - not return statsTables \cr
#' - allow smoothed means and agg values etc over axes values where we have no data 
#' for the strata vars \cr
#' - return a total count of plotted incomplete cases \cr
#' - represent missingness poorly, conflate the stories of completeness and missingness, 
#' mislead you and steal your soul \cr
#' @param data data.frame to make plot-ready data for
#' @param variables veupathUtils::VariableMetadataList
#' sourceId and its position in the plot. Recognized plotRef values are 'xAxisVariable', 
#' 'yAxisVariable', 'overlayVariable', 'facetVariable1' and 'facetVariable2'
#' @param binWidth numeric value indicating width of bins, character (ex: 'year') if xaxis is a date
#' @param value character indicating whether to calculate 'mean', 'median', 'geometricMean', 'proportion' for y-axis
#' @param errorBars boolean indicating if we want 95% confidence intervals per x-axis tick
#' @param viewport List of min and max values to consider as the range of data
#' @param numeratorValues character vector of values from the y-axis variable to consider the numerator
#' @param denominatorValues character vector of values from the y-axis variable to consider the denominator
#' @param overlayValues veupathUtils::BinList providing overlay values of interest
#' @param sampleSizes boolean indicating if sample sizes should be computed
#' @param completeCases boolean indicating if complete cases should be computed
#' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') 
#' @param collectionVariablePlotRef string indicating the plotRef to be considered as a collectionVariable. 
#' Accepted values are 'overlayVariable' and 'facetVariable1'. Required whenever a set of 
#' variables should be interpreted as a collectionVariable.
#' @param computedVariableMetadata named list containing metadata about a computed variable(s) involved in a plot. 
#' Metadata can include 'displayName', 'displayRangeMin', 'displayRangeMax', and 'collectionVariable'. Will be included as an attribute of the returned plot object.
#' @param verbose boolean indicating if timed logging is desired
#' @return data.table plot-ready data
#' @examples
#' df <- data.table('entity.xvar' = sample(1:20, 100, replace=T),
#'                  'entity.yvar' = rnorm(100), stringsAsFactors = F)
#' 
#' # Create VariableMetadataList that specifies variable role in the plot and supplies variable metadata
#' variables <- veupathUtils::VariableMetadataList(
#'   veupathUtils::VariableMetadata(
#'     variableClass = veupathUtils::VariableClass(value = 'native'),
#'     variableSpec = veupathUtils::VariableSpec(variableId = 'xvar', entityId = 'entity'),
#'     plotReference = veupathUtils::PlotReference(value = 'xAxis'),
#'     dataType = veupathUtils::DataType(value = 'NUMBER'),
#'     dataShape = veupathUtils::DataShape(value = 'CONTINUOUS')
#'   ),
#'   veupathUtils::VariableMetadata(
#'     variableClass = veupathUtils::VariableClass(value = 'native'),
#'     variableSpec = veupathUtils::VariableSpec(variableId = 'yvar', entityId = 'entity'),
#'     plotReference = veupathUtils::PlotReference(value = 'yAxis'),
#'     dataType = veupathUtils::DataType(value = 'NUMBER'),
#'     dataShape = veupathUtils::DataShape(value = 'CONTINUOUS')
#'   )
#' )
#' 
#' # Returns a data table with plot-ready data
#' dt <- lineplot.dt(df, map, value = 'median')
#' @export
lineplot.dt <- function(data, 
                         variables, 
                         binWidth = NULL, 
                         value = c('mean',
                                   'median',
                                   'geometricMean',
                                   'proportion'),
                         errorBars = c(TRUE, FALSE),
                         viewport = NULL,
                         numeratorValues = NULL,
                         denominatorValues = NULL,
                         overlayValues = NULL,
                         sampleSizes = c(TRUE, FALSE),
                         completeCases = c(TRUE, FALSE),
                         evilMode = c('noVariables', 'allVariables', 'strataVariables'),
                         verbose = c(TRUE, FALSE)) {

  value <- veupathUtils::matchArg(value)
  errorBars <- veupathUtils::matchArg(errorBars)
  sampleSizes <- veupathUtils::matchArg(sampleSizes)
  completeCases <- veupathUtils::matchArg(completeCases)
  evilMode <- veupathUtils::matchArg(evilMode) 
  verbose <- veupathUtils::matchArg(verbose)  

  if (!'data.table' %in% class(data)) {
    data.table::setDT(data)
  }

  xVM <- veupathUtils::findVariableMetadataFromPlotRef(variables, 'xAxis')
  if (is.null(xVM)) {
    stop("Must provide x-axis variable for plot type line.")
  }

  yVM <- veupathUtils::findVariableMetadataFromPlotRef(variables, 'yAxis')
  collectionVM <- veupathUtils::findCollectionVariableMetadata(variables)
  if (is.null(yVM)) {
    if (is.null(collectionVM)) {
      stop("Must provide y-axis variable for plot type line.")
    }
  }

  # Handle collectionVars
  if (!is.null(collectionVM)) {
    if (!collectionVM@plotReference@value %in% c('overlay', 'facet1', 'facet2')) stop('Collection variable PlotReference must be either overlay, facet1, or facet2 for lineplot.')
  }


  .line <- newLinePD(.dt = data,
                            variables = variables,
                            viewport = viewport,
                            numeratorValues = numeratorValues,
                            denominatorValues = denominatorValues,
                            binWidth,
                            value = value,
                            errorBars = errorBars,
                            overlayValues = overlayValues,
                            sampleSizes = sampleSizes,
                            completeCases = completeCases,
                            evilMode = evilMode,
                            verbose = verbose)

  .line <- validateLinePD(.line, verbose)
  veupathUtils::logWithTime(paste('New line plot object created with parameters viewport =', viewport, 
                                                                             ', binWidth =', binWidth, 
                                                                             ', value =', value, 
                                                                             ', errorBars =', errorBars, 
                                                                             ', evilMode =', evilMode, 
                                                                             ', numeratorValues = ', numeratorValues, 
                                                                             ', denominatorValues = ', denominatorValues, 
                                                                             ', sampleSizes = ', sampleSizes,
                                                                             ', completeCases = ', completeCases,
                                                                             ', verbose = ', verbose), verbose)

  return(.line)
}

#' Line Plot data file
#'
#' This function returns the name of a json file containing 
#' plot-ready data with one row per group (per panel). Columns 
#' 'seriesX' and 'seriesY' contain the raw data for the 
#' line plot. Column 'group' and 'panel' specify the group the 
#' series data belongs to.
#' 
#' @section Evil Mode:
#' An `evilMode` exists. It will do the following: \cr
#' - when `strataVariables` it will return 'no data' as a regular value for strata vars but will discard such cases for the axes vars. \cr
#' - when `allVariables` it will return 'no data' as a regular value for all variables. \cr
#' - when `noVariables` it will do the sensible thing and return complete cases only. \cr
#' - not return statsTables \cr
#' - allow smoothed means and agg values etc over axes values where we have no data for 
#' the strata vars \cr
#' - return a total count of plotted incomplete cases \cr
#' - represent missingness poorly, conflate the stories of completeness and missingness, 
#' mislead you and steal your soul \cr
#' @param data data.frame to make plot-ready data for
#' @param variables veupathUtils::VariableMetadataList
#' and its position in the plot. Recognized plotRef values are 'xAxisVariable', 'yAxisVariable', 
#' 'overlayVariable', 'facetVariable1' and 'facetVariable2'
#' @param binWidth numeric value indicating width of bins, character (ex: 'year') if xaxis is a date
#' @param value character indicating whether to calculate 'mean', 'median', 'geometricMean', 'proportion' for y-axis
#' @param errorBars boolean indicating if we want 95% confidence intervals per x-axis tick
#' @param viewport List of min and max values to consider as the range of data
#' @param numeratorValues character vector of values from the y-axis variable to consider the numerator
#' @param denominatorValues character vector of values from the y-axis variable to consider the denominator
#' @param overlayValues veupathUtils::BinList providing overlay values of interest
#' @param sampleSizes boolean indicating if sample sizes should be computed
#' @param completeCases boolean indicating if complete cases should be computed
#' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') 
#' @param collectionVariablePlotRef string indicating the plotRef to be considered as a collectionVariable. 
#' Accepted values are 'overlayVariable' and 'facetVariable1'. Required whenever a set of variables 
#' should be interpreted as a collectionVariable.
#' @param computedVariableMetadata named list containing metadata about a computed variable(s) involved in a plot. 
#' Metadata can include 'displayName', 'displayRangeMin', 'displayRangeMax', and 'collectionVariable'. Will be included as an attribute of the returned plot object.
#' @param verbose boolean indicating if timed logging is desired
#' @return character name of json file containing plot-ready data
#' @examples
#' # Construct example data
#' df <- data.table('entity.xvar' = sample(1:20, 100, replace=T),
#'                  'entity.yvar' = rnorm(100), stringsAsFactors = F)
#' 
#' # Create VariableMetadataList that specifies variable role in the plot and supplies variable metadata
#' variables <- veupathUtils::VariableMetadataList(
#'   veupathUtils::VariableMetadata(
#'     variableClass = veupathUtils::VariableClass(value = 'native'),
#'     variableSpec = veupathUtils::VariableSpec(variableId = 'xvar', entityId = 'entity'),
#'     plotReference = veupathUtils::PlotReference(value = 'xAxis'),
#'     dataType = veupathUtils::DataType(value = 'NUMBER'),
#'     dataShape = veupathUtils::DataShape(value = 'CONTINUOUS')
#'   ),
#'   veupathUtils::VariableMetadata(
#'     variableClass = veupathUtils::VariableClass(value = 'native'),
#'     variableSpec = veupathUtils::VariableSpec(variableId = 'yvar', entityId = 'entity'),
#'     plotReference = veupathUtils::PlotReference(value = 'yAxis'),
#'     dataType = veupathUtils::DataType(value = 'NUMBER'),
#'     dataShape = veupathUtils::DataShape(value = 'CONTINUOUS')
#'   )
#' )
#' 
#' # Returns the name of a json file
#' lineplot(df, map, value = 'median')
#' @export
lineplot <- function(data,
                      variables,
                      binWidth = NULL,
                      value = c('mean', 
                                'median',
                                'geometricMean',
                                'proportion'),
                      errorBars = c(TRUE, FALSE),
                      viewport = NULL,
                      numeratorValues = NULL,
                      denominatorValues = NULL,
                      overlayValues = NULL,
                      sampleSizes = c(TRUE, FALSE),
                      completeCases = c(TRUE, FALSE),
                      evilMode = c('noVariables', 'allVariables', 'strataVariables'),
                      collectionVariablePlotRef = NULL,
                      computedVariableMetadata = NULL,
                      verbose = c(TRUE, FALSE)) {

  verbose <- veupathUtils::matchArg(verbose)

  .line <- lineplot.dt(data,
                           variables,
                           binWidth,
                           value = value,
                           errorBars = errorBars,
                           viewport = viewport,
                           numeratorValues = numeratorValues,
                           denominatorValues = denominatorValues,
                           overlayValues = overlayValues,
                           sampleSizes = sampleSizes,
                           completeCases = completeCases,
                           evilMode = evilMode,
                           verbose = verbose)
                           
  outFileName <- writeJSON(.line, evilMode, 'lineplot', verbose)

  return(outFileName)
}
VEuPathDB/plot.data documentation built on Feb. 20, 2025, 6:33 p.m.