R/incidence_utils.R

Defines functions verticalize_incidence create_profiler create_step_tracker calc_weekly_order calc_weekly_delay default.params create_syndrome_columns

Documented in calc_weekly_delay calc_weekly_order create_step_tracker create_syndrome_columns verticalize_incidence

#' Create syndrom columns from factor encoded column
#'
#' Create one boolean column for each level of the given column
#'
#' @param weekly weekly data.frame()
#' @param column column containing syndrom name
#' @export
create_syndrome_columns = function(weekly, column) {
  syndromes = levels(weekly[[column]])
  names(syndromes) = syndromes

  for(i in 1:length(syndromes)) {
    n = names(syndromes)[i]
    weekly[, syndromes[i] ] = ifelse( weekly[[column]] == n, TRUE, FALSE)
  }
  attr(weekly, "syndromes") <- syndromes
  weekly
}

#' Default set parameters replace default value if exits in params
#'
#' @noRd
#' @param params list of parameters
#' @param def.params default parameters list
default.params = function(params, def.params) {
  for(p in names(def.params)) {
    v = params[[p]]
    if( is.null(v) ) {
      v = def.params[[p]]
    }
    params[[p]] = v
  }
  params
}


#' Compute delay between 2 surveys for each participant
#'
#' weekly should be ordered by person_id then timestamp !
#' @param weekly data.frame with at least (person_id, [time.col]) columns
#' @param time.col column containg time value to compute delay from
#' @export
calc_weekly_delay = function(weekly, time.col) {
  unlist(tapply(weekly[, time.col], list(weekly$person_id), function(x) { c(NA, diff(x))} ))
}

#' Compute order of weekly response
#'
#' @param weekly weekly data.frame()
#' @export
calc_weekly_order = function(weekly) {
  weekly = weekly[ order(weekly$person_id, weekly$timestamp),]
  weekly$order = unlist( lapply(tapply(weekly$person_id, weekly$person_id, length), function(n) { 1:n }))
  weekly
}


#' Create a step counter
#'
#' Simple interface to hold a step counter
#'
#' @return list
create_step_tracker = function() {

  steps = list()

  list(
    add = function(name, n) {
      if(is.data.frame(n)) {
        n = nrow(n)
      }

      steps[name] <<- n
    },
    get_steps = function() {
      steps
    }

  )
}

create_profiler = function() {
  times = data.frame()
  last.time = Sys.time()

  list(
    track = function(point, step=NULL) {
      time = Sys.time() - last.time
      r = list(time=time, point=point)
      if(!is.null(step)) {
        r$step = step
      }
      times <<- dplyr::bind_rows(times, r)
      last.time <<- Sys.time()
    },
    get = function() {
      times
    }
  )
}

#' Verticalize incidence data after computation
#'
#' Incidence data are organized horizontally, estimation for each syndrom are in columns (with suffixes .upper, .lower, .w2)
#' This function reshape to a vertical storage format
#' Output will be [syndrome, type, value, upper, lower]
#'
#'
#' @seealso \code{\link{calc_adjusted_incidence}}
#'
#' @param inc data.frame with horizontal incidence (a set of columns for each syndrom)
#' @param ids names of the row indentifiyng column
#' @param syndromes list of syndrome names used to produce this dataset
#' @param syndrome.column name of the column that will contain syndromic name
verticalize_incidence = function(inc, ids, syndromes, syndrome.column='syndrome') {

  if(is.null(syndrome.column)) {
    syndrome.column = formals()$syndrome.column
  }

  # data data.frame
  # id.vars id columns
  # vars list of vars column [var].[mesure]
  # measures list of measures
  # v.name : name of column containing variable name in vertical data
  # var.measure : name of the column containing the value of the column with variable name (without measure)
  extract_columns = function(data, id.vars, vars, measures, v.name, var.measure=NULL) {
    dd = lapply(vars, function(variable) {
      if(length(measures) > 0) {
        columns = paste(variable, measures, sep='.')
      } else {
        columns = c()
      }

      if(!is.null(var.measure)) {
        columns = c(columns, variable)
      }

      n = columns[columns %in% names(data)]
      d = data[, c(id.vars, n), drop=FALSE]

      nn = names(d)
      if(!is.null(var.measure)) {
        nn[ nn == variable ] = var.measure
      }
      nn = gsub(paste0("^", variable,"\\."),"", nn)
      names(d) <- nn
      d[, v.name] = variable
      d
    })
    dd = dplyr::bind_rows(dd)
    dd
  }

  # Verticalize syndromes
  data = extract_columns(inc, syndromes,id.vars=ids, measures=c('crude','adj', 'crude.lower','crude.upper','adj.upper','adj.lower'), v.name=syndrome.column, var.measure="count")

  # Verticalize estimation type
  data = extract_columns(data, id.vars=c(ids, syndrome.column), c('count','crude','adj'),  c('upper','lower'), v.name="type", var.measure = "value")

  active = extract_columns(inc, id.vars=c(ids), 'active', c(), v.name=syndrome.column, var.measure = "value")

  active$type = "count"
  active$upper = NA
  active$lower = NA

  data = dplyr::bind_rows(data, active)
  data
}
cturbelin/ifnBase documentation built on Aug. 26, 2024, 12:54 p.m.