#' #' \code{RevenueData}
#' #'
#' #' @description Cleans and tidies data for use in growth accounting
#' #' computations for a startup. Turns all dates with 29th of Feb into the 28th.
#' #' Times are converted to dates.
#' #' @param value A vector of containing the revenue per transaction.
#' #' @param from A vector of class \code{POSIXct} or \code{POSIXt},
#' #' recording the date and time each subscription commences.
#' #' @param to A vector of class \code{POSIXct} or \code{POSIXt},
#' #' recording the date and time each subscription ends
#' #' @param start The date at which the analysis outputs should
#' #' commence. By default, the earliest date recorded in
#' #' \code{from}.
#' #' @param end The date at which the analysis ends, which is used to
#' #' determine churn. By default, the most recent date recorded in
#' #' \code{from}.
#' #' @param id A vector of \code{character}, unique identifier for
#' #' subscribers that made the transactions (e.g., email addresses,
#' #' names, subscriber keys).
#' #' @param subscription.length The time unit that describes the
#' #' subscription length: \code{year} to view the data by year,
#' #' \code{quarter}, and \code{month}. This is assumed to be the
#' #' billing period when determining if subscribers have churned or
#' #' not.
#' #' @param subset An optional vector specifying a subset of
#' #' observations to be used in the calculations
#' #' @param profiling A \code{data.frame} containing data, unique by
#' #' \code{id}, to be included in the final
#' #' \code{data.frame}. Either it must contain the unique
#' #' identifiers in a variable called \code{id}, or, the
#' #' \code{rownames} must match the values of \code{id}.
#' #' @param trim.id The maximum length of the strings to be used showing
#' #' ID names (used to avoid situations where string names are so
#' #' long as to make reading of tables impossible.
#' #' @param detail If \code{TRUE}, diagnostics are printed. Defaults to \code{FALSE}.
#' #' @return A \code{\link{data.frame}} where the rows represent
#' #' unique combinations of periods and subscribers. Where a
#' #' subscriber has multiple transactions in a period, they are
#' #' aggregated. Contains the following variables, along with any
#' #' other variables in the \code{data}:
#' #' \itemize{
#' #' \code{id} {The unique identifier.}
#' #' \code{value} {The total fee/price for a subscription.}
#' #' \code{from} {The commencement date of a subscription.}
#' #' \code{from.period} {The \code{period} of the subscription commencement as a character.}
#' #' \code{from.renewal} {The beginning of the renewal period (i.e., the ealiest from data
#' #' of invoices that are in the same period.}
#' #' \code{from.renewal.period} {The \code{period} for \code{from.renewal}.}
#' #' \code{to.renewal} {The end of the renewal period (i.e., the latest end date of invoices
#' #' that are in the same period.}
#' #' \code{to.renewal.period} {The end of the renewal period.}
#' #' \code{period.counter} {The \code{period} for \code{to.renewal}.}
#' #' \code{to} {The end-date of a subscription.}
#' #' \code{to.period} {The \code{period} of the subscription end as a character.}
#' #' \code{subscriber.from} {The date of a customer's first subscription's commencement.}
#' #' \code{subscriber.from.period} {The \code{period} of \code{subscriber.from}}.
#' #' \code{subscriber.to} {The final date of their most recent subscription.}
#' #' \code{subscriber.to.period} {The period of \code{subscriber.to}}.
#' #' \code{last.from} {The \code{from} date of the most recent subscription.}
#' #' \code{last.from.period} {The period of \code{last.from}}.
#' #' \code{churned} {A \code{logical} indicating if the subscriber had ceased subscribing prior to \code{end}}.
#' #' \code{churn} {A \code{logical} indicating if the subscriber had ceased subscribing in that period.}
#' #' \code{tenure} {The number of whole periods from the begining of the first subscription to the end of the most recent.}
#' #' \code{observation} {The invoice number for a particular customer, starting from 1.}
#' #' \code{observation.within.period} {The number of the subscription for a particular customer,
#' #' starting from 1 for each new subscription period (as determined by a common to.period).}
#' #' \code{recurring.value} {The value divided by proportion of the typicaly invoice period
#' #' that was covered by the invoice. There are some rounding error issues (e.g., leap years,
#' #' inconsistencies in how people enter data)}.
#' #' }
#' #' @importFrom lubridate period year years quarter month week weeks
#' #' day days interval floor_date as.duration tz
#' #' @importFrom flipTime Period Periods AsDate DiffPeriod Change29FebTo28th
#' #' @importFrom stats ave
#' #' @importFrom plyr mapvalues
#' RevenueData <- function(value,
#' from,
#' to,
#' start = as.Date(min(from)),
#' end = as.Date(max(from)),
#' id,
#' subscription.length = "year",
#' subset = rep(TRUE, length(id)),
#' profiling = NULL,
#' trim.id = 50,
#' detail = FALSE) #, tolerance = .01)
#' {
#' # Checking the input variables.
#' n = length(value)
#' checkVariableForLengthAndMissingData(value, n)
#' checkVariableForLengthAndMissingData(from, n)
#' checkVariableForLengthAndMissingData(to, n)
#' checkVariableForLengthAndMissingData(id, n)
#'
#' # Removing leap years
#' from <- Change29FebTo28th(from)
#' to <- Change29FebTo28th(to)
#'
#' default.start.end <- start == min(from, na.rm = TRUE) & end == max(from, na.rm = TRUE)
#' # Units.
#' units <- Periods(1, subscription.length)
#' data <- data.frame(id = as.character(id), value, from, to)
#'
#' # Filtering data.
#' n.initial <- nrow(data)
#' if (detail)
#' cat(paste0(n.initial, " transactions.\n"))
#' n.subset <- sum(subset)
#' if (n.subset < n.initial)
#' {
#' if (detail)
#' cat(paste0(n.initial - n.subset, " transactions filtered out.\n"))
#' data <- subset(data, subset = subset)
#' }
#' # Sorting by company name and start-date
#' data <- data[with(data, order(id, from)), ]
#' # Removing observations that start after the end.
#' zero <- data$value == 0
#' n.zero <- sum(zero)
#' if (n.zero > 0)
#' {
#' if (detail)
#' cat(paste0(n.zero, " transactions removed due to having 0 value.\n"))
#' data <- subset(data, !zero)
#' }
#' negative <- data$value < 0
#' n.negative <- sum(negative)
#' if (n.negative > 0)
#' {
#' if (detail)
#' cat(paste0(n.negative, " transactions removed due to having a negative value.\n"))
#' data <- subset(data, !negative)
#' }
#' n <- nrow(data)
#' if (detail)
#' cat(paste0(n, " transactions remaining.\n"))
#' if (n == 0)
#' return(NULL)
#' # Aggregating transactions that occur in the same time period.
#' data$to.period <- Period(data$to, subscription.length)
#' data <- aggregate(value ~ id + from + to, data = data, FUN = sum)#data <- aggregate(value ~ id + from + to, data = data, FUN = sum)
#' n <- nrow(data)
#' if (detail)
#' cat(paste0(n, " aggregated transactions (i.e., summed together when sharing a from and end date) remaining.\n"))
#' # Subscriber-level calculations.
#' id.data <- aggregate(from ~ id, data, min)
#' names(id.data)[2] <- "subscriber.from"
#' id.data$subscriber.to <- aggregate(to ~ id, data, max)[, 2]
#' if (!is.null(profiling))
#' {
#' if (("id" %in% names(profiling)))
#' {
#' profiling.id <- as.character(profiling$id)
#' profiling$id <- NULL
#' }
#' else
#' profiling.id <- rownames(profiling)
#' profiling.id <- as.character(profiling.id)
#'
#' lookup <- match(as.character(id.data$id), profiling.id)
#' if (sum(!is.na(lookup)) == 0)
#' {
#' stop("The 'profiling' data is needs to either contain an 'id' variable, or have rownames that contain the 'id' values.")
#' }
#' else if (sum(is.na(lookup)) > 0)
#' {
#' missing.ids <- paste(id.data$id[is.na(lookup)], collapse = ",")
#' stop(paste0("The 'profiling' data is missing some ids: ", missing.ids))
#' }
#' if (pos <- "value" %in% names(profiling))
#' {
#' names(profiling)[pos] <- "value.profiling"
#' if (detail)
#' cat("'value' in 'profiling' has been renamed as 'value.profiling'.")
#' }
#' if (pos <- "from" %in% names(profiling))
#' {
#' names(profiling)[pos] <- "from.profiling"
#' if (detail)
#' cat("'from' in 'profiling' has been renamed as 'from.profiling'.")
#' }
#' if (pos <- "to" %in% names(profiling))
#' {
#' names(profiling)[pos] <- "to.profiling"
#' if (detail)
#' cat("'to' in 'profiling' has been renamed as 'to.profiling'.")
#' }
#' id.data <- cbind(id.data, profiling[lookup, ])
#' }
#' # Creating time-based metrics.
#' id.data$last.from <- aggregate(from ~ id, data, max)[, 2]
#' id.data$last.from.period <- Period(floor_date(id.data$last.from, subscription.length), subscription.length)
#' if (detail)
#' cat(paste0(nrow(id.data), " subscribers.\n"))
#' id.data$subscription.to <- aggregate(to ~ id, data, max)$to
#' tenure.interval <- interval(id.data$subscriber.from, id.data$subscriber.to)
#' id.data$tenure <- tenure.interval %/% units
#' id.data$subscriber.from.period <- Period(id.data$subscriber.from, subscription.length)
#' id.data$subscriber.to.period <- Period(id.data$subscriber.to, subscription.length)
#' id.data$churned <- id.data$subscriber.to <= end
#' not.churned <- !id.data$churned
#' data <- merge(data, id.data, by = "id", all.x = TRUE, sort = TRUE)
#' data$from.period <- Period(data$from, subscription.length)
#' data$to.period <- Period(data$to, subscription.length)
#' data$id.to.period <- paste(data$id, data$to.period)
#' # Dealing with situations where invoices start or end within a billing cycle
#' fr <- aggregate(from ~ id.to.period, data = data, FUN = min)
#' from.renewal <- mapvalues(data$id.to.period, fr[, 1], as.character(fr[,2]))
#' data$from.renewal <- AsDate(from.renewal)
#' data$from.renewal.period <- Period(data$from.renewal, subscription.length)
#'
#' tmp <- aggregate(from.renewal ~ id, data, max)
#' data$last.from.renewal <- tmp[match(as.character(data$id), as.character(tmp[, 1])), 2]
#' data$last.from.renewal.period <- Period(floor_date(data$last.from.renewal, subscription.length), subscription.length)
#'
#' t <- aggregate(to ~ id.to.period, data = data, FUN = max)
#' to.renewal <- mapvalues(data$id.to.period, t[, 1], as.character(t[,2]))
#' data$to.renewal <- AsDate(to.renewal)
#' data$to.renewal.period <- Period(data$to.renewal, subscription.length)
#' data$id.to.period <- NULL
#'
#' data$churn <- data$churned & data$from.renewal.period == data$last.from.renewal.period
#' data$period.counter <- interval(data$subscriber.from, data$from) %/% units
#' # Sorting.
#' data <- data[order(data$id, data$from),]
#' # Creating a variable indicating observation number. Randomly sorts ties.
#' observation <- observation.within.period <- rep(1, n <- nrow(data))
#' if (n > 1)
#' {
#' ids <- data$id
#' for (i in 2:n)
#' if (ids[i] == ids[i - 1])
#' {
#' if (data$to.period[i] == data$to.period[i - 1])
#' observation.within.period[i] = observation.within.period[i - 1] + 1
#' observation[i] = observation[i - 1] + 1
#'
#' }
#' }
#' data$observation <- observation
#' data$observation.within.period <- observation.within.period
#'
#' data$id <- sub("\\s+$", "", as.character(data$id))
#' if (!default.start.end)
#' {
#' window <- interval(start, end)
#' from0 <- AsDate(data$from.period, on.parse.failure = "silent")
#' to0 <- AsDate(data$to.period, on.parse.failure = "silent")
#'
#' # ignore hour/timezone which has been unreliable since calling aggregate in line 192
#' from <- ISOdate(year(from0), month(from0), day(from0), tz = tz(from0))
#' to <- ISOdate(year(to0), month(to0), day(to0), tz = tz(to0))
#' data <- data[to %within% window | from %within% window | from < start & to > end, ]
#' if (detail)
#' {
#' cat(paste0(nrow(data), " aggregated transactions left after taking 'start' and/or 'end' into account.\n"))
#' cat(paste0(length(unique(data$id)), " subscribers left after taking 'start' and/or 'end' into account.\n"))
#' }
#' }
#' attr(data, "subscription.length") <- subscription.length
#' attr(data, "end") <- end
#' attr(data, "start") <- start
#' # Computing recurring.revenue
#' period.proportion = as.numeric(data$to - data$from, "days") / as.numeric(as.duration(units), "days")
#' rounded.period.proportion = round(period.proportion, 2)
#' rnd = rounded.period.proportion %in% c(.25,.5, .75, 1, 2, 3, 4, 5, 6)
#' period.proportion[rnd] = rounded.period.proportion[rnd]
#' data$recurring.value = data$value / period.proportion
#' class(data) <- c(class(data), "RevenueData")
#' data
#' }
#'
#'
#' #' @details The Start parameter is not passed through to RevenueData, as instead it
#' #' is used to modify the results of analyses that use RevenueData.
#' #' @inherit RevenueData
#' revenueDataForRevenueMetrics <- function(value,
#' from,
#' to,
#' start,
#' end,
#' id,
#' subscription.length,
#' subset,
#' profiling,
#' trim.id)
#' {
#' data <- RevenueData(value,
#' from,
#' to,
#' start = min(from),
#' end,
#' id,
#' subscription.length,
#' subset,
#' profiling,
#' trim.id)
#' if (is.null(data))
#' return(data)
#' attr(data, "start") <- as.Date(start)
#' data
#' }
#'
#'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.