R/geoxpreanalysisdata.R

Defines functions GeoExperimentPreanalysisData

Documented in GeoExperimentPreanalysisData

# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Creates a GeoExperimentPreanalysisData object.
#'
#' @param obj a GeoTimeseries object.
#' @param period.lengths an integer-valued vector of length 2 or 3, denoting
#'   the lengths (in days) of pre-period, test, and the (optional)
#'   cooldown periods, respectively. The test period must be at least 7
#'   days; the pre-period must be at least as long as the test period. The
#'   cooldown period can be 0 or more days; if it is zero, it will be
#'   ignored as if there were only 2 periods.
#' @param geos (GeoStrata or GeoAssignment object) object to use for choosing
#'   the geo groups at the time of simulating (using
#'   SimulateGeoExperimentData); if 'geos' is a GeoAssignment object, then
#'   the geo assignment will be fixed; iteration; if 'geos' is a
#'   GeoStrata, then the geo assignment will be generated by a call to
#'   'Randomize'.
#' @param recycle (flag) if TRUE, creates an augmented data set in order to
#'   create a larger time series, reusing data from the head of the time
#'   series.
#' @return A GeoExperimentPreanalysis object, which inherits from
#'   GeoExperimentData, with NAs in 'geo.group', 'period', and
#'   'assignment'. The default treatment assignment (spend change only for
#'   period 1 and group 2) is stored into the object.
#'
#' @note
#' A GeoExperimentPreanalysisData object stores a historical data set
#' and the information about the length of the experiment periods. It is
#' used as the generator of pseudo-data sets for simulation purposes. See
#' also: 'SimulateGeoExperimentData' to generate a GeoExperimentData
#' object.

GeoExperimentPreanalysisData <- function(obj, period.lengths, geos,
                                         recycle=TRUE) {
  kClassName <- "GeoExperimentPreanalysisData"
  SetMessageContextString(kClassName)
  on.exit(SetMessageContextString())

  assert_that(is.integer.valued(period.lengths),
              length(period.lengths) %in% c(2L, 3L),
              period.lengths[2] >= 7L,
              period.lengths[1] >= period.lengths[2],
              length(period.lengths) == 2 || period.lengths[3] >= 0)
  period.lengths <- as.integer(period.lengths)
  if (length(period.lengths) == 3 && period.lengths[3] == 0) {
    period.lengths <- period.lengths[-3]
  }
  assert_that(inherits(geos, "GeoStrata") ||
              inherits(geos, "GeoAssignment"))

  orig.date.range <- range(obj[[kDate]])
  n.orig.days <- (as.integer(diff(orig.date.range)) + 1L)

  experiment.length <- sum(period.lengths)
  assert_that(experiment.length <= n.orig.days,
              msg=Messagef(paste0("Not enough data (%d days max) to simulate ",
                  "experiments of length %d days"), n.orig.days,
                  experiment.length))

  if (recycle) {
    # Add data to the tail of time series from the head, omitting the last
    # date.
    obj.add <- obj[obj[[kDate]] < orig.date.range[2], , drop=FALSE]
    weekday.range <- as.integer(strftime(orig.date.range, format="%u"))
    # The weekday of the first date of the augmented portion of the data set
    # must be equal to that of the first date of the original data set.  For
    # example if the original data set is from Wed to Fri the first date in
    # this 2nd half must be the last date of the original data set plus (3 - 5)
    # mod 7 = 5, so that the next date counted from the last date (Fri) is
    # Wed. An exception: if the weekdays are the same, the shift should be 7
    # and not 0, otherwise the dates would be doubled.
    weekday.shift <- ((weekday.range[1] - weekday.range[2]) %% 7L)
    if (weekday.shift == 0) {
      weekday.shift <- 7L
    }
    date.shift <- (orig.date.range[2] - orig.date.range[1])
    obj.add[[kDate]] <- (obj.add[[kDate]] + date.shift + weekday.shift)
    obj <- rbind(obj, obj.add)
    metrics <- GetInfo(obj, "metrics")
    obj <- GeoTimeseries(obj, metrics=metrics)
  }

  # dates: all distinct dates (incl. possibly the fake ones added above).
  dates <- sort(unique(obj[[kDate]]))

  # i.max: number of distinct time series to obtain from this object.
  i.max <- sum(dates + experiment.length - 1L <= max(dates))

  day.index <- as.integer(dates - min(dates) + 1L)

  if (inherits(geos, "GeoAssignment")) {
    geo.assignment <- geos
  } else {
    geo.assignment <- NULL
  }

  obj.result <- GeoExperimentData(obj, periods=NULL,
                                  geo.assignment=geo.assignment,
                                  treat.assignment=DefaultTreatmentAssignment())

  obj.result <- SetInfo(obj.result,
                        i.max=i.max,
                        dates=dates,
                        day.index=day.index,
                        experiment.length=experiment.length,
                        period.lengths=period.lengths,
                        geos=geos)

  if (!inherits(obj.result, kClassName)) {
    class(obj.result) <- c(kClassName, class(obj.result))
  }

  return(obj.result)
}
google/GeoexperimentsResearch documentation built on May 17, 2019, 7:42 a.m.