R/utils.R

Defines functions logit logit_inverse logit_transform guer_cv guerrero auto_lambda box_cox_auto box_cox_transform box_cox_inverse box_cox tstransform fourier_series check_xreg sampling_frequency sampling_sequence future_dates calendar_eom

calendar_eom <- function(date, ...)
{
  if (!is(date, "Date")) date <- as.Date(date)
  # Add a month, then subtract a day:
  date.lt <- as.POSIXlt(date, format = "%Y-%m-%d", tz = tz(date))
  mon <- date.lt$mon + 2
  year <- date.lt$year
  # If month was December add a year
  year <- year + as.integer(mon == 13)
  mon[mon == 13] <- 1
  iso <- ISOdate(1900 + year, mon, 1, hour = 0, tz = tz(date))
  result <- as.POSIXct(iso) - 86400 # subtract one day
  result <- result + (as.POSIXlt(iso)$isdst - as.POSIXlt(result)$isdst)*3600
  result <- as.Date(result)
  return(result)
}

future_dates <- function(start, frequency, n = 1)
{
  if (frequency %in% c("days", "weeks", "months","years")) {
    switch(frequency,
           "days"   = as.Date(start) %m+% days(1:n),
           "weeks"  = as.Date(start) %m+% weeks(1:n),
           "months" = calendar_eom(as.Date(start) %m+% months(1:n)),
           "years"  = as.Date(start) %m+% years(1:n))
  } else if (grepl("secs|mins|hours|",frequency)) {
    # Add one extra point and eliminate first one
    seq(as.POSIXct(start), length.out = n + 1, by = frequency)[-1]
  } else{
    as.Date(start) + (1:n)
  }
}

sampling_sequence <- function(period)
{
  if (period == "days") {
    out <- c(NA, NA, NA, 1, 7, 30.4167, 365.25)
  }
  if (period == "weeks") {
    out <- c(NA, NA, NA, NA, 1, 4.34524, 52.1429)
  }
  if (period == "months") {
    out <- c(NA, NA, NA, NA, NA, 1, 12)
  }
  if (period == "years") {
    out <- c(NA, NA, NA, NA, NA, NA, 1)
  }
  if (grepl("hours", period)) {
    split <- strsplit(period, " ")[[1]]
    if (length(split) > 1) {
      f <- as.numeric(split[1])
    }
    else {
      f <- 1
    }
    out <- c(NA, NA, 1, 24, 168, 730.001, 8760)/f
  }
  if (grepl("mins", period)) {
    split <- strsplit(period, " ")[[1]]
    if (length(split) > 1) {
      f <- as.numeric(split[1])
    }
    else {
      f <- 1
    }
    out <- c(NA, 1, 60, 1440, 10080, 43800, 525600)/f
  }
  if (grepl("secs", period)) {
    split <- strsplit(period, " ")[[1]]
    if (length(split) > 1) {
      f <- as.numeric(split[1])
    }
    else {
      f <- 1
    }
    out <- c(1, 60, 3600, 86400, 604800, 2628000, 31540000)/f
  }
  names(out) <- c("secs", "mins", "hours", "days", "weeks",
                  "months", "years")
  return(out)
}

sampling_frequency <- function(x)
{
  if (is(x, "Date") || length(grep("POSIX", class(x))) > 0) {
    dates <- x
  } else {
    dates <- index(x)
  }
  u <- min(diff(dates))
  count <- attr(u, 'units')
  if (count == 'days') {
    u <- round(u)
    daily   <- c(1, 2, 3)
    weekly  <- c(4, 5, 6, 7)
    monthly <- c(27, 28, 29, 30, 31, 32)
    yearly  <-  355:370
    if (u %in% daily) {
      period <- "days"
      attr(period,"date_class") <- "Date"
    } else if (u %in% weekly) {
      period <- "weeks"
      attr(period,"date_class") <- "Date"
    } else if (u %in% monthly) {
      period <- "months"
      attr(period,"date_class") <- "Date"
    } else if (u %in% yearly) {
      period <- "years"
      attr(period,"date_class") <- "Date"
    } else {
      period <- "unknown"
      attr(period,"date_class") <- "POSIXct"
    }
  } else if (count == "hours") {
    period <- paste0(u, " hours")
    attr(period,"date_class") <- "POSIXct"
  } else if (count == "mins") {
    period <- paste0(u, " mins")
    attr(period,"date_class") <- "POSIXct"
  } else if (count == "secs") {
    period <- paste0(u," secs")
    attr(period,"date_class") <- "POSIXct"
  } else {
    period <- "unknown"
    attr(period,"date_class") <- "POSIXct"
  }
  if (period == "unknown") warning("\ncould not determine sampling frequency")
  return(period)
}

check_xreg <- function(xreg, valid_index)
{
  if (is.null(xreg)) return(xreg)
  n <- length(valid_index)
  if (NROW(xreg) != n) {
    stop("\nxreg does not have the same number of rows as y")
  }
  if (!all.equal(index(xreg),valid_index)) {
    stop("\nxreg time index does not match that of y")
  }
  if (any(is.na(xreg))) {
    stop("\nNAs found in xreg object")
  }
  if (is.null(colnames(xreg))) {
    colnames(xreg) <- paste0("x",1:ncol(xreg))
  }
  return(xreg)
}

fourier_series <- function(dates, period = NULL, K = NULL)
{
  N <- length(dates)
  if (is.character(period) | is.null(period)) {
    pd <- sampling_frequency(dates)
    period <- na.omit(sampling_sequence(pd))
    period <- period[which(period > 1)]
    if (length(period) == 0)
      return(NULL)
  }
  if (is.null(K)) {
    K <- pmax(1, period/2 - 1)
  }
  if (length(period) != length(K)) {
    stop("Need to provide K for each period")
  }
  if (any(K > period/2)) {
    stop("K is too big (greater than period/2)")
  }
  kseq <- numeric(0)
  name_seq <- character(0)
  for (k in seq_along(K)) {
    if (K[k] > 0) {
      kseq <- c(kseq, 1:K[k]/period[k])
      name_seq <- c(name_seq, paste(1:K[k], period[k],
                                    sep = "-"))
    }
  }
  no_dupe <- !duplicated(kseq)
  kseq <- kseq[no_dupe]
  name_seq <- name_seq[no_dupe]
  bigK <- length(kseq)
  design <- matrix(NA_real_, nrow = N, ncol = 2 * bigK)
  labels <- character(length = 2 * bigK)
  for (j in seq_along(kseq)) {
    design[, 2 * j - 1] <- sinpi(2 * kseq[j] * (1:N))
    design[, 2 * j] <- cospi(2 * kseq[j] * (1:N))
    labels[2 * j - 1] <- paste0("Sin", name_seq[j])
    labels[2 * j] <- paste0("Cos", name_seq[j])
  }
  colnames(design) <- labels
  design <- design[, !is.na(colSums(design)), drop = FALSE]
  attr(design, "K") <- K
  attr(design, "period") <- period
  return(design)
}

tstransform <- function(method = "box-cox", lambda = NULL, lower = 0, upper = 1, ...)
{
  method = match.arg(method[1], c("box-cox", "logit"))
  f <- switch(method[1], `box-cox` = box_cox(lambda = lambda, lower = lower, upper = upper, ...),
              logit = logit(lower = lower, upper = upper, ...))
  return(f)
}

box_cox <- function(lambda = NA, lower = 0, upper = 1.5, multivariate = FALSE, ...)
{
  .lambda <- lambda
  .lower <- lower
  .upper <- upper
  if (multivariate) {
    f <- function(y, lambda = .lambda, frequency = 1, ...){
      n <- NCOL(y)
      if (length(frequency) == 1) {
        frequency <- rep(frequency, n)
      } else if (length(frequency) != n ) {
        stop("\nlength of frequency no equal ncol y")
      }
      ixs <- which(frequency > 1)
      # seasonally adjust series
      ynew <- y
      if (length(ixs) > 0) {
        for (i in ixs) {
          tmp <- ts(as.numeric(y[,i]), frequency = frequency[i])
          tmp <- tmp - stlplus(tmp, s.window = "periodic", n.p = frequency[i])$data$seasonal
          ynew[,i] <- as.numeric(tmp)
        }
      }
      if (length(lambda) ==  1) {
        if (is.na(lambda)) {
          lambda <- powerTransform(coredata(ynew))$lambda
          # constrain to lower lambda
          if (any(lambda < .lower)) {
            lambda[which(lambda < .lower)] <- .lower
          }
          if (any(lambda > .upper)) {
            lambda[which(lambda > .upper)] <- .upper
          }
          out <- do.call(cbind, lapply(1:ncol(y), function(i){ box_cox_transform(y[,i], lambda = lambda[i]) }))
          colnames(out) <- colnames(y)
          attr(out, "lambda") <- lambda
          return(out)
        } else {
          lambda <- rep(lambda, n)
          out <- do.call(cbind, lapply(1:ncol(y), function(i){ box_cox_transform(y[,i], lambda = lambda[i]) }))
          colnames(out) <- colnames(y)
          attr(out, "lambda") <- lambda
          return(out)
        }
      } else {
        if (length(lambda) == n) {
          if (any(is.na(lambda))) {
            use <- which(is.na(lambda))
            for (i in use) lambda[i] <- box_cox_auto(y[,use], lower = .lower[1], upper = .upper[1], frequency = frequency[1])
            out <- do.call(cbind, lapply(1:ncol(y), function(i){ box_cox_transform(y[,i], lambda = lambda[i]) }))
            colnames(out) <- colnames(y)
            attr(out, "lambda") <- lambda
            return(out)
          } else {
            out <- do.call(cbind, lapply(1:ncol(y), function(i){ box_cox_transform(y[,i], lambda = lambda[i]) }))
            colnames(out) <- colnames(y)
            attr(out, "lambda") <- lambda
            return(out)
          }
        } else {
          stop("\nlength of lambda should either be 1 or equal to number of columns of data")
        }
      }
    }
    fi <- function(y, lambda, ...){
      out <- do.call(cbind, lapply(1:ncol(y), function(i){ box_cox_inverse(y[,i], lambda = lambda[i]) }))
      colnames(out) <- colnames(y)
      return(out)
    }
  } else {
    f <- function(y, lambda = .lambda, frequency = 1, ...){
      if (NCOL(y) > 1) stop("\ny is multivariate. Call the function with multivariate = TRUE argument.")
      if (is.na(lambda)) lambda <- box_cox_auto(y, lower = .lower, upper = .upper, frequency = frequency)
      out <- box_cox_transform(y, lambda = lambda)
      attr(out, "lambda") <- lambda
      return(out)
    }
    fi <- function(y, lambda, ...){
      box_cox_inverse(y, lambda)
    }
  }
  return(list(transform = f, inverse = fi))
}

box_cox_inverse <- function(y, lambda = 1)
{
  if (lambda < 0) {
    y[y > -1/lambda] <- NA
  }
  if (lambda == 0) {
    y <- exp(y)
  } else {
    xx <- y * lambda + 1
    y <- sign(xx) * abs(xx)^(1/lambda)
  }
  return(y)
}

box_cox_transform <- function(y, lambda = 1)
{
  if (lambda < 0) {
    y[y < 0] <- NA
  }
  if (lambda == 0) {
    y <- log(y)
  } else {
    y <- (sign(y) * abs(y)^lambda - 1)/lambda
  }
  return(y)
}

box_cox_auto <- function(y, lower = 0, upper = 1, nonseasonal_length = 2, frequency = 1)
{
  if (any(y <= 0, na.rm = TRUE)) {
    lower <- max(lower, 0)
  }
  if (length(y) <= 2 * frequency) {
    return(1)
  }
  return(guerrero(y, lower, upper, nonseasonal_length, frequency))
}


auto_lambda <- function(y, lower = 0, upper = 1, nonseasonal_length = 2, frequency = 1, ...)
{
  return( box_cox_auto(y = y, lower = lower, upper = upper, nonseasonal_length = nonseasonal_length,
                       frequency = frequency) )
}

guerrero <- function(x, lower = 0, upper = 1, nonseasonal_length = 2, frequency = 1)
{
  return(optimize(guer_cv, c(lower, upper), x = x, nonseasonal_length = nonseasonal_length, frequency = frequency)$minimum)
}

guer_cv <- function(lambda, x, nonseasonal_length = 2, frequency)
{
  period <- round(max(nonseasonal_length, frequency))
  nobsf <- NROW(x)
  nyr <- floor(nobsf/period)
  nobst <- floor(nyr * period)
  x_mat <- matrix(x[(nobsf - nobst + 1):nobsf], period, nyr)
  x_mean <- apply(x_mat, 2, mean, na.rm = TRUE)
  x_sd <- apply(x_mat, 2, sd, na.rm = TRUE)
  x_rat <- x_sd/x_mean^(1 - lambda)
  return(sd(x_rat, na.rm = TRUE)/mean(x_rat, na.rm = TRUE))
}


logit_transform <- function(x, lower = 0, upper = 1.0, ...) {
  if (any(x > upper, na.rm = TRUE) | any(x < lower, na.rm = TRUE)) stop("\nrange of x is outside of lower and upper")
  return( -1.0 * log( ((upper - lower)/(x - lower)) - 1.0) )
}

logit_inverse <- function(x, lower = 0, upper = 1.0, ...) {
  return((upper - lower)/(1 + exp(-x)) + lower )
}

logit <- function(lower = 0, upper = 1.0, ...)
{
  f <- function(y, ...){
    logit_transform(y, lower, upper)
  }
  fi <- function(y, ...){
    logit_inverse(y, lower, upper)
  }
  return(list(transform = f, inverse = fi))
}
tsmodels/tssimulator documentation built on April 29, 2024, 9:11 a.m.