R/xts.methods.R

Defines functions binsearch window.xts window_idx index_bsearch .toPOSIXct `[.xts` .subset_xts .subsetTimeOfDay

Documented in .subset_xts window.xts

#
#   xts: eXtensible time-series 
#
#   Copyright (C) 2008  Jeffrey A. Ryan jeff.a.ryan @ gmail.com
#
#   Contributions from Joshua M. Ulrich
#   window.xts contributed by Corwin Joy
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

.subsetTimeOfDay <- function(x, fromTimeString, toTimeString) {
  validateTimestring <- function(time) {
    h    <- "(?:[01]?\\d|2[0-3])"
    hm   <- paste0(h, "(?::?[0-5]\\d)")
    hms  <- paste0(hm, "(?::?[0-5]\\d)")
    hmsS <- paste0(hms, "(?:\\.\\d{1,9})?")
    pattern <- paste(h, hm, hms, hmsS, sep = ")$|^(")
    pattern <- paste0("^(", pattern, "$)")

    if (!grepl(pattern, time)) {
      # FIXME: this isn't necessarily true...
      # colons aren't required, and neither are all of the components
      stop("Supply time-of-day subsetting in the format of T%H:%M:%OS/T%H:%M:%OS",
           call. = FALSE)
    }
  }

  validateTimestring(fromTimeString)
  validateTimestring(toTimeString)

  getTimeComponents <- function(time) {
    # split on decimal point
    time. <- strsplit(time, ".", fixed = TRUE)[[1]]
    hms <- time.[1L]

    # ensure hms string has even nchar
    nocolon <- gsub(":", "", hms, fixed = TRUE)
    if (nchar(nocolon) %% 2 > 0) {
      # odd nchar means leading zero is omitted from hours
      # all other components require zero padding
      hms <- paste0("0", hms)
    }
    # add colons
    hms <- gsub("(.{2}):?", ":\\1", hms, perl = TRUE)
    # remove first character (a colon)
    hms <- substr(hms, 2, nchar(hms))

    # extract components
    comp <- strsplit(hms, ":", fixed = TRUE)[[1]]
    complist <-
      list(hour = comp[1L],
           min = comp[2L],
           sec = comp[3L],
           subsec = time.[2L])
    # remove all missing components
    complist <- complist[!vapply(complist, is.na, logical(1))]
    # convert to numeric
    complist <- lapply(complist, as.numeric)

    # add timezone and return
    c(tz = "UTC", complist)
  }

  # first second in period (no subseconds)
  from <- do.call(firstof, getTimeComponents(fromTimeString)[-5L])
  secBegin <- as.numeric(from) %% 86400L

  # last second in period
  to <- do.call(lastof, getTimeComponents(toTimeString))
  secEnd <- as.numeric(to) %% 86400L

  # do subsetting
  tz <- tzone(x)
  secOfDay <- as.POSIXlt(index(x), tz = tz)
  secOfDay <- secOfDay$hour * 60 * 60 + secOfDay$min * 60 + secOfDay$sec

  if (secBegin <= secEnd) {
    i <- secOfDay >= secBegin & secOfDay <= secEnd
  } else {
    i <- secOfDay >= secBegin | secOfDay <= secEnd
  }
  which(i)
}

.subset_xts <- function(x, i, j, ...) {
  if(missing(i)) {
    i <- 1:NROW(x)
  }
  if(missing(j)) {
    j <- 1:NCOL(x)
  }
  .Call(C__do_subset_xts, x, i, j, FALSE)
}

`.subset.xts` <- `[.xts` <-
function(x, i, j, drop = FALSE, which.i=FALSE,...) 
{
    USE_EXTRACT <- FALSE # initialize to FALSE

    dimx <- dim(x)
    if(is.null(dimx)) {
      nr <- length(x)
      if(nr==0 && !which.i) {
        idx <- index(x)
        if(length(idx) == 0) {
          # this is an empty xts object (zero-length index and no columns)
          # return it unchanged to match [.zoo
          return(x)
        } else {
          return(xts(rep(NA, length(idx)), idx)[i])
        }
      }
      nr <- length(.index(x))
      nc <- 1L
    } else {
      nr <- dimx[1L]
      nc <- dimx[2L]
    }
    
    if(!missing(i)) {
    # test for negative subscripting in i
    if (is.numeric(i)) {
      #if(any(i < 0)) {
      if(.Call(C_any_negative, i)) {
        if(!all(i <= 0))
          stop('only zeros may be mixed with negative subscripts')
        i <- (1:nr)[i]
      }
      # check boundary; length check avoids Warning from max(), and
      # any_negative ensures no NA (as of r608)
      #if(max(i) > nr)
      if(length(i) > 0 && max(i) > nr)
        stop('subscript out of bounds')
      #i <- i[-which(i == 0)]
    } else
    if (timeBased(i) || (inherits(i, "AsIs") && is.character(i)) ) {
      # Fast binary search on set of dates
      i <- window_idx(x, index. = i)
    } else 
    if(is.logical(i)) {
      i <- which(i) #(1:NROW(x))[rep(i,length.out=NROW(x))]
    } else
    if (is.character(i)) {
      time.of.day.pattern <- "(^/T)|(^T.*?/T)|(^T.*/$)"
      if (length(i) == 1 && !identical(integer(), grep(time.of.day.pattern, i[1]))) {
        # time of day subsetting
        ii <- gsub("T", "", i, fixed = TRUE)
        ii <- strsplit(ii, "/", fixed = TRUE)[[1L]]

        if (length(ii) == 1) {
          # i is right open ended (T.*/)
          ii <- c(ii, "23:59:59.999999999")
        } else if (nchar(ii[1L]) == 0) {
          # i is left open ended (/T)
          ii[1L] <- "00:00:00.000000000"
        } # else i is bounded on both sides (T.*/T.*)
        i <- .subsetTimeOfDay(x, ii[1L], ii[2L])
      } else {
        # enables subsetting by date style strings
        # must be able to process - and then allow for operations???

        i.tmp <- NULL
        tz <- as.character(tzone(x))

        for(ii in i) {
          adjusted.times <- .parseISO8601(ii, .index(x)[1], .index(x)[nr], tz=tz)
          if(length(adjusted.times) > 1) {
            i.tmp <- c(i.tmp, index_bsearch(.index(x), adjusted.times$first.time, adjusted.times$last.time))
          }
        }
        i <- i.tmp
      }
      i_len <- length(i)

      if(i_len == 1L)  # IFF we are using ISO8601 subsetting
        USE_EXTRACT <- TRUE
    }
  
    if(!isOrdered(i,strictly=FALSE)) {
      i <- sort(i)
    }
    # subset is picky, 0's in the 'i' position cause failures
    zero.index <- binsearch(0L, i, FALSE)
    if(!is.na(zero.index)) {
      # at least one 0; binsearch returns location of last 0
      i <- i[-(1L:zero.index)]
    }

    if(length(i) <= 0 && USE_EXTRACT) 
      USE_EXTRACT <- FALSE

    if(which.i)
      return(i)

    } # if(!missing(i)) { end

    if (missing(j)) {
      if(missing(i))
        i <- seq_len(nr)

      if(length(x)==0) {
        cdata <- rep(NA, length(i))
        storage.mode(cdata) <- storage.mode(x)
        x.tmp <- .xts(cdata, .index(x)[i], tclass(x), tzone(x),
                      dimnames = list(NULL, colnames(x)))
        return(x.tmp)
      } else {
        if(USE_EXTRACT) {
          return(.Call(C_extract_col,
                       x, as.integer(1:nc),
                       drop,
                       as.integer(i[1]), as.integer(i[length(i)])))
        } else {
          return(.Call(C__do_subset_xts,
                       x, as.integer(i),
                       as.integer(1:nc), 
                       drop))
        }
      }
    } else
    # test for negative subscripting in j
    if (is.numeric(j)) {
      if(min(j,na.rm=TRUE) < 0) {
        if(max(j,na.rm=TRUE) > 0)
          stop('only zeros may be mixed with negative subscripts')
        j <- (1:nc)[j]
      }
      if(max(j,na.rm=TRUE) > nc)
        stop('subscript out of bounds')
    } else
    if(is.logical(j)) {
      if(length(j) == 1) {
        j <- (1:nc)[rep(j, nc)]
      }
      else if (length(j) > nc) {
          stop("(subscript) logical subscript too long")
      } else j <- (1:nc)[j]
    } else
    if(is.character(j)) {
      j <- match(j, colnames(x), nomatch=0L)
      # ensure all j are in colnames(x)
      if(any(j==0))
        stop("subscript out of bounds")
    }

    j0 <- which(!as.logical(j))
    if(length(j0)) 
      j <- j[-j0]
    if(length(j) == 0 || (length(j)==1 && (is.na(j) || j==0))) {
      if(missing(i))
        i <- seq_len(nr)

      output <- .xts(coredata(x)[i,j,drop=FALSE], .index(x)[i],
                     tclass(x), tzone(x), class = class(x))
      xtsAttributes(output) <- xtsAttributes(x)
      return(output)
    } 
    if(missing(i))
      return(.Call(C_extract_col, x, as.integer(j), drop, 1, nr))
    if(USE_EXTRACT) {
          return(.Call(C_extract_col,
                       x, as.integer(j),
                       drop,
                       as.integer(i[1]), as.integer(i[length(i)])))
    } else
    return(.Call(C__do_subset_xts, x, as.integer(i), as.integer(j), drop))
}

# Replacement method for xts objects
#
# Adapted from [.xts code, making use of NextGeneric as
# replacement function in R already preserves all attributes
# and index value is left untouched

`[<-.xts` <-
#`xtsreplacement` <-
function(x, i, j, value) 
{
    if (!missing(i)) {
      i <- x[i, which.i=TRUE]
    }
    .Class <- "matrix"
    NextMethod(.Generic)
}

# Convert a character or time type to POSIXct for use by subsetting and window
# We make this an explicit function so that subset and window will convert dates consistently.
.toPOSIXct <-
function(i, tz) {
  if(inherits(i, "POSIXct")) {
    dts <- i
  } else if(is.character(i)) {
    dts <- as.POSIXct(as.character(i),tz=tz)  # Need as.character because i could be AsIs from I(dates)
  } else if (timeBased(i)) {
    if(inherits(i, "Date")) {
      dts <- as.POSIXct(as.character(i),tz=tz)
    } else {
      # force all other time classes to be POSIXct
      dts <- as.POSIXct(i,tz=tz)
    }
  } else {
    stop("invalid time / time based class")
  }
  dts
}

# find the rows of index. where the date is in [start, end].
# use binary search.
# convention is that NA start or end returns empty
index_bsearch <- function(index., start, end)
{
  if(!is.null(start) && is.na(start)) return(NULL)
  if(!is.null(end) && is.na(end)) return(NULL)

  if(is.null(start)) {
    si <- 1
  } else {
    si <- binsearch(start, index., TRUE)
  }
  if(is.null(end)) {
    ei <- length(index.)
  } else {
    ei <- binsearch(end, index., FALSE)
  }
  if(is.na(si) || is.na(ei) || si > ei) return(NULL)
  firstlast <- seq.int(si, ei)
  firstlast
}

# window function for xts series
# return indexes in x matching dates
window_idx <- function(x, index. = NULL, start = NULL, end = NULL)
{
  if(is.null(index.)) {
    usr_idx <- FALSE
    index. <- .index(x)
  } else {
    # Translate the user index to the xts index
    usr_idx <- TRUE
    idx <- .index(x)

    index. <- .toPOSIXct(index., tzone(x))
    index. <- unclass(index.)
    index. <- index.[!is.na(index.)]
    if(is.unsorted(index.)) {
      # index. must be sorted for index_bsearch
      # N.B!! This forces the returned values to be in ascending time order, regardless of the ordering in index, as is done in subset.xts.
      index. <- sort(index.)
    }
    # Fast search on index., faster than binsearch if index. is sorted (see findInterval)
    base_idx <- findInterval(index., idx)
    base_idx <- pmax(base_idx, 1L)
    # Only include indexes where we have an exact match in the xts series
    match <- idx[base_idx] == index.
    base_idx <- base_idx[match]
    index. <- index.[match]
    index. <- .POSIXct(index., tz = tzone(x))
    if(length(base_idx) < 1) return(x[NULL,])
  }

  if(!is.null(start)) {
    start <- .toPOSIXct(start, tzone(x))
  }

  if(!is.null(end)) {
    end <- .toPOSIXct(end, tzone(x))
  }

  firstlast <- index_bsearch(index., start, end)

  if(usr_idx && !is.null(firstlast)) {
    # Translate from user .index to xts index
    # We get back upper bound of index as per findInterval
    tmp <- base_idx[firstlast]

    res <- .Call(C_fill_window_dups_rev, tmp, .index(x))
    firstlast <- rev(res)
  }

  firstlast
}

# window function for xts series, use binary search to be faster than base zoo function
# index. defaults to the xts time index.  If you use something else, it must conform to the standard for order.by in the xts constructor.
# that is, index. must be time based,
window.xts <- function(x, index. = NULL, start = NULL, end = NULL, ...)
{
  # scalar NA values are treated as NULL
  if (isTRUE(is.na(start))) start <- NULL
  if (isTRUE(is.na(end))) end <- NULL
  
  if(is.null(start) && is.null(end) && is.null(index.)) return(x)

  # dispatch to window.zoo() for yearmon and yearqtr
  if(any(tclass(x) %in% c("yearmon", "yearqtr"))) {
    return(NextMethod(.Generic))
  }

  firstlast <- window_idx(x, index., start, end) # firstlast may be NULL

  .Call(C__do_subset_xts,
     x, as.integer(firstlast),
     seq.int(1, ncol(x)),
     drop = FALSE)
}

# Declare binsearch to call the routine in binsearch.c
binsearch <- function(key, vec, start=TRUE) {
  # Convert to double if both are not integer
  if (storage.mode(key) != storage.mode(vec)) {
    storage.mode(key) <- storage.mode(vec) <- "double"
  }
  .Call(C_binsearch, key, vec, start)
}

# Unit tests for the above code may be found in runit.xts.methods.R

Try the xts package in your browser

Any scripts or data that you put into this service are public.

xts documentation built on April 17, 2023, 1:07 a.m.