R/utils.R

Defines functions mySpline rollex detect.peak complete.time.series get.period circular.cc select_random_subset

Documented in circular.cc complete.time.series detect.peak get.period mySpline rollex select_random_subset

#' mySpline
#'
#' Interpolate points of a trajectory by spline. Returns the interpolated points
#' along with the data in a single trajectory.
#' @param x,y vectors giving the coordinates of the points to be interpolated.
#' @param n interpolation takes place at n equally spaced points spanning the
#'   interval [xmin, xmax]
#'
#' @details Spline interpolation is made according to stats::spline. See doc for
#'   details and default.
#' @return A list containing components x and y which give the ordinates where
#'   interpolation took place and the interpolated values.
#' @export
#'
#' @examples
#' x <- sin(seq(0,5,0.75))
#' x_spline <- mySpline(seq_along(x), x, 3*length(x))
#' plot(seq_along(x), x)
#' plot(x_spline$x, x_spline$y, type = 'b')
#'
mySpline <- function(x, y, n) {
    fit <- spline(x, y, n)
    x2 <- c(x, fit$x)
    y2 <- c(y, fit$y)
    y2 <- y2[order(x2)]
    x2 <- x2[order(x2)]
    return(list(x = x2, y = y2))
}


#' rollex
#'
#' Extended rolling mean, fill the extremeties with linear extrapolation. This
#' way the output has same length as the original vector.
#' @param x a numerical vector
#' @param k integer, width of the window for rolling mean
#'
#' @return A numerical vector of same length as x.
#' @export
#'
#' @examples
#' # Sinusoid + Gaussian white noise
#' x <- sin(seq(0,30,0.05))
#' x <- x + rnorm(length(x), mean = 0, sd = 0.2)
#' x_rollex <- rollex(x, k = 3)
#' plot(x)
#' lines(x_rollex, col = 'blue', lwd = 2)
rollex <- function(x, k = 5) {
    require(zoo)
    roll <- rollmean(x, k, na.pad = T)
    NonNAindex <- which(!is.na(roll))
    fst <- min(NonNAindex)
    lst <- max(NonNAindex)

    # Left and Right slopes
    lslope <- (roll[fst + 1] - roll[fst])/2
    rslope <- (roll[lst] - roll[lst - 1])/2

    # Replace NAs generated by roll with approx
    for (i in 1:(fst - 1)) {
        roll[i] <- roll[fst] + lslope * (i - fst)
    }
    for (i in (lst + 1):length(roll)) {
        roll[i] <- roll[lst] + rslope * (i - lst)
    }
    return(roll)
}


#' detect.peak
#'
#' Detect peaks by identifying local maxima/minima in a rolling window.
#' @param x numerical vector
#' @param window.size integer, width of rolling window.
#' @param what character indicating whether maxima or minima should be looked
#'   for. One of c('maxi', 'mini').
#'
#' @return A logical vector. TRUE indicates a local maxima/minima. NAs are
#'   padded at the extremeties to keep same length as x.
#' @export
#'
#' @examples
#' x <- sin(seq(0,25,0.1))
#' plot(x)
#' abline(v = which(detect.peak(x, 7, "maxi")), col = 'red', lty = 'dashed')
#' abline(v = which(detect.peak(x, 7, "mini")), col = 'blue', lty = 'dashed')
#'
detect.peak <- function(x, window.size, what = "maxi") {
    require(zoo)
    if (window.size%%2 == 0)
        stop("window size must be odd")
    if (!what %in% c("maxi", "mini"))
        stop("what must be one of c('maxi', 'mini')")
    middle <- ceiling(window.size/2)
    if (what == "maxi") {
        out <- rollapply(x, window.size, function(x) which.max(x) == middle)
    } else if (what == "mini") {
        out <- rollapply(x, window.size, function(x) which.min(x) == middle)
    }
    out <- c(rep(NA, middle - 1), out, rep(NA, middle - 1))
    return(out)
}


#' complete.time.series
#'
#' Add rows for missing measurements in time series.
#' @param data a data.table in long format with at least 4 columns:
#' condition, label, time and measurement.
#' @param cond.col column name for grouping. Typically an ID for experimental
#'   conditions.
#' @param lab.col column name for second grouping. Typically an ID for
#'   trajectories. This ID can be shared between different conditions (first
#'   grouping).
#' @param time.col column name of time.
#' @param time.vector numerical vector. Over which time should ALL time series
#'   span? Missing times will be added to trajectory where it's not present.
#' @param meas.col column name with measurements.
#' @param impute logical. If TRUE, uses linear interpolation to replace NAs.
#' Careful because this won't make the distinction between introduced NAs and
#' the ones already present beforehand.
#'
#' @return A data.table with extra rows for missing measurements.
#' @export
#'
#' @examples
#' # Simulate 10 phase-shifted sinusoids with 3 different level of noises
#' # ("experimental conditions", first grouping).
#' x <- multi_sims(type = "ps", noises = c(0.2,0.4), n = 10)
#' plot_sim(x)
#' # Remove 50 random rows
#' x[, row.nber := 1:nrow(x)] # For manually checking missing values
#' row_to_del <- sample(1:nrow(x), size = 50, replace = F)
#' row_to_keep <- setdiff(1:nrow(x), row_to_del)
#' x <- x[row_to_keep]
#' # Recreate the missing rows
#' x_complete <- complete.time.series(data = x,
#'  cond.col = "noise", lab.col = "variable",
#'  time.col = "Time", time.vector = unique(x$Time),
#'  meas.col = "value")
#'
complete.time.series <- function(data, cond.col, lab.col, time.col, time.vector, meas.col, impute = FALSE){
  require(data.table)
  temp <- CJ(unique(data[[cond.col]]), unique(data[[lab.col]]), time.vector)
  names(temp) <- c(cond.col, lab.col, time.col)
  out <- merge(temp, data, by = c(cond.col, lab.col, time.col), all.x = T)
  if(impute){
    out[, (meas.col) := na.interpolation(get(meas.col)), by = c(cond.col, lab.col)]
  }
  return(out)
}


#' get.period
#'
#' Compute power spectrum and returns period (1/frequency) at which
#' its density is maximized.
#' @param x a numerical vector.
#'
#' @return Period (1/frequency) of maximum power density
#' @export
#'
#' @examples
#' # Regular motif of length 15 repeated 5 times + Linear Trend
#' x <- rep(1:15, 5)
#' x <- x + seq(0, 10, length.out = length(x))
#' x_period <- get.period(x)
#' # Use the period to perform classical decomposition
#' x_decomp <- classical.decomposition(ts = x, period = x_period)
#' plot_decomposition(x_decomp)
#'
get.period <- function(x){
  x.spec <- spectrum(x, plot = FALSE)
  x.period <- round(1/x.spec$freq[which.max(x.spec$spec)])
  return(x.period)
}



#' circular.cc
#'
#' Cross-correlation with circular boundaries.
#' @param x,y numerical vector.
#'
#' @return The vector of cross-correlation between x and y at all possible lags.
#' @export
#'
#' @examples
#' # Same pattern shifted by 2 time units
#' x <- rep(c(0,1,2), 10)
#' y <- rep(c(2,0,1), 10)
#' xy.cc <- circular.cc(x, y)
#' plot(names(xy.cc),xy.cc, type="h",
#' xlab = "Lag", ylab = "Correlation")
#' abline(h = 0, lty = "dashed", col = "blue")
#'
circular.cc <- function(x, y){
  # Cross-correlation with circular boundaries
  if(length(x) != length(y)) stop("Both vectors should be of the same length")
  require(wavethresh)
  out <- rep(NA, length(x))
  lags <- 0:length(x)
  for(i in 1:length(lags)){
    out[i] <- cor(x = x, y = guyrot(y, lags[i]), method = "pearson")
  }
  names(out) <- lags
  return(out)
}


#' Select Random
#'
#' Select a random subset of time series from a dataset by using unique ID
#' subset. Optionally, a minimum number of observation for the individual IDs
#' can be provided.
#'
#' @param in.file A character or a variable for the current environment. If a
#'   character, a path to a .csv file is expected.
#' @param ntraj Numeric. How many unique IDs to extract?
#' @param col.uniqID Character. Name of the column with unique IDs.
#' @param col.time Character. Name of the column with time.
#' @param col.whatokeep Optional character. Name of the columns to be returned
#'   in the output. If NULL returns all columns. Columns with uniqID and time
#'   are always returned.
#' @param out.file Optional character. If provided the output is saved as a .csv
#'   file to the path indicated by out.file. Otherwise, the new dataset is
#'   returned.
#' @param min.obs Optional numeric. Minimum number of observations of an ID.
#'
#' @return A data.table with the columns indicated in col.whatokeep.
#' @export
#'
#' @examples
#' # Simulate 10 phase-shifted sinusoids with 3 different level of noises
#' # ("experimental conditions", first grouping).
#' x <- multi_sims(type = "ps", noises = c(0.2,0.4), n = 10)
#' plot_sim(x)
#' 
#' # Read/export 5 random TS from/in environment
#' x[, uniqID := paste(noise, variable, sep ="_")]
#' x_subset <- select_random_subset(in.file=x, ntraj=5, col.uniqID="uniqID", col.time="Time")
select_random_subset <- function(in.file, ntraj, col.uniqID, col.time, min.obs=NULL, col.whatokeep=NULL, out.file=NULL){
  if(is.character(in.file)) dt <- fread(in.file)
  else dt <- in.file
  # Remove ID and time to avoid duplicate when whatokeep is null
  if(is.null(col.whatokeep)) col.whatokeep <- setdiff(colnames(dt), c(col.uniqID, col.time))
  # Choose IDs to keep, with=FALSE to use arguments provided as characters in data.table
  if(!is.null(min.obs)){
    candidates <- dt[, .N, by=get(col.uniqID)][N >= min.obs, get]
    trajs <- sample(candidates, size=ntraj, replace=FALSE)
  } else {
    trajs <- sample(unlist(unique(dt[, col.uniqID, with=FALSE])), size=ntraj, replace=FALSE)
  }
  dt <- dt[get(col.uniqID) %in% trajs, c(col.uniqID, col.time, col.whatokeep), with=FALSE]
  if(!is.null(out.file)) write.csv(dt, out.file, quote = FALSE, row.names = FALSE)
  else return(dt)
}
majpark21/TSexploreR documentation built on Oct. 16, 2019, 2:46 p.m.