# R/ts.R

# Defines functions ts_symbolic ts_discrete ts_duration ts_changeindex ts_permtest_transmat ts_permtest_block

# Documented in ts_changeindex ts_discrete ts_duration ts_permtest_block ts_permtest_transmat ts_symbolic

# package casnet ----
#
# Time Series HELPERS
#
# ts functions


#' Permutation Test: Block Randomisation
#'
#' Use block randomistion to get a permutation test evaluation of the deviation of an observed value at each time point from a target value. To do block permutation without any tests, pass `NULL` for argument `targetValue`.
#'
#' @param y1 Time series 1. The goal of the permutation test will be to decide whether the difference `y1-targetValue != 0` for each time point, given `alpha`.
#' @param y2 An optional second time series. If this time series is provided then the goal of the permutation test will be to decide whether the difference `y1-y2 != targetValue` for each time point, given `alpha`.
#' @param targetValue The target value for the permutation test. If `NULL`, the function will return a data frame with the block randomised surrogates  columns (default = `0`)
#' @param Nperms Number of permutations (default = `19`)
#' @param sim Value passed to the `sim` argument of [boot::tsboot()] valid options are: `"model","fixed","geom","scramble"` (default = `"geom"`)
#' @param l Block sizes to use, see [boot::tsboot()] for details (default = `3`)
#' @param alpha Alpha level for deciding significance  (default = `0.05`)
#' @param returnBootObject Return the `boot` object (default = `FALSE`)
#' @param ... Other arguments passed to function [boot::tsboot()]
#'
#' @return A data frame with the difference time series and variables indicating N and significance.
#'
#' @family Time series operations
#'
#' @export
#'
#' @examples
#'
#' \donttest{set.seed(4321)
#' y1 <- rnorm(5000)
#' y2 <- y1-(mean(y1)+rnorm(1))
#'
#' ts_permtest_block(y1 = y1, y2 = y2)}
#'
ts_permtest_block <- function(y1, y2 = NULL, targetValue = 0, Nperms = 19, sim = "geom", l = 3, alpha = .05, returnBootObject = FALSE,...){

  checkPkg("boot")

  # Identity "statistic": boot::tsboot() will then return the block-resampled
  # series itself for each replicate.
  tsSame <- function(y){
    y
  }

  # A NULL targetValue means: no test, just return the surrogates.
  returnOnlyPerms <- FALSE
  if(is.null(targetValue)){
    targetValue <- 0
    returnOnlyPerms <- TRUE
  }

  ts.boot1 <- boot::tsboot(tseries = stats::ts(y1), statistic = tsSame, R = Nperms, sim = sim, l = l)
  ts.boot <- ts.boot1

  if(!is.null(y2)){
    if(NROW(y2)!=NROW(y1)){stop("y1 and y2 have different lengths.")}
    ts.boot2 <- boot::tsboot(tseries = stats::ts(y2), statistic = tsSame, R = Nperms, sim = sim, l = l)
    # Two-sample case: observed and surrogate differences between the series.
    # NOTE(review): the observed difference is computed as y1-y2 while the
    # roxygen docs mention y2-y1 — the sign convention should be unified.
    ts.boot$t0 <- y1-y2
    ts.boot$t <- ts.boot1$t-ts.boot2$t
  } else {
    # One-sample case: subtract the target, then test the result against 0.
    ts.boot$t0 <- y1-targetValue
    ts.boot$t  <- ts.boot1$t-targetValue
    targetValue <- 0
  }

  if(!returnOnlyPerms){

    # Pre-allocate one result row per time point (avoids growing a list).
    out <- vector("list", NROW(y1))
    for(t in seq_len(NROW(y1))){
      # Direction of the observed deviation decides which tail is counted.
      if(ts.boot$t0[t] < targetValue){dir <- "lesser"} else {dir <- "greater"}
      if(dir%in%"lesser"){
        Ds <- sum(ts.boot$t[,t] <= ts.boot$t0[t], na.rm = TRUE) # how many <= to the observed diff <= targetValue?
      } else {
        Ds <- sum(ts.boot$t[,t] >= ts.boot$t0[t], na.rm = TRUE) # how many >= to the observed diff > targetValue?
      }
      # Spread of surrogate + observed values at this time point.
      SD <- stats::sd(c(ts.boot$t[,t],ts.boot$t0[t]), na.rm = TRUE)
      out[[t]] <- data.frame(time = t,
                             Ori  = ts.boot$t0[t],
                             targetValue = targetValue,
                             Nperms     = Nperms,
                             Ori.Diff   = dir,
                             Ori.Rank   = Ds,
                             Ori.Rank.p = Ds / (Nperms + 1),
                             alpha = alpha,
                             sig = NA,
                             Mean = mean(ts.boot$t[,t],na.rm = TRUE),
                             SD = SD,
                             SE = SD/sqrt(NROW(y1)+1)
      )
    }

    df_sig     <- plyr::ldply(out)
    df_sig$sig <- df_sig$Ori.Rank.p < alpha

    if(returnBootObject){
      return(list(df_sig = df_sig, bootOut = ts.boot))
    } else {
      # Fix: previously `return(df_sig = df_sig)` passed a named argument
      # to return(), which is invalid.
      return(df_sig)
    }

  } else {
    # Only the block-randomised surrogates are requested.
    if(!is.null(y2)){
      out <- list(y1_RND = t(ts.boot1$t),
                  y2_RND = t(ts.boot2$t))
      names(out) <- c(deparse(substitute(y1)),deparse(substitute(y2)))
      return(plyr::ldply(out,.id="Source"))
    } else {
      out <- list(y1_RND = t(ts.boot1$t))
      names(out) <- deparse(substitute(y1))
      return(plyr::ldply(out,.id="Source"))
    }
  }

}


#' Permutation Test: Transition Matrix
#'
#' Monte Carlo resampling of a time series using a discretised version of `y`, a sequence of `bin` numbers with unique values equal to `nbins`:
#'    1. The discrete version of `y` will be used to generate a transition matrix of size `nbins X nbins`.
#'    2. This transition matrix will be used to resample values
#'
#' @inheritParams ts_permtest_block
#' @param nbins Number of bins to use (default = `ceiling(2*length(y1)^(1/3))`)
#' @param keepNA keepNA
#'
#' @return Resampled series
#' @export
#'
#' @family Time series operations
#'
#' @examples
#'
#' set.seed(4321)
#' y <- rnorm(5000)
#' ts_permtest_transmat(y)
#'
ts_permtest_transmat <- function(y1, y2 = NULL,
                                 targetValue = 0,
                                 nbins = ceiling(2*length(y1)^(1/3)),
                                 Nperms = 19,
                                 alpha = .05,
                                 keepNA = TRUE){
  # Resample a series by drawing from the transition matrix of its binned
  # (discretised) version.
  # NOTE(review): targetValue, Nperms and alpha are accepted but never used —
  # despite the function name, no permutation test is performed here; a single
  # resampled (bin, value) matrix is returned.

  # Use y1 alone, or the difference series when y2 is supplied.
  if(is.null(y2)){
    y <- y1
  } else {
    y <- y2-y1
  }

  # Bin sequence of y; drives the transition matrix below.
  bins <- ts_discrete(y, nbins, keepNA = keepNA)

  # Observed (bin, value) pairs and the bin-to-bin transition matrix.
  yn   <- cbind(obin = bins, oy = y)
  tMat <- ts_transmat(yd = bins, nbins = nbins)

  # NOTE(review): the surrogate is sized by NROW(tMat) — presumably
  # nbins x nbins, so nbins rows — not by NROW(y). Confirm whether the
  # resampled series should instead have the same length as y.
  y_mc <- matrix(NA,NROW(tMat),2, dimnames = list(NULL,c("rbin","ry")))
  # Random initial state drawn from the observed pairs.
  y_mc[1,] <- yn[sample(NROW(yn),1),]

  for(t in 2:NROW(tMat)){
    # Draw the next bin from the current bin's transition probabilities.
    y_mc[t,1] <- seq(nbins)[stats::rmultinom(n = 1, size = 1, prob = tMat[y_mc[(t-1),1],])==1]
    # Observed values that belong to the drawn bin.
    pset <- yn[(yn[,1] %in% y_mc[t,1]),2]
    if(length(pset)==0){
      # NOTE(review): assigning NA to the bin means the next iteration will
      # index tMat with NA — verify this path cannot error out.
      y_mc[t,1] <- NA
    } else {
      # Resample a concrete value from the drawn bin.
      y_mc[t,2] <- pset[sample(length(pset),1)]
    }
  }
  return(y_mc)

}


#' Find change indices
#'
#' @param y An indicator variable representing different levels of a variable or factor
#' @param returnRectdata Return a dataframe suitable for shading a `ggplot2` graph with [ggplot2::geom_rect()]
#' @param groupVar Pass a value (length 1) or variable (length of y) that can be used as a variable to join the indices by if `returnRectdata = TRUE`
#' @param labelVar If `y` is not a character vector, provide a vector of labels equal to `length(y)`
#' @param discretize If `y` is a continuous variable, setting `discretize = TRUE` will partition the values of `y` into `nbins` number of bins, each value of `y` will be replaced by its bin number.
#' @param nbins Number of bins to use to change a continuous `y` (if `discretize = TRUE`) into a variable with `nbins` levels
#'
#' @return Either a vector with the indices of change in `y`, or, a data frame with variables `xmin,xmax,ymin,ymax,label`
#'
#' @family Time series operations
#'
#' @export
#'
#' @examples
#'
#'  library(ggplot2)
#'
#'  set.seed(1234)
#'  yy     <- noise_powerlaw(standardise = TRUE, N=50, alpha = -1)
#'  tr     <- ts_levels(yy, doTreePlot = TRUE)
#'  breaks <- ts_changeindex(tr$pred$p, returnRectdata = TRUE)
#'  breaks$cols <- casnet::getColours(length(breaks$label))
#'
#'  ggplot(tr$pred) +
#'    geom_rect(data = breaks,
#'    aes(xmin = xmin, xmax=xmax, ymin=ymin, ymax=ymax, colour = label, fill = label),
#'    alpha = .3) +
#'    scale_colour_manual(values = breaks$cols) +
#'    scale_fill_manual(values = breaks$cols) +
#'    scale_x_continuous("time", expand = c(0,0)) +
#'    geom_line(aes(x=x,y=y)) +
#'    geom_step(aes(x=x,y=p), colour = "red3", size=1) +
#'    theme_bw() + theme(panel.grid.minor = element_blank())
#'
ts_changeindex <- function(y, returnRectdata=FALSE, groupVar = NULL, labelVar = NULL, discretize=FALSE, nbins = 5){

  # Bin a continuous y into nbins levels when requested.
  # (Fix: discretize/nbins were documented but previously ignored.)
  if(discretize){
    y <- ts_discrete(y, nbins = nbins)
  }

  # Unnamed input: convert to a named numeric vector; names act as labels.
  if(is.null(names(y))){
    y <- as.numeric_discrete(y)
  }

  if(!is.null(names(y))&is.null(labelVar)){
    labelVar <- names(y)
  }

  if(length(unique(labelVar))>10){
    warning("More than 10 epochs detected!")
  }

  # Epoch boundaries: a change point is wherever successive values differ;
  # each epoch runs from xmin to xmax (the last index also closes an epoch).
  xmax <- c((which(diff(y)!=0))+1, NROW(y))
  names(xmax) <- labelVar[xmax]
  xmin <- c(1,xmax[1:(NROW(xmax)-1)])
  names(xmin) <- labelVar[xmin]
  group <- rep(1,length(xmin))

  # Optional grouping variable: a single value, or one value per sample.
  if(!is.null(groupVar)){
    if(length(groupVar)==1){
      group <- groupVar
    } else {
      if(length(groupVar)==length(y)){
        group <- groupVar[xmax]
      } else {
        warning("Group values incorrect, using: groupVar = 1")
      }
    }
  }

  # Label each epoch by its closing index.
  if(!is.null(labelVar)){
    label <- labelVar[xmax]
  } else {
    label <- y[xmax]
  }

  if(returnRectdata){
    # Data frame suitable for shading with ggplot2::geom_rect().
    return(data.frame(group = group, xmin = xmin, xmax = xmax, ymin = -Inf, ymax = +Inf, label = label))
  } else {
    # Interior change indices only: drop the series start and end.
    return(unique(c(xmin,xmax)[-c(1,length(c(xmin,xmax)))]))
  }
}


#' Time series to Duration series
#'
#' @param y A time series, numeric vector, or categorical variable.
#' @param timeVec A vector, same length as `y` containing timestamps, or, sample indices.
#' @param fs Optional sampling frequency if timeVec represents sample indices. An extra column `duration.fs` will be added which represents `1/fs * duration in samples`
#' @param tolerance A number `tol` indicating a range `[y-tol,y+tol]` to consider the same value. Useful when `y` is continuous (`default = 0`)
#'
#' @return A data frame
#' @export
#'
#' @family Time series operations
#'
#' @examples
#' library(invctr)
#' # Create data with events and their timecodes
#' coder <- data.frame(beh=c("stare","stare","coffee","type","type","stare"),t=c(0,5,10,15,20,25))
#'
#' ts_duration(y = coder$beh, timeVec = coder$t)
#'
ts_duration <- function(y, timeVec = stats::time(y), fs = stats::frequency(y), tolerance = 0){


  # Coerce y to a named numeric vector; the names preserve the original
  # (categorical) labels for the output column y.name.
  if(plyr::is.discrete(y)){
    y.n <- as.numeric_discrete(y)
  } else {
    y.n <- as.numeric(y)
    names(y.n) <- paste(y.n)
    if(all(is.na(y.n))){
      stop("Conversion to numeric failed for all elements of y.")
    }
  }

  y <- y.n
  tID <- seq_along(y)[-1]
  # Append sentinels so the loop can always look one element ahead:
  # the extra y value differs from every observed value by construction
  # (max + tolerance + 1), the extra time point repeats the last timestamp.
  y[NROW(y)+1]       <- max(y, na.rm = TRUE) + tolerance + 1
  timeVec[NROW(timeVec)+1] <- max(timeVec, na.rm = TRUE)

  # One row per sample; `keep` marks the final sample of each run so that
  # intermediate rows of the same run can be dropped at the end.
  same <- list()
  same[[1]] <- data.frame(y = y[1],
                          y.name  = names(y)[1],
                          t.start = timeVec[1],
                          t.end   = timeVec[1],
                          duration.time    = 0,
                          duration.samples = 1,
                          duration.fs      = fs,
                          keep = TRUE)

  for(i in tID){
    # Same as previous? (%[]% is the inclusive-range operator from invctr:
    # TRUE when y[i] lies within +/- tolerance of y[i-1])
    if(y[i]%[]%c((y[i-1]-tolerance),(y[i-1]+tolerance))){
      # Extend the current run: carry its start time forward, increment the
      # sample count, and mark the previous row as redundant.
      same[[i]] <- data.frame(y = y[i],
                              y.name  = names(y[i]),
                              t.start = same[[i-1]]$t.start,
                              t.end   = timeVec[i],
                              duration.time    = 0,
                              duration.samples = (same[[i-1]]$duration.samples+1),
                              duration.fs      = fs,
                              keep = TRUE)
      same[[i-1]]$keep <- FALSE
    } else {
      # A new value starts here: close the previous run at this time point.
      same[[i-1]]$t.end <- timeVec[i]
      # Same as upcoming?
      if(y[i]%[]%c((y[i+1]-tolerance),(y[i+1]+tolerance))){
        # Start of a longer run: this row will be superseded, do not keep.
        same[[i]] <- data.frame(y = y[i],
                                y.name  = names(y[i]),
                                t.start = timeVec[i],
                                t.end   = timeVec[i+1],
                                duration.time    = 0,
                                duration.samples = 1,
                                duration.fs = fs,
                                keep = FALSE)
      } else {

        # Singleton run: keep it as-is.
        same[[i]] <- data.frame(y = y[i],
                                y.name  = names(y[i]),
                                t.start = timeVec[i],
                                t.end   = timeVec[i+1],
                                duration.time    = 0,
                                duration.samples = 1,
                                duration.fs = fs,
                                keep = TRUE)
      }
    }
  }
  # Keep only the closing row of each run and the first six columns;
  # durations are (re)computed below.
  same.out <- plyr::ldply(same)
  same.out <- same.out[same.out$keep,1:6]
  # Duration in 1/fs units (samples) and in time-stamp units.
  if(!is.null(fs)){same.out$duration.fs = (1/fs)*same.out$duration.samples}
  same.out$duration.time = (same.out$t.end - same.out$t.start)
  row.names(same.out) <- paste(seq_along(same.out$t.start))
  return(same.out)
}



#' Delay embedding of a time series
#'
#' Create a state vector based on an embedding lag and a number of embedding dimensions.
#'
#' @param y Time series
#' @param emDim Embedding dimension
#' @param emLag Embedding lag
#' @param returnOnlyIndices Return only the index of y for each surrogate dimension, not the values (default = `FALSE`)
#' @param doEmbed Should the series be embedded? If `FALSE` adds attributes.
#' @param silent Silent-ish mode
#'
#' @note If `emLag = 0`, the assumption is the columns in `y` represent the dimensions and `y` will be returned with attributes `emLag = 0` and `emDim = NCOL(y)`. If `emLag > 0` and `NCOL(y)>1` the first column of `y` will used for embedding and a warning will be triggered.
#'
#' @return The lag embedded time series
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#' @export
#'
#' @family Time series operations
#'
ts_embed <- function (y, emDim, emLag, returnOnlyIndices = FALSE, doEmbed = TRUE, silent = TRUE){

  # Identifier for the embedded variable: column name > vector name > expression.
  id    <- ifelse(is.null(colnames(y)),ifelse(is.null(names(y)),deparse(substitute(y)),names(y)[1]),colnames(y)[1])
  y.ori <- y
  N     <- NROW(y)

  if(!is.null(dim(y))&emLag>0){
    # Warn about discarded columns BEFORE collapsing y to its first column.
    # (Fix: previously this check ran after the collapse, so it could never
    # fire, and warning(cat(...)) discarded the message since cat() returns NULL.)
    if(NCOL(y)>1){
      warning("Multiple columns available, taking first column of y...")
    }
    y <- y_data <- as.numeric(y[,1])
  } else {
    y_data  <- y
  }

  # Embed on the time index; the data values are mapped back in at the end.
  if(any(stats::is.ts(y), zoo::is.zoo(y), xts::is.xts(y))){
    y <- stats::time(y)
    emTime <- lubridate::as_datetime(y[emLag+1])- lubridate::as_datetime(y[1])
  } else {
    y <- zoo::index(y)
    emTime <- emLag
  }

  # With no lag, the columns of y are taken to be the dimensions (see @note).
  if(emLag==0){emDim <- 1}

  if((emDim-1) * emLag > N){
    stop(paste0("Time series length (N = ",N,") is too short to embed in ",emDim," dimensions with delay ",emLag))
  }

  if(emDim > 1){
    # Build the index matrix: column tau holds the indices shifted by tau*emLag.
    lag.id    <- seq(1, (emDim*emLag), emLag)
    maxN      <- (N+1) - max(lag.id)
    emY       <- matrix(nrow = maxN, ncol = emDim)

    for(tau in seq_along(lag.id)){
      emY[,tau] = y[lag.id[tau]:((N+1)-(rev(lag.id)[tau]))]
    }
    colnames(emY) <- paste0("tau.",0:(emDim-1))

  } else {
    # Single dimension: return the original data as a one-column matrix.
    ids <- colnames(y.ori)
    if(is.null(ids)){ids <- 0}
    emY <- as.matrix(y.ori)
    dimnames(emY) <- list(NULL,paste0("tau.",ids))
  }

  if(!doEmbed){
    emDim <- NCOL(emY)
  }

  attr(emY, "embedding.dims") <- emDim
  attr(emY, "embedding.lag")  <- emLag
  attr(emY, "embedding.time") <- emTime
  attr(emY, "variable.y")     <- id

  if(returnOnlyIndices){
    attr(emY, "data.y") <- y_data
    return(emY)
  } else {
    if(emDim>1&emLag>0){
      # Replace time indices by the actual data values.
      for(c in 1:NCOL(emY)){
        emY[,c] <-  y_data[emY[,c]]
      }
    }
    return(emY)
  }
}


#' Discrete representation
#'
#'  Return a discrete representation of `y` by binning the observed values and returning the bin number of each value.
#'
#' @param y Numeric vector or time series to be discretised.
#' @param nbins Number of bins to use for calculating transfer probabilities (default = `ceiling(2*length(y)^(1/3))`)
#' @param keepNA If `TRUE`, any `NA` values will first be removed and later re-inserted into the discretised time series.
#'
#' @return A discretised version of `y`
#'
#' @family Time series operations
#'
#' @export
#'
ts_discrete <- function(y, nbins=ceiling(2*NROW(y)^(1/3)), keepNA = TRUE){

  # Record NA positions, then bin the non-missing values only.
  idNA <- is.na(y)
  y <- y[!idNA]

  # Bin edges span (min - 0.001, max]; the small offset ensures min(y)
  # falls inside the first left-open interval.
  binvec <- seq(min(y)-0.001, max(y),length.out=nbins+1)

  # Assign each value its bin number using left-open intervals (a, b].
  # findInterval() replaces the previous O(n * nbins) loop over bins and
  # removes the dependency on the non-base %(]% operator.
  bins <- as.numeric(findInterval(y, binvec, left.open = TRUE))

  if(keepNA){
    # Re-insert NAs at their original positions.
    y <- rep(NA,length(idNA))
    y[!idNA] <- bins
  } else {
    y <- bins
  }

  return(y)
}

#' Symbolic representation
#'
#'  Return a discrete representation of `y` by transforming it into an unordered categorical variable which indicates whether a value went up, down, or remained the same relative to the previous value.
#'
#' @param y Numeric vector or matrix to be discretised. Will return attributes with labels for each column in the matrix.
#' @param keepNA If `TRUE`, any `NA` values will first be removed and later re-inserted into the discretised time series. (default = `TRUE`)
#' @param usePlateaus Give consecutive `"same"` values after `"peak"` or `"trough"` a `"peak"` or `"trough"` label instead of `"same"`. (default = `FALSE`)
#' @param doPlot Create a plot of the symbolized series. (default = `FALSE)`)
#'
#' @return A symbolic version of `y`
#'
#' @family Time series operations
#'
#' @export
#'
ts_symbolic <- function(y, keepNA = TRUE, usePlateaus = FALSE, doPlot = FALSE){

  # Remember the input class and column names so the output can mirror the
  # input's shape (vector in -> factor out; matrix/data.frame in -> same dims).
  cl <- class(y)
  cnames <- colnames(y)
  if(is.null(cnames)){
    cnames = paste0("Y",1:NCOL(y))
  }

  # Record NA positions per column, then keep complete cases only;
  # NAs are re-inserted further down when keepNA = TRUE.
  idNA <- plyr::colwise(.fun = is.na)(as.data.frame(y))
  #y <- as.data.frame(y)[stats::complete.cases(!idNA),]
  y <- as.data.frame(y)[stats::complete.cases(y),]

  # Numeric symbols per column: 1 trough, 2 decrease, 3 same, 4 increase,
  # 5 peak; first and last rows come back as NA.
  # returns matrix
  sym_num <- symbolize(xy = y)

  if(is.null(dim(y))) {
    ymat <-  as.matrix(y)
  } else {
    ymat <- y
  }

  # symbolize() leaves the final row undefined; classify it here against its
  # predecessor only (no "peak"/"trough" possible without a successor).
  # Adjust last value
  for(i in 1:NCOL(sym_num)) {
    t = NROW(sym_num)
    # if(!is.na(ymat[t-1,i])){
    if(ymat[t-1,i]>ymat[t,i]){
      sym_num[t,i] <- 2
    }
    if(ymat[t-1,i]<ymat[t,i]){
      sym_num[t,i] <- 4
    }
    if(ymat[t-1,i]==ymat[t,i]){
      sym_num[t,i] <- 3
    }
    #}
  }

  # Re-insert NAs at their original positions (column-wise).
  if(keepNA&any(colSums(idNA,na.rm = TRUE)>0)){
    sym_NA <- matrix(NA,nrow=NROW(idNA),ncol = NCOL(idNA))
    for(c in 1:NCOL(y)){
      sym_NA[!idNA[,c],c] <- sym_num[,c]
    }
    sym_num <- sym_NA
    rm(sym_NA)
  }

  sym_label <- sym_num

  # Map numeric symbols to labels; optionally relabel plateaus ("same" runs
  # following a turn) to "peak"/"trough" via symHelper().
  if(!is.null(ncol(sym_num))){
    for(c in 1:NCOL(sym_num)){
      sym_label[,c] <-  dplyr::case_when(
        sym_num[,c]==1 ~ "trough",
        sym_num[,c]==2 ~ "decrease",
        sym_num[,c]==3 ~ "same",
        sym_num[,c]==4 ~ "increase",
        sym_num[,c]==5 ~ "peak",
        is.na(sym_num[,c]) ~ NA_character_)
      if(usePlateaus){
        tmp <- symHelper(sym_label[,c],sym_num[,c])
        sym_label[,c] <- tmp[[1]]
        sym_num[,c]   <- tmp[[2]]}
    }
  } else {
    # Single vector variant of the mapping above.
    sym_label <-  dplyr::case_when(
      sym_num==1 ~ "trough",
      sym_num==2 ~ "decrease",
      sym_num==3 ~ "same",
      sym_num==4 ~ "increase",
      sym_num==5 ~ "peak",
      is.na(sym_num) ~ NA_character_)
    if(usePlateaus){
      tmp <- symHelper(sym_label,sym_num)
      sym_label <- tmp[[1]]
      sym_num   <- tmp[[2]]}
  }

  # id <- gregexpr("((increase){1}\\s(same)+\\s(trough){1})+", yc)
  #
  # yc<-paste0(y,collapse=" ")
  # substr(yc,9,9+18-1)

  # Package the output: for rectangular/numeric input, return the numeric
  # symbols and stash the labels in attributes so dims match the input.
  if(any(cl%in%c("matrix","data.frame","numeric","tbl","tbl_df"))){
    out    <- sym_num
    anames <- paste0(cnames,"_sym_label")

    for(c in 1:NCOL(out)){
        attr(out,anames[c]) <- factor(sym_label[,c],exclude=NULL)
    }
  } else { # Possibly a 1D vector
    if(!is.null(ncol(y))){
      for(c in 1:NCOL(sym_label)){
        sym_label[,c]  <- factor(sym_label[,c],exclude=NULL)
      }
      colnames(sym_num) <- paste0(cnames,"_sym_num")
      colnames(sym_label) <- paste0(cnames,"_sym_label")
      out <- cbind(sym_num,sym_label) #,stringsAsFactors = FALSE)
    } else { # in case of single vector
      out <- factor(sym_label, exclude=NULL)
      attr(out,"sym_numeric") <- sym_num
    }
  }

  if(doPlot){

    # Fixed shape/colour/label lookups for the 0-5 symbol codes (0 = missing).
    shp <- c("trough" = 25, "decrease" = 21, "same"=23,"increase" = 22, "peak" = 24,"missing" = 8)
    col <- c("trough"="darkkhaki","decrease"="orange","same"="grey60","increase"="steelblue","peak"="olivedrab","missing"="red")
    labs <- c("0" = "missing","1"="trough","2"="decrease","3"="same","4"="increase","5"="peak")

    tmpOut <- data.frame(time = 1:NROW(out), out)

    if(!is.null(ncol(sym_num))){

      # All this code is because we had to store the results in attributes to keep dims of the input same as output
      if(NCOL(dplyr::select(tmpOut,dplyr::ends_with("_sym_label")))==0){
        cname <- which(grepl(pattern = "_sym_label",x = names(attributes(out))))
        #which(names(attributes(tmpOut))[grepl(pattern = "_sym_label",x = names(attributes(tmpOut)))])
        for(c in cname){
          tmpOut[,NCOL(tmpOut)+1] <- attributes(out)[[c]]
          colnames(tmpOut)[NCOL(tmpOut)] <- names(attributes(out)[c])
        }
        #colnames(tmpOut)["label"%ci%tmpOut] <- names(attributes(out)[cname])
      }
      if(NCOL(dplyr::select(tmpOut,dplyr::ends_with("_sym_num")))==0){
        cname <- which(grepl(pattern = "_sym_num",x = names(attributes(tmpOut))))
        tmpOut$num <-  attributes(tmpOut)[[cname]]
        colnames(tmpOut)["num"%ci%tmpOut] <- cname
      }

      # Long format: one (time, variable, label, numeric symbol) row per cell.
      df_p1 <- tmpOut %>% dplyr::as_tibble() %>%
        dplyr::select(dplyr::ends_with("_sym_label"),"time") %>%
        tidyr::gather(key="lab_var", value="sym_label", -"time")

      #df_p1$sym_label[is.na(df_p1$sym_label)] <- "missing"
      df_p2 <- tmpOut %>% dplyr::as_tibble() %>%
        dplyr::select(dplyr::ends_with("_sym_num")) %>%
        tidyr::gather(key="num_var", value="sym_num")

      # Missing values get symbol code 0 ("missing") for plotting.
      df_p2[is.na(df_p2)] <- 0
      df_plot <- cbind(df_p1,df_p2)
      df_plot$num_var <- gsub("_sym_num","",df_plot$num_var)
    } else {
      # Single vector: numeric symbols live in the "sym_numeric" attribute.
      df_plot <- data.frame(time = 1:NROW(tmpOut), sym_num= attr(tmpOut,"sym_numeric"), sym_label = tmpOut)
      df_plot$sym_num[is.na(df_plot$sym_num)] <- 0
      #levels(df_plot$sym_label)[is.na(df_plot$sym_num)] <- "missing"
    }

    # Generate plot
    # NOTE(review): aes_() is deprecated in current ggplot2 — consider
    # aes() with .data pronouns when updating this code.
    g <- ggplot(df_plot,aes_(x=~time,y=~sym_num)) +
      geom_line(color="grey80") +
      geom_point(aes_(shape=~sym_label, color=~sym_label, fill=~sym_label),size=2)

    # One facet per variable when plotting a multi-column input.
    if(!is.null(df_plot$num_var)){
      if(length(unique(df_plot$num_var))>1){
        g <- g + facet_grid(num_var~.)
      }
    }

    g <- g +
      scale_x_continuous("Time") +
      scale_shape_manual("Symbolic value", values = shp) +
      scale_color_manual("Symbolic value", values = col) +
      scale_fill_manual("Symbolic value", values = col) +
      scale_y_continuous("",labels = labs) +
      theme_bw() + theme(panel.grid = element_blank())

    graphics::plot(g)

    return(list(data=out,plot=invisible(g)))
  } else {
    # NOTE(review): `cl` can have length > 1 (e.g. c("tbl_df","tbl","data.frame")),
    # in which case if() on `cl%in%"numeric"` errors in R >= 4.2 — consider any().
    if(cl%in%"numeric"&NCOL(y)==1){dim(out) <- NULL}
    return(out)
  }

}


#' Correct for plateaus in symbolic series
#'
#' @param sym_label Labels
#' @param sym_num Numeric labels
#'
#' @return data frame
#' @export
#'
#' @keywords internal
#'
symHelper <- function(sym_label,sym_num){
  # Relabel plateaus: a run of "same" values directly after an increase or
  # decrease becomes part of a "trough" (when the run resolves upward) or
  # a "peak" (when it resolves downward). Works on copies so the original
  # labels are still available for run detection.
  out_num   <- sym_num
  out_label <- sym_label
  n <- length(sym_label)

  i <- 1
  while(i <= n){
    step <- 1
    if(sym_label[i] %in% c("increase","decrease")){
      # Count the run of "same" labels directly following position i.
      r <- i + 1
      while(r <= n && sym_label[r] %in% "same"){
        r <- r + 1
      }
      same <- r - (i + 1)
      if(same > 0){
        nxt <- i + same + 1  # first label after the plateau (may not exist)
        if(!sym_label[i] %in% "increase" && nxt <= n && sym_label[nxt] %in% c("peak","increase")){
          # decrease -> same... -> increase/peak: the plateau is a trough (1)
          out_label[i:(i+same)] <- "trough"
          out_num[i:(i+same)]   <- 1
        } else if(!sym_label[i] %in% "decrease" && nxt <= n && sym_label[nxt] %in% c("trough","decrease")){
          # increase -> same... -> decrease/trough: the plateau is a peak (5)
          out_label[i:(i+same)] <- "peak"
          out_num[i:(i+same)]   <- 5
        }
        # Resume AFTER the plateau. (Fix: the previous version advanced by
        # `same` only and re-used a stale `same` on later iterations, which
        # could skip over unprocessed increase/decrease labels.)
        step <- same + 1
      }
    }
    i <- i + step
  }
  return(list(out_label,out_num))
}


#' Convert numeric vectors to symbolic vectors.
#'
#' `symbolize` converts numeric vectors to symbolic vectors. It is a helper
#'   function for `muti`.
#'
#' @param xy An n x 2 `matrix` or `data.frame` containing the two
#'   vectors of interest.
#'
#' @return An (n-2) x 2 `matrix` of integer symbols that indicate whether
#'   the i-th value, based on the i-1 and i+1 values, is a "trough" (=1),
#'   "decrease" (=2), "same" (=3), "increase" (=4), or "peak" (=5).
#'
#' @author Mark Scheuerell (https://mdscheuerell.github.io/muti/)
#' @export
#'
#' @keywords internal
#'
symbolize <- function(xy) {
  ## Coerce a bare vector to a one-column matrix.
  if (is.null(dim(xy))) {
    xy <- as.matrix(xy)
  }
  ## Series length; the first and last rows cannot be classified (no
  ## neighbour on one side) and therefore stay NA.
  n_obs <- dim(xy)[1]
  su <- matrix(NA, n_obs, NCOL(xy))

  ## Classify each interior point by comparing it to its two neighbours:
  ## 1 trough, 2 decrease, 3 same, 4 increase, 5 peak.
  for (j in seq_len(NCOL(xy))) {
    for (tt in 2:(n_obs - 1)) {
      prv <- xy[tt - 1, j]
      cur <- xy[tt, j]
      nxt <- xy[tt + 1, j]
      ## A missing value in the neighbourhood leaves the symbol NA.
      if (any(is.na(c(prv, cur, nxt)))) {
        next
      }
      su[tt, j] <- if (cur > prv) {
        if (cur > nxt) 5 else 4   # peak / increase
      } else if (cur < prv) {
        if (cur < nxt) 1 else 2   # trough / decrease
      } else {
        3                         # same as the previous value
      }
    }
  }
  return(su)
}


#' Derivative of time series
#'
#' Iteratively differenced series up to `order`. The same length as the original series is recovered by calculating the mean of two vectors for each iteration: One with a duplicated first value and one with a duplicated last value.
#'
#' @param y A timeseries object or numeric vector or a matrix in which columns are variables and rows are numeric values observed over time.
#' @param order How many times should the difference iteration be applied? (default = `1`)
#' @param addColumns Should the derivative(s) be added to the input vector/matrix as columns? (default = `TRUE`)
#' @param keepDerivatives If `TRUE` and `order > 1`, all derivatives from `1:order` will be returned as a matrix )default = `FALSE`)
#' @param maskEdges Mask the values at the edges of the derivatives by any numeric type that is not `NULL` (default = `NULL`)
#' @param silent Silent-ish mode
#'
#' @return Depending on the setting of `addColumns` and the object type passed as `y`, a vector of equal length as `y` iteratively differenced by `order` times; a matrix with derivatives, or a matrix with original(s) and derivative(s).
#'
#' @note The values at the edges of the derivatives represent endpoint averages and should be excluded from any subsequent analyses. Set argument `maskEdges` to a value of your choice.
#'
#' @export
#'
#' @family Time series operations
#'
#' @examples
#'
#' # Here's an interesting numeric vector
#'y<-c(1,1,2,3,5,8,13,21,34,55,89,144,233,377,610,987,1597,2584,4181,6765,10946,17711,28657,46368)
#'
#' # Return the first order derivative as a vector
#' ts_diff(y=y,addColumns=FALSE)
#'
#' # Return original and derivative as a matrix
#' plot(stats::ts(ts_diff(y=y, addColumns=TRUE)))
#'
#' # Works on multivariate data objects with mixed variable types
#' df <- data.frame(x=letters, y=1:26, z=sin(1:26))
#'
#' # Returns only derivatives of the numeric colunmns
#' ts_diff(y=df,addColumns=FALSE)
#'
#' # Returns original data with derivatives of the numeric columns
#' ts_diff(y=df, order=4, addColumns=TRUE)
#'
#' # Plot logistic S-curve and derivatives 1 to 3
#' S <- stats::plogis(seq(-5,5,.1))
#' plot(stats::ts(ts_diff(S, order=3, keepDerivatives = TRUE)))
#' abline(v=which(seq(-5,5,.1)==0), col= "red3", lwd=3)
#'
#' # Plot again, but with masked edges
#' (maskEdge <- ts_diff(S, order=3, keepDerivatives = TRUE, maskEdges = NA))
#' plot(stats::ts(maskEdge))
#' abline(v=which(seq(-5,5,.1)==0), col= "red3", lwd=3)
#'
ts_diff <- function(y, order= 1, addColumns = TRUE, keepDerivatives = FALSE, maskEdges = NULL, silent = TRUE){

  N <- NCOL(y)
  if(NROW(y)<N){
    warning(paste0("Assuming variables in columns and observations in rows!\nDo you really have ",N," columns with time series of length ",NROW(y),"?"))
  }

  # Sanitise maskEdges: accept a single numeric (or NA) value, else disable.
  # (Fix: the previous vectorized `|` with is.na() made if() fail for
  # length > 1 input in recent R versions.)
  if(!is.null(maskEdges)){
    if(is.numeric(maskEdges) || all(is.na(maskEdges))){
      if(length(maskEdges)==1){
        maskEdges <- as.numeric(maskEdges)
      } else {
        maskEdges <- NULL
      }
    } else {
      maskEdges <- NULL
    }
  }

  # Which elements/columns are numeric? (base vapply replaces plyr::laply)
  Nnum <- vapply(y, is.numeric, logical(1))
  # Fix: previously only length(Nnum) < 1 was checked, so an input without
  # any numeric column slipped through and failed further down.
  if(!any(Nnum)){stop("Need at least one numeric vector!")}

  if(N==1){
    Nnum <- TRUE
    yy <- matrix(y)
  } else {
    # NOTE(review): for matrix input Nnum has one entry per element, not per
    # column, so this selection is only reliable for data frames — confirm.
    yy <- as.matrix(y[,(1:N)[Nnum]])
  }

  # Fabricate column names when the input has none.
  if(is.null(dimnames(yy)[[2]])){
    dimnames(yy) <- list(dimnames(yy)[[1]], gsub("[[:punct:]]","", paste0(deparse(substitute(y)),1:NCOL(yy))))
  }

  dynames <- dimnames(yy)[[2]]

  if((N-NCOL(yy))>=0){
    if(!silent){cat(paste0("\nCalculating derivative for ",NCOL(yy)," time series.\n"))}
  }

  keepDiffs <- list()
  maskWhich <- list()

  dy <- yy
  # Iterated differencing; the original length is restored each iteration by
  # averaging a first-value-padded and a last-value-padded copy of diff().
  for(o in 1:order){
    dif <- diff(dy)
    dy  <- cbind((rbind(dif[1,], dif)+rbind(dif,dif[NROW(dif),]))/2)
    if(!is.null(maskEdges)){
      # Edge rows affected by the padding at this order.
      maskWhich[[o]] <- c(1:o,(NROW(dy)-o+1):NROW(dy))
    }
    if(keepDerivatives){
      keepDiffs[[o]] <- dy
    }
  }

  if(keepDerivatives){
    # Return all derivatives 1..order, suffixed _d1.._dorder.
    dy <- as.data.frame(keepDiffs)
    colnames(dy) <- unlist(lapply(seq_len(order), function(n) paste0(dynames,"_d",n)))
  } else {
    dy <- as.data.frame(dy)
    colnames(dy) <- paste0(dynames,"_d",order)
  }

  # Mask the endpoint-averaged edge values if requested.
  # NOTE(review): maskWhich is indexed by derivative order but applied per
  # column; verify this mapping for multi-column input.
  if(!is.null(maskEdges)){
    for(c in seq_along(maskWhich)){
      dy[maskWhich[[c]],c] <- maskEdges
    }
  }

  if(addColumns){
    return(cbind(yy,dy))
  } else {
    if(N==1){
      return(dy[,1])
    } else {
      return(dy)
    }
  }
}



#' Adjust time series by summation order
#'
#' Many fluctuation analyses assume a time series' Hurst exponent is within the range of `0.2 - 1.2`. If this is not the case it is sensible to make adjustments to the time series, as well as to the resulting Hurst exponent.
#'
#' @param y A time series of numeric vector
#' @param scaleS The scales to consider for `DFA1`
#' @param polyOrder Order of polynomial for detrending in DFA (default = `1`)
#' @param dataMin Minimum number of data points in a bin needed to calculate detrended fluctuation
#'
#' @return The input vector, possibly adjusted based on `H` with an attribute `"Hadj"` containing an integer by which a Hurst exponent calculated from the series should be adjusted.
#'
#' @details Following recommendations by <https://www.frontiersin.org/files/Articles/23948/fphys-03-00141-r2/image_m/fphys-03-00141-t001.jpg>{Ihlen (2012)}, a global Hurst exponent is estimated using DFA and `y` is adjusted accordingly:
#' \itemize{
#' \item{`1.2 < H < 1.8` first derivative of y, atribute `Hadj = 1`}
#' \item{`H > 1.8` second derivative of y, atribute `Hadj = 2`}
#' \item{`H < 0.2` y is centered and integrated, atribute `Hadj = -1`}
#' \item{`0.2 <= H <= 1.2 ` y is unaltered, atribute `Hadj = 0`}
#' }
#'
#' @references Ihlen, E. A. F. E. (2012). Introduction to multifractal detrended fluctuation analysis in Matlab. Frontiers in physiology, 3, 141.
#'
#' @family Time series operations
#'
#' @export
#'
ts_sumorder <- function(y, scaleS = NULL, polyOrder = 1, dataMin = 4){

  # Default scales: ~31 log2-spaced values from 2^2 up to half the series length
  if(is.null(scaleS)){
    scaleS <- unique(round(2^(seq(2, floor(log2(NROW(y)/2)), by=((floor(log2(NROW(y)/2))-2)/30)))))
  }

  # Check global H
  # DFA is run on the profile: the centred, cumulatively summed series
  TSm      <- as.matrix(cbind(t=1:NROW(y),y=ts_integrate(ts_center(y))))
  Hglobal  <- monoH(TSm = TSm, scaleS = scaleS, polyOrder = polyOrder, returnPLAW = TRUE, returnSegments = TRUE)
  rm(TSm)

  # Keep only scales whose bins hold at least dataMin points
  fitRange <- which(lapply(Hglobal$segments,NROW)>=dataMin)

  # H1: slope over all scales; H2: slope over the restricted fit range.
  # NOTE(review): lmfit1 regresses `bulk` (not `bulk.log2`) on size.log2,
  # unlike lmfit2 -- confirm this asymmetry is intended.
  lmfit1        <- stats::lm(Hglobal$PLAW$bulk ~ Hglobal$PLAW$size.log2, na.action=stats::na.omit)
  H1  <- lmfit1$coefficients[2]
  lmfit2        <- stats::lm(Hglobal$PLAW$bulk.log2[fitRange] ~ Hglobal$PLAW$size.log2[fitRange], na.action=stats::na.omit)
  H2  <- lmfit2$coefficients[2]


  # Adjust TS by global H, following Ihlen (2012).
  # %(]% and %[]% are casnet's interval operators (presumably open-left/closed
  # and fully closed intervals -- confirm against their definitions).
  Hadj <- 0
  if(H2%(]%c(1.2,1.8)){
    # 1.2 < H <= 1.8: take the first derivative, exponent adjusted by +1
    y <- ts_diff(y,addColumns = FALSE)
    Hadj=1
  }
  if(H2>1.8){
    # H > 1.8: take the second derivative, exponent adjusted by +2
    y <- ts_diff(ts_diff(y, addColumns = FALSE), addColumns = FALSE)
    Hadj <- 2
  }
  if(H2%[]%c(0.2,1.2)){
    # 0.2 <= H <= 1.2: leave y unaltered
    Hadj <- 0
  }
  if(H2 < 0.2){
    # H < 0.2: centre and integrate, exponent adjusted by -1
    y <- ts_integrate(ts_center(y))
    Hadj <- -1
  }

  # Return y with the two global exponent estimates and the applied
  # adjustment stored as attributes
  attr(y,"Hglobal.full") <- H1
  attr(y,"Hglobal.excl") <- H2
  attr(y,"Hadj")         <- Hadj

  return(y)
}

#' Check and/or Fix a vector
#'
#' @param y A time series object or numeric vector
#' @param checkNumericVector is 1D numeric vector?
#' @param checkTimeVector has time vector?
#' @param checkWholeNumbers contains only wholenumbers?
#' @param checkPow2 length is power of 2?
#' @param checkScale checkScale
#' @param checkSummationOrder checkSummationOrder
#' @param checkNonStationarity checkNonStationarity
#' @param checkNonHomogeneity checkNonHomogeneity
#' @param fixNumericVector return a 1D numeric vector (WARNING: Data frames and Matrices with NCOL > 1 will be converted to long form)
#' @param fixWholeNumbers fixWholeNumber
#' @param fixTimeVector fixTimeVector
#' @param fixPow2 fixPow2
#' @param fixNA fixNA
#' @param fixScale fixScale
#' @param fixSummationOrder fixSummationOrder
#' @param fixNonStationarity fixNonStationarity
#' @param fixNonHomogeneity fixNonHomogeneity
#'
#' @return A 'check' report and/or a 'fixed' vector y.
#'
#' @family Time series operations
#'
#' @export
#'
ts_checkfix <- function(y, checkNumericVector = TRUE, checkWholeNumbers = FALSE, checkTimeVector = FALSE, checkPow2 = FALSE, checkScale = FALSE, checkSummationOrder = FALSE, checkNonStationarity = FALSE, checkNonHomogeneity = FALSE, fixNumericVector = FALSE, fixWholeNumbers = FALSE, fixTimeVector = FALSE, fixPow2 = FALSE, fixNA = TRUE, fixScale = FALSE, fixSummationOrder = FALSE, fixNonStationarity = FALSE, fixNonHomogeneity = FALSE){

  # NOTE(review): checkPow2, checkScale, checkSummationOrder,
  # checkNonHomogeneity, fixPow2, fixNA, fixScale, fixSummationOrder and
  # fixNonHomogeneity are accepted but not acted upon in this body.
  # NOTE(review): the textual report collected in `outtext` is assembled but
  # never returned or printed; only the (possibly fixed) vector y is returned.

  outtext <- list()
  pre <- "\ny"
  post <- "\n"
  # report counter; %++% is casnet's increment operator
  # (presumably increments i in place -- confirm)
  i <- 0

  whichClass <- class(y)
  yesNumeric <- FALSE
  yesWholeNumbers <- FALSE
  yesTime    <- FALSE
  yesPow2    <- FALSE
  yesScaled  <- FALSE
  yesNonStationary <- FALSE

  # Numeric: is y a numeric object?
  tmptxt <- ""
  txt <- "NUMERIC"
  if(checkNumericVector){
    if(is.numeric(y)){
      txt <- c(txt,"is numeric")
      yesNumeric <- TRUE
    } else {
      tmptxt <- "is NOT numeric"
    }
  }

  if(fixNumericVector&!yesNumeric){
    if(is.null(dim(y))){
      suppressWarnings(y <- as.numeric(unlist(y)))
    } else {
      if(dim(y)[[2]]==1){suppressWarnings(y <- as.numeric(y[,1]))
      } else {
        # NOTE(review): piping the gathered data frame into as.numeric()
        # passes the whole data frame as the first argument -- confirm this
        # multi-column branch actually works as intended.
        if(dim(y)[[2]]>1){suppressWarnings(y <- y %>%  tidyr::gather(key="colname",value="value") %>%  as.numeric(.data$value))
        }
      }
    }
    tmptxt <- paste(tmptxt,"... FIXED by as.numeric(y)")
    yesNumeric <- TRUE
  }
  txt <- c(txt, tmptxt)
  outtext[[i%++%1]] <- paste(pre,txt,post)
  rm(txt,tmptxt)


  # Wholenumber: does y contain only whole numbers?
  # FIX: this check is now gated on checkWholeNumbers; it previously always
  # ran, ignoring the argument.
  tmptxt <- ""
  txt <- "WHOLENUMBER"
  if(checkWholeNumbers){
    if(all(is.wholenumber(y))){
      txt <- c(txt,"only contains whole numbers")
      yesWholeNumbers <- TRUE
    } else {
      tmptxt <- "does NOT only contain whole numbers"
    }
  }
  if(fixWholeNumbers&!yesWholeNumbers){
    suppressWarnings(y <- round(y))
    tmptxt <- paste(tmptxt,"... FIXED by round(y)")
    yesWholeNumbers <- TRUE
  }
  txt <- c(txt, tmptxt)
  outtext[[i%++%1]] <- paste(pre,txt,post)
  rm(txt,tmptxt)

  # TimeVector: does y carry time series attributes?
  tmptxt <- ""
  txt <- "TIMEVECTOR"
  if(checkTimeVector){
    if(grepl("(ts|mts|xts|zoo)",whichClass)){
      txt <-c(txt,"has a time vector:",stats::tsp(y))
      yesTime <- TRUE
    } else {
      tmptxt <- "does not have a time vector"
    }
  }

  # FIX: was `is.list(fixTimeVector&!yesTime)`, which applied `&` to the list
  # itself and errored whenever fixTimeVector actually was a list.
  if(is.list(fixTimeVector)&&!yesTime){
    if(all(names(fixTimeVector)%in%names(formals(stats::ts)))){
      fixTimeVector$data <- NULL
      # Build and evaluate a stats::ts() call from the supplied arguments
      etxt <- paste0("stats::ts(data = y, ",paste0(names(fixTimeVector)," = ",fixTimeVector,collapse = ", "),")")
      y <- eval(parse(text = paste(etxt)))
      tmptxt <- paste(tmptxt,"... FIXED by",etxt)
      yesTime <- TRUE
      rm(etxt)
    } else {
      y <- stats::ts(y)
      tmptxt <- paste(tmptxt,"... FIXED by ts(y)")
      yesTime <- TRUE
    }
  }
  txt <- c(txt, tmptxt)
  outtext[[i%++%1]] <- paste(pre,txt,post)
  rm(txt,tmptxt)

  # Stationarity: small values of tst1..tst3 indicate evidence AGAINST
  # stationarity:
  # - kpss: a small p-value rejects the (trend-)stationary null
  # - adf/pp: 1-p is small when p is large, i.e. the unit-root null stands
  tmptxt <- ""
  txt<-"NONSTATIONARY"
  if(checkNonStationarity){
    tst1 <- suppressWarnings(tseries::kpss.test(y,null = "Trend"))$p.value
    tst2 <- 1-suppressWarnings(tseries::adf.test(y, alternative = "stationary"))$p.value
    tst3 <- 1-suppressWarnings(tseries::pp.test(y, alternative = "stationary"))$p.value
    result <- sum(c(tst1,tst2,tst3)<.05, na.rm = TRUE)
    if(result>1){
      # FIX: the report previously stated the opposite conclusion and
      # prefixed a stray `result`
      txt <- c(txt,paste0("is NOT stationary (",result,"/3 tests indicate y is not stationary)"))
      yesNonStationary <- TRUE
    } else {
      txt <- c(txt,paste0("is stationary (",result,"/3 tests indicate y is not stationary)"))
    }
  }
  # FIX: the gate was `fixNonStationarity&!yesNonStationary`, i.e. it
  # detrended exactly when the series was found to be stationary. Detrend
  # when nonstationarity was detected, or when no check was requested.
  if(fixNonStationarity&&(yesNonStationary||!checkNonStationarity)){
    y <- ts_detrend(y)
  }
  txt <- c(txt, tmptxt)
  outtext[[i%++%1]] <- paste(pre,txt,post)
  rm(txt,tmptxt)

  return(y)

}

#' Trim or Fill Vectors
#'
#' Trim the largest vector by cutting it, or filling it with `NA`.
#' Fill the shortest vector with padding.
#'
#' @param x A numeric vector
#' @param y A numeric vector
#' @param action Use `"fill"` to fill the shortest vector with `padding` (default); `"trim.cut"` to trim the longest vector to the length of the shortest; `"trim.NA"` to fill the longest vector with `NA`. This is a shortcut for running `action = "trim.cut"` with `padding=NA`, which can be useful if one wants to match the shortest series, but preserve the original length of largest vector.
#' @param type Should trimming or filling take place at the `"end"` (default), or `"front"` of the vector? The option `"center"` will try to distribute trimming by `NA` or filling by `padding` evenly across the front and end of the vector.
#' @param padding A value to use for padding (default = `0`)
#' @param silent Run silent-ish
#'
#' @return A list with two vectors of equal length.
#'
#' @seealso il_mi
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#' @export
#'
#' @family Time series operations
#'
#'
ts_trimfill <- function(x,y,action=c("fill","trim.cut","trim.NA")[1],type=c("end","center","front")[1],padding=0,silent=TRUE){

  if(!all(is.numeric(x),is.numeric(y))){
    stop("Please use 2 numeric vectors!")
  }

  # FIX: the original compared type to "centered", which never matched the
  # documented option "center" (leaving `out` undefined). Both spellings are
  # accepted now for backward compatibility.
  if(type=="centered"){type <- "center"}

  l      <- lengths(list(x=x,y=y))
  oriMin <- names(l)[which.min(l)]
  oriMax <- names(l)[which.max(l)]
  ldiff  <- abs(diff(l))

  # Equal lengths: nothing to do. Returning early also fixes the original
  # bug where which.min() and which.max() both picked "x" for equal lengths
  # and the output list was renamed c("x","x").
  if(ldiff==0){
    if(!silent){message("Vectors have equal length.")}
    return(list(x=x,y=y))
  }

  longest  <- list(x,y)[[which.max(l)]]
  shortest <- list(x,y)[[which.min(l)]]
  out <- NULL

  if(action%in%c("fill","trim.NA")){
    if(action=="fill"){
      if(!silent){message(paste("Padded shortest input",names(which.min(l)),"with", ldiff,"times",padding))}
    } else {
      # "trim.NA" behaves like "fill" with NA padding: the shortest vector is
      # extended with NA, preserving the original length of the longest.
      padding <- NA
      if(!silent){message(paste("Trimmed longest input",names(which.max(l)),"with",ldiff,"times",padding))}
    }
    if(type=="end"){
      out <- list(longest, c(shortest,rep(padding,ldiff)))
    }
    if(type=="front"){
      out <- list(longest, c(rep(padding,ldiff),shortest))
    }
    if(type=="center"){
      # Distribute the padding evenly over front and end
      front <- floor(ldiff/2)
      end   <- ceiling(ldiff/2)
      out   <- list(longest, c(rep(padding,front),shortest,rep(padding,end)))
    }
  }

  if(action=="trim.cut"){
    if(!silent){message(paste("Trimmed longest input",names(which.max(l)),"by",ldiff))}
    if(type=="end"){
      out <- list(longest[1:(NROW(longest)-ldiff)], shortest)
    }
    if(type=="front"){
      # FIX: the original kept ldiff:NROW(longest), trimming one element too
      # few; (ldiff+1):NROW(longest) yields equal lengths.
      out <- list(longest[(ldiff+1):NROW(longest)], shortest)
    }
    if(type=="center"){
      front <- floor(ldiff/2)
      end   <- ceiling(ldiff/2)
      # FIX: same off-by-one as above on the front side.
      out <- list(longest[(front+1):(NROW(longest)-end)], shortest)
    }
  }

  if(is.null(out)){
    stop("Unknown value for argument action or type.")
  }

  # Always return in the fixed order x, y regardless of which input was longest
  names(out) <- c(oriMax,oriMin)
  return(out[order(c(oriMax,oriMin))])
}

#' Get sliding window indices
#'
#' @param y A time series or numeric vector
#' @param win Size of the window to slide across `y`
#' @param step Size of steps between windows. Can be larger than `win`, but is ignored if `overlap` is not `NA`.
#' @param overlap A value between `[0 .. 1]`. If overlap is not `NA` (default), the value of `step` is ignored and set to `floor(overlap*win)`. This produces indices in which the size of `step` is always smaller than `win`, e.g. for fluctuation analyses that use binning procedures to represent time scales.
#' @param adjustY If not `NA`, or, `FALSE` a list object with fields that match one or more arguments of \link[casnet]{ts_trimfill} (except for `x,y`), e.g. `list(action="trim.NA",type="end",padding=NA,silent=TRUE)`. See `Return value` below for details.
#' @param alignment Whether to right (`"r"`), center (`"c"`), or left (`"l"`) align the window.
#' @param silent Silent mode.
#'
#' @return If `adjustY` is a list object with fields that represent arguments of \link[casnet]{ts_trimfill}, then the (adjusted) vector `y` is returned with an attribute `"windower"`. This is a list object with fields that contain the indices for each window that fits on `y`, given `win`, `step` or `overlap` and the settings of `adjustY`. If `adjustY = NA`, only the list object is returned.
#' An attribute `time` is returned with the time index based on the `alignment` arguments.
#' @export
#'
#' @family Time series operations
#' @family Tools for windowed analyses
#'
ts_windower <- function(y, win=length(y), step=NA, overlap=NA, adjustY=NA, alignment = c("r","c","l")[1], silent = TRUE){

  # --- Validate adjustY ---------------------------------------------------
  # adjustY must be a list of ts_trimfill() arguments (other than x and y);
  # character elements are shQuote()d for use in a constructed call later on.
  adjustOK <- FALSE
  if(is.list(adjustY)){
    if(any(names(adjustY)%in%names(formals(ts_trimfill)))){
      adjustY[!names(adjustY)%in%names(formals(ts_trimfill))[-c(1,2)]] <- NULL
      adjustY <- adjustY[!lengths(adjustY)==0]
      # NOTE(review): after the subsetting above adjustY is a (possibly
      # empty) list, never NULL, so this stop() looks unreachable -- confirm.
      if(is.null(adjustY)){
        stop("No valid arguments of ts_trimfill passed to adjustY!")
      } else {
        adjustY<- plyr::llply(adjustY, function(e) if(is.character(e)){shQuote(e)} else {e})
        adjustOK <- TRUE
      }
    }
  }

  # --- Resolve step size --------------------------------------------------
  # overlap (a fraction in [0,1]) takes precedence: step = floor(overlap*win)
  if(!is.na(overlap)&!is.na(win)){
    if(overlap%[]%c(0,1)){
      if(!is.na(step)){
        if(!silent){
          message("Step will be ignored, because overlap has a value.")
        }
      }
      step <- floor(overlap*win)
    } else {
      stop("Overlap must be in [0,1]")
    }
  }

  # Neither step nor overlap given: non-overlapping windows
  if(!is.na(win)&is.na(step)&is.na(overlap)){
    step <- win
    if(!silent){message("Stepsize was set to window size.")}
  }

  # --- Build window start indices -----------------------------------------
  wIndices <- list()
  wIndex <- seq(from = 1, to = (NROW(y) - win + step), by = step)

  # iDiff > 0: last window overshoots the series; iDiff < 0: data left over.
  # If the mismatch is at least half a window, drop/add a window accordingly.
  iDiff    <- (dplyr::last(wIndex)+win-1)-NROW(y)
  if(abs(iDiff)>=(win/2)){
    if(iDiff>0){
      wIndex <- wIndex[1:(length(wIndex)-1)]
    } else {
      extra <- dplyr::last(wIndex)+1
      wIndex[length(wIndex)+1] <- extra
    }
  }
  # Expand each start index into the window's index range (clipped to y)
  wIndices <- plyr::llply(wIndex, function(i){i:min(i+win-1,length(y))})

  # When "fill" adjustment was requested, let the last window run past the
  # end of y by the leftover amount
  if(adjustOK){
    if(iDiff<0){
      if(adjustY$action%in%"fill"){
        wIndices[[length(wIndices)]] <- c(wIndices[[length(wIndices)]], seq(max(wIndices[[length(wIndices)]]), max(wIndices[[length(wIndices)]]) + abs(iDiff)))
      }
    }
  }

  # Zero-padded window labels
  wID   <- seq_along(wIndices)
  width <- nchar(paste(max(wID)))
  names(wIndices) <- paste0("window: ", formatC(wID, width = width, format = "d", flag = "0"), " | start: ",wIndex," | stop: ",wIndex+win-1)

  # --- Make the last window fit the data ----------------------------------
  iDiff <- max(wIndices[[length(wIndices)]])-NROW(y)

  if(iDiff!=0|any(lengths(wIndices)!=win)){
    if(iDiff<0|any(lengths(wIndices)>win)){
      wIndices[[length(wIndices)]] <- c(wIndices[[length(wIndices)]],(dplyr::last(wIndices[[length(wIndices)]])+1):NROW(y))
      #warning("Last window was enlarged to include all data")
      # NOTE(review): paste0() below is closed right after the subscript, so
      # the remaining strings become extra arguments to warning(); warning()
      # concatenates its arguments, so the message still reads correctly.
      if(!silent){
        warning(paste0("Window ",seq_along(wIndices)[lengths(wIndices)>win])," is larger than window size ",win,".\n")
        }
    }
    if(iDiff>0|any(lengths(wIndices)<win)){
      wIndices[[length(wIndices)]] <- c(dplyr::first(wIndices[[length(wIndices)]]):NROW(y))
      # NOTE(review): same early-closed paste0() as above
      if(!silent){
      warning(paste0("Window ",seq_along(wIndices)[lengths(wIndices)<win])," is smaller than window size ",win,".\n")
      }
      #warning("Last window was shrunk to fit data")
    }
    names(wIndices)[length(wIndices)] <- paste0("window: ",max(seq_along(wIndices))," | start: ",max(wIndex)," | stop: ",max(wIndices[[length(wIndices)]]))
  }

  # Time stamp per window according to the requested alignment:
  # left = window start, center = window mean index, right = window end
  switch(alignment,
         l = tIndex <- plyr::laply(wIndices, min),#seq(from = 1,     to = (NROW(y) - win + step), by = step),
         c = tIndex <- plyr::laply(wIndices, mean),#seq(from = floor(win/2), to = (NROW(y) - floor(win/2) + step), by = step),
         r = tIndex <- plyr::laply(wIndices, max) #seq(from = win,   to = NROW(y), by = step)
  )

  attr(x = wIndices, which = "time") <- tIndex

  # adjustY is validated above, but applying it to y is not implemented
  if(adjustOK){
    warning("asjustY not implemented, use  ts_trimfill() first.")
    #y <- ts_trimfill(x=y, adjustY)
  }

  #names(wIndices) <- paste("stepsize",floor(step*win),"| window",1:length(wIndex))
  return(wIndices)
}



#' Detrend a time series
#'
#' @param y A time series of numeric vector
#' @param polyOrder Order of polynomial trend to remove
#' @param adaptive If `TRUE`, fit polynomial trends of order `1:polyOrder` and remove the trend with the highest adjusted R squared (default = `FALSE`)
#'
#' @return Residuals after detrending polynomial of order `polyOrder`
#'
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#'
ts_detrend <- function(y, polyOrder=1, adaptive = FALSE){

  if(adaptive){
    # Try every order up to polyOrder and keep the best fit by adjusted R^2.
    # vapply replaces the original unqualified laply() call, which failed
    # whenever plyr was not attached.
    pOrders <- 1:polyOrder
    Rsq <- vapply(pOrders, function(pO){
      fit <- stats::lm(formula = y~stats::poly(1:length(y), degree = pO, raw=TRUE))
      summary(fit)$adj.r.squared
    }, numeric(1))
    polyOrder <- which.max(Rsq)
  }

  # Residuals after removing the polynomial trend of order polyOrder
  # (full name $residuals; the original relied on partial matching of
  # $residual)
  detR <- stats::lm(formula = y~stats::poly(1:length(y), polyOrder, raw = TRUE))$residuals
  return(detR)
}


#' Detect levels in time series
#'
#'  Use recursive partitioning function [rpart::rpart()] to perform a 'classification' of relatively stable levels in a timeseries.
#'
#' @param y A time series of numeric vector
#' @param minDataSplit An integer indicating how many datapoints should be in a segment before it will be analysed for presence of a level change (default = `12`)
#' @param minLevelDuration Minimum duration (number of datapoint) of a level (default = `round(minDataSplit/3)`)
#' @param changeSensitivity A number indicating a criterion of change that must occur before declaring a new level. Higher numbers indicate higher levels of change must occur before a new level is considered. For example, if `method = "anova"`, the overall `R^2` after a level is introduced must increase by the value of `changeSensitivity`, see the `cp` parameter in [rpart::rpart.control()].     (default = `0.01`)
#' @param maxLevels Maximum number of levels in one series (default = floor(max(NROW(y), na.rm = TRUE)/minLevelDuration))
#' @param method The partitioning method to use, see the manual pages of [rpart] for details.
#' @param minChange After the call to [rpart], adjust detected level changes to a minimum absolute change in `y`. If a level change is smaller than `minChange`, the previous level will be continued. Note that this is an iterative process starting at the beginning of the series and 'correcting' towards the end. The results are stored in `p_adj`. Set to `NA` to skip, which means `p_adj` will be identical to `p` (default  = `sd(y, na.rm = TRUE)`)
#' @param doLevelPlot Should a plot with the original series and the levels be produced? (default = `FALSE`)
#' @param doTreePlot Should a plot of the decision tree be produced. This requires package [partykit](https://cran.r-project.org/web/packages/partykit/index.html) (default = `FALSE`)
#'
#' @return A list object with fields `tree` and `pred`. The latter is a data frame with columns `x` (time), `y` (the variable of interest) and `p` the predicted levels in `y` and `p_adj`, the levels in `p` but adjusted for the value passed to `minChange`.
#'
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#' @examples
#'
#' # Levels in white noise?
#'
#' set.seed(4321)
#' y <- rnorm(100)
#' wn <- ts_levels(y)
#' plot(wn$pred$x,wn$pred$y, type = "l")
#' lines(wn$pred$p, col = "red3", lwd = 2)
#'
#' # This is due to the default changeSensitivity of 0.01
#'
#' wn2 <- ts_levels(y,changeSensitivity = .1)
#' lines(wn2$pred$p, col = "steelblue", lwd = 2)
#'
#'
ts_levels <- function(y,
                      minDataSplit = NROW(y)/4,
                      minLevelDuration=round(minDataSplit/3),
                      changeSensitivity = 0.01,
                      maxLevels= floor(NROW(y)/minLevelDuration),
                      method=c("anova","poisson","class","exp")[1],
                      minChange = NA,
                      doLevelPlot = FALSE,
                      doTreePlot = FALSE){

  checkPkg("rpart")

  # Remember the positions of missing values so they can be restored in p_adj
  if(any(is.na(y))){
    NAind <- which(is.na(y))
  } else {
    NAind <- NA
  }

  # Regression tree of y on time: the terminal nodes are the detected levels
  x <- seq_along(y)
  dfs  <- data.frame(x=x, y=y)
  tree <- rpart::rpart(y ~ x,
                       method = method,
                       control = list(
                         minsplit  = minDataSplit,
                         minbucket = minLevelDuration,
                         maxdepth  = maxLevels,
                         cp = changeSensitivity),
                       data=dfs)

  dfs$p     <- stats::predict(tree, data.frame(x=x))
  dfs$p_adj <- dfs$p

  minChange <- abs(minChange)
  if(!is.na(minChange%00%NA)){
    if(is.numeric(minChange)){

      if(minChange>=max(abs(diff(dfs$p)), na.rm = TRUE)){
        warning("The largest level difference is smaller than the value of argument minChange.\nPartitioning results are stored in variable `p`, the adjusted results are stored in variable `p_adj`.")
      }

      if(minChange>0){
        checkPkg("imputeTS")
        # Level changes in (0, minChange): blank the new level out, then
        # carry the last valid level forward (LOCF)
        ind <- which(abs(diff(c(dfs$p[1],dfs$p)))%()%c(0,minChange))
        for(i in ind){
          if(all(diff(dfs$p_adj[i:length(dfs$p_adj)])==0)){
            dfs$p_adj[i:dplyr::last(which(diff(dfs$p_adj[i:length(dfs$p_adj)])==0)+i)] <- NA
          } else {
            dfs$p_adj[i:(which(diff(dfs$p_adj[i:length(dfs$p_adj)])!=0)[1]+i-1)] <- NA
          }
        }
        dfs$p_adj <- imputeTS::na_locf(dfs$p_adj)
        # FIX: restore NA at the original missing-value positions. The
        # condition used to be any(is.na(NAind)), which is TRUE only when
        # there were NO missing values, and then failed on the NA subscript.
        if(!any(is.na(NAind))){
          dfs$p_adj[NAind] <- NA
        }
      }
    } # numeric
  } else {
    message("Skipping adjustment by argument minChange...")
  }# NA

  # Optional plot: original series with the detected levels overlaid
  if(doLevelPlot){
    g <- ggplot2::ggplot(dfs) +
      geom_line(aes(x=x,y=y))
    if(!is.na(minChange)){
      g <- g + geom_step(aes(x=x,y=p_adj), colour = "red3", size=1)
    } else {
      g <- g + geom_step(aes(x=x,y=p), colour = "red3", size=1)
    }
    g <- g + theme_bw() + theme(panel.grid.minor = element_blank())
    print(g)
  }

  # Optional plot of the fitted decision tree (requires partykit)
  if(doTreePlot){
    checkPkg("partykit")
    plot(partykit::as.party(tree))
  }

  return(list(tree  = tree,
              pred  = dfs))
}



#' Detect slopes in time series
#'
#'  Use recursive partitioning function [rpart::rpart()] to perform a 'classification' of relatively stable slopes in a timeseries.
#'
#' @param y A time series of numeric vector
#' @param minDataSplit An integer indicating how many datapoints should be in a segment before it will be analysed for presence of a slope (default = `12`)
#' @param minSlopeDuration Minimum duration (number of datapoints) of a slope (default = `round(minDataSplit/3)`)
#' @param changeSensitivity A number indicating a criterion of change that must occur before declaring the presence of a slope Higher numbers indicate higher levels of change must occur before a slope is considered. For example, if `method = "anova"`, the overall `R^2` after a slope is introduced must increase by the value of `changeSensitivity`, see the `cp` parameter in [rpart::rpart.control()]. (default = `0.01`)
#' @param maxSlopes Maximum number of levels in one series (default = floor(max(NROW(y), na.rm = TRUE)/minSlopeDuration))
#' @param method The partitioning method to use, see the manual pages of [rpart] for details.
#' @param minChange After the call to [rpart], adjust detected slope value to a minimum absolute change in `y`. If a slope value is smaller than `minChange`, the previous slope will be continued. Set e.g. to `sd(diff(y), na.rm = TRUE)`. Note that this is an iterative process starting at the beginning of the series and 'correcting' towards the end. The results are stored in `p_adj`. Set to `NA` to skip, which means `p_adj` will be identical to `p` (default  = `NA`)
#' @param doSlopePlot Should a plot with the original series and the levels be produced? (default = `FALSE`)
#' @param doTreePlot Should a plot of the decision tree be produced. This requires package [partykit](https://cran.r-project.org/web/packages/partykit/index.html) (default = `FALSE`)
#'
#' @return A list object with fields `tree` and `pred`. The latter is a data frame with columns `x` (time), `y` (the variable of interest) and `p` the predicted slopes in `y` and `p_adj`, the slopes in `p` but adjusted for the value passed to `minChange`.
#'
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#' @examples
#'
#' # Slopes in white noise?
#'
#' set.seed(4321)
#' y <- rnorm(100)
#' wn <- ts_levels(y)
#' plot(wn$pred$x,wn$pred$y, type = "l")
#' lines(wn$pred$p, col = "red3", lwd = 2)
#'
#' # This is due to the default changeSensitivity of 0.01
#'
#' wn2 <- ts_slopes(y,changeSensitivity = .1)
#' lines(wn2$pred$p, col = "steelblue", lwd = 2)
#'
#'
ts_slopes <- function(y,
                      minDataSplit = NROW(y)/4,
                      minSlopeDuration=round(minDataSplit/3),
                      changeSensitivity = 0.01,
                      maxSlopes= floor(NROW(y)/minSlopeDuration),
                      method=c("anova","poisson","class","exp")[1],
                      minChange = NA,
                      doSlopePlot = FALSE,
                      doTreePlot = FALSE){

  checkPkg("rpart")

  # Remember the positions of missing values so they can be restored in p_adj
  if(any(is.na(y))){
    NAind <- which(is.na(y))
  } else {
    NAind <- NA
  }

  # Partition the first differences of y: constant segments of y_diff
  # correspond to (approximately) constant slopes in y
  x    <- seq_along(y)
  dfs  <- data.frame(x=x, y = y, y_diff = diff(c(y[1],y)))
  tree <- rpart::rpart(y_diff ~ x,
                       method = method,
                       control = list(
                         minsplit  = minDataSplit,
                         minbucket = minSlopeDuration,
                         maxdepth  = maxSlopes,
                         cp = changeSensitivity),
                       data=dfs)

  dfs$p     <- stats::predict(tree, data.frame(x=x))
  dfs$p_adj <- dfs$p

  minChange <- abs(minChange)
  if(!is.na(minChange%00%NA)){
    if(is.numeric(minChange)){

      if(minChange>=abs(max(dfs$y, na.rm = TRUE)-min(dfs$y, na.rm = TRUE))){
        warning("The largest slope value is smaller than the value of argument minChange.\nPartitioning results are stored in variable `p`, the adjusted results are stored in variable `p_adj`.")
      }

      if(minChange>0){
        checkPkg("imputeTS")
        # Slope changes in (0, minChange): blank the new slope out, then
        # carry the last valid slope forward (LOCF)
        ind <- which(abs(diff(c(dfs$p[1],dfs$p)))%()%c(0,minChange))
        for(i in ind){
          if(all(diff(dfs$p_adj[i:length(dfs$p_adj)])==0)){
            dfs$p_adj[i:dplyr::last(which(diff(dfs$p_adj[i:length(dfs$p_adj)])==0)+i)] <- NA
          } else {
            dfs$p_adj[i:(which(diff(dfs$p_adj[i:length(dfs$p_adj)])!=0)[1]+i-1)] <- NA
          }
        }
        dfs$p_adj <- imputeTS::na_locf(dfs$p_adj)
        # FIX: restore NA at the original missing-value positions. The
        # condition used to be any(is.na(NAind)), which is TRUE only when
        # there were NO missing values, and then failed on the NA subscript.
        if(!any(is.na(NAind))){
          dfs$p_adj[NAind] <- NA
        }
      }
    } # numeric
  } else {
    message("Skipping adjustment by argument minChange...")
  }# NA

  # Cumulative (rescaled) slope profile.
  # FIX: the branches were swapped -- when minChange is set the ADJUSTED
  # slopes must be used (when minChange is NA, p_adj is identical to p).
  if(!is.na(minChange)){
    dfs$slopes <- elascer(cumsum(round(dfs$p_adj,2)))
  } else {
    dfs$slopes <- elascer(cumsum(round(dfs$p,2)))
  }


  # Optional plots: series + slope profile, and series with detected slopes
  if(doSlopePlot){

    tmp <- dfs %>%
      select(c(1,2,6)) %>%
      pivot_longer(cols = c(2,3), names_to = "vars", values_to = "values")

    g <- ggplot2::ggplot(tmp, aes(x=x,y=values)) +
      geom_line() +
      facet_wrap(vars~., nrow=2, scales = "free_y") +
      theme_bw() +
      theme(panel.grid.minor = element_blank())

    print(g)

    g2 <- ggplot2::ggplot(dfs) +
      geom_line(aes(x=x,y=y))
    if(!is.na(minChange)){
      g2 <- g2 + geom_step(aes(x=x,y=p_adj), colour = "red3", size=1)
    } else {
      g2 <- g2 + geom_step(aes(x=x,y=p), colour = "red3", size=1)
    }
    g2 <- g2 + theme_bw() + theme(panel.grid.minor = element_blank())
    print(g2)
  }

  # Optional plot of the fitted decision tree (requires partykit)
  if(doTreePlot){
    checkPkg("partykit")
    plot(partykit::as.party(tree))
  }

  return(list(tree  = tree,
              pred  = dfs))
}



#' Find Peaks or Wells
#'
#' @param y A time series or numeric vector
#' @param window Window in which to look for peaks or wells
#' @param includeWells Find wells?
#' @param minPeakDist Minimum distance between peaks or wells
#' @param minPeakHeight Minimum height / depth for a peak / well
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
#' @return Index with peak or well coordinates
#' @export
#'
ts_peaks <- function (y,
                      window        = 3,
                      includeWells  = FALSE,
                      minPeakDist   = round(window/2),
                      minPeakHeight = .2*diff(range(y, na.rm = TRUE))){

  # Local-maximum finder: a negative second difference of sign(diff(y))
  # marks a candidate, which is kept when no value inside the window
  # exceeds it.
  locate_maxima <- function(series, halfwin){
    curvature  <- diff(sign(diff(series, na.pad = FALSE)))
    candidates <- which(curvature < 0)
    found <- lapply(candidates, function(idx){
      lo <- max(idx - halfwin + 1, 1)
      hi <- min(idx + halfwin + 1, length(series))
      if(all(series[c(lo:idx, (idx + 2):hi)] <= series[idx + 1])){
        idx + 1
      } else {
        numeric(0)
      }
    })
    unlist(found)
  }

  pks <- locate_maxima(y, window)
  if(includeWells){
    # Wells are simply peaks of the sign-flipped series
    pks <- sort(c(pks, locate_maxima(-1*y, window)))
  }

  # Enforce a minimum distance between consecutive peaks/wells;
  # the first candidate is always allowed.
  distOK    <- diff(c(NA, pks)) >= minPeakDist
  distOK[1] <- TRUE

  # Enforce a minimum height relative to the mean of the surrounding window
  heightOK <- rep(FALSE, length(pks))
  for(k in seq_along(pks)){
    surround <- y[c(max(1, (pks[k] - window)):(pks[k] - 1), (pks[k] + 1):min((pks[k] + window), length(y)))]
    if(abs(y[pks[k]] - mean(surround)) >= minPeakHeight){
      heightOK[k] <- TRUE
    }
  }

  return(pks[heightOK & distOK])
}



#' Center a vector
#'
#' @param numvec A numeric vector
#' @param na.rm Set the `na.rm` field
#' @param type Center on the `"mean"` (default) or the `"median"` of the vector.
#'
#' @return A mean or median centered vector
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
ts_center <- function(numvec, na.rm=TRUE, type = c("mean","median")[1]){
  # Only numeric vectors can be centred
  if(!is.numeric(numvec)){
    stop("Vector must be numeric!")
  }
  # Subtract the requested location measure from every element
  if(type == "mean"){
    return(numvec - mean(numvec, na.rm = na.rm))
  }
  if(type == "median"){
    return(numvec - stats::median(numvec, na.rm = na.rm))
  }
  # Unknown type: fall through and (invisibly) return NULL, as before
}


#' Standardise a vector
#'
#'
#' @param y A time series or numeric vector
#' @param na.rm Set the `na.rm` field
#' @param keepNAvalues If `na.rm = TRUE` and `keepNAvalues = TRUE`, any `NA` values in `y` will be re-inserted after transformation.
#' @param type Center on the `"mean"` and divide by `sd` (default), or center on `"median"` and divide by `mad`
#' @param adjustN If `TRUE`, apply Bessel's correction (divide by `N-1`) or return the unadjusted `SD` (divide by `N`) (default = `TRUE`)
#'
#' @return A standardised vector
#'
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
ts_standardise <- function(y, na.rm = TRUE, keepNAvalues = TRUE, type = c("mean.sd","median.mad")[1], adjustN = TRUE){
  # Only numeric input can be standardised
  if(!is.numeric(y)){
    stop("Vector must be numeric!")
  }
  # Which SD estimator ts_sd() should use for the mean.sd flavour
  SDtype <- if(adjustN) "Bessel" else "unadjusted"
  # Keep mask: all positions (NAs stay in place) or complete cases only
  keep <- if(keepNAvalues) rep(TRUE, NROW(y)) else !is.na(y)
  if(type == "mean.sd"){
    z <- (y - mean(y, na.rm = na.rm)) / ts_sd(y, na.rm = na.rm, type = SDtype)
    return(z[keep])
  }
  if(type == "median.mad"){
    z <- (y - stats::median(y, na.rm = na.rm)) / stats::mad(y, na.rm = na.rm)
    return(z[keep])
  }
  # Unknown type: fall through and (invisibly) return NULL, as before
}

#' Standard Deviation estimates
#'
#' Calculates the population estimate of the standard deviation, or the unadjusted standard deviation.
#'
#' @param y Time series or numeric vector
#' @param na.rm Remove missing values before calculations
#' @param type Apply Bessel's correction (divide by N-1) or return unadjusted SD (divide by N)
#' @param silent Silent-ish mode (default = `TRUE`)
#'
#' @return Standard deviation of `y`
#' @export
#'
#' @family Time series operations
#'
ts_sd <- function(y, na.rm = TRUE, type = c("Bessel","unadjusted")[1], silent=TRUE){

  if(!is.numeric(y)){
    stop("Vector must be numeric!")
  }

  # Drop missing values before computing the moments
  if(na.rm){
    y<-y[stats::complete.cases(y)]
  }

  N          <- NROW(y)
  Y_mean     <- mean(y)
  Y_sd       <- stats::sd(y)
  Bessel     <- sqrt((sum((y - Y_mean)^2)/(N-1)))  # population estimate (N-1)
  unadjusted <- sqrt((sum((y - Y_mean)^2)/N))      # uncorrected estimate (N)

  if(type=="Bessel"){
    # FIX: all.equal() returns a character vector (not FALSE) when the values
    # differ, which made the original if() throw an error instead of reaching
    # the warning branch; isTRUE() makes the comparison safe.
    if(isTRUE(all.equal(Y_sd,Bessel))){
      if(!silent){
        cat(paste0("\nDifference adjusted-unadjusted SD:",Bessel-unadjusted,"\n"))
      }
    } else {
      cat(paste0("\nWARNING: Computation of SD based on N = ",N," and Mean = ",Y_mean," differs from function sd(y)!!!\n"))
      cat(paste0("Difference Bessel-sd(y):",Bessel-Y_sd,"\n"))
    }
    out <- Bessel
  }

  if(type=="unadjusted"){
    # Same isTRUE() guard as above
    if(isTRUE(all.equal(unadjusted,(Y_sd/sqrt(N/(N-1)))))){
      if(!silent){
        cat(paste0("\nDifference Unadjusted-Adjusted:",unadjusted-Bessel,"\n"))
      }
    } else {
      cat(paste0("\nWARNING: Computation of SD based on N = ",N," and Mean = ",Y_mean," differs from function sd(y)!!!\n"))
      cat(paste0("Difference unadjusted-(Y_sd/sqrt(N/(N-1))):",unadjusted-(Y_sd/sqrt(N/(N-1))),"\n"))
    }
    out <- unadjusted
  }

  return(out)
}

#' Slice a Matrix
#'
#' Slices rows of a matrix into a list of matrices representing epochs of length `epochSz`.
#'
#' @param y A matrix with timeseries as columns
#' @param epochSz Epoch size
#' @param overlap Overlap between epochs, passed on to the `overlap` argument of [ts_windower()] (default = `NA`)
#' @param removeUnequal Do not return bins whose length is not equal to `epochSz` (default = `FALSE`)
#'
#' @return A list with epochs
#'
#' @export
#'
#' @family Time series operations
#'
#' @author Fred Hasselman
#'
ts_slice <- function(y , epochSz=4, overlap = NA, removeUnequal = FALSE){

  # Non-overlapping (unless overlap is set) window index ranges over the
  # rows of y; adjustY = FALSE means no trimming/filling by ts_windower
  bins <- ts_windower(y, win = epochSz, step = epochSz, overlap = overlap, adjustY = FALSE, alignment = "c", silent = TRUE)

  if(!is.matrix(y)){
    yy <- as.matrix(y)
  } else {
    yy <- y
  }

  N <- dim(yy)
  #wIndex <- plyr::llply(seq(1,N[1],epochSz),function(i) yy[i:min(i+epochSz-1,N[1]),1:N[2]])
  # NOTE(review): for a single-column y, yy[i, 1:1] drops to a vector so
  # lengths() below counts data points; for multi-column input the elements
  # are matrices and lengths() counts rows*cols, which cannot equal epochSz
  # -- confirm multi-column behaviour is intended.
  wIndex <- plyr::llply(bins, function(i) yy[i, 1:N[2]])
  # %00% presumably substitutes NA when no epoch deviates from epochSz.
  # NOTE(review): when delID is NA the loop below runs once with s = NA and
  # wIndex[[s]] would fail -- confirm this path is ever reached.
  delID <- which(lengths(wIndex)!=epochSz)%00%NA
  for(s in delID){
    if(length(wIndex[[s]])>epochSz){
      #wIndex[[s]][(epochSz+1):length(wIndex[[s]])] <- NA
      # Move the surplus of an oversized epoch into the next list element
      rest <- wIndex[[s]][(epochSz+1):length(wIndex[[s]])]
      wIndex[[s]]   <- wIndex[[s]][1:epochSz]
      wIndex[[s+1]] <- rest
    }
  }

  # Optionally drop epochs that are not exactly epochSz long.
  # NOTE(review): removing by index inside the loop shifts the remaining
  # indices, so deleting more than one epoch may remove the wrong ones.
  if(removeUnequal){
    delID <- which(lengths(wIndex)!=epochSz)%00%NA
  for(del in delID){
    if(!any(is.na(delID))){wIndex <- wIndex[-del]}
    }
  }

  names(wIndex) <- paste("epoch",1:length(wIndex),"| size",lengths(wIndex))
  return(wIndex)
}



#' Create a timeseries profile
#'
#' @param y A 1D timeseries
#'
#' @return The profile calculated as `cumsum(y[-length(y)]*diff(t))`, with `t` as `stats::time(y)`
#' @export
#'
#' @family Time series operations
#'
#' @examples
#' y <- runif(1000,-3,3)
#' plot(ts(y))
#' y_i <- ts_integrate(y)
#' plot(ts(y_i))
#'
ts_integrate <- function(y){
  # Coerce anything that is not a plain 1D numeric vector
  if(!(is.numeric(y) && is.null(dim(y)))){
    warning("Need a 1D numeric vector! Trying to convert...")
    y <- as.numeric(as.vector(y))
  }

  valid <- !is.na(y)
  # Time index of the series; missing observations get a missing time stamp
  tIdx <- stats::time(y)
  tIdx[!valid] <- NA

  # Prepend the first valid value/time so diff() yields one step per sample
  firstVal <- y[valid][1]
  vals     <- c(firstVal, y[valid])
  times    <- c(tIdx[valid][1], tIdx[valid])

  # Cumulative sum of value * time-step, written back at the valid positions
  profile <- y
  profile[tIdx[valid]] <- cumsum(vals[-length(vals)] * diff(times))

  # Restore the level of the first observation
  firstVal + profile
}



#' Turn a 1D time series vector into a 2D curve
#'
#' @param y A 1D time series object or numeric vector.
#' @param unitSquare Convert the series to a unit square? (default = `FALSE`)
#' @param toSparse Convert to sparse Matrix (default = `TRUE`)
#' @param resolution Factor by which dimensions will be multiplied (default = `2`)
#'
#' @return A (sparse) matrix representing the time series as a curve in 2D space
#' @export
#'
#' @family Time series operations
#'
#' @examples
#'
#' \donttest{
#' y <- rnorm(100)
#' plot(ts(y))
#'
#' y_img <- ts_rasterize(y)
#' image(y_img,col=c("white","black"))}
#'
#'
ts_rasterize <- function(y, unitSquare = FALSE, toSparse = TRUE, resolution = 2){

  if(!is.vector(y, mode = "numeric")){
    stop('Expecting a numeric vector')
  }

  N <- NROW(y)
  x <- stats::time(y)

  if(unitSquare){
    # elascer() (casnet helper) rescales values to [0,1], so the raster can
    # cover the unit square
    y <- elascer(as.numeric(y))
    x <- elascer(as.numeric(x))
    r <- raster::raster(ncols = N*resolution, nrows = N*resolution, xmn = 0, xmx = 1, ymn = 0, ymx = 1)
  } else {
    r  <- raster::raster(ncols = N*resolution, nrows = length(unique(y))*resolution, xmn = min(x, na.rm = TRUE), xmx = max(x, na.rm = TRUE), ymn = min(y, na.rm = TRUE), ymx = max(y, na.rm = TRUE))
  }

  xy <- cbind(xc=x,yc=y)
  lns <- raster::spLines(xy)
  rm(x,y,xy)

  # Rasterize the poly-line onto the grid; transpose so rows follow the time axis
  spm <- t(raster::as.matrix(raster::rasterize(lns, r, background = 0)))
  rm(lns,r)

  if(toSparse){
    # Matrix::as.matrix() coerces to a dense base matrix and silently ignores
    # a 'sparse' argument; Matrix::Matrix(..., sparse = TRUE) actually builds
    # the sparse representation promised by the documentation.
    spm <- Matrix::Matrix(spm, sparse = TRUE)
  }

  return(spm)

}


# Help lme4 get a better convergence
# nlopt <- function(par, fn, lower, upper, control) {
#   # Add to call: control = lmerControl(optimizer = "nloptwrap", calc.derivs = FALSE
#   .nloptr <<- res <- nloptr(par, fn, lb = lower, ub = upper,
#                             opts = list(algorithm = "NLOPT_LN_BOBYQA", print_level = 1,
#                                         maxeval = 1000, xtol_abs = 1e-6, ftol_abs = 1e-6))
#   list(par = res$solution,
#        fval = res$objective,
#        conv = if (res$status > 0) 0 else res$status,
#        message = res$message
#   )
# }


#' Transition matrix
#'
#' Create a transition matrix from a discrete time series, e.g. to generate Monte Carlo simulations.
#'
#' @param yd A discrete numeric vector or time series, e.g. transformed using [ts_discrete()], or, [ts_symbolic()]. Values are assumed to be the integers `1:nbins`.
#' @param nbins The number of bins used to transform a continuous time series, or, the number of expected (given `nbins`, or, theoretically possible) values for a discrete series (default = `length(unique(yd))`)
#'
#' @return A transition probability matrix
#' @export
#'
#' @family Time series operations
#'
#' @examples
#'
#' set.seed(4321)
#'
#' # Random uniform numbers
#' y  <- runif(10,0,20)
#'
#' # Discrete version
#' yd <- ts_discrete(y, nbins = 10)
#'
#' # Transition probabilities
#' ts_transmat(yd = yd, nbins = 10)
#'
#' # Note: The number of 'observed' bins differs from 'expected' bins
#' table(yd)
#'
#' # Not specifying the expected bins will generate a different matrix!
#' ts_transmat(yd = yd, nbins = length(unique(yd)))
#'
ts_transmat <- function(yd, nbins = length(unique(yd))){

  # is.wholenumber() is a casnet helper: TRUE for integer-valued numerics
  if(!all(is.wholenumber(yd))){
    stop("yd is not a vector of discrete numbers!")
  }

  transmat <- matrix(0,nbins,nbins)
  # seq_len() is safe for nbins == 0 (1:nbins would iterate over c(1, 0)
  # and index out of bounds)
  for(i in seq_len(nbins)){
    for(j in seq_len(nbins)){
      # Count transitions from state i at time t to state j at time t+1
      transmat[i,j] <- sum(yd[-NROW(yd)]==i & yd[-1]==j, na.rm = TRUE)
    }
    if(sum(transmat[i,])>0) {
      # Normalise the row counts into probabilities
      transmat[i,] <- transmat[i,]/sum(transmat[i,], na.rm = TRUE)
    } else {
      # State i never occurred: assume a uniform transition distribution
      transmat[i,] <- 1/nbins
    }
  }
  return(transmat)
}


#' Change Profile
#'
#' Computes a change profile: the integrated (cumulative, via [ts_integrate()])
#' deviation of `y` from its rolling-window mean.
#'
#' @param y Time series
#' @param win Window size (number of samples) used to compute the rolling mean with [zoo::rollapply()]
#' @param align Alignment of the window, see [zoo::rollapply()] (default = `"right"`)
#' @param keepNA Remove `NA` or return `y` with `NA` (default = `TRUE`)
#'
#' @return Transformed time series
#'
#' @export
#'
#' @references Hasselman, F. and Bosman, A.M.T. (2020). Studying Complex Adaptive Systems With Internal States: A Recurrence Network Approach to the Analysis of Multivariate Time-Series Data Representing Self-Reports of Human Experience. Frontiers of Applied Mathematics and Statistics, 6:9. doi: 10.3389/fams.2020.00009
#'
ts_cp <- function(y, win, align = "right", keepNA = TRUE){

 # NOTE(review): argument keepNA is never referenced in the body; NA values in
 # y are always retained in place because results are only written back to the
 # non-missing positions y[idOK] — confirm whether keepNA should have an effect.
 idOK     <- !is.na(y)

 # Work on the non-missing values only
 x <- y[idOK]
 # Rolling mean over 'win' samples; series edges are padded with NA by fill=NA
 WINmean <- zoo::rollapply(x, width = win, mean, na.rm=TRUE, fill=NA, align=align)

 # Integrate the deviations from the local mean to obtain the change profile
 y[idOK] <- ts_integrate(x-WINmean[1:length(x)])

 return(y)
 #df_se[idP,paste(cn,"cp")%ci%df_se] <- ts_integrate(y)
}



#' Coarse grain a time series
#'
#' Generate a coarse grained version of a time series by summarising values into bins.
#'
#' @param y A numeric vector
#' @param grain The bin size in which to summarise the values (default = `2`)
#' @param summaryFunction How should the data be summarized in the bins? Can be `"mean"`, `"median"`, `"min"`, `"max"`, or, `"maxFreq"`. Value `"maxFreq"` is for categorical data and will pick the most frequently occurring category within the bin (default = `"mean"`)
#' @param retainLength Return only the bin values (`FALSE`), or retain the length of the original series? (default = `TRUE`)
#'
#' @return A coarse grained version of `y`.
#'
#' @export
#'
#' @examples
#'
#' set.seed(1234)
#' y <- rnorm(100)
#' y1 <- ts_coarsegrain(y, grain = 3)
#' y2 <- ts_coarsegrain(y, grain = 3, retainLength = TRUE)
#' y3 <- ts_coarsegrain(y, grain = 3, retainLength = TRUE, summaryFunction = "max")
#'
#' t1 <- seq(1,length(y), by = 3)
#'
#' plot(t1+1, y1, col = "red3", type = "l", ylim = c(-3,3), xlab = "time", ylab = "Y")
#' lines(y, col = "grey70")
#' lines(y2, col = "steelblue")
#' lines(y3, col = "green3")
#' legend(60, -1.3, legend=c("Original", "Mean", "Mean + Retain Length", "Max + Retain Length"),
#' lty = 1, col=c("grey70", "red3", "steelblue","green3"), cex = 0.7)
#'
#'
ts_coarsegrain <- function(y, grain = 2, summaryFunction = c("mean","median","min","max","maxFreq")[1], retainLength = FALSE){

  y <- as.numeric(y)
  # Fail early on an unknown summary function instead of silently
  # returning an empty result from an unmatched switch()
  summaryFunction <- match.arg(summaryFunction, c("mean","median","min","max","maxFreq"))

  # Invalid bin sizes (empty, NA, < 1) fall back to grain = 1 (no coarse
  # graining); previously an NA grain crashed on the length comparison below
  if(length(grain) != 1 || is.na(grain) || grain < 1){
    grain <- 1
  }

  if(grain > length(y)){
    stop("Cannot coarse grain beyond series length!")
  }

  u <- ceiling(length(y)/grain)

  # One column per bin, filled column-wise. The final (possibly incomplete)
  # bin is padded with NA so the padding is ignored by the na.rm = TRUE
  # summaries below; padding with 0 (as before) biased the summary of the
  # final bin towards 0.
  yy <- matrix(NA_real_, nrow = grain, ncol = u)
  yy[1:length(y)] <- y

  # Most frequently occurring value in a bin (for categorical data)
  mostFreq <- function(x){
    if(all(is.na(x))){
      return(NA)
    } else {
      return(as.numeric(attributes(stats::ftable(x))$col.vars$x[[which.max(stats::ftable(x))]]))
    }
  }

  binFun <- switch(summaryFunction,
                   min     = function(x) min(x, na.rm = TRUE),
                   max     = function(x) max(x, na.rm = TRUE),
                   mean    = function(x) mean(x, na.rm = TRUE),
                   median  = function(x) stats::median(x, na.rm = TRUE),
                   maxFreq = mostFreq)

  # Summarise every bin (column). This also covers the u == 1 case, which the
  # previous NCOL(yy) != NCOL(y) guard skipped, returning the raw series.
  yy <- apply(yy, 2, binFun)

  if(retainLength){
    # Repeat each bin value grain times to (approximately) restore the length
    yy <- rep(yy, each = grain)
    if(length(yy) != length(y)){
      warning("Coarse grained series is not exactly the same length as the original.")
    }
  }

  attr(yy,"grain") <- grain

  return(as.numeric(yy))
}





#' Calculate Kendall's tau in sliding window or around change point.
#'
#'  Kendall's tau at different lags or change point ranges
#'
#' @param y A numeric vector
#' @param changepoint If not `NA`, it has to be an index of `y`. If `win` is `NA` Kendall's tau will be calculated in `1:changepoint`, otherwise the values of `win` will be used to create a window. If both `changepoint` and `win` are `NA` Kendall's tau will be calculated on all of `y`.
#' @param win Numeric vector with 1 or 2 values. If `changepoint != NA`, Kendall's tau will be calculated in a window around the change point. If one value is provided a symmetric window, otherwise `changepoint - win[1]` and `changepoint + win[2]`. If `changepoint == NA` Kendall's tau will be calculated in a sliding  window.
#' @param doPlot provide a plot
#'
#' @return A data frame with one row per (sliding) window: the window ID, the
#'   window size, Kendall's tau and the p-value of [stats::cor.test()].
#' @export
#'
#' @examples
#'
#' ts_slope(y=rnorm(100), doPlot = TRUE)
#'
ts_slope <- function(y, win = NA, changepoint = NA, doPlot = FALSE){

  ms_out <- list()

  slWindow  <- FALSE
  cpWindow  <- FALSE

  # Case 1: window around a change point (%[]% is the casnet in-range operator)
  if(!all(is.na(win))&!is.na(changepoint)){
    if(changepoint%[]%c(1,NROW(y))){

      if(length(win)==1){
        # Symmetric window around the change point
        N <- changepoint + (abs(c(win,win)) * c(-1,1))
      } else {
        # Asymmetric window: win[1] before, win[2] after the change point
        N <- changepoint + abs(win) * c(-1,1)
      }

      N <- N[1]:N[2]

      if(all(N %[]% c(1,NROW(y)))){
        cpWindow <- TRUE
        xl <- "Changepoint ± win"
        X  <- c(changepoint,changepoint)
      } else {
        stop("Invalid combination of win and changepoint.")
      }
    }
  }

  # Case 2: from the start of the series up to the change point
  if(all(is.na(win))&!is.na(changepoint)){
    N <- 1:changepoint
    if(all(N %[]% c(1,NROW(y)))){
      cpWindow <- TRUE
      doPlot   <- FALSE
    } else {
      stop("Invalid combination of changepoint and length of y.")
    }
  }

  # Case 3: sliding window over the whole series
  if(!all(is.na(win))&is.na(changepoint)){
    if(length(win)>=2){
      message("Using first value in argument win")
    }
    # FIX: N must be assigned for length-1 win as well; previously it was only
    # set inside the length(win) >= 2 branch, so ts_slope(y, win = 10) failed
    # with "object 'N' not found".
    N <- abs(win[1])

    if(N%[]%c(1,NROW(y))){
      slWindow <- TRUE
      xl <- "Sliding window"
      X  <- c(0,0)
    } else {
      stop("Invalid combination of win and changepoint.")
    }
  }

  # Case 4: both NA -> Kendall's tau over the entire series, as documented
  # (previously this silently returned an empty list)
  if(all(is.na(win))&is.na(changepoint)){
    N        <- 1:NROW(y)
    cpWindow <- TRUE
    doPlot   <- FALSE
  }

  if(cpWindow){
    ms_out <- data.frame(winID = paste("window: 001 | start:",N[1],"| stop:", max(N)),
                         win = length(N), ktau = stats::cor(x = seq_along(N), y = y[N], method = "kendall"),
                         sig = stats::cor.test(x = seq_along(N), y = y[N], method = "kendall")$p.value)
  }

  if(slWindow){
    # Use the validated window size N (= abs(win[1])), consistent with the
    # message above when a length > 1 win was provided
    winList <- ts_windower(y = y, win = N, step = 1)
    ms_out <- plyr::ldply(winList, function(w){
      # (leftover debug print(w) removed)
      if(sum(!is.na(y[w]))<2){
        return(data.frame(win = N,
                          ktau = NA,
                          sig  = NA))
      } else {
        return(data.frame(win = N,
                          ktau = stats::cor(x = seq_along(w), y = y[w], method = "kendall"),
                          sig  = stats::cor.test(x = seq_along(w), y = y[w], method = "kendall")$p.value))
      }
    },
    .id = "winID")
  }

  if(doPlot){
    plot(x=X, y = c(-1,1), type = "l", xlab = xl, ylab = "Kendall's tau", xlim = c(1,NROW(ms_out)) )
    lines(1:NROW(ms_out) , ms_out$ktau, lwd=2)
  }

  return(ms_out)
}
# FredHasselman/casnet documentation built on April 20, 2024, 3:05 p.m.