Nothing
#' Data Generator
#'
#' @description Generating data with a given model = changepoint relative positions + parameters + type of cost + (standard deviation + gamma decay)
#' @param n number of data points to generate
#' @param changepoints vector of positions of the changepoints in (0,1] (last element is always 1).
#' @param parameters vector of means for the consecutive segments (same length as changepoints)
#' @param type a string defining the cost model to use: \code{"mean"}, \code{"variance"}, \code{"poisson"}, \code{"exp"}, \code{"negbin"}
#' @param sigma a positive number = the standard deviation of the data
#' @param gamma a number between 0 and 1 : the coefficient of the exponential decay (by default = 1 for piecewise constant signals)
#' @param size parameter of the \code{rnbinom} function
#' @return a vector of size n generated by the chosen model
#' @examples
#' dataGenerator(500, c(0.3, 0.6, 1), c(1, 2, 3), type = "mean", sigma = 0.5)
#'
#' dataGenerator(1000, c(0.2, 0.33,0.5, 1), c(4, 0.2, 3,0.5), type = "variance")
#'
#' dataGenerator(800, c(0.4, 0.8, 1), c(15, 5, 8), type = "mean", gamma = 0.95, sigma = 0.4)
#'
#' dataGenerator(400, c(0.4, 0.9, 1), c(2, 1.5, 3), type = "poisson")
#'
#' dataGenerator(1000, c(0.44, 0.86, 1), c(0.5, 0.2, 0.4), type = "negbin", size = 3)
dataGenerator <- function(n, changepoints, parameters, type = "mean", sigma = 1, gamma = 1, size = 10)
{
### STOP ###
if(is.numeric(n) == FALSE || n%%1 != 0){stop('n is not an integer.')}
if(n < 2){stop('n must be a >= 1')}
if(is.numeric(changepoints) == FALSE){stop('changepoints is not a numeric vector')}
if(!all(changepoints <= 1) || !all(changepoints > 0)){stop('changepoints has values outside (0,1]')}
if(changepoints[length(changepoints)] != 1){stop('the last value in changepoints vector is not 1')}
if(length(changepoints) > 1)
{
for(i in 1:(length(changepoints)-1)){if(changepoints[i+1] <= changepoints[i]){stop('changepoints not an increasing vector')}}
}
allowed.types <- c("mean", "variance", "poisson", "exp", "negbin")
if(!type %in% allowed.types){stop('type must be one of: ', paste(allowed.types, collapse=", "))}
if(sigma < 0){stop('sigma is a nonnegative value')}
if(gamma > 1 || gamma <= 0){stop('gamma is not between 0 and 1 (0 excluded)')}
### ###
SegLength <- diff(c(0, floor(changepoints*n)))
vectData <- NULL
if(type == "mean")
{
if(gamma != 1)
{
decay <- NULL
for(i in 1:length(SegLength)){decay <- c(decay, cumprod(rep(gamma, SegLength[i])))}
vectData <- rep(parameters, SegLength)*decay + rnorm(n, 0, sigma)
}
else
{
vectData <- rep(parameters, SegLength) + rnorm(n, 0, sigma)
}
}
if(type == "variance"){vectData <- rnorm(n, 0, rep(sqrt(parameters), SegLength))}
if(type == "poisson"){vectData <- rpois(n, rep(parameters, SegLength))}
if(type == "exp"){vectData <- rexp(n, rep(parameters, SegLength))}
if(type == "negbin")
{
if(any(parameters > 1) || any(parameters <= 0)){stop('parameters for negbin type should be positive probabilities')}
vectData <- rnbinom(n, size = size, prob = rep(parameters, SegLength))
}
return(vectData)
}
#' sdDiff
#'
#' @description sdDiff is a function based on the difference operator (or difference order for HALL method) estimating the time-series standard deviation.
#' The estimation works for time-series generated by Gaussian
#' random variables with constant standard deviation and multiple changes in mean.
#' Three estimators are available:
#' \itemize{
#' \item \code{HALL} : the so-called HALL-estimator of order 3. For more details see: \emph{(1990) Asymptotically optimal difference-based estimation of variance in nonparametric regression. Authors: Hall, Peter and Kay, JW and Titterinton, DM. Biometrika, pages 521--528}
#' \item \code{MAD} : the median absolute deviation estimator computed on \code{diff(x)/sqrt(2)} with x the vector of datapoints
#' \item \code{SD} : the standard deviation estimator (function sd) computed on \code{diff(x)/sqrt(2)} with x the vector of datapoints
#' }
#' @param x vector of datapoints
#' @param method Three available methods: \code{"HALL"}, \code{"MAD"} and \code{"SD"}
#' @return a value equal to the estimated standard deviation
#' @examples
#' data <- dataGenerator(300, seq(0.1,1,0.1), sample(10), sigma = 2)
#' sdDiff(data, method = "HALL")
#' sdDiff(data, method = "MAD")
#' sdDiff(data, method = "SD")
sdDiff <- function(x, method = "HALL")
{
if(is.numeric(x) == FALSE || length(x) < 5){stop('x is not a numeric vector or length < 5 (the HALL method cannot be used)')}
if(method == "HALL")
{
n = length(x)
wei <- c(0.1942, 0.2809, 0.3832, -0.8582)
mat <- wei %*% t(x)
mat[2, -n] = mat[2, -1]
mat[3, -c(n-1, n)] = mat[3, -c(1, 2)]
mat[4, -c(n-2, n-1, n)] = mat[4, -c(1, 2, 3)]
return(sqrt(sum(apply(mat[, -c(n-2, n-1, n)], 2, sum)^2) / (n-3)))
}
if(method == "MAD")
{
return(mad(diff(x)/sqrt(2)))
}
if(method == "SD")
{
return(sd(diff(x)/sqrt(2)))
}
return(NULL)
}
###############################################
# invisible function for the user
getDerivativePenalty <- function(D, n, c1 = 2, c2 = 5)
{
return(c1*log(n) - c1*(log(D) + 1) + c2)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.