Nothing
#' Randomly generate data for internal testing of `auto_rate()`'s linear method.
#'
#' Generate data of size `len` that is coerced to mimic common respirometry
#' data. This is an internal function not intending for public use, though may
#' prove of interest or utility. We may modify this function at any time, which
#' may irreversibly change the outputs. This function was first created to test
#' `auto_rate()` using the other internal function, `test_lin()`, but we decided
#' to publish the code as it is an effective (and visually-appealing) tool for
#' teaching, testing and visualising purposes. This function is by no means
#' comprehensive and we encourage users to generate data that suit their own
#' unique situations.
#'
#' `sim_data()` creates 3 types of data that we think are common in respirometry
#' or oxygen flux data. The data types can be selected using the `type`
#' input:
#'
#' - `"default"`: data is made up of a linear segment of known length and slope,
#' with a non-linear segment, generated by a sine or cosine function depending
#' on whether the slope is positive or negative, appended to the beginning of
#' the data. The shape of the dataset is designed to mimic many similar data
#' whereby the initial sections of the data are often non-linear. Here the slope
#' is randomly generated using `rnorm(1, 0, 0.025)`, the length of the inital
#' segment randomly generated using `floor(abs(rnorm(1, .25*len, .05*len)))`
#' where `len` is the total number of observations in the data, and the
#' amplitude of the segment also randomly generated using `rnorm(1, .8, .05)`.
#'
#' - `"corrupted"`: same as `"default"`, but "corrupted" data is inserted
#' randomly at any point in the linear segment. The data corruption is chosen as
#' a sudden dip in the reading, which recovers. This event mimics equipment
#' interference that sometimes happens, but does not necessarily invalidate the
#' dataset if the corrupted section is omitted from analysis, The dip is
#' generated by a cosine function of fixed amplitude of 1, and the length is
#' randomly generated.
#'
#' - `"segmented"`: same as `"default"`, but the data is modified to contain two
#' linear segments. The slope of the second linear segment is randomly picked at
#' between 0.5 and 0.6 of the first linear segment. Its length is also randomly
#' generated but always smaller than the first segment.
#'
#' Normally-distributed noise is added to the dataset to add variation, which
#' can be modified using the `"sd"` input.
#'
#' @param len numeric. Defaults at 300. Number of observations in the dataset.
#' @param type character. What kind of data should the function generate?
#' Available for use: "default", "corrupted" and "segmented".
#' @param sd numeric. Defaults at 0.05. This is the amount of noise to add to
#' the system, randomly generated as a standard deviation based on the entire
#' data set.
#' @param preview logical. Defaults to TRUE. Plots the generated data as an xy.
#'
#' @return A list containing the dataframe, the slope and the length of the
#' linear section of the data, to be used for analysis in the function
#' `test_lin()`.
#'
#' @export
#' @keywords internal
#'
#' @seealso `test_lin()`
#'
#' @examples
#' # Generate data of length 200
#' sim_data(len = 200)
#'
#' # Generate data that contains a "corruption"
#' sim_data(type = "corrupted")
#'
#' # Generate noisy data
#' sim_data(type = "segmented", sd = .2)
#'
#' # Generate "perfect" non-noisy data
#' sim_data(sd = 0)
sim_data <- function(len = 300, type = "default", sd = .05, preview = TRUE) {
# snipR::refresh()
# len = 300
# type = "segmented"
# sd = .05
# preview = TRUE
## generate start curve segment at approx 25 percent of total length
n <- floor(abs(rnorm(1, .25*len, .05*len))) # generate length
ampli <- rnorm(1, .8, .05) # generate height
## dip or rise, based on whether slope is negative or positive
dip <- ampli * cos(seq(0, pi/2, length = n)) - 1 + ampli # generate dip
ris <- ampli * cos(seq(pi, pi/2, length = n)) - ampli # generate rise
ris <- ris - max(ris)
## generate corruption (random dip) at approx 25 percent of total length
cor <- cos(seq(0, 2 * pi, length = rnorm(1, .25 * len, .05 * len)))
## calculate remaining length
len_x <- len - n
## if type is not defualt, generate length for the second distort
if (any(type == "corrupted"))
len_z <- floor(rnorm(1, .25 * len_x, .02 * len_x))
if (any(type == "segmented"))
len_z <- floor(rnorm(1, .35 * len_x, .02 * len_x))
## adjust length of main linear segment, if needed
if (!any(type == "default")) len_x <- len_x - len_z
## ok, generate the main linear segment
x <- seq(1, len_x)
b_1 <- rnorm(1, 0, 0.020) # randomly pick the slope
y <- b_1 * x # generate y values based on x and slope
## generate second distort, if needed
if (any(type == "corrupted")) {
cor <- cos(seq(0, 2 * pi, length = len_z))
poke <- sample(1:len_x, 1)
y <- append(y, cor + y[poke] - 1, after = poke)
} else if (any(type == "segmented")) {
x_2 <- seq(1, len_z)
b_s <- b_1 * runif(1, 0.5, 0.6)
y_2 <- b_s * x_2
y <- c(y, y_2 + tail(y, 1))
}
## OK. Time to join up with the start curve
## append dip if negative slope, rise if positive
if (b_1 >= 0) {
joined <- c(ris, y)
} else joined <- c(dip, y + tail(dip, 1))
noise <- rnorm(length(joined), 0, sd)
dat <- joined + noise + 9 # ADD NOISE, raise data
df <- data.frame(
x = seq.int(0, length(dat) - 1, 1),
y = dat
) # convert to data frame
if (preview) {
plot(df, xlab = "", ylab = "", cex = .75, mgp = c(0,0,0), tck = .01,
pch = pch_def, panel.first = grid())
}
## Grab indices for benchmark tests
if (type == "default") {
linseg <- max(seq(1,n)) + seq(1, len_x)
} else if (type == "corrupted") {
seg2 <- seq(n+1, poke+n) # linear segment before corruption
seg3 <- seq(max(seg2)+1+len_z, len) # linear segment after corruption
if (length(seg2) > length(seg3)) {
linseg <- seg2
}else linseg <- seg3
} else if (type == "segmented") {
linseg <- seq(n+1, len_x+n)
}
## let's grab and measure the slope of the linear line after noise
lindf <- df[linseg, ]
coefi <- lm(y~x, lindf)$coefficients[[2]]
out <- list(
df = df,
coef = coefi,
len_main = len_x,
seg_index = linseg
)
return(invisible(out))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.