R/perturbations.R

Defines functions .sim.convert_baseline .sim.convert_range .sim.adjusted_time sim.baseline sim.sharp_push sim.smooth_push sim.smooth_flex sim.sharp_flex sim.cut sim.sigmoid sim.exponential sim.contract sim.prune sim.round sim.stretch sim.concatenate

Documented in sim.baseline sim.concatenate sim.contract sim.cut sim.exponential sim.prune sim.sharp_flex sim.sharp_push sim.sigmoid sim.smooth_flex sim.smooth_push sim.stretch

.sim.convert_baseline <- function(baseline) {
  if (class(baseline) != "list") {
    df <- baseline
  } else {
    if (is.null(baseline$df)) {
      stop("Invalid baseline")
    }
    df <- baseline$df
  }
  return(df)
}

.sim.convert_range <- function(range,
                               time_vector,
                               range_by_time = TRUE) {
  range1 <- head(range, 1)
  range2 <- tail(range, 1)

  if (range2 == Inf) {
    if (range_by_time) {
      range2 <- max(time_vector)
    } else {
      range2 <- length(time_vector)
    }
  }

  if (is.null(range)) {
    if (range_by_time) {
      range <- 1:max(time_vector)
    } else {
      range <- 1:length(time_vector)
    }
  }

  if (range_by_time) {
    range <- findInterval(c(range1, range2), vec = time_vector)
    range <- range[1]:range[2]
  } else {
    range <- range1:range2
  }

  return(range)
}

.sim.adjusted_time <- function(flow_rate) {
  fr <- .sim.convert_baseline(flow_rate)

  from <- c(1, fr$time[!is.na(fr$value)])
  to <- c(sapply(2:length(from), function(i) from[i] - 1), tail(na.omit(fr$time), 1))
  intervals <- data.frame(from = from, to = to)
  counts <- unlist(sapply(1:nrow(intervals), function(i) {
    i1 <- intervals$from[i]
    i2 <- intervals$to[i]
    return(round(sum(fr$value[i1:i2], na.rm = TRUE)))
  }))
  intervals <- cbind(intervals, unlist(counts)); colnames(intervals)[3] <- "count"
  intervals <- intervals[-1, ]

  adjusted_time <- unlist(sapply(1:nrow(intervals), function(i) {
    i1 <- intervals$from[i]
    i2 <- intervals$to[i]
    i3 <- intervals$count[i]
    return(rep(i1, i3))
  }))

  return(adjusted_time)
}

#' Create a baseline signal
#'
#' Create a baseline flow rate or fluorescence signal.
#'
#' @param N total number of time points over which events are distributed.
#' @param mu mean of distribution from which values are sampled (independently per each time point).
#' @param sigma standard deviation (see \code{mu}).
#' @param transform type of transformation applied to values per each time tick. Can be \code{log}, \code{asinh}. Defaults to \code{NULL}.
#' @param transform_par parameter for non-null \code{transform}. For \code{log}, this is the base. For \code{asinh}, this is \code{b} in \code{y := asinh(y/b)} where \code{y} is the vector of original values.
#' @param distort time-dependent distortion of the baseline values. Can be \code{sin}, \code{slope}. Defaults to \code{NULL}.
#' @param distort_par parameter for non-null \code{distort}. For \code{sin}, this is a vector of (1) amplitude, (2) period. For \code{slope}, this is \code{a} in \code{offset := a*x + mu}, where \code{x} is a vector of time points and \code{offset} is added to the original values.
#' @param output specifies type of output. If 1, the function produces a list containing a \code{df} slot with data frame of \code{time} and \code{values} and a \code{params} slot containing parameters of the signal: \code{mu}, \code{sigma}, \code{D} (equal to \code{"gauss"}, since baseline
#'  values are sampled from a Gaussian distribution), \code{transform}, \code{transform_par}, \code{distort}, \code{distort_par} and \code{rate_adjusted} (Boolean, indicated if parameter \code{flow_rate_baseline} was used). If 2, only the data frame (\code{df}) is returned. If 3, only the
#'  \code{value} column (without \code{time}) of the data frame is returned as numeric vector.
#' @param flow_rate_baseline for simulating fluorescence signal congruent with flow rate signal, as generated by \code{sim.baseline} (with possible perturbations). When \code{flow_rate_signal} is non-null, this overrides \code{N} and events are distributed in the way specified by the flow rate.
#' @param verbose Boolean, indicates whether the total number of events (\code{N}) should be printed when \code{flow_rate_baseline} is non-null.
#'
#' @return See parameter \code{output}
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.baseline <- function(N,
                         mu,
                         sigma,
                         transform = NULL,
                         transform_par = 2,
                         distort = NULL,
                         distort_par = 100,
                         output = c(1, 2, 3),
                         flow_rate_baseline = NULL,
                         verbose = FALSE) {

  if (!is.null(flow_rate_baseline)) {
    fr <- .sim.convert_baseline(flow_rate_baseline)

    adj <- .sim.adjusted_time(fr)
    N <- length(adj)
    if (verbose) {
      cat("N set to", N, "\n")
    }
  }

  if (!is.null(distort)) {
    if (distort[1] == "sin") {
      if (!is.null(distort_par)) {
        a <- distort_par[1]
        b <- 2*pi/distort_par[2]
        c <- mu
        y <- a*sin(b*seq_len(N)) + c
        offset <- rnorm(n = N, mean = 0, sd = sigma)
        y <- y + offset
      } else {
        stop("Invalid distort_par")
      }
    } else if (distort[1] == "slope") {
      if (!is.null(distort_par)) {
        a <- distort_par[1]
        b <- mu
        y <- a*seq_len(N) + b
        offset <- rnorm(n = N, mean = 0, sd = sigma)
        y <- y + offset
      } else {
        stop("Invalid distort_par")
      }
    }
  } else {
    y <- rnorm(n = N, mean = mu, sd = sigma)
  }

  if (!is.null(transform)) {
    if (transform == "log") {
      y <- log(y, base = transform_par)
    } else if (transform == "asinh") {
      y <- asinh(y/transform_par)
    } else {
      warning("Invalid transform, skipped")
    }
  }

  if (!is.null(flow_rate_baseline)) {
    x <- adj
  } else {
    x <- seq_len(N)
  }

  if (output[1] == 1) {
    return(
      list(df = data.frame(time = x,
                           value = y),
           params = list(mu = mu,
                         sigma = sigma,
                         D = "gauss",
                         transform = ifelse(is.null(transform), "null", transform),
                         transform_par = ifelse(is.null(transform_par), "null", transform_par)),
           rate_adjusted = !is.null(flow_rate_baseline),
           distort = ifelse(is.null(distort), "null", distort),
           distort_par = ifelse(is.null(distort_par), "null", distort_par))
    )
  } else if (output[1] == 2) {
    return(
      data.frame(time = x,
                 value = y)
    )
  } else if (output[1] == 3) {
    return(y)
  } else {
    stop("Invalid output parameter")
  }
}

#' Signal perturbation: sharp push
#'
#' Distort a baseline signal by offsetting a given range of values uniformly.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param strength linear term in offsetting selected values.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.sharp_push <- function(baseline,
                           range = NULL,
                           range_by_time = TRUE,
                           strength) {
  df <- .sim.convert_baseline(baseline)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  df[range, "value"] <- df[range, "value"] + strength
  baseline$df <- df

  return(baseline)
}

#' Signal perturbation: smooth push
#'
#' Distort a baseline signal by offsetting a given range of values by adding values generated by a probability density function.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param strength maximum value of cell offset.
#' @param spikiness measure of how needle-shaped (vs. bell-shaped) the perturbation should be: values between 0 and 1.
#' @param D distribution from which to sample. Can be \code{"gauss"} (Gauss), \code{"chisq"} (chi-squared), \code{"-chisq"} (\code{"chisq"} flipped along the time axis).
#' @param D_par parameter for probability density function of given distribution. For \code{"chisq"} and \code{"-chisq"}, this is a vector of (1) degrees of freedom and (2) non-centrality parameter. (See function \code{dchisq}.)
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.smooth_push <- function(baseline,
                            range,
                            range_by_time = TRUE,
                            strength,
                            spikiness,
                            D = c("gauss", "chisq", "-chisq"),
                            D_par = c(3, 3)) {
  df <- .sim.convert_baseline(baseline)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)
  range1 <- head(range, 1)
  range2 <- tail(range, 1)

  if (D[1] == "gauss") {
    mu <- (range2 + range1)/2
    sigma <- (1 - spikiness)/2 * mu/2
    offset <- dnorm(x = range, mean = mu, sd = sigma)
    offset <- offset / max(offset) * strength
  } else if (D[1] %in% c("chisq", "-chisq")) {
    if (length(D_par) < 2) {
      stop("Invalid D_par for chisq distribution")
    }
    r <- seq(0, 35, length.out = length(range))
    offset <- dchisq(x = r, df = D_par[1], ncp = D_par[2])
    offset <- offset / max(offset) * strength
    if (D[1] == "-chisq") {
      offset <- rev(offset)
    }
  } else {
    stop("Invalid D")
  }

  df[range, "value"] <- df[range, "value"] + offset

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Signal perturbation: smooth flex
#'
#' Distort a baseline signal by stretching or contracting the signal values (independently of time), effectively changing their variance, by factors generated by a probability density function.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param strength maximum factor by which to stretch the selected values.
#' @param spikiness measure of how needle-shaped (vs. bell-shaped) the perturbation should be: values between 0 and 1.
#' @param D distribution from which to sample. Can be \code{"gauss"} (Gauss).
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.smooth_flex <- function(baseline,
                            range,
                            range_by_time = TRUE,
                            strength,
                            spikiness,
                            D = c("gauss"))
{
  df <- .sim.convert_baseline(baseline)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  if (D[1] == "gauss") {
    range1 <- head(range, 1)
    range2 <- tail(range, 1)

    x <- range
    mu <- (range1 + range2)/2
    sigma <- (1 - spikiness)/2 * mu/2
    offset <- dnorm(x = x, mean = mu, sd = sigma)
    offset <- offset - min(offset)
    offset <- offset / max(offset) * strength + 1

  } else {
    stop("Invalid D")
  }

  v <- df[range, "value"]

  if (class(baseline) == "list") {
    avg <- baseline$params$mu
  } else {
    avg <- mean(df[max(range[1] - 5, 1):min(range[2] + 5, nrow(df)), "value"])
  }

  v <- v - avg
  v <- v*offset
  v <- v + avg
  df[range, "value"] <- v

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Signal perturbation: sharp flex
#'
#' Distort a baseline signal by stretching or contracting the signal values (independently of time), effectively changing their variance, uniformly.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param strength factor by which to stretch the selected values.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.sharp_flex <- function(baseline,
                           range,
                           range_by_time = TRUE,
                           strength)
{
  df <- .sim.convert_baseline(baseline)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  offset <- strength

  v <- df[range, "value"]

  if (class(baseline) == "list") {
    avg <- baseline$params$mu
  } else {
    avg <- mean(df[max(range[1] - 5, 1):min(range[2] + 5, nrow(df)), "value"])
  }

  v <- v - avg
  v <- v*offset
  v <- v + avg
  df[range, "value"] <- v

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Cut signal along time axis
#'
#' Cut out selected values of a simulated signal.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param reindex Boolean, indicates whether \code{time} values should be re-indexed to 1..N after cutting.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.cut <- function(baseline,
                    range,
                    range_by_time = TRUE,
                    reindex = FALSE) {
  df <- .sim.convert_baseline(baseline)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  df <- df[-range, ]
  if (reindex) {
    df$time <- df$time - min(df$time)
  }

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Signal perturbation: sigmoid curve
#'
#' Distort a baseline signal by permanently shifting part of the baseline by a specified amount, inserting a sigmoid curve as transition to the new part of the baseline.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param shift offset of the newly elevated/decreased level of the baseline.
#' @param right_side Boolean, increases whether the shift should occur over increasing time (versus decreasing).
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.sigmoid <- function(baseline,
                        range,
                        range_by_time = TRUE,
                        shift,
                        right_side = TRUE) {
  df <- .sim.convert_baseline(baseline)

  N <- nrow(df)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  sequence <- seq(-7, 7, length.out = length(range))

  if (right_side) {
    offset <- 1/(1 + exp(-sequence))
  } else {
    offset <- 1/(1 + exp(sequence))
  }

  range1 <- head(range, 1)
  range2 <- tail(range, 1)

  if (right_side) {
    range <- range1:N
    offset <- c(offset, rep(1, length(range2:N) - 1))
  } else {
    range <- 1:range2
    offset <- c(rep(1, length(1:range1) - 1), offset)
  }

  offset <- offset * shift

  v <- df[range, "value"]
  v <- v + offset
  df[range, "value"] <- v

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Signal perturbation: exponential
#'
#' Distort a baseline signal by creating an exponentially increasing/decreasing (quasi-)blow-up in the signal values.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param range indices of affected values. Either a start and end index or the whole range. If length of \code{range} is 2 and second value is infinity, the second value is converted to highest possible value. If \code{range} is \code{NULL}, maximum range (1 to max) is taken.
#' @param range_by_time Boolean, indicates whether values in \code{range} correspond to value of \code{time} of signal, rather than row indices. This is recommended, since time values for fluorescence signals with non-uniform flow rate are also not uniform.
#' @param strength measure of maximum offset versus baseline.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.exponential <- function(baseline,
                            range,
                            range_by_time = TRUE,
                            strength) {
  df <- .sim.convert_baseline(baseline)

  N <- nrow(df)

  range <- .sim.convert_range(range = range, time_vector = df$time, range_by_time = range_by_time)

  range1 <- head(range, 1)
  range2 <- tail(range, 1)

  if (class(baseline) == "list") {
    avg <- baseline$params$mu
  } else {
    avg <- mean(df[max(range1 - 5, 1):min(range2 + 5, nrow(df)), "value"])
  }

  ceiling <- avg * strength
  exp_max <- log(ceiling)

  sequence <- seq(0, exp_max, length.out = length(range))

  offset <- exp(sequence)

  v <- df[range, "value"]
  v <- v + offset
  df[range, "value"] <- v

  if (class(baseline) == "list") {
    baseline$df <- df
  } else {
    baseline <- df
  }

  return(baseline)
}

#' Signal manipulation: contract
#'
#' Contracts signal data. Given a baseline signal, this function separates signal values into segments of size \code{N}, with respect to time. Then, from each segment, \code{i} values from (1..N) are removed at random
#' (unless \code{j} is not \code{NULL}, in which case always remove the \code{j}-th values in the given segment and disregard \code{i}).
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param i see description above. Defaults to 1.
#' @param j see description above. Defaults to \code{NULL}.
#' @param N see description above. Defaults to \code{N}.
#' @param reindex Boolean, indicates whether \code{time} values should be re-indexed to 1..N after contraction.
#' @param pruning_only Boolean, indicates whether the function should only use the specified parameters to remove events, but keep the time scale fixed. This is equivalent to using function \code{sim.prune}.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.contract <- function(baseline,
                         i = 1,
                         j = NULL,
                         N = 2,
                         reindex = FALSE,
                         pruning_only = FALSE) {

  data <- .sim.convert_baseline(baseline)

  n_segs <- length(data$time)/N
  segments.vec <- rep(1:n_segs, each = N)
  if (length(segments.vec) != length(data$time)) {
    segments.vec <- c(1, segments.vec)
  }
  segments.u <- sort(unique(segments.vec))

  to_remove <- lapply(segments.u, function(s) {
    idcs <- which(segments.vec == s)
    if (!is.null(j)) {
      return(idcs[j])
    } else {
      return(sample(idcs, size = i, replace = FALSE))
    }
  })
  to_remove <- unlist(to_remove)

  if (pruning_only) {
    data[to_remove, ] <- NA
  } else {
    data <- data[-to_remove, ]
  }

  if (reindex) {
    data$time <- seq_len(nrow(data))
  }

  if (class(baseline) == "list") {
    baseline$df <- data
  } else {
    baseline <- data
  }

  return(baseline)
}

#' Signal manipulation: prune
#'
#' Prunes signal data. Given a baseline signal, this function separates signal values into segments of size \code{N}, with respect to time. Then, from each segment, \code{i} values (1..N) are removed at random
#' (unless \code{j} is not \code{NULL}, in which case always remove the \code{j}-th values in the given segment and disregard \code{i}). Crucially, the removed values are only set to \code{NA}, time scale stays in tact.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param i see description above. Defaults to 1.
#' @param j see description above. Defaults to \code{NULL}.
#' @param N see description above. Defaults to \code{N}.
#' @param reindex Boolean, indicates whether \code{time} values should be re-indexed to 1..N after contraction.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.prune <- function(baseline,
                      i = 1,
                      j = NULL,
                      N = 2,
                      reindex = FALSE) {

  sim.contract(baseline, i = i, j = j, N = N, reindex = reindex, pruning_only = TRUE)
}

sim.round <- function(baseline) {
  if (class(baseline) == "list") {
    baseline$df$value <- round(baseline$df$value)
  } else {
    baseline$value <- round(baseline$value)
  }
  return(baseline)
}

#' Signal manipulation: stretch
#'
#' Stretches out signal data with respect to time. Signal values are interlaced such that 0 or more \code{NA}s are inserted after each signal value and additional time points are generated accordingly.
#'
#' @param baseline baseline, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param n number of time points which should be inserted after each original time point. This can either be a single integer, or a vector of integers from which values are drawn randomly.
#' @param prob vector of probabilities. If \code{n} is a vector: if \code{prob} is \code{NULL}, probability distribution over \code{n} is uniform, otherwise if \code{prob} is a vector of values, these are the corresponding probabilities for drawing each element.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.stretch <- function(baseline,
                        n = 1,
                        prob = NULL) {
  data <- .sim.convert_baseline(baseline)

  if (is.vector(n)) {
    interlace <- sample(x = n, size = nrow(data), replace = TRUE, prob = prob)
  } else {
    interlace <- rep(n, nrow(data))
  }

  vals <- as.list(data$value)
  vals <- lapply(1:length(interlace), function(i) {
    this_n <- interlace[[i]]
    this_val <- vals[[i]]
    if (this_n > 0) {
      return(insert(this_val, 2, rep(NA, this_n)))
    } else {
      return(this_val)
    }
  })
  vals <- unlist(vals)

  data <- data.frame(cbind(time = seq_len(length(vals)),
                           value = vals))

  if (class(baseline) == "list") {
    baseline$df <- data
  } else {
    baseline <- data
  }

  return(baseline)
}

#' Signal concatenation
#'
#' Concatenates multiple fluorescence signals (for instance, negative and positive population signals) with the same time scale.
#'
#' @param ... baselines, as generated by function \code{sim.baseline}, possibly perturbed by other functions.
#' @param reindex Boolean, indicates whether \code{time} values should be re-indexed to 1..N after contraction. Do not use this if the baseline signals have been generated based on a specific flow rate.
#' @param contract_time Boolean, indicates whether to contract time values. Since length of measurement gets multiplied by number of signals due to concatenation, contracting time will downsample to preserve the
#'  original length of measurement.
#'  @param output see parameter \code{output} of function \code{sim.baseline}.
#'
#' @return Modified \code{baseline} input.
#'
#' @seealso See functions beginning with \code{sim.sample.} for examples of usage.
#'
#' @export
sim.concatenate <- function(...,
                            reindex = FALSE,
                            contract_time = TRUE,
                            flow_rate = NULL,
                            output = c(1, 2, 3)) {
  baselines <- list(...)
  B <- lapply(baselines, function(b) {
    if (class(b) == "list") {
      b <- b$df
    } else if (class(b) != "data.frame") {
      stop("Invalid data")
    }
    return(b)
  })

  data <- do.call(rbind, B)

  time_order <- order(data$time)
  data <- data[time_order, ]

  if (reindex) {
    data$time <- seq_len(nrow(data))
  }

  if (contract_time) {
    data <- sim.contract(baseline = data, i = 1, j = NULL, N = length(baselines), reindex = reindex, pruning_only = FALSE)
  }


  if (output[1] == 1) {
    if (!any(unlist(lapply(baselines, function(b) {
      return(is.null(b$params))
    })))) {
      return(
        list(df = data,
             params = list(mu = unlist(lapply(baselines, function(b) {b$params$mu})),
                           sigma = unlist(lapply(baselines, function(b) {b$params$sigma})),
                           D = unlist(lapply(baselines, function(b) {b$params$D})),
                           transform = unlist(lapply(baselines, function(b) {
                             tt <- b$params$transform
                             if (is.null(tt)) {
                               return ("null")
                             } else {
                               return(tt)
                             }
                           })),
                           transform_par = unlist(lapply(baselines, function(b) {
                             tt <- b$params$transform_par
                             if (is.null(tt)) {
                               return ("null")
                             } else {
                               return(tt)
                             }
                           })),
                           rate_adjusted = unlist(lapply(baselines, function(b) {
                             b$params$rate_adjusted
                           })),
                           distort = unlist(lapply(baselines, function(b) {
                             tt <- b$params$distort
                             if (is.null(tt)) {
                               return ("null")
                             } else {
                               return(tt)
                             }
                           })),
                           distort_par = unlist(lapply(baselines, function(b) {
                             tt <- b$params$distort_par
                             if (is.null(tt)) {
                               return ("null")
                             } else {
                               return(tt)
                             }
                           }))))
      )
    } else {
      stop("Cannot generate output type 1, since some input baselines are not of type 1")
    }
  } else if (output[1] == 2) {
    return(data)
  } else if (output[1] == 3) {
    return(data$value)
  } else {
    stop("Invalid output parameter")
  }
}
davnovak/qctoy documentation built on Nov. 4, 2019, 9:45 a.m.