Nothing
#' Perform a smooth spline fit on growth data
#'
#' \code{growth.gcFitSpline} performs a smooth spline fit on the dataset and determines
#' the highest growth rate as the global maximum in the first derivative of the spline.
#'
#' @param time Vector of the independent variable (usually time).
#' @param data Vector of dependent variable (usually: growth values).
#' @param gcID (Character) The name of the analyzed sample.
#' @param control A \code{grofit.control} object created with \code{\link{growth.control}},
#' defining relevant fitting options.
#' @param biphasic (Logical) Shall \code{growth.gcFitSpline} try to extract growth
#' parameters for two different growth phases (as observed with, e.g., diauxic shifts)
#' (\code{TRUE}) or not (\code{FALSE})?
#'
#' @details If \code{biphasic = TRUE}, the following steps are performed to define a
#' second growth phase: \enumerate{ \item Determine local minima within the first
#' derivative of the smooth spline fit. \item Remove the 'peak' containing the highest
#' value of the first derivative (i.e., \eqn{mu_{max}}) that is flanked by two local
#' minima. \item Repeat the smooth spline fit and identification of maximum slope for
#' later time values than the local minimum after \eqn{mu_{max}}. \item Repeat the
#' smooth spline fit and identification of maximum slope for earlier time values than
#' the local minimum before \eqn{mu_{max}}. \item Choose the greater of the two
#' independently determined slopes as \eqn{mu_{max}2}. }
#'
#' @return A \code{gcFitSpline} object. The lag time is estimated as the intersection between the
#' tangent at the maximum slope and the horizontal line with \eqn{y = y_0}, where
#' \code{y0} is the first value of the dependent variable. Use \code{\link{plot.gcFitSpline}} to
#' visualize the spline fit and derivative over time.
#' \item{time.in}{Raw time values provided to the function as \code{time}.}
#' \item{data.in}{Raw growth data provided to the function as \code{data}.}
#' \item{raw.time}{Filtered time values used for the spline fit.}
#' \item{raw.data}{Filtered growth values used for the spline fit.}
#' \item{gcID}{(Character) Identifies the tested sample.}
#' \item{fit.time}{Fitted time values.}
#' \item{fit.data}{Fitted growth values.}
#' \item{parameters}{List of determined growth parameters.}
#' \itemize{
#' \item \code{A}: Maximum growth.
#' \item \code{dY}: Difference in maximum growth and minimum growth.
#' \item \code{mu}: Maximum growth rate (i.e., maximum in first derivative of the spline).
#' \item \code{tD}: Doubling time.
#' \item \code{t.max}: Time at the maximum growth rate.
#' \item \code{lambda}: Lag time.
#' \item \code{b.tangent}: Intersection of the tangent at the maximum growth rate with the abscissa.
#' \item \code{mu2}: For biphasic growth: Growth rate of the second growth phase.
#' \item \code{tD2}: Doubling time of the second growth phase.
#' \item \code{lambda2}: For biphasic growth: Lag time determined for the second growth phase.
#' \item \code{t.max2}: For biphasic growth: Time at the maximum growth rate of the second growth phase.
#' \item \code{b.tangent2}: For biphasic growth: Intersection of the tangent at the maximum growth rate of the second growth phase with the abscissa.
#' \item \code{integral}: Area under the curve of the spline fit.
#' }
#' \item{spline}{\code{smooth.spline} object generated by the \code{\link{smooth.spline}} function.}
#' \item{spline.deriv1}{list of time ('x') and growth ('y') values describing the first derivative of the spline fit.}
#' \item{reliable}{(Logical) Indicates whether the performed fit is reliable (to be set manually).}
#' \item{fitFlag}{(Logical) Indicates whether a spline fit was successfully performed on the data.}
#' \item{fitFlag2}{(Logical) Indicates whether a second growth phase was identified.}
#' \item{control}{Object of class \code{grofit.control} containing list of options passed to the function as \code{control}.}
#'
#' @family growth fitting functions
#'
#' @references Matthias Kahm, Guido Hasenbrink, Hella Lichtenberg-Frate, Jost Ludwig, Maik Kschischo (2010). _grofit: Fitting Biological Growth Curves with R_. Journal of Statistical Software, 33(7), 1-21. DOI: 10.18637/jss.v033.i07
#'
#' @export
#'
#' @examples
#' # Create random growth dataset
#' rnd.dataset <- rdm.data(d = 35, mu = 0.8, A = 5, label = 'Test1')
#'
#' # Extract time and growth data for single sample
#' time <- rnd.dataset$time[1,]
#' data <- rnd.dataset$data[1,-(1:3)] # Remove identifier columns
#'
#' # Perform spline fit
#' TestFit <- growth.gcFitSpline(time, data, gcID = 'TestFit',
#' control = growth.control(fit.opt = 's'))
#'
#' plot(TestFit)
#'
#'
growth.gcFitSpline <- function(
time, data, gcID = "undefined", control = growth.control(biphasic = FALSE)
)
{
if (!is.null(control$t0) &&
!is.na(control$t0) &&
control$t0 != "")
{
t0 <- as.numeric(control$t0)
} else
{
t0 <- 0
}
tmax <- control$tmax
max.growth <- control$max.growth
if (methods::is(control) !=
"grofit.control")
stop("control must be of class grofit.control!")
if (!any(control$fit.opt %in% c("s", "a")))
stop(
"Fit option is not set for a spline fit. See growth.control()"
)
if (length(data[data < 0]) >
0)
{
data <- data + abs(min(data[data < 0])) +
0.01 # add the absolute value of the minimum negative growth (+ 0.01) to the data
}
time.in <- time <- as.vector(as.numeric(as.matrix(time)))[!is.na(time)][!is.na(data)]
data.in <- data <- as.vector(as.numeric(as.matrix(data)))[!is.na(time)][!is.na(data)]
if (length(time) !=
length(data))
stop("gcFitSpline: length of input vectors differ!")
bad.values <- (is.na(time)) |
(is.na(data)) |
(!is.numeric(time)) |
(!is.numeric(data))
if (TRUE %in% bad.values)
{
if (control$neg.nan.act == FALSE)
{
time <- time[!bad.values]
data <- data[!bad.values]
} else
{
stop("Bad values in gcFitSpline")
}
}
if (control$log.y.spline == TRUE)
{
bad.values <- (data <= 0)
if (TRUE %in% bad.values)
{
if (control$neg.nan.act == FALSE)
{
time <- time[!bad.values]
data <- data[!bad.values]
} else
{
stop("Bad values in gcFitSpline")
}
}
}
if (max(data) <
control$growth.thresh * data[1])
{
if (control$suppress.messages == FALSE)
message(
paste0(
"gcFitSpline: No significant growth detected (with all values below ",
control$growth.thresh, " * start_value)."
)
)
gcFitSpline <- list(
time.in = time.in, data.in = data.in, raw.time = time,
raw.data = data, fit.time = rep(NA, length(time.in)),
fit.data = rep(NA, length(data.in)),
parameters = list(
A = 0, dY = 0, mu = 0, t.max = NA,
lambda = NA, b.tangent = NA, mu2 = NA,
t.max2 = NA, lambda2 = NA, b.tangent2 = NA,
integral = NA
),
spline = NA, spline.deriv1 = NA, reliable = NULL,
fitFlag = FALSE, fitFlag2 = FALSE, control = control
)
class(gcFitSpline) <- "gcFitSpline"
return(gcFitSpline)
}
if (length(data) <
5)
{
warning(
"gcFitSpline: There is not enough valid data. Must have at least 5!"
)
gcFitSpline <- list(
time.in = time.in, data.in = data.in, raw.time = time,
raw.data = data, gcID = gcID, fit.time = NA,
fit.data = NA, parameters = list(
A = NA, dY = NA, mu = NA, t.max = NA,
lambda = NA, b.tangent = NA, mu2 = NA,
t.max2 = NA, lambda2 = NA, b.tangent2 = NA,
integral = NA
),
spline = NA, spline.deriv1 = NA, reliable = NULL,
fitFlag = TRUE, fitFlag2 = FALSE, control = control
)
class(gcFitSpline) <- "gcFitSpline"
return(gcFitSpline)
} else
{
if (control$log.x.gc == TRUE)
{
bad.values <- (time <= 0)
if (TRUE %in% bad.values)
{
if (control$neg.nan.act == FALSE)
{
time <- time[!bad.values]
data <- data[!bad.values]
} else
{
stop("Bad values in gcFitSpline")
}
}
time <- log(1 + time)
}
if (control$log.y.spline == TRUE)
{
data.log <- log(data/data[1])
}
time.raw <- time
data.raw <- if (control$log.y.spline == TRUE)
{
data.log
} else
{
data
}
# Implement min.growth into dataset
if (!is.null(control$min.growth))
{
if (!is.na(control$min.growth) &&
control$min.growth != 0)
{
if (control$log.y.spline == TRUE)
{
# perfom log transformation on
# min.growth (Ln(y/y0))
min.growth <- log(control$min.growth/data[1])
time <- time[max(
which.min(abs(time - t0)),
which.min(abs(data.log - min.growth))
):length(time)]
data.log <- data.log[max(
which.min(abs(time.raw - t0)),
which.min(abs(data.log - min.growth))
):length(data.log)]
} else
{
min.growth <- control$min.growth
time <- time[max(
which.min(abs(time - t0)),
which.min(abs(data - min.growth))
):length(data)]
data <- data[max(
which.min(abs(time.raw - t0)),
which.min(abs(data - min.growth))
):length(data)]
}
}
}
# Implement max.growth into dataset
if (!is.null(control$max.growth))
{
if (!is.na(control$max.growth))
{
if (control$log.y.spline == TRUE)
{
# perfom log transformation on
# max.growth (Ln(y/y0))
max.growth <- log(control$max.growth/data[1])
time <- time[data.log <= max.growth]
data.log <- data.log[data.log <=
max.growth]
} else
{
max.growth <- control$max.growth
time <- time[data <= max.growth]
data <- data[data <= max.growth]
}
}
}
# Implement t0 into dataset
if (is.numeric(t0) &&
t0 > 0)
{
if (control$log.y.spline == TRUE)
{
data.log <- data.log[which.min(abs(time - t0)):length(data.log)]
} else
{
data <- data[which.min(abs(time.raw - t0)):length(data)]
}
time <- time[which.min(abs(time.raw - t0)):length(time)]
}
# Implement tmax into dataset
if (is.numeric(tmax) &&
tmax > t0)
{
if (control$log.y.spline == TRUE)
{
data.log <- data.log[time <= tmax]
} else
{
data <- data[time <= tmax]
}
time <- time[time <= tmax]
}
# Run spline fit
try(
spline <- smooth.spline(
time, y = if (control$log.y.spline ==
TRUE)
{
data.log
} else
{
data
}, spar = control$smooth.gc, cv = NA,
keep.data = FALSE
)
)
if (!exists("spline") ||
is.null(spline) ==
TRUE)
{
if (control$suppress.messages == FALSE)
message(
"gcFitSpline: Spline could not be fitted to data!"
)
gcFitSpline <- list(
time.in = time.in, data.in = data.in,
raw.time = time, raw.data = data,
fit.time = rep(NA, length(time.in)),
fit.data = rep(NA, length(data.in)),
parameters = list(
A = NA, dY = NA, mu = NA, t.max = NA,
lambda = NA, b.tangent = NA, mu2 = NA,
t.max2 = NA, lambda2 = NA, b.tangent2 = NA,
integral = NA
),
spline = NA, spline.deriv1 = NA,
reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(gcFitSpline) <- "gcFitSpline"
return(gcFitSpline)
} # if(!exists('spline') || is.null(spline) == TRUE)
else
{
# Perform first derivative and
# extract parameters
deriv1 <- stats::predict(spline, deriv = 1)
# consider only slopes at growth
# values greater than the initial
# value
deriv1.growth <- deriv1
deriv1.growth$x <- deriv1.growth$x[spline$y >
spline$y[1]]
deriv1.growth$y <- deriv1.growth$y[spline$y >
spline$y[1]]
if (length(deriv1.growth$y) <
3)
{
if (control$suppress.messages == FALSE)
message(
"gcFitSpline: No significant amount of growth values above the start value!"
)
gcFitSpline <- list(
time.in = time.in, data.in = data.in,
raw.time = time, raw.data = data,
fit.time = spline$x, fit.data = spline$y,
parameters = list(
A = NA, dY = NA, mu = NA, t.max = NA,
lambda = NA, b.tangent = NA, mu2 = NA,
t.max2 = NA, lambda2 = NA, b.tangent2 = NA,
integral = NA
),
spline = spline, spline.deriv1 = deriv1,
reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(gcFitSpline) <- "gcFitSpline"
return(gcFitSpline)
}
mumax.index <- which.max(deriv1.growth$y) # index of data point with maximum growth rate in first derivative fit
mumax.index.spl <- which(spline$x == deriv1.growth$x[mumax.index]) # index of data point with maximum growth rate in spline fit
t.max <- deriv1.growth$x[mumax.index] # time of maximum growth rate
mumax <- max(deriv1.growth$y) # maximum value of first derivative of spline fit (i.e., greatest slope in growth curve spline fit)
y.max <- spline$y[which(deriv1$y == deriv1.growth$y[mumax.index])] # cell growth at time of max growth rate
b.spl <- y.max - mumax * t.max # the y-intercept of the tangent at µmax
lambda.spl <- (spline$y[1] - b.spl)/mumax # lag time
integral <- low.integrate(spline$x, spline$y)
if (control$biphasic)
{
# determine number of data
# points in period until
# maximum growth
n.spl <- length(time[which.min(abs(time - t0)):which.max(data)])
# Find local minima that frame
# µmax and remove the 'peak'
# from deriv1
n <- round((log(n.spl + 4, base = 2.1))/0.75)/2
minima <- inflect(deriv1$y, threshold = n)$minima
# consider only minima with a
# slope value of <= 0.75 * µmax
minima <- minima[deriv1$y[minima] <=
0.75 * mumax]
minima <- c(1, minima)
for (i in 1:(length(minima) -
1))
{
if (any(
minima[i]:minima[i + 1] %in%
mumax.index.spl
))
{
min.ndx <- c(minima[i], minima[i + 1])
}
}
if (exists("min.ndx"))
{
deriv1.2 <- deriv1
deriv1.2$y[min.ndx[1]:min.ndx[2]] <- 0
# find second mumax
mumax2 <- max(deriv1.2$y) # maximum value of first derivative of spline fit (i.e., greatest slope in growth curve spline fit)
# accept second mumax only if
# it is at least 10% of the
# global mumax
if (mumax2 >= 0.1 * mumax)
{
mumax2.index <- which.max(deriv1.2$y) # index of data point with maximum growth rate in first derivative fit
mumax2.index.spl <- which(spline$x == deriv1.2$x[mumax2.index]) # index of data point with maximum growth rate in spline fit
t.max2 <- deriv1.2$x[mumax2.index] # time of maximum growth rate
y.max2 <- spline$y[mumax2.index.spl] # cell growth at time of max growth rate
b.spl2 <- y.max2 - mumax2 * t.max2 # the y-intercept of the tangent at µmax
lambda.spl2 <- (spline$y[1] -
b.spl2)/mumax2 # lag time
fitFlag2 <- TRUE
} else
{
mumax2.index <- NA
mumax2.index.spl <- NA
t.max2 <- NA
mumax2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
} else
{
mumax2.index <- NA
mumax2.index.spl <- NA
t.max2 <- NA
mumax2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
} # if(control$biphasic)
else
{
mumax2.index <- NA
mumax2.index.spl <- NA
t.max2 <- NA
mumax2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
# # extract local minima and maxima of deriv2 n <- round((log(n.spl+4,
# base=2.1))/0.75)/2 minima <- inflect(deriv2_spline$y, threshold = n)$minima
# maxima <- inflect(deriv2_spline$y, threshold = n)$maxima # Regard only
# (negative) minima and (positive) maxima with a deriv2 value of >= 5% mumax
# minima <- minima[deriv2_spline$y[minima] <= 0.05 * (-mumax)] maxima <-
# maxima[deriv2_spline$y[maxima] >= 0.05 * mumax] # Find roots with positive
# slope in deriv2 after µmax # Find adjacent min - max pairs minmax <-
# c(minima, maxima)[order(c(minima, maxima))] # combine minima and maxima in
# ascending order minmax.min <- as.list(as.numeric(minmax %in% minima)) # list
# for each element in minmax vector, value 0 if maximum, value 1 if minimum #
# get indices of minima in minmax that are followed by a maximum ndx.minmax <-
# c() for(i in 1:(length(minmax.min)-1)){ if(minmax.min[[i]]==1 &
# minmax.min[[i+1]]==0){ ndx.minmax <- c(ndx.minmax, i) } } # define pair
# candidates; store in list pairs.post <- list() for(i in
# 1:length(ndx.minmax)){ pairs.post[[i]] <- c(minmax[[ndx.minmax[[i]]]],
# minmax[[ndx.minmax[[i]]+1]]) } # extract candidate that is closest to µmax,
# and respective index of minimum and maximum ndx.minima_cand <- c()
# ndx.minima_cand <- unlist(lapply(1:length(pairs.post), function(x)
# c(ndx.minima_cand, pairs.post[[x]][1]))) min.ndx <-
# ndx.minima_cand[which.min((ndx.minima_cand-
# mumax.index.spl)[(ndx.minima_cand- mumax.index.spl)>0])] max.ndx <-
# pairs.post[[which.min((ndx.minima_cand- mumax.index.spl)[(ndx.minima_cand-
# mumax.index.spl)>0])]][2] # Linear interpolation between minimum and maximum
# to find root interpol <- approxfun(y = c(deriv2$x[min.ndx],
# deriv2$x[max.ndx]), x = c(deriv2$y[min.ndx], deriv2$y[max.ndx])) t.turn <-
# interpol(0) t.turn_post.ndx <- which.min(abs(deriv2$x-t.turn)) # The time
# index closest to the turning point after µmax # Find roots with negative
# slope in deriv2 before µmax # get indices of maxima in minmax that are
# followed by a minimum ndx.maxmin <- c() for(i in 1:(length(minmax.min)-1)){
# if(minmax.min[[i]]==0 & minmax.min[[i+1]]==1){ ndx.maxmin <- c(ndx.maxmin,
# i) } } # define pair candidates; store in list pairs.pre <- list() for(i in
# 1:length(ndx.maxmin)){ pairs.pre[[i]] <- c(minmax[[ndx.maxmin[[i]]]],
# minmax[[ndx.maxmin[[i]]+1]]) } # extract candidate that is closest to µmax,
# and respective index of minimum and maximum ndx.maxima_cand <- c()
# ndx.maxima_cand <- unlist(lapply(1:length(pairs.pre), function(x)
# c(ndx.maxima_cand, pairs.pre[[x]][1]))) min.ndx <-
# ndx.maxima_cand[which.min((ndx.maxima_cand-
# mumax.index.spl)[(ndx.maxima_cand- mumax.index.spl)>0])] max.ndx <-
# pairs.pre[[which.min((ndx.maxima_cand- mumax.index.spl)[(ndx.maxima_cand-
# mumax.index.spl)>0])]][2] # Linear interpolation between minimum and maximum
# to find root interpol <- approxfun(y = c(deriv2$x[min.ndx],
# deriv2$x[max.ndx]), x = c(deriv2$y[min.ndx], deriv2$y[max.ndx])) t.turn <-
# interpol(0) t.turn_pre.ndx <- which.min(abs(deriv2$x-t.turn)) # The time
# index closest to the turning point after µmax # Remove plot(deriv1.2$x,
# deriv1.2$y, type = 'l') points( deriv1.2$x[minima], deriv1.2$y[minima], pch
# = 16, col = 'Blue', cex = 1.5 ) points(deriv1.2$x[mumax2.index],
# deriv1.2$y[mumax2.index], pch = 16, col = 'Red', cex = 1.5)
# points(deriv1$x[mumax.index], deriv1$y[mumax.index], pch = 16, col =
# 'Green', cex = 1.5) points(deriv1$x[t.turn_pre.ndx],
# deriv1$y[t.turn_pre.ndx], pch = 16, col = 'Black', cex = 1.5)
# points(deriv1$x[t.turn_post.ndx], deriv1$y[t.turn_post.ndx], pch = 16, col =
# 'Black', cex = 1.5) plot(deriv2$x, deriv2$y, type = 'l', main = 'Minima
# \nVariable Thresholds') points( deriv2$x[minima], deriv2$y[minima], pch =
# 16, col = 'Blue', cex = 1.5 ) points(deriv2$x[maxima], deriv2$y[maxima], pch
# = 16, col = 'Red', cex = 1.5) points(deriv2$x[mumax.index],
# deriv2$y[mumax.index], pch = 16, col = 'Green', cex = 1.5)
# points(deriv2$x[t.turn_pre.ndx], deriv2$y[t.turn_pre.ndx], pch = 16, col =
# 'Black', cex = 1.5) points(deriv2$x[t.turn_post.ndx],
# deriv2$y[t.turn_post.ndx], pch = 16, col = 'Black', cex = 1.5)
# plot(spline$x, spline$y, type = 'l') points(spline$x[minima],
# spline$y[minima], pch = 16, col = cf.2(1)[1], cex = 1.5)
# points(spline$x[maxima], spline$y[maxima], pch = 16, col = cf.1(1)[1], cex =
# 1.5) plot(deriv2_spline$x, deriv2_spline$y, type = 'l', main = 'Minima
# \nVariable Thresholds') points( deriv2_spline$x[minima],
# deriv2_spline$y[minima], pch = 16, col = cf.2(1)[1], cex = 1.5 )
# points(deriv2_spline$x[maxima], deriv2_spline$y[maxima], pch = 16, col =
# cf.1(1)[1], cex = 1.5) }
} # else of if (!exists('spline') || is.null(spline) == TRUE)
} # else of if (length(data) < 5)
gcFitSpline <- list(
time.in = time.in, data.in = data.in, raw.time = time.raw,
raw.data = data.raw, gcID = gcID, fit.time = spline$x,
fit.data = spline$y, parameters = list(
A = if (control$log.y.spline == TRUE)
{
# Correct ln(N/N0) transformation
# for max growth value
data[1] * exp(max(spline$y))
} else
{
max(spline$y)
}, dY = if (control$log.y.spline == TRUE)
{
data[1] * exp(max(spline$y)) -
data[1] * exp(spline$y[1])
} else
{
max(spline$y) -
spline$y[1]
}, mu = mumax, tD = log(2)/mumax,
t.max = t.max, lambda = lambda.spl, b.tangent = b.spl,
mu2 = mumax2, tD2 = log(2)/mumax2,
t.max2 = t.max2, lambda2 = lambda.spl2,
b.tangent2 = b.spl2, integral = integral
),
spline = spline, spline.deriv1 = deriv1, fitFlag = TRUE,
fitFlag2 = fitFlag2, control = control
)
class(gcFitSpline) <- "gcFitSpline"
invisible(gcFitSpline)
}
#' Perform a bootstrap on growth vs. time data followed by spline fits for each resample
#'
#' \code{growth.gcBootSpline} resamples the growth-time value pairs in a dataset with replacement and performs a spline fit for each bootstrap sample.
#'
#' @param time Vector of the independent variable (usually: time).
#' @param data Vector of dependent variable (usually: growth values).
#' @param gcID (Character) The name of the analyzed sample.
#' @param control A \code{grofit.control} object created with \code{\link{growth.control}}, defining relevant fitting options.
#'
#' @family growth fitting functions
#'
#' @return A \code{gcBootSpline} object containing a distribution of growth parameters and
#' a \code{gcFitSpline} object for each bootstrap sample. Use \code{\link{plot.gcBootSpline}}
#' to visualize all bootstrapping splines as well as the distribution of
#' physiological parameters.
#' \item{raw.time}{Raw time values provided to the function as \code{time}.}
#' \item{raw.data}{Raw growth data provided to the function as \code{data}.}
#' \item{gcID}{(Character) Identifies the tested sample.}
#' \item{boot.time}{Table of time values per column, resulting from each spline fit of the bootstrap.}
#' \item{boot.data}{Table of growth values per column, resulting from each spline fit of the bootstrap.}
#' \item{boot.gcSpline}{List of \code{gcFitSpline} object, created by \code{\link{growth.gcFitSpline}} for each resample of the bootstrap.}
#' \item{lambda}{Vector of estimated lambda (lag time) values from each bootstrap entry.}
#' \item{mu}{Vector of estimated mu (maximum growth rate) values from each bootstrap entry.}
#' \item{A}{Vector of estimated A (maximum growth) values from each bootstrap entry.}
#' \item{integral}{Vector of estimated integral values from each bootstrap entry.}
#' \item{bootFlag}{(Logical) Indicates the success of the bootstrapping operation.}
#' \item{control}{Object of class \code{grofit.control} containing list of options passed to the function as \code{control}.}
#'
#' @references Matthias Kahm, Guido Hasenbrink, Hella Lichtenberg-Frate, Jost Ludwig, Maik Kschischo (2010). _grofit: Fitting Biological Growth Curves with R_. Journal of Statistical Software, 33(7), 1-21. DOI: 10.18637/jss.v033.i07
#'
#' @export
#'
#' @examples
#' # Create random growth dataset
#' rnd.dataset <- rdm.data(d = 35, mu = 0.8, A = 5, label = 'Test1')
#'
#' # Extract time and growth data for single sample
#' time <- rnd.dataset$time[1,]
#' data <- rnd.dataset$data[1,-(1:3)] # Remove identifier columns
#'
#' # Introduce some noise into the measurements
#' data <- data + stats::runif(97, -0.01, 0.09)
#'
#' # Perform bootstrapping spline fit
#' TestFit <- growth.gcBootSpline(time, data, gcID = 'TestFit',
#' control = growth.control(fit.opt = 's', nboot.gc = 50))
#'
#' plot(TestFit, combine = TRUE, lwd = 0.5)
#'
growth.gcBootSpline <- function(
time, data, gcID = "undefined", control = growth.control()
)
{
if (methods::is(control) !=
"grofit.control")
stop("control must be of class grofit.control!")
if (control$nboot.gc == 0)
stop(
"Number of bootstrap samples is zero! See growth.control()"
)
time <- as.vector(as.numeric(as.matrix(time)))[!is.na(time)][!is.na(data)]
data <- as.vector(as.numeric(as.matrix(data)))[!is.na(time)][!is.na(data)]
if (length(time) !=
length(data))
stop("gcBootSpline: length of input vectors differ!")
if (control$log.y.spline == TRUE)
{
bad.values <- (is.na(time)) |
(is.na(data)) |
(!is.numeric(time)) |
(!is.numeric(data) |
(data <= 0))
} else
{
bad.values <- (is.na(time)) |
(is.na(data)) |
is.infinite(data) |
(!is.numeric(time)) |
(!is.numeric(data))
}
if (TRUE %in% bad.values)
{
if (control$neg.nan.act == FALSE)
{
time <- time[!bad.values]
data <- data[!bad.values]
} else stop("Bad values in gcBootSpline")
}
if (length(data) <
6)
{
if (control$suppress.messages == FALSE)
message(
"gcBootSpline: There is not enough valid data. Must have at least 6 unique values!"
)
gcBootSpline <- list(
raw.time = time, raw.data = data, gcID = gcID,
boot.time = NA, boot.data = NA, boot.gcSpline = NA,
lambda = NA, mu = NA, A = NA, integral = NA,
bootFlag = FALSE, control = control
)
class(gcBootSpline) <- "gcBootSpline"
return(gcBootSpline)
}
A <- NA
mu <- NA
lambda <- NA
integral <- NA
dY <- NA
boot.y <- array(NA, c(control$nboot.gc, length(time)))
boot.x <- array(NA, c(control$nboot.gc, length(time)))
nonpara <- list()
control.change <- control
control.change$fit.opt <- "s"
if (control$nboot.gc > 0)
{
for (j in 1:control$nboot.gc)
{
choose <- sort(
c(
1, sample(
1:length(time[-1]),
length(time) -
1, replace = TRUE
)
)
)
while (length(unique(choose)) <
5)
{
choose <- sort(
c(
1, sample(
1:length(time[-1]),
length(time) -
1, replace = TRUE
)
)
)
}
time.cur <- time[choose]
data.cur <- data[choose]
if (stats::IQR(time.cur) >
0)
{
nonpara[[j]] <- growth.gcFitSpline(time.cur, data.cur, gcID, control.change)
if (nonpara[[j]]$fitFlag == FALSE |
is.na(nonpara[[j]]$fit.data[1]))
{
boot.y[j, 1:length(nonpara[[j]]$fit.data)] <- rep(NA, length(nonpara[[j]]$fit.data))
boot.x[j, 1:length(nonpara[[j]]$fit.time)] <- rep(NA, length(nonpara[[j]]$fit.time))
} else
{
boot.y[j, 1:length(nonpara[[j]]$fit.data)] <- nonpara[[j]]$fit.data
boot.x[j, 1:length(nonpara[[j]]$fit.time)] <- nonpara[[j]]$fit.time
}
lambda[j] <- nonpara[[j]]$parameters$lambda
mu[j] <- nonpara[[j]]$parameters$mu
A[j] <- nonpara[[j]]$parameters$A
integral[j] <- nonpara[[j]]$parameters$integral
dY[j] <- nonpara[[j]]$parameters$dY
}
}
lambda[which(!is.finite(lambda))] <- NA
mu[which(!is.finite(lambda))] <- NA
A[which(!is.finite(lambda))] <- NA
integral[which(!is.finite(lambda))] <- NA
dY[which(!is.finite(dY))] <- NA
if (control$clean.bootstrap == TRUE)
{
lambda[which(lambda < 0)] <- NA
mu[which(mu < 0)] <- NA
A[which(A < 0)] <- NA
integral[which(integral < 0)] <- NA
dY[which(dY < 0)] <- NA
}
}
if (control$log.x.gc == TRUE)
{
bad.values <- (time <= -1)
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
data <- data[!bad.values]
}
time.log <- log(1 + time)
}
if (control$log.y.spline == TRUE)
{
data.log <- log(data/data[1])
bad.values <- (data.log <= 0)
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
data.log <- data.log[!bad.values]
}
}
if (all(is.na(mu)))
{
if (control$suppress.messages == FALSE)
message(
"gcBootSpline: No spline fit could be performed successfully!"
)
gcBootSpline <- list(
raw.time = if (control$log.x.gc == TRUE)
{
time.log
} else
{
time
}, raw.data = if (control$log.y.spline ==
TRUE)
{
data.log
} else
{
data
}, gcID = gcID, boot.time = NA, boot.data = NA,
boot.gcSpline = NA, lambda = NA, mu = NA,
A = NA, integral = NA, bootFlag = FALSE,
control = control
)
class(gcBootSpline) <- "gcBootSpline"
return(gcBootSpline)
}
gcBootSpline <- list(
raw.time = if (control$log.x.gc == TRUE)
{
time.log
} else
{
time
}, raw.data = if (control$log.y.spline == TRUE)
{
data.log
} else
{
data
}, gcID = gcID, boot.time = boot.x, boot.data = boot.y,
boot.gcSpline = nonpara, lambda = lambda, mu = mu,
A = A, dY = dY, integral = integral, bootFlag = TRUE,
control = control
)
class(gcBootSpline) <- "gcBootSpline"
invisible(gcBootSpline)
}
#' Perform a smooth spline fit on fluorescence data
#'
#' \code{flFitSpline} performs a smooth spline fit on the dataset and determines
#' the greatest slope as the global maximum in the first derivative of the spline.
#'
#' @param time Vector of the independent variable: time (if \code{x_type = 'time'} in \code{fl.control} object.
#' @param growth Vector of the independent variable: growth (if \code{x_type = 'growth'} in \code{fl.control} object.
#' @param fl_data Vector of dependent variable: fluorescence.
#' @param ID (Character) The name of the analyzed sample.
#' @param control A \code{fl.control} object created with \code{\link{fl.control}}, defining relevant fitting options.
#' @param biphasic (Logical) Shall \code{\link{flFitLinear}} and \code{\link{flFitSpline}} try to extract fluorescence parameters for two different phases (as observed with, e.g., regulator-promoter systems with varying response in different growth stages) (\code{TRUE}) or not (\code{FALSE})?
#' @param x_type (Character) Which data type shall be used as independent variable? Options are \code{'growth'} and \code{'time'}.
#' @param log.x.spline (Logical) Indicates whether _ln(x+1)_ should be applied to the independent variable for _spline_ fits. Default: \code{FALSE}.
#' @param log.y.spline (Logical) Indicates whether _ln(y/y0)_ should be applied to the fluorescence data for _spline_ fits. Default: \code{FALSE}
#' @param smooth.fl (Numeric) Parameter describing the smoothness of the spline fit; usually (not necessary) within (0;1]. \code{smooth.gc=NULL} causes the program to query an optimal value via cross validation techniques. Especially for datasets with few data points the option \code{NULL} might cause a too small smoothing parameter. This can result a too tight fit that is susceptible to measurement errors (thus overestimating slopes) or produce an error in \code{\link{smooth.spline}} or lead to overfitting. The usage of a fixed value is recommended for reproducible results across samples. See \code{\link{smooth.spline}} for further details. Default: \code{0.55}
#' @param t0 (Numeric) Minimum time value considered for linear and spline fits.
#' @param min.growth (Numeric) Indicate whether only values above a certain threshold should be considered for linear regressions or spline fits.
#'
#' @details If \code{biphasic = TRUE}, the following steps are performed to define a
#' second phase: \enumerate{ \item Determine local minima within the first
#' derivative of the smooth spline fit. \item Remove the 'peak' containing the highest
#' value of the first derivative (i.e., \eqn{mu_{max}}) that is flanked by two local
#' minima. \item Repeat the smooth spline fit and identification of maximum slope for
#' later time values than the local minimum after \eqn{mu_{max}}. \item Repeat the
#' smooth spline fit and identification of maximum slope for earlier time values than
#' the local minimum before \eqn{mu_{max}}. \item Choose the greater of the two
#' independently determined slopes as \eqn{mu_{max}2}. }
#'
#' @family fluorescence fitting functions
#'
#' @return A \code{flFitSpline} object. The lag time is estimated as the intersection between the
#' tangent at the maximum slope and the horizontal line with \eqn{y = y_0}, where
#' \code{y0} is the first value of the dependent variable. Use \code{\link{plot.flFitSpline}} to
#' visualize the spline fit and derivative over time.
#' \item{x.in}{Raw x values provided to the function as \code{time} or \code{growth}.}
#' \item{fl.in}{Raw fluorescence data provided to the function as \code{fl_data}.}
#' \item{raw.x}{Filtered x values used for the spline fit.}
#' \item{raw.fl}{Filtered fluorescence values used for the spline fit.}
#' \item{ID}{(Character) Identifies the tested sample.}
#' \item{fit.x}{Fitted x values.}
#' \item{fit.fl}{Fitted fluorescence values.}
#' \item{parameters}{List of determined parameters.}
#' \itemize{
#' \item \code{A}: Maximum fluorescence.
#' \item \code{dY}: Difference in maximum fluorescence and minimum fluorescence.
#' \item \code{max_slope}: Maximum slope of fluorescence-vs.-x data (i.e., maximum in first derivative of the spline).
#' \item \code{x.max}: Time at the maximum slope.
#' \item \code{lambda}: Lag time.
#' \item \code{b.tangent}: Intersection of the tangent at the maximum slope with the abscissa.
#' \item \code{max_slope2}: For biphasic course of fluorescence: Maximum slope of fluorescence-vs.-x data of the second phase.
#' \item \code{lambda2}: For biphasic course of fluorescence: Lag time determined for the second phase.
#' \item \code{x.max2}: For biphasic course of fluorescence: Time at the maximum slope of the second phase.
#' \item \code{b.tangent2}: For biphasic course of fluorescence: Intersection of the tangent at the maximum slope of the second phase with the abscissa.
#' \item \code{integral}: Area under the curve of the spline fit.
#' }
#' \item{spline}{\code{smooth.spline} object generated by the \code{\link{smooth.spline}} function.}
#' \item{spline.deriv1}{list of time ('x') and growth ('y') values describing the first derivative of the spline fit.}
#' \item{reliable}{(Logical) Indicates whether the performed fit is reliable (to be set manually).}
#' \item{fitFlag}{(Logical) Indicates whether a spline fit was successfully performed on the data.}
#' \item{fitFlag2}{(Logical) Indicates whether a second phase was identified.}
#' \item{control}{Object of class \code{fl.control} containing list of options passed to the function as \code{control}.}
#'
#' @export
#'
#' @examples
#' # load example dataset
#' input <- read_data(data.growth = system.file("lac_promoters_growth.txt", package = "QurvE"),
#' data.fl = system.file("lac_promoters_fluorescence.txt", package = "QurvE"),
#' csvsep = "\t",
#' csvsep.fl = "\t")
#'
#' # Extract time and normalized fluorescence data for single sample
#' time <- input$time[4,]
#' data <- input$norm.fluorescence[4,-(1:3)] # Remove identifier columns
#'
#' # Perform linear fit
#' TestFit <- flFitSpline(time = time,
#' fl_data = data,
#' ID = 'TestFit',
#' control = fl.control(fit.opt = 's', x_type = 'time'))
#'
#' plot(TestFit)
#
flFitSpline <- function(
time = NULL, growth = NULL, fl_data, ID = "undefined",
control = fl.control(
biphasic = FALSE, x_type = c("growth", "time"),
log.x.spline = FALSE, log.y.spline = FALSE,
smooth.fl = 0.75, t0 = 0, min.growth = NA
)
)
{
x_type <- control$x_type
if (!is.null(control$t0) &&
!is.na(control$t0) &&
control$t0 != "")
{
t0 <- as.numeric(control$t0)
} else
{
t0 <- 0
}
if (is.na(control$tmax))
tmax <- NULL
if (!is.null(tmax))
tmax <- as.numeric(control$tmax)
if (!is.null(control$max.growth))
max.growth <- as.numeric(control$max.growth)
if (is(control) !=
"fl.control")
stop("control must be of class fl.control!")
if (!any(control$fit.opt %in% "s"))
stop(
"Fit option is not set for a fluorescence spline fit. See fl.control()"
)
if (!is.null(time))
time.in <- time <- as.vector(as.numeric(as.matrix(time)))[!is.na(as.vector(as.numeric(as.matrix(time))))][!is.na(as.vector(as.numeric(as.matrix(fl_data))))]
if (!is.null(growth))
growth.in <- growth <- as.vector(as.numeric(as.matrix(growth)))[!is.na(as.vector(as.numeric(as.matrix(growth))))][!is.na(as.vector(as.numeric(as.matrix(fl_data))))]
fl_data.in <- fl_data <- as.vector(as.numeric(as.matrix(fl_data)))[!is.na(as.vector(as.numeric(as.matrix(fl_data))))]
bad.values <- (fl_data < 0)
if (TRUE %in% bad.values)
{
fl_data <- fl_data.in <- fl_data[!bad.values]
if (x_type == "growth")
{
growth <- growth.in <- growth[!bad.values]
} else
{
time <- time.in <- time[!bad.values]
}
}
if (x_type == "growth" && is.null(growth))
stop(
"To perform a spline fit of fluorescence vs. growth data, please provide a 'growth' vector of the same length as 'fl_data'."
)
if (x_type == "time" && is.null(time))
stop(
"To perform a spline fit of fluorescence vs. time data, please provide a 'time' vector of the same length as 'fl_data'."
)
if (x_type == "growth" && length(growth) !=
length(fl_data))
stop(
"flFitSpline: length of input vectors (growth and fl_data) differ!"
)
if (x_type == "time" && length(time) !=
length(fl_data))
stop(
"flFitSpline: length of input vectors (time and fl_data) differ!"
)
if (length(fl_data) <
5)
{
warning(
"flFitSpline: There is not enough valid data. Must have at least 5!"
)
flFitSpline <- list(
time.in = time.in, growth.in = growth.in,
fl_data.in = fl_data.in, raw.time = time,
raw.growth = growth, raw.fl = fl_data,
fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA, x.max = NA,
lambda = NA, b.tangent = NA, max_slope2 = NA,
x.max2 = NA, lambda2 = NA, b.tangent2 = NA,
integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
}
# Consider only data points up to max growth
# or time, respectively
if (x_type == "time")
{
ndx.max <- which.max(time)
time <- time[1:ndx.max]
fl_data <- fl_data[1:ndx.max]
}
if (x_type == "growth")
{
ndx.max <- which.max(growth)
growth <- growth[1:ndx.max]
fl_data <- fl_data[1:ndx.max]
bad.values <- (fl_data < 0)
}
fl_data.log <- log(fl_data/fl_data[1])
if (x_type == "growth")
{
bad.values <- (is.na(growth)) |
(is.na(fl_data)) |
fl_data < 0 | (!is.numeric(growth)) |
(!is.numeric(fl_data))
if (TRUE %in% bad.values)
{
growth <- growth[!bad.values]
fl_data <- fl_data[!bad.values]
fl_data.log <- fl_data.log[!bad.values]
}
if (control$log.x.spline == TRUE)
{
bad.values <- (growth < 0)
if (TRUE %in% bad.values)
{
growth <- growth[!bad.values]
fl_data <- fl_data[!bad.values]
fl_data.log <- fl_data.log[!bad.values]
}
growth.log <- log(growth/growth[1])
growth.raw <- growth.log
} else {
growth.raw <- growth
}
if (max(growth) <
control$growth.thresh * growth[1])
{
if (control$suppress.messages == FALSE)
message(
paste0(
"flFitSpline: No significant growth detected (with all values below ",
control$growth.thresh, " * start_value)."
)
)
flFitSpline <- list(
x.in = growth.in, fl.in = fl_data.in,
raw.x = growth, raw.fl = fl_data,
fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA,
x.max = NA, lambda = NA, b.tangent = NA,
max_slope2 = NA, x.max2 = NA, lambda2 = NA,
b.tangent2 = NA, integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
}
fl_data.raw <- if (control$log.y.spline == TRUE)
{
fl_data.log
} else
{
fl_data
}
# Implement min.growth into dataset
if (!is.null(control$min.growth))
{
if (!is.na(control$min.growth) &&
control$min.growth != 0)
{
min.growth <- control$min.growth
if (control$log.y.spline == TRUE)
{
# perfom log transformation
# on min.growth (Ln(y/y0))
min.growth <- log(min.growth/growth[1])
fl_data.log <- fl_data.log[growth.log >=
min.growth]
growth.log <- growth.log[growth.log >=
min.growth]
} else
{
fl_data <- fl_data[growth >= min.growth]
growth <- growth[growth >= min.growth]
}
}
}
# Implement max.growth into dataset
if (!is.null(control$max.growth))
{
if (!is.na(control$max.growth))
{
if (control$log.x.spline == TRUE)
{
# perfom log transformation
# on max.growth (Ln(y/y0))
max.growth <- log(control$max.growth/growth[1])
fl_data.log <- fl_data.log[growth.log <=
max.growth]
growth.log <- growth.log[growth.log <=
max.growth]
} else
{
max.growth <- control$max.growth
fl_data <- fl_data[growth <= max.growth]
growth <- growth[growth <= max.growth]
}
}
}
if (control$log.x.spline == FALSE)
{
x <- growth
} else
{
x <- growth.log
}
if (length(x) <
4)
{
message(
"flFitSpline: Not enough data points above the chosen min.growth."
)
flFitSpline <- list(
x.in = growth.in, fl.in = fl_data.in,
raw.x = growth, raw.fl = fl_data,
fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA,
x.max = NA, lambda = NA, b.tangent = NA,
max_slope2 = NA, x.max2 = NA, lambda2 = NA,
b.tangent2 = NA, integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
}
} # if(x_type == 'growth')
if (x_type == "time")
{
bad.values <- (is.na(time)) |
(is.na(fl_data)) |
fl_data < 0 | (!is.numeric(time)) |
(!is.numeric(fl_data))
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
fl_data <- fl_data[!bad.values]
fl_data.log <- fl_data.log[!bad.values]
}
if (control$log.x.spline == TRUE)
{
bad.values <- (time <= 0)
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
fl_data <- fl_data[!bad.values]
fl_data.log <- fl_data.log[!bad.values]
}
time.log <- log(time)
time.raw <- time.log
} else {
time.raw <- time
}
if (max(time) <
control$t0)
{
if (control$suppress.messages == FALSE)
message(
paste0(
"flFitSpline: All time values are below the chosen 't0'."
)
)
flFitSpline <- list(
x.in = time.in, fl.in = fl_data.in,
raw.x = time, raw.fl = fl_data, fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA,
x.max = NA, lambda = NA, b.tangent = NA,
max_slope2 = NA, x.max2 = NA, lambda2 = NA,
b.tangent2 = NA, integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
}
fl_data.raw <- if (control$log.y.spline == TRUE)
{
fl_data.log
} else
{
fl_data
}
# Implement t0 into dataset
if (is.numeric(t0) &&
t0 > 0)
{
if (control$log.y.spline == TRUE)
{
fl_data.log <- fl_data.log[which.min(abs(time - t0)):length(fl_data.log)]
} else
{
fl_data <- fl_data[which.min(abs(time - t0)):length(fl_data)]
}
if (control$log.x.spline == FALSE)
{
time <- time[which.min(abs(time - t0)):length(time)]
} else
{
t0 <- log(t0)
time.log <- time.log[which.min(abs(time.log - t0)):length(time.log)]
}
}
# Implement tmax into dataset
if (is.numeric(tmax) &&
tmax > t0)
{
if (control$log.y.spline == TRUE)
{
fl_data.log <- fl_data.log[time <=
tmax]
} else
{
fl_data <- fl_data[time <= tmax]
}
time <- time[time <= tmax]
}
if (control$log.x.spline == TRUE)
{
x <- time.log
} else
{
x <- time
}
if (length(x) <
4)
{
message(
"flFitSpline: Not enough data points above the chosen t0."
)
flFitSpline <- list(
x.in = growth.in, fl.in = fl_data.in,
raw.x = growth, raw.fl = fl_data,
fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA,
x.max = NA, lambda = NA, b.tangent = NA,
max_slope2 = NA, x.max2 = NA, lambda2 = NA,
b.tangent2 = NA, integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
}
} # if(x_type == 'time')
x.in = get(
ifelse(x_type == "growth", "growth.in", "time.in")
)
try(
spline <- smooth.spline(
x = x, y = if (control$log.y.spline ==
TRUE)
{
fl_data.log
} else
{
fl_data
}, spar = control$smooth.fl, cv = NA, keep.data = FALSE
)
)
if (!exists("spline") ||
is.null(spline))
{
if (control$suppress.messages == FALSE)
message(
"flFitSpline: Spline could not be fitted to data!"
)
flFitSpline <- list(
x.in = get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
),
fl.in = fl_data.in, raw.x = get(
ifelse(
x_type == "growth", "growth",
"time"
)
),
raw.fl = fl_data, fit.x = rep(
NA, length(
get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
)
)
),
fit.fl = rep(NA, length(fl_data.in)),
parameters = list(
A = NA, dY = NA, max_slope = NA,
x.max = NA, lambda = NA, b.tangent = NA,
max_slope2 = NA, x.max2 = NA, lambda2 = NA,
b.tangent2 = NA, integral = NA
),
spline = NA, reliable = NULL, fitFlag = FALSE,
fitFlag2 = FALSE, control = control
)
class(flFitSpline) <- "flFitSpline"
return(flFitSpline)
} # if(!exists('spline') || is.null(spline) == TRUE)
else
{
# Perform spline fit and extract
# parameters
deriv1 <- stats::predict(spline, x, deriv = 1)
# find maximum in deriv1, exclude maxima
# at beginning of fit, if x_type is
# 'time'
deriv1.test <- deriv1
spline.test <- spline
# if(x_type == 'time'){ Take only max slope values that are not at the start of
# the curve
# success <- FALSE
# while (!success){
# if(length(deriv1.test$y) > 2){
# max_slope.index <- which.max(deriv1.test$y)
# if(!(max_slope.index %in% 1:3)){
# max_slope.index <- max_slope.index success <- TRUE
#} else {
# deriv1.test <- lapply(1:length(deriv1.test), function(x) deriv1.test[[x]][-max_slope.index])
# names(deriv1.test) <- c('x', 'y') spline.test$x <- spline$x[-max_slope.index]
# spline.test$y <- spline$y[-max_slope.index] }
# } else {
# max_slope.index <- which.max(deriv1$y)
# spline.test <- spline success <- TRUE
# }
# }
# } else {
# max_slope.index <- which.max(deriv1$y)
# }
max_slope.index <- which.max(deriv1$y) # index of data point with maximum growth rate in first derivative fit
max_slope.index.spl <- which(spline$x == deriv1$x[max_slope.index]) # index of data point with maximum slope in spline fit
x.max <- deriv1$x[max_slope.index] # x of maximum slope
max_slope <- max(deriv1$y) # maximum value of first derivative of spline fit (i.e., greatest slope in fluorescence curve spline fit)
y.max <- spline$y[max_slope.index.spl] # cell growth at x of max slope
b.spl <- y.max - max_slope * x.max # the y-intercept of the tangent at mumax
# lambda.spl <- -b.spl/max_slope # lag x
lambda.spl <- (spline$y[1] - b.spl)/max_slope # lag x
integral <- low.integrate(spline$x, spline$y)
if (control$biphasic)
{
# determine number of data points
# in period until maximum growth
n.spl <- length(x[which.min(abs(x)):which.max(fl_data)])
# Find local minima that frame
# max_slope and remove the 'peak'
# from deriv1
n <- round((log(n.spl + 4, base = 2.1))/0.75)/2
minima <- inflect(deriv1$y, threshold = n)$minima
# consider only minima with a slope value of <= 0.75 * max_slope
minima <- minima[deriv1$y[minima] <= 0.75 * max_slope]
minima <- c(1, minima)
if (length(minima) > 0) {
if (length(minima) > 1) {
for (i in 1:(length(minima) -
1))
{
if (any(minima[i]:minima[i + 1] %in% max_slope.index))
{
min.ndx <- c(minima[i], minima[i + 1])
} else if (any(minima[i]:length(spline$x) %in% max_slope.index))
{
min.ndx <- c(minima[i], length(spline$x))
}
}
} else if (any(minima:length(spline$x) %in%
max_slope.index)
)
{
min.ndx <- c(minima, length(spline$x))
} else if (any(1:minima %in% max_slope.index))
{
min.ndx <- c(1, minima)
}
}
if (exists("min.ndx"))
{
deriv1.2 <- deriv1
deriv1.2$y[min.ndx[1]:min.ndx[2]] <- 0
# find second max_slope
max_slope2 <- max(deriv1.2$y) # maximum value of first derivative of spline fit (i.e., greatest slope in fluorescence curve spline fit)
# accept second max_slope only if it is at least 10% of the global max_slope
if (max_slope2 >= 0.1 * max_slope)
{
max_slope2.index <- which.max(deriv1.2$y) # index of data point with maximum slope in first derivative fit
max_slope2.index.spl <- which(spline$x == deriv1.2$x[max_slope2.index]) # index of data point with maximum slope in spline fit
x.max2 <- deriv1.2$x[max_slope2.index] # x of maximum slope
y.max2 <- spline$y[max_slope2.index.spl] # cell growth at x of max slope
b.spl2 <- y.max2 - max_slope2 * x.max2 # the y-intercept of the tangent at mumax
# lambda.spl2 <- -b.spl2/max_slope2 # lag x
lambda.spl2 <- (spline$y[1] - b.spl2)/max_slope2 # lag time
fitFlag2 <- TRUE
} else {
max_slope2.index <- NA
max_slope2.index.spl <- NA
x.max2 <- NA
max_slope2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
} else
{
max_slope2.index <- NA
max_slope2.index.spl <- NA
x.max2 <- NA
max_slope2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
} # if(control$biphasic)
else
{
max_slope2.index <- NA
max_slope2.index.spl <- NA
x.max2 <- NA
max_slope2 <- NA
y.max2 <- NA
b.spl2 <- NA
lambda.spl2 <- NA
fitFlag2 <- FALSE
}
} # else of if (!exists('spline') || is.null(spline) == TRUE)
flFitSpline <- list(
x.in = get(
ifelse(
x_type == "growth", "growth.in",
"time.in"
)
),
fl.in = fl_data.in, raw.x = get(ifelse(x_type == "growth", "growth.raw", "time.raw")),
raw.fl = fl_data.raw, ID = ID, fit.x = spline$x,
fit.fl = spline$y, parameters = list(
A = if (control$log.y.spline == TRUE)
{
# Correct ln(N/N0) transformation
# for max growth value
fl_data[1] * exp(max(spline$y))
} else
{
max(spline$y)
}, dY = if (control$log.y.spline == TRUE)
{
fl_data[1] * exp(max(spline$y)) -
fl_data[1] * exp(spline$y[1])
} else
{
max(spline$y) -
spline$y[1]
}, max_slope = max_slope, x.max = x.max,
lambda = lambda.spl, b.tangent = b.spl,
max_slope2 = max_slope2, x.max2 = x.max2,
lambda2 = lambda.spl2, b.tangent2 = b.spl2,
integral = integral
),
spline = spline, spline.deriv1 = deriv1, reliable = NULL,
fitFlag = TRUE, fitFlag2 = fitFlag2, control = control
)
class(flFitSpline) <- "flFitSpline"
invisible(flFitSpline)
}
#' flBootSpline: Function to generate a bootstrap
#'
#' \code{fl.gcBootSpline} resamples the fluorescence-'x' value pairs in a dataset with replacement and performs a spline fit for each bootstrap sample.
#'
#' @param time Vector of the independent variable: time (if \code{x_type = 'time'} in \code{fl.control} object.
#' @param growth Vector of the independent variable: growth (if \code{x_type = 'growth'} in \code{fl.control} object.
#' @param fl_data Vector of dependent variable: fluorescence.
#' @param ID (Character) The name of the analyzed sample.
#' @param control A \code{fl.control} object created with \code{\link{fl.control}}, defining relevant fitting options.
#'
#' @return A \code{gcBootSpline} object containing a distribution of fluorescence parameters and
#' a \code{flFitSpline} object for each bootstrap sample. Use \code{\link{plot.gcBootSpline}}
#' to visualize all bootstrapping splines as well as the distribution of
#' physiological parameters.
#' \item{raw.x}{Raw time values provided to the function as \code{time}.}
#' \item{raw.fl}{Raw growth data provided to the function as \code{data}.}
#' \item{ID}{(Character) Identifies the tested sample.}
#' \item{boot.x}{Table of time values per column, resulting from each spline fit of the bootstrap.}
#' \item{boot.fl}{Table of growth values per column, resulting from each spline fit of the bootstrap.}
#' \item{boot.flSpline}{List of \code{flFitSpline} object, created by \code{\link{flFitSpline}} for each resample of the bootstrap.}
#' \item{lambda}{Vector of estimated lambda (lag time) values from each bootstrap entry.}
#' \item{max_slope}{Vector of estimated max_slope (maximum slope) values from each bootstrap entry.}
#' \item{A}{Vector of estimated A (maximum fluorescence) values from each bootstrap entry.}
#' \item{integral}{Vector of estimated integral values from each bootstrap entry.}
#' \item{bootFlag}{(Logical) Indicates the success of the bootstrapping operation.}
#' \item{control}{Object of class \code{fl.control} containing list of options passed to the function as \code{control}.}
#'
#' @family fluorescence fitting functions
#'
#' @export
#'
#' @examples
#' # load example dataset
#' input <- read_data(data.growth = system.file("lac_promoters_growth.txt", package = "QurvE"),
#' data.fl = system.file("lac_promoters_fluorescence.txt", package = "QurvE"),
#' csvsep = "\t",
#' csvsep.fl = "\t")
#'
#' # Extract time and normalized fluorescence data for single sample
#' time <- input$time[4,]
#' data <- input$norm.fluorescence[4,-(1:3)] # Remove identifier columns
#'
#' # Perform linear fit
#' TestFit <- flBootSpline(time = time,
#' fl_data = data,
#' ID = 'TestFit',
#' control = fl.control(fit.opt = 's', x_type = 'time',
#' nboot.fl = 50))
#'
#' plot(TestFit, combine = TRUE, lwd = 0.5)
#
flBootSpline <- function(
time = NULL, growth = NULL, fl_data, ID = "undefined",
control = fl.control()
)
{
x_type <- control$x_type
if (is(control) !=
"fl.control")
stop("control must be of class fl.control!")
if (control$nboot.fl == 0)
stop(
"Number of bootstrap samples is zero! See ?fl.control"
)
if (!is.null(time))
time.in <- time <- as.vector(as.numeric(as.matrix(time)))[!is.na(as.vector(as.numeric(as.matrix(time))))]
if (!is.null(growth))
growth.in <- growth <- as.vector(as.numeric(as.matrix(growth)))[!is.na(as.vector(as.numeric(as.matrix(growth))))]
fl_data.in <- fl_data <- as.vector(as.numeric(as.matrix(fl_data)))[!is.na(as.vector(as.numeric(as.matrix(fl_data))))]
if (control$log.y.spline == TRUE)
{
fl_data.log <- log(fl_data/fl_data[1])
}
if (x_type == "growth" && length(growth) !=
length(fl_data))
stop(
"flBootSpline: length of input vectors (growth and fl_data) differ!"
)
if (x_type == "time" && length(time) !=
length(fl_data))
stop(
"flBootSpline: length of input vectors (time and fl_data) differ!"
)
# Consider only data points up to max growth
# or time, respectively
if (x_type == "growth")
{
ndx.max <- which.max(growth)
growth <- growth[1:ndx.max]
fl_data <- fl_data[1:ndx.max]
bad.values <- (is.na(growth)) |
(is.na(fl_data)) |
(!is.numeric(growth)) |
(!is.numeric(fl_data))
if (TRUE %in% bad.values)
{
growth <- growth[!bad.values]
fl_data <- fl_data[!bad.values]
}
if (control$log.x.spline == TRUE)
{
bad.values <- (growth < 0)
if (TRUE %in% bad.values)
{
growth <- growth[!bad.values]
fl_data <- fl_data[!bad.values]
}
growth.log <- log(growth/growth[1])
}
}
if (x_type == "time")
{
ndx.max <- which.max(time)
time <- time[1:ndx.max]
fl_data <- fl_data[1:ndx.max]
bad.values <- (is.na(time)) |
(is.na(fl_data)) |
(!is.numeric(time)) |
(!is.numeric(fl_data))
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
if (control$log.y.spline == TRUE)
{
fl_data.log <- fl_data.log[!bad.values]
} else
{
fl_data <- fl_data[!bad.values]
}
}
if (control$log.x.spline == TRUE)
{
bad.values <- (time < 0)
if (TRUE %in% bad.values)
{
time <- time[!bad.values]
fl_data <- fl_data[!bad.values]
}
time.log <- log(time + 1)
}
}
if (length(fl_data) <
6)
{
if (control$suppress.messages == FALSE)
message(
"flBootSpline: There is not enough valid data. Must have at least 6 unique values!"
)
flBootSpline <- list(
raw.x = get(ifelse(x_type == "growth", "growth", "time")),
raw.fl = fl_data, ID = ID, boot.x = NA,
boot.fl = NA, boot.flSpline = NA, lambda = NA,
max_slope = NA, A = NA, integral = NA,
bootFlag = FALSE, control = control
)
class(flBootSpline) <- "flBootSpline"
return(flBootSpline)
}
x <- get(ifelse(x_type == "growth", "growth", "time"))
A <- NA
max_slope <- NA
lambda <- NA
integral <- NA
dY = NA
boot.y <- array(NA, c(control$nboot.fl, length(x)))
boot.x <- array(NA, c(control$nboot.fl, length(x)))
nonpara <- list()
control.change <- control
control.change$fit.opt <- "s"
if (control$nboot.fl > 0)
{
for (j in 1:control$nboot.fl)
{
choose <- sort(
sample(
1:length(x),
length(x),
replace = TRUE
)
)
while (length(unique(choose)) <
5)
{
choose <- sort(
sample(
1:length(x),
length(x),
replace = TRUE
)
)
}
x.cur <- x[choose]
fl.cur <- fl_data[choose]
if (stats::IQR(x.cur) >
0)
{
if (x_type == "growth")
{
nonpara[[j]] <- flFitSpline(
growth = x.cur, fl_data = fl.cur,
ID = ID, control = control.change
)
} else
{
nonpara[[j]] <- flFitSpline(
time = x.cur, fl_data = fl.cur,
ID = ID, control = control.change
)
}
if (nonpara[[j]]$fitFlag == FALSE |
is.na(nonpara[[j]]$fit.fl[1]))
{
boot.y[j, 1:length(nonpara[[j]]$fit.x)] <- rep(NA, length(nonpara[[j]]$fit.fl))
boot.x[j, 1:length(nonpara[[j]]$fit.fl)] <- rep(NA, length(nonpara[[j]]$fit.x))
} else
{
boot.y[j, 1:length(nonpara[[j]]$fit.fl)] <- nonpara[[j]]$fit.fl
boot.x[j, 1:length(nonpara[[j]]$fit.x)] <- nonpara[[j]]$fit.x
}
lambda[j] <- nonpara[[j]]$parameters$lambda
max_slope[j] <- nonpara[[j]]$parameters$max_slope
A[j] <- nonpara[[j]]$parameters$A
integral[j] <- nonpara[[j]]$parameters$integral
dY[j] <- nonpara[[j]]$parameters$dY
}
}
lambda[which(!is.finite(lambda))] <- NA
max_slope[which(!is.finite(lambda))] <- NA
A[which(!is.finite(lambda))] <- NA
integral[which(!is.finite(lambda))] <- NA
dY[which(!is.finite(dY))] <- NA
# remove negative values which occured
# during bootstrap
lambda[which(lambda < 0)] <- NA
max_slope[which(max_slope < 0)] <- NA
A[which(A < 0)] <- NA
integral[which(integral < 0)] <- NA
dY[which(dY < 0)] <- NA
}
if (control$log.x.spline == TRUE)
{
bad.values <- (x < 0)
if (TRUE %in% bad.values)
{
time <- x[!bad.values]
fl_data <- fl_data[!bad.values]
}
x.log <- log(1 + x)
}
flBootSpline <- list(
raw.x = if (control$log.x.spline == TRUE)
{
x.log
} else
{
x
}, raw.fl = if (control$log.y.spline == TRUE)
{
fl_data.log
} else
{
fl_data
}, ID = ID, boot.x = boot.x, boot.fl = boot.y,
boot.flSpline = nonpara, lambda = lambda, max_slope = max_slope,
A = A, dY = dY, integral = integral, bootFlag = TRUE,
control = control
)
class(flBootSpline) <- "flBootSpline"
invisible(flBootSpline)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.