R/FitnessEstimation.R

Defines functions makeFitness2 numericalfitness2

Documented in makeFitness2 numericalfitness2

#'Internal QFA function. Do not call directly.
#'
#'This function does numerical fitness estimation by calculating numerical AUC, max slope and time to max slope and others.
numericalfitness2 = function(obsdat, AUCLim, STP, nrate = T) {
    # estimates fitness parameters like area under curve (AUC), max slope and time to max slope from the data itself without the assumption of a model

    # Generate numerical AUC
    if (length(obsdat$Growth) > 1) {
        loapproxfree = loapproxfun(obsdat$Expt.Time, obsdat$Growth, span = 0.5)
        loapprox = function(x) pmax(0, loapproxfree(x))
        nAUCfn = function(AUCLim) as.numeric(integrate(loapprox, 0, AUCLim)$value)
        nSTPfn = function(STP) as.numeric(loapprox(STP))
        nAUC = sapply(AUCLim, nAUCfn)
        nSTP = sapply(STP, nSTPfn)
    } else {
        # If there's only one photograph (e.g. single time point 1536 assay)
        nAUC = rep(NA, length(AUCLim))
        nSTP = rep(obsdat$Growth[1], length(STP))
    }

    res = c(nAUC, nSTP)

    if (length(AUCLim) == 1) {
        nAUCnames = c("nAUC")
    } else {
        nAUCnames = paste("nAUC", sprintf("%04d", round(AUCLim * 24 * 60)), sep = "")
    }
    if (length(STP) == 1) {
        nSTPnames = c("nSTP")
    } else {
        nSTPnames = paste("nSTP", sprintf("%04d", round(STP * 24 * 60)), sep = "")
    }
    names(res) = c(nAUCnames, nSTPnames)

    if (length(AUCLim) > 1)
        res["nAUC"] = res[nAUCnames[length(nAUCnames)]]
    if (length(STP) > 1)
        res["nSTP"] = res[nSTPnames[length(nSTPnames)]]

    # here comes the max slope and time to max slope estimate call to numerical_r2
    if (nrate) {
        numr_lst = numerical_r2(obsdat, mkPlots = F)
        res["nr"] = numr_lst$nr
        res["nr_t"] = numr_lst$nr_t
        res["maxslp"] = numr_lst$mslp
        res["maxslp_t"] = numr_lst$mslp_t
    }
    return(res)
}


# fitness estimation ----

#' Generate QFA fitness
#'
#' This function generates a variety of informative fitnesses from the model parameters of the model used in qfa.fit2. Namely
#' K, r, g & v for generalised and standard logistic model (with v = 1 for standard logisitc) or K, r and b for the Gompertz model.
#' It takes a data frame as generated by the qfa.fit2 function and appends columns for Maximum Doubling Rate (MDR),
#' Maximum Doubling Potential (MDP), Addinall et al. style fitness (MDRMDP), Doubling Time (DT) and Area Under Curve (AUC).
#' Note that this model-based AUC is distinct from the model-free nAUC which is generated directly from observed data by the qfa.fit2 function.
#' The two versions of AUC should be very similar in the vast majority of cases, however. Also, for MDR and MDP estimation of the Gompertz
#' model, a g parameter (lower asymptote, starting growth value) is estimated  from the Gompertz model parameters.
#' Additionally, the user can indicate if qfaFitnessPlot should directly be called with all the same parameters specified in the help page
#' of qfaFitnessPlot with changes to the parameter plotFitness of makeFitness2. Please refer to the help page of qfaFitnessPlot for details.
#'
#' @param results data.frame. This is the output of qfa.fit2 containing model parameter estimations to compute fitness measurements from
#' @param AUClim Numeric. Indicates the max time until which the AUC should be calculated. If set to NA (default), the max time within
#' the initial data.frame used to generate the results data.frame is used
#' @param dtmax Numeric. Indicates max possible doubling time (DT). If a DT higher than dtmax is calculated, it will be set to dtmax.
#' This is implemented as infinte DT would be estimated for dead culture spots.
#' @param filename If this is anything else than NA, a pdf file will be saved into the current working directory named filename
#' @param plotX String. Name of the column of the results data.frame used as a factor seperating boxplots on the x-axis
#' @param plotoFacet String. Name of the column of the results data.frame used as a factor creaeting facets
#' @param plotFill String. Name of the column of the results data.frame used as a factor coloring the boxplots
#' @param plotFitness String. Indicating if fitness boxplots should be produced. Default set to "no".
#' Either set to "All" to plot all three types of fitness measurements if the Gompertz model was used, otherwise
#' only MDR-based and modelfree fitness estimates are plotted. If set to "MDR" only MDR*MDP based fitness estimates are displayed.
#' If set to "Gmp" only Gompertz model based fitness estimates are displayed. If set to "ModelFree" only model free fitness estimates
#' are displayed.
#'
#' @return data.frame. The output data.frame contains the same information as the input data.frame with added fitness estimates for
#' MDR, MDP, MDP*MDP based fitness estimates, DT and AUC.
#'
#' @keywords qfa.fit2
#' @examples
#' data(qfa.testdata)
#' #Strip non-experimental edge cultures
#' qfa.testdata = qfa.testdata[(qfa.testdata$Row!=1) & (qfa.testdata$Col!=1) & (qfa.testdata$Row!=8) & (qfa.testdata$Col!=12),]
#' # Define which measure of cell density to use
#' qfa.testdata$Growth = qfa.testdata$Intensity
#' GmpFit = qfa.fit2(qfa.testdata, inocguess=NULL, detectThresh=0, globalOpt=F, AUCLim=NA, TimeFormat="h", Model="Gmp")
#' # Construct fitness measures
#' GmpFit = makeFitness2(GmpFit, AUCLim=NA, plotFitness="All", filename="Example_Gmp_fitness.pdf")
#' # Create plot
#' qfa.plot2("Example_Gmp_GrowthCurves.pdf", GmpFit, qfa.testdata, maxt=30)
makeFitness2 <- function(results, AUCLim = NA, dtmax = Inf, plotFitness = "no", filename = NA, plotX = "ORF", plotFacet = "ScreenID", plotFill = "Treatment") {
    # function to be called after qfa.fit2 was run. Estimates different variations of fitness, add these to the output file. Also possible to directly plot these fitness
    # values by specifying plotFitness. Function parameters: results: output of QFA.fit2 AUCLim: timelimit for calculation of AUC. If NA, use arbitrary number (5days).
    # Usually this is already defined within the QFA.fit2 output file as max time of the experiment dtmax: unknwon plotFitness: Defines if and which kind of fitness
    # measurements are plotted. Generates ggplot boxplots with given inputparameters plotX, plotFacet and plotFill if makeFitness2 is called as XY=makeFitness2,
    # modifications to the plots with stanard ggplot arguments can be used (e.g. XY+ggtitle('alternative title')) ='no': no plots generated. ='All': all possible plots
    # are printed. ='MDR': MDR and MDP based fitness measurements plots are generated (MDR, MDP, and MDP*MDR). ='Gmp': Gmp based fitness measurements plots are
    # generated. Gives error if the QFA.fit2 output was not generated with Gmp model. (plots r, b and fitness defined as r/b). ='ModelFree': model-free fitness
    # measurements plots are generated. (plots max slope, time to max slope and fitness defined as max slope / time to max slope) filename: if not NA, specifies the name
    # of a pdf file stored with the plots in the current working directory plotX: Column in results dataframe used as the factor dividing plots on the x-axis. plotFacet:
    # Column in results dataframe used as the factor subdividing plots plotFill: Column in results dataframe used as the factor defining fill color of the boxplots

    # first read the model used in the qfa.fit2
    Model <- unique(results$Model)
    if (Model == "Gmp") {
        Gmp = T
        fixG = F
        print("Gompertz model used")
    } else if (Model == "Glog") {
        Gmp = F
        glog = T
        print("Generalized logistic model used")
    } else if (Model == "Slog") {
        Gmp = F
        glog = F
        print("Standard logistic model used")
    } else {
        stop("Your dataset was generated with an older version. Please re-run qfa.fit2 with a Model= specified")
    }

    # and the timeformat
    TimeFormat = unique(results$TimeFormat)
    if (is.na(AUCLim)) {
        if ("AUCLim" %in% names(results)) {
            AUCLim = unique(results$AUCLim)
        } else {
            if (TimeFormat == "d") {
                AUCLim = 5
            } else {
                AUCLim = 5 * 24
            }
        }

    }
    # Fitness definitions from Addinall et al. 2011 Update the inoculum density parameter in the case where tshift!=0 if('tshift'%in%names(results))
    # results$g=Glogist(results$K,results$r,results$g,results$v,0-results$tshift) different fitness stuff for Glog, Slog or Gmp
    if (!Gmp) {

        naVect = !is.na(results$r) & !is.na(results$K) & !is.na(results$g) & !is.na(results$v)
        results$MDP[naVect] = mdp(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect], Gmp = Gmp)
        results$MDR[naVect] = mdr(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect], Gmp = Gmp)
        results$MDRMDP[naVect] = mdrmdp(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect], Gmp = Gmp)
        if ("t0" %in% rownames(results)) {
            results$DT[naVect] = dtl(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect], results$t0[naVect]) * 24
        } else {
            results$DT[naVect] = (1/results$MDR[naVect]) * 24
        }
        Glog = function(K, r, g, v, t) {
            # Eliminate problems with division by zero
            g = max(g, 1e-09)
            K = max(K, 1e-09)
            ifelse(is.na(Glogist(K, r, g, v, t)), 0, Glogist(K, r, g, v, t))  # Temporarily replace NA with zero here to allow integrate function below to handle modelFit=FALSE
        }

        AUC <- function(dno, tstar, dat) max(0, integrate(Glog, lower = 0, upper = tstar, K = dat$K[dno], r = dat$r[dno], g = dat$g[dno], v = dat$v[dno], subdivisions = 1000)$value -
            tstar * dat$g[dno])
        results$AUC = NA
        results$AUC[naVect] = sapply(1:length(results[naVect, 1]), AUC, tstar = AUCLim, dat = results[naVect, ])
        results$glog_maxslp[naVect] = gen_logistic_maxslp(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect])
        results$DT[abs(results$DT) > dtmax] = dtmax

        # Set spurious fitnesses to zero. Ignore that, from qfa v1. results$MDR[(results$r>7)&(results$K<0.0275)]=0 results$MDRMDP[(results$r>7)&(results$K<0.0275)]=0

    } else {
        # gmp stuff

        naVect = !is.na(results$r) & !is.na(results$K) & !is.na(results$b)
        results$MDP[naVect] = mdp(results$K[naVect], results$r[naVect], results$g[naVect], Gmp = Gmp)
        results$MDR[naVect] = mdr(results$K[naVect], results$r[naVect], results$g[naVect], results$v[naVect], Gmp = Gmp)
        results$MDRMDP[naVect] = results$MDP[naVect] * results$MDR[naVect]
        AUC <- function(dno, tstar, dat) max(0, integrate(Gmprtz, lower = 0, upper = tstar, K = dat$K[dno], r = dat$r[dno], b = dat$b[dno], subdivisions = 1000)$value -
            tstar * 1e-06)
        results$AUC = NA
        results$AUC[naVect] = sapply(1:length(results[naVect, 1]), AUC, tstar = AUCLim, dat = results[naVect,])
        results$DT = (1/results$MDR) * 24
        results$DT[abs(results$DT) > dtmax] = dtmax



    }

    # part for the fitness model boxplots defined by userinput
    if (plotFitness != "no") {
      qfaFitnessPlot(results=results, filename=filename, plotX=plotX, plotFacet=plotFacet, plotFill=plotFill, plotFitness=plotFitness)
    }
    # Doubling time (doublings per hour)

    # If doubling time is Inf, set to dtmax

    # Area under curve

    return(results)
}
JulBaer/baQFA documentation built on Feb. 19, 2023, 10:32 p.m.