R/gtoxFit.R
In GladiaTOX: R Package for Processing High Content Screening data

Documented in gtoxFit

#####################################################################
## This program is distributed in the hope that it will be useful, ##
## but WITHOUT ANY WARRANTY; without even the implied warranty of  ##
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the    ##
## GNU General Public License for more details.                    ##
#####################################################################

#-------------------------------------------------------------------------------
# gtoxFit: Fit the data
#-------------------------------------------------------------------------------

#' @title Fit the data with the constant, hill, and gain-loss models
#'
#' @description
#' \code{gtoxFit} fits the constant, hill, and gain-loss models to the given 
#' data and returns some summary statistics and the fit parameters in a list.
#'
#' @param logc Numeric, log concentration values
#' @param resp Numeric, normalized response values
#' @param bmad Numeric, the baseline median absolute deviation for the entire
#'             assay
#' @param force.fit Logical, TRUE indicates to attempt fitting every
#'                  concentration series
#' @param \dots Any other data to be included in list output.
#'
#' @details
#' By default, \code{gtoxFit} will only attempt to fit concentration series
#' when at least one median value is greater than 3*bmad.
#'
#' @examples
#' logc <- 1:10
#' resp <- sapply(1:10, gtoxHillVal, ga = 5, tp = 50, gw = 0.5)
#' params <- gtoxFit(logc = logc, resp = resp, bmad = 10)
#' plot(resp ~ logc)
#' gtoxAddModel(pars = params, modl = "hill")
#'
#' @return List of summary values and fit parameters for the given data.
#'
#' @seealso \code{\link{gtoxObjCnst}}, \code{\link{gtoxObjHill}},
#' \code{\link{gtoxObjGnls}}, \code{\link{constrOptim}}
#'
#' @importFrom numDeriv hessian
#' @importFrom stats optim constrOptim median mad
#' @importFrom methods is
#' @export


gtoxFit <- function(logc, resp, bmad, force.fit=FALSE, ...) {

    ## Variable-binding to pass R CMD Check
    hill_tp <- hill_ga <- hill_gw <- gnls_ga <- gnls_gw <- gnls_la <- NULL
    gnls_lw <- gnls_tp <- hill_tp_sd <- hill_ga_sd <- hill_gw_sd <- NULL
    hill_er <- hill_er_sd <- gnls_tp_sd <- gnls_ga_sd <- gnls_gw_sd <- NULL
    gnls_la_sd <- gnls_lw_sd <- gnls_er <- gnls_er_sd <- NULL

    fenv <- environment()

    bmad <- min(bmad)
    rmns <- tapply(resp, logc, mean)
    rmds <- tapply(resp, logc, median)
    mmed <- max(rmds)
    mmed_conc <- as.numeric(names(which.max(rmds)))

    hprs <- paste0("hill_", c("tp", "ga", "gw", "er"))
    hsds <- paste0("hill_", c("tp", "ga", "gw", "er"), "_sd")
    gprs <- paste0("gnls_", c("tp", "ga", "gw", "la", "lw", "er"))
    gsds <- paste0("gnls_", c("tp", "ga", "gw", "la", "lw", "er"), "_sd")

    resp_max <- max(resp)
    resp_min <- min(resp)
    logc_med <- median(logc)
    logc_min <- min(logc)
    logc_max <- max(logc)

    ncnc <- lu(logc)
    npts <- length(resp)
    ## Meidan number replicates
    nrep <- as.numeric(median(tapply(resp, logc, lu)))
    nmed_gtbl <- lw(rmds >= 3 * bmad) # Number of medians above 3 * bmad

    ## Do not fit anything with less than four concentrations of data.
    if (length(rmds) >= 4) {

        er_est <- if ((rmad <- mad(resp)) > 0) log(rmad) else log(1e-32)

###----------------------- Fit the Constant Model -----------------------###
        cfit <- optim(
            er_est,
            gtoxObjCnst,
            method="Brent",
            lower=er_est - 2,
            upper=er_est + 2,
            control=list(
                fnscale=-1,
                reltol=1e-4,
                maxit=500
            ),
            resp=resp
        )

        if (!is(cfit, "try-error")) {

            cnst <- 1L
            cnst_er <- cfit$par
            ## 2*length(cfit$par) - 2*cfit$value
            caic <- 2 - 2*cfit$value

            ## Calculate the rmse for constant
            crme <- sqrt(mean((0 - resp)^2, na.rm=TRUE))

        } else {

            cnst <- 0L
            cnst_er <- NA_real_
            caic <- NA_integer_
            crme <- NA_real_

        }

        if (lw(rmds >= 3*bmad) > 0 | force.fit) {

###------------------------ Fit the Hill Model ------------------------###
            ## Starting parameters for the Hill Model
            ## cind <- (ceiling(length(meds)/2) + 1):length(meds)
            h <- c(
                mmed, # top (tp)
                mmed_conc - 1, # logAC50 (ga)
                1.2, # hill coefficient (gw)
                er_est # logSigma (er)
            )

            if (h[1] == 0) h[1] <- 0.1

            hUi <- matrix(
                ## Generate the bound matrices to constrain the model.
                ## tp   ga   gw   er
                c(
                    1,   0,   0,   0,
                    -1,   0,   0,   0,
                    0,   1,   0,   0,
                    0,  -1,   0,   0,
                    0,   0,   1,   0,
                    0,   0,  -1,   0),
                byrow=TRUE, nrow=6, ncol=4
            )

            hbnds <- c(
                0, -1.2*resp_max, # tp bounds
                logc_min - 2, -(logc_max + 0.5), # ga bounds
                0.3, -8
            ) # gw bounds

            hCi <- matrix(hbnds, nrow=6, ncol=1)

            ## Optimize the hill model
            hfit <- try(
                constrOptim(
                    h,
                    gtoxObjHill,
                    ui=hUi,
                    ci=hCi,
                    mu=1e-6,
                    method="Nelder-Mead",
                    control=list(
                        fnscale=-1,
                        reltol=1e-10,
                        maxit=6000
                    ),
                    lconc=logc,
                    resp=resp
                ),
                silent=TRUE
            )

            ## Generate some summary statistics
            if (!is(hfit, "try-error")) { # Hill model fit the data

                hill <- 1L
                haic <- 8 - 2*hfit$value # 2*length(hfit$par) - 2*hfit$value
                mapply(
                    assign,
                    c(hprs),
                    hfit$par,
                    MoreArgs=list(envir=fenv)
                )

                ## Calculate rmse for hill
                hill_modl <- hill_tp/(1 + 10^((hill_ga - logc)*hill_gw))
                hrme <- sqrt(mean((hill_modl - resp)^2, na.rm=TRUE))

                ## Calculate the sd for the hill parameters
                hfit$cov <- try(
                    solve(
                        -hessian(
                            gtoxObjHill,
                            hfit$par,
                            lconc=logc,
                            resp=resp
                        )
                    ),
                    silent=TRUE
                )

                if (!is(hfit$cov, "try-error")) { # Could invert hill Hessian

                    hcov <- 1L
                    hdiag_sqrt <- suppressWarnings(sqrt(diag(hfit$cov)))
                    if (any(is.nan(hdiag_sqrt))) {
                        mapply(
                            assign,
                            hsds,
                            NaN,
                            MoreArgs=list(envir=fenv)
                        )
                    } else {
                        mapply(
                            assign,
                            hsds,
                            hdiag_sqrt,
                            MoreArgs=list(envir=fenv)
                        )
                    }

                } else { # Could not invert hill Hessian

                    hcov <- 0L
                    mapply(
                        assign,
                        c(hsds),
                        NA_real_,
                        MoreArgs=list(envir=fenv)
                    )

                }

            } else { # Hill model did not fit the data

                hill <- 0L
                haic <- NA_real_
                hcov <- NA_integer_
                hrme <- NA_real_

                mapply(
                    assign,
                    c(hprs, hsds),
                    NA_real_,
                    MoreArgs=list(envir=fenv)
                )

            }

###--------------------- Fit the Gain-Loss Model ----------------------###
            ## Starting parameters for the Gain-Loss Model
            ## cind <- (ceiling(length(meds)/2) + 1):length(meds)
            g <- c(
                mmed, # top (tp)
                mmed_conc - 1, # gain logAC50 (ga)
                1.2, # gain hill coefficient (gw)
                mmed_conc + 0.1, # loss logAC50 (la)
                5, # loss hill coefficient (lw)
                er_est # logSigma (er)
            )

            if (g[1] == 0) g[1] <- 0.1

            gUi <- matrix(
                ## Generate the bound matrices to constrain the model.
                ##  tp   ga   gw   la   lw   er
                c(
                    1,   0,   0,   0,   0,   0,
                    -1,   0,   0,   0,   0,   0,
                    0,   1,   0,   0,   0,   0,
                    0,  -1,   0,   0,   0,   0,
                    0,   0,   1,   0,   0,   0,
                    0,   0,  -1 ,  0,   0,   0,
                    0,   0,   0,   1,   0,   0,
                    0,   0,   0,  -1,   0,   0,
                    0,   0,   0,   0,   1,   0,
                    0,   0,   0,   0,  -1,   0,
                    0,  -1,   0,   1,   0,   0
                ),
                byrow=TRUE, nrow=11, ncol=6
            )

            gbnds <- c(
                0, -1.2*resp_max, # tp bounds
                logc_min - 2, -(logc_max), # ga bounds
                0.3, -8, # gw bounds
                logc_min - 2, -(logc_max + 2), # la bounds
                0.3, -18, # lw bounds
                0.25 # ga < la
            )

            ## if (mmed_conc > logc_min) g[7] <- mmed_conc - 0.25

            gCi <- matrix(gbnds, nrow=11, ncol=1)

            ## Optimize the gnls model
            gfit <- try(
                constrOptim(
                    g,
                    gtoxObjGnls,
                    ui=gUi,
                    ci=gCi,
                    mu=1e-6,
                    method="Nelder-Mead",
                    control=list(
                        fnscale=-1,
                        reltol=1e-10,
                        maxit=6000
                    ),
                    lconc=logc,
                    resp=resp),
                silent=TRUE
            )
            
            ## Generate some summary statistics
            if (!is(gfit, "try-error")) { # Gain-loss fit the data

                gnls <- 1L
                gaic <- 12 - 2*gfit$value # 2*length(gfit$par) - 2*gfit$value
                mapply(
                    assign,
                    c(gprs),
                    gfit$par,
                    MoreArgs=list(envir=fenv)
                )

                ## Calculate rmse for gnls
                gnls_gn <- (1/(1 + 10^((gnls_ga - logc)*gnls_gw)))
                gnls_ls <- (1/(1 + 10^((logc - gnls_la)*gnls_lw)))
                gnls_modl <- gnls_tp * gnls_gn * gnls_ls
                grme <- sqrt(mean((gnls_modl - resp)^2, na.rm=TRUE))

                ## Calculate the sd for the gnls parameters
                gfit$cov <- try(
                    solve(
                        -hessian(
                            gtoxObjGnls,
                            gfit$par,
                            lconc=logc,
                            resp=resp
                        )
                    ),
                    silent=TRUE
                )

                if (!is(gfit$cov, "try-error")) { # Could invert gnls Hessian

                    gcov <- 1L
                    gdiag_sqrt <- suppressWarnings(sqrt(diag(gfit$cov)))
                    if (any(is.nan(gdiag_sqrt))) {
                        mapply(
                            assign,
                            gsds,
                            NaN,
                            MoreArgs=list(envir=fenv)
                        )
                    } else {
                        mapply(
                            assign,
                            gsds,
                            gdiag_sqrt,
                            MoreArgs=list(envir=fenv)
                        )
                    }

                } else { # Could not invert gnls Hessian

                    gcov <- 0L
                    mapply(
                        assign,
                        c(gsds),
                        NA_real_,
                        MoreArgs=list(envir=fenv)
                    )

                }

            } else { # Gain-loss did not fit the data

                gnls <- 0L
                gaic <- NA_real_
                gcov <- NA_integer_
                grme <- NA_real_

                mapply(
                    assign,
                    c(gprs, gsds),
                    NA_real_,
                    MoreArgs=list(envir=fenv)
                )

            }

        } else { # None of the response values fell outside 3*bmad

            hill <- NA_integer_
            haic <- NA_real_
            hcov <- NA_integer_
            hrme <- NA_real_
            gnls <- NA_integer_
            gaic <- NA_real_
            gcov <- NA_integer_
            grme <- NA_real_

            mapply(
                assign,
                c(hprs, hsds, gprs, gsds),
                NA_real_,
                MoreArgs=list(envir=fenv)
            )

        }

    } else { # Data has response data for less than four concentrations.

        cnst <- NA_integer_
        cnst_er <- NA_real_
        caic <- NA_real_
        crme <- NA_real_

        hill <- NA_integer_
        haic <- NA_real_
        hcov <- NA_integer_
        hrme <- NA_real_
        gnls <- NA_integer_
        gaic <- NA_real_
        gcov <- NA_integer_
        grme <- NA_real_

        mapply(
            assign,
            c(hprs, hsds, gprs, gsds),
            NA_real_,
            MoreArgs=list(envir=fenv)
        )

    }

    out <- list(
        resp_max     =resp_max,
        resp_min     =resp_min,
        max_mean     =max(rmns),
        max_mean_conc=as.numeric(names(which.max(rmns))),
        max_med      =mmed,
        max_med_conc =mmed_conc,
        logc_max     =logc_max,
        logc_min     =logc_min,
        cnst         =cnst,
        hill         =hill,
        hcov         =hcov,
        gnls         =gnls,
        gcov         =gcov,
        cnst_er      =cnst_er,
        cnst_aic     =caic,
        cnst_rmse    =crme,
        hill_tp      =hill_tp,
        hill_tp_sd   =hill_tp_sd,
        hill_ga      =hill_ga,
        hill_ga_sd   =hill_ga_sd,
        hill_gw      =hill_gw,
        hill_gw_sd   =hill_gw_sd,
        hill_er      =hill_er,
        hill_er_sd   =hill_er_sd,
        hill_aic     =haic,
        hill_rmse    =hrme,
        gnls_tp      =gnls_tp,
        gnls_tp_sd   =gnls_tp_sd,
        gnls_ga      =gnls_ga,
        gnls_ga_sd   =gnls_ga_sd,
        gnls_gw      =gnls_gw,
        gnls_gw_sd   =gnls_gw_sd,
        gnls_la      =gnls_la,
        gnls_la_sd   =gnls_la_sd,
        gnls_lw      =gnls_lw,
        gnls_lw_sd   =gnls_lw_sd,
        gnls_er      =gnls_er,
        gnls_er_sd   =gnls_er_sd,
        gnls_aic     =gaic,
        gnls_rmse    =grme,
        nconc        =ncnc,
        npts         =npts,
        nrep         =nrep,
        nmed_gtbl    =nmed_gtbl,
        ...
    )

    out

}

#-------------------------------------------------------------------------------