gdscIC50_advanced: Pipeline for GDSC Curve Fitting

################################################################################
# Copyright (c) 2015, 2016, 2017 Genome Research Ltd. 
# Copyright (c) 2015, 2016, 2017 The Netherlands Cancer Institute (NKI)
#  
# Author: Howard Lightfoot <cancerrxgene@sanger.ac.uk> 
# Author: Dieudonne van der Meer
# Author: Daniel J Vis
# 
# This file is part of gdscIC50. 
# 
# gdscIC50 is free software: you can redistribute it and/or modify it under 
# the terms of the GNU General Public License as published by the Free Software 
# Foundation; either version 3 of the License, or (at your option) any later 
# version. 
#  
# This program is distributed in the hope that it will be useful, but WITHOUT 
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 
# details. 
#  
# You should have received a copy of the GNU General Public License along with 
# this program. If not, see <http://www.gnu.org/licenses/>. 
################################################################################
#' @import nlme
NULL

groupNlmeData <- function(nlme_data) {
  gDat <- nlme::groupedData(y ~ x | drug/CL, data = nlme_data, FUN = mean,
                      labels = list(x = "Concentration", y = "Viability"),
                      units = list(x = "uM/l", y = "percentage killed"))
  return(gDat)
}

GetXMax <- function(grouped_data, x){
  xmax <- max(grouped_data$x)
  return(xmax)
}

#' logist3
#' 
#' The 2 parameter logistic function used to model GDSC dose response data. This
#' model is described further in published in Vis, D.J. et al. Pharmacogenomics
#'  2016, 17(7):691-700)
#'
#' @param x concentration of drug treatment usually on transformed scale. 
#'   See setXFromConc().
#' @param xmid location parameter of logistic curve
#' @param xmax is the maximum VALUE of x, positioned on a curve's plateau
#' @param scal scale parameter of logistic curve
#'
#' @return a function object of class selfStart
#' @export
logist3 <- stats::selfStart( ~ xmax/(1 + exp(-(x - xmid)/scal)),
                             initial = function(mCall, LHS, data){
                              # This is a constructor function for the class of sortedXyData objects. These objects are mostly used in the initial function for a self-starting nonlinear regression model, which will be of the SelfStart class.  
                               xy <- stats::sortedXyData(mCall[["x"]], LHS, data)
                               if(nrow(xy) < 3) {
                                 stop("Too few distinct input values to fit a logistic")
                                 }
                               xmid <- stats::NLSstClosestX(xy, 0.5*xmax ) 
                               scal <- stats::NLSstClosestX(xy, 0.75*xmax ) - xmid
                               value <- c(xmid, scal)
                               names(value) <- mCall[c("xmid", "scal")]
                               value
                               },
                             parameters = c("xmid", "scal")
                             )

#' fitModel
#' 
#' Used internally by fitModelNlmeData
#' 
#' @param gDat grouped data frame input
#' @param vStart starting parameter values for xmid and scal
#' @param bLargeScale equivalent to isLargeData in fitModelNlmeData
#' @param bSilent verbose output or not
#'
#' @return a fitted nlme model
#'
fitModel <- function(gDat, vStart = c(8.886464, 1.495953 ), bLargeScale=TRUE, bSilent=TRUE){
  # start with sanity checks; catch most common mistakes early
  # first, do we have all requires columns?
  if(length(which(colnames(gDat)=='CL'))==0){
    stop('gDat is required to contain the cell lines names in a column with name CL')  
  }
  if(length(which(colnames(gDat)=='x'))==0){
    stop('gDat is required to contain the X-from concentration step names in a column with name x')  
  }
  if(length(which(colnames(gDat)=='y'))==0){
    stop('gDat is required to contain the relative kill (1-viability) in a column with name y')  
  }
  if(length(which(colnames(gDat)=='drug'))==0){
    stop('gDat is required to contain the drug in a column with name drug')  
  }
  if(length(which(colnames(gDat)=='maxc'))==0){
    stop('gDat is required to contain the maxc (maximum concentration) in a column with name maxc')  
  }
  # coding of relative kill (1-viability) correct; mean of grouped_data$y value at max(grouped_data$x) should be higher than at min(gDat$x)
  tmp.minx <- min(gDat$x)
  tmp.maxx <- max(gDat$x)
  tmp.whichxmin <- which(gDat$x == tmp.minx)
  tmp.whichxmax <- which(gDat$x == tmp.maxx)
  tmp.ymin <- mean(stats::na.omit(gDat$y[tmp.whichxmin]))
  tmp.ymax <- mean(stats::na.omit(gDat$y[tmp.whichxmax]))
  if(tmp.ymin > tmp.ymax){
    stop('Coding of relative viabilities seems incorrect; note that y is defined as 1-viability.')
  }
  if(bLargeScale){
    fmv5 <- nlme::nlme(y ~ gdscIC50::logist3(x, xmid, scal),
                       fixed= xmid+scal~1,
                       random=list(CL = nlme::pdSymm(xmid+scal~1), 
                                   drug = nlme::pdDiag(xmid~1)),
                       data = gDat, start=vStart, method='REML')
    if(!bSilent){
      summary(fmv5)
    }
    return(fmv5)
  }else{
    fmv5 <- nlme::nlme(y ~ gdscIC50::logist3(x, xmid, scal),
                       fixed = xmid + scal ~  1, 
                       random = list(CL = nlme::pdDiag(xmid + scal ~ 1),
                                     drug = nlme::pdDiag(xmid ~ 1)), 
                       data = gDat, start = vStart, method = "REML",
                       control = nlme::nlmeControl(pnlsTol = 0.2,
                                             msVerbose = FALSE,
                                             tolerance=1e-4,
                                             returnObject = T)
                       )
    if(!bSilent){
      summary(fmv5)
    }
    return(fmv5)
  }
}

#' Fit an nlme dose reponse model to GDSC data.
#' 
#' @param nlme_data a data frame as specified by \code{\link{prepNlmeData}}
#' @param isLargeData logical. Default \code{(isLargeData = T)}. Set to \code{F}
#'  if model fitting fails to converge, e.g., 
#'  \code{step halving factor reduced below minimum in PNLS step}
#'  
#'  With this \code{isLargeData = TRUE}, the covariance between the position and
#'  scale parameter on the cell line level are assumed to be correlated. This 
#'  further stabilizes the fit. In small bespoke screens this is set to FALSE as
#'  the model otherwise struggles to converge.
#' 
#' @return a fitted nlme model
#' 
#' @examples 
#' data("gdsc_example")
#' gdsc_example <- removeFailedDrugs(gdsc_example)
#' gdsc_example <- removeMissingDrugs(gdsc_example)
#' gdsc_example <- normalizeData(gdsc_example)
#' gdsc_example <- setConcsForNlme(gdsc_example)
#' nlme_data <- prepNlmeData(gdsc_example, "COSMIC_ID")
#' \dontrun{
#' nlme_model <- fitModelNlmeData(nlme_data, isLargeData = F)
#' }
#' 
#' @seealso \code{\link{prepNlmeData}}, \code{\link{getModelCoef}}, 
#'  \code{\link{calcNlmeStats}}.
#' 
#' @export
fitModelNlmeData <- function (nlme_data, isLargeData = T) {
  gDat <- groupNlmeData(nlme_data)
  nlme_model <- fitModel(gDat, bLargeScale = isLargeData)
  return(nlme_model)
}

logistInit4 <- function(mCall, LHS, data){
    xy <- sortedXyData(mCall[["x"]], LHS, data)
    if(nrow(xy) < 3) {
        stop("Too few distinct input values to fit a logistic")
    }
    # Use inverse linear interpolation to approximate the x VALUE at which the function represented by xy is equal to yval
    xmid <- NLSstClosestX(xy, 0.5*xmax )
    scal <- NLSstClosestX(xy, 0.75*xmax ) - xmid
    ANCHOR_VIAB <- 0.9*xmax
    value <- c(ANCHOR_VIAB,xmid, scal)
    names(value) <- mCall[c("xmid")]
    value
}

#' logist4
#' 
#' The 2 parameter logistic function used to model the combination expected curve. 
#' Takes the anchor viability into account
#'
#' @param x concentration of drug treatment usually on transformed scale. 
#'   See setXFromConc().
#' @param xmid location parameter of logistic curve
#' @param scal scale parameter of logistic curve
#' @param ANCHOR_VIAB the anchor viability
#'
#' @return a function object of class selfStart
#' @export
logist4 <- stats::selfStart( ~ (xmax-ANCHOR_VIAB)+(ANCHOR_VIAB/(1 + exp(-(x - xmid)/(scal)))), 
                             initial = logistInit4, parameters = c("xmid"))


logistInit5 <- function(mCall, LHS, data)
{
    xy <- sortedXyData(mCall[["x"]], LHS, data)
    if(nrow(xy) < 3) {
        stop("Too few distinct input values to fit a logistic")
    }
    xmid <- NLSstClosestX(xy, 0.5*xmax )
    scal <- NLSstClosestX(xy, 0.75*xmax ) - xmid
    # ANCHOR_VIAB <- 0.99
    value <- c(ANCHOR_VIAB,scal,xmid)
    names(value) <- mCall[c("xmid","scal")]
    value
}

#' logist5
#' 
#' The 2 parameter logistic function used to model combination observed curve. 
#'
#' @param x concentration of drug treatment usually on transformed scale. 
#'   See setXFromConc().
#' @param SYNERGY_XMID location parameter of logistic curve of the combination observed
#' @param LIBRARY_SACL scale parameter of logistic curve for the combination observed comes
#'   from the library alone.
#' @param ANCHOR_VIAB the anchor viability
#'
#' @return a function object of class selfStart
#' @export
logist5 <- stats::selfStart( ~ (xmax-ANCHOR_VIAB)+(ANCHOR_VIAB/(1 + exp(-(x - xmid)/(scal)))), 
                             initial = logistInit5, parameters = c("xmid","scal"))

#' getIntegral
#'
#' @param x concentration of drug treatment usually on transformed scale. 
#'   See setXFromConc()
#' @param xmid location parameter of logistic curve
#' @param scal scale parameter of logistic curve
#'
#' @return integral value
#' @export
getIntegral <- function(x,xmid,scal){
  a <- xmid
  b <- scal
  return(b*log(exp(a/b)+exp(x/b))-a)
}

yemelianovskyi/gdscIC50_advanced documentation built on Jan. 30, 2020, 12:27 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

yemelianovskyi/gdscIC50_advanced
Pipeline for GDSC Curve Fitting

R/nlme_fit.R
In yemelianovskyi/gdscIC50_advanced: Pipeline for GDSC Curve Fitting

Defines functions groupNlmeData GetXMax fitModel logistInit4 logistInit5 getIntegral

R Package Documentation

Browse R Packages

We want your feedback!

yemelianovskyi/gdscIC50_advanced Pipeline for GDSC Curve Fitting

R/nlme_fit.R In yemelianovskyi/gdscIC50_advanced: Pipeline for GDSC Curve Fitting

Defines functions groupNlmeData GetXMax fitModel logistInit4 logistInit5 getIntegral

R Package Documentation

Browse R Packages

We want your feedback!

yemelianovskyi/gdscIC50_advanced
Pipeline for GDSC Curve Fitting

R/nlme_fit.R
In yemelianovskyi/gdscIC50_advanced: Pipeline for GDSC Curve Fitting