cellGrowth: Fitting cell population growth models

#' Fit growth curves for multiple wells
#' 
#' Essentially a wrapper for \code{\link{fitCellGrowth}}. The function gets a well object and fits
#' a growth curve on all wells. It computes the doubling frequency observed in a well and extracts
#' the maximal growth rate ( 1/minimal doubling time).
#' The raw values from the machine might not be directly optical densities (OD),
#' which is needed to infer doubling time. Calibration functions for each machine can be
#' provided to map raw values into OD using the \code{calibration} parameter.
#' If the parameter plot.folder is set, the function creates a folder within plot.folder
#' for each file in the well object. For each well a plot is written into that folder,
#' named \code{well_id.png}. 
#' @title Fit multiple growth curves
#' @param well data.frame with mandatory columns directory, filename, well. See \code{\link{wellDataFrame}}
#' @param plot.folder see details
#' @param model model to choose for fitting growth curve
#' @param fileParser Converts the file generated by the machine to
#' proper R format. See \code{\link{readYeastGrower}} for details.
#' @param xlab plot parameter
#' @param ylab plot parameter
#' @param scaleX useful if you want to get the doubling in another unit, e.g. days instead
#' of seconds.
#' @param scaleY \code{function} applied to the calibrated data.
#' @param calibration \code{function} or \code{list} of \code{functions}. If function, calibration
#' is applied to all raw data. If list, the well dataframe must contain a column \code{machine}.
#' Depending on that column the according function in the list is applied to the raw data. See details
#' @param getWellIds function or vector. If function its parameter is
#' the return value of fileParser. It should return a vector containing
#' the well ids (e.g. A01, A02, ...). You can set the well ids vector directly.
#' See \code{\link{getWellIdsTecan}}.
#' @param locfit.h bandwidth parameter for local polynomial fitting. If set to "bandwidthCV" bandwidth
#' is automatically selected through \code{\link{bandwidthCV}}
#' @param bandwidths passed to \code{\link{bandwidthCV}} if locfit.h="bandwidthCV"
#' @param nFold passed to \code{\link{bandwidthCV}} if locfit.h="bandwidthCV"
#' @param nWell passed to \code{\link{bandwidthCV}} if locfit.h="bandwidthCV"
#' @param cutoff passed to \code{\link{bandwidthCV}} if locfit.h="bandwidthCV"
#' @param ... Parameter is passed to \code{\link{fitCellGrowth}}
#' @export
#' @return dataframe with entries
#' \item{maxGrowthRate}{maximal growth rate}
#' \item{pointOfMaxGrowthRate}{datapoint where growth rate is maximal}
#' \item{max}{inferred maximum among the time points}
#' \item{pointOfMax}{datapoint of the max fitted value}
#' @author Julien Gagneur and Andreas Neudecker
#' @seealso \code{\link{fitCellGrowth}}
#' @examples
#' plateLayout <- system.file("extdata", "plateLayout.txt", package="cellGrowth")
#' machineRun <- system.file("extdata", "machineRun.txt", package="cellGrowth")
#' well <- wellDataFrame(plateLayout,machineRun)
#' cal <- function(x){x+1}
#' fit <- fitCellGrowths(well,plot.folder="data",calibration=cal)
#' 

fitCellGrowths = function(
		well, 
		plot.folder=NULL,
		model = "locfit",
		xlab="time",
		ylab=expression(log2( Absorption )),
		scaleX = 1,
		scaleY = log2,
		calibration=identity,
		fileParser=readYeastGrower,
		getWellIds=getWellIdsTecan,
		locfit.h=3*60*60,
		bandwidths=seq(0.5*3600,10*3600, length.out=30),
		nFold=10,
		nWell=100,
		cutoff=0.95,
		...
){
	if ( locfit.h == "bandwidthCV" )
	{
		bw = bandwidthCV(
				well,
				fileParser=fileParser,
				getWellIds=getWellIds,
				bandwidths=bandwidths,
				calibration=calibration,
				nFold=nFold,
				nWell=nWell,
				cutoff=cutoff
		)
		locfit.h = bw$bandwidth
	}
	## sanity check on nWell
	if ( nWell > nrow(well))
	{
		warning("Not enough wells in the well object to select ",nWell," out of them. nWell is set to nrow(well)= ",nrow(well))
		nWell = nrow(well)		
	}
	
	
	
    ## proceed by unique file
    files = unique( file.path(well$directory, well$filename))
    cat("treating", length(files), "unique tecan files.\n")
    mu = pointOfMu = max = pointOfMax = rep(as.numeric(NA), length=nrow(well))
    for(f in files){
        cat("treating file", f, "\n")
       
        if(!is.null(plot.folder))
            if(!file.exists(plot.folder)) dir.create(plot.folder)
        
		## get data from file
        dat = fileParser(f)
		
        ## use aliases to match to well index
		if ( is.function ( getWellIds ))
        	wellIds = getWellIds( dat )
		else
			wellIds = getWellIds
		
        x = dat$time
		
		## to cope with a recent bug of the tecan reader software
        if(any(is.na(x)))
			stop("time ended up with an NA. Likely due to a duplicated OD value for one of the time point. Check and correct the file:", f)
		
        for(w in as.character(well$well[file.path(well$directory, well$filename)==f])){			
            wh = which(file.path(well$directory, well$filename)==f & well$well == w)
            if(length(wh)>1)
                stop("2 well annotations for same well", well[wh,])
			
			## calibration
			if ( !is.function(calibration) && !is.list(calibration))
			{
				warning(paste("No valid calibration function for file",f,", well",w,". No calibration is done."))
				y = dat$OD[[ match(w, wellIds)]]
			}
			if ( is.function(calibration))
				y = sapply(dat$OD[[ match(w, wellIds) ]],calibration)
			if ( is.list(calibration))
			{
				machine = well$machine[wh]
				if ( is.null( machine ) || is.null(calibration[[machine]]) )
				{
					if ( is.null( machine ) )
						warning(paste("Machine not found in dataframe for file",f,", well",w,". No calibration is done."))
					if (is.null(calibration[[machine]]))
						warning(paste("No calibration function for the machine",machine,"in file",f,", well",w,". No calibration is done."))
					y = dat$OD[[ match(w, wellIds)]]
				}
				else
					y = sapply(dat$OD[[ match(w, wellIds) ]],calibration[[machine]])
				
			}
			## negative ODs?
			if ( !all(y>=0))
			{
				warning("Negative ODs found. If you are using a calibration function, this might be the problem. Values are set to NAs")
				y[y<0] = NA
			}
			
            z = scaleY( y )
            ii = !is.nan(z)
            if( any( is.nan(z) ) )
                warning("some NaN in log2(OD). Likely negative OD values. Fit on non-NaN values")
            
            ## fit
            fit = fitCellGrowth( x = x[ii], z = z[ii], model = model, locfit.h=locfit.h, ...)
            if( !is.null(attr(fit,"class"))){
                mu[wh] = attr(fit, "maxGrowthRate")
				pointOfMu[wh] = attr(fit,"pointOfMaxGrowthRate")
                max[wh] = attr(fit, "max")
				pointOfMax[wh] = attr(fit,"pointOfMax")
            }
            ## plot
            if(!is.null(plot.folder)){
				if(!file.exists(file.path(plot.folder, paste(well$filename[wh],"_plot",sep="")))) 
					dir.create(file.path(plot.folder, paste(well$filename[wh],"_plot",sep="")))
                png( file.path(plot.folder, paste(well$filename[wh],"_plot",sep=""),paste(w, ".png",sep="") ) )
				plot(
						fit, 
						scaleX = scaleX,
						ylab = ylab,
						xlab=xlab,
						main = paste(well$filename[wh],"\n", w)
				)
                dev.off()
            }
            
        }
    }
    
    return (
			data.frame(
					maxGrowthRate=mu,
					pointOfMaxGrowthRate=pointOfMu,
					max=max,
					pointOfMax=pointOfMax
	))
}