smt-beta: Statistical analysis of single molecule trajectories

Documented in .one.comp.fit.rt

#### fitResidenceTime.R
#### Wu Lab, Johns Hopkins University
#### Author: Xiaona Tang
#### Reference: fitCDF() by Sheng Liu
#### Date: Sep 20, 2017

## fitRT-methods
##
###############################################################################
##' @name fitRT
##' @aliases fitRT
##' @title Fitting Residence time (1-CDF)
##' @rdname fitRT-methods
##' @docType methods
##' @description Caclulate average residence time for trajecotries by  
##' fitting 1-CDF (survival distribution).
##'
##' @usage
##'
##' fitRT(trackll=NULL,x.max=30,N.min=1.5,t.interval=0.5,maxiter.search=1e3,
##' maxiter.optim=1e3,k.ns=FALSE)
##'
##' @param trackll trajectory list generated by createTrackll() and 
##' processing. if NULL, user will be prompted to enter the trackll name.
##' @param x.max The maximum range of X axis, i.e. time, for the output plot. 
##' Default 30 sec.
##' @param N.min The minimal duration of trajectory length in the unit of s
##' econd, trajectories shorter than N.min will be filtered out. 
##' Default 1.5 sec.
##' @param t.interval time interval for image aquisition. Default 0.5 sec.
##' @param k.ns Logical indicate or numeric used to control fitting.
##' @param maxiter.optim A positive integer specifying the maximum number of 
##' iterations allowed for nls.
##' @param maxiter.search A positive integer specifying the maximum number of 
##' iterations allowed for nls.
##' @return
##' \itemize{
##' \item{On the Console output:} Result of both one and two-component fit and 
##' parameters of goodness of the fit.
##' \item{Plot:} fitting curves will be plotted over the raw data, with the 
##' number of tracks and result of two-component fitting shown in the plot 
##' area.
##' }
##' @details Calculating average residence time of particles by fitting 1-CDF 
##' (survival distribution) of its trajectorys.
##'
##' Input track list (trackll) should be processed, merged trackll, the 
##' fitting will start right away after running the function. 
##' The maximum time range to be plotted can be set using x.max, this will not 
##' change the fitting result. The result of two-component fit will be shown 
##' on the plot as well as in the console.
##' Fitting result is determined by the input trackll, and N.min (filtering of 
##' the trackll).

##'
##' @examples
##'
##' # Generate trackll, and process, 
##' # e.g. mask region of interest, merge tracks from multiple files.
##' folder=system.file("extdata","HSF",package="sojourner")
##' trackll=createTrackll(folder=folder, input=3)
##' trackll=maskTracks(folder,trackll)
##' trackll=mergeTracks(folder,trackll)
##'
##' # Fit the residence time of trackll
##' fitRT(trackll=trackll,x.max=30,N.min=1.5,t.interval=0.5)

##' @importFrom mltools empirical_cdf
##' @export fitRT

###############################################################################
###############################################################################


## Function for one component 1-CDF fitting. 
## Initial values are not flexible for users, but fitting result is quite 
## consistent.
.one.comp.fit.rt=function(name,t,P,t.interval=t.interval,
                          start=list(k.s=c(1/600,1/t.interval)),
                          maxiter.optim=1e3){
    
    ## Equation for one component 1-CDF fitting. shift the values of time 
    ## points
    ## corresponding to each P value, 
    ## so that the first time point is 0. This would make the fitting more 
    ## accurate.
    p1 =function(t,k.s){
    exp(-k.s*(t-min(t)))}

    title1=paste("One components fit for ",name)
    cat("\n\n","==>",title1,"\n")

    # Determine an initial value

    k.s=truncnorm::rtruncnorm(1,a=data.frame(start)[1,],
                              b=data.frame(start)[2,])
    # this defaults normal distribution with mean = 0, sd = 1

    start=list(k.s=k.s)

    # fit equation 1 to data P
    ocfit=nls(P ~ p1(t,k.s),
              start=start,control = nls.control(maxiter = maxiter.optim))
    print(ocfit);cat("\n")

    ### tring to avoid r cmd check warning
    ## plot the fitting curve over raw data. Shift the time points back.
    #p1.model =function(x,k.s){exp(-k.s*x-min(t))}
    p1_model=function(x){exp(-coef(ocfit)["k.s"]*(x-min(t)))}
    curve(p1_model,
    #p1.model=function(t,k.s){
    #    exp(-k.s*t)}
    #curve(p1.model(x-min(t),
    #               coef(ocfit)["k.s"]),
    add=TRUE,col="green",lwd=4
    )

    return(ocfit)
}

## Function for two component 1-CDF fitting.
## Initial values are not flexible for users, but fitting result is quite 
## consistent.
## There is an option to fix the value of one of the two component k.ns.
.two.comp.fit.rt=function(name,t,P,t.interval,
                          start=list(k.s=c(1/600,1/t.interval),
                                     k.ns=c(1/600,1/t.interval),
                                     alpha=c(1e-3,1)),maxiter.search=1e3,
                          maxiter.optim=1e3,k.ns=FALSE){

    ##Equation for two component 1-CDF fitting. shift the values of time points
    ##corresponding to each P value, so that the first time point is 0. This
    ##would make the fitting more accurate.
    p3 =function(t,k.s,k.ns,alpha){
    alpha*exp(-k.s*(t-min(t))) + (1-alpha)*exp(-k.ns*(t-min(t)))}

    title2=paste("Two components fit for ",name)
    cat("\n\n","==>",title2,"\n")

    #cat("\nBrute force random search start value...\n\n")
    k.search.tcfit=nls2::nls2(P ~ p3(t,k.s,k.ns,alpha),start=data.frame(start),
                            # algorithm="brute-force",
                            algorithm="random-search",
                            control = nls.control(maxiter = maxiter.search))

    print(coef(k.search.tcfit))

    ## Local optimization using nlsLM.
    cat("\nLocal optimization...\n\n")
    if(k.ns == FALSE){
    tcfit=minpack.lm::nlsLM(P ~ p3(t,k.s,k.ns,alpha),
                            start=coef(k.search.tcfit),
                            lower=c(0,0,0),
                            upper=c(Inf,Inf,1))
    }
    ## Use a fixex k.ns value given by user.
    else{
    tcfit=minpack.lm::nlsLM(P ~ p3(t,k.s,k.ns,alpha),
                            start=coef(k.search.tcfit),
                            lower=c(0,k.ns,0),
                            upper=c(Inf,k.ns,1))
    }

    print(tcfit);cat("\n")

    ## Plot the fitting curve over raw data. Shift the time points back.
    p3_model = function(x){
        coef(tcfit)["alpha"]*exp(-coef(tcfit)["k.s"]*(x-min(t))) + 
            (1- coef(tcfit)["alpha"])*exp(-coef(tcfit)["k.ns"]*(x-min(t)))
    }
    #p3.model =function(t,k.s,k.ns,alpha){
    #  alpha*exp(-k.s*t) + (1-alpha)*exp(-k.ns*t)}
    curve(p3_model,
    #curve(p3.model(x-min(t),
    #               coef(tcfit)["k.s"],
    #               coef(tcfit)["k.ns"],
    #               coef(tcfit)["alpha"]),
        add=TRUE,col="red",lwd=4
    )
    ## Usually two-component fitting is better. Output this fitting result on
    ## the final plot as figure legend.
    par(mar=c(0, 0, 0, 0),xpd=FALSE)
    if (coef(tcfit)["k.s"]<coef(tcfit)["k.ns"]){
    result.text=c(
        expression(italic(P(t) == alpha*e^-k[s]*''^t+(1-alpha)*e^-k[ns]*''^t)),
                    as.expression(
                        bquote(italic('k'['s']==.(coef(tcfit)["k.s"])))),
                    as.expression(
                        bquote(italic('k'['ns']==.(coef(tcfit)["k.ns"])))),
                    as.expression(
                        bquote(italic(alpha==.(coef(tcfit)["alpha"])))),
                    as.expression(
                        bquote(italic('RSS'==.(deviance(tcfit))))),
                    " "
    )
    }
    else {
    result.text=c(
        expression(italic(P(t) == alpha*e^-k[s]*''^t+(1-alpha)*e^-k[ns]*''^t)),
                    as.expression(
                        bquote(italic('k'['s']==.(coef(tcfit)["k.ns"])))),
                    as.expression(
                        bquote(italic('k'['ns']==.(coef(tcfit)["k.s"])))),
                    as.expression(
                        bquote(italic(alpha==.(1-coef(tcfit)["alpha"])))),
                    as.expression(
                        bquote(italic('RSS'==.(deviance(tcfit))))),
                    " "
    )
    }
    legend("right",legend=result.text,pch=NULL,y.intersp=0.3,x.intersp=0.3,
           bty="n",cex=1)
    par(mar=c(5.1, 5.1, 4.1, 4.1),xpd=FALSE)

    return(tcfit)
}


## Master function for residence time (1-CDF) fitting.
fitRT=function(trackll=NULL,x.max=30,N.min=1.5,t.interval=0.5,
               maxiter.search=1e3,
                maxiter.optim=1e3,k.ns=FALSE){

    ############ Get trajectory information####################
    if(is.null(trackll)){
        trackll.label<-readline(cat("Enter the trackll you want to fit:    "))
        name=names(get(paste(trackll.label)))
        name=gsub("ST_","",name)
        title=paste("Residence time fitting of ",name,sep="")
        trackll<-get(paste(trackll.label))
    } else {
        name=names(trackll)
        name=gsub("ST_","",name)
        title=paste("Residence time fitting of ",name,sep="")
    }
    filter=c(min=N.min/t.interval,max=Inf)
    trackll<-filterTrack(trackll,filter=filter)

    ########## Generate 1-CDF of dwell time (trajectory length) ###############
    #library(mltools)
    trajLength<-vapply(trackll[[1]],function(x){(
        x$Frame[dim(x)[1]]-x$Frame[1]+1)*t.interval}, FUN.VALUE=double(1))
    CDF<-mltools::empirical_cdf(trajLength,ubounds=seq(t.interval, 
                                                       max(trajLength), 
                                                       by=t.interval))
    P<-(1-(CDF$CDF))
    ## Remove multiple "1" of P values, leaving only one "1" value.
    ## The rest P values will be used for fitting.
    if(length(which(P == 1))>=2){
    P.fit<-P[-(seq_len(length(which(P == 1))-1))]
    }else{
    P.fit<-P
    }
    ## Set the x axis range according to user input (t.plot), only the part in 
    ## the range will be plotted.
    ## However, all the P.fit and t.fit values will be used for fitting.
    t.plot<-seq(t.interval,x.max,by=t.interval)
    if(length(which(P == 1))>=2){
    t.fit<-seq(t.interval*length(which(P == 1)),max(trajLength),by=t.interval)
    }else{
    t.fit<-seq(t.interval,max(trajLength),by=t.interval)
    }

    ##### Plot raw data ###############
    par(mar=c(5.1, 5.1, 4.1, 4.1),xpd=FALSE)
    par(mfrow=c(1,1),bg="white",fg="black")
    plot(t.plot,P[seq_along(t.plot)],main=title,xlab="Time (s)",
         ylab="Uncorrected survival probability (1-CDF)",
        cex.main=1.5,xlim=c(0,x.max),ylim=c(0,max(P)),cex.axis=1.5,cex.lab=1.5,
        pch=20,cex=1.5)

    ###### Fitting and add fitting curves over raw data###############
    result.1=.one.comp.fit.rt(name,t=t.fit,t.interval,
                              P=P.fit,start=list(k.s=c(1/600,1/t.interval)),
                            maxiter.optim=maxiter.optim)
    if(k.ns == FALSE){
    result.2=.two.comp.fit.rt(name,t=t.fit,t.interval,P=P.fit,
                              start=list(k.s=c(1/600,1/t.interval),
                                         k.ns=c(1/600,1/t.interval),
                                         alpha=c(1e-3,1)),
                                maxiter.search=maxiter.search,
                                maxiter.optim=maxiter.optim,k.ns=k.ns)
    }
    ## Use a fixex k.ns value given by user.
    else{
    result.2=.two.comp.fit.rt(name,t=t.fit,P=P.fit,
                              start=list(k.s=c(1/600,1/t.interval),
                                         k.ns=k.ns,alpha=c(1e-3,1)),
                                maxiter.search=maxiter.search,
                                maxiter.optim=maxiter.optim,k.ns=k.ns)
    }


    ####### Add legend to the plot ##################
    legend("topright",legend=c("Raw data","One component fit",
                               "Two component fit",
                               paste0("n = ",length(trackll[[1]]))),
            pch=NA,lty=c(3,1,1,NA),lwd=4,col=c("black","green","red"),cex=1,
            y.intersp=0.5,x.intersp=0.3,bty = "n")
    legend("bottomleft",legend=bquote(italic('N'['min']==.(N.min))*' s'),
           bty = "n")

    return(list("One-Component Fit"=result.1,"Two-Component Fit"=result.2))
}