R/Dcoef.Helpers.R

Defines functions Dcoef.log rsquare.filter.roll rsquare.filter Dcoef.perc Dcoef.roll Dcoef.static

## Dcoef helpers

## < r^2 > = 2 * d * D * △t = 4 D △t

## < r^2 >, mean square displacement
## d, dimensionality of the problem
## D, diffusion coefficient
## t, time

## D △t = < r^2 > / 4


# divide the values by 2 and 2
# first 2 comes from Einstein Brownian motion equation k = sqrt(2 * D * dT)
# second 2 comes from dimension

##------------------------------------------------------------------------------
## Dcoef.static

## return a list of coefficients
Dcoef.static=function(MSD,lag.start=2,lag.end=5,t.interval=0.010){

    # linear fitting of the MSD curves between dt 2 and 5
    cat("lag.start  ",lag.start,"\t","lag.end  ",lag.end,"\n")

    # specifying x
    x=(lag.start:lag.end)*t.interval
    dimension=2

    # fitting y to x
    D.coef=lapply(MSD,function(msd){
            apply(msd[lag.start:lag.end,],MARGIN=2,function(y){
                fit=lm(y~x)
                MSDslope=coefficients(fit)[2]/(2*dimension)
                MSDcorr=summary(fit)$r.squared
                sc=c(MSDslope,MSDcorr)
                names(sc)=c("slope","corr")
                return(sc)
            })
        })
    # this is the lapply-for setup, see below for alternative for-lapply setup
    # and its comparison

    # change shape of the matrix
    D.coef = lapply(D.coef, function(x) {
        t(x)
    })
    
    return(D.coef)
}

# see below for alternative for-lapply setup; reverse lapply-for setup is
# better, list are named and can easily be paralleled; the cons being it can
# only pass in one parameter, if more than two is not as easily setup

# for-lapply setup
#     D.coef=list()
#     for (i in 1:length(MSD)){
#         D.coef[[i]]=apply(MSD[[i]][lag.start:lag.end,],MARGIN=2,function(y){
#             fit=lm(y~x)
#             MSDslope=coefficients(fit)[2]/2/dimension
#             MSDcorr=summary(fit)$r.squared
#             sc=c(MSDslope,MSDcorr)
#             names(sc)=c("slope","corr")
#             return(sc)
#         })
#
#     }
#     names(D.coef)=names(MSD)

# change shape of the matrix
#     D.coef=sapply(D.coef,function(x){
#         x=t(x)
#         # colnames(x)=c("slope","corr")
#         return(x)
#     },simplify = F)



##------------------------------------------------------------------------------
## .Dcoef.roll

## cant use roll on MSD method=percentage, as its MSD is different
## length, MSD method = percentage is calculated differently, using
## msd.track.vecdt(), instead of msd().

Dcoef.roll=function(MSD,window.size=4,t.interval=0.010){

    D.coef=list()
    D.coef.roll=list()
    names.D.coef.roll=c()
    window = seq_len(window.size)
    dt=dim(MSD[[1]])[1]
    dimension=2


    for (i in seq_along(MSD)) {

        # zero stands for the first window as subsetting using window+j
        for ( j in 0:(dt-window.size)){
            print(window+j)
            x=window+j
            D.coef.roll[[j+1]]=apply(MSD[[i]][window+j,],
                                     MARGIN=2,
                                     function(y){
                                         x=x*t.interval
                                         fit=lm(y~x)
                                             MSDslope = 
                                                 coefficients(fit)[2]/2/dimension
                                         MSDcorr=summary(fit)$r.squared
                                         sc=c(MSDslope,MSDcorr)
                                         names(sc)=c("slope","corr")
                                         return(sc)
                                     })
            names.D.coef.roll=c(names.D.coef.roll,
                                paste(as.character(window+j),collapse=" "))
        }

        names(D.coef.roll)=names.D.coef.roll

        D.coef[[i]]=D.coef.roll
        names.D.coef.roll=c()
    }
    names(D.coef)=names(MSD)


    # change shape of the matrix
    for (i in seq_along(D.coef)) {
        for (j in seq_along(D.coef[[i]])) {
            D.coef[[i]][[j]]=t(D.coef[[i]][[j]])
        }
    }

#     # alternative
#     D.coef=lapply(D.coef,function(x){
#         sapply(x,function(x){t(x)},simplify = F)
#
#     })

    return(D.coef)
}



# alternative treat Dcoef.roll as special case of Dcoef.static, except
# start and end is rolling

# D.coef.roll=function(MSD,window.size=4,t.interval=0.010){
# 
#     dt=dim(MSD[[1]])[1]
# 
#     start=1:(dt-window.size+1)
#     end=lag.start+window.size-1
# 
#     D.coef=list()
#     D.coef.vec=c()
#     for (i in start){
#         dd=Dcoef.static(MSD,lag.start=start[i],lag.end=end[i],t.interval=0.010)
#         D.coef.vec=c(D.coef.vec,list(dd))
#     }
# 
# 
#     # rearrange
#     D.coef.vec
# 
#     mapply(c,start,end)
# 
#     # generate names
#     name.vec=c()
#     for (i in start) {
#         name=paste(start[[i]]:end[[i]],collapse=" ")
#         name.vec=c(name.vec,name)
#     }
# 
#     names(D.coef.vec)=name.vec
# 
# 
# #n=c()
#     # take the first item of the list to form a new list
# #     for (i in start){
# #         for (file.number in 1:length(MSD)){
# #             n=c(n,list(D.coef.vec[[i]][file.number]))
# #         }
# #
# #     }
# #
# #     n=c()
# #     for (file.number in 1:length(MSD)){
# #
# #         for (i in start){
# #
# #             n=c(n,list(D.coef.vec[[i]][file.number]))
# #
# #         }
# #     }
# #
# 
# 
# 
#     name2=names(MSD)
#     l=list()
#     for (j in name2 ){
#         ind=which(name2 == j)
#         l[[ind]]=lapply(D.coef.vec,function(x,i){x[i]},i=j)
#     }
# 
#     names(l)=name2
# 
#     # remove last level of list
#     z=l
#     for (i in 1:length(l)){
#         for (j in 1:length(l[[i]])){
#             z[[i]][j]=l[[i]][[j]]
#         }
#     }
# 
#     #z[[1]][[1]]=l[[1]][1]
# 
# }





##------------------------------------------------------------------------------
## percentage
## To determine the diffusion constant from a trajectory, a line was fit to MSD(nt) with n running from 1 to the largest integer less than or equal to L/4 (Saxton, 1997)..

## "The short-range diffusion coefficients D*(0: 4) and D*(O: 8) are well determined; the longest-range diffusion coefficients D*(0: 512) and D*(0: 1024) are so broadly dis- tributed as to be useless (Fig. 2 a)" (Saxton, 1997)
## from this the maximum track length used for determining diffusion coefficient should be restricted to 32, which yeilds 1/4*32=8. We can then use the trimTrack() to realize this cut-off.

## 0~4 frames equals 4 steps, so the minimum frame taken into account in sojourner's numbering system (which start with frame 1 rather than 0) should be 1~5, D(1:5) and D(1:9) to allow sampling from 0~4. Anything below 8, should be directly using percentage =1, what tracks that has length 9

# "D*(2: 4), the short-range diffusion coefficient used by Kusumi et al. (1993).
# A short-range D* has the advantages that it is accurately obtained and the
# influence of directed and confined motion is minimized. D*(2: 4) advantageous
# for analyzing experi- mental data. The range of D is wide enough that it is
# convenient to plot the distribution of log D. (Saxton, 1997)

# any track length >32, take 32
# 22~32, percentage 1/4, use first 5~8 points, excluding initial point (ie.2~6-2~8);
# 15~21, percentage 0.4, use first 5~8 points, excluding initial point (ie.2~6-2~8);
# 10~14, percentage 0.6, use first 5~8 points, excluding initial point (i.e.2~6-2~8);(1)
# 7~9, percentage 1, use all 5~9 frames, excluding initital point (i.e dt 2~6-2~8).(2)
# 5~6, percentage 1, use all points (i.e. dt 2~4-2~5)(3)

# (1) 10*0.6 = 6 frames, 6 time lags, exclude 1st, 5 points for fitting
# (2) 7 frames, 6 time lags, exclude 1 st, 5 points for fitting.
# (3) 5 frames, 4 time step, remove 2, left 2, use all points if less than 7

# this tired percentage setup makes it always use first 2~4-2~8 points for fitting and deriving diffusion coefficient.



Dcoef.perc = function(trackll, percentage = 0.25, weighted = FALSE, 
    filter = c(min = 7, max = Inf), trimmer = c(min = 1, max = 31), 
    resolution = 0.107, t.interval = 0.01) {

    # calculate msd using msd.perc()
    msd.lst = msd.perc(trackll, percentage = percentage, filter = filter, 
        trimmer = trimmer, resolution = resolution, output = FALSE)

    # exclude the first time lag for fitting for all category
    msd.remove1st=lapply(msd.lst,function(x){
        for (i in seq_along(x)) {
            x[[i]] = x[[i]][-1]
        }
        return(x)
    })


#     # the reverse setup is not as convenient
#     msd.remove2st=list()
#     for (i in 1:length(msd.list)){
#         msd.remove2st[[i]]=lapply(msd.list[[i]],function(x){
#             x=x[-1]
#         })
#     }
#     names(msd.remove2st)=names(msd.lst)


    # copy trackll's structure
    D.coef=list()
    length(D.coef)=length(msd.lst)
    names(D.coef)=names(msd.lst)


    dimension=2


    ## this works for trajecotries length >=7 frames
    for (i in seq_along(msd.remove1st)) {
        
        for (j in seq_along(msd.remove1st[[i]])) {

            y=msd.remove1st[[i]][[j]]
            len=length(msd.remove1st[[i]][[j]])

            # exclude 1st x value for fitting
            x = seq(from = t.interval * 2, to = (len + 1) * t.interval, 
                by = t.interval)
            
            if (weighted == TRUE) {
                w = seq_len(len)
                fit = lm(y ~ x, weights = w)
            }else{
                fit=lm(y~x)
            }

            MSDslope=coefficients(fit)[2]/2/dimension
            MSDcorr=summary(fit)$r.squared
            sc=c(MSDslope,MSDcorr)
            names(sc)=c("slope","corr")
            D.coef[[i]][[j]]=sc
        }

    }


    # lapply can't be used as need two variable function
    # however you can use lapply and for loop inside

#     for (i in 1:length(y)){
#         lapply(y[[i]],function(x,t.interval){
#             len=length(x)
#
#         })
#    }


    # this changes shape/format into a matrix
    D.coef = lapply(D.coef, function(x) {
        do.call(rbind, x)
    })

    return(D.coef)

}


# # an attempt to incorperate trajecotry has length less than 6
#     # remove first element of msd.lst
#     msd.remove1st=lapply(msd.lst,function(x){
#         for (i in 1:length(x)){
#
#             # if 3=<frames<=6 are selected, all points needs to be used
#
#             if (length(x[[i]])<=3) {stop(
#                 "tracks must at least have 3 time steps (i.e. 4 frames) for confident coefficient fitting, please filter track first.")
#
#             }else if (length(x[[i]])>=5 & length(x[[i]])<=6){
#                 x[[i]]=x[[i]]
#             }else{
#                 x[[i]]=x[[i]][-1]
#             }
#
#         }
#         return(x)
#     })
#






# x=lst()
# for (i in 1:length(D.coef)){
#     x[[i]]=do.call(rbind,D.coef[i])
# }
#
# dx=sapply(D.coef,function(x){
#     do.call(rbind,x)})
# # this returns a matrix
#
# }
# names(D.coef)=names(MSD)


# Weights are set to be the number of points (length of trajectory?)
# averaged to generate the mean square displacement value at the given
# delay (in this case, it is the 25%). Thus, we give more weight to MSD
# curves with greater certainty (larger number of elements averaged).

# weights are essentially the lenght of the msd list

#     % - M the weighted mean of MSD for each delay
#     % - STD the weighted standard deviation
#     % - N the number of degrees of freedom in the weighted mean
#     % (see http://en.wikipedia.org/wiki/Weighted_mean)

# plot those coef and get the mean of all

# The only requirement for weights is that the vector supplied must be
# the same length as the data.

# simplest weights index of the msd (as it shows how many points is
# used to generate the msd, steps)

# more sophistacted  1/theta^2 (variance)



# would be nice to have a subsetting method for a sojourner class,
# there are so many levels of subsetting, each time it needs a lapply

## instead of trackll, it maybe better to store tracks in data.table,
## then folders instead of (second) list of data.frame, it may be worth
## the effort simply making it a data.frame (data.table) with 4th column
## as trajectory numbers.

## it makes program (maybe) easier, computation faster

##------------------------------------------------------------------------------
## rsquare.filter
## r.squared >= rsquare as quality control

##export rsquare.filter
rsquare.filter=function(D.coef,rsquare=0.8){

    cat("\nApplying r square filter...",rsquare,"\n")

    slope=lapply(D.coef,function(x){x[,"slope"]})   # x[colnames(x) == "slope"]
    corr=lapply(D.coef,function(x){x[,"corr"]})    # x[colnames(x) == "corr"]

    # the "still" molecule wil generate a NA in correlation, thus is.na(x) == F
    corr.filter=lapply(corr,function(x){x>=rsquare & is.na(x) == FALSE})

    
    # add corr and slope in the output

    # mapply("[",D.coef,corr.filter,SIMPLIFY=F)
    # directly mapply to D.coef, lost the matrix structure

    D.coef.slope.subset = mapply("[", slope, corr.filter, SIMPLIFY = FALSE)
    D.coef.corr.subset = mapply("[", corr, corr.filter, SIMPLIFY = FALSE)
    D.coef.subset = mapply(cbind, D.coef.slope.subset, D.coef.corr.subset, 
        SIMPLIFY = FALSE)
    
    # add colnames
    D.coef.subset=lapply(D.coef.subset,function(x){
        colnames(x)=c("slope","corr")
        return(x)
    })

    return(D.coef.subset)
}

rsquare.filter.roll=function(D.coef,rsquare=0.8){
    D.coef.subset=list()
    length(D.coef.subset)=length(D.coef)
    names(D.coef.subset)=names(D.coef)
    for (i in seq_along(D.coef)) {
        for (j in seq_along(D.coef[[i]])) {
            D.coef.subset[[i]][j] = rsquare.filter(D.coef[[i]][j], 
                rsquare = rsquare)
            #names(D.coef.subset[[i]][j])=names(D.coef[[i]][j])

        }
        # increase a level to name the list
        names(D.coef.subset[[i]])=names(D.coef[[i]])
    }
    return(D.coef.subset)
}




# to varify the fit
# fit=lm(MSD[[1]][2:5,][,1]~x); plot(fit)

# DONE: output goodness of fit


## the next two filterTrack blocks maybe combined to increase efficiency,
## however for now the efficiency is secondary, let the logic stand
## clear, then improve the efficiency. as many times efficiency is at
## the expense of sacrifice clearness of the code, hard to read or
## interpretate later.


## alternative  works
#             for (m in 1:length(corr)){
#                 for (n in 1:length(corr[[m]])){
#                     if (corr[[m]][n]<rsquare)
#                         slope[[m]][n]=NaN
#                 }
#             }

# D.coef.subset[[i]]=slope



## filter without losing location information
## or filter in the last step, has to be replaced before log
## if corr <rsquare, replace slope with NaN

## alternative
#     for (i in 1: length(D.coef)){
#
#         for (j in 1: length(D.coef[[i]])){
#
#             # dim(D.coef[[i]][[j]])[2] is the length of the matrix
#             for (k in 1:dim(D.coef[[i]][[j]])[2]){
#
#
#                 if (D.coef[[i]][[j]][,k]["corr"]<rsquare)
#                     D.coef[[i]][[j]][,k]["slope"]=NaN
#
#             }
#         }
#     }







##------------------------------------------------------------------------------
## Dcoef.log
##@export Dcoef.log
# Dcoef.log=function(D.coef.subset,static=T){
#     if (static){
#
#         #Log.D.coef=suppressWarnings(lapply(D.coef,log))
#         # worth noting "log computes logarithms, by default natural logarithms"
#         Log.D.coef=lapply(D.coef.subset,log10)
#
#         # remove NaN if wanted
#         # Log.D.coef=lapply(Log.D.coef, function(x){
#         #    x[!is.nan(x)]
#         #})
#     }else{
#         ## logorithm
#         Log.D.coef=list()
#         for (i in 1:length(D.coef.subset)){
#             #Log.D.coef=suppressWarnings(lapply(D.coef,log))
#             Log.D.coef[[i]]=lapply(D.coef.subset[[i]],log)
#         }
#         names(Log.D.coef)=names(D.coef.subset)
#         return(Log.D.coef)
#
#     }
#     return(Log.D.coef)
#
# }

# not used but keep
Dcoef.log=function(D.coef.subset){


        #Log.D.coef=suppressWarnings(lapply(D.coef,log))
        # worth noting "log computes logarithms, by default natural logarithms"
        Log.D.coef=lapply(D.coef.subset,log10)

        # remove NaN if wanted
        # Log.D.coef=lapply(Log.D.coef, function(x){
        #    x[!is.nan(x)]
        #})

    return(Log.D.coef)
}
sheng-liu/smt-beta-0.3.9.1 documentation built on April 2, 2021, 1:38 a.m.