R/fitTimeSeries.R
In metagenomeSeq: Statistical analysis for sparse high-throughput sequencing

Documented in fitMultipleTimeSeries fitSSTimeSeries fitTimeSeries plotClassTimeSeries plotTimeSeries ssFit ssIntervalCandidate ssPerm ssPermAnalysis trapz ts2MRexperiment

#' Trapezoidal Integration
#' 
#' Compute the area of a function with values 'y' at the points 'x'.
#' Function comes from the pracma package.
#' 
#' @param x x-coordinates of points on the x-axis
#' @param y y-coordinates of function values
#' @return Approximated integral of the function from 'min(x)' to 'max(x)'. 
#'  Or a matrix of the same size as 'y'.
#' @rdname trapz
#' @export
#' @examples
#' 
#' # Calculate the area under the sine curve from 0 to pi:
#'  n <- 101
#'  x <- seq(0, pi, len = n)
#'  y <- sin(x)
#'  trapz(x, y)          #=> 1.999835504
#' 
#' # Use a correction term at the boundary: -h^2/12*(f'(b)-f'(a))
#'  h  <- x[2] - x[1]
#'  ca <- (y[2]-y[1]) / h
#'  cb <- (y[n]-y[n-1]) / h
#'  trapz(x, y) - h^2/12 * (cb - ca)  #=> 1.999999969
#'
trapz <- function(x,y){
    if (missing(y)) {
        if (length(x) == 0) 
            return(0)
        y <- x
        x <- 1:length(x)
    }
    if (length(x) == 0) 
        return(0)
    if (!(is.numeric(x) || is.complex(x)) || !(is.numeric(y) || 
        is.complex(y))) 
        stop("Arguments 'x' and 'y' must be real or complex.")
    m <- length(x)
    xp <- c(x, x[m:1])
    yp <- c(numeric(m), y[m:1])
    n <- 2 * m
    p1 <- sum(xp[1:(n - 1)] * yp[2:n]) + xp[n] * yp[1]
    p2 <- sum(xp[2:n] * yp[1:(n - 1)]) + xp[1] * yp[n]
    return(0.5 * (p1 - p2))
}

#' smoothing-splines anova fit
#' 
#' Sets up a data-frame with the feature abundance, 
#' class information, time points, sample ids and returns
#' the fitted values for the fitted model.
#' 
#' @param formula Formula for ssanova. Of the form: abundance ~ ... where ... includes any pData slot value.
#' @param abundance Numeric vector of abundances.
#' @param class Class membership (factor of group membership).
#' @param time Time point vector of relative times (same length as abundance).
#' @param id Sample / patient id.
#' @param include Parameters to include in prediction.
#' @param pd Extra variable.
#' @param ... Extra parameters for ssanova function (see ?ssanova).
#' @return \itemize{A list containing:
#' \item     data        : Inputed data
#' \item     fit         : The interpolated / fitted values for timePoints
#' \item     se          : The standard error for CI intervals
#' \item     timePoints  : The time points interpolated over
#' }
#' @seealso \code{\link{cumNorm}} \code{\link{fitTimeSeries}} \code{\link{ssPermAnalysis}} \code{\link{ssPerm}} \code{\link{ssIntervalCandidate}}
#' @rdname ssFit
#' @export
#' @examples
#'
#' # Not run
#'
ssFit <- function(formula,abundance,class,time,id,include=c("class", "time:class"),pd,...) {
    df = data.frame(abundance = abundance, class = factor(class),
       time=time,id = factor(id),pd)
    
    # The smoothing splines anova model
    if(missing(formula)){
        mod = gss::ssanova(abundance ~ time * class, data=df,...)
    } else{
        mod = gss::ssanova(formula,data=df,...)
    }

    fullTime = seq(min(df$time), max(df$time), by=1)
    values = data.frame(time=fullTime, class=factor(levels(df[,"class"]))[2])
    fit = predict(mod, values, include=include, se=TRUE)
    
    res = list(data=df, fit=fit$fit, se=fit$se, timePoints=fullTime)
    return(res)
}

#' class permutations for smoothing-spline time series analysis
#' 
#' Creates a list of permuted class memberships for the time series permuation tests.
#' 
#' @param df Data frame containing class membership and sample/patient id label.
#' @param B Number of permutations.
#' @return A list of permutted class memberships
#' @seealso \code{\link{cumNorm}} \code{\link{fitTimeSeries}} \code{\link{ssFit}} \code{\link{ssPermAnalysis}} \code{\link{ssIntervalCandidate}}
#' @rdname ssPerm
#' @examples
#'
#' # Not run
#'
ssPerm <- function(df,B) {
    dat = data.frame(class=df$class, id=df$id)
    # id  = table(dat$id)
    id = table(interaction(dat$class,dat$id))
    id = id[id>0]
    classes = unique(dat)[,"class"]
    permList = lapply(1:B,function(i){
        rep(sample(classes, replace=FALSE),id)
    })
    return(permList) 
}

#' smoothing-splines anova fits for each permutation
#' 
#' Calculates the fit for each permutation and estimates 
#' the area under the null (permutted) model for interesting time 
#' intervals of differential abundance.
#' 
#' @param data Data used in estimation.
#' @param formula Formula for ssanova. Of the form: abundance ~ ... where ... includes any pData slot value.
#' @param permList A list of permutted class memberships
#' @param intTimes Interesting time intervals.
#' @param timePoints Time points to interpolate over.
#' @param include Parameters to include in prediction.
#' @param ... Options for ssanova
#' @return A matrix of permutted area estimates for time intervals of interest.
#' @seealso \code{\link{cumNorm}} \code{\link{fitTimeSeries}} \code{\link{ssFit}} \code{\link{ssPerm}} \code{\link{ssIntervalCandidate}}
#' @rdname ssPermAnalysis
#' @export
#' @examples
#'
#' # Not run
#'
ssPermAnalysis <- function(data,formula,permList,intTimes,timePoints,include=c("class", "time:class"),...){
    resPerm=matrix(NA, length(permList), nrow(intTimes))
    permData=data
    case = data.frame(time=timePoints, class=factor(levels(data$class)[2]))
    for (j in 1:length(permList)){
        
        permData$class = permList[[j]]
        # The smoothing splines anova model
        if(!missing(formula)){
            permModel = gss::ssanova(formula, data=permData,...)
        } else{
            permModel = gss::ssanova(abundance ~ time * class,data=permData,...)
        }

        permFit = cbind(timePoints, (2*predict(permModel,case,include=include, se=TRUE)$fit))
            for (i in 1:nrow(intTimes)){
                permArea=permFit[which(permFit[,1]==intTimes[i,1]) : which(permFit[,1]==intTimes[i, 2]), ]
                resPerm[j, i]=metagenomeSeq::trapz(x=permArea[,1], y=permArea[,2])
            }
        if(j%%100==0) show(j)
    }
    return(resPerm)
}

#' calculate interesting time intervals
#' 
#' Calculates time intervals of interest using SS-Anova fitted confidence intervals.
#' 
#' @param fit SS-Anova fits.
#' @param standardError SS-Anova se estimates.
#' @param timePoints Time points interpolated over.
#' @param positive Positive region or negative region (difference in abundance is positive/negative).
#' @param C Value for which difference function has to be larger or smaller than (default 0).
#' @return Matrix of time point intervals of interest
#' @seealso \code{\link{cumNorm}} \code{\link{fitTimeSeries}} \code{\link{ssFit}} \code{\link{ssPerm}} \code{\link{ssPermAnalysis}}
#' @rdname ssIntervalCandidate
#' @export
#' @examples
#'
#' # Not run
#'
ssIntervalCandidate <- function(fit, standardError, timePoints, positive=TRUE,C=0){
    lowerCI = (2*fit - (1.96*2*standardError))
    upperCI = (2*fit + (1.96*2*standardError))
    if (positive){
        abundanceDifference = which( lowerCI>=0 & abs(lowerCI)>=C )
    }else{
        abundanceDifference = which( upperCI<=0 & abs(upperCI)>=C )
    }
    if (length(abundanceDifference)>0){
        intIndex=which(diff(abundanceDifference)!=1)
        intTime=matrix(NA, (length(intIndex)+1), 4)
        if (length(intIndex)==0){
            intTime[1,1]=timePoints[abundanceDifference[1]]
            intTime[1,2]=timePoints[tail(abundanceDifference, n=1)]
        }else{
            i=1
            while(length(intTime)!=0 & length(intIndex)!=0){
                intTime[i,1]=timePoints[abundanceDifference[1]]
                intTime[i,2]=timePoints[abundanceDifference[intIndex[1]]]
                abundanceDifference=abundanceDifference[-c(1:intIndex[1])]
                intIndex=intIndex[-1]
                i=i+1
            }
        intTime[i,1] = timePoints[abundanceDifference[1]]
        intTime[i,2] = timePoints[tail(abundanceDifference, n=1)]
        }
    }else{
        intTime=NULL   
    }   
    return(intTime)    
}

#' Discover differentially abundant time intervals using SS-Anova
#' 
#' Calculate time intervals of interest using SS-Anova fitted models.
#' Fitting is performed uses Smoothing Spline ANOVA (SS-Anova) to find interesting intervals of time. 
#' Given observations at different time points for two groups, fitSSTimeSeries 
#' calculates a  function that models the difference in abundance between two 
#' groups across all time. Using permutations we estimate a null distribution 
#' of areas for the time intervals of interest and report significant intervals of time.
#' Use of the function for analyses should cite:
#' "Finding regions of interest in high throughput genomics data using smoothing splines"
#' Talukder H, Paulson JN, Bravo HC. (In preparation)
#' 
#' @param obj metagenomeSeq MRexperiment-class object.
#' @param formula Formula for ssanova. Of the form: abundance ~ ... where ... includes any pData slot value.
#' @param feature Name or row of feature of interest.
#' @param class Name of column in phenoData of MRexperiment-class object for class memberhip.
#' @param time Name of column in phenoData of MRexperiment-class object for relative time.
#' @param id Name of column in phenoData of MRexperiment-class object for sample id.
#' @param lvl Vector or name of column in featureData of MRexperiment-class object for aggregating counts (if not OTU level).
#' @param include Parameters to include in prediction.
#' @param C Value for which difference function has to be larger or smaller than (default 0).
#' @param B Number of permutations to perform
#' @param norm When aggregating counts to normalize or not.
#' @param log Log2 transform.
#' @param sl Scaling value.
#' @param featureOrder Hierarchy of levels in taxonomy as fData colnames
#' @param ... Options for ssanova
#' @return List of matrix of time point intervals of interest, Difference in abundance area and p-value, fit, area permutations, and call.
#' @return A list of objects including:
#' \itemize{
#'  \item{timeIntervals - Matrix of time point intervals of interest, area of differential abundance, and pvalue.}
#'  \item{data  - Data frame of abundance, class indicator, time, and id input.}
#'  \item{fit - Data frame of fitted values of the difference in abundance, standard error estimates and timepoints interpolated over.}
#'  \item{perm - Differential abundance area estimates for each permutation.}
#'  \item{call - Function call.}
#' }
#' @rdname fitSSTimeSeries
#' @seealso \code{\link{cumNorm}} \code{\link{ssFit}} \code{\link{ssIntervalCandidate}} \code{\link{ssPerm}} \code{\link{ssPermAnalysis}} \code{\link{plotTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitSSTimeSeries(obj=mouseData,feature="Actinobacteria",
#'    class="status",id="mouseID",time="relativeTime",lvl='class',B=2)
#'
fitSSTimeSeries <- function(obj,formula,feature,class,time,id,lvl=NULL,include=c("class", "time:class"),C=0,B=1000,norm=TRUE,log=TRUE,sl=1000,featureOrder=NULL,...) {
    
    if(!is.null(lvl)){
        aggData = aggregateByTaxonomy(obj,lvl,norm=norm,sl=sl, featureOrder=featureOrder)
        abundance = MRcounts(aggData,norm=FALSE,log=log,sl=1)[feature,]
    } else { 
        abundance = MRcounts(obj,norm=norm,log=log,sl=sl)[feature,]
    }
    class = pData(obj)[,class]
    time  = pData(obj)[,time]
    id    = pData(obj)[,id]
    if(any(sapply(list(id,time,class),length)==0)){
        stop("provide class, time, and id names")
    }

    if(!missing(formula)){
        prep=ssFit(formula=formula,abundance=abundance,class=class,
            time=time,id=id,include=include,pd=pData(obj),...)
    } else {
        prep=ssFit(abundance=abundance,class=class,time=time,id=id,
            include=include,pd=pData(obj),...)
    }
    indexPos = ssIntervalCandidate(fit=prep$fit, standardError=prep$se, 
        timePoints=prep$timePoints, positive=TRUE,C=C)
    indexNeg = ssIntervalCandidate(fit=prep$fit, standardError=prep$se, 
        timePoints=prep$timePoints, positive=FALSE,C=C)
    indexAll = rbind(indexPos, indexNeg)

    if(sum(indexAll[,1]==indexAll[,2])>0){
        indexAll=indexAll[-which(indexAll[,1]==indexAll[,2]),]
    }

    fit = 2*prep$fit
    se  = 2*prep$se
    timePoints = prep$timePoints
    fits = data.frame(fit = fit, se = se, timePoints = timePoints)
    
    if(!is.null(indexAll)){
      if(length(indexAll)>0){
        indexAll=matrix(indexAll,ncol=4)
        colnames(indexAll)=c("Interval start", "Interval end", "Area", "p.value")
        predArea    = cbind(prep$timePoints, (2*prep$fit))
        permList    = ssPerm(prep$data,B=B)
        if(!missing(formula)){
            permResult  = ssPermAnalysis(data=prep$data,formula=formula,permList=permList,
                intTimes=indexAll,timePoints=prep$timePoints,include=include,...)
        } else {
            permResult  = ssPermAnalysis(data=prep$data,permList=permList,
                intTimes=indexAll,timePoints=prep$timePoints,include=include,...)
        }
        
        for (i in 1:nrow(indexAll)){
            origArea=predArea[which(predArea[,1]==indexAll[i,1]):which(predArea[,1]==indexAll[i, 2]), ]
            actArea=trapz(x=origArea[,1], y=origArea[,2])
            indexAll[i,3] = actArea
            if(actArea>0){
                indexAll[i,4] = 1 - (length(which(actArea>permResult[,i]))+1)/(B+1)
            }else{
                indexAll[i,4] = (length(which(actArea>permResult[,i]))+1)/(B+1)
            }
        if(indexAll[i,4]==0){ 
        indexAll[i,4] = 1/(B+1)
        }
        }

        res = list(timeIntervals=indexAll,data=prep$data,fit=fits,perm=permResult)
        return(res)
      }
    }else{
        indexAll = "No statistically significant time intervals detected"
        res = list(timeIntervals=indexAll,data=prep$data,fit=fits,perm=NULL)
        return(res)
    }
}

#' Discover differentially abundant time intervals
#' 
#' Calculate time intervals of significant differential abundance.
#' Currently only one method is implemented (ssanova). fitSSTimeSeries is called with method="ssanova".
#' 
#' @param obj metagenomeSeq MRexperiment-class object.
#' @param formula Formula for ssanova. Of the form: abundance ~ ... where ... includes any pData slot value.
#' @param feature Name or row of feature of interest.
#' @param class Name of column in phenoData of MRexperiment-class object for class memberhip.
#' @param time Name of column in phenoData of MRexperiment-class object for relative time.
#' @param id Name of column in phenoData of MRexperiment-class object for sample id.
#' @param method Method to estimate time intervals of differentially abundant bacteria (only ssanova method implemented currently).
#' @param lvl Vector or name of column in featureData of MRexperiment-class object for aggregating counts (if not OTU level).
#' @param include Parameters to include in prediction.
#' @param C Value for which difference function has to be larger or smaller than (default 0).
#' @param B Number of permutations to perform.
#' @param norm When aggregating counts to normalize or not.
#' @param log Log2 transform.
#' @param sl Scaling value.
#' @param featureOrder Hierarchy of levels in taxonomy as fData colnames
#' @param ... Options for ssanova
#' @return List of matrix of time point intervals of interest, Difference in abundance area and p-value, fit, area permutations, and call.
#' @return A list of objects including:
#' \itemize{
#'  \item{timeIntervals - Matrix of time point intervals of interest, area of differential abundance, and pvalue.}
#'  \item{data  - Data frame of abundance, class indicator, time, and id input.}
#'  \item{fit - Data frame of fitted values of the difference in abundance, standard error estimates and timepoints interpolated over.}
#'  \item{perm - Differential abundance area estimates for each permutation.}
#'  \item{call - Function call.}
#' }
#' @rdname fitTimeSeries
#' @seealso \code{\link{cumNorm}} \code{\link{fitSSTimeSeries}} \code{\link{plotTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitTimeSeries(obj=mouseData,feature="Actinobacteria",
#'    class="status",id="mouseID",time="relativeTime",lvl='class',B=2)
#'
fitTimeSeries <- function(obj,formula,feature,class,time,id,method=c("ssanova"),
                        lvl=NULL,include=c("class", "time:class"),C=0,B=1000,
                        norm=TRUE,log=TRUE,sl=1000,featureOrder=NULL,...) {
    if(method=="ssanova"){
        if(requireNamespace("gss")){
            if(missing(formula)){
                res = fitSSTimeSeries(obj=obj,feature=feature,class=class,time=time,id=id,
                        lvl=lvl,C=C,B=B,norm=norm,log=log,sl=sl,include=include,featureOrder=featureOrder,...)
            } else {
                res = fitSSTimeSeries(obj=obj,formula=formula,feature=feature,class=class,
                        time=time,id=id,lvl=lvl,C=C,B=B,norm=norm,log=log,sl=sl,
                        include=include,featureOrder=featureOrder,...)
            }
        }
    }
    res = c(res,call=match.call())
    return(res)
}

#' Plot difference function for particular bacteria
#' 
#' Plot the difference in abundance for significant features.
#' 
#' @param res Output of fitTimeSeries function
#' @param C Value for which difference function has to be larger or smaller than (default 0).
#' @param xlab X-label.
#' @param ylab Y-label.
#' @param main Main label.
#' @param ... Extra plotting arguments.
#' @return Plot of difference in abundance for significant features.
#' @rdname plotTimeSeries
#' @seealso \code{\link{fitTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitTimeSeries(obj=mouseData,feature="Actinobacteria",
#'    class="status",id="mouseID",time="relativeTime",lvl='class',B=10)
#' plotTimeSeries(res)
#'
plotTimeSeries<-function(res,C=0,xlab="Time",ylab="Difference in abundance",main="SS difference function prediction",...){
    fit = res$fit$fit
    se  = res$fit$se
    timePoints = res$fit$timePoints
    confInt95 = 1.96
    sigDiff = res$timeIntervals

    minValue=min(fit-(confInt95*se))-.5
    maxValue=max(fit+(confInt95*se))+.5

    plot(x=timePoints, y=fit, ylim=c(minValue, maxValue), xlab=xlab, ylab=ylab, main=main, ...)

    for (i in 1:nrow(sigDiff)){
        begin=sigDiff[i,1]
        end=sigDiff[i,2]
        indBegin=which(timePoints==begin)
        indEnd=which(timePoints==end)
        x=timePoints[indBegin:indEnd]
        y=fit[indBegin:indEnd]
        xx=c(x, rev(x))
        yy=c(y, rep(0, length(y)))
        polygon(x=xx, yy, col="grey")
    }
    lines(x=timePoints, y=fit, pch="")
    lines(x=timePoints, y=fit+(confInt95*se), pch="", lty=2)
    lines(x=timePoints, y=fit-(confInt95*se), pch="", lty=2)
    abline(h=C)
}

#' Plot abundances by class
#' 
#' Plot the abundance of values for each class using 
#' a spline approach on the estimated full model.
#' 
#' @param res Output of fitTimeSeries function
#' @param formula Formula for ssanova. Of the form: abundance ~ ... where ... includes any pData slot value.
#' @param xlab X-label.
#' @param ylab Y-label.
#' @param color0 Color of samples from first group.
#' @param color1 Color of samples from second group.
#' @param include Parameters to include in prediction.
#' @param ... Extra plotting arguments.
#' @return Plot for abundances of each class using a spline approach on estimated null model.
#' @rdname plotClassTimeSeries
#' @seealso \code{\link{fitTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitTimeSeries(obj=mouseData,feature="Actinobacteria",
#'    class="status",id="mouseID",time="relativeTime",lvl='class',B=10)
#' plotClassTimeSeries(res,pch=21,bg=res$data$class,ylim=c(0,8))
#'
plotClassTimeSeries<-function(res,formula,xlab="Time",ylab="Abundance",color0="black",
                            color1="red",include=c("1","class", "time:class"),...){
    dat = res$data
    if(missing(formula)){
        mod = gss::ssanova(abundance ~ time * class, data=dat)
    } else{
        mod = gss::ssanova(formula,data=dat)
    }
    
    timePoints = seq(min(dat$time),max(dat$time),by=1)
    group0 = data.frame(time=timePoints,class=factor(levels(dat$class)[1]))
    group1 = data.frame(time=timePoints,class=factor(levels(dat$class)[2]))

    pred0  = predict(mod, newdata=group0,include=include, se=TRUE)
    pred1  = predict(mod, newdata=group1,include=include, se=TRUE)
    
    plot(x=dat$time,y=dat$abundance,xlab=xlab,ylab=ylab,...)
    lines(x=group0$time,y=pred0$fit,col=color0)
    lines(x=group0$time,y=pred0$fit+(1.96*pred0$se),lty=2,col=color0)
    lines(x=group0$time,y=pred0$fit-(1.96*pred0$se),lty=2,col=color0)

    lines(x=group1$time,y=pred1$fit,col=color1)
    lines(x=group1$time,y=pred1$fit+(1.96*pred1$se),lty=2,col=color1)
    lines(x=group1$time,y=pred1$fit-(1.96*pred1$se),lty=2,col=color1)
}

#' Discover differentially abundant time intervals for all bacteria
#' 
#' Calculate time intervals of significant differential abundance over all
#' bacteria of a particularly specified level (lvl). If not lvl is specified,
#' all OTUs are analyzed. Warning, function can take a while
#' 
#' @param obj metagenomeSeq MRexperiment-class object.
#' @param lvl Vector or name of column in featureData of MRexperiment-class object for aggregating counts (if not OTU level).
#' @param B Number of permutations to perform.
#' @param featureOrder Hierarchy of levels in taxonomy as fData colnames
#' @param ... Options for \code{\link{fitTimeSeries}}, except feature.
#' @return List of lists of matrices of time point intervals of interest, Difference in abundance area and p-value, fit, area permutations.
#' @return A list of lists for which each includes:
#' \itemize{
#'  \item{timeIntervals - Matrix of time point intervals of interest, area of differential abundance, and pvalue.}
#'  \item{data  - Data frame of abundance, class indicator, time, and id input.}
#'  \item{fit - Data frame of fitted values of the difference in abundance, standard error estimates and timepoints interpolated over.}
#'  \item{perm - Differential abundance area estimates for each permutation.}
#'  \item{call - Function call.}
#' }
#' @rdname fitMultipleTimeSeries
#' @seealso \code{\link{cumNorm}} \code{\link{fitSSTimeSeries}} \code{\link{fitTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitMultipleTimeSeries(obj=mouseData,lvl='phylum',class="status",
#'           id="mouseID",time="relativeTime",B=1)
#'
fitMultipleTimeSeries <- function(obj,lvl=NULL,B=1,featureOrder=NULL,...) {
    if(is.null(lvl)){
        bacteria = seq(nrow(obj))
    } else {
        if(is.factor(fData(obj)[,lvl])){
            fData(obj)[,lvl] = as.character(fData(obj)[,lvl])
        }
        bacteria = unique(fData(obj)[,lvl])
    }
    fits = lapply(bacteria,function(bact){
        try(fitTimeSeries(obj,lvl=lvl,feature=bact,B=B,featureOrder=featureOrder,...))
    })
    names(fits) = bacteria
    fits = c(fits,call=match.call())
    return(fits)
}

#' With a list of fitTimeSeries results, generate
#' an MRexperiment that can be plotted with metavizr
#' 
#' @param obj Output of fitMultipleTimeSeries
#' @param sampleNames Sample names for plot
#' @param sampleDescription Description of samples for plot axis label
#' @param taxonomyLevels Feature names for plot
#' @param taxonomyHierarchyRoot Root of feature hierarchy for MRexperiment
#' @param taxonomyDescription Description of features for plot axis label
#' @param featuresOfInterest The features to select from the fitMultipleTimeSeries output
#' @param featureDataOfInterest featureData for the resulting MRexperiment
#' @return MRexperiment that contains fitTimeSeries data, featureData, and phenoData
#' @rdname ts2MRexperiment
#' @seealso \code{\link{fitTimeSeries}} \code{\link{fitMultipleTimeSeries}}
#' @export
#' @examples
#'
#' data(mouseData)
#' res = fitMultipleTimeSeries(obj=mouseData,lvl='phylum',class="status",
#'           id="mouseID",time="relativeTime",B=1)
#' obj = ts2MRexperiment(res)
#' obj
#'
ts2MRexperiment<-function(obj,sampleNames=NULL,
                          sampleDescription="timepoints",
                          taxonomyLevels=NULL,
                          taxonomyHierarchyRoot="bacteria",
                          taxonomyDescription="taxonomy",
                          featuresOfInterest = NULL,
                          featureDataOfInterest=NULL){
  if(is.null(obj)){
    stop("Matrix cannot be null")
  }
  if(is.null(sampleNames)){
    numSamples <- dim(obj[[1]]$fit)[1]
    sampleNames <- paste("Timepoint", 1:numSamples, sep="_")
  }
  
  if(is.null(featuresOfInterest)){
    hasFit <- lapply(1:(length(obj)-1), function(i) which(!is.null(obj[[i]]$fit)))
    featuresOfInterest <- which(hasFit == 1)
    hasFit <- (hasFit == 1)
    hasFit <- !is.na(hasFit)
    temp <- 1:length(hasFit)
    temp[!hasFit] <- 0
    hasFit <- temp
  }
  
  if(is.null(taxonomyLevels)){
    numLevels <- 1:length(hasFit)
    taxonomyLevels <- names(obj)[1:length(hasFit)]
  }
  
  numSamples <- length(sampleNames)
  numLevels <- length(taxonomyLevels)
  numFeaturesOfInterest <- length(featuresOfInterest)
  
  rangeSamples <- 1:numSamples
  rangeFeaturesOfInterest <- 1:numFeaturesOfInterest
#  print(hasFit)
  
  results <- do.call(rbind, lapply(hasFit,function(i){ if (i != 0) t(obj[[i]]$fit)[1,] else rep(NA, numSamples) }))
  
  dfSamples <- data.frame(x=rangeSamples,row.names=sampleNames)
  metaDataSamples <-data.frame(labelDescription=sampleDescription)
  annotatedDFSamples <- AnnotatedDataFrame()
  pData(annotatedDFSamples) <- dfSamples
  varMetadata(annotatedDFSamples) <- metaDataSamples
  validObject(annotatedDFSamples)
  
  if(is.null(featureDataOfInterest)){
    dfFeatures <- data.frame(taxonomy1=rep(taxonomyHierarchyRoot, numLevels),taxonomy2=taxonomyLevels)
    metaDataFeatures <-data.frame(labelDescription=paste(taxonomyDescription, 1:2, sep=""))
    annotatedDFFeatures <- AnnotatedDataFrame()
    pData(annotatedDFFeatures) <- dfFeatures
    varMetadata(annotatedDFFeatures) <- metaDataFeatures
    validObject(annotatedDFFeatures)
  }
  else{
    annotatedDFFeatures <- featureDataOfInterest
  }
  
  fitTimeSeriesMRexp <- newMRexperiment(counts=results,
                                        phenoData=annotatedDFSamples,
                                        featureData=annotatedDFFeatures)
  return(fitTimeSeriesMRexp)
}
# load("~/Dropbox/Projects/metastats/package/git/metagenomeSeq/data/mouseData.rda")
# classMatrix = aggregateByTaxonomy(mouseData,lvl='class',norm=TRUE,out='MRexperiment')
# data(mouseData)
# fitTimeSeries(obj=mouseData,feature="Actinobacteria",class="status",id="mouseID",time="relativeTime",lvl='class',B=10)
Any scripts or data that you put into this service are public.
metagenomeSeq documentation built on Nov. 8, 2020, 5:34 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
metagenomeSeq
Statistical analysis for sparse high-throughput sequencing

R/fitTimeSeries.R
In metagenomeSeq: Statistical analysis for sparse high-throughput sequencing

Defines functions ts2MRexperiment fitMultipleTimeSeries plotClassTimeSeries plotTimeSeries fitTimeSeries fitSSTimeSeries ssIntervalCandidate ssPermAnalysis ssPerm ssFit trapz

Documented in fitMultipleTimeSeries fitSSTimeSeries fitTimeSeries plotClassTimeSeries plotTimeSeries ssFit ssIntervalCandidate ssPerm ssPermAnalysis trapz ts2MRexperiment

Try the metagenomeSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

metagenomeSeq Statistical analysis for sparse high-throughput sequencing

R/fitTimeSeries.R In metagenomeSeq: Statistical analysis for sparse high-throughput sequencing

Defines functions ts2MRexperiment fitMultipleTimeSeries plotClassTimeSeries plotTimeSeries fitTimeSeries fitSSTimeSeries ssIntervalCandidate ssPermAnalysis ssPerm ssFit trapz

Documented in fitMultipleTimeSeries fitSSTimeSeries fitTimeSeries plotClassTimeSeries plotTimeSeries ssFit ssIntervalCandidate ssPerm ssPermAnalysis trapz ts2MRexperiment

Try the metagenomeSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

metagenomeSeq
Statistical analysis for sparse high-throughput sequencing

R/fitTimeSeries.R
In metagenomeSeq: Statistical analysis for sparse high-throughput sequencing