#' @title Community Time Series Comparison
#'
#' @description Compute properties of community time series generated with generateTS.
#'
#' @details If infotheo is installed, the entropy is also computed. Noise types are computed with smooth set to TRUE.
#' The neutrality test is carried out with ntests set to 500 and method logitnorm.
#'
#' @param input.folder the folder where results (settings and time series) of function generateTS are stored.
#' @param expIds the experiment identifiers of time series to be considered
#' @param modif.folder the folder with time series sub-sets as generated by sliceTS.R or with noisy time series as generated by addNoise.R (settings are read in from input.folder)
#' @param modif modification (shared name of time series in modif.folder, sliced for time series sub-sets and pois/multi for time series with Poisson or multinomial noise, respectively)
#' @param testNeutral test neutrality (requires the packages WrightFisher and logitnorm)
#' @param sliceDef consider the time series sub-set defined by the given start and end time point for property computation; an end time point of NA means that the entire time series is considered; a single value T in the sliceDef vector means that the last T time points are considered; timeDecaySliceDef and varEvolSliceDef refer to the outcome of sliceDef
#' @param epsilon allowed deviation from the expected slope for noise type identification
#' @param predef recognize noise types with predefined slope boundaries (see identifyNoisetypes for details)
#' @param detrend remove linear trends before computing the periodogram (recommended)
#' @param norm normalize the time series by dividing each entry in a sample by the sum of that sample; if norm is TRUE, entropy computation is omitted
#' @param hurstBins binning thresholds for Hurst exponent (three thresholds required for four bins)
#' @param maxautocorBins binning thresholds for maximal autocorrelation (three thresholds required for four bins)
#' @param timeDecaySliceDef the time series subset considered to compute the time decay, a vector with the start and the end time point (if the end time point is NA, the entire time series is used)
#' @param varEvolSliceDef (optional, by default not computed) the time series subset considered to compute the evolution of variance, a vector with the start and the end time point (if the end time point is NA, the entire time series is used)
#' @param radSliceDef (optional, by default not computed) the time series subset considered to analyse the species abundance versus rank curve (abundances are taken before normalization, even when norm is enabled); a vector with the start and the end time point (if the end time point is NA, the entire time series is used); cannot be used together with sliceDef
#' @param returnDistribs return distributions at the final time point (no time series properties are computed, cannot be used together with returnTS)
#' @param returnTS return the time series (no time series properties are computed, cannot be used together with returnDistribs)
#' @return a list with experiment parameters (algorithm, connectance, sigma, theta and so on) and time series properties (noise type percentages, slope of Taylor's law etc.); if returnDistribs is TRUE, a list with the abundances at the last time point; if returnTS is TRUE, a list with the time series
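#' @examples
#' # Minimal usage sketch (not run): it assumes generateTS previously wrote
#' # settings and time series for experiments 1 and 2 into the folder "out";
#' # the folder name and the identifiers are placeholders.
#' \dontrun{
#' props=compareTS(input.folder="out", expIds=c(1,2), norm=TRUE)
#' props.df=as.data.frame(props)
#' # return the raw time series instead of computing properties
#' tsl=compareTS(input.folder="out", expIds=c(1,2), returnTS=TRUE)
#' }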
#' @export
compareTS <- function(input.folder="",expIds=c(), modif.folder="", modif="", testNeutral=FALSE, sliceDef=c(1,NA), epsilon=0.2, predef=FALSE, detrend=TRUE, norm=FALSE, hurstBins=c(0.5,0.7,0.9), maxautocorBins=c(0.3,0.5,0.8), timeDecaySliceDef=c(1,50), varEvolSliceDef=c(), radSliceDef=c(), returnDistribs=FALSE, returnTS=FALSE){
# infotheo needed for entropy computation
infotheoThere=FALSE
# https://stat.ethz.ch/pipermail/r-help/2005-September/078958.html
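# search() lists attached packages as "package:<name>"; a grep hit therefore means infotheo is attached (installed but unattached packages are not detected)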
searchInfotheo=length(grep(paste("^package:","infotheo", "$", sep=""), search()))
if(searchInfotheo>0){
infotheoThere=TRUE
}else{
if(norm==FALSE){
print("Please install/load infotheo if you want to compute the entropy of non-normalized time series.")
}
}
# neutrality test dependencies
wfThere=FALSE
# https://stat.ethz.ch/pipermail/r-help/2005-September/078958.html
searchWf=length(grep(paste("^package:","WrightFisher", "$", sep=""), search()))
if(searchWf>0){
wfThere=TRUE
}else{
if(testNeutral){
warning("Please install/load WrightFisher if you want to test for neutrality. Test is not carried out.")
}
}
logitnormThere=FALSE
# https://stat.ethz.ch/pipermail/r-help/2005-September/078958.html
searchLN=length(grep(paste("^package:","logitnorm", "$", sep=""), search()))
if(searchLN>0){
logitnormThere=TRUE
}else{
if(testNeutral){
warning("Please install/load logitnorm if you want to test for neutrality. Test is not carried out.")
}
}
if(testNeutral && !norm){
stop("Data are supposed to be normalized for neutrality test.")
}
if(length(hurstBins) != 3){
stop("Three Hurst bin thresholds required!")
}
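# note: any non-empty sliceDef (including the default c(1,NA)) conflicts with radSliceDef; pass sliceDef=c() to use radSliceDef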
if(length(sliceDef)>0 && length(radSliceDef)>0){
stop("radSliceDef and sliceDef cannot be used together.")
}
if(length(maxautocorBins) != 3){
stop("Three maximal autocorrelation bin thresholds required!")
}
if(returnDistribs==TRUE && returnTS==TRUE){
stop("Cannot use returnDistribs and returnTS together.")
}
if(input.folder != ""){
if(!file.exists(input.folder)){
stop(paste("The input folder",input.folder,"does not exist!"))
}
input.settings.folder=file.path(input.folder,"settings")
if(!file.exists(input.settings.folder)){
stop("The input folder does not have a settings subfolder!")
}
if(modif.folder == ""){
input.timeseries.folder=file.path(input.folder,"timeseries")
if(!file.exists(input.timeseries.folder)){
stop("The input folder does not have a time series subfolder!")
}
}else{
if(!file.exists(modif.folder)){
stop("The folder with sliced time series does not exist!")
}
}
}else{
stop("Please provide the input folder!")
}
# experiment properties
taxa=c()
samples=c()
peps=c()
initmode=c()
connectances=c()
algorithms=c()
sigmas=c()
thetas=c()
migrations=c()
deathrates=c()
individuals=c()
samplingfreqs=c()
# time series properties
taylorslopes=c()
taylorR2=c()
timedecayslopes=c()
timedecayR2=c()
varevolslopes=c()
varevolR2=c()
percentblack=c()
percentbrown=c()
percentpink=c()
percentwhite=c()
thetaprobs=c()
neutralityPvals=c()
# from lowest to highest maximal autocorrelation
binA4Name=paste("autocor",maxautocorBins[3],"Inf",sep="")
binA3Name=paste("autocor",maxautocorBins[2],maxautocorBins[3],sep="")
binA2Name=paste("autocor",maxautocorBins[1],maxautocorBins[2],sep="")
binA1Name=paste("autocor","negInf",maxautocorBins[1],sep="")
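# these bin names (and the Hurst bin names below) are constructed to match the keys of the lists returned by binByMemory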
percentmaxautocorbin1=c()
percentmaxautocorbin2=c()
percentmaxautocorbin3=c()
percentmaxautocorbin4=c()
binH4Name=paste("hurst",hurstBins[3],"Inf",sep="")
binH3Name=paste("hurst",hurstBins[2],hurstBins[3],sep="")
binH2Name=paste("hurst",hurstBins[1],hurstBins[2],sep="")
binH1Name=paste("hurst","negInf",hurstBins[1],sep="")
lowHursts=c()
middleHursts=c()
highHursts=c()
veryHighHursts=c()
entropy=c()
autoslopes=c()
saddistribsvegan=c()
saddistribs=c()
saddistribsfitscore=c()
saddistribsfitscorevegan=c()
Algorithm=""
Input_experiment_identifier=NA
Sampling_frequency=NA
init_abundance_mode=NA
theta=NA
immigration_rate_Hubbell=NA
deathrate_Hubbell=NA
# distribution list
distribList = list()
# time series list
tsList = list()
# collect time series properties
for(expId in expIds){
print(paste("Processing identifier",expId))
input.settings.name=paste(expId,"settings",sep="_")
input.settings.expId.folder=file.path(input.settings.folder,input.settings.name)
#print(input.settings.expId.folder)
if(!file.exists(input.settings.expId.folder)){
stop("The input settings folder does not have a subfolder for the input experiment identifier!")
}
if(modif.folder == ""){
input.timeseries.name=paste(expId,"timeseries",sep="_")
input.timeseries.expId.folder=file.path(input.timeseries.folder,input.timeseries.name)
if(!file.exists(input.timeseries.expId.folder)){
stop("The input time series folder does not have a subfolder for the input experiment identifier!")
}
}
# read settings file
input.settings.expId.file=paste(expId,"settings.txt",sep="_")
settings.path=file.path(input.settings.expId.folder,input.settings.expId.file)
if(!file.exists(settings.path)){
stop(paste("The settings file",settings.path,"does not exist!"))
}
source(settings.path, local=TRUE)
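# sourcing the settings file defines the experiment variables (Algorithm, Sampling_frequency, sigma, theta etc.) in the local environment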
algorithms=c(algorithms,Algorithm)
samplingfreqs=c(samplingfreqs,Sampling_frequency)
# read interaction matrix
if(returnDistribs == FALSE && returnTS==FALSE){
if(Algorithm == "ricker" || Algorithm == "soc" || Algorithm == "soi" || Algorithm == "glv"){
source.expId = expId
interactionmatrix.folder=input.settings.expId.folder
# interaction matrix for current experiment was read from another experiment
if(!is.na(Input_experiment_identifier) && Input_experiment_identifier!=FALSE){
source.expId=Input_experiment_identifier
source.expId.folder=paste(source.expId,"settings",sep="_")
interactionmatrix.folder=file.path(input.settings.folder,source.expId.folder)
}
A.name=paste(source.expId,"interactionmatrix.txt",sep="_")
input.path.A=file.path(interactionmatrix.folder,A.name)
print(paste("Reading interaction matrix from:",input.path.A,sep=" "))
A=read.table(file=input.path.A,sep="\t",header=FALSE)
A=as.matrix(A)
# the realized PEP can differ from the requested one, so it is recomputed from the interaction matrix
peps=c(peps,round(getPep(A),2))
connectances=c(connectances,getConnectance(A))
}else{
peps=c(peps,NA)
connectances=c(connectances,NA)
}
}
initmode=c(initmode,init_abundance_mode)
# read algorithm-specific parameters
if(Algorithm == "ricker"){
sigmas=c(sigmas,sigma)
}else{
sigmas=c(sigmas,NA)
}
if(Algorithm == "dm"){
thetas=c(thetas,theta)
}else{
thetas=c(thetas,NA)
}
if(Algorithm == "hubbell"){
migrations=c(migrations,immigration_rate_Hubbell)
deathrates=c(deathrates, deathrate_Hubbell)
}else{
migrations=c(migrations,NA)
deathrates=c(deathrates,NA)
}
if(Algorithm == "soc" || Algorithm == "hubbell"){
individuals=c(individuals,I)
#print(paste("individuals",I))
}else{
individuals=c(individuals,NA)
}
# read time series file
if(modif.folder == ""){
ts.name=paste(expId,"timeseries.txt",sep="_")
input.path.ts=file.path(input.timeseries.expId.folder,ts.name)
}else{
ts.name=paste(expId,modif,"timeseries.txt",sep="_")
input.path.ts=file.path(modif.folder, ts.name)
}
print(paste("Reading time series from:",input.path.ts,sep=" "))
ts=read.table(file=input.path.ts,sep="\t",header=FALSE)
ts=as.matrix(ts)
# store non-normalized samples for RAD testing
if(length(radSliceDef)==2){
startTD=radSliceDef[1]
stopTD=radSliceDef[2]
if(is.na(stopTD)){
stopTD=ncol(ts)
}
if(ncol(ts) < startTD){
warning(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given start point for RAD properties! The first time point is used instead."))
startTD=1
}
if(ncol(ts) < stopTD){
warning(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given end point for RAD properties! The last time point is used instead."))
stopTD=ncol(ts)
}
sadSlice=ts[,startTD:stopTD]
}
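# keep the last sample before optional normalization; it is used below to fit the neutral model when no RAD slice is defined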
lastSample=ts[,ncol(ts)]
if(norm==TRUE){
ts=seqtime::normalize(ts)
}
if(sliceDef[1]>1 || !is.na(sliceDef[2])){
if(length(sliceDef)==1){
# keep the last sliceDef[1] time points
subsetStop=ncol(ts)
subsetStart=subsetStop-sliceDef[1]+1
if(subsetStart < 1){
warning(paste("Time series",expId,"has fewer samples (namely",ncol(ts),") than the given start point! The first time point is used instead!"))
subsetStart=1
}
}else{
subsetStart=sliceDef[1]
subsetStop=sliceDef[2]
# the NA check must precede the comparison, else if() fails on a missing value
if(is.na(subsetStop)){
subsetStop=ncol(ts)
}
if(subsetStop > ncol(ts)){
warning(paste("Time series",expId,"has fewer samples (namely",ncol(ts),") than the given end point! The last time point is used instead!"))
subsetStop=ncol(ts)
}
}
ts=ts[,subsetStart:subsetStop]
}
N=nrow(ts)
print(paste("Read time series with",N,"taxa and",ncol(ts),"samples."))
onePerc=N/100
taxa=c(taxa,N)
samples=c(samples,ncol(ts))
if(returnDistribs == FALSE && returnTS==FALSE){
# entropy
if(norm==FALSE && infotheoThere==TRUE){
# discretization needed (also for David data, because of interpolation)
if(Algorithm=="glv" || Algorithm == "ricker" || Algorithm == "davida" || Algorithm == "davidb"){
disc=infotheo::discretize(t(ts),disc="equalwidth")
ent=infotheo::entropy(disc)
}else{
ent=infotheo::entropy(t(ts))
}
entropy=c(entropy,ent)
}else{
entropy=c(entropy,NA)
}
if(testNeutral && wfThere && logitnormThere && norm){
# runs logitnorm by default
neutralityPvals=c(neutralityPvals, WrightFisher::NeutralCovTest(t(ts), ntests=500))
}else{
neutralityPvals=c(neutralityPvals,NA)
}
# auto-correlation vs species number slope
autocorOut=autocorVsTaxonNum(ts,lag=1,plot=FALSE)
autoslopes=c(autoslopes,autocorOut$slope)
# bin Hurst exponent
hursts=binByMemory(ts,thresholds=hurstBins,method="hurst")
lowHursts=c(lowHursts,length(hursts[[binH1Name]])/onePerc)
middleHursts=c(middleHursts,length(hursts[[binH2Name]])/onePerc)
highHursts=c(highHursts,length(hursts[[binH3Name]])/onePerc)
veryHighHursts=c(veryHighHursts,length(hursts[[binH4Name]])/onePerc)
# bin maximal autocorrelations
autocorBins=binByMemory(ts,thresholds=maxautocorBins,method="autocor")
percentmaxautocorbin4=c(percentmaxautocorbin4, length(autocorBins[[binA4Name]])/onePerc)
percentmaxautocorbin3=c(percentmaxautocorbin3, length(autocorBins[[binA3Name]])/onePerc)
percentmaxautocorbin2=c(percentmaxautocorbin2, length(autocorBins[[binA2Name]])/onePerc)
percentmaxautocorbin1=c(percentmaxautocorbin1, length(autocorBins[[binA1Name]])/onePerc)
# calculate Taylor's law slope
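# the small pseudo count guards against zero abundances in the log-log fit of variance versus mean (assumed purpose)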
taylorRes=taylor(ts,type="taylor",plot=FALSE, pseudo=0.0000001)
noisetypesRes=identifyNoisetypes(ts,abund.threshold = 0, epsilon=epsilon, smooth=TRUE,predef=predef, detrend=detrend)
taylorslopes=c(taylorslopes,taylorRes$slope)
taylorR2=c(taylorR2,taylorRes$adjR2)
percentbrown=c(percentbrown,length(noisetypesRes$brown)/onePerc)
percentpink=c(percentpink,length(noisetypesRes$pink)/onePerc)
percentwhite=c(percentwhite,length(noisetypesRes$white)/onePerc)
percentblack=c(percentblack,length(noisetypesRes$black)/onePerc)
# species abundance versus rank distribution shape
if(length(radSliceDef)==2){
# scale abundances to integers
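# Ricker and gLV abundances are continuous, so they are scaled to pseudo-counts before the RAD fit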
if(Algorithm=="ricker" || Algorithm=="glv"){
sadSlice=round(sadSlice*1000)
}
else if(Algorithm=="davida" || Algorithm=="davidb"){
sadSlice=round(sadSlice)
}
sad.out.distrib=rad(sadSlice,remove.zeros=TRUE, fit.distrib=TRUE, plot=FALSE)
sad.out.vegan=rad(sadSlice,remove.zeros=TRUE, fit.rad=TRUE, plot=FALSE)
saddistribsvegan=c(saddistribsvegan,sad.out.vegan$distrib)
saddistribs=c(saddistribs,sad.out.distrib$distrib)
saddistribsfitscorevegan=c(saddistribsfitscorevegan,sad.out.vegan$score)
saddistribsfitscore=c(saddistribsfitscore,sad.out.distrib$score)
# fit neutral model to slice
thetaprobs=c(thetaprobs,rad(sadSlice, remove.zeros = TRUE,fit.neutral = TRUE,plot=FALSE)$thetaprob)
}else{
if(length(radSliceDef)>0){
warning("The RAD slice is not defined correctly!")
}
saddistribsvegan=c(saddistribsvegan,NA)
saddistribs=c(saddistribs,NA)
saddistribsfitscore=c(saddistribsfitscore,NA)
saddistribsfitscorevegan=c(saddistribsfitscorevegan,NA)
# if no RAD slice defined: fit neutral model to last sample
if(Algorithm=="ricker" || Algorithm=="glv"){
lastSample=round(lastSample*1000)
}
else if(Algorithm=="davida" || Algorithm=="davidb"){
lastSample=round(lastSample)
}
thetaprobs=c(thetaprobs,rad(lastSample,remove.zeros = TRUE,fit.neutral = TRUE,plot=FALSE)$thetaprob)
}
# evolution of variance
if(length(varEvolSliceDef)==2){
startTD=varEvolSliceDef[1]
stopTD=varEvolSliceDef[2]
if(is.na(stopTD)){
stopTD=ncol(ts)
}
if(ncol(ts) < startTD){
stop(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given start point for variance evolution!"))
}
if(ncol(ts) < stopTD){
warning(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given end point for variance evolution! The last time point is used instead!"))
stopTD=ncol(ts)
}
varEvolSlice=ts[,startTD:stopTD]
varEvolRes=varEvol(varEvolSlice, plot=FALSE)
varevolslopes=c(varevolslopes,varEvolRes$slope)
varevolR2=c(varevolR2,varEvolRes$adjR2)
}else{
if(length(varEvolSliceDef)>0){
warning("The variance evolution slice is not defined correctly!")
}
varevolslopes=c(varevolslopes,NA)
varevolR2=c(varevolR2,NA)
}
# time decay
if(length(timeDecaySliceDef)==2){
startTD=timeDecaySliceDef[1]
stopTD=timeDecaySliceDef[2]
if(is.na(stopTD)){
stopTD=ncol(ts)
}
if(ncol(ts) < startTD){
stop(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given start point for the time decay!"))
}
if(ncol(ts) < stopTD){
warning(paste("Time series",expId,"has less samples (namely",ncol(ts),") than the given end point for the time decay! The last time point is used instead!"))
stopTD=ncol(ts)
}
timeDecaySlice=ts[,startTD:stopTD]
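# logdissim and logtime fit the dissimilarity versus time interval relationship in log-log space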
timedecayRes=timeDecay(timeDecaySlice, plot=FALSE, logdissim=TRUE, logtime=TRUE)
timedecayslopes=c(timedecayslopes,timedecayRes$slope)
timedecayR2=c(timedecayR2,timedecayRes$adjR2)
}else{
warning("The time decay slice is not defined correctly!")
timedecayslopes=c(timedecayslopes,NA)
timedecayR2=c(timedecayR2,NA)
}
}else if(returnDistribs==TRUE){
# last column
name=paste("exp",expId,sep="")
distribList[[name]]=ts[,ncol(ts)]
}else if(returnTS==TRUE){
name=paste("exp",expId,sep="")
tsList[[name]]=ts
}
} # end loop over experiments
# assemble table
if(returnDistribs == FALSE && returnTS == FALSE){
resulttable=list(expIds,samples,algorithms,samplingfreqs,initmode,peps,connectances,sigmas,thetas,migrations,individuals, deathrates,entropy,taylorslopes,taylorR2, percentblack,percentbrown,percentpink,percentwhite, percentmaxautocorbin1,percentmaxautocorbin2,percentmaxautocorbin3,percentmaxautocorbin4, lowHursts, middleHursts, highHursts,veryHighHursts, timedecayslopes, timedecayR2, varevolslopes, varevolR2, autoslopes, neutralityPvals, saddistribs, saddistribsvegan, saddistribsfitscore, saddistribsfitscorevegan, thetaprobs)
names(resulttable)=c("id","samplenum","algorithm","interval","initabundmode","pep","c","sigma","theta","m","individuals","deaths","entropy","taylorslope","taylorr2","black","brown","pink","white","maxautocorbin1","maxautocorbin2","maxautocorbin3","maxautocorbin4", "lowhurst","middlehurst","highhurst","veryhighhurst","timedecayslope","timedecayr2","varevolslope","varevolr2","autoslope","neutral","raddistrib","raddistribvegan","radfitscore","radfitscorevegan","thetaprobs")
}else if(returnDistribs == TRUE){
resulttable=distribList
}else if(returnTS == TRUE){
resulttable=tsList
}
return(resulttable)
}