#' @title LIMITS
#'
#' @description LIMITS is an algorithm developed by Fisher & Mehta to estimate the interaction matrix from time series data
#' assuming a Ricker model.
#'
#' @param x time series with taxa as rows and time points as columns
#' @param indices.perturb indices of species considered as environmental parameters
#' @param bagging.iter the number of iterations used for bagging
#' @param verbose print which taxon LIMITS is processing
#' @return a list with the estimated interaction matrix as Aest and the estimation error as error
#' @references Fisher & Mehta (2014). Identifying Keystone Species in the Human Gut Microbiome from Metagenomic Timeseries using Sparse Linear Regression. PLoS One 9:e102451
#' @examples
#' \dontrun{
#' N=20
#' A=generateA(N,c=0.1)
#' ts=ricker(N=N,A=A)
#' Aest=limits(ts,verbose=TRUE)$Aest
#' par(mfrow=c(2,1))
#' plotA(A,header="original")
#' plotA(Aest,header="estimated")
#' par(mfrow=c(1,1))
#' }
#' @export
limits<-function(x, indices.perturb = NA, bagging.iter=100, verbose=FALSE){
x=t(x)
print(paste("Time series has",ncol(x),"taxa"))
if(verbose==TRUE){
print("Processing first taxon.")
}
results<-limitscolumnwise(x, 1, indices.perturb, r=bagging.iter)
Aest<-t(results[[1]])
error<-results[[2]]
# loop taxa
for(i in 2:ncol(x)){
if(verbose==TRUE){
print(paste("Processing taxon",i))
}
results<-limitscolumnwise(x, i, indices.perturb, r=bagging.iter)
Aest.temp<-results[[1]]
error.temp<-results[[2]]
Aest<-cbind(Aest,t(Aest.temp))
error<-append(error,error.temp)
}
res=list(t(Aest),error)
names(res)=c("Aest","error")
return(res)
}
#### R is a time series with the columns corresponding to the species N, and
#### lines to different time points Ntp. If the first column of your time series
#### i.e. dim(R)=Ntp N
#### labels time, remove it before applying limits (ie, do R<-R[:,2:end] ).
#### i is the column of the interaction matrix we try to reconstruct.
#### output
#### Beval : the output of the function is the estimation of the "ith" line of the interaction matrix
#### error : mean of the errors made on evaluation of "y" using the estimated B matrix normalized by the variance of y ("one-time-step evalutaion").
#### errorF : idem as error but without the normalization by the variance.
# by Sophie de Buyl, translated from a Mathematica code provided by Charles Fisher and Pankaj Mehta.
limitscolumnwise <- function(R,i,indices.perturb, r=100){
listnumbkeysp<-c(); #list of number species kept. NOT MANDATORY
# choices to be made:
thresh <- .5 # orignially put to 5 (diminish to increase precision)
# manipulating R to put the data in the appropriated form to apply limits:
R[R <= 0] <- 2.22507e-308 # we will have to take the log, so let's remove zero's.
sd<-dim(R)
N<-sd[2] #number of species
Ntp<-sd[1]-1 #number of time points
# comput medians column-wise
colMedians=apply(R,2,median)
# the median should not be substracted for the perturbations
if(all(is.na(indices.perturb)) == FALSE){
colMedians[indices.perturb]=rep(0,length(indices.perturb))
}
# formulation with dependency on matrixStats
#data<- R-t(kronecker(matrix(1,1,sd[1]),t(t(colMedians(R)))));#first N column of data matrix needed for limits
data<- R-t(kronecker(matrix(1,1,sd[1]),colMedians))
data<-data[1:(sd[1]-1),]
data<-cbind(data,(log(R[2:sd[1],i])-log(R[1:(sd[1]-1),i])))
if(sum(is.na(log(R)))>0){
print(which(is.na(log(R)),arr.ind = T))
}
# variable initiation
res<-array(0,c(N,1)) # array for storing results
errorlist<-c()
listspecies<-seq(1,N) # to construct the choices of excluded/includes species
for(k in 1:r){ # we do r times the same thing to get r estimations the interaction matrix B
#initialize covariates to be included/excluded from regression model
if (i!= 1 & i!=N){
c1<-1:(i-1)
c2<-(i+1):N
excluded <- c(c1,c2)
included <- i
}
if(i==1){
excluded<-2:N
included<-i
}
if(i==N){
excluded<-1:(N-1)
included<-N
}
#randomly partition data into training and test sets
trainset <- as.matrix(sample(1:Ntp,round(Ntp/2)))
testset <- as.matrix(setdiff(1:Ntp,trainset))
data1 <- data[trainset,]
data2 <- data[testset,]
test <- included
results<- restrictedleastsquare(data1,data2,test) #perform the estimation
errorEt <-results[[1]]
B1t<-results[[2]]
# loop that adds covariates(=species) as long as prediction error decreases by thresh
xxx=1
yyy=1
while(xxx == 1 && yyy != N){
yyy=yyy+1
#loop to create list of performances for all regression models including an additional covariate
#initial the loop
test <- c(included,excluded[1])
errorlist<-restrictedleastsquare(data1,data2,test)[[1]]
#the loop
for(kk in 2:(length(excluded)-1)){
test = c(included,excluded[kk])
errorlist<-c(errorlist,restrictedleastsquare(data1,data2,test)[[1]])
}
#sort the list so that prev[[1]] has the lowest prediction error
ind<-which(errorlist == min(errorlist), arr.ind = TRUE)
if(dim(as.matrix(ind))[1]>1){
ind<-ind[[1]]
}
test <- c(included,excluded[ind]) #we choose the test set with the smallest error
resulttemp <- restrictedleastsquare(data1, data2,test) #we re-run limits for that test set since we didn't store results
errorEtemp <-resulttemp[[1]]
B1temp<-resulttemp[[2]]
# redefine included excluded covariates to account for including new covariate
included <- test
excluded <- setdiff(union(listspecies,test),intersect(listspecies,test))
# if improvement over the current best model by thresh, include new species, otherwise exit loop
if(is.na(errorEt)==FALSE && errorEt!=0 && (100.0*(errorEtemp - errorEt)/errorEt< -thresh)){ #we keep adding species
errorEt <- errorEtemp# we update the error to be compared with.
B1t <- B1temp #useless
errorEtempt <- errorEtemp #useless
testt <- test
} else{ # we stop adding species
xxx<-0
listnumbkeysp<-c(listnumbkeysp,yyy)
}
} # end of while loop
# store final regression coefficients in res
B <- matrix(0,N,1)
B[test]<-t(B1temp)
res<-cbind(res, B)
errorlist<-c(errorlist,errorEtemp)
} #end or r loop
# Bagging step: output median of res
res<-res[,-1]
# compute medians row-wise
rowMedians=apply(res,1,median)
Beval<-t(as.matrix(rowMedians))
error.median<-median(errorlist)
#listnumbkeysp
return(list(Beval,error.median))
}
##############################################################
# restrictedleastsquare
# function need for limits.r
#
# by Sophie de Buyl, translated from a Mathematica code provided by Charles Fisher and Pankaj Mehta.
################################################################
restrictedleastsquare <- function(inbag,outbag,test){
# This subroutine performs the linear regression and computes the error on the test set
#"inbag" is a matrix containing the training set. One row of the matrix looks like {x_1(t) - u_1, ..., x_N(t) -
# u_N, ln x_i(t+1) - ln x_i(t) },
#where u_i is the median abundance of species i. ;
#"outbag" is a matrix containing the test set.
#It is in the same form as inbag.;
#"test" is a vector of integers specifying which covariates are included in the regression. I.e. if species 1,2,3 are included then test = {1,2,3}.
#perform linear regression on training set
temp<-dim(inbag)
lastcol<-temp[2]
X1 <- as.matrix(inbag[,test])
y1 <- as.matrix(inbag[,lastcol])
B1 <- as.matrix(MASS::ginv(X1,tol=2e-308)%*%y1)
# calculate prediction error on test set
X2 <- as.matrix(outbag[,test])
y2 <- as.matrix(outbag[,lastcol])
errorE <- mean((y2-X2%*%B1)*(y2-X2%*%B1))/var(y2)
result <- list(errorE,B1)
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.