# R/mdm.R

#' Specify the priors. Without inputs, defaults will be used.
#' 
#' @param m0 the value of the prior mean at time \code{t=0}, scalar (assumed to be the same
#'  for all nodes). The default is zero.
#' @param CS0 controls the scaling of the prior variance matrix \code{C*_{0}} at time 
#'  \code{t=0}. The default is 3, giving a non-informative prior for \code{C*_{0}}: 3 times
#'  the \code{p x p} identity matrix, where \code{p} is the number of thetas.
#' @param n0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#'  is a non-informative prior, with \code{n0 = d0 = 0.001}. \code{n0} must be greater than 0.
#' @param d0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#'  is a non-informative prior, with \code{n0 = d0 = 0.001}.
#' 
#' @details At time \code{t=0}, \code{(theta_{0} | D_{0}, phi) ~ N(m_{0},C*_{0} x phi^{-1})},
#'  where \code{D_{0}} denotes the set of initial information.
#' 
#' @return \code{priors}, a list with the prior hyperparameters. Used by \code{\link{dlm.lpl}},
#'  \code{\link{exhaustive.search}}, \code{\link{node}}, and \code{\link{subject}}.
#'  
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
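#'
#' @examples
#' # A minimal sketch: the defaults, then a slightly tighter prior variance scaling
#' pr <- priors.spec()
#' pr <- priors.spec(CS0 = 1)
#' str(pr)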
priors.spec <- function(m0 = 0, CS0 = 3, n0 = 0.001, d0 = 0.001) {
  
  priors = list(m0 = m0, CS0 = CS0, n0 = n0, d0 = d0)
  return(priors)
  
}

#' Calculate the log predictive likelihood for a specified set of parents and a fixed delta.
#'
#' @param Yt the vector of observed time series, length \code{T}.
#' @param Ft the matrix of covariates, dim = number of thetas (\code{p}) x number of time
#'  points (\code{T}), usually a row of 1s to represent an intercept and the time series of
#'  the parent nodes.
#' @param delta discount factor (scalar).
#' @param priors list with prior hyperparameters.
#' 
#' @return
#' \item{mt}{the vector or matrix of the posterior mean (location parameter), dim = \code{p x T}.}
#' \item{Ct}{and \code{CSt} the posterior scale matrix \code{C_{t}} is \code{C_{t} = C*_{t} x S_{t}},
#'  with dim = \code{p x p x T}, where \code{S_{t}} is a point estimate for the observation variance
#'  \code{phi^{-1}}.}
#' \item{Rt}{and \code{RSt} the prior scale matrix \code{R_{t}} is \code{R_{t} = R*_{t} x S_{t-1}},
#'  with dim = \code{p x p x T}, where \code{S_{t-1}} is a point estimate for the observation
#'  variance \code{phi^{-1}} at the previous time point.}
#' \item{nt}{and \code{dt} the vectors of the updated hyperparameters for the precision \code{phi}
#'  with length \code{T}.}
#' \item{S}{the vector of the point estimate for the observation variance \code{phi^{-1}} with
#'  length \code{T}.}
#' \item{ft}{the vector of the one-step forecast location parameter with length \code{T}.}
#' \item{Qt}{the vector of the one-step forecast scale parameter with length \code{T}.}
#' \item{ets}{the vector of the standardised forecast residuals with length \code{T},
#'  \eqn{\newline} defined as \code{(Y_{t} - f_{t}) / sqrt (Q_{t})}.}
#' \item{lpl}{the vector of the Log Predictive Likelihood with length \code{T}.}
#' 
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
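#'
#' @examples
#' # A minimal sketch on simulated data, assuming one parent node plus an intercept:
#' set.seed(1)
#' Tt <- 100
#' parent <- rnorm(Tt)
#' Yt <- 0.5 * parent + rnorm(Tt, sd = 0.5)
#' Ft <- rbind(rep(1, Tt), parent)  # p x T covariate matrix
#' fit <- dlm.lpl(Yt, Ft, delta = 0.95)
#' sum(fit$lpl)  # summed log predictive likelihood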
dlm.lpl <- function(Yt, Ft, delta, priors = priors.spec() ) {
  
  m0 = priors$m0
  CS0 = priors$CS0
  n0 = priors$n0
  d0 = priors$d0
  
  CS0 = CS0*diag(nrow(Ft))
  m0 = rep(m0,nrow(Ft))
  Nt = length(Yt)+1 # the length of the time series + t0
  p = nrow(Ft)      # the number of parents and one for an intercept (i.e. the number of thetas)
  
  Y = numeric(Nt)
  Y[2:Nt] = Yt
  
  F1 = array(0, dim=c(p,Nt))
  F1[,2:Nt] = Ft
  
  # Set up allocation matrices, including the priors
  mt = array(0,dim=c(p,Nt))
  mt[,1]=m0
  
  Ct = array(0,dim=c(p,p,Nt))
  CSt = array(0,dim=c(p,p,Nt))
  CSt[,,1] = CS0
  
  Rt = array(0,dim=c(p,p,Nt))
  RSt = array(0,dim=c(p,p,Nt))
  
  nt = numeric(Nt) 
  nt[1]=n0
  
  dt = numeric(Nt)
  dt[1]=d0
  
  S = numeric(Nt)
  S[1]=dt[1]/nt[1]
  
  ft = numeric(Nt)
  Qt = numeric(Nt)
  ets = numeric(Nt)
  lpl = numeric(Nt)
  
  # Updating
  
  for (i in 2:Nt){
    
    # Posterior at {t-1}: (theta_{t-1}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, C_{t-1} = C*_{t-1} x d_{t-1}/n_{t-1}]
    # Prior at {t}: (theta_{t}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, R_{t}]
    # D_{t-1} = D_{0},Y_{1},...,Y_{t-1} D_{0} is the initial information set
    
    # R*_{t} = C*_{t-1}/delta
    RSt[,,i] = CSt[,,(i-1)] / delta
    Rt[,,i] = RSt[,,i] * S[(i-1)] 
    # One-step forecast: (Y_{t}|D_{t-1}) ~ T_{n_{t-1}}[f_{t}, Q_{t}]
    ft[i] = t(F1[,i]) %*% mt[,(i-1)]
    QSt = as.vector(1 + t(F1[,i]) %*% RSt[,,i] %*% F1[,i])
    Qt[i] = QSt * S[(i-1)]
    et = Y[i] - ft[i]
    ets[i] = et / sqrt(Qt[i])
    
    # Posterior at t: (theta_{t}|D_{t}) ~ T_{n_{t}}[m_{t}, C_{t}]
    # D_{t} = D_{0},Y_{1},...,Y_{t}
    At = (RSt[,,i] %*% F1[,i])/QSt
    mt[,i] = mt[,(i-1)] + (At*et)
    
    nt[i] = nt[(i-1)] + 1
    dt[i] = dt[(i-1)] + (et^2)/QSt
    S[i]=dt[i]/nt[i] 
    
    CSt[,,i] = RSt[,,i] - (At %*% t(At))*QSt
    Ct[,,i] = S[i]*CSt[,,i]
    
    # Log Predictive Likelihood 
    lpl[i] = lgamma((nt[(i-1)]+1)/2)-lgamma(nt[(i-1)]/2)-0.5*log(pi*nt[(i-1)]*Qt[i])-((nt[(i-1)]+1)/2)*log(1+(1/nt[(i-1)])*et^2/Qt[i])
  }
  
  mt = mt[,2:Nt]; Ct = Ct[,,2:Nt]; CSt = CSt[,,2:Nt]; Rt = Rt[,,2:Nt]; RSt = RSt[,,2:Nt]
  nt = nt[2:Nt]; dt = dt[2:Nt]; S = S[2:Nt]; ft = ft[2:Nt]; Qt = Qt[2:Nt]; ets = ets[2:Nt]; lpl = lpl[2:Nt]
  
  output <- list(mt=mt,Ct=Ct,CSt=CSt,Rt=Rt,RSt=RSt,nt=nt,dt=dt,S=S,ft=ft,Qt=Qt,ets=ets,lpl=lpl)
  return(output)
}

#' A function to generate all the possible models. 
#'
#' @param Nn number of nodes; the number of columns of the dataset can be used.
#' @param node The node to find parents for.
#'
#' @return
#' output.model, a matrix with dimensions \code{(Nn-1)} x number of models, where the number
#' of models is \code{2^(Nn-1)}.
#' 
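#' @examples
#' # All candidate parent sets for node 1 in a 4-node network (2^3 = 8 models):
#' model.generator(4, 1)
#'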
#' @export
model.generator<-function(Nn,node){
  
  # Create the model 'no parents' (the first column of the matrix is all zeros)
  empt=rep(0,(Nn-1)) 
  
  for (k in 1:(Nn-1)) {
    
    # Calculate all combinations when number of parents = k
    #m=combn(c(1:Nn)[-node],k)
    if (Nn==2 & node==1) {
      model = matrix(c(0,2),1,2)
    } else { 
      m=combn(c(1:Nn)[-node],k) 
      
      # Expand the array so that unconnected edges are represented by zeros  
      empt.new=array(0,dim=c((Nn-1),ncol(m)))
      empt.new[1:k,]=m
      
      # Bind the matrices together; the next set of models are added to this matrix
      model=cbind(empt,empt.new)
      empt=model
    } 
  }
  
  colnames(model)=NULL
  output.model<-model
  
  return(output.model)
  
}

#' A function for an exhaustive search that also calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points T x Number of nodes Nn.
#' @param node  The node to find parents for.
#' @param nbf   Log Predictive Likelihood will sum from (and including) this time point. 
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#'
#' @return
#' model.store a matrix with the model, LPL and chosen discount factor for all possible models.
#' runtime an estimate of the run time of the function, using proc.time().
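#'
#' @examples
#' # A small sketch on white-noise data; cpp=FALSE uses only the native R code:
#' set.seed(1)
#' X <- matrix(rnorm(3 * 60), 60, 3)
#' res <- exhaustive.search(X, node = 1, delta = c(0.9, 0.95, 1), cpp = FALSE)
#' res$model.store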
#' @export
exhaustive.search <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec() ) {
  
  m0 = priors$m0
  CS0 = priors$CS0
  n0 = priors$n0
  d0 = priors$d0
  
  ptm=proc.time()  
  
  Nn=ncol(Data) # the number of nodes
  Nm=2^(Nn-1)   # the number of models per node
  
  M=model.generator(Nn,node) # Generate all the possible models
  models=rbind(1:Nm,M) # Label each model with a 'model number'
  
  
  Yt=Data[,node]    # the time series of the node we wish to find parents for
  Nt=length(Yt)     # the number of time points
  nd=length(delta)  # the number of deltas
  
  # Create empty arrays for the lpl scores and the optimum deltas
  lpldet=array(NA,c(Nm,length(delta)))
  lplmax=rep(NA,Nm)
  DF.hat=rep(NA,Nm)
  
  # Now create Ft. 
  for (z in 1:Nm) {
    pars=models[(2:Nn),z] 
    pars=pars[pars!=0]
    Ft=array(1,dim=c(Nt,length(pars)+1))
    if (ncol(Ft)>1) {
      Ft[,2:ncol(Ft)]=Data[,pars] # selects parents 
    }  
    
    # Calculate the log predictive likelihood, for each value of delta
    for (j in 1:nd) {
      if (cpp) {
        # new C++ implementation
        lpl=c(dlmLplCpp(Yt, t(Ft), delta[j], m0, CS0, n0, d0))
        lpldet[z,j]=sum(lpl[nbf:Nt])
      } else {
        # original native R
        a=dlm.lpl(Yt, t(Ft), delta=delta[j], priors=priors)
        lpldet[z,j]=sum(a$lpl[nbf:Nt])
      }
    }
    
    if (sum(is.na(lpldet[z,])) == length(lpldet[z,])) {
      lplmax[z] = -.Machine$double.xmax
    } else {
      lplmax[z]=max(lpldet[z,],na.rm=TRUE)
      DF.hat[z] = delta[which.max(lpldet[z,])]
    }
  }
  
  lpl_noint = 99 # TODO
  
  # Output model.store
  model.store=rbind(models,lplmax,DF.hat)
  rownames(model.store)=NULL
  
  runtime=(proc.time()-ptm)
  
  output<-list(model.store=model.store,runtime=runtime, lpl_noint=lpl_noint)
  return(output)
}

#' Mean-centers time series in a 2D array (time points x nodes),
#' i.e. each node's time series has mean zero.
#'
#' @param X 2D array with dimensions time points x nodes.
#'
#' @return M 2D array.
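#'
#' @examples
#' X <- matrix(rnorm(20), 10, 2) + 5
#' round(colMeans(center(X)), 12)  # each column mean is now zero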
#' @export
center <- function(X) {
  d = dim(X)
  M = matrix(NA, d[1], d[2])
  
  for (i in 1:d[2]) {
    M[,i]=scale(X[,i], center = T, scale = F)
  }
  
  return(M)
}

#' Estimate a subject's full network: runs an exhaustive search on every node.
#'
#' @param X array with dimensions timeseries x nodes.
#' @param id subject ID. If set, results are saved to a txt file.
#' @param nbf  Log Predictive Likelihood will sum from (and including) this time point. 
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param bf Bayes factor for network thresholding.
#' @param priors list with prior hyperparameters.
#' @param path a path where results are written.
#'
#' @return store list with results.
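#'
#' @examples
#' # Sketch: a 3-node subject network from simulated data (cpp=FALSE avoids the compiled code)
#' set.seed(1)
#' X <- matrix(rnorm(3 * 60), 60, 3)
#' s <- subject(X, cpp = FALSE)
#' s$adj$am  # adjacency matrix of the winning models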
#' @export
subject <- function(X, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, bf = 20,
                    priors = priors.spec(), path = getwd() ) {
  N=ncol(X)  # nodes
  M=2^(N-1)  # rows/models
  models = array(rep(0,(N+2)*M*N),dim=c(N+2,M,N))
  
  for (n in 1:N) {
    tmp=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors)
    models[,,n]=tmp$model.store
    if (!is.null(id)) {
      write(t(models[,,n]), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)), ncolumns = M)
    }
  }
  
  store=list()
  store$models=models
  store$winner=getWinner(models,N)
  store$adj=getAdjacency(store$winner,N)
  store$thr=getThreshAdj(store$adj, store$models, store$winner, bf = bf)
  
  return(store)
}

#' Runs exhaustive search on a single node and saves results in a txt file.
#'
#' @param X array with dimensions timeseries x nodes.
#' @param n node number.
#' @param id subject ID. If set, results are saved to a txt file.
#' @param nbf  Log Predictive Likelihood will sum from (and including) this time point. 
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#' @param path a path where results are written.
#' 
#' @return store list with results.
#' @export
node <- function(X, n, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec(),
                 path=getwd() ) {
  N=ncol(X)  # nodes
  M=2^(N-1)  # rows/models
  
  store=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors)
  if (!is.null(id)) {
    write(t(store$model.store), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)), ncolumns = M)
  }
  return(store)
}

#' Reads a single subject's network from txt files.
#' @param path path to the txt files.
#' @param id identifier to select all of a subject's nodes, e.g. a pattern containing subject ID and session number.
#' @param nodes number of nodes.
#' @param bf Bayes factor for network thresholding.
#'
#' @return store list with results.
#' @export
read.subject <- function(path, id, nodes, bf = 20) {
  models = array(0,dim=c(nodes+2,2^(nodes-1),nodes))
  for (n in 1:nodes) {
    #file=sprintf("%s_node_%03d.txt", id, n)
    #models[,,n] = as.matrix(read.table(file)) # quite slow
    file=list.files(path, pattern=glob2rx(sprintf("%s*_node_%03d.txt", id, n)))
    models[,,n] = as.matrix(fread(file.path(path,file))) # faster, from package "data.table"
  }
  store=list()
  store$models=models
  store$winner=getWinner(models,nodes)
  store$adj=getAdjacency(store$winner,nodes)
  store$thr=getThreshAdj(store$adj, store$models, store$winner, bf = bf)
  
  return(store)
}

#' Get the winning network by maximizing the log predictive likelihood (LPL)
#' over a set of models.
#'
#' @param models 2D model matrix, or 3D array of models x node.
#' @param nodes number of nodes.
#'
#' @return winner array with highest scored model(s).
#' @export
getWinner <- function(models, nodes) {
  
  dims=length(dim(models))
  
  if (dims==2) {
    winner = models[,which.max(models[nodes+1,])]
  } else if (dims==3) {
    winner = array(0, dim=c(nodes+2,nodes))
    for (n in 1:nodes) {
      winner[,n]=models[,which.max(models[nodes+1,,n]),n]
    }
  }
  return(winner)
}

#' Get adjacency and associated likelihoods (LPL) and discount factors (df) of winning models.
#'
#' @param winner 2D matrix.
#' @param nodes number of nodes.
#'
#' @return adj, 2D adjacency matrix.
#' @export
getAdjacency <- function(winner, nodes) {
  
  am = array(rep(0,nodes*nodes),dim=c(nodes,nodes))
  lpl = df = array(rep(NA,nodes*nodes),dim=c(nodes,nodes))
  for (n in 1:nodes) {
    p = winner[2:nodes,n]  # parents
    p = p[p>0]
    am[p,n] = 1
    lpl[p,n] = winner[nodes+1,n]
    df[p,n]  = winner[nodes+2,n]
  }
  return(list(am=am, lpl=lpl, df=df))
}

#' Plots a network as an adjacency matrix.
#'
#' @param adj 2D adjacency matrix.
#' @param title title.
#' @param colMapLabel label for colormap.
#' @param hasColMap FALSE turns off color map, default is NULL (on).
#' @param lim vector with min and max value for color scaling.
#' @param gradient gradient colors.
#' @param nodeLabels node labels.
#' @param axisTextSize text size of the y and x tick labels.
#' @param xAngle orientation of the x tick labels.
#' @param titleTextSize text size of the title.
#'
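#' @examples
#' # Sketch: plot a random 5-node weighted adjacency matrix
#' # (assumes ggplot2 and reshape2 are loaded, as imported by this package)
#' gplotMat(matrix(runif(25), 5, 5), title = "random network")
#'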
#' @export
gplotMat <- function(adj, title=NULL, colMapLabel=NULL, hasColMap=NULL, lim=c(0, 1),
                     gradient=c("white", "orange", "red"), nodeLabels=waiver(), axisTextSize=12, xAngle=0, titleTextSize=12) {
  x = melt(adj)
  names(x)[1] = "Parent"
  names(x)[2] = "Child"
  
  # handle scales in case custom labeling is set
  if (is.list(nodeLabels)) {
    x_scale = scale_x_continuous()
    y_scale = scale_y_reverse()
  } else {
    x_scale = scale_x_continuous(breaks = 1:ncol(adj), labels = nodeLabels)
    y_scale = scale_y_reverse(breaks = 1:ncol(adj), labels = nodeLabels)
  }
  
  ggplot(x, aes_string(x = "Child", y = "Parent", fill = "value")) +
    geom_tile(color = "gray60") +

    scale_fill_gradient2(
      low  = gradient[1],
      mid  = gradient[2],
      high = gradient[3],
      midpoint = sum(lim)/2,
      limit = lim,
      space = "Lab",
      name = colMapLabel) + 

    theme(#axis.ticks.x = element_blank(),
          axis.ticks = element_blank(),
          axis.line = element_blank(),
          text = element_text(size=12),
          plot.title = element_text(size=titleTextSize),
          axis.text.x = element_text(size=axisTextSize,angle=xAngle),
          axis.text.y = element_text(size=axisTextSize),
          #panel.grid.major = element_line(colour="black", size = (1.5)),
          #panel.grid.minor = element_line(size = (0.2), colour="grey")
          ) +

    x_scale + y_scale + ggtitle(title) + guides(fill=hasColMap)
}

#' Performs a binomial test with FDR correction for network edge occurrence.
#'
#' @param adj adjacency matrix, nodes x nodes x subj, or nodes x nodes x runs x subj.
#' @param alter type of binomial test, "two.sided" (default), "less", or "greater".
#' @param fdr false discovery rate (FDR) control, default is 0.05.
#'
#' @return store list with results.
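#'
#' @examples
#' # Sketch: 20 subjects' binary 5-node networks with a recurring edge 1 -> 2
#' set.seed(1)
#' adj <- array(rbinom(5 * 5 * 20, 1, 0.1), dim = c(5, 5, 20))
#' adj[1, 2, ] <- 1
#' res <- binom.nettest(adj)
#' res$adj_fdr  # edge proportions surviving FDR correction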
#' @export
binom.nettest <- function(adj, alter="two.sided", fdr=0.05) {
  
  mydim=dim(adj)
  M = sum(adj) # total edge count over all subjects (and runs)
  
  if (length(mydim) == 3) { # without runs
    N=mydim[3] # No. of subjects
    N_Comp=mydim[1]
    adj_ = apply(adj, c(1,2), sum)
    
  } else if (length(mydim) == 4) { # with runs
    N=mydim[4] # No. of subjects
    N_runs=mydim[3]
    N_Comp=mydim[1]
    N=N*N_runs # adjust N for the number of runs
    adj_ = apply(adj, c(1,2), sum) # sum across subjects and runs
  }
  
  # binomial test for every edge occurrence
  p0 = M/N/N_Comp/(N_Comp-1) # H0 edge probability
  
  p = array(NA, dim=c(N_Comp,N_Comp))
  for (i in 1:N_Comp) {
    for (j in 1:N_Comp) {
      if (i !=j) {
        tmp=binom.test(adj_[i,j],N,p=p0, alternative=alter)
        p[i,j]=tmp$p.value
      }
    }
  }
  
  # FDR
  p_fdr=matrix(p.adjust(p, method = "fdr"),N_Comp,N_Comp)
  adj_fdr=adj_
  adj_fdr[p_fdr>=fdr]=NA
  
  store=list()
  store$p0=p0
  store$p=p
  store$p_fdr=p_fdr
  store$adj=adj_/N
  store$adj_fdr=adj_fdr/N # significant proportions
  
  return(store)
}

#' Reshapes a 2D concatenated time series into 3D according to no. of subjects and volumes.
#'
#' @param ts a 2D time series volumes x nodes.
#' @param N No. of subjects.
#' @param V No. of volumes.
#' 
#' @return M 3D matrix, time series x nodes x subjects.
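#'
#' @examples
#' ts2d <- matrix(rnorm(2 * 50 * 3), 2 * 50, 3)  # 2 subjects' time series stacked in time
#' M <- reshapeTs(ts2d, N = 2, V = 50)
#' dim(M)  # 50 x 3 x 2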
#' @export
reshapeTs <- function(ts, N, V) {
  NC = ncol(ts)
  M = array(NA, dim=c(V,NC,N))
  for (i in 1:N) {
    idx = ((i-1)*V+1):(V*i)
    M[,,i] = ts[idx,]
  }
  return(M)
}

#' Mean correlation of time series across subjects.
#'
#' @param ts a 3D time series, time series x nodes x subjects.
#' 
#' @return M mean correlation matrix.
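#'
#' @examples
#' set.seed(1)
#' ts <- array(rnorm(50 * 3 * 2), dim = c(50, 3, 2))
#' corTs(ts)  # 3 x 3 mean correlation matrix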
#' @export
corTs <- function(ts) {
  d=dim(ts)
  N=d[3] # No. subjects
  N_nodes=d[2]
  R=array(NA, dim=c(N_nodes,N_nodes,N))
  for (s in 1:N) {
    R[,,s]=cor(ts[,,s])
  }
  M = apply(R, c(1,2), mean)
  return(M)
}

#' Get specific parent model from all models.
#'
#' @param models a 2D model matrix.
#' @param parents a vector with parent nodes.
#' 
#' @return mod specific parent model.
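#'
#' @examples
#' # Sketch: build a model.store-like matrix, then pull out the model with parents 2 and 3
#' M <- model.generator(3, 1)
#' models <- rbind(1:ncol(M), M, NA, NA)  # model number, parents, then LPL/df placeholders
#' getModel(models, parents = c(2, 3))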
#' @export
getModel <- function(models, parents) {
  Nn = nrow(models) - 2 # No. of nodes
  Nm = ncol(models) # No. of models
  parents = c(parents, rep(0, (Nn - 1) - length(parents))) # add fill zeros
  for (m in 1:Nm) {
    if (sum(models[2:Nn,m] == parents) == Nn - 1) {
      mod = models[,m]
    }
  }
  return(mod)
}

#' A group is a list containing restructured data from subjects for easier group analysis.
#'
#' @param subj a list of subjects.
#' 
#' @return group a list.
#' @export
mdm.group <- function(subj) {
  Nn=ncol(subj[[1]]$adj$am)
  N=length(subj)
  
  am = lpl = df = tam = tbi = array(NA, dim=c(Nn,Nn,N))
  df_ = array(NA, dim=c(N,Nn))
  tlpls = array(NA, dim=c(Nn,Nn,2,N))
  winner = array(NA, dim=c(Nn+2,Nn,N))
  models = array(NA, dim=c(Nn+2,2^(Nn-1),Nn,N))
  for (s in 1:N) {
    am[,,s]  = subj[[s]]$adj$am
    lpl[,,s] = subj[[s]]$adj$lpl
    df[,,s]  = subj[[s]]$adj$df
    df_[s,]  = subj[[s]]$winner[nrow(subj[[s]]$winner),]
    winner[,,s]  = subj[[s]]$winner
    models[,,,s]  = subj[[s]]$models
    
    # thresholded measures
    tam[,,s]  = subj[[s]]$thr$am
    tbi[,,s]  = subj[[s]]$thr$bi
    tlpls[,,,s]= subj[[s]]$thr$lpls
  }
  
  group=list(am=am,lpl=lpl,df=df,tam=tam,tbi=tbi,tlpls=tlpls,
             df_=df_,winner=winner,models=models)
  return(group)
}

#' A group is a list containing restructured data from subjects for easier group analysis.
#'
#' @param subj a list of subjects.
#' 
#' @return group a list.
#' @export
patel.group <- function(subj) {
  Nn=ncol(subj[[1]]$kappa)
  N=length(subj)
  
  kappa = tkappa = tau = ttau = net = tnet = array(NA, dim=c(Nn,Nn,N))
  for (s in 1:N) {
    kappa[,,s]  = subj[[s]]$kappa
    tkappa[,,s] = subj[[s]]$tkappa
    tau[,,s]    = subj[[s]]$tau
    ttau[,,s]   = subj[[s]]$ttau
    net[,,s]    = subj[[s]]$net
    tnet[,,s]   = subj[[s]]$tnet
  }
  
  group=list(kappa=kappa,tkappa=tkappa,tau=tau,ttau=ttau,net=net,tnet=tnet)
  return(group)
}

#' Get thresholded adjacency network.
#'
#' @param adj list with network adjacency from getAdjacency().
#' @param models matrix 3D with full model estimates.
#' @param winner matrix 2D with winning models.
#' @param bf Bayes factor for network thresholding.
#' 
#' @return thr list with thresholded network adjacency.
#' @export
getThreshAdj <- function(adj, models, winner, bf = 20) {
  
  Nn = ncol(adj$am)
  # determine bidirectional edges
  bi = array(0, dim=c(Nn, Nn))
  for (i in 1:Nn) {
    for (j in 1:Nn) {
      if (adj$am[i,j] == 1 & adj$am[j,i] == 1) {
        bi[i,j] = 1
      }
    }
  }
  
  # Calculate models
  B=bi*upper.tri(bi)
  lpls=array(NA, dim=c(Nn, Nn, 2))
  for (i in 1:Nn) {
    for (j in 1:Nn) {
      # if bidirectional, calculate 3 models
      # A+B:i<>j, A:i>j, B:i<j
      # A+B: LPLj + LPLi
      # A  : LPLj + LPLi-j
      # B  : LPLi + LPLj-i
      if (B[i,j] == 1) {
        
        # bidirectional LPL
        lpls[i,j,1] = lpls[j,i,1] = adj$lpl[i,j] + adj$lpl[j,i]
        
        # uni i->j, LPL without parent j
        p = winner[,i][2:Nn]
        p = p[p != j & p!= 0] # remove node j
        lpls[i,j,2] = adj$lpl[i,j] + getModel(models[,,i], p)[Nn+1]
        
        # uni j->i, LPL without parent i
        p = winner[,j][2:Nn]
        p = p[p != i & p!= 0] # remove node i
        lpls[j,i,2] = adj$lpl[j,i] + getModel(models[,,j], p)[Nn+1]
      }
    }
  }
  
  # am matrix
  am=adj$am
  for (i in 1:Nn) {
    for (j in 1:Nn) {
      if (B[i,j] == 1) {
        if (lpls[i,j,1] - bf <= max(lpls[i,j,2], lpls[j,i,2]) ) {
          # if the bidirectional LPL does not beat the best unidirectional LPL by at
          # least the log Bayes factor penalty, take the simpler unidirectional model.
          if (lpls[i,j,2] > lpls[j,i,2]) {
            am[i,j] = 1; am[j,i] = 0
          } else if (lpls[i,j,2] < lpls[j,i,2])  {
            am[i,j] = 0; am[j,i] = 1
          }
        }
      }
    }
  }
    
  thr=list()
  thr$bi=bi # bidirectional edges
  thr$lpls=lpls # lpls
  thr$am=am # adjacency matrix (thresholded)
  
  return(thr)
}

#' Performance of estimates, such as sensitivity, specificity, and more.
#'
#' @param x estimated binary network matrix.
#' @param xtrue true binary network matrix.
#' 
#' @return perf vector.
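#'
#' @examples
#' xtrue <- matrix(c(0,1,0, 0,0,1, 0,0,0), 3, 3, byrow = TRUE)
#' xest  <- matrix(c(0,1,0, 0,0,0, 1,0,0), 3, 3, byrow = TRUE)
#' perf(xest, xtrue)  # tpr, spc, ppv, npv, fpr, fnr, fdr, acc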
#' @export
perf <- function(x, xtrue) {
  
  # in case xtrue contains NA instead of 0
  xtrue[is.na(xtrue)]=0
  
  d = dim(x)
  Nn=d[1]
  if (length(d) == 3) {
    N=d[3]
  } else if (length(d) == 2) {
    x=array(x, dim=c(Nn,Nn,1))
    N=1
  }
  
  perf=array(NA,dim=c(N,8))
  
  for (i in 1:N) {
    # see https://en.wikipedia.org/wiki/Sensitivity_and_specificity
    TP = sum(x[,,i] & xtrue)
    FP = sum((x[,,i] - xtrue) == 1)
    FN = sum((xtrue - x[,,i]) == 1)
    TN = sum(!x[,,i] & !xtrue) - ncol(x[,,i]) # exclude the diagonal from the true negatives
    
    tpr = TP/(TP+FN) # 1
    spc = TN/(TN+FP) # 2
    ppv = TP/(TP+FP) # 3
    npv = TN/(TN+FN) # 4
    fpr = FP/(FP+TN) # 5
    fnr = FN/(TP+FN) # 6
    fdr = FP/(TP+FP) # 7
    acc = (TP+TN)/(TP+FP+FN+TN) # 8
    
    perf[i,]=c(tpr,spc,ppv,npv,fpr,fnr,fdr,acc)
  }
  
  return(perf)
}
  
#' Scaling data. Zero-centers each node and scales the data so that the average node variance is 1.
#'
#' @param X time x node 2D matrix, or 3D with subjects as the 3rd dimension.
#'
#' @return S centered and scaled matrix.
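#'
#' @examples
#' set.seed(1)
#' X <- matrix(rnorm(100 * 3, mean = 2, sd = 4), 100, 3)
#' S <- scaleTs(X)
#' colMeans(S)  # approximately zero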
#' @export
scaleTs <- function(X) {
  D=dim(X)
  if (length(D)==2) {
    tmp=array(NA, dim=c(D[1],D[2],1))
    tmp[,,1]=X
    X=tmp
    D=dim(X)
  }
  
  S=array(NA, dim=c(D[1],D[2],D[3]))
  
  # center data
  for (s in 1:D[3]) {
    colm=colMeans(X[,,s])
    S[,,s]=X[,,s]-t(array(rep(colm, D[1]), dim=c(D[2],D[1])))
  }
  
  # scale data
  for (s in 1:D[3]) {
    SD=sqrt(mean(apply(S[,,s], 2, var)))
    S[,,s]=S[,,s]/SD
  }
  
  if (D[3]==1) {
    S=S[,,1]
  }
  return(S)
}

#' Patel's pairwise connectivity measures: connection strength kappa and direction tau.
#'
#' @param X time x node 2D matrix.
#' @param lower lower percentile cutoff for 0-1 scaling.
#' @param upper upper percentile cutoff for 0-1 scaling.
#' @param bin threshold for conversion to binary values.
#' @param TK two-sided significance thresholds (lower, upper) for connection strength kappa, e.g. from \code{\link{perm.test}}.
#' @param TT two-sided significance thresholds (lower, upper) for direction tau, e.g. from \code{\link{perm.test}}.
#'
#' @return PT list with strengths kappa, direction tau, and net structure.
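#'
#' @examples
#' # Sketch with illustrative two-sided thresholds; in practice use perm.test:
#' set.seed(1)
#' X <- matrix(rnorm(200 * 4), 200, 4)
#' pt <- patel(X, TK = c(-0.1, 0.1), TT = c(-0.1, 0.1))
#' pt$tnet  # binary net with both kappa and tau thresholded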
#' @export
patel <- function(X, lower=0.1, upper=0.9, bin=0.75, TK=0, TT=0) {
  
  nt=nrow(X)
  nn=ncol(X)
  
  # scale data into 0,1 interval
  X10 = apply(X, 2, quantile, lower) # cutoff 0.1 percentile
  X90 = apply(X, 2, quantile, upper) # cutoff 0.9 percentile
  a = sweep(sweep(X, 2, X10), 2, X90-X10, FUN="/") # center, scale data
  X2 = apply(a, c(1,2), function(v) max(min(v,1),0)) # keep between 0,1
  
  # binarize
  X2=(X2>bin)*1 # convert to double
  # Joint activation probability of timeseries a, b
  # See Table 2, Patel et al. 2006
  theta1 = crossprod(X2)/nt        # a=1, b=1 -- a and b active
  theta2 = crossprod(X2,1-X2)/nt   # a=1, b=0 -- a active, b not
  theta3 = crossprod(1-X2,X2)/nt   # a=0, b=1
  theta4 = crossprod(1-X2,1-X2)/nt # a=0, b=0
  
  # directionality tau [-1, 1]
  tau = matrix(0, ncol(X2), ncol(X2))
  inds = theta2 >= theta3
  tau[inds] = 1 - (theta1[inds] + theta3[inds])/(theta1[inds] + theta2[inds])
  tau[!inds] = (theta1[!inds] + theta2[!inds])/(theta1[!inds] + theta3[!inds]) - 1
  tau=-tau
  tau[as.logical(diag(nn))]=NA
  # tau(a,b) positive, a is ascendant to b (a is parent)
  
  # functional connectivity kappa [-1, 1]
  E=(theta1+theta2)*(theta1+theta3)
  max_theta1=pmin(theta1+theta2,theta1+theta3)  # elementwise upper bound on theta1 per pair
  min_theta1=pmax(0,2*theta1+theta2+theta3-1)   # elementwise lower bound on theta1 per pair
  inds = theta1>=E
  D = matrix(0, ncol(X2), ncol(X2))
  D[inds]=0.5+(theta1[inds]-E[inds])/(2*(max_theta1[inds]-E[inds]))
  D[!inds]=0.5-(theta1[!inds]-E[!inds])/(2*(E[!inds]-min_theta1[!inds]))
  
  kappa=(theta1-E)/(D*(max_theta1-E) + (1-D)*(E-min_theta1))
  kappa[as.logical(diag(nn))]=NA
  
  # thresholding
  tkappa = kappa
  tkappa[kappa >= TK[1] & kappa <= TK[2]] = 0
  ttau = tau
  ttau[tau >= TT[1] & tau <= TT[2]] = 0
  
  # binary nets: we focus on positive associations
  net = (tkappa > 0)*(tau > 0)  # only kappa thresholded
  net[diag(nn)==1]=0 # remove NA
  tnet = (tkappa > 0)*(ttau > 0) # both tau and kappa thresholded
  tnet[diag(nn)==1]=0 # remove NA
  
  PT=list(kappa=kappa, tau=tau, tkappa=tkappa, ttau=ttau, net=net, tnet=tnet)
  return(PT)
}

#' Permutation test for Patel's kappa and tau. Creates a null distribution of
#' kappa and tau values by resampling subjects.
#'
#' @param X time x node x subjects 3D matrix.
#' @param alpha significance level.
#'
#' @return stat lower and upper significance thresholds.
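#'
#' @examples
#' # Sketch: null thresholds from 10 subjects of random data
#' set.seed(1)
#' X <- array(rnorm(100 * 4 * 10), dim = c(100, 4, 10))
#' thr <- perm.test(X)
#' thr$kappa  # lower and upper kappa thresholds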
#' @export
perm.test <- function(X, alpha=0.05) {
  
  low = alpha/2      # two sided test
  up  = 1 - alpha/2
  
  N  = dim(X)[3] # Nr. of subjects
  Nn = dim(X)[2] # Nr. of nodes
  
  # shuffle across subjects with fixed nodes
  ka = array(NA,dim=c(Nn,Nn,N)) # kappa null distribution
  ta = array(NA,dim=c(Nn,Nn,N)) # tau null distribution
  X_= array(NA, dim=dim(X))
  for (s in 1:N) {
    for (n in 1:Nn) {
      X_[,n,s]=X[,n,sample(N,1)] # draw a random subject (with replacement)
    }
    p=patel(X_[,,s])
    ka[,,s]=p$kappa
    ta[,,s]=p$tau
  }
  
  # two sided sign. test
  stat = list()
  stat$kappa = quantile(ka[!is.na(ka)], probs=c(low, up)) # two sided
  stat$tau   = quantile(ta[!is.na(ta)], probs=c(low, up)) # two sided
  
  return(stat)
}

#' Replaces NAs in a matrix with zeros.
#' 
#' @param M Matrix.
#'
#' @return matrix with NAs set to zero.
#' @export
rmna <- function(M) {
  
  M[is.na(M)] = 0
  return(M)
  
}

#' Sets the diagonal of a matrix to zero.
#' 
#' @param M Matrix.
#'
#' @return matrix with a zero diagonal.
#' @export
rmdiag <- function(M) {
  
  M[as.logical(diag(nrow(M)))]=0
  return(M)
}

#' Stepwise forward non-exhaustive greedy search; calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node  The node to find parents for.
#' @param nbf   The Log Predictive Likelihood will sum from (and including) this time point. 
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
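#'
#' @examples
#' # Sketch (requires the package's compiled dlmLplCpp routine):
#' \dontrun{
#' set.seed(1)
#' X <- matrix(rnorm(5 * 100), 100, 5)
#' fwd <- stepwise.forward(X, node = 1)
#' }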
#' @export
stepwise.forward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01), 
                             max.break=TRUE, priors=priors.spec()){
  
  # Define the prior hyperparameters
  m0 = priors$m0
  CS0 = priors$CS0
  n0 = priors$n0
  d0 = priors$d0
  
  Nn = ncol(Data) # the number of nodes
  Nm = 2^(Nn-1)   # the number of models (per node)
  
  # Begin at model 1, the no parent (intercept only) model
  model.store = array(0,dim=c((Nn+2),1))
  
  # Find the Log Predictive Likelihood and discount factor for the zero parent model
  Yt = Data[,node]    # the time series of the node we wish to find parents for
  Nt = length(Yt)     # the number of time points
  nd = length(delta)  # the number of deltas
  
  # Create Ft. For the zero parent model, this is simply a column of ones, representing an intercept.
  Ft=rep(1,Nt)      
  lpl.delta=rep(NA,nd) 
  
  for (j in 1:nd){
    lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
    lpl.delta[j]=sum(lpl[nbf:Nt])}
  
  lpl.max = max(lpl.delta,na.rm=TRUE)
  w = which(lpl.delta==lpl.max)
  DF.hat = delta[w]
  
  # Store the model number, model, LPL and discount factor
  model.store[(Nn+1),] = lpl.max
  model.store[(Nn+2),] = DF.hat
  
  # The parents in the model
  pars = numeric(0)
  
  for (N in 1:(Nn-1)){
    
    # Find all the models with the correct length and containing the previously selected parents
    pars_add = c(1:Nn)[-c(node,pars)]
    
    ms.new = array(0,dim=c((Nn+2),(Nn-N)))
    if (length(pars)>0){ms.new[2:(length(pars)+1),] = pars}
    ms.new[(length(pars)+2),] = pars_add
    
    # Find the LPL and discount factor of this subset of models
    nms=ncol(ms.new) # How many models are being considered?
    
    # Create empty arrays for the LPL scores and the deltas
    lpl.delta=array(NA,c(nms,length(delta)))
    lpl.max=rep(NA,nms)
    DF.hat=rep(NA,nms)
    
    # Now create Ft.
    for (i in 1:nms){
      pars = ms.new[(2:Nn),i] 
      pars = pars[pars!=0]
      Ft=array(1,dim=c(Nt,length(pars)+1))
      if (ncol(Ft)>1){Ft[,2:ncol(Ft)] = Data[,pars]}
      
      # Calculate the Log Predictive Likelihood for each value of delta, for the specified models
      for (j in 1:nd){
        lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
        lpl.delta[i,j]=sum(lpl[nbf:Nt])
      }
      
      lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
      w = which(lpl.delta[i,]==lpl.max[i])
      DF.hat[i] = delta[w]
    }
    
    ms.new[(Nn+1),] = lpl.max
    ms.new[(Nn+2),] = DF.hat
    
    # Find the highest LPL so far
    max.score = max(model.store[(Nn+1),]) 
    logBF = lpl.max - max.score
    W = which(logBF > 0)
    
    # Update model.store
    model.store = cbind(model.store,ms.new)
    
    if (length(W)==0 & max.break==TRUE){
      break
    } else {
      W.max = which.max(logBF)
      
      # Update the model parents
      pars = ms.new[(2:Nn),W.max]
      pars = pars[pars!=0]
    }
  }
  
  model.store[1,] = c(1:ncol(model.store)) # attach a model number
  
  return(model.store)
}

#' Stepwise backward non-exhaustive greedy search; calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node  The node to find parents for.
#' @param nbf   The Log Predictive Likelihood will sum from (and including) this time point. 
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#' @export
stepwise.backward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01), 
                              max.break=TRUE, priors=priors.spec()){
  
  # Define the prior hyperparameters
  m0 = priors$m0
  CS0 = priors$CS0
  n0 = priors$n0
  d0 = priors$d0
  
  Nn = ncol(Data) # the number of nodes
  Nm = 2^(Nn-1)   # the number of models (per node)
  
  # Begin at the all parent model
  model.store = array(0,dim=c((Nn+2),1)) 
  model.store[(2:Nn),1] = c(1:Nn)[-node]
  
  # Find the Log Predictive Likelihood and discount factor for the all parent model
  Yt = Data[,node]    # the time series of the node we wish to find parents for
  Nt = length(Yt)     # the number of time points
  nd = length(delta)  # the number of deltas
  
  # The parents in the model
  pars = model.store[2:Nn,1]
  pars = pars[pars!=0]
  
  Ft = array(1,dim=c(Nt,Nn)) 
  Ft[,2:Nn] = Data[,pars]
  
  lpl.delta=rep(NA,nd) 
  
  for (j in 1:nd){
    lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
    lpl.delta[j]=sum(lpl[nbf:Nt])}
  
  lpl.max = max(lpl.delta,na.rm=TRUE)
  w = which(lpl.delta==lpl.max)
  DF.hat = delta[w]
  
  model.store[(Nn+1),] = lpl.max
  model.store[(Nn+2),] = DF.hat
  
  for (N in 1:(Nn-1)){
    
    # Find all the models with the correct length and missing the previously removed parents
    pars_add = combn(pars,(Nn-(N+1)))
    
    ms.new = array(0,dim=c((Nn+2),(Nn-N))) 
    if (length(pars_add)>0){ms.new[2:(nrow(pars_add)+1),] = pars_add}
    
    # Find the LPL and discount factor of these models
    nms=ncol(ms.new) # How many models are being considered?
    
    # Create empty arrays for the lpl scores and the deltas
    lpl.delta=array(NA,c(nms,length(delta)))
    lpl.max=rep(NA,nms)
    DF.hat=rep(NA,nms)
    
    # Now create Ft. 
    for (i in 1:nms){
      pars=ms.new[(2:Nn),i]
      pars=pars[pars!=0]
      
      Ft=array(1,dim=c(Nt,length(pars)+1))
      if (ncol(Ft)>1){Ft[,2:ncol(Ft)]=Data[,pars]}
      
      # Calculate the log predictive likelihood for each value of delta, for the specified models
      for (j in 1:nd){
        lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
        lpl.delta[i,j]=sum(lpl[nbf:Nt])
      }
      
      lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
      w = which(lpl.delta[i,]==lpl.max[i])
      DF.hat[i] = delta[w]
    }
    
    ms.new[(Nn+1),] = lpl.max
    ms.new[(Nn+2),] = DF.hat
    
    max.score = max(model.store[(Nn+1),]) # Find the highest LPL calculated so far
    logBF = lpl.max - max.score
    W = which(logBF > 0)  
    
    # Update model.store
    model.store = cbind(model.store,ms.new)
    
    if (length(W)==0 & max.break==TRUE){
      break
    } else {
      W.max = which.max(logBF)
      
      # Update the model parents 
      pars = ms.new[(2:Nn),W.max]
      pars = pars[pars!=0]
    }
  }
  
  model.store[1,] = c(1:ncol(model.store)) # attach a model number
  
  return(model.store)
}

#' Stepwise combine: combines the stepwise forward and the stepwise backward model.
#'
#' @param forward_matrix The winning sets of parents using a Forward Selection model search. A
#' matrix with dimension \code{Nn+2} x \code{Nn}, rows \code{1:Nn} are the parents (ones and zeros),
#' rows \code{(Nn+1):(Nn+2)} are the LPL and discount factor.
#' @param backward_matrix The winning sets of parents using a Backward Elimination model search.
#' A matrix with dimension \code{Nn+2} x \code{Nn}, rows \code{1:Nn} are the parents (ones and
#' zeros), rows \code{(Nn+1):(Nn+2)} are the LPL and discount factor.
#'
#' @return
#' stepwise_combine_matrix The adjacency network, LPLs and discount factors when the Forward
#' Selection and Backward Elimination model searches are combined.
#' @export
stepwise.combine <- function(forward_matrix, backward_matrix){
  
  Nn = ncol(forward_matrix)
  step_combine_matrix = array(NA,dim=c((Nn+2),Nn))
  
  for (k in 1:Nn){
    # for each node, keep whichever search scored the higher LPL
    if (forward_matrix[(Nn+1),k] >= backward_matrix[(Nn+1),k]){step_combine_matrix[,k] = forward_matrix[,k]}
    if (forward_matrix[(Nn+1),k] <  backward_matrix[(Nn+1),k]){step_combine_matrix[,k] = backward_matrix[,k]}
  }
  
  return(step_combine_matrix)
}
