# R/mdm.R In multdyn: Multiregression Dynamic Models

#' Specify the priors. Without inputs, defaults will be used.
#'
#' @param m0 the value of the prior mean at time \code{t=0}, scalar (assumed to be the same
#'  for all nodes). The default is zero.
#' @param CS0 controls the scaling of the prior variance matrix \code{C*_{0}} at time
#'  \code{t=0}. The default is 3, giving a non-informative prior for \code{C*_{0}, 3 x (p x p)}
#'  identity matrix. \code{p} is the number of thetas.
#' @param n0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#'  is a non-informative prior, with \code{n0 = d0 = 0.001}. \code{n0} has to be higher than 0.
#' @param d0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#'  is a non-informative prior, with \code{n0 = d0 = 0.001}.
#'
#' @details At time \code{t=0}, \code{(theta_{0} | D_{0}, phi) ~ N(m_{0},C*_{0} x phi^{-1})},
#'  where \code{D_{0}} denotes the set of initial information.
#'
#' @return \code{priors} a list with the prior hyperparameters. Relevant to \code{\link{dlm.lpl},
#'
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
priors.spec <- function(m0 = 0, CS0 = 3, n0 = 0.001, d0 = 0.001) {

priors = list(m0 = m0, CS0 = CS0, n0 = n0, d0 = d0)
return(priors)

}

#' Calculate the log predictive likelihood for a specified set of parents and a fixed delta.
#'
#' @param Yt the vector of observed time series, length \code{T}.
#' @param Ft the matrix of covariates, dim = number of thetas (\code{p}) x number of time
#'  points (\code{T}), usually a row of 1s to represent an intercept and the time series of
#'  the parent nodes.
#' @param delta discount factor (scalar).
#' @param priors list with prior hyperparameters.
#'
#' @return
#' \item{mt}{the vector or matrix of the posterior mean (location parameter), dim = \code{p x T}.}
#' \item{Ct}{and \code{CSt} the posterior scale matrix \code{C_{t}} is \code{C_{t} = C*_{t} x S_{t}},
#'  with dim = \code{p x p x T}, where \code{S_{t}} is a point estimate for the observation variance
#'  \code{phi^{-1}}}
#' \item{Rt}{and \code{RSt} the prior scale matrix \code{R_{t}} is \code{R_{t} = R*_{t} x S_{t-1}},
#'  with dim = \code{p x p x T}, where \code{S_{t-1}} is a point estimate for the observation
#'  variance \code{phi^{-1}} at the previous time point.}
#' \item{nt}{and \code{dt} the vectors of the updated hyperparameters for the precision \code{phi}
#'  with length \code{T}.}
#' \item{S}{the vector of the point estimate for the observation variance \code{phi^{-1}} with
#'  length \code{T}.}
#' \item{ft}{the vector of the one-step forecast location parameter with length \code{T}.}
#' \item{Qt}{the vector of the one-step forecast scale parameter with length \code{T}.}
#' \item{ets}{the vector of the standardised forecast residuals with length \code{T},
#'  \eqn{\newline} defined as \code{(Y_{t} - f_{t}) / sqrt (Q_{t})}.}
#' \item{lpl}{the vector of the Log Predictive Likelihood with length \code{T}.}
#'
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
dlm.lpl <- function(Yt, Ft, delta, priors = priors.spec() ) {

m0 = priors$m0 CS0 = priors$CS0
n0 = priors$n0 d0 = priors$d0

CS0 = CS0*diag(nrow(Ft))
m0 = rep(m0,nrow(Ft))
Nt = length(Yt)+1 # the length of the time series + t0
p = nrow(Ft)      # the number of parents and one for an intercept (i.e. the number of thetas)

Y = numeric(Nt)
Y[2:Nt] = Yt

F1 = array(0, dim=c(p,Nt))
F1[,2:Nt] = Ft

# Set up allocation matrices, including the priors
mt = array(0,dim=c(p,Nt))
mt[,1]=m0

Ct = array(0,dim=c(p,p,Nt))
CSt = array(0,dim=c(p,p,Nt))
CSt[,,1] = CS0

Rt = array(0,dim=c(p,p,Nt))
RSt = array(0,dim=c(p,p,Nt))

nt = numeric(Nt)
nt=n0

dt = numeric(Nt)
dt=d0

S = numeric(Nt)
S=dt/nt

ft = numeric(Nt)
Qt = numeric(Nt)
ets = numeric(Nt)
lpl = numeric(Nt)

# Updating

for (i in 2:Nt){

# Posterior at {t-1}: (theta_{t-1}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, C_{t-1} = C*_{t-1} x d_{t-1}/n_{t-1}]
# Prior at {t}: (theta_{t}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, R_{t}]
# D_{t-1} = D_{0},Y_{1},...,Y_{t-1} D_{0} is the initial information set

# R*_{t} = C*_{t-1}/delta
RSt[,,i] = CSt[,,(i-1)] / delta
Rt[,,i] = RSt[,,i] * S[(i-1)]
# One-step forecast: (Y_{t}|D_{t-1}) ~ T_{n_{t-1}}[f_{t}, Q_{t}]
ft[i] = t(F1[,i]) %*% mt[,(i-1)]
QSt = as.vector(1 + t(F1[,i]) %*% RSt[,,i] %*% F1[,i])
Qt[i] = QSt * S[(i-1)]
et = Y[i] - ft[i]
ets[i] = et / sqrt(Qt[i])

# Posterior at t: (theta_{t}|D_{t}) ~ T_{n_{t}}[m_{t}, C_{t}]
# D_{t} = D_{0},Y_{1},...,Y_{t}
At = (RSt[,,i] %*% F1[,i])/QSt
mt[,i] = mt[,(i-1)] + (At*et)

nt[i] = nt[(i-1)] + 1
dt[i] = dt[(i-1)] + (et^2)/QSt
S[i]=dt[i]/nt[i]

CSt[,,i] = RSt[,,i] - (At %*% t(At))*QSt
Ct[,,i] = S[i]*CSt[,,i]

# Log Predictive Likelihood
lpl[i] = lgamma((nt[(i-1)]+1)/2)-lgamma(nt[(i-1)]/2)-0.5*log(pi*nt[(i-1)]*Qt[i])-((nt[(i-1)]+1)/2)*log(1+(1/nt[(i-1)])*et^2/Qt[i])
}

mt = mt[,2:Nt]; Ct = Ct[,,2:Nt]; CSt = CSt[,,2:Nt]; Rt = Rt[,,2:Nt]; RSt = RSt[,,2:Nt]
nt = nt[2:Nt]; dt = dt[2:Nt]; S = S[2:Nt]; ft = ft[2:Nt]; Qt = Qt[2:Nt]; ets = ets[2:Nt]; lpl = lpl[2:Nt]

output <- list(mt=mt,Ct=Ct,CSt=CSt,Rt=Rt,RSt=RSt,nt=nt,dt=dt,S=S,ft=ft,Qt=Qt,ets=ets,lpl=lpl)
return(output)
}

#' A function to generate all the possible models.
#'
#' @param Nn number of nodes; the number of columns of the dataset can be used.
#' @param node The node to find parents for.
#'
#' @return
#' output.model = a matrix with dimensions (Nn-1) x number of models, where number of models = 2^(Nn-1).
#'
#' @export
model.generator<-function(Nn,node){

# Create the model 'no parents' (the first column of the matrix is all zeros)
empt=rep(0,(Nn-1))

for (k in 1:(Nn-1)) {

# Calculate all combinations when number of parents = k
#m=combn(c(1:Nn)[-node],k)
if (Nn==2 & node==1) {
model = matrix(c(0,2),1,2)
} else {
m=combn(c(1:Nn)[-node],k)

# Expand the array so that unconnected edges are represented by zeros
empt.new=array(0,dim=c((Nn-1),ncol(m)))
empt.new[1:k,]=m

# Bind the matrices together; the next set of models are added to this matrix
model=cbind(empt,empt.new)
empt=model
}
}

colnames(model)=NULL
output.model<-model

return(output.model)

}

#' A function for an exhaustive search, calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points T x Number of nodes Nn.
#' @param node  The node to find parents for.
#' @param nbf   Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#'
#' @return
#' model.store a matrix with the model, LPL and chosen discount factor for all possible models.
#' runtime an estimate of the run time of the function, using proc.time().
#' @export
exhaustive.search <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec() ) {

m0 = priors$m0 CS0 = priors$CS0
n0 = priors$n0 d0 = priors$d0

ptm=proc.time()

Nn=ncol(Data) # the number of nodes
Nm=2^(Nn-1)   # the number of models per node

M=model.generator(Nn,node) # Generate all the possible models
models=rbind(1:Nm,M) # Label each model with a 'model number'

Yt=Data[,node]    # the time series of the node we wish to find parents for
Nt=length(Yt)     # the number of time points
nd=length(delta)  # the number of deltas

# Create empty arrays for the lpl scores and the optimum deltas
lpldet=array(NA,c(Nm,length(delta)))
lplmax=rep(NA,Nm)
DF.hat=rep(NA,Nm)

# Now create Ft.
for (z in 1:Nm) {
pars=models[(2:Nn),z]
pars=pars[pars!=0]
Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1) {
Ft[,2:ncol(Ft)]=Data[,pars] # selects parents
}

# Calculate the log predictive likelihood, for each value of delta
for (j in 1:nd) {
if (cpp) {
# new C++ implementation
lpl=c(dlmLplCpp(Yt, t(Ft), delta[j], m0, CS0, n0, d0))
lpldet[z,j]=sum(lpl[nbf:Nt])
} else {
# original native R
a=dlm.lpl(Yt, t(Ft), delta=delta[j], priors=priors)
lpldet[z,j]=sum(a$lpl[nbf:Nt]) } } if (sum(is.na(lpldet[z,])) == length(lpldet[z,])) { lplmax[z] = -.Machine$double.xmax
} else {
lplmax[z]=max(lpldet[z,],na.rm=TRUE)
DF.hat[z] = delta[which.max(lpldet[z,])]
}
}

lpl_noint = 99 # TODO

# Output model.store
model.store=rbind(models,lplmax,DF.hat)
rownames(model.store)=NULL

runtime=(proc.time()-ptm)

output<-list(model.store=model.store,runtime=runtime, lpl_noint=lpl_noint)
return(output)
}

#' Mean centers timeseries in a 2D array timeseries x nodes,
#' i.e. each timeseries of each node has mean of zero.
#'
#' @param X 2D array with dimensions timeseries x nodes.
#'
#' @return M 2D array.
#' @export
center <- function(X) {
d = dim(X)
M = matrix(NA, d, d)

for (i in 1:d) {
M[,i]=scale(X[,i], center = T, scale = F)
}

return(M)
}

#' Estimate subject's full network: runs exhaustive search on very node.
#'
#' @param X array with dimensions timeseries x nodes.
#' @param id subject ID. If set, results are saved to a txt file.
#' @param nbf  Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param bf bayes factor for network thresholding.
#' @param priors list with prior hyperparameters.
#' @param path a path where results are written.
#'
#' @return store list with results.
#' @export
subject <- function(X, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, bf = 20,
priors = priors.spec(), path = getwd() ) {
N=ncol(X)  # nodes
M=2^(N-1)  # rows/models
models = array(rep(0,(N+2)*M*N),dim=c(N+2,M,N))

for (n in 1:N) {
tmp=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors)
models[,,n]=tmp$model.store if (!is.null(id)) { write(t(models[,,n]), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)), ncolumns = M) } } store=list() store$models=models
store$winner=getWinner(models,N) store$adj=getAdjacency(store$winner,N) store$thr=getThreshAdj(store$adj, store$models, store$winner, bf = bf) return(store) } #' Runs exhaustive search on a single node and saves results in txt file. #' #' @param X array with dimensions timeseries x nodes. #' @param n node number. #' @param id subject ID. If set, results are saved to a txt file. #' @param nbf Log Predictive Likelihood will sum from (and including) this time point. #' @param delta a vector of potential values for the discount factor.#' #' @param cpp boolean true (default): fast C++ implementation, false: native R code. #' @param priors list with prior hyperparameters. #' @param path a path where results are written. #' #' @return store list with results. #' @export node <- function(X, n, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec(), path=getwd() ) { N=ncol(X) # nodes M=2^(N-1) # rows/models store=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors) if (!is.null(id)) { write(t(store$model.store), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)), ncolumns = M)
}
return(store)
}

#' Reads single subject's network from txt files.
#' @param path path.
#' @param id identifier to select all subjects' nodes, e.g. pattern containing subject ID and session number.
#' @param nodes number of nodes.
#' @param bf bayes factor for network thresholding.
#'
#' @return store list with results.
#' @export
read.subject <- function(path, id, nodes, bf = 20) {
models = array(0,dim=c(nodes+2,2^(nodes-1),nodes))
for (n in 1:nodes) {
#file=sprintf("%s_node_%03d.txt", id, n)
#models[,,n] = as.matrix(read.table(file)) # quite slow
file=list.files(path, pattern=glob2rx(sprintf("%s*_node_%03d.txt", id, n)))
models[,,n] = as.matrix(fread(file.path(path,file))) # faster, from package "data.table"
}
store=list()
store$models=models store$winner=getWinner(models,nodes)
store$adj=getAdjacency(store$winner,nodes)
store$thr=getThreshAdj(store$adj, store$models, store$winner, bf = bf)

return(store)
}

#' Get winner network by maximazing log predictive likelihood (LPL)
#' from a set of models.
#'
#' @param models 2D matrix, or 3D models x node.
#' @param nodes number of nodes.
#'
#' @return winner array with highest scored model(s).
#' @export
getWinner <- function(models, nodes) {

dims=length(dim(models))

if (dims==2) {
winner = models[,which.max(models[nodes+1,])]
} else if (dims==3) {
winner = array(0, dim=c(nodes+2,nodes))
for (n in 1:nodes) {
winner[,n]=models[,which.max(models[nodes+1,,n]),n]
}
}
return(winner)
}

#' Get adjacency and associated likelihoods (LPL) and disount factros (df) of winning models.
#'
#' @param winner, 2D matrix.
#' @param nodes number of nodes.
#'
#' @export

am = array(rep(0,nodes*nodes),dim=c(nodes,nodes))
lpl = df = array(rep(NA,nodes*nodes),dim=c(nodes,nodes))
for (n in 1:nodes) {
p = winner[2:nodes,n]  # parents
p = p[p>0]
am[p,n] = 1
lpl[p,n] = winner[nodes+1,n]
df[p,n]  = winner[nodes+2,n]
}
return(list(am=am, lpl=lpl, df=df))
}

#' Plots network as adjacency matrix.
#'
#' @param title title.
#' @param colMapLabel label for colormap.
#' @param hasColMap FALSE turns off color map, default is NULL (on).
#' @param lim vector with min and max value for color scaling.
#' @param nodeLabels node labels.
#' @param axisTextSize text size of the y and x tick labels.
#' @param xAngle orientation of the x tick labels.
#' @param titleTextSize text size of the title.
#'
#' @export
gplotMat <- function(adj, title=NULL, colMapLabel=NULL, hasColMap=NULL, lim=c(0, 1),
gradient=c("white", "orange", "red"), nodeLabels=waiver(), axisTextSize=12, xAngle=0, titleTextSize=12) {
names(x) = "Parent"
names(x) = "Child"

# handle scales in case custom labeling is set
if (is.list(nodeLabels)) {
x_scale = scale_x_continuous()
y_scale = scale_y_reverse()
} else {
x_scale = scale_x_continuous(breaks = 1:ncol(adj), labels = nodeLabels)
y_scale = scale_y_reverse(breaks = 1:ncol(adj), labels = nodeLabels)
}

ggplot(x, aes_string(x = "Child", y = "Parent", fill = "value")) +
geom_tile(color = "gray60") +

midpoint = sum(lim)/2,
limit = lim,
space = "Lab",
name = colMapLabel) +

theme(#axis.ticks.x = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
text = element_text(size=12),
plot.title = element_text(size=titleTextSize),
axis.text.x = element_text(size=axisTextSize,angle=xAngle),
axis.text.y = element_text(size=axisTextSize),
#panel.grid.major = element_line(colour="black", size = (1.5)),
#panel.grid.minor = element_line(size = (0.2), colour="grey")
) +

x_scale + y_scale + ggtitle(title) + guides(fill=hasColMap)
}

#' Performes a binomial test with FDR correction for network edge occurrence.
#'
#' @param adj adjacency matrix, nodes x nodes x subj, or nodes x nodes x runs x subj.
#' @param alter type of binomial test, "two.sided" (default), "less", or "greater"
#' @param fdr false discovery rate (FDR) control, default is 0.05.
#'
#' @return store list with results.
#' @export
binom.nettest <- function(adj, alter="two.sided", fdr=0.05) {

M = sum(adj) # total edges over all N subjects, all R(R-1) edges

if (length(mydim) == 3) { # without runs
N=mydim # No. of subjects
N_Comp=mydim

} else if (length(mydim) == 4) { # with runs
N=mydim # No. of subjects
N_runs=mydim
N_Comp=mydim
N=N*N_runs # adjust N by for no. of runs
}

# binom test for every edge occurance
p0 = M/N/N_Comp/(N_Comp-1) # H0 edge probability

p = array(NA, dim=c(N_Comp,N_Comp))
for (i in 1:N_Comp) {
for (j in 1:N_Comp) {
if (i !=j) {
p[i,j]=tmp$p.value } } } # FDR p_fdr=matrix(p.adjust(p, method = "fdr"),N_Comp,N_Comp) adj_fdr=adj_ adj_fdr[p_fdr>=fdr]=NA store=list() store$p0=p0
store$p=p store$p_fdr=p_fdr
store$adj=adj_/N store$adj_fdr=adj_fdr/N # significant proportions

return(store)
}

#' Reshapes a 2D concatenated time series into 3D according to no. of subjects and volumes.
#'
#' @param ts a 2D time series volumes x nodes.
#' @param N No. of subjects.
#' @param V No. of volumes.
#'
#' @return M 3D matrix, time series x nodes x subjects.
#' @export
reshapeTs <- function(ts, N, V) {
NC = ncol(ts)
M = array(NA, dim=c(V,NC,N))
for (i in 1:N) {
idx = ((i-1)*V+1):(V*i)
M[,,i] = ts[idx,]
}
return(M)
}

#' Correlation of time series.
#'
#' @param ts a 3D time series time series x nodes x subjects.
#'
#' @return M correlation matrix.
#' @export
corTs <- function(ts) {
d=dim(ts)
N=d # No. subjects
N_nodes=d
R=array(NA, dim=c(N_nodes,N_nodes,N))
for (s in 1:N) {
R[,,s]=cor(ts[,,s])
}
M = apply(R, c(1,2), mean)
return(M)
}

#' Get specific parent model from all models.
#'
#' @param models a 2D model matrix.
#' @param parents a vector with parent nodes.
#'
#' @return mod specific parent model.
#' @export
getModel <- function(models, parents) {
Nn = nrow(models) - 2 # No. of nodes
Nm = ncol(models) # No. of models
parents = c(parents, rep(0, (Nn - 1) - length(parents))) # add fill zeros
for (m in 1:Nm) {
if (sum(models[2:Nn,m] == parents) == Nn - 1) {
mod = models[,m]
}
}
return(mod)
}

#' A group is a list containing restructured data from subejcts for easier group analysis.
#'
#' @param subj a list of subjects.
#'
#' @return group a list.
#' @export
mdm.group <- function(subj) {
Nn=ncol(subj[]$adj$am)
N=length(subj)

am = lpl = df = tam = tbi = array(NA, dim=c(Nn,Nn,N))
df_ = array(NA, dim=c(N,Nn))
tlpls = array(NA, dim=c(Nn,Nn,2,N))
winner = array(NA, dim=c(Nn+2,Nn,N))
models = array(NA, dim=c(Nn+2,2^(Nn-1),Nn,N))
for (s in 1:N) {
am[,,s]  = subj[[s]]$adj$am
lpl[,,s] = subj[[s]]$adj$lpl
df[,,s]  = subj[[s]]$adj$df
df_[s,]  = subj[[s]]$winner[nrow(subj[[s]]$winner),]
winner[,,s]  = subj[[s]]$winner models[,,,s] = subj[[s]]$models

# thresholded measures
tam[,,s]  = subj[[s]]$thr$am
tbi[,,s]  = subj[[s]]$thr$bi
tlpls[,,,s]= subj[[s]]$thr$lpls
}

group=list(am=am,lpl=lpl,df=df,tam=tam,tbi=tbi,tlpls=tlpls,
df_=df_,winner=winner,models=models)
return(group)
}

#' A group is a list containing restructured data from subejcts for easier group analysis.
#'
#' @param subj a list of subjects.
#'
#' @return group a list.
#' @export
patel.group <- function(subj) {
Nn=ncol(subj[]$kappa) N=length(subj) kappa = tkappa = tau = ttau = net = tnet = array(NA, dim=c(Nn,Nn,N)) for (s in 1:N) { kappa[,,s] = subj[[s]]$kappa
tkappa[,,s] = subj[[s]]$tkappa tau[,,s] = subj[[s]]$tau
ttau[,,s]   = subj[[s]]$ttau net[,,s] = subj[[s]]$net
tnet[,,s]   = subj[[s]]$tnet } group=list(kappa=kappa,tkappa=tkappa,tau=tau,ttau=ttau,net=net,tnet=tnet) return(group) } #' Get thresholded adjacency network. #' #' @param adj list with network adjacency from getAdjacency(). #' @param models matrix 3D with full model estimates. #' @param winner matrix 2D with winning models. #' @param bf bayes factor for network thresholding. #' #' @return thr list with thresholded network adjacency. #' @export getThreshAdj <- function(adj, models, winner, bf = 20) { Nn = ncol(adj$am)
# determine bidirectional edges
bi = array(0, dim=c(Nn, Nn))
for (i in 1:Nn) {
for (j in 1:Nn) {
if (adj$am[i,j] == 1 & adj$am[j,i] == 1) {
bi[i,j] = 1
}
}
}

# Calculate models
B=bi*upper.tri(bi)
lpls=array(NA, dim=c(Nn, Nn, 2))
for (i in 1:Nn) {
for (j in 1:Nn) {
# if bidirectional, calculate 3 models
# A+B:i<>j, A:i>j, B:i<j
# A+B: LPLj + LPLi
# A  : LPLj + LPLi-j
# B  : LPLi + LPLj-i
if (B[i,j] == 1) {

# bidirectional LPL
lpls[i,j,1] = lpls[j,i,1] = adj$lpl[i,j] + adj$lpl[j,i]

# uni i->j, LPL without parent j
p = winner[,i][2:Nn]
p = p[p != j & p!= 0] # remove node j
lpls[i,j,2] = adj$lpl[i,j] + getModel(models[,,i], p)[Nn+1] # uni j->i, LPL without parent i p = winner[,j][2:Nn] p = p[p != i & p!= 0] # remove node i lpls[j,i,2] = adj$lpl[j,i] + getModel(models[,,j], p)[Nn+1]
}
}
}

# am matrix
am=adj$am for (i in 1:Nn) { for (j in 1:Nn) { if (B[i,j] == 1) { if (lpls[i,j,1] - bf <= max(lpls[i,j,2], lpls[j,i,2]) ) { # if unidirectional lpl is larger than bidirectional with a # bayes factor penatly, take the simpler unidirectional model. if (lpls[i,j,2] > lpls[j,i,2]) { am[i,j] = 1; am[j,i] = 0 } else if (lpls[i,j,2] < lpls[j,i,2]) { am[i,j] = 0; am[j,i] = 1 } } } } } thr=list() thr$bi=bi # bidirectional edges
thr$lpls=lpls # lpls thr$am=am # adjacency matrix (thresholded)

return(thr)
}

#' Performance of estimates, such as sensitivity, specificity, and more.
#'
#' @param x estimated binary network matrix.
#' @param xtrue, true binary network matrix.
#'
#' @return perf vector.
#' @export
perf <- function(x, xtrue) {

# in case xtrue continas NA instead of 0
xtrue[is.na(xtrue)]=0

d = dim(x)
Nn=d
if (length(d) == 3) {
N=d
} else if (length(d) == 2) {
x=array(x, dim=c(Nn,Nn,1))
N=1
}

perf=array(NA,dim=c(N,8))

for (i in 1:N) {
# see https://en.wikipedia.org/wiki/Sensitivity_and_specificity
TP = sum(x[,,i] & xtrue)
FP = sum((x[,,i] - xtrue) == 1)
FN = sum((xtrue - x[,,i]) == 1)
TN = sum(!x[,,i] & !xtrue) - ncol(x[,,i])

tpr = TP/(TP+FN) # 1
spc = TN/(TN+FP) # 2
ppv = TP/(TP+FP) # 3
npv = TN/(TN+FN) # 4
fpr = FP/(FP+TN) # 5
fnr = FN/(TP+FN) # 6
fdr = FP/(TP+FP) # 7
acc = (TP+TN)/(TP+FP+FN+TN) # 8

perf[i,]=c(tpr,spc,ppv,npv,fpr,fnr,fdr,acc)
}

return(perf)
}

#' Scaling data. Zero centers and scales the nodes (SD=1).
#'
#' @param X time x node 2D matrix, or 3D with subjects as the 3rd dimension.
#'
#' @return S centered and scaled matrix.
#' @export
scaleTs <- function(X) {
D=dim(X)
if (length(D)==2) {
tmp=array(NA, dim=c(D,D,1))
tmp[,,1]=X
X=tmp
D=dim(X)
}

S=array(NA, dim=c(D,D,D))

# center data
for (s in 1:D) {
colm=colMeans(X[,,s])
S[,,s]=X[,,s]-t(array(rep(colm, D), dim=c(D,D)))
}

# scale data
for (s in 1:D) {
SD=sqrt(mean(apply(S[,,s], 2, var)))
S[,,s]=S[,,s]/SD
}

if (D==1) {
S=S[,,1]
}
return(S)
}

#' Patel.
#'
#' @param X time x node 2D matrix.
#' @param lower percentile cuttoff.
#' @param upper percentile cuttoff for 0-1 scaling.
#' @param bin threshold for conversion to binary values.
#' @param TK significance threshold for connection strength kappa.
#' @param TT significance threshold for direction tau.
#'
#' @return PT list with strengths kappa, direction tau, and net structure.
#' @export
patel <- function(X, lower=0.1, upper=0.9, bin=0.75, TK=0, TT=0) {

nt=nrow(X)
nn=ncol(X)

# scale data into 0,1 interval
X10 = apply(X, 2, quantile, lower) # cutoff 0.1 percentile
X90 = apply(X, 2, quantile, upper) # cutoff 0.9 percentile
a = sweep(sweep(X, 2, X10), 2, X90-X10, FUN="/") # center, scale data
X2 = apply(a, c(1,2), function(v) max(min(v,1),0)) # keep between 0,1

# binarize
X2=(X2>bin)*1 # convert to double
# Joint activation probability of timeseries a, b
# See Table 2, Patel et al. 2006
theta1 = crossprod(X2)/nt        # a=1, b=1 -- a and b active
theta2 = crossprod(X2,1-X2)/nt   # a=1, b=0 -- a active, b not
theta3 = crossprod(1-X2,X2)/nt   # a=0, b=1
theta4 = crossprod(1-X2,1-X2)/nt # a=0, b=0

# directionality tau [-1, 1]
tau = matrix(0, ncol(X2), ncol(X2))
inds = theta2 >= theta3
tau[inds] = 1 - (theta1[inds] + theta3[inds])/(theta1[inds] + theta2[inds])
tau[!inds] = (theta1[!inds] + theta2[!inds])/(theta1[!inds] + theta3[!inds]) - 1
tau=-tau
tau[as.logical(diag(nn))]=NA
# tau(a,b) positive, a is ascendant to b (a is parent)

# functional connectivity kappa [-1, 1]
E=(theta1+theta2)*(theta1+theta3)
max_theta1=min(theta1+theta2,theta1+theta3)
min_theta1=max(0,2*theta1+theta2+theta3-1)
inds = theta1>=E
D = matrix(0, ncol(X2), ncol(X2))
D[inds]=0.5+(theta1[inds]-E[inds])/(2*(max_theta1-E[inds]))
D[!inds]=0.5-(theta1[!inds]-E[!inds])/(2*(E[!inds]-min_theta1))

kappa=(theta1-E)/(D*(max_theta1-E) + (1-D)*(E-min_theta1))
kappa[as.logical(diag(nn))]=NA

# theresholding
tkappa = kappa
tkappa[kappa >= TK & kappa <= TK] = 0
ttau = tau
ttau[tau >= TT & tau <= TT] = 0

# binary nets: we focus on positive associations
net = (tkappa > 0)*(tau > 0)  # only kappa thresholded
net[diag(nn)==1]=0 # remove NA
tnet = (tkappa > 0)*(ttau > 0) # both tau and kappa thresholded
tnet[diag(nn)==1]=0 # remove NA

PT=list(kappa=kappa, tau=tau, tkappa=tkappa, ttau=ttau, net=net, tnet=tnet)
return(PT)
}

#' Permutation test for Patel's kappa. Creates a distribution of values
#' kappa under the null hypothesis.
#'
#' @param X time x node x subjects 3D matrix.
#' @param alpha sign. level
#'
#' @return stat lower and upper significance thresholds.
#' @export
perm.test <- function(X, alpha=0.05) {

low = alpha/2      # two sided test
up  = 1 - alpha/2

N  = dim(X) # Nr. of subjects
Nn = dim(X) # Nr. of nodes

# shuffle across subjects with fixed nodes
ka = array(NA,dim=c(Nn,Nn,N)) # kappa null distribution
ta = array(NA,dim=c(Nn,Nn,N)) # kappa null distribution
X_= array(NA, dim=dim(X))
for (s in 1:N) {
for (n in 1:Nn) {
X_[,n,s]=X[,n,sample(N,1)] # draw a random subject (with repetition)
}
p=patel(X_[,,s])
ka[,,s]=p$kappa ta[,,s]=p$tau
}

# two sided sign. test
stat = list()
stat$kappa = quantile(ka[!is.na(ka)], probs=c(low, up)) # two sided stat$tau   = quantile(ta[!is.na(ta)], probs=c(low, up)) # two sided

return(stat)
}

#' Removes NAs from matrix.
#'
#' @param M Matrix
#'
#' @return matrix with NAs removed.
#' @export
rmna <- function(M) {

M[is.na(M)] = 0
return(M)

}

#' Removes diagnoal from matrix with NAs.
#'
#' @param M Matrix
#'
#' @return matrix with diagnoal of NAs.
#' @export
rmdiag <- function(M) {

M[as.logical(diag(nrow(M)))]=0
return(M)
}

#' Stepise forward non-exhaustive greedy search, calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node  The node to find parents for.
#' @param nbf   The Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#' @export
stepwise.forward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01),
max.break=TRUE, priors=priors.spec()){

# Define the prior hyperparameters
m0 = priors$m0 CS0 = priors$CS0
n0 = priors$n0 d0 = priors$d0

Nn = ncol(Data) # the number of nodes
Nm = 2^(Nn-1)   # the number of models (per node)

# Begin at model 1, the no parent (intercept only) model
model.store = array(0,dim=c((Nn+2),1))

# Find the Log Predicitive Likelihood and discount factor for the zero parent model
Yt = Data[,node]    # the time series of the node we wish to find parents for
Nt = length(Yt)     # the number of time points
nd = length(delta)  # the number of deltas

# Create Ft. For the zero parent model, this is simply a column of ones, representing an intercept.
Ft=rep(1,Nt)
lpl.delta=rep(NA,nd)

for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[j]=sum(lpl[nbf:Nt])}

lpl.max = max(lpl.delta,na.rm=TRUE)
w = which(lpl.delta==lpl.max)
DF.hat = delta[w]

# Store the model number, model, LPL and discount factor
model.store[(Nn+1),] = lpl.max
model.store[(Nn+2),] = DF.hat

# The parents in the model
pars = numeric(0)

for (N in 1:(Nn-1)){

# Find all the models with the correct length and containing the previously selected parents

ms.new = array(0,dim=c((Nn+2),(Nn-N)))
if (length(pars>0)){ms.new[2:(length(pars)+1),] = pars}

# Find the LPL and discount factor of this subset of models models
nms=ncol(ms.new) # How many models are being considered?

# Create empty arrays for the LPL scores and the deltas
lpl.delta=array(NA,c(nms,length(delta)))
lpl.max=rep(NA,nms)
DF.hat=rep(NA,nms)

# Now create Ft.
for (i in 1:nms){
pars = ms.new[(2:Nn),i]
pars = pars[pars!=0]
Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1){Ft[,2:ncol(Ft)] = Data[,pars]}

# Calculate the Log Predictive Likelihood for each value of delta, for the specified models
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[i,j]=sum(lpl[nbf:Nt])}

lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
w = which(lpl.delta[i,]==lpl.max[i])
DF.hat[i] = delta[w]}

ms.new[(Nn+1),] = lpl.max
ms.new[(Nn+2),] = DF.hat

# Find the highest LPL so far
max.score = max(model.store[(Nn+1),])
logBF = lpl.max - max.score
W = which(logBF > 0)

# Update model.store
model.store = cbind(model.store,ms.new)

if (length(W)==0 & max.break==TRUE){break}
else{W.max = which.max(logBF)

# Update the model parents
pars = ms.new[(2:Nn),W.max]
pars = pars[pars!=0]}}

model.store[1,] = c(1:ncol(model.store)) # attach a model number

return(model.store)}

#' Stepise backward non-exhaustive greedy search, calculates the optimum value of the discount factor.
#'
#' @param Data  Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node  The node to find parents for.
#' @param nbf   The Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#' @export
stepwise.backward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01),
max.break=TRUE, priors=priors.spec()){

# Define the prior hyperparameters
m0 = priors$m0 CS0 = priors$CS0
n0 = priors$n0 d0 = priors$d0

Nn = ncol(Data) # the number of nodes
Nm = 2^(Nn-1)   # the number of models (per node)

# Begin at the all parent model
model.store = array(0,dim=c((Nn+2),1))
model.store[(2:Nn),1] = c(1:Nn)[-node]

# Find the Log Predicitive Likelihood and discount factor for the all parent model
Yt = Data[,node]    # the time series of the node we wish to find parents for
Nt = length(Yt)     # the number of time points
nd = length(delta)  # the number of deltas

# The parents in the model
pars = model.store[2:Nn,1]
pars = pars[pars!=0]

Ft = array(1,dim=c(Nt,Nn))
Ft[,2:Nn] = Data[,pars]

lpl.delta=rep(NA,nd)

for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[j]=sum(lpl[nbf:Nt])}

lpl.max = max(lpl.delta,na.rm=TRUE)
w = which(lpl.delta==lpl.max)
DF.hat = delta[w]

model.store[(Nn+1),] = lpl.max
model.store[(Nn+2),] = DF.hat

for (N in 1:(Nn-1)){

# Find all the models with the correct length and missing the previously removed parents

ms.new = array(0,dim=c((Nn+2),(Nn-N)))

# Find the LPL and discount factor of these models
nms=ncol(ms.new) # How many models are being considered?

# Create empty arrays for the lpl scores and the deltas
lpl.delta=array(NA,c(nms,length(delta)))
lpl.max=rep(NA,nms)
DF.hat=rep(NA,nms)

# Now create Ft.
for (i in 1:nms){
pars=ms.new[(2:Nn),i]
pars=pars[pars!=0]

Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1){Ft[,2:ncol(Ft)]=Data[,pars]}

# Calculate the log predictive likelihood for each value of delta, for the specified models
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[i,j]=sum(lpl[nbf:Nt])}

lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
w = which(lpl.delta[i,]==lpl.max[i])
DF.hat[i] = delta[w]}

ms.new[(Nn+1),] = lpl.max
ms.new[(Nn+2),] = DF.hat

max.score = max(model.store[(Nn+1),]) # Find the highest LPL calculated so far
logBF = lpl.max - max.score
W = which(logBF > 0)

# Update model.store
model.store = cbind(model.store,ms.new)

if (length(W)==0 & max.break==TRUE){break}
else{W.max = which.max(logBF)

# Update the model parents
pars = ms.new[(2:Nn),W.max]
pars = pars[pars!=0]}}

model.store[1,] = c(1:ncol(model.store)) # attach a model number

return(model.store)}

#' Stepise combine: combines the stepwise forward and the stepwise backward model.
#'
#' @param forward_matrix The winning sets of parents using a Forward Selection model search. A
#' matrix with dimension \code{Nn+2} x \code{Nn}, rows \code{1:Nn} are the parents (ones and zeros),
#' rows \code{(Nn+1):(Nn+2)} are the LPL and discount factor. forward matrix.
#' @param backward_matrix backward_matrix}{The winning sets of parents using a Backward Elimination
#' model search. A matrix with dimension \code{Nn+2} x \code{Nn}, rows \code{1:Nn} are the parents
#' (ones and zeros), rows \code{(Nn+1):(Nn+2)} are the LPL and discount factor.
#'
#' @return
#' stepwise_combine_matrix The adjacency network, LPLs and discount factors when the Forward
#' Selection and Backward Elimination model searches are combined.
#' @export
stepwise.combine <- function(forward_matrix,backward_matrix){

Nn = ncol(forward_matrix)
step_combine_matrix = array(NA,dim=c((Nn+2),Nn))

for (k in 1:Nn){

if (forward_matrix[(Nn+1),k] >= backward_matrix[(Nn+1),k]){step_combine_matrix[,k] = forward_matrix[,k]}
if (forward_matrix[(Nn+1),k] < backward_matrix[(Nn+1),k]){step_combine_matrix[,k] = backward_matrix[,k]}

}

return(step_combine_matrix)}


## Try the multdyn package in your browser

Any scripts or data that you put into this service are public.

multdyn documentation built on May 2, 2019, 3:43 p.m.