#' Specify the priors. Without inputs, defaults will be used.
#'
#' @param m0 the value of the prior mean at time \code{t=0}, scalar (assumed to be the same
#' for all nodes). The default is zero.
#' @param CS0 controls the scaling of the prior variance matrix \code{C*_{0}} at time
#' \code{t=0}. The default is 3, giving a non-informative prior: \code{C*_{0}} is 3 times
#' the \code{p x p} identity matrix, where \code{p} is the number of thetas.
#' @param n0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#' is a non-informative prior, with \code{n0 = d0 = 0.001}. \code{n0} must be greater than 0.
#' @param d0 prior hyperparameter of precision \code{phi ~ G(n_{0}/2; d_{0}/2)}. The default
#' is a non-informative prior, with \code{n0 = d0 = 0.001}.
#'
#' @details At time \code{t=0}, \code{(theta_{0} | D_{0}, phi) ~ N(m_{0},C*_{0} x phi^{-1})},
#' where \code{D_{0}} denotes the set of initial information.
#'
#' @return \code{priors} a list with the prior hyperparameters. Relevant to
#' \code{\link{dlm.lpl}}, \code{\link{exhaustive.search}}, \code{\link{node}}, and \code{\link{subject}}.
#'
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
#'
#' @examples
#' pr=priors.spec()
#' pr=priors.spec(n0=0.002)
priors.spec <- function(m0 = 0, CS0 = 3, n0 = 0.001, d0 = 0.001) {
priors = list(m0 = m0, CS0 = CS0, n0 = n0, d0 = d0)
return(priors)
}
#' Calculate the log predictive likelihood for a specified set of parents and a fixed delta.
#'
#' @param Yt the vector of observed time series, length \code{T}.
#' @param Ft the matrix of covariates, dim = number of thetas (\code{p}) x number of time
#' points (\code{T}), usually a row of 1s to represent an intercept and the time series of
#' the parent nodes.
#' @param delta discount factor (scalar).
#' @param priors list with prior hyperparameters.
#'
#' @return
#' \item{mt}{the vector or matrix of the posterior mean (location parameter), dim = \code{p x T}.}
#' \item{Ct}{and \code{CSt}, the posterior scale matrix \code{C_{t} = C*_{t} x S_{t}},
#' with dim = \code{p x p x T}, where \code{S_{t}} is a point estimate for the observation
#' variance \code{phi^{-1}}.}
#' \item{Rt}{and \code{RSt} the prior scale matrix \code{R_{t}} is \code{R_{t} = R*_{t} x S_{t-1}},
#' with dim = \code{p x p x T}, where \code{S_{t-1}} is a point estimate for the observation
#' variance \code{phi^{-1}} at the previous time point.}
#' \item{nt}{and \code{dt} the vectors of the updated hyperparameters for the precision \code{phi}
#' with length \code{T}.}
#' \item{S}{the vector of the point estimate for the observation variance \code{phi^{-1}} with
#' length \code{T}.}
#' \item{ft}{the vector of the one-step forecast location parameter with length \code{T}.}
#' \item{Qt}{the vector of the one-step forecast scale parameter with length \code{T}.}
#' \item{ets}{the vector of the standardised forecast residuals with length \code{T},
#' \eqn{\newline} defined as \code{(Y_{t} - f_{t}) / sqrt (Q_{t})}.}
#' \item{lpl}{the vector of the Log Predictive Likelihood with length \code{T}.}
#'
#' @references West, M. & Harrison, J., 1997. Bayesian Forecasting and Dynamic Models. Springer New York.
#'
#' @examples
#' data("utestdata")
#' Yt = myts[,1]
#' Ft = t(cbind(1,myts[,2:5]))
#' m = dlm.lpl(Yt, Ft, 0.7)
#'
#'
dlm.lpl <- function(Yt, Ft, delta, priors = priors.spec() ) {
m0 = priors$m0
CS0 = priors$CS0
n0 = priors$n0
d0 = priors$d0
CS0 = CS0*diag(nrow(Ft))
m0 = rep(m0,nrow(Ft))
Nt = length(Yt)+1 # the length of the time series + t0
p = nrow(Ft) # the number of parents and one for an intercept (i.e. the number of thetas)
Y = numeric(Nt)
Y[2:Nt] = Yt
F1 = array(0, dim=c(p,Nt))
F1[,2:Nt] = Ft
# Set up allocation matrices, including the priors
mt = array(0,dim=c(p,Nt))
mt[,1]=m0
Ct = array(0,dim=c(p,p,Nt))
CSt = array(0,dim=c(p,p,Nt))
CSt[,,1] = CS0
Rt = array(0,dim=c(p,p,Nt))
RSt = array(0,dim=c(p,p,Nt))
nt = numeric(Nt)
nt[1]=n0
dt = numeric(Nt)
dt[1]=d0
S = numeric(Nt)
S[1]=dt[1]/nt[1]
ft = numeric(Nt)
Qt = numeric(Nt)
ets = numeric(Nt)
lpl = numeric(Nt)
# Updating
for (i in 2:Nt){
# Posterior at {t-1}: (theta_{t-1}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, C_{t-1} = C*_{t-1} x d_{t-1}/n_{t-1}]
# Prior at {t}: (theta_{t}|D_{t-1}) ~ T_{n_{t-1}}[m_{t-1}, R_{t}]
# D_{t-1} = D_{0},Y_{1},...,Y_{t-1} D_{0} is the initial information set
# R*_{t} = C*_{t-1}/delta
RSt[,,i] = CSt[,,(i-1)] / delta
Rt[,,i] = RSt[,,i] * S[(i-1)]
# One-step forecast: (Y_{t}|D_{t-1}) ~ T_{n_{t-1}}[f_{t}, Q_{t}]
ft[i] = t(F1[,i]) %*% mt[,(i-1)]
QSt = as.vector(1 + t(F1[,i]) %*% RSt[,,i] %*% F1[,i])
Qt[i] = QSt * S[(i-1)]
et = Y[i] - ft[i]
ets[i] = et / sqrt(Qt[i])
# Posterior at t: (theta_{t}|D_{t}) ~ T_{n_{t}}[m_{t}, C_{t}]
# D_{t} = D_{0},Y_{1},...,Y_{t}
At = (RSt[,,i] %*% F1[,i])/QSt
mt[,i] = mt[,(i-1)] + (At*et)
nt[i] = nt[(i-1)] + 1
dt[i] = dt[(i-1)] + (et^2)/QSt
S[i]=dt[i]/nt[i]
CSt[,,i] = RSt[,,i] - (At %*% t(At))*QSt
Ct[,,i] = S[i]*CSt[,,i]
# Log Predictive Likelihood
lpl[i] = lgamma((nt[(i-1)]+1)/2)-lgamma(nt[(i-1)]/2)-0.5*log(pi*nt[(i-1)]*Qt[i])-((nt[(i-1)]+1)/2)*log(1+(1/nt[(i-1)])*et^2/Qt[i])
}
mt = mt[,2:Nt]; Ct = Ct[,,2:Nt]; CSt = CSt[,,2:Nt]; Rt = Rt[,,2:Nt]; RSt = RSt[,,2:Nt]
nt = nt[2:Nt]; dt = dt[2:Nt]; S = S[2:Nt]; ft = ft[2:Nt]; Qt = Qt[2:Nt]; ets = ets[2:Nt]; lpl = lpl[2:Nt]
output <- list(mt=mt,Ct=Ct,CSt=CSt,Rt=Rt,RSt=RSt,nt=nt,dt=dt,S=S,ft=ft,Qt=Qt,ets=ets,lpl=lpl)
return(output)
}
#' A function to generate all the possible models.
#'
#' @param Nn number of nodes; the number of columns of the dataset can be used.
#' @param node The node to find parents for.
#'
#' @return
#' output.model = a matrix with dimensions (Nn-1) x number of models, where number of models = 2^(Nn-1).
#'
#' @examples
#' m=model.generator(5,1)
#'
model.generator<-function(Nn,node){
# Create the model 'no parents' (the first column of the matrix is all zeros)
empt=rep(0,(Nn-1))
for (k in 1:(Nn-1)) {
# Calculate all combinations when number of parents = k
#m=combn(c(1:Nn)[-node],k)
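# special case: with Nn=2 and node=1 the remaining parent label is the
# scalar 2, which combn() would misread as the sequence 1:2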
if (Nn==2 & node==1) {
model = matrix(c(0,2),1,2)
} else {
m=combn(c(1:Nn)[-node],k)
# Expand the array so that unconnected edges are represented by zeros
empt.new=array(0,dim=c((Nn-1),ncol(m)))
empt.new[1:k,]=m
# Bind the matrices together; the next set of models are added to this matrix
model=cbind(empt,empt.new)
empt=model
}
}
colnames(model)=NULL
output.model<-model
return(output.model)
}
#' A function for an exhaustive search that also calculates the optimum value of the discount factor.
#'
#' @param Data Dataset with dimension number of time points T x Number of nodes Nn.
#' @param node The node to find parents for.
#' @param nbf Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#'
#' @return
#' model.store a matrix with the model, LPL and chosen discount factor for all possible models.
#' runtime an estimate of the run time of the function, using proc.time().
#'
#' @examples
#' data("utestdata")
#' result=exhaustive.search(myts,3)
#'
exhaustive.search <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec() ) {
ptm=proc.time()
m0 = priors$m0
CS0 = priors$CS0
n0 = priors$n0
d0 = priors$d0
Nn=ncol(Data) # the number of nodes
Nm=2^(Nn-1) # the number of models per node
M=model.generator(Nn,node) # Generate all the possible models
models=rbind(1:Nm,M) # Label each model with a 'model number'
Yt=Data[,node] # the time series of the node we wish to find parents for
Nt=length(Yt) # the number of time points
nd=length(delta) # the number of deltas
# Create empty arrays for the lpl scores and the optimum deltas
lpldet=array(NA,c(Nm,length(delta)))
lplmax=rep(NA,Nm)
DF.hat=rep(NA,Nm)
# Now create Ft.
for (z in 1:Nm) {
pars=models[(2:Nn),z]
pars=pars[pars!=0]
Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1) {
Ft[,2:ncol(Ft)]=Data[,pars] # selects parents
}
# Calculate the log predictive likelihood, for each value of delta
for (j in 1:nd) {
if (cpp) {
# new C++ implementation
lpl=c(dlmLplCpp(Yt, t(Ft), delta[j], m0, CS0, n0, d0))
lpldet[z,j]=sum(lpl[nbf:Nt])
} else {
# original native R
a=dlm.lpl(Yt, t(Ft), delta=delta[j], priors=priors)
lpldet[z,j]=sum(a$lpl[nbf:Nt])
}
}
if (sum(is.na(lpldet[z,])) == length(lpldet[z,])) {
lplmax[z] = -.Machine$double.xmax
} else {
lplmax[z]=max(lpldet[z,],na.rm=TRUE)
DF.hat[z] = delta[which.max(lpldet[z,])]
}
}
# Output model.store
model.store=rbind(models,lplmax,DF.hat)
rownames(model.store)=NULL
runtime=(proc.time()-ptm)
return(list(model.store=model.store,runtime=runtime))
}
#' Mean-centers time series in a 2D array (time series x nodes),
#' i.e. the time series of each node has a mean of zero.
#'
#' @param X 2D array with dimensions timeseries x nodes.
#'
#' @return M 2D array.
#'
#' @examples
#' data("utestdata")
#' myts=center(myts)
#'
#'
center <- function(X) {
d = dim(X)
M = matrix(NA, d[1], d[2])
for (i in 1:d[2]) {
M[,i]=scale(X[,i], center = T, scale = F)
}
return(M)
}
#' Estimates a subject's full network: runs an exhaustive search on every node.
#'
#' @param X array with dimensions timeseries x nodes.
#' @param id subject ID. If set, results are saved to a txt file.
#' @param nbf Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#' @param path a path where results are written.
#' @param method either exhaustive (default), forward, backward, or both.
#'
#' @return store list with results.
#'
#' @examples
#' data("utestdata")
#' # select only 3-nodes to speed-up this example
#' sub=subject(myts[,1:3])
#' sub=subject(myts[,1:3], method="both")
#'
subject <- function(X, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE,
priors = priors.spec(), path = getwd(), method = "exhaustive") {
if (!is.element(method, c("both", "exhaustive", "forward", "backward"))) {
stop("Method must be either exhaustive, forward, backward or both")
}
Nn=ncol(X) # nodes
models=list()
for (n in 1:Nn) {
if (method == "both") {
tmp=stepwise.combine(X, n, nbf=nbf, delta=delta, priors=priors)
models[[n]] = tmp$model.store
} else if (method == "forward") {
tmp=stepwise.forward(X, n, nbf=nbf, delta=delta, priors=priors)
models[[n]] = tmp$model.store
} else if (method == "backward") {
tmp=stepwise.backward(X, n, nbf=nbf, delta=delta, priors=priors)
models[[n]] = tmp$model.store
} else if (method == "exhaustive") {
tmp=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors)
models[[n]] = tmp$model.store
}
if (!is.null(id)) {
write(t(models[[n]]), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)),
ncolumns = ncol(tmp$model.store))
}
}
store=list()
store$models=models
store$winner=getWinner(models,Nn)
store$adj=getAdjacency(store$winner,Nn)
return(store)
}
#' Runs a model search (exhaustive or stepwise) on a single node and optionally saves results to a txt file.
#'
#' @param X array with dimensions timeseries x nodes.
#' @param n node number.
#' @param id subject ID. If set, results are saved to a txt file.
#' @param nbf Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta a vector of potential values for the discount factor.
#' @param cpp boolean true (default): fast C++ implementation, false: native R code.
#' @param priors list with prior hyperparameters.
#' @param path a path where results are written.
#' @param method can be exhaustive (default), forward, backward, or both.
#'
#' @return store list with results.
#'
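#' @examples
#' data("utestdata")
#' # a minimal sketch: exhaustive search for the parents of node 1,
#' # restricted to 3 nodes to keep this example fast
#' res = node(myts[,1:3], 1)
#'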
node <- function(X, n, id=NULL, nbf=15, delta=seq(0.5,1,0.01), cpp=TRUE, priors=priors.spec(),
path=getwd(), method = "exhaustive") {
if (!is.element(method, c("both", "exhaustive", "forward", "backward"))) {
stop("Method must be either exhaustive, forward, backward or both")
}
if (method == "both") {
store=stepwise.combine(X, n, nbf=nbf, delta=delta, priors=priors)
} else if (method == "forward") {
store=stepwise.forward(X, n, nbf=nbf, delta=delta, priors=priors)
} else if (method == "backward") {
store=stepwise.backward(X, n, nbf=nbf, delta=delta, priors=priors)
} else if (method == "exhaustive") {
store=exhaustive.search(X, n, nbf=nbf, delta=delta, cpp=cpp, priors=priors)
}
if (!is.null(id)) {
write(t(store$model.store), file=file.path(path, sprintf("%s_node_%03d.txt", id, n)),
ncolumns = ncol(store$model.store))
}
return(store)
}
#' Reads a single subject's network from txt files.
#' @param path path.
#' @param id identifier to select all subjects' nodes, e.g. pattern containing subject ID and session number.
#' @param nodes number of nodes.
#' @param modelStore can be set to false to save memory.
#'
#' @return store list with results.
#'
#'
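#' @examples
#' \dontrun{
#' # a hedged sketch: assumes results for a subject labeled "subj01"
#' # were previously written to tempdir() by node() or subject()
#' s = read.subject(tempdir(), "subj01", nodes=3)
#' }
#'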
read.subject <- function(path, id, nodes, modelStore=TRUE) {
models = list()
for (n in 1:nodes) {
#file=sprintf("%s_node_%03d.txt", id, n)
#models[,,n] = as.matrix(read.table(file)) # quite slow
file=list.files(path, pattern=glob2rx(sprintf("%s*_node_%03d.txt", id, n)))
# we have to use a list as model dimension is variable in stepwise.
models[[n]] = as.matrix(fread(file.path(path,file))) # faster, from package "data.table"
}
store=list()
if (modelStore) {
store$models=models
}
store$winner=getWinner(models,nodes)
store$adj=getAdjacency(store$winner,nodes)
return(store)
}
#' Get the winner network by maximizing the log predictive likelihood (LPL)
#' from a set of models.
#'
#' @param models 2D matrix, or 3D models x node.
#' @param nodes number of nodes.
#'
#' @return winner array with highest scored model(s).
#'
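#' @examples
#' data("utestdata")
#' # a minimal sketch: score all models for each of 3 nodes, then pick
#' # the winning model per node from the list of model matrices
#' models = list()
#' for (n in 1:3) models[[n]] = exhaustive.search(myts[,1:3], n)$model.store
#' w = getWinner(models, 3)
#'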
getWinner <- function(models, nodes) {
if (is.matrix(models)) {
winner = models[,which.max(models[nodes+1,])]
} else if (is.list(models)) {
winner = array(0, dim=c(nodes+2,nodes))
for (n in 1:nodes) {
winner[,n]=models[[n]][,which.max(models[[n]][nodes+1,])]
}
}
return(winner)
}
#' Get adjacency and associated likelihoods (LPL) and discount factors (df) of winning models.
#'
#' @param winner 2D matrix with winning models.
#' @param nodes number of nodes.
#'
#' @return adj list with the adjacency matrix \code{am} and the associated \code{lpl} and \code{df} matrices.
#'
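#' @examples
#' data("utestdata")
#' # a minimal sketch: estimate a 3-node network, then derive the
#' # adjacency with the LPL and delta of the winning models
#' sub = subject(myts[,1:3])
#' a = getAdjacency(sub$winner, 3)
#'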
getAdjacency <- function(winner, nodes) {
am = array(rep(0,nodes*nodes),dim=c(nodes,nodes))
lpl = df = array(rep(NA,nodes*nodes),dim=c(nodes,nodes))
for (n in 1:nodes) {
p = winner[2:nodes,n] # parents
p = p[p>0]
am[p,n] = 1
lpl[p,n] = winner[nodes+1,n]
df[p,n] = winner[nodes+2,n]
}
return(list(am=am, lpl=lpl, df=df))
}
#' Plots network as adjacency matrix.
#'
#' @param adj 2D adjacency matrix.
#' @param title title.
#' @param colMapLabel label for colormap.
#' @param hasColMap FALSE turns off color map, default is NULL (on).
#' @param lim vector with min and max value, data outside this range will be removed.
#' @param gradient gradient colors.
#' @param nodeLabels node labels.
#' @param axisTextSize text size of the y and x tick labels.
#' @param xAngle orientation of the x tick labels.
#' @param titleTextSize text size of the title.
#' @param barWidth width of the colorbar.
#' @param textSize size of the general plot text.
#'
#' @examples
#' # Generate some sample binary 5-node network structures for N=20, then compute
#' # proportion at each edge
#' N=20
#' x = array(rbinom(n=5*5*N, size=1, prob=0.30), dim=c(5,5,N))
#' A = apply(x, c(1,2), mean)
#' \donttest{
#' gplotMat(A, title = "network", colMapLabel = '%', barWidth = 0.3)
#' }
#'
gplotMat <- function(adj, title=NULL, colMapLabel=NULL, hasColMap=NULL, lim=c(0, 1),
gradient=c("white", "orange", "red"), nodeLabels=waiver(), axisTextSize=12,
xAngle=0, titleTextSize=12, barWidth = 1, textSize=12) {
colnames(adj)=NULL
rownames(adj)=NULL
x = melt(adj)
names(x)[1] = "Parent"
names(x)[2] = "Child"
# handle scales in case custom labeling is set
if (is.list(nodeLabels)) {
x_scale = scale_x_continuous()
y_scale = scale_y_reverse()
} else {
x_scale = scale_x_continuous(breaks = 1:ncol(adj), labels = nodeLabels)
y_scale = scale_y_reverse(breaks = 1:ncol(adj), labels = nodeLabels)
}
if (is.null(hasColMap)) {
myguides = guides(fill=guide_colorbar(barwidth = barWidth))
} else {
myguides = guides(fill=hasColMap)
}
ggplot(x, aes_string(x = "Child", y = "Parent", fill = "value")) +
geom_tile(color = "gray60") +
scale_fill_gradient2(
na.value = "transparent",
low = gradient[1],
mid = gradient[2],
high = gradient[3],
midpoint = sum(lim)/2,
limit = lim,
space = "Lab",
name = colMapLabel) +
theme(#axis.ticks.x = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
text = element_text(size=textSize),
plot.title = element_text(size=titleTextSize),
axis.text.x = element_text(size=axisTextSize,angle=xAngle),
axis.text.y = element_text(size=axisTextSize),
panel.background = element_blank()
#panel.grid.major = element_line(colour="black", size = (1.5)),
#panel.grid.minor = element_line(size = (0.2), colour="grey")
) +
x_scale + y_scale + ggtitle(title) + myguides
}
#' Performs a binomial test with FDR correction for network edge occurrence.
#'
#' @param adj adjacency matrix, nodes x nodes x subj, or nodes x nodes x runs x subj.
#' @param alter type of binomial test, "two.sided" (default), "less", or "greater".
#' @param fdr false discovery rate (FDR) control, default is 0.05.
#'
#' @return store list with results.
#'
#' @examples
#' # Generate some sample binary 5-node network structures for N=20, then perform
#' # significance testing.
#' N=20
#' x = rmdiag(array(rbinom(n=5*5*N, size=1, prob=0.10), dim=c(5,5,N)))
#' x[1,2,2:N]=1; x[2,3,seq(1,N,2)]=1 # add some consistent edges
#' A = apply(x, c(1,2), mean)
#' l = binom.nettest(x)
#'
binom.nettest <- function(adj, alter="two.sided", fdr=0.05) {
mydim=dim(adj)
M = sum(adj) # total edges over all N subjects, all R(R-1) edges
if (length(mydim) == 3) { # without runs
N=mydim[3] # No. of subjects
N_Comp=mydim[1]
adj_ = apply(adj, c(1,2), sum)
} else if (length(mydim) == 4) { # with runs
N=mydim[4] # No. of subjects
N_runs=mydim[3]
N_Comp=mydim[1]
N=N*N_runs # adjust N for the no. of runs
adj_ = apply(adj, c(1,2), sum) # sum across subjects and runs
}
# binom test for every edge occurrence
p0 = M/N/N_Comp/(N_Comp-1) # H0 edge probability
p = array(NA, dim=c(N_Comp,N_Comp))
for (i in 1:N_Comp) {
for (j in 1:N_Comp) {
if (i !=j) {
tmp=binom.test(adj_[i,j],N,p=p0, alternative=alter)
p[i,j]=tmp$p.value
}
}
}
# FDR
p_fdr=matrix(p.adjust(p, method = "fdr"),N_Comp,N_Comp)
adj_fdr=adj_
adj_fdr[p_fdr>=fdr]=NA
store=list()
store$p0=p0
store$p=p
store$p_fdr=p_fdr
store$adj=adj_/N
store$adj_fdr=adj_fdr/N # significant proportions
return(store)
}
#' Reshapes a 2D concatenated time series into 3D according to no. of subjects and volumes.
#'
#' @param ts a 2D time series volumes x nodes.
#' @param N No. of subjects.
#' @param V No. of volumes.
#'
#' @return M 3D matrix, time series x nodes x subjects.
#'
#' @examples
#' # Let's say subjects are concatenated in a 2D matrix
#' # (samples x nodes), with each having 200 samples.
#' # generate some sample data
#' N=20
#' Nn=5
#' x = array(rnorm(200*N*Nn), dim=c(200*N,Nn))
#' ts = reshapeTs(x,N,200)
#'
reshapeTs <- function(ts, N, V) {
NC = ncol(ts)
M = array(NA, dim=c(V,NC,N))
for (i in 1:N) {
idx = ((i-1)*V+1):(V*i)
M[,,i] = ts[idx,]
}
return(M)
}
#' Mean correlation of time series across subjects.
#'
#' @param ts a 3D time series time series x nodes x subjects.
#'
#' @return M correlation matrix.
#'
#' @examples
#' # create some sample data with 200 samples,
#' # 5 nodes, and 2 subjects
#' ts = array(rnorm(200*5*2), dim=c(200,5,2))
#' M = corTs(ts)
#'
corTs <- function(ts) {
d=dim(ts)
N=d[3] # No. subjects
N_nodes=d[2]
R=array(NA, dim=c(N_nodes,N_nodes,N))
for (s in 1:N) {
R[,,s]=cor(ts[,,s])
}
M = apply(R, c(1,2), mean)
return(M)
}
#' Extract a specific parent model with associated df and model evidence (LPL) from the complete model space.
#'
#' @param models a 2D model matrix.
#' @param parents a vector with parent nodes.
#'
#' @return mod specific parent model.
#'
#' @examples
#' data("utestdata")
#' r=exhaustive.search(myts,3)
#' # get model with parents 1, 2, and 4.
#' m=getModel(r$model.store,c(1,2,4))
#'
getModel <- function(models, parents) {
Nn = nrow(models) - 2 # No. of nodes
Nm = ncol(models) # No. of models
parents = c(parents, rep(0, (Nn - 1) - length(parents))) # add fill zeros
for (m in 1:Nm) {
if (sum(models[2:Nn,m] == parents) == Nn - 1) {
mod = models[,m]
}
}
return(mod)
}
#' Get model number from a set of parents.
#'
#' @param models a 2D model matrix.
#' @param parents a vector with parent nodes.
#'
#' @return nr model number.
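#'
#' @examples
#' # a minimal sketch: the columns of model.generator() output are the
#' # candidate parent sets; look up the column index of parent set {2}
#' M = model.generator(3, 1)
#' nr = getModelNr(M, c(2))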
getModelNr <- function(models, parents) {
Nn = nrow(models) + 1 # No. of nodes
Nm = ncol(models) # No. of models
parents = c(parents, rep(0, Nn-length(parents)-1)) # add fill zeros
for (m in 1:Nm) {
if (all(models[,m] == parents)) {
nr = m
break;
}
}
return(nr)
}
#' A group is a list containing restructured data from subjects for easier group analysis.
#'
#' @param subj a list of subjects.
#'
#' @return group a list.
#'
#' @examples
#' # create some sample data with 200 samples,
#' # 3 nodes, and 2 subjects
#' ts = array(rnorm(200*3*2), dim=c(200,3,2))
#' mysubs=list()
#' mysubs[[1]]=subject(ts[,,1])
#' mysubs[[2]]=subject(ts[,,2])
#' g=dgm.group(mysubs)
#'
dgm.group <- function(subj) {
Nn=ncol(subj[[1]]$adj$am)
N=length(subj)
am = lpl = df = tam = tbi = array(NA, dim=c(Nn,Nn,N))
df_ = array(NA, dim=c(N,Nn))
tlpls = array(NA, dim=c(Nn,Nn,2,N))
winner = array(NA, dim=c(Nn+2,Nn,N))
for (s in 1:N) {
am[,,s] = subj[[s]]$adj$am
lpl[,,s] = subj[[s]]$adj$lpl
df[,,s] = subj[[s]]$adj$df
df_[s,] = subj[[s]]$winner[nrow(subj[[s]]$winner),]
winner[,,s] = subj[[s]]$winner
# pruning
if (!is.null(subj[[s]]$thr)) {
tam[,,s] = subj[[s]]$thr$am
tbi[,,s] = subj[[s]]$thr$bi
tlpls[,,,s]= subj[[s]]$thr$lpls
}
}
group=list(am=am,lpl=lpl,df=df,tam=tam,tbi=tbi,tlpls=tlpls,
df_=df_,winner=winner)
return(group)
}
#' A group is a list containing restructured data from subjects for easier group analysis.
#'
#' @param subj a list of subjects.
#'
#' @return group a list.
#'
#' @examples
#' # create some sample data with 200 samples,
#' # 3 nodes, and 2 subjects
#' ts = array(rnorm(200*3*2), dim=c(200,3,2))
#' mysubs=list()
#' mysubs[[1]]=patel(ts[,,1])
#' mysubs[[2]]=patel(ts[,,2])
#' g=patel.group(mysubs)
patel.group <- function(subj) {
Nn=ncol(subj[[1]]$kappa)
N=length(subj)
kappa = tkappa = tau = ttau = net = tnet = array(NA, dim=c(Nn,Nn,N))
for (s in 1:N) {
kappa[,,s] = subj[[s]]$kappa
tkappa[,,s] = subj[[s]]$tkappa
tau[,,s] = subj[[s]]$tau
ttau[,,s] = subj[[s]]$ttau
net[,,s] = subj[[s]]$net
tnet[,,s] = subj[[s]]$tnet
}
group=list(kappa=kappa,tkappa=tkappa,tau=tau,ttau=ttau,net=net,tnet=tnet)
return(group)
}
#' Get pruned adjacency network.
#'
#' @param adj list with network adjacency from getAdjacency().
#' @param models list of models.
#' @param winner matrix 2D with winning models.
#' @param e bayes factor for network pruning.
#'
#' @return thr list with pruned network adjacency.
#'
#' @examples
#' data("utestdata")
#' # select only 3-nodes to speed-up this example
#' sub=subject(myts[,1:3])
#' p=pruning(sub$adj, sub$models, sub$winner)
pruning <- function(adj, models, winner, e = 20) {
Nn = ncol(adj$am)
# determine bidirectional edges
bi = array(0, dim=c(Nn, Nn))
for (i in 1:Nn) {
for (j in 1:Nn) {
if (adj$am[i,j] == 1 & adj$am[j,i] == 1) {
bi[i,j] = 1
}
}
}
# Calculate models
B=bi*upper.tri(bi)
lpls=array(NA, dim=c(Nn, Nn, 2))
for (i in 1:Nn) {
for (j in 1:Nn) {
# if bidirectional, calculate 3 models
# A+B:i<>j, A:i>j, B:i<j
# A+B: LPLj + LPLi
# A : LPLj + LPLi-j
# B : LPLi + LPLj-i
if (B[i,j] == 1) {
# bidirectional LPL
lpls[i,j,1] = lpls[j,i,1] = adj$lpl[i,j] + adj$lpl[j,i]
# uni i->j, LPL without parent j
p = winner[,i][2:Nn]
p = p[p != j & p!= 0] # remove node j
lpls[i,j,2] = adj$lpl[i,j] + getModel(models[[i]], p)[Nn+1]
# uni j->i, LPL without parent i
p = winner[,j][2:Nn]
p = p[p != i & p!= 0] # remove node i
lpls[j,i,2] = adj$lpl[j,i] + getModel(models[[j]], p)[Nn+1]
}
}
}
# am matrix
am=adj$am
for (i in 1:Nn) {
for (j in 1:Nn) {
if (B[i,j] == 1) {
if (lpls[i,j,1] - e <= max(lpls[i,j,2], lpls[j,i,2]) ) {
# if the unidirectional lpl is larger than the bidirectional one minus a
# Bayes factor penalty, take the simpler unidirectional model.
if (lpls[i,j,2] > lpls[j,i,2]) {
am[i,j] = 1; am[j,i] = 0
} else if (lpls[i,j,2] < lpls[j,i,2]) {
am[i,j] = 0; am[j,i] = 1
}
}
}
}
}
thr=list()
thr$bi=bi # bidirectional edges
thr$lpls=lpls # lpls
thr$am=am # adjacency matrix (pruned)
return(thr)
}
#' Performance of estimates, such as sensitivity, specificity, and more.
#'
#' @param x estimated binary network matrix.
#' @param true true binary network matrix.
#'
#' @return p list with results.
#'
#' @examples
#' trueNet=matrix(c(0,0,0,1,0,0,0,1,0),3,3)
#' am=matrix(c(0,0,0,1,0,1,0,1,0),3,3)
#' p=perf(am, trueNet)
perf <- function(x, true) {
d = dim(x)
Nn=d[1]
if (length(d) == 3) {
N=d[3]
} else if (length(d) == 2) {
x=array(x, dim=c(Nn,Nn,1))
N=1
}
subj=array(NA,dim=c(N,8))
cases=array(NA,dim=c(N,4))
for (i in 1:N) {
TP = sum(x[,,i] & true)
FP = sum((x[,,i] - true) == 1)
FN = sum((true - x[,,i]) == 1)
TN = sum(!x[,,i] & !true) - ncol(x[,,i])
cases[i,]=c(TP,FP,FN,TN)
# see https://en.wikipedia.org/wiki/Sensitivity_and_specificity
tpr = TP/(TP+FN) # 1
spc = TN/(TN+FP) # 2
ppv = TP/(TP+FP) # 3
npv = TN/(TN+FN) # 4
fpr = FP/(FP+TN) # 5
fnr = FN/(TP+FN) # 6
fdr = FP/(TP+FP) # 7
acc = (TP+TN)/(TP+FP+FN+TN) # 8
subj[i,]=c(tpr,spc,ppv,npv,fpr,fnr,fdr,acc)
}
colnames(cases) = c("TP", "FP", "FN", "TN")
colnames(subj) = c("tpr", "spc", "ppv", "npv", "fpr", "fnr", "fdr", "acc")
p = list()
p$subj=subj
p$cases=cases
p$tpr = sum(cases[,1])/(sum(cases[,1]) + sum(cases[,3]))
p$spc = sum(cases[,4])/(sum(cases[,4]) + sum(cases[,2]))
p$acc = (sum(cases[,1]) + sum(cases[,4]))/(sum(cases[,1]) + sum(cases[,2]) + sum(cases[,3]) + sum(cases[,4]))
p$ppv = sum(cases[,1])/(sum(cases[,1]) + sum(cases[,2]))
return(p)
}
#' Scaling data. Zero centers and scales the nodes (SD=1).
#'
#' @param X time x node 2D matrix, or 3D with subjects as the 3rd dimension.
#'
#' @return S centered and scaled matrix.
#'
#' @examples
#' # create some sample data
#' ts = array(rnorm(200*5, mean=5, sd=10), dim=c(200,5))
#' ts = scaleTs(ts)
#'
scaleTs <- function(X) {
D=dim(X)
if (length(D)==2) {
tmp=array(NA, dim=c(D[1],D[2],1))
tmp[,,1]=X
X=tmp
D=dim(X)
}
S=array(NA, dim=c(D[1],D[2],D[3]))
# center data
for (s in 1:D[3]) {
colm=colMeans(X[,,s])
S[,,s]=X[,,s]-t(array(rep(colm, D[1]), dim=c(D[2],D[1])))
}
# scale data
for (s in 1:D[3]) {
SD=sqrt(mean(apply(S[,,s], 2, var)))
S[,,s]=S[,,s]/SD
}
if (D[3]==1) {
S=S[,,1]
}
return(S)
}
#' Patel's method: computes pairwise connection strength (kappa) and directionality (tau).
#'
#' @param X time x node 2D matrix.
#' @param lower lower percentile cutoff.
#' @param upper upper percentile cutoff for 0-1 scaling.
#' @param bin threshold for conversion to binary values.
#' @param TK significance threshold for connection strength kappa.
#' @param TT significance threshold for direction tau.
#'
#' @return PT list with strengths kappa, direction tau, and net structure.
#'
#' @examples
#' # Generate some sample data
#' x=array(rnorm(200*5), dim=c(200,5))
#' p=patel(x)
patel <- function(X, lower=0.1, upper=0.9, bin=0.75, TK=0, TT=0) {
nt=nrow(X)
nn=ncol(X)
# scale data into 0,1 interval
X10 = apply(X, 2, quantile, lower) # cutoff 0.1 percentile
X90 = apply(X, 2, quantile, upper) # cutoff 0.9 percentile
a = sweep(sweep(X, 2, X10), 2, X90-X10, FUN="/") # center, scale data
X2 = apply(a, c(1,2), function(v) max(min(v,1),0)) # keep between 0,1
# binarize
X2=(X2>bin)*1 # convert to double
# Joint activation probability of timeseries a, b
# See Table 2, Patel et al. 2006
theta1 = crossprod(X2)/nt # a=1, b=1 -- a and b active
theta2 = crossprod(X2,1-X2)/nt # a=1, b=0 -- a active, b not
theta3 = crossprod(1-X2,X2)/nt # a=0, b=1
theta4 = crossprod(1-X2,1-X2)/nt # a=0, b=0
# directionality tau [-1, 1]
tau = matrix(0, nn, nn)
inds = theta2 >= theta3
tau[inds] = 1 - (theta1[inds] + theta3[inds])/(theta1[inds] + theta2[inds])
tau[!inds] = (theta1[!inds] + theta2[!inds])/(theta1[!inds] + theta3[!inds]) - 1
#tau=-tau # inverse
tau[as.logical(diag(nn))]=NA
# tau(a,b) positive, a is ascendant to b (a is parent)
# functional connectivity kappa [-1, 1]
E=(theta1+theta2)*(theta1+theta3)
max_theta1=pmin(theta1+theta2,theta1+theta3)
min_theta1=pmax(array(0,dim=c(nn,nn)),2*theta1+theta2+theta3-1)
inds = theta1>=E
D = matrix(0, nn, nn)
D[inds]=0.5+(theta1[inds]-E[inds])/(2*(max_theta1[inds]-E[inds]))
D[!inds]=0.5-(theta1[!inds]-E[!inds])/(2*(E[!inds]-min_theta1[!inds]))
kappa=(theta1-E)/(D*(max_theta1-E) + (1-D)*(E-min_theta1))
kappa[as.logical(diag(nn))]=NA
# thresholding
tkappa = kappa
tkappa[kappa >= TK[1] & kappa <= TK[2]] = 0 # this is a two-sided test, kappa ranges from -1 to 1
ttau = tau
ttau[tau >= TT[1] & tau <= TT[2]] = 0
# binary nets: we focus on positive associations
net = (tkappa > 0)*(tau < 0) # only kappa thresholded
net[diag(nn)==1]=0 # remove NA
tnet = (tkappa > 0)*(ttau < 0) # both tau and kappa thresholded
tnet[diag(nn)==1]=0 # remove NA
PT=list(kappa=kappa, tau=tau, tkappa=tkappa, ttau=ttau, net=net, tnet=tnet)
return(PT)
}
#' Randomization test for Patel's kappa. Creates a null distribution of
#' kappa values.
#'
#' @param X time x node x subjects 3D matrix.
#' @param alpha significance level.
#' @param K number of randomizations, default is 1000.
#'
#' @return stat lower and upper significance thresholds.
#'
#' @examples
#' # create some sample data with 200 samples,
#' # 3 nodes, and 5 subjects
#' ts = array(rnorm(200*3*5), dim=c(200,3,5))
#' mysubs=list()
#' mysubs[[1]]=patel(ts[,,1])
#' mysubs[[2]]=patel(ts[,,2])
#' mysubs[[3]]=patel(ts[,,3])
#' mysubs[[4]]=patel(ts[,,4])
#' mysubs[[5]]=patel(ts[,,5])
#' g=patel.group(mysubs)
#' r=rand.test(rmdiag(g$kappa), K=100)
rand.test <- function(X, alpha=0.05, K=1000) {
low = alpha/2 # two sided test
up = 1 - alpha/2
N = dim(X)[3] # Nr. of subjects
Nn = dim(X)[2] # Nr. of nodes
Nt = dim(X)[1] # Nr. of time points
ka = array(NA,dim=c(Nn,Nn,K)) # kappa null distribution
ta = array(NA,dim=c(Nn,Nn,K)) # tau null distribution
X_= array(NA, dim=c(Nt,Nn))
for (k in 1:K) {
s = sample(N,Nn, replace = F)
# shuffle across subjects with fixed nodes (because of node variance)
for (n in 1:Nn) {
X_[,n]=X[,n,s[n]]
}
p=patel(X_)
ka[,,k]=p$kappa
ta[,,k]=p$tau
}
# two sided sign. test
stat = list()
stat$kappa = quantile(ka[!is.na(ka)], probs=c(low, up)) # two sided
stat$tau = quantile(ta[!is.na(ta)], probs=c(low, up)) # two sided
return(stat)
}
#' Replaces NAs in a matrix with zeros.
#'
#' @param M Matrix
#'
#' @return matrix with NAs replaced by zeros.
#'
#' @examples
#' M=array(NA, dim=c(3,3))
#' M[1,2]=0.9
#' M=rmna(M)
rmna <- function(M) {
M[is.na(M)] = 0
return(M)
}
#' Sets the diagonal of a matrix to zero (e.g. to remove a diagonal of NAs).
#'
#' @param M Matrix
#'
#' @return matrix with a diagonal of 0's.
#'
#' @examples
#' M=array(rnorm(3*3), dim=c(3,3))
#' M[as.logical(diag(3))] = NA
#' M=rmdiag(M)
rmdiag <- function(M) {
M[as.logical(diag(nrow(M)))]=0
return(M)
}
#' Turns an asymmetric network into a symmetric network. Helper function to
#' detect a connection while ignoring directionality.
#'
#' @param M 3D matrix nodes x nodes x subjects
#'
#' @return 3D matrix nodes x nodes x subjects
#'
#' @examples
#' M=array(NA, dim=c(3,3,2))
#' M[,,1]=matrix(c(0,0,0,1,0,0,0,1,0),3,3)
#' M[,,2]=matrix(c(0,0,0,1,0,0,0,0,0),3,3)
#' M_=symmetric(M)
symmetric <- function(M) {
d = dim(M)
R = M
for (i in 1:d[3]) {
R[,,i] = pmax(M[,,i], t(M[,,i]))
}
return(R)
}
#' Stepwise forward non-exhaustive greedy search; calculates the optimum value of the discount factor.
#'
#' @param Data Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node The node to find parents for.
#' @param nbf The Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#'
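#' @examples
#' data("utestdata")
#' # a minimal sketch: greedy forward search for the parents of node 1,
#' # restricted to 3 nodes to keep this example fast
#' fw = stepwise.forward(myts[,1:3], 1)
#'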
stepwise.forward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01),
max.break=TRUE, priors=priors.spec()){
ptm=proc.time()
# Define the prior hyperparameters
m0 = priors$m0
CS0 = priors$CS0
n0 = priors$n0
d0 = priors$d0
Nn = ncol(Data) # the number of nodes
Nm = 2^(Nn-1) # the number of models (per node)
# Begin at model 1, the no parent (intercept only) model
model.store = array(0,dim=c((Nn+2),1))
# Find the Log Predictive Likelihood and discount factor for the zero parent model
Yt = Data[,node] # the time series of the node we wish to find parents for
Nt = length(Yt) # the number of time points
nd = length(delta) # the number of deltas
# Create Ft. For the zero parent model, this is simply a column of ones, representing an intercept.
Ft=rep(1,Nt)
lpl.delta=rep(NA,nd)
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[j]=sum(lpl[nbf:Nt])}
lpl.max = max(lpl.delta,na.rm=TRUE)
w = which(lpl.delta==lpl.max)
DF.hat = delta[w]
# Store the model number, model, LPL and discount factor
model.store[(Nn+1),] = lpl.max
model.store[(Nn+2),] = DF.hat
# The parents in the model
pars = numeric(0)
for (N in 1:(Nn-1)){
# Find all the models with the correct length and containing the previously selected parents
pars_add = c(1:Nn)[-c(node,pars)]
ms.new = array(0,dim=c((Nn+2),(Nn-N)))
if (length(pars) > 0){ms.new[2:(length(pars)+1),] = pars}
ms.new[(length(pars)+2),] = pars_add
# Find the LPL and discount factor of this subset of models
nms=ncol(ms.new) # How many models are being considered?
# Create empty arrays for the LPL scores and the deltas
lpl.delta=array(NA,c(nms,length(delta)))
lpl.max=rep(NA,nms)
DF.hat=rep(NA,nms)
# Now create Ft.
for (i in 1:nms){
pars = ms.new[(2:Nn),i]
pars = pars[pars!=0]
Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1){Ft[,2:ncol(Ft)] = Data[,pars]}
# Calculate the Log Predictive Likelihood for each value of delta, for the specified models
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[i,j]=sum(lpl[nbf:Nt])}
lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
w = which(lpl.delta[i,]==lpl.max[i])
DF.hat[i] = delta[w]}
ms.new[(Nn+1),] = lpl.max
ms.new[(Nn+2),] = DF.hat
# Find the highest LPL so far
max.score = max(model.store[(Nn+1),])
logBF = lpl.max - max.score
W = which(logBF > 0)
# Update model.store
model.store = cbind(model.store,ms.new)
if (length(W)==0 & max.break==TRUE){break}
else{W.max = which.max(logBF)
# Update the model parents
pars = ms.new[(2:Nn),W.max]
pars = pars[pars!=0]}}
model.store[1,] = c(1:ncol(model.store)) # attach a model number
runtime=(proc.time()-ptm)
return(list(model.store = model.store, runtime=runtime))
}
#' Stepwise backward non-exhaustive greedy search; calculates the optimum value of the discount factor.
#'
#' @param Data Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node The node to find parents for.
#' @param nbf The Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#'
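#' @examples
#' data("utestdata")
#' # a minimal sketch: backward elimination starting from the all-parent
#' # model, restricted to 3 nodes to keep this example fast
#' bw = stepwise.backward(myts[,1:3], 1)
#'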
stepwise.backward <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01),
max.break=TRUE, priors=priors.spec()){
ptm=proc.time()
# Define the prior hyperparameters
m0 = priors$m0
CS0 = priors$CS0
n0 = priors$n0
d0 = priors$d0
Nn = ncol(Data) # the number of nodes
Nm = 2^(Nn-1) # the number of models (per node)
# Begin at the all parent model
model.store = array(0,dim=c((Nn+2),1))
model.store[(2:Nn),1] = c(1:Nn)[-node]
# Find the Log Predictive Likelihood and discount factor for the all parent model
Yt = Data[,node] # the time series of the node we wish to find parents for
Nt = length(Yt) # the number of time points
nd = length(delta) # the number of deltas
# The parents in the model
pars = model.store[2:Nn,1]
pars = pars[pars!=0]
Ft = array(1,dim=c(Nt,Nn))
Ft[,2:Nn] = Data[,pars]
lpl.delta=rep(NA,nd)
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[j]=sum(lpl[nbf:Nt])}
lpl.max = max(lpl.delta,na.rm=TRUE)
w = which(lpl.delta==lpl.max)
DF.hat = delta[w]
model.store[(Nn+1),] = lpl.max
model.store[(Nn+2),] = DF.hat
for (N in 1:(Nn-1)){
# Find all the models with the correct length and missing the previously removed parents
pars_add = combn(pars,(Nn-(N+1)))
ms.new = array(0,dim=c((Nn+2),(Nn-N)))
if (length(pars_add) > 0){ms.new[2:(nrow(pars_add)+1),] = pars_add}
# Find the LPL and discount factor of these models
nms=ncol(ms.new) # How many models are being considered?
# Create empty arrays for the lpl scores and the deltas
lpl.delta=array(NA,c(nms,length(delta)))
lpl.max=rep(NA,nms)
DF.hat=rep(NA,nms)
# Now create Ft.
for (i in 1:nms){
pars=ms.new[(2:Nn),i]
pars=pars[pars!=0]
Ft=array(1,dim=c(Nt,length(pars)+1))
if (ncol(Ft)>1){Ft[,2:ncol(Ft)]=Data[,pars]}
# Calculate the log predictive likelihood for each value of delta, for the specified models
for (j in 1:nd){
lpl = dlmLplCpp(Yt,t(Ft),delta=delta[j],m0_=m0,CS0_=CS0,n0=n0,d0=d0)
lpl.delta[i,j]=sum(lpl[nbf:Nt])}
lpl.max[i] = max(lpl.delta[i,],na.rm=TRUE)
w = which(lpl.delta[i,]==lpl.max[i])
DF.hat[i] = delta[w]}
ms.new[(Nn+1),] = lpl.max
ms.new[(Nn+2),] = DF.hat
max.score = max(model.store[(Nn+1),]) # Find the highest LPL calculated so far
logBF = lpl.max - max.score
W = which(logBF > 0)
# Update model.store
model.store = cbind(model.store,ms.new)
if (length(W)==0 & max.break==TRUE){break}
else{W.max = which.max(logBF)
# Update the model parents
pars = ms.new[(2:Nn),W.max]
pars = pars[pars!=0]}}
model.store[1,] = c(1:ncol(model.store)) # attach a model number
runtime=(proc.time()-ptm)
return(list(model.store = model.store, runtime=runtime))
}
#' Stepwise combine: runs the forward and backward greedy searches and merges the scored models.
#'
#' @param Data Dataset with dimension number of time points \code{T} x number of nodes \code{Nn}.
#' @param node The node to find parents for.
#' @param nbf The Log Predictive Likelihood will sum from (and including) this time point.
#' @param delta A vector of values for the discount factor.
#' @param max.break If \code{TRUE}, the code will break if adding / removing parents does not
#' improve the LPL. If \code{FALSE}, the code will continue to the zero parent / all parent model.
#' Default is \code{TRUE}.
#' @param priors List with prior hyperparameters.
#'
#' @return
#' model.store The parents, LPL and chosen discount factor for the subset of models scored using this method.
#'
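#' @examples
#' data("utestdata")
#' # a minimal sketch: run both search directions on 3 nodes and merge
#' # the scored models
#' cb = stepwise.combine(myts[,1:3], 1)
#'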
stepwise.combine <- function(Data, node, nbf=15, delta=seq(0.5,1,0.01),
max.break=TRUE, priors=priors.spec()) {
ptm=proc.time()
fw=stepwise.forward(Data, node, nbf=nbf, delta=delta, priors=priors, max.break=max.break)
bw=stepwise.backward(Data, node, nbf=nbf, delta=delta, priors=priors, max.break=max.break)
model.store = mergeModels(fw$model.store,bw$model.store)
runtime=(proc.time()-ptm)
return(list(model.store=model.store, runtime=runtime))
}
#' Calculate the location and scale parameters for the time-varying coefficients
#' given all the observations. West, M. & Harrison, J., 1997. Bayesian Forecasting
#' and Dynamic Models. Springer New York.
#'
#' @param mt the vector or matrix of the posterior mean (location parameter), dim = \code{p x T},
#' where \code{p} is the number of thetas (at any time \code{t}) and \code{T} is the number of time points
#' @param CSt the posterior scale matrix with dim = \code{p x p x T} (unscaled by the observation variance)
#' @param RSt the prior scale matrix with dim = \code{p x p x T} (unscaled by the observation variance)
#' @param nt vector of the updated hyperparameters for the precision \code{phi} with length \code{T}
#' @param dt vector of the updated hyperparameters for the precision \code{phi} with length \code{T}
#'
#' @return
#' smt = the location parameter of the retrospective distribution with dimension \code{p x T}
#' sCt = the scale matrix of the retrospective distribution with dimension \code{p x p x T}
#'
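#' @examples
#' data("utestdata")
#' # a minimal sketch: filter node 1 with dlm.lpl(), then smooth the
#' # coefficient estimates backwards over time
#' Yt = myts[,1]
#' Ft = t(cbind(1, myts[,2:5]))
#' m = dlm.lpl(Yt, Ft, 0.7)
#' retro = dlm.retro(m$mt, m$CSt, m$RSt, m$nt, m$dt)
#'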
dlm.retro <- function(mt, CSt, RSt, nt, dt) {
# Convert vectors to matrices
if (is.vector(mt)){
mt = array(mt, dim=c(1,length(mt)))
CSt = array(CSt, dim=c(1,1,length(CSt)))
RSt = array(RSt, dim=c(1,1,length(RSt)))}
p = nrow(mt) # the number of thetas at any time t
Nt = ncol(mt) # the number of time points
smt = array(NA, dim=c(p,Nt))
sCSt = array(NA, dim=c(p,p,Nt))
# Values at the last time point
smt[,Nt] = mt[,Nt]
sCSt[,,Nt] = CSt[,,Nt]
# For other time points
for (i in (Nt-1):1){
#inv.sR = solvecov(RSt[,,(i+1)], cmax = 1e+10)$inv # overcomes the limitation when it cannot invert the matrix package:fpc
inv.sR = solve(RSt[,,(i+1)])
B = CSt[,,i] %*% inv.sR
smt[,i] = mt[, i] + B %*% (smt[,(i+1)] - mt[,i])
sCSt[,,i] = CSt[,,i] + B %*% (sCSt[,,(i+1)] - RSt[,,(i+1)]) %*% t(B)}
# Multiply by the observation variance
sCt = sCSt * (dt[Nt] / nt[Nt])
result = list(smt=smt, sCt=sCt)
return(result)
}
#' Checks results and returns job number for incomplete nodes.
#' @param path path to results.
#' @param ids subjects ids.
#' @param Nr Number of runs.
#' @param Nn Number of nodes.
#'
#' @return jobs job numbers
#'
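#' @examples
#' \dontrun{
#' # a hedged sketch with hypothetical subject IDs and results path:
#' # checks a results folder for 2 runs and 10 nodes per subject
#' jobs = getIncompleteNodes("results/", c("s01", "s02"), Nr=2, Nn=10)
#' }
#'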
getIncompleteNodes <- function(path, ids, Nr, Nn) {
f=list.files(path, pattern=glob2rx('*.txt'))
# get info flag
info = strsplit(f[1], "_")[[1]][6]
idx = rep(NA, length(ids)*Nr*Nn)
c=1
for (i in 1:length(ids)) {
for (r in 1:Nr) {
for (n in 1:Nn) {
s = sprintf("%s_Run_%03d_Comp_%03d_%s_node_%03d.txt", ids[i], r, n, info, n)
idx[c] = s %in% f
c=c+1
}
}
}
jobs=which(!idx)
return(jobs)
}
#' Merges forward and backward model store.
#' @param fw forward model.
#' @param bw backward model.
#'
#' @return m model store.
#'
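#' @examples
#' data("utestdata")
#' # a minimal sketch: merge forward and backward search results for
#' # node 1 of a 3-node network
#' fw = stepwise.forward(myts[,1:3], 1)
#' bw = stepwise.backward(myts[,1:3], 1)
#' m = mergeModels(fw$model.store, bw$model.store)
#'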
mergeModels <- function(fw, bw) {
Nn = nrow(fw)-2
a = apply(fw[2:(Nn),], 2, sort)
b = apply(bw[2:(Nn),], 2, sort)
idx = array(NA, ncol(b))
for (i in 1:ncol(b)) {
idx[i] = any(apply(a, 2, function(x, want) isTRUE(all.equal(x, want)), b[,i]))
}
model.store=cbind(fw, bw[,!idx])
return(model.store)
}
#' Removes reciprocal connections in the lower triangle of the network matrix.
#' @param M adjacency matrix
#'
#' @return M adjacency matrix without reciprocal connections.
#'
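#' @examples
#' # a minimal sketch: a 3-node network with a reciprocal edge between
#' # nodes 1 and 2; the lower-triangle direction 2->1 is removed
#' M = matrix(c(0,1,0, 1,0,0, 0,0,0), 3, 3)
#' M = rmRecipLow(M)
#'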
rmRecipLow <- function(M) {
# get edges, just upper triangle
edges = which((M*upper.tri(M))==1, arr.ind = T)
for (i in 1:nrow(edges)) {
# if edge is symmetric
if (M[edges[i,1], edges[i,2]] == 1 &&
M[edges[i,2], edges[i,1]] == 1) {
M[edges[i,2], edges[i,1]] = 0 # remove edge in lower diag.
}
}
return(M)
}
#' Quick diagnostics on delta.
#' @param path path to results files.
#' @param id subject identifier.
#' @param nodes number of nodes.
#' @return x array with each node model's delta.
#'
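#' @examples
#' \dontrun{
#' # a hedged sketch: assumes results for a subject labeled "subj01"
#' # were previously written to tempdir() by node() or subject()
#' d = diag.delta(tempdir(), "subj01", nodes=3)
#' }
#'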
diag.delta <- function(path, id, nodes) {
s = read.subject(path=path, id=id, nodes=nodes)
x=t(as.matrix(s$winner[nodes+2,], 1, nodes))
rownames(x) = 'delta'
colnames(x) = seq(1,nodes)
return(x)
}
#' Threshold correlation matrix to match a given number of edges.
#' @param R correlation matrix.
#' @param n number of edges.
#' @return A thresholded matrix.
#'
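#' @examples
#' # a minimal sketch: threshold a random correlation matrix, keeping
#' # roughly the 4 strongest edges (n is rounded to an even number)
#' x = array(rnorm(200*5), dim=c(200,5))
#' A = cor2adj(cor(x), 4)
#'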
cor2adj <- function(R, n) {
# if n is odd, make it even (randomly adding or subtracting 1)
if (n %% 2) {
n = n + sample(c(-1,1),1)
}
l = length(R)
v=quantile(abs(R), probs = 1-n/l)
A = R
A[R < v & R > -v] = 0
return(A)
}
#' Comparing two population proportions on the network with FDR correction.
#' @param x1 network matrix with successes in group 1.
#' @param n1 sample size group 1.
#' @param x2 network matrix with successes in group 2.
#' @param n2 sample size group 2.
#' @param alpha alpha level for uncorrected test.
#' @param fdr alpha level for FDR.
#'
#' @return store List with test statistics and p-values.
#'
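#' @examples
#' # a minimal sketch: compare edge proportions between two groups of
#' # 30 and 40 subjects on random 5-node edge-count matrices
#' x1 = array(rbinom(5*5, 30, 0.3), dim=c(5,5))
#' x2 = array(rbinom(5*5, 40, 0.3), dim=c(5,5))
#' p = prop.nettest(x1, 30, x2, 40)
#'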
prop.nettest <- function(x1, n1, x2, n2, alpha=0.05, fdr=0.05) {
# # Verified test with example from
# # https://onlinecourses.science.psu.edu/stat500/node/55
# x1 = 52; n1 = 69
# x2 = 120; n2 = 131
p1 = x1/n1
p2 = x2/n2
p = (x1+x2)/(n1+n2)
z = (p1 - p2)/sqrt(p*(1-p)*(1/n1 + 1/n2))
pval = 2*pnorm(-abs(z)) # get two-sided p-value
# FDR
pval_fdr=array(p.adjust(pval, method = "fdr"), dim=dim(pval))
z_fdr=z
z_fdr[pval_fdr>=fdr]=NA
z_uncorr=z
z_uncorr[pval>=alpha]=NA
store=list()
store$z=z
store$pval=pval
store$pval_fdr=pval_fdr
store$z_fdr=z_fdr
store$z_uncorr=z_uncorr
return(store)
}
#' Comparing connectivity strength of two groups with FDR correction.
#' @param m matrix with Nn x Nn x N.
#' @param g group assignment, vector of type factor of size N.
#' @param fdr FDR alpha level.
#' @param alpha alpha level for uncorrected test.
#' @param perm optional permutation test, default is false.
#' @param n_perm number of permutations.
#'
#' @return store List with test statistics and p-values.
#'
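#' @examples
#' # a minimal sketch: compare connectivity strength between two groups
#' # of 10 subjects each on random 3-node networks
#' m = array(rnorm(3*3*20), dim=c(3,3,20))
#' g = factor(rep(c("A", "B"), each=10))
#' tt = ttest.nettest(m, g)
#'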
ttest.nettest <- function(m, g, alpha=0.05, fdr=0.05, perm=FALSE, n_perm=9999) {
Nn = dim(m)[1]
N = dim(m)[3]
stat = array(NA, c(Nn, Nn))
pval = array(NA, c(Nn, Nn))
df = array(NA, c(Nn, Nn))
for (i in 1:Nn) {
for (j in 1:Nn){
if (!all(is.na(m[i,j,]))) { # if not NaN
if (!perm) {
tmp = t.test(m[i,j,] ~ g, var.equal=TRUE)
stat[i,j] = tmp$statistic
pval[i,j] = tmp$p.value
df[i,j] = tmp$parameter
} else if (perm) {
# using package "coin" here
tmp = oneway_test(m[i,j,] ~ g, distribution = approximate(B=n_perm))
stat[i,j] = statistic(tmp)
pval[i,j] = pvalue(tmp)
}
}
}
}
# FDR
pval_fdr = pval
idx = !is.na(pval)
pval_fdr[idx] = p.adjust(pval[idx], method = "fdr")
stat_fdr=stat
stat_fdr[pval_fdr>=fdr]=NA
stat_uncorr=stat
stat_uncorr[pval>=alpha]=NA
store=list()
store$stat=stat
store$pval=pval
store$stat_fdr=stat_fdr
store$pval_fdr=pval_fdr
store$stat_uncorr=stat_uncorr
return(store)
}