R/snha.R

#' \name{snha-package}
#' \alias{snha-package}
#' \title{ snha package - association chain graphs from correlation networks}
#' \description{The snha package can be used to construct association chain graphs 
#'    based on the St. Nicolas House Analysis (SNHA) algorithm as described in Groth et. al. 2019.
#'    and Hermanussen et. al. 2021.}
#' \details{The package provides the following functions:
#' Function for graph generation from data:
#' \describe{
#' \item{\link[snha:snha]{snha(data)}}{applys the SNHA method on the data and returns 
#'    a new snha graph object}
#' }
#' S3 methods for snha graphs:
#' \describe{
#' \item{\link[snha:plot.snha]{plot.snha(x)}}{plots a snha graph}
#' \item{\link[snha:as.list.snha]{as.list.snha(x)}}{return a list
#'    representation of a snha graph object}
#' }
#' Utility functions:
#' \describe{
#' \item{\link[snha:snha_get_chains]{snha_get_chains(g)}}{returns the chains found 
#'        by the algorithm as matrix}
#' \item{\link[snha:snha_graph2data]{snha_graph2data(A)}}{create for the given
#'        adjacency matrix some data with the appropiate correlations}
#' \item{\link[snha:snha_layout]{snha_layout(g)}}{calculate layout coordinates 
#'        for the given graph or adjacency matrix}
#' \item{\link[snha:snha_ll]{snha_ll(g,chain)}}{calculate log-likelihood for 
#'        the given chain of the snha graph}
#' \item{\link[snha:snha_rsquare]{snha_rsquare(data,g)}}{for given data and graph 
#'         or adjacency matrix calculate linear model r-square value}
#' }
#' }
#' \value{No return value} 
#' \examples{
#' library(MASS)
#' data(birthwt)
#' as=snha(birthwt[,-1])
#' plot(as)
#' as$theta
#' ls(as)
#' data(decathlon88)
#' head(decathlon88)
#' dec=snha(decathlon88,method="spearman",alpha=0.1)
#' plot(dec,layout='sam')
#' }
#' \author{Detlef Groth <dgroth@uni-potsdam.de>}
#' \references{
#' \itemize{
#'    \item Groth, D., Scheffler, C., & Hermanussen, M. (2019). 
#'    Body height in stunted Indonesian children depends directly on parental education and not 
#'     via a nutrition mediated pathway - 
#'     Evidence from tracing association chains by St. Nicolas House Analysis. 
#'     \emph{Anthropologischer Anzeiger}, 76 No. 5 (2019), p. 445 - 451.
#'     \doi{10.1127/anthranz/2019/1027}
#'    \item Hermanussen, M., Assmann, & Groth, D. (2021).
#'    Chain Reversion for Detecting Associations in Interacting Variables - St. Nicolas House Analysis.
#'    \emph{International Journal of Environmental Research and Public Health}.
#'    18, 4 (2021). \doi{10.3390/ijerph18041741}.
#'    \item Novine, M., Mattsson, C. C., & Groth, D. (2021).
#'    Network reconstruction based on synthetic data generated by a Monte Carlo
#'    approach. \emph{Human Biology and Public Health}, 3:26.
#'    \doi{10.52905/hbph2021.3.26}
#' }
#' }
""

#' \name{snha}
#' \alias{snha}
#' \title{Initialize a snha object with data.}
#' \description{
#'     The main entry function to initialize a snha object with data where
#'     variables are in columns and items are in rows
#' }
#' \usage{snha(
#'   data,
#'   alpha=0.05,
#'   method='pearson',
#'   threshold=0.01,
#'   check.singles=FALSE,
#'   prob=FALSE,
#'   prob.threshold=0.2,
#'   prob.n=25)
#' }
#' \arguments{
#' \item{data}{a data frame where network nodes are the row names and data
#'             variables are in the columns.}
#' \item{alpha}{confidence threshold for p-value edge cutting after all chains
#'              were generated, default: 0.05.}
#' \item{method}{method to calculate correlation/association values, can be
#'               'pearson', 'spearman' or 'kendall', default: 'pearson'.}
#' \item{threshold}{R-squared correlation coefficient threshold for which
#'           r-square values should be used for chain generation, r=0.1 is r-square of
#'           0.01, default: 0.01.}
#' \item{check.singles}{should isolated nodes connected with sufficient high R^2
#'           and significance, default: FALSE.}
#' \item{prob}{should be probabilities computed for each edge using
#'          bootstrapping. Only in this case the parameters starting with prob are used,
#'          default: FALSE}
#' \item{prob.threshold}{threshold to set an edge, a value of 0.5 means, that
#'          the edge must be found in 50\% of all samplings, default: 0.2}
#' \item{prob.n}{number of bootstrap samples to be taken, default: 25}
#' }
#' \keyword{network}
#' \keyword{correlation}
#' \value{A snha graph data object with the folling components:
#' \describe{
#' \item{chains}{association chains building the graph}
#' \item{data}{representing the original input data}
#' \item{p.values}{matrix with p-values for the pairwise correlations}
#' \item{probabilities}{in case of re-samplings, the proportion how often the chain was found}
#' \item{sigma}{correlation matrix used for the algorithm}
#' \item{theta}{adjacency  matrix found by the SNHA method}
#' }
#' }
#' \examples{
#' data(swiss)
#' sw.g=snha(swiss,method='spearman')
#' # what objects are there?
#' ls(sw.g)
#' sw.g$theta
#' round(sw.g$sigma,2)
#' sw.g=snha(swiss,method='spearman',check.singles=TRUE,prob=TRUE)
#' sw.g$theta
#' sw.g$probabilities
#' }
#' \seealso{ \link[snha:plot.snha]{plot.snha}}
#' 


snha <- function (data,alpha=0.05,method='pearson',threshold=0.01,
                     check.singles=FALSE,
                     prob=FALSE,prob.threshold=0.2,prob.n=25) {
    if (prob) {
        # check for correlation matrix
        if (nrow(data)==ncol(data) &  
                length(which(colnames(cor(data))==rownames(cor(data)))) == nrow(data)) {
                    stop("only data matrices, not correlation matrices, can be used for bootstrapping graphs")
        }

        as=snha(data,alpha=alpha,method=method,threshold=threshold,
                   check.singles=check.singles,prob=FALSE)
        rand.prob=as$theta
        rand.prob[]=0
        for (i in 1:prob.n) {
            sam=sample(1:nrow(data),nrow(data),replace=TRUE)
            asi=snha(data[sam,],alpha=alpha,method=method,threshold=threshold,
                        check.singles=check.singles,prob=FALSE)
            if (i == 1) {
                as$probabilities=asi$theta
            } else {
                as$probabilities=as$probabilities+asi$theta
                #recover()
            }
            rand.data=data
            for (i in 1:ncol(data)) {
                rand.data[,i]=sample(data[,i])
            }
            asr=snha(rand.data,alpha=alpha,method=method,threshold=threshold,
                        check.singles=check.singles,prob=FALSE)
            rand.prob=rand.prob+asr$theta
        }
        as$probabilities=as$probabilities/prob.n
        rand.prob=as.vector(rand.prob)/prob.n
        as$rand.probabilities=as$probabilities
        as$rand.probabilities[]=rand.prob
        as$theta[as$probabilities>=prob.threshold]=1
        as$theta[as$probabilities<prob.threshold]=0
        as$p.values=as$theta
        vprob=as.vector(as$probabilities)
        as$p.values[]=unlist(lapply(vprob,function (x) 1-length(which(x>rand.prob))/length(rand.prob)))
    } else {
        as=Asgp$data2chainGraph(data,alpha=alpha,method=method,threshold=threshold)
        if (check.singles) {
            cmt=as$sigma
            #cor(data,method=method,use='pairwise.complete.obs')
            if (method!= "kendall") {
                cmt=cmt^2
            } else {
                cmt=abs(cmt)
            }
            diag(cmt)=0
            idx=which(apply(as$theta,1,sum)==0) 
            for (i in idx) {
                if (max(cmt[i,])>threshold) {
                    j=which(max(cmt[i,])==cmt[i,])
                    if (cor.test(data[,i],data[,j])$p.value<alpha) {
                        as$theta[i,j]=0.5
                        as$theta[j,i]=0.5
                    }
                }
            }
        }
        as$probabilities=as$theta
    }
    as=ReduceChains(as)
    class(as)='snha'
    return(as)
}

#' \name{snha_get_chains}
#' \alias{snha_get_chains}
#' \title{Return the chains of an snha graph as data frame}
#' \usage{snha_get_chains(graph)}
#' \description{This is a utility function to return the chains which
#' constructs the graph as a matrix.}
#' \arguments{
#' \item{graph}{a snha graph object}
#' }
#' \value{matrix with one chain per row, shorter chains are filled up with empty strings}
#' \examples{
#' data(swiss)
#' sw.g=snha(swiss)
#' snha_get_chains(sw.g)
#' }
#' 

snha_get_chains <- function (graph) {
    mx=max(as.numeric(lapply(graph$chains,length)))
    l=length(as.numeric(lapply(graph$chains,length)))
    mt=matrix("",nrow=l,ncol=mx+1)
    colnames(mt)=c("Name",paste("Node",1:mx,sep=""))
    for (n in 1:length(names(graph$chains))) {
        name=names(graph$chains)[n]
        mt[n,1]=name
        for (m in 1:length(graph$chains[[name]])) {
            mt[n,m+1]=graph$chains[[name]][m]
        }
    }
    return(mt)
}


#' \name{snha_ll}
#' \alias{snha_ll}
#' \title{log-likelihood for the given snha graph and the given chain}
#' \usage{snha_ll(graph,chain=NULL)}
#' \description{This function returns the log-likelihood for the given snha
#' graph and the given chain. If the 'block.p.value' is lower than 0.05 than
#' that the chain is not sufficient to capture the variable dependencies,
#' p-values above 0.05 indicate a good coverage of the chain for the linear
#' dependencies between the nodes.}
#' \arguments{
#' \item{graph}{a snha graph object}
#' \item{chain}{a chain object of a snha graph, if not given a data frame with
#' the values is returned for all chains, default: NULL}
#' }
#' \value{list with the following components: 'll.total', 'll.chain',
#' 'll.rest', 'll.block', data frame 'df' with the columns 'chisq', 'p.value',
#' 'block.df', 'block.ch', 'block.p.value'. If chain is not given an overall
#' summary is made for all chains an returned as data frame.}
#' \examples{
#' data(swiss)
#' sw.g=snha(swiss)
#' snha_ll(sw.g,sw.g$chain$Catholic)
#' head(snha_ll(sw.g))
#' }
#'

snha_ll = function (graph,chain=NULL) {
    g=graph
    if (is.null(chain)) {
        return(LL.makeDF(g$data, g$chains))
    } else {
        return(LL.getChainLL(g$data,chain))
    }
}
#

#' \name{snha_rsquare}
#' \alias{snha_rsquare}
#' \title{linear model based r-square values for given data and graph}
#' \usage{snha_rsquare(data,graph=NULL)}
#' \description{ The function `snha_rsquare` calculates for given data and 
#' a graph the covered r-squared values by  a linear model for each node. 
#' The linear model predicts each node by an additive mode 
#' using it's neighbor nodes in the graph.}
#' \arguments{
#' \item{data}{data matrix or data frame where variables
#' are in columns and samples in rows or a snha graph}
#' \item{graph}{ graph object or adjacency matrix of an (un)directed graph, not
#' needed if data is a snha graph, default: NULL.}
#' }
#' \value{vector of rsquare values for each node of the graph}
#' \examples{ 
#' # random adjacency matrix
#' A=matrix(rbinom(100,1, 0.2),nrow=10,ncol=10)
#' diag(A)=0
#' colnames(A)=rownames(A)=LETTERS[1:10]
#' # random data
#' data=matrix(rnorm(1000),ncol=10)
#' colnames(data)=colnames(A)
#' snha_rsquare(data,A)
#' # real data
#' data(swiss)
#' sw.s=snha(swiss,method='spearman')
#' rsqs=snha_rsquare(sw.s)
#' plot(sw.s,main=paste("r =",round(mean(rsqs,2))),
#'    layout='star',star.center='Examination')
#' # some colors for r-square values
#' vcols=paste("grey",seq(80,40,by=-10),sep="")
#' scols=as.character(cut(snha_rsquare(swiss,sw.s$theta),
#'    breaks=c(0,0.1,0.3,0.5,0.7,1),labels=vcols))
#' plot(sw.s,main=paste("r =",round(mean(snha_rsquare(swiss,sw.s$theta)),2)),
#'    vertex.color=scols ,layout='star',star.center='Examination',
#'    vertex.size=10,edge.color=c('black','red'),edge.width=3)
#' }
#'

snha_rsquare = function (data,graph=NULL) {
    g=graph
    if (any(class(data) %in% "matrix")) {
        data=as.data.frame(data)
    }
    if (class(data)[1]=="snha") {
        g=data$theta
        data=data$data
    }
    if (class(g)[1]=="snha") {
        A=g$theta
    } else {
        A=g
    }
    if (is.null(class(g)[1])) {
        stop("Error: An adjacency matrix g was not given to snha_rsquare!")
    }
    res=c()
    colnames(A)=rownames(A)=gsub("^([0-9])","XYZ\\1",colnames(A))
    colnames(data)=gsub("^([0-9])","XYZ\\1",colnames(data))
    rn=rownames(A)
    for (i in 1:nrow(A)) {
        nms=names(which(A[i,]==1))
        if (length(nms) == 0) {
            res=c(res,0)
            next
        }
         frmla = reformulate(termlabels = nms,  response = rn[i])
         modelLm = lm(frmla, data = data)
         rsquared = summary(modelLm)$r.squared
         res=c(res,rsquared)
     }
     names(res)=rn
     names(res)=gsub("^XYZ([0-9])","\\1",names(res))
     return(res)
}


#' \name{snha_graph2data}
#' \alias{snha_graph2data}
#' \title{create correlated data for the given adjacency matrix representing
#' a directed graph or an undirected graph}
#' \usage{snha_graph2data(
#'   A,
#'   n=100,
#'   iter=50,
#'   val=100,
#'   sd=2,
#'   prop=0.025,
#'   noise=1,
#'   method="mc"
#'   )
#' }
#' \description{This function is a short implementation of the Monte Carlo
#' algorithm described in Novine et. al. 2022.}
#' \arguments{
#' \item{A}{an adjacency matrix}
#' \item{n}{number of values, measurements per node, default: 100}
#' \item{iter}{number of iterations, default: 50}
#' \item{sd}{initial standard deviation, default: 2}
#' \item{val}{initial node value, default: 100}
#' \item{prop}{proportion of the target node value take from the source node,
#' default: 0.025}
#' \item{noise}{sd for the noise value added after each iteration using rnorm
#' function with mean 0, default: 1}
#' \item{method}{method for data generation, either 'mc' for using Monte Carlo
#' simulation or 'pc' for using a precision matrix, default: 'mc'}
#' }
#' \value{matrix with the node names as rows and samplings in the columns}
#'\examples{
#' opar=par(mfrow=c(1,2),mai=rep(0.2,4))
#' A=matrix(0,nrow=6,ncol=6)
#' rownames(A)=colnames(A)=LETTERS[1:6]
#' A[1:2,3]=1
#' A[3,4]=1
#' A[4,5:6]=1
#' A[5,6]=1
#' plot.snha(A,layout="circle"); 
#' data=snha_graph2data(A)
#' round(cor(t(data)),2)
#' P=snha(t(data))
#' plot(P,layout="circle")
#' par(opar)
#' }
#' \references{
#' \itemize{
#' \item Novine, M., Mattsson, C. C., & Groth, D. (2021).
#' Network reconstruction based on synthetic data generated by a Monte Carlo approach. 
#' \emph{Human Biology and Public Health}, 3:26.
#' \doi{10.52905/hbph2021.3.26}
#' }}
#' 

snha_graph2data <- function (A,n=100,iter=50,val=100,sd=2,prop=0.025,noise=1,method="mc") {
    g=A
    res=matrix(0,ncol=n,nrow=nrow(A))
    rownames(res)=rownames(A)
    stopifnot(method %in% c('mc','pm'))
    if (method == "pm") {
        data=Snha_pmatrix(g,mu=val,n=n)
        return(data)
    }
    for (i in 1:n) {
        units=rnorm(nrow(A),mean=val,sd=sd)
        names(units)=rownames(A)
        nms=names(units)
        for (j in 1:iter) {
            for (node in sample(rownames(A))) {
                targets=colnames(A)[which(A[node,]!=0)]
                for (target in sample(targets)) {
                    P=abs(A[node,target])
                    nval=units[[node]]*(prop*P)
                    nval=nval+units[[target]]*(1-(prop*P))
                    if (A[node,target]<0) {
                        diff=nval-units[[target]]
                        nval=units[[target]]-diff/0.5
                    }
                    units[[target]]=nval;
                }
            }
            # scale back to mean 100 and sd of 2
            # units[]=scale(units)*sd+val
            #names(units)=nms
            units=units+rnorm(length(units),sd=noise)
        }
        res[,i]=units
    }
    rt=t(res)
    rt=scale(rt)*sd+val
    res=t(rt)
    rownames(res)=nms
    return(res)
}

# methods for a snha graph

#' \name{plot.snha}
#'
#' \alias{plot.snha}
#' \title{display network or correlation matrices of snha graphs}
#' \description{The function `plot.snha` provides a simple display of network 
#'   graphs correlation matrices using  filled circles (vertices) to represent
#'   variables and edges which connect the vertices with high absolute. 
#'   correlation values. Positive correlations are shown in black, negative
#'   correlations are shown in red. For more information see the details
#'   section.
#' }
#' \details{This is a plot function to display networks or correlation matrices
#'          of 'snha' graph objects. 
#'          In case of bootstrapping the graph by using the `snha` function with the `prob=TRUE` 
#'          option lines in style full, broken and dotted lines are drawn if
#'          they are found in more than 75, 50 or 25 percent of all re-samplings.
#'          You can change these limits by using the `threshold` argument.}
#' \usage{
#' \method{plot}{snha}(
#'  x,
#'  type = "network",
#'  layout = "circle",
#'  vertex.color = "salmon",
#'  cex = 1,
#'  vertex.size = 5,
#'  edge.width = 2,
#'  edge.color = c("grey70", "red"),
#'  edge.text = NULL,
#'  edge.cex = 0.8,
#'  edge.pch = 0,
#'  noise = FALSE,
#'  hilight.chain = NULL,
#'  chain.color = c("black", "red"),
#'  star.center = NULL,
#'  plot.labels = TRUE,
#'  lty = 1,
#'  threshold = c(0.25, 0.5, 0.75),
#'  interactive = FALSE,
#'  ...
#' )
#' }
#' \arguments{
#' \item{x}{snha graph object usually created with the 'snha' function or an
#'          adjacency matrix}
#' \item{type}{character string specifying the plot type either 'network' or
#' '         cor', default: 'network'}
#' \item{layout}{graph layout for plotting one of 'circle', 'sam', 'samd',
#'           'grid', 'mds', 'mdsd', 'star', default: 'circle'}
#' \item{vertex.color}{default color for the vertices, either a single value,
#'            all vertices have hen this color or a vector of values,
#'            for different colors for the nodes, default: 'salmon'}
#' \item{cex}{size of the vertex labels which are plotted on the vertices, default: 1}
#' \item{vertex.size}{number how large the vertices should be plotted, default: 5}
#' \item{edge.width}{number on how strong the edges should be plotted, if
#'           edge.width=0, then the number is based on the correlation values, 
#'           default: 2}
#' \item{edge.color}{color to be plotted for edges. Usually vector of length two.
#'           First color for positive correlations, second color for negative
#'           correlations. Default: c('grey','red')}
#' \item{edge.text}{optional matrix to give edge labels, default: NULL}
#' \item{edge.cex}{character expansion for edge labels, default: 0.8}
#' \item{edge.pch}{plotting character which should be placed below the edge.text,
#'            default: 0}
#' \item{noise}{should be noise added to the layout. Sometimes useful if nodes
#'            are too close. Default: FALSE}
#' \item{hilight.chain}{which chain should be highlighted, 
#'            default: NULL (no chain highlight)}
#' \item{chain.color}{which color for chain edges, default: black}
#' \item{star.center}{the centered node if layout is 'start', must be
#'            a character string for the node name, default: NULL}
#' \item{plot.labels}{should node labels plotted, default: TRUE}
#' \item{lty}{line type for standard edges in the graph, default: 1}
#' \item{threshold}{cutoff values for bootstrap probabilities for drawing edges
#'          as dotted, broken lines and solid lines, default: c(0.25,0.5,0.75)}
#' \item{interactive}{switch into interactive mode where you can click in the
#'          graph and move nodes with two clicks, first selecting the node,
#'          second click gives the new coordinates for the node, default: FALSE}
#' \item{\dots}{currently not used}
#' }
#' \value{returns the layout of the plotted network or NULL if type is `corrplot`
#'         (invisible)}
#' \examples{ 
#' data(swiss)
#' sw.g=snha(swiss,method='spearman')
#' sw.g$theta
#' round(sw.g$sigma,2)
#' plot(sw.g,type='network',layout='circle')
#' plot(sw.g,type='network',layout='sam')
#' plot(sw.g,type='corplot')
#' # adding correlation values
#' plot(sw.g,edge.text=round(sw.g$sigma,2),edge.cex=1.2,edge.pch=15)
#' sw.g=snha(swiss,method='spearman',prob=TRUE)
#' sw.g$theta
#' sw.g$probabilities
#' plot(sw.g,type='network',layout='sam')
#' sw.g$chains
#' # plot chains for a node
#' plot(sw.g,layout="sam",lty=2,hilight.chain="Infant.Mortality",
#'  edge.width=3,edge.color=c("black","red"))
#' # an example for an adjacency matrix
#' M=matrix(rbinom(100,1, 0.2),nrow=10,ncol=10)
#' diag(M)=0
#' colnames(M)=rownames(M)=LETTERS[1:10]
#' plot.snha(M)
#' }
#'

plot.snha = function (x,type='network',
                     layout='circle',
                     vertex.color='salmon',cex=1,
                     vertex.size=5,edge.width=2,
                     edge.color=c('grey70','red'),
                     edge.text=NULL,edge.cex=0.8,edge.pch=0,
                     noise=FALSE, 
                     hilight.chain=NULL,
                     chain.color=c('black','red'),
                     star.center=NULL,
                     plot.labels=TRUE,
                     lty=1,threshold=c(0.25,0.5,0.75),
                     interactive=FALSE,...) {
    g=x
    if (any(class(g) %in% "matrix")) {
        g=as.data.frame(g)
    }
    marrow <- function(x0, y0, x1, y1, cut.front, cut.back, col='black', ...){
        x0.new <- (1 - cut.front) * x0 + cut.front * x1
        y0.new <- (1 - cut.front) * y0 + cut.front * y1
        x1.new <- (1 - cut.back) * x1 + cut.back * x0
        y1.new <- (1 - cut.back) * y1 + cut.back * y0
        arrows(x0.new, y0.new, x1.new, y1.new, col=col, ...)
    }
    getPCols = function (n) {
        if(n > 20 |  n < 1) {
            stop("only between 1 and 20 colors can be given" ) 
        }
        pcols= c("#FFC5D0","#FDC8C3","#F6CBB7","#EDD0AE","#E2D4A8","#D4D8A7","#C5DCAB","#B6DFB4","#A8E1BF",
                 "#9EE2CB", "#99E2D8","#9BE0E5","#A4DDEF","#B3D9F7","#C4D5FB","#D5D0FC","#E4CBF9","#F0C7F2",
                 "#F9C5E9", "#FEC4DD")
                 idx=seq(1,20,by=floor(20/n))
                 return(pcols[idx])
    }

    if (type %in% c('corrplot','cor','corplot')) {
        if (is.data.frame(g)) {
            Snha_corrplot(g,text.lower=TRUE,cex=cex,...)
        } else {
            Snha_corrplot(g$sigma,text.lower=TRUE,cex=cex,...)
        }
        lay=NULL
    } else {
        if (any(class(layout) %in% "character")) {
            if (class(g)[1]=="snha") {
                xy=snha_layout(g,mode=layout,noise=noise,star.center=star.center,method=g$method)    
            } else {
                xy=snha_layout(g,mode=layout,noise=noise,star.center=star.center)    
            }
        } else if (any(class(layout) %in% 'matrix')) {
            xy=layout
            colnames(xy)=c("x","y")
            if (is.null(rownames(xy)[1])) {
                rownames(xy)=rownames(g)
            }
        }
        arrow <- function (x,y,cut=0.6,lwd=2,lty=1,arrow.col="#666666",...) {
            hx <- (1 - cut) * x[1] + cut * x[2]
            hy <- (1 - cut) * y[1] + cut * y[2]
            arrows(hx,hy,x[2],y[2],lwd=lwd,code=0,col=arrow.col,lty=lty,...)
            for (a in c(20,15,10,5)) {
                arrows(x[1],y[1],hx,hy,length=0.06*lwd,angle=a,lwd=lwd,col=arrow.col,lty=lty,...)
            }
        }

        doPlot = function (theta,sigma,xy,node=NULL,probabilities=NULL,cmatrix=NULL,...) {
            xrange=range(xy[,1])
            yrange=range(xy[,2])
            xlim=c(xrange[1]-1/10*diff(xrange),xrange[2]+1/10*diff(xrange))
            ylim=c(yrange[1]-1/10*diff(yrange),yrange[2]+1/10*diff(yrange))
            if (class(probabilities)[1] == "NULL") {
                probabilities=theta
            }
            plot(xy,pch=19,col="salmon",cex=vertex.size,axes=FALSE,xlab="",ylab="",
                 xlim=xlim,ylim=ylim,...)
            directed=!identical(theta,t(theta))
            for (i in 1:(nrow(theta))) {
                for (j in 1:nrow(theta)) {
                    if (i == j) { next }
                    x1=xy[rownames(theta)[i],1]
                    x2=xy[rownames(theta)[j],1]
                    y1=xy[rownames(theta)[i],2]
                    y2=xy[rownames(theta)[j],2]
                    if (edge.width==0) {
                        ew=as.numeric(cut(abs(sigma[i,j]),breaks=c(0,0.1,0.3,0.5,1)))
                    } else {
                        ew=edge.width
                    }
                    if (abs(probabilities[i,j])>threshold[3]) {
                        lty=1
                    } else if (abs(probabilities[i,j])>threshold[2]) {
                        lty=2
                    } else if (abs(probabilities[i,j])>threshold[1]) {
                        lty=3
                    } else {
                        lty=0
                    }
                    if (sigma[i,j]>0) {
                        col=chain.color[1]
                        col.edge=edge.color[1]
                    } else {
                        col=chain.color[2]
                        col.edge=edge.color[2]
                    }
                    if (theta[i,j] != 0 | theta [j,i] !=0 ) {
                        if (!is.null(hilight.chain) && cmatrix[i,j] > 0) {
                            lines(c(x1,x2),c(y1,y2),lwd=ew,col=col,lty=1)
                        } else {
                            if (directed) {
                                arrow(c(x1,x2),c(y1,y2),lwd=ew,arrow.col=col.edge,lty=lty)
                            } else {
                                lines(c(x1,x2),c(y1,y2),lwd=ew,col=col.edge,lty=lty)
                            }
                        }
                        if (is.matrix(edge.text) & lty > 0) {
                            hx <- 0.5 * x1 + 0.5 * x2
                            hy <- 0.5 * y1 + 0.5 * y2
                            if (edge.pch>0) {
                                points(hx,hy,pch=edge.pch,cex=5*edge.cex,col="#cccccc")
                            }
                            jitx=0;# jitter(diff(range(axTicks(1)))/30,amount=0.1)
                            jity=0; jitter(diff(range(axTicks(2)))/30,amount=0.1)
                            text(hx+jitx,hy+jity,edge.text[i,j],cex=edge.cex,col=col)
                        }

                    }
                }
            }
            points(xy,pch=19,col="black",cex=vertex.size+0.3)
            points(xy,pch=19,col=vertex.color,cex=vertex.size)
            if (plot.labels) {
                text(xy,rownames(theta),cex=cex)
            }
            return(xy)
        }
        lay=xy
        if (class(g)[1] =="snha") {
            if (class(hilight.chain)[1] != "NULL") {
                cmatrix=g$sigma
                cmatrix[]=0
                chns=grep(hilight.chain,names(g$chains))
                for (nm in names(g$chain)[chns]) {
                    for (i in 1:(length(g$chain[[nm]])-1)) {
                        x=g$chain[[nm]][i]
                        y=g$chain[[nm]][i+1]
                        cmatrix[x,y]=1
                        cmatrix[y,x]=1
                    }
                }
            }
            if (interactive) {
                print("click two times, first on the point to move, second where to move\nEnd with one or two right clicks!")
                while (TRUE) {
                    loc=locator(2)
                    if (is.null(loc) | is.null(loc$x[2]) | is.null(loc$x[1])) {
                        break
                    }
                    dlay=rbind(lay,c(loc$x[1],loc$y[1]))
                    d=as.matrix(dist(dlay))[nrow(dlay),1:(nrow(dlay)-1)]
                    nm=names(which(d==min(d)))[1]
                    lay[nm,1]=loc$x[2]
                    lay[nm,2]=loc$y[2]
                    lay=doPlot(g$theta,g$sigma,lay,probabilities=g$probabilities,cmatrix=cmatrix,...)
                }
            }
            doPlot(g$theta,g$sigma,lay,probabilities=g$probabilities,cmatrix=cmatrix,...)
        } else {
            if (interactive) {
                lay=doPlot(g,g,xy,probabilities=g,...)
                print("click two times, first on the point to move, second where to move\nEnd with one or two right clicks!")
                while (TRUE) {
                    loc=locator(2)
                    if (is.null(loc) | is.null(loc$x[2]) | is.null(loc$x[1])) {
                        break
                    }
                    dlay=rbind(lay,c(loc$x[1],loc$y[1]))
                    d=as.matrix(dist(dlay))[nrow(dlay),1:(nrow(dlay)-1)]
                    nm=names(which(d==min(d)))[1]
                    lay[nm,1]=loc$x[2]
                    lay[nm,2]=loc$y[2]
                    lay=doPlot(g,g,xy,probabilities=g,...)
                }
            }
            doPlot(as.matrix(g),as.matrix(g),lay,probabilities=as.matrix(g),...)
        }
    }
    invisible(lay)
}


#' \name{as.list.snha}
#' \alias{as.list.snha}
#' \title{return a list representation for an snha graph object}
#' \usage{\method{as.list}{snha}(x,...)}
#' \description{The function `as.list.snha` provides a S3 method to convert
#'     a snha graph object into a list object which can be for instance used 
#'     to write a report into an XLSX file  using the library openxlsx.}
#' \arguments{
#' \item{x}{ snha graph object created with the snha function}
#' \item{\ldots}{ additional arguments, delegated to the list command}
#' }
#' \value{list object with the components: 
#'   'chains' (the association chain),
#'   'data' (original data),
#'   'theta' (adjacency matrix, 
#'   'sigma' (correlations), 
#'   'p.value' (correlation p-values)
#' }
#' \examples{ 
#' data(swiss)
#' as=snha(swiss,method="spearman",alpha=0.1)
#' result=as.list(as)
#' ls(result)
#' result$settings
#' # can be writte as xlsx file for instance like:
#' # library(openxlsx)
#' # write.xlsx(result,file="some-result.xlsx")
#' }
#' \seealso{\link[snha:plot.snha]{plot.snha}, \link[snha:snha]{snha}}
#'


as.list.snha <- function (x,...) {
    df=as.data.frame(unlist(lapply(x$chains,paste,collapse=", ")))
    colnames(df)[1]="chain"
    return(
           list(data=x$data,theta=x$theta,
                sigma=x$sigma,p.value=x$p.value,
                chains=df,
                settings=data.frame(info=c("method","alpha","threshold"),
                                    values=c(x$method,x$alpha,x$threshold)),...)
           )
}

#' \name{snha_layout}
#'
#' \alias{snha_layout}
#' \title{Determine graph layouts}
#' \usage{snha_layout(
#'    A,
#'    mode='sam',
#'    method='pearson', 
#'    noise=FALSE, 
#'    star.center=NULL,
#'    interactive=FALSE)
#' }
#' \description{This function returns xy coordinates for a given input
#'       adjacency matrix or snha graph. It is useful if you like to plot
#'       the same set of nodes with different edge connections 
#'       in the same layout.}
#' \arguments{
#' \item{A}{an adjacency matrix or an snha graph object}
#' \item{mode}{character string for the layout type, can be either 
#'      'mds' (mds on graph using shortest paths), 'mdsd' (mds on data)
#'      'sam' (sammon on graph), 'samd' (sammon on data),
#'      'circle', 'grid' or 'star', default: 'sam'}
#' \item{method}{method for calculating correlation distance if mode is either
#'       'mdsd' or 'samd', default: 'pearson'}
#' \item{noise}{should some noise be added, default: FALSE}
#' \item{star.center}{the centered node if layout is 'star', must be
#'           a character string for the node name, default: NULL}
#' \item{interactive}{switch into interactive mode where you can click in the
#'        graph and move nodes with two clicks, first selecting the node, second click
#'        gives the new coordinates for the node, default: FALSE}
#' }
#' \keyword{network}
#' \keyword{layout}
#' \value{matrix with x and y columns for the layout}
#' \examples{
#' data(swiss)
#' sw.s=snha(swiss,method='spearman')
#' sw.p=snha(swiss,method='pearson')
#' lay=snha_layout(sw.s,mode='sam')
#' plot(sw.s,layout=lay)
#' plot(sw.p,layout=lay)
#' plot(sw.s,layout='star',star.center='Education')
#' rn1=rnorm(nrow(swiss))
#' nswiss=cbind(swiss,Rn1=rn1)
#' plot(snha(nswiss,method='spearman'),layout='sam')
#' plot(snha(nswiss,method='spearman'),layout='samd',
#'   vertex.size=2,vertex.color='beige')
#' }
snha_layout = function (A,mode='sam', method='pearson', noise=FALSE, star.center=NULL,interactive=FALSE) {
    if (mode %in% c('samd','mdsd')) {
        if(class(A)[1] == "snha") {
            A=as.matrix(A$sigma)
        }
    }
    if(class(A)[1] =="snha") {
        S=A$sigma
        A=as.matrix(A$theta)
        idx=which(apply(A,1,sum)==0)
        if (length(idx)>0) {
            diag(S)=0
            for (i in idx) {
                j=which(abs(S[i,])==max(abs(S[i,])))
                A[i,j]=1
                A[j,i]=1
            }
        }
    }
    if (mode %in% c('mds','sam')) {
        A=ConnectComponents(A)
        sp=Snha_shortest_paths(A)
        xy=cmdscale(sp)
        rownames(xy)=rownames(A)
        if (mode=='mds') {
            dxy=base::as.matrix(dist(xy))
            diag(dxy)=1
            idx=which(dxy<0.05,arr.ind=TRUE)
            if (nrow(idx)>1) {
                for (i in 1:nrow(idx)) {
                    # add noise to nodes on the same point
                    n=idx[i,1]
                    xy[n,1]=xy[n,1]+rnorm(1,mean=0,sd=0.1)
                    xy[n,2]=xy[n,2]+rnorm(1,mean=0,sd=0.1)
                }
            }
            return(xy)
        } else {
            xy=xy+jitter(xy)
            #xy=mcg.sam(sp)
            xy=MASS::sammon(sp,y=xy,trace=FALSE)$points
            # add some jitter
            #sp[]=sp+rnorm(nrow(projection)*2,sd=sd(projection)*0.1)
            #projection[]=
            #xy = MASS::sammon(sp)$points        
        }
    } else if (mode == "samd") {
        xy=MASS::sammon(as.dist(1-cor(A,method=method,use='pairwise.complete.obs')^2))$points
    } else if (mode == "mdsd") {
        xy=cmdscale(as.dist(1-cor(A,method=method,use='pairwise.complete.obs')^2))
        rownames(xy)=colnames(A)
    } else if (mode == 'circle') {
        x=0
        y=0
        a=0.5
        b=0.5
        rad2deg <- function(rad) {(rad * 180) / (pi)}
        deg2rad <- function(deg) {(deg * pi) / (180)}
        nodes=rownames(A)
        xy=matrix(0,ncol=2,nrow=length(nodes))
        rownames(xy)=nodes
        for (i in 1:length(nodes)) {
            t=deg2rad((360/length(nodes))*(i-1))
            xp = a*cos(t)*0.75 + x;
            yp = b*sin(t)*0.75 + y;
            xy[nodes[i],]=c(xp,yp)
        }
    } else if (mode == 'star') {
        oorder=rownames(A)
        if (class(star.center)[1] != "NULL") {
            norder=c(which(rownames(A)==star.center),which(rownames(A)!=star.center))
            A=A[norder,norder]
        }
        x=0
        y=0
        a=0.5
        b=0.5
        rad2deg <- function(rad) {(rad * 180) / (pi)}
        deg2rad <- function(deg) {(deg * pi) / (180)}
        nodes=rownames(A)
        xy=matrix(0,ncol=2,nrow=length(nodes))
        rownames(xy)=nodes
        xy[1,]=c(0.0,0.0)
        for (i in 2:length(nodes)) {
            t=deg2rad((360/(length(nodes)-1))*(i-2))
            xp = a*cos(t)*0.75 + x;
            yp = b*sin(t)*0.75 + y;
            xy[nodes[i],]=c(xp,yp)
        }
        xy=xy[oorder,]
    } else if (mode == 'grid') {
        n=nrow(A)
        xy=matrix(0,ncol=2,nrow=nrow(A))
        rownames(xy)=rownames(A)
        mody=ceiling(sqrt(n))
        x=0
        y=0
        for (r in rownames(A)) {
            if (x %% mody == 0) {
                y=y+1
                x=0
            }
            x=x+1
            xy[r,]=c(x,y)
        }
    } else {
        stop("unknown layout. Use mds, circle, or grid as layout")
    }
    xy=scale(xy)
    if (noise) {
        xy=xy+rnorm(length(xy),mean=0,sd=0.1)
    }
    colnames(xy)=c("x","y")
    if (interactive) {
        plot.snha(A,layout=xy)
        print("click two times, first on the point to move, second where to move\nEnd with one or two right clicks!")
        lay=xy
        while (TRUE) {
            loc=locator(2)
            if (is.null(loc) | is.null(loc$x[2]) | is.null(loc$x[1])) {
                break
            }
            dlay=rbind(lay,c(loc$x[1],loc$y[1]))
            d=as.matrix(dist(dlay))[nrow(dlay),1:(nrow(dlay)-1)]
            nm=names(which(d==min(d)))[1]
            lay[nm,1]=loc$x[2]
            lay[nm,2]=loc$y[2]
            lay=plot.snha(A,layout=lay)
        }
        xy=lay
        return(xy)
    } else {
        return(xy)
    }
}

Try the snha package in your browser

Any scripts or data that you put into this service are public.

snha documentation built on March 31, 2023, 11:58 p.m.