# R/jacobi_etal_2012.R In ConnMatTools: Tools for Working with Connectivity Data

#### Documented in betasVectorDefaultoptimalSplitConnMatqualitySubpopsrecSplitConnMatreducedConnMatsplitConnMatsubpopsVectorToList

#' Split connectivity matrix into subpopulations
#'
#' This function tries to optimally split a given subpopulation into
#' two smaller subpopulations.
#'
#' @param indices vector of indices of sites in a subpopulation
#' @param conn.mat a square connectivity matrix.  This matrix has
#' typically been normalized and made symmetric prior to using this
#' function.
#' @param beta controls degree of splitting of connectivity matrix,
#' with larger values generating more subpopulations.
#' @param tries how many times to restart the optimization algorithm. Defaults to 5.
#' @param threshold controls when to stop each "try".  Defaults to 1e-10.
#' @param alpha controls rate of convergence to solution
#' @param maxit Maximum number of iterations to perform per "try".
#'
#' @return List with one or two elements, each containing a vector of
#' indices of sites in a subpopulations
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson,
#' P. R. 2012. Identification of subpopulations from connectivity
#' matrices. Ecography, 35: 1004-1016.
#'
#'
#' @author
#' David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @export
#' @importFrom stats runif
splitConnMat <- function(indices,conn.mat,beta,tries=5,
threshold=1e-10,alpha=0.1,maxit=500) {
# makes a submatrix of the total connectivity matrix only
# involving the sites in the index list
ppp = conn.mat[indices, indices, drop=F]

# al appears to be just 1/beta
al = 1/beta

n = dim(ppp);
s = matrix(1,nrow=n)

eNoSplit = -t(s) %*% ppp %*% s + al * sum(s)^2;

best = 1e10

for (kk in 1:tries) {
s = matrix( runif(n,min=-1,max=1), ncol = 1 )
ds = s + 1
sTot = sum(s)
sOld = sign(s)

for (it in 1:maxit) {
if (sqrt(sum((s-ds)^2)) <= threshold)
break

# the following three lines implements EQ. 8 in the paper
v = ppp %*% s - al * sum(s)
ds = sqrt(abs(v)) * sign(v)
s = alpha * s + (1-alpha) * ds
}

if (it == maxit) warning("Reached max iterations")

s = sign(s)
e = -t(s) %*% ppp %*% s + al * sum(s)^2;
# calclulates the value of the cost function

if (e < best) {
sBest = s
best = e
}
}

part = indices[ sBest == -1 ]
notPart = setdiff( indices, part )

if ( (best < eNoSplit) & (length(part)>0) & (length(notPart)>0) ) {
return(list(part,notPart))
}

return(list(indices))
}

#' Recursively subdivides a set of subpoplations
#'
#' This funtion recursively splits each subpopulation of a list of
#' subpopulations until none of the subpopulations can be split
#' further to improve the minimization.
#'
#' @param subpops.lst A list whose elements are vectors of indices for each subpopulation.  See \code{\link{subpopsVectorToList}}.
#' @param conn.mat A square connectivity matrix.  This matrix has
#' typically been normalized and made symmetric prior to using this
#' function.
#' @param beta Controls degree of splitting of connectivity matrix,
#' with larger values generating more subpopulations.
#' @param \dots further arguments to be passed to \code{\link{splitConnMat}}
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson,
#' P. R. 2012. Identification of subpopulations from connectivity
#' matrices. Ecography, 35: 1004-1016.
#'
#'
#' @author
#' David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @export
recSplitConnMat <- function(subpops.lst, conn.mat, beta, ...) {
old = 0
while (length(subpops.lst) > old) {
old = length(subpops.lst)
subpops.lst = unlist( sapply( subpops.lst, splitConnMat,
conn.mat=conn.mat, beta=beta, ..., simplify=F),
recursive=F )
}
return(subpops.lst)
}

#' Merge subpopulations
#'
#' This function tries to merge random subopoulations, checking if the
#' result is a better soluton to the minimization problem.
#'
#' @param subpops.lst A list whose elements are vectors of indices for each subpopulation.  See \code{\link{subpopsVectorToList}}.
#' @param conn.mat A square connectivity matrix.  This matrix has
#' typically been normalized and made symmetric prior to using this
#' function.
#' @param beta Controls degree of splitting of connectivity matrix,
#' with larger values generating more subpopulations.
#'
#' @return List of the same format as subpops.lst, but with
#' potentially fewer subpopulations.
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson,
#' P. R. 2012. Identification of subpopulations from connectivity
#' matrices. Ecography, 35: 1004-1016.
#'
#'
#' @author
#' David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @export
mergeSubpops <- function ( subpops.lst,  conn.mat, beta ) {
nIt = length(subpops.lst)^2
al = 1/beta

for (it in 1:nIt) {
if (length(subpops.lst) < 2) break # Must have at least 2 to merge

ii = sort(sample( 1:length(subpops.lst), 2 ))
li = c( subpops.lst[[ ii ]], subpops.lst[[ ii ]] )
pTest = conn.mat[li,li]

s = matrix(1,nrow=length(li))
eTogether = -t(s) %*% pTest %*% s + al * sum(s)^2
s[1:length(subpops.lst[[ ii ]])] = -1
eSplit = -t(s) %*% pTest %*% s + al * sum(s)^2

if (eTogether<eSplit) {
subpops.lst[[ ii ]] = li
subpops.lst = subpops.lst[ -ii ]
}
}

return(subpops.lst)
}

#' Reduced connectivity matrix according to a set of subpopulations
#'
#' Reduces a connectivity matrix based on a set of subpopulations.  If there are
#' N subpopulations, then the reduced matrix will have dimensions NxN.  The
#' reduced matrix will be ordered according to the order of subpopulations in
#' \code{subpops.lst}.
#'
#' @param subpops.lst A list whose elements are vectors of indices for each
#'   subpopulation.  If a vector of integers is given, then
#'   \code{\link{subpopsVectorToList}} is applied to convert it to a list of
#'   subpopulations.
#' @param conn.mat A square connectivity matrix.
#'
#' @return A reduced connectivity matrix.  The sum of all elements of this
#'   reduced connectivity matrix will be equal to the sum of all elements of the
#'   original connectivity matrix.
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson, P. R. 2012.
#'   Identification of subpopulations from connectivity matrices. Ecography, 35:
#'   1004-1016.
#'
#'
#' @author David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @example tests/test.optimalSplitConnMat.R
#' @export
reducedConnMat <- function( subpops.lst, conn.mat ) {
if (!is.list(subpops.lst))
subpops.lst = subpopsVectorToList(subpops.lst)

pii = matrix( 0.0, nrow=dim(conn.mat), ncol=length(subpops.lst) )

for (kk in 1:length(subpops.lst)) {
pii[ subpops.lst[[kk]], kk ] = 1
}

# Note I use p instead of t(p), as was in Jacobi code.
# This is because my matrices are oriented columns to rows
#pt = t(pii) %*% p %*% pii %*% solve( t(pii) %*% pii )

# Somewhat quicker method
pt =  t(pii) %*% conn.mat %*% pii #%*% diag( 1 / sapply(subpops.lst,length) )
# No need to normalize by number of sites in cluster of larval
# origin because we are just going to normalize each colum

return(pt)
}

#' Quality measure for subpopulation division
#'
#' A measure of the leakage between subpopulations for a given division of the
#' connectivity matrix into subpopulations.  This statistic is equal to 1 -
#' mean(RLR) of the reduced connectivity matrix, where RLR=relative local
#' retention (\code{\link{relativeLocalRetention}}), i.e., the fraction of
#' settling individuals that originated at their site of settlement.
#'
#' @inheritParams reducedConnMat
#'
#' @return The quality statistic.
#'
#'   A smaller value of the quality statistic indicates less leakage.
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson, P. R. 2012.
#'   Identification of subpopulations from connectivity matrices. Ecography, 35:
#'   1004-1016.
#'
#'
#' @author David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @export
#' @include retentionStats.R
qualitySubpops <- function( subpops.lst, conn.mat )
(1 - mean(relativeLocalRetention(reducedConnMat(subpops.lst,conn.mat))))

#' Compute vector of beta values
#'
#' Helper function to compute a set of beta values using formula used in Jacobi
#' et al. (2012).
#'
#' @param n numerator of formula from Jacobi et al. (2012).  Normally will be
#'   the number of columns in the connectivity matrix if one normalizes the
#'   columns (otherwise, it would typically be \code{N^2 / sum(conn.mat)}, where
#'   \code{N} is the number of columns of \code{conn.mat}.
#' @param steps number of beta values to return.  Defaults to 10.
#' @param cycles how many cycles of \code{2*pi} to do.
#' @param coeff coefficient in front of sine function
#' @param pwr exponent in denominator
#'
#' @return vector of beta values
#'
#' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson, P. R. 2012.
#'   Identification of subpopulations from connectivity matrices. Ecography, 35:
#'   1004-1016.
#'
#'
#' @author David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @encoding UTF-8
#' @export
betasVectorDefault <- function(n,steps=10,cycles=3/4,
coeff=0.8,pwr=3.0)
n/(1+coeff*sin(seq(0,cycles*2*pi,length.out=steps)))^pwr

#' Iteratively, optimally split a connectivity matrix
#'
#' Algorithm for iteratively determining subpopulations of
#' highly-connected sites.  Uses an iterative method described in
#' Jacobi et al. (2012)
#'
#' @param conn.mat A square connectivity matrix.
#' @param normalize.cols A boolean indicating if columns of conn.mat
#' should be normalized by the sum of all elements in the column.
#' Defaults to TRUE.
#' @param make.symmetric A string indicating how to force conn.mat to
#' be symmetric.  "mean" (the default) will replace C_ij by (C_ij +
#' C_ji)/2.  "max" will replace C_ij by the maximum of C_ij and C_ji.
#' @param remove.diagonal A boolean indicating if the diagonal
#' elements of conn.mat should be removed before determining the
#' subpopulations.  Defaults to FALSE.
#' @param cycles Number of times to pass over values in betas.
#' @param betas Vector of beta values to try.  If not given, will
#' @param steps Number of beta values to produce using
#' betasVectorDefault.  Ignored if betas argument is explicitly
#' given.
#' @param \dots further arguments to be passed to \code{\link{splitConnMat}}
#'
#' @return A list with the following elements:
#' \item{betas}{Vector of all beta values tested}
#'
#' \item{num.subpops}{Vector of number of subpopulations found for
#' each value of beta}
#'
#' \item{qualities}{Vector of the quality statistic for each
#' subpopulation division}
#'
#' \item{subpops}{A matrix with dimensions dim(conn.mat) X
#' length(betas) indicating which subpopulation each site belongs to}
#'
#' \item{best.splits}{A list indicating for each number of
#' subpopulations, which column of subpops contains the division with
#' the lowest quality statistic.  E.g.,
#' \code{best.splits[["4"]]$index} contains the column index of the #' optimal division of the connectivity matrix into 4 subpopulations.} #' #' @seealso See also \code{\link{splitConnMat}}, #' \code{\link{recSplitConnMat}}, \code{\link{mergeSubpops}}, #' \code{\link{qualitySubpops}} #' @author #' David M. Kaplan \email{dmkaplan2000@@gmail.com} #' @encoding UTF-8 #' @example tests/test.optimalSplitConnMat.R #' @export #' #' @note In Jacobi et al. (2012) paper, the connectivity matrix is #' oriented so that \eqn{C_ij} is dispersal from i to j, whereas in this R #' package, the connectivity matrix is oriented so that \eqn{C_ij} is #' dispersal from j to i. This choice of orientation is arbitrary, #' but one must always be consistent. From j to i is more common in #' population dynamics because it works well with matrix #' multiplication (e.g., \code{settlers = conn.mat \%*\% eggs}). #' #' @references Jacobi, M. N., Andre, C., Doos, K., and Jonsson, #' P. R. 2012. Identification of subpopulations from connectivity #' matrices. Ecography, 35: 1004-1016. #' optimalSplitConnMat <- function(conn.mat, normalize.cols=TRUE, make.symmetric="mean", remove.diagonal=FALSE, cycles = 2, betas=betasVectorDefault( ifelse(normalize.cols,dim(conn.mat), prod(dim(conn.mat))/sum(conn.mat)),steps), steps=10, ... ) { if (all(class(conn.mat) != "matrix")) stop("Input conn.mat must be a matrix.") pp <- conn.mat # Make larval loss uniform over space if (normalize.cols) for (kk in 1:dim(pp)) { ss = sum(pp[,kk]) if (ss>0) pp[,kk] = pp[,kk] / ss } # Force symmetric if not already the case mymax = function(x) { ii = x < t(x); x[ii] = t(x)[ii]; return(x) } pp = switch(make.symmetric, mean = (pp + t(pp)) / 2.0, max = mymax(pp), stop("Bad max.symmetric string")) # Not sure if this is obligatory or optional # I believe it should be optional if (remove.diagonal) diag(pp) <- 0 subpops = matrix(NA,nrow=dim(conn.mat),ncol=cycles*length(betas)) num.subpops = rep(NA,cycles*length(betas)) qualities = num.subpops for (kk in 1:cycles) { print( paste( "Starting cycle",kk ) ) # Initialize with one big cluster ta = list(1:dim(conn.mat)) # Loop over betas for (ll in 1:length(betas)) { beta = betas[ll] print( paste( "beta =", beta ) ) taOld = list() while (!setequal(ta,taOld)) { taOld = ta ta = recSplitConnMat(ta, pp, beta, ...) #if (length(unlist(ta))!=dim(pp)) stop("recSplitConnMat error") ta = mergeSubpops(ta, pp, beta ) #if (length(unlist(ta))!=dim(pp)) stop("mergeSubpops error") } # Store results nn = (kk-1)*length(betas)+ll qualities[nn] = qualitySubpops(ta,conn.mat) num.subpops[nn] = length(ta) for (mm in 1:length(ta)) subpops[ta[[mm]],nn] = mm } } # For each number of subpops, find the configuration with the # best quality measure imin <- function(x) { which( min(x) == x ) } best.splits = by( data.frame(quality=qualities, index=1:length(qualities)), INDICES=num.subpops, FUN=function(d) d[imin(d$quality),] )

return(list(betas=rep(betas,cycles),
subpops=subpops,qualities=qualities,
num.subpops=num.subpops,best.splits=best.splits))
}

#' Convert subpopulation vector to a list of indices
#'
#' A helper function to convert a vector of subpopulation identifications into a
#'
#' Note that subpopulations list will be ordered according to the numerical
#' order of the subpopulation indices in the original matrix, which will not
#' necessarily be that of the spatial order of sites in the original
#' connectivity matrix.
#'
#' @param x vector of subpopulation identifications
#'
#' @return A list where each element is a vector of indices for a given
#'   subpopulation.
#'
#'
#' @author David M. Kaplan \email{dmkaplan2000@@gmail.com}
#' @export
subpopsVectorToList <- function(x) {
xx = sort(unique(x))

ta = list()
for (yy in xx)
ta[[length(ta)+1]] = which( x == yy )
return(ta)
}


## Try the ConnMatTools package in your browser

Any scripts or data that you put into this service are public.

ConnMatTools documentation built on Feb. 3, 2020, 5:06 p.m.