# (stray scraped text commented out so the file parses cleanly)
######################################################################
## Simulation and estimation of Exponential Random Partition Models ##
## Functions used to run the phase 1 of the estimation algorithm ##
## Author: Marion Hoffman ##
######################################################################
#' Phase 1 wrapper for single observation
#'
#'
#' @param partition observed partition
#' @param startingestimates vector containing initial parameter values
#' @param z.obs observed statistics
#' @param nodes node set (data frame)
#' @param effects effects/sufficient statistics (list with a vector "names", and a vector "objects")
#' @param objects objects used for statistics calculation (list with a vector "name", and a vector "object")
#' @param burnin integer for the number of burn-in steps before sampling
#' @param thining integer for the number of thinning steps between sampling
#' @param gainfactor gain factor (useless now)
#' @param a.scaling scaling factor
#' @param r.truncation.p1 truncation factor (for stability)
#' @param length.p1 number of samples for phase 1
#' @param neighborhood vector for the probability of choosing a particular transition in the chain
#' @param fixed.estimates if some parameters are fixed, list with as many elements as effects, these elements equal a fixed value if needed, or NULL if they should be estimated
#' @param numgroups.allowed vector containing the number of groups allowed in the partition (now, it only works with vectors like num_min:num_max)
#' @param numgroups.simulated vector containing the number of groups simulated
#' @param sizes.allowed vector of group sizes allowed in sampling (now, it only works for vectors like size_min:size_max)
#' @param sizes.simulated vector of group sizes allowed in the Markov chain but not necessarily sampled (now, it only works for vectors like size_min:size_max)
#' @param parallel boolean to indicate whether the code should be run in parallel
#' @param cpus number of cpus if parallel = TRUE
#' @param verbose logical: should intermediate results during the estimation be printed or not? Defaults to FALSE.
#' @return a list
#' @importFrom stats cor
#' @importFrom snowfall sfExport sfLapply
#' @export
run_phase1_single <- function(partition,
                              startingestimates,
                              z.obs,
                              nodes,
                              effects,
                              objects,
                              burnin,
                              thining,
                              gainfactor,
                              a.scaling,
                              r.truncation.p1,
                              length.p1,
                              neighborhood,
                              fixed.estimates,
                              numgroups.allowed,
                              numgroups.simulated,
                              sizes.allowed,
                              sizes.simulated,
                              parallel = TRUE,
                              cpus = 1,
                              verbose = FALSE) {

  num.effects <- length(effects$names)

  # The observed partition is used as the starting point of the chain
  # (a random starting point could instead be drawn with
  # find_startingpoint_single).
  first.partition <- partition

  # Simulate the distribution of the statistics at the starting estimates
  if (parallel) {
    sfExport("startingestimates", "first.partition", "nodes", "effects",
             "objects", "burnin", "thining", "length.p1", "cpus",
             "neighborhood", "numgroups.allowed", "numgroups.simulated",
             "sizes.allowed", "sizes.simulated")
    res <- sfLapply(seq_len(cpus), fun = function(k) {
      draw_Metropolis_single(startingestimates, first.partition, nodes,
                             effects, objects, burnin, thining,
                             ceiling(length.p1 / cpus), neighborhood,
                             numgroups.allowed, numgroups.simulated,
                             sizes.allowed, sizes.simulated)
    })

    # stack all workers' draws at once instead of growing a matrix in a loop
    all.z <- do.call(rbind, lapply(res, function(subres) subres$draws))

    # each worker produced ceiling(length.p1 / cpus) samples
    length.p1 <- cpus * ceiling(length.p1 / cpus)

    results.phase1 <- list("draws" = all.z,
                           "last.partition" = res[[cpus]]$last.partition,
                           "all.partitions" = NULL)
  } else {
    results.phase1 <- draw_Metropolis_single(startingestimates,
                                             first.partition, nodes,
                                             effects, objects, burnin,
                                             thining, length.p1,
                                             neighborhood,
                                             numgroups.allowed,
                                             numgroups.simulated,
                                             sizes.allowed, sizes.simulated)
  }
  z.phase1 <- results.phase1$draws

  # lag-1 autocorrelation of each statistic, kept so the user can check
  # the mixing of the chain afterwards
  autocors <- vapply(seq_len(num.effects), function(e) {
    cor(z.phase1[seq_len(length.p1 - 1), e], z.phase1[2:length.p1, e])
  }, numeric(1))

  if (verbose) {
    cat("Autocorrelations in phase 1:\n")
    cat(autocors, "\n\n")
  }

  # inverted covariance of the simulated statistics and inverted scaling
  # matrix, both used for the Newton-Raphson-type update in phase1()
  inverted_matrices <- calculate_inverted_covariance_and_scaling(
    startingestimates,
    z.obs,
    nodes,
    effects,
    objects,
    a.scaling,
    length.phase = length.p1,
    z.phase = z.phase1,
    fixed.estimates)
  inv.zcov <- inverted_matrices$inv.zcov
  inv.scaling <- inverted_matrices$inv.scaling

  # Phase 1 procedure: one (truncated) update of the parameters
  estimates.phase1 <- phase1(startingestimates,
                             inv.zcov,
                             inv.scaling,
                             z.phase1,
                             z.obs,
                             nodes,
                             effects,
                             objects,
                             r.truncation.p1,
                             length.p1,
                             fixed.estimates)

  return(list("estimates" = estimates.phase1,
              "inv.zcov" = inv.zcov,
              "inv.scaling" = inv.scaling,
              "autocorrelations" = autocors))
}
#' Phase 1 wrapper for multiple observations
#'
#'
#' @param partitions observed partitions
#' @param startingestimates vector containing initial parameter values
#' @param z.obs observed statistics
#' @param presence.tables data frame to indicate which times nodes are present in the partition
#' @param nodes node set (data frame)
#' @param effects effects/sufficient statistics (list with a vector "names", and a vector "objects")
#' @param objects objects used for statistics calculation (list with a vector "name", and a vector "object")
#' @param burnin integer for the number of burn-in steps before sampling
#' @param thining integer for the number of thinning steps between sampling
#' @param gainfactor gain factor (useless now)
#' @param a.scaling scaling factor
#' @param r.truncation.p1 truncation factor (for stability)
#' @param length.p1 number of samples for phase 1
#' @param neighborhood vector for the probability of choosing a particular transition in the chain
#' @param fixed.estimates if some parameters are fixed, list with as many elements as effects, these elements equal a fixed value if needed, or NULL if they should be estimated
#' @param numgroups.allowed vector containing the number of groups allowed in the partition (now, it only works with vectors like num_min:num_max)
#' @param numgroups.simulated vector containing the number of groups simulated
#' @param sizes.allowed vector of group sizes allowed in sampling (now, it only works for vectors like size_min:size_max)
#' @param sizes.simulated vector of group sizes allowed in the Markov chain but not necessarily sampled (now, it only works for vectors like size_min:size_max)
#' @param parallel boolean to indicate whether the code should be run in parallel
#' @param cpus number of cpus if parallel = TRUE
#' @param verbose logical: should intermediate results during the estimation be printed or not? Defaults to FALSE.
#' @return a list
#' @importFrom stats cor
#' @importFrom snowfall sfExport sfLapply
#' @export
run_phase1_multiple <- function(partitions,
                                startingestimates,
                                z.obs,
                                presence.tables,
                                nodes,
                                effects,
                                objects,
                                burnin,
                                thining,
                                gainfactor,
                                a.scaling,
                                r.truncation.p1,
                                length.p1,
                                neighborhood,
                                fixed.estimates,
                                numgroups.allowed,
                                numgroups.simulated,
                                sizes.allowed,
                                sizes.simulated,
                                parallel = FALSE,
                                cpus = 1,
                                verbose = FALSE) {

  num.effects <- length(effects$names)

  # The observed partitions are used as the starting point of the chain
  # (random starting points could instead be drawn with
  # find_startingpoint_multiple).
  first.partitions <- partitions

  # Simulate the distribution of the statistics at the starting estimates
  if (parallel) {
    sfExport("startingestimates", "first.partitions", "presence.tables",
             "nodes", "effects", "objects", "burnin", "thining", "length.p1",
             "cpus", "neighborhood", "numgroups.allowed",
             "numgroups.simulated", "sizes.allowed", "sizes.simulated")
    res <- sfLapply(seq_len(cpus), fun = function(k) {
      draw_Metropolis_multiple(startingestimates, first.partitions,
                               presence.tables, nodes, effects, objects,
                               burnin, thining, ceiling(length.p1 / cpus),
                               neighborhood, numgroups.allowed,
                               numgroups.simulated, sizes.allowed,
                               sizes.simulated)
    })

    # stack all workers' draws at once instead of growing a matrix in a loop
    all.z <- do.call(rbind, lapply(res, function(subres) subres$draws))

    # each worker produced ceiling(length.p1 / cpus) samples
    length.p1 <- cpus * ceiling(length.p1 / cpus)

    results.phase1 <- list("draws" = all.z,
                           "last.partitions" = res[[cpus]]$last.partitions,
                           "all.partitions" = NULL)
  } else {
    results.phase1 <- draw_Metropolis_multiple(startingestimates,
                                               first.partitions,
                                               presence.tables, nodes,
                                               effects, objects, burnin,
                                               thining, length.p1,
                                               neighborhood,
                                               numgroups.allowed,
                                               numgroups.simulated,
                                               sizes.allowed,
                                               sizes.simulated)
  }
  z.phase1 <- results.phase1$draws

  # lag-1 autocorrelation of each statistic, kept so the user can check
  # the mixing of the chain afterwards
  autocors <- vapply(seq_len(num.effects), function(e) {
    cor(z.phase1[seq_len(length.p1 - 1), e], z.phase1[2:length.p1, e])
  }, numeric(1))

  if (verbose) {
    cat("Autocorrelations in phase 1:\n")
    cat(autocors, "\n\n")
  }

  # inverted covariance of the simulated statistics and inverted scaling
  # matrix, both used for the Newton-Raphson-type update in phase1()
  inverted_matrices <- calculate_inverted_covariance_and_scaling(
    startingestimates,
    z.obs,
    nodes,
    effects,
    objects,
    a.scaling,
    length.phase = length.p1,
    z.phase = z.phase1,
    fixed.estimates)
  inv.zcov <- inverted_matrices$inv.zcov
  inv.scaling <- inverted_matrices$inv.scaling

  # Phase 1 procedure: one (truncated) update of the parameters
  estimates.phase1 <- phase1(startingestimates,
                             inv.zcov,
                             inv.scaling,
                             z.phase1,
                             z.obs,
                             nodes,
                             effects,
                             objects,
                             r.truncation.p1,
                             length.p1,
                             fixed.estimates)

  return(list("estimates" = estimates.phase1,
              "inv.zcov" = inv.zcov,
              "inv.scaling" = inv.scaling,
              "autocorrelations" = autocors))
}
#' Core function for Phase 1
#'
#'
#' @param startingestimates vector containing initial parameter values
#' @param inv.zcov inverted covariance matrix
#' @param inv.scaling scaling matrix
#' @param z.phase1 statistics retrieved from phase 1
#' @param z.obs observed statistics
#' @param nodes node set (data frame)
#' @param effects effects/sufficient statistics (list with a vector "names", and a vector "objects")
#' @param objects objects used for statistics calculation (list with a vector "name", and a vector "object")
#' @param r.truncation.p1 numeric used to limit extreme values in the covariance matrix (for stability)
#' @param length.p1 number of samples in phase 1
#' @param fixed.estimates if some parameters are fixed, list with as many elements as effects, these elements equal a fixed value if needed, or NULL if they should be estimated
#' @param verbose logical: should intermediate results during the estimation be printed or not? Defaults to FALSE.
#' @return estimated parameters after phase 1
#' @export
phase1 <- function(startingestimates,
                   inv.zcov,
                   inv.scaling,
                   z.phase1,
                   z.obs,
                   nodes,
                   effects,
                   objects,
                   r.truncation.p1,
                   length.p1,
                   fixed.estimates,
                   verbose = FALSE) {

  num.effects <- length(effects$names)

  # normal procedure (no fixed estimates)
  if (is.null(fixed.estimates)) {

    # rough estimate of the statistics: average over the phase 1 samples
    z.mean <- colMeans(z.phase1[seq_len(length.p1), , drop = FALSE])

    # truncating factor r limits the size of the update (for stability):
    # shrink the step when the Mahalanobis-type distance between simulated
    # and observed statistics exceeds r.truncation.p1
    r <- 1
    if (r.truncation.p1 > 0) {
      diff <- z.mean - z.obs
      maxratio <- max(sqrt(t(diff) %*% inv.zcov %*% diff / num.effects))
      if (maxratio > r.truncation.p1) {
        r <- r.truncation.p1 / maxratio
      }
    }

    # Newton-Raphson-type update of the estimates
    estimates.phase1 <- startingestimates -
      r * inv.scaling %*% (z.mean - z.obs)

  # fixed parameters procedure
  } else {

    # indexes of the effects that are freely estimated (not fixed);
    # fixed effects keep their starting value
    unfixed.indexes <- which(vapply(fixed.estimates, is.null, logical(1)))

    # rough estimate of the statistics, restricted to the free effects
    z.mean <- colMeans(z.phase1[seq_len(length.p1), unfixed.indexes,
                                drop = FALSE])

    # truncating factor, computed on the free effects only
    r <- 1
    if (r.truncation.p1 > 0) {
      diff <- z.mean - z.obs[unfixed.indexes]
      maxratio <- max(sqrt(t(diff) %*% inv.zcov %*% diff /
                             length(unfixed.indexes)))
      if (maxratio > r.truncation.p1) {
        r <- r.truncation.p1 / maxratio
      }
    }

    # update only the free coordinates of the parameter vector
    estimates.phase1 <- startingestimates
    estimates.phase1[unfixed.indexes] <- estimates.phase1[unfixed.indexes] -
      r * inv.scaling %*% (z.mean - z.obs[unfixed.indexes])
  }

  if (verbose) {
    cat("Covariance matrix\n")
    cat(solve(inv.zcov), "\n\n")
    cat("Invert scaling matrix\n")
    cat(inv.scaling, "\n\n")
    cat("Estimated statistics after phase 1\n")
    cat(z.mean, "\n\n")
    cat("Estimates after phase 1\n")
    cat(estimates.phase1, "\n\n")
  }

  return(estimates.phase1)
}
# Calculation of the inverse of the covariance matrix and the scaling matrix
#' @importFrom stats cov
# Calculation of the inverse of the covariance matrix of the simulated
# statistics and of the inverse of the scaling matrix used for the
# Newton-Raphson-type updates.
#
# The scaling matrix keeps the diagonal of the covariance matrix and damps
# the off-diagonal entries by the factor a.scaling. When fixed.estimates is
# not NULL, both matrices are restricted to the freely estimated effects.
calculate_inverted_covariance_and_scaling <- function(startingestimates,
                                                      z.obs,
                                                      nodes,
                                                      effects,
                                                      objects,
                                                      a.scaling,
                                                      length.phase,
                                                      z.phase,
                                                      fixed.estimates) {

  # covariance of the simulated statistics; restrict the columns to the
  # freely estimated effects when some parameters are fixed
  if (is.null(fixed.estimates)) {
    z.cov <- cov(z.phase)
  } else {
    unfixed.indexes <- which(vapply(fixed.estimates, is.null, logical(1)))
    z.cov <- cov(z.phase[, unfixed.indexes, drop = FALSE])
  }
  inv.zcov <- solve(z.cov)

  # scaling matrix: same diagonal as the covariance matrix, off-diagonal
  # entries multiplied by a.scaling
  scaling <- as.matrix(z.cov)
  off.diagonal <- row(scaling) != col(scaling)
  scaling[off.diagonal] <- a.scaling * scaling[off.diagonal]
  inv.scaling <- solve(scaling)

  return(list(inv.zcov = inv.zcov,
              inv.scaling = inv.scaling))
}
# (website footer text accidentally pasted into the source; commented out
# so the file parses cleanly)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.