R/tidy_mcmc.R

Defines functions tidy_error_probability tidy_augmented_data tidy_wcd tidy_cluster_probabilities tidy_cluster_assignment tidy_alpha tidy_rho tidy_mcmc

tidy_mcmc <- function(fits, data, model_options, compute_options) {
  fit <- list()
  fit$save_aug <- compute_options$save_aug
  rho_dims <- dim(fits[[1]]$rho)
  fit$rho_samples <- fits[[1]]$rho
  for (f in fits[-1]) {
    fit$rho_samples <- abind(fit$rho_samples, f$rho)
  }

  rhos <- lapply(seq_along(fits), function(i) fits[[i]]$rho)
  fit$rho <- do.call(rbind, lapply(seq_along(rhos), function(i) {
    tidy_rho(rhos[[i]], i, compute_options$rho_thinning, data$items)
  }))

  alpha_dims <- dim(fits[[1]]$alpha)
  alphas <- lapply(seq_along(fits), function(i) fits[[i]]$alpha)
  fit$alpha_samples <- matrix(as.vector(do.call(rbind, alphas)),
    nrow = alpha_dims[[1]], ncol = length(fits) * alpha_dims[[2]]
  )

  fit$alpha <- do.call(rbind, lapply(seq_along(alphas), function(i) {
    tidy_alpha(alphas[[i]], i, compute_options$alpha_jump)
  }))

  fit$cluster_assignment <- do.call(rbind, lapply(seq_along(fits), function(i) {
    tidy_cluster_assignment(
      fits[[i]]$cluster_assignment, i, model_options$n_clusters, data$n_assessors,
      compute_options$nmc
    )
  }))

  fit$cluster_probs <- do.call(rbind, lapply(seq_along(fits), function(i) {
    tidy_cluster_probabilities(
      fits[[i]]$cluster_probs, i, model_options$n_clusters,
      compute_options$nmc
    )
  }))

  fit$within_cluster_distance <- do.call(rbind, lapply(seq_along(fits), function(i) {
    tidy_wcd(fits[[i]]$within_cluster_distance, i)
  }))


  fit$augmented_data <- do.call(rbind, lapply(seq_along(fits), function(i) {
    tidy_augmented_data(
      fits[[i]]$augmented_data, i, data$items,
      compute_options$aug_thinning
    )
  }))

  fit$theta <- do.call(rbind, lapply(seq_along(fits), function(i) {
    tidy_error_probability(fits[[i]]$theta, i)
  }))

  fit$n_clusters <- model_options$n_clusters
  fit$data <- data
  fit$compute_options <- compute_options

  fit$acceptance_ratios <- list(
    alpha_acceptance = lapply(fits, function(x) x$alpha_acceptance),
    rho_acceptance = lapply(fits, function(x) x$rho_acceptance),
    aug_acceptance = lapply(fits, function(x) x$aug_acceptance)
  )

  return(fit)
}

tidy_rho <- function(rho_mat, chain, rho_thinning, items) {
  # Tidy rho
  rho_dims <- dim(rho_mat)
  # Item1, Item2, Item3, ...., Item1, Item2, Item3
  # Cluster1, Cluster1, Cluster1, ..., Cluster2, Cluster2, Cluster2
  # Iteration1, Iteration1, ..., Iteration1, Iteration1, Iteration1, Iteration2
  value <- c(rho_mat)

  item <- rep(items, times = rho_dims[[2]] * rho_dims[[3]])
  item <- factor(item, levels = items)

  cluster <- rep(
    seq(from = 1, to = rho_dims[[2]], by = 1),
    each = rho_dims[[1]],
    times = rho_dims[[3]]
  )
  cluster <- factor(paste("Cluster", cluster),
    levels = paste("Cluster", sort(unique(cluster)))
  )

  iteration <- rep(seq(from = 1, to = rho_dims[[3]] * rho_thinning, by = rho_thinning),
    each = rho_dims[[1]] * rho_dims[[2]]
  )

  # Store the final rho as a dataframe
  data.frame(
    chain = factor(chain),
    item = item,
    cluster = cluster,
    iteration = iteration,
    value = value
  )
}

tidy_alpha <- function(alpha_mat, chain, alpha_jump) {
  # Tidy alpha
  alpha_dims <- dim(alpha_mat)
  # Cluster1, Cluster2, ..., Cluster1, Cluster2
  # Iteration1, Iteration1, ..., Iteration2, Iteration2


  cluster <- rep(
    seq(from = 1, to = alpha_dims[[1]], by = 1),
    times = alpha_dims[[2]]
  )

  cluster <- factor(paste("Cluster", cluster),
    levels = paste("Cluster", sort(unique(cluster)))
  )

  iteration <- rep(
    seq(from = 1, to = alpha_dims[[2]] * alpha_jump, by = alpha_jump),
    each = alpha_dims[[1]]
  )

  data.frame(
    chain = factor(chain),
    cluster = cluster,
    iteration = iteration,
    value = c(alpha_mat)
  )
}

tidy_cluster_assignment <- function(
    cluster_assignment, chain, n_clusters, n_assessors, nmc) {
  if (n_clusters > 1) {
    cluster_dims <- dim(cluster_assignment)
    value <- paste("Cluster", cluster_assignment)
  } else if (n_assessors > 1) {
    cluster_dims <- c(n_assessors, nmc)
    value <- paste("Cluster", rep(1, prod(cluster_dims)))
  } else {
    return(data.frame())
  }


  # Assessor1, Assessor2, ..., Assessor1, Assessor2
  # Iteration1, Iteration1, ..., Iteration2, Iteration2

  assessor <- rep(
    seq(from = 1, to = cluster_dims[[1]], by = 1),
    times = cluster_dims[[2]]
  )
  iteration <- rep(
    seq(from = 1, to = cluster_dims[[2]], by = 1),
    each = cluster_dims[[1]]
  )

  data.frame(
    chain = factor(chain),
    assessor = assessor,
    iteration = iteration,
    value = value
  )
}

tidy_cluster_probabilities <- function(cluster_probs, chain, n_clusters, nmc) {
  # Tidy cluster probabilities
  if (n_clusters > 1) {
    clusprob_dims <- dim(cluster_probs)
    value <- c(cluster_probs)
  } else {
    clusprob_dims <- c(n_clusters, nmc)
    value <- rep(1, times = prod(clusprob_dims))
  }


  # Cluster1, Cluster2, ..., Cluster1, Cluster2
  # Iteration1, Iteration1, ..., Iteration2, Iteration2
  cluster <- rep(
    seq(from = 1, to = clusprob_dims[[1]], by = 1),
    times = clusprob_dims[[2]]
  )

  cluster <- factor(paste("Cluster", cluster),
    levels = paste("Cluster", sort(unique(cluster)))
  )

  iteration <- rep(
    seq(from = 1, to = clusprob_dims[[2]], by = 1),
    each = clusprob_dims[[1]]
  )

  data.frame(
    chain = factor(chain),
    cluster = cluster,
    iteration = iteration,
    value = value
  )
}


tidy_wcd <- function(within_cluster_distance, chain) {
  # Tidy the within-cluster distances, or delete the empty matrix
  if (!is.null(within_cluster_distance)) {
    wcd_dims <- dim(within_cluster_distance)
    value <- c(within_cluster_distance)

    # Cluster1, Cluster2, ..., Cluster1, Cluster2
    # Iteration1, Iteration1, ..., Iteration2, Iteration2

    cluster <- rep(
      paste("Cluster", seq(from = 1, to = wcd_dims[[1]], by = 1)),
      times = wcd_dims[[2]]
    )
    cluster <- factor(paste("Cluster", cluster),
      levels = paste("Cluster", sort(unique(cluster)))
    )

    iteration <- rep(
      seq(from = 1, to = wcd_dims[[2]], by = 1),
      each = wcd_dims[[1]]
    )

    data.frame(
      chain = factor(chain),
      cluster = cluster,
      iteration = iteration,
      value = value
    )
  } else {
    NULL
  }
}

tidy_augmented_data <- function(augmented_data, chain, items, aug_thinning) {
  # Tidy augmented data, or delete
  if (!is.null(augmented_data) && prod(dim(augmented_data)) != 0) {
    augdata_dims <- dim(augmented_data)

    # Item1, Item2, ..., Item1, Item2, ..., Item1, Item2, ..., Item1, Item2
    # Assessor1, Assessor1, ..., Assessor2, Assessor2, ... Assessor1, Assessor1, ..., Assessor2, Assessor2
    # Iteration1, Iteration1, ..., Iteration1, Iteration1, ..., Iteration2, Iteration2, ... Iteration2, Iteration2
    value <- c(augmented_data)

    item <- rep(items, times = augdata_dims[[2]] * augdata_dims[[3]])
    item <- factor(item, levels = items)

    assessor <- rep(seq(from = 1, to = augdata_dims[[2]], by = 1),
      each = augdata_dims[[1]],
      times = augdata_dims[[3]]
    )

    iteration <- rep(seq(from = 1, to = augdata_dims[[3]] * aug_thinning, by = aug_thinning),
      each = augdata_dims[[1]] * augdata_dims[[2]]
    )

    data.frame(
      chain = factor(chain),
      iteration = iteration,
      assessor = assessor,
      item = item,
      value = value
    )
  } else {
    NULL
  }
}


tidy_error_probability <- function(theta, chain) {
  theta_length <- length(theta)

  if (theta_length > 0) {
    data.frame(
      chain = factor(chain),
      iteration = seq(from = 1, to = theta_length, by = 1),
      value = c(theta)
    )
  } else {
    NULL
  }
}

Try the BayesMallows package in your browser

Any scripts or data that you put into this service are public.

BayesMallows documentation built on Sept. 11, 2024, 5:31 p.m.