Nothing
#' Calculate and plot posterior densities of the projected estimates
#'
#' The function \code{ridgePlot} replaces the previous function name \code{getSmoothedDensity} (before version 1.0.0).
#'
#' @param x output from \code{\link{smoothDirect}} for the smoothed direct estimates, or \code{\link{smoothCluster}} for the cluster-level estimates.
#' @param nsim number of posterior draws to take. Only used for cluster-level models when \code{draws} is NULL. Otherwise the posterior draws in \code{draws} will be used instead without resampling.
#' @param draws Output of \code{\link{getSmoothed}} with \code{save.draws} set to TRUE. This argument allows the previously sampled draws (by setting \code{save.draws} to be TRUE) be used in new aggregation tasks. This argument is only used for cluster-level models.
#' @param year.plot A vector indicate which years to plot
#' @param year_plot `r lifecycle::badge("deprecated")` replaced by year.plot
#' @param strata.plot Name of the strata to plot. If not specified, the overall is plotted.
#' @param strata_plot `r lifecycle::badge("deprecated")` replaced by strata.plot
#' @param by.year logical indicator for whether the output uses years as facets.
#' @param ncol number of columns in the output figure.
#' @param scale numerical value controlling the height of the density plots.
#' @param per1000 logical indicator to multiply results by 1000.
#' @param order order of regions when by.year is set to TRUE. Negative values indicate regions are ordered from high to low posterior medians from top to bottom. Positive values indicate from low to high. 0 indicate alphabetic orders.
#' @param direction Direction of the color scheme. It can be either 1 (smaller values are darker) or -1 (higher values are darker). Default is set to 1.
#' @param linewidth width of the ridgeline.
#' @param results output from \code{\link{ridgePlot}} returned object with \code{save.density = TRUE}. This argument can be specified to avoid calculating densities again when only the visualization changes.
#' @param save.density Logical indicator of whether the densities will be returned with the ggplot object. If set to TRUE, the output will be a list consisting of (1) a data frame of computed densities and (2) a ggplot object of the plot.
#' @param ... additional configurations passed to inla.posterior.sample.
#'
#' @return ridge plot of the density, and if \code{save.density = TRUE}, also a data frame of the calculated densities
#' @seealso \code{\link{plot.SUMMERproj}}
#' @importFrom methods as
#' @importFrom stats rnorm
#' @author Zehang Richard Li
#' @examples
#' \dontrun{
#' years <- levels(DemoData[[1]]$time)
#'
#' data <- getDirectList(births = DemoData,
#' years = years,
#' regionVar = "region", timeVar = "time",
#' clusterVar = "~clustid+id",
#' ageVar = "age", weightsVar = "weights",
#' geo.recode = NULL)
#' # obtain direct estimates
#' data_multi <- getDirectList(births = DemoData, years = years,
#' regionVar = "region", timeVar = "time", clusterVar = "~clustid+id",
#' ageVar = "age", weightsVar = "weights", geo.recode = NULL)
#' data <- aggregateSurvey(data_multi)
#'
#' # national model
#' years.all <- c(years, "15-19")
#' fit1 <- smoothDirect(data = data, geo = NULL, Amat = NULL,
#' year.label = years.all, year.range = c(1985, 2019),
#' rw = 2, m = 5)
#' ## Plot marginal posterior densities over time
#' ridgePlot(fit1, year.plot = years.all,
#' ncol = 4, by.year = FALSE)
#'
#' # subnational model
#' fit2 <- smoothDirect(data = data, geo = DemoMap$geo, Amat = DemoMap$Amat,
#' year.label = years.all, year.range = c(1985, 2019),
#' rw = 2, m = 5, type.st = 1)
#'
##' # Plot marginal posterior densities over time (regions are ordered alphabetically)
#' ridgePlot(fit2, year.plot = years.all, ncol = 4)
#'
##' # Re-order the regions and save the density to avoid re-compute later
#' density <- ridgePlot(fit2, year.plot = years.all,
#' ncol = 4, per1000 = TRUE, order = -1, save.density = TRUE)
#' density$g
#'
#' # Show each region (instead of each year) in a panel
#' ## Instead of recalculate the posteriors, we can use previously calculated densities as input
#' ridgePlot(results = density, year.plot = years.all,
#' ncol = 4, by.year=FALSE, per1000 = TRUE)
#'
#' # Show more years
#' ridgePlot(results = density, year.plot = c(1990:2019),
#' ncol = 4, by.year=FALSE, per1000 = TRUE)
#'
#'
#' # Example using surveyPrev package output
#'
#' library(surveyPrev)
#' dhsData <- getDHSdata(country = "Rwanda", indicator = "nmr", year = 2019)
#' data <- getDHSindicator(dhsData, indicator = "nmr")
#' geo <- getDHSgeo(country = "Rwanda", year = 2019)
#' poly.adm1 <- geodata::gadm(country="RWA", level=1, path=tempdir())
#' poly.adm1 <- sf::st_as_sf(poly.adm1)
#' poly.adm2 <- geodata::gadm(country="RWA", level=2, path=tempdir())
#' poly.adm2 <- sf::st_as_sf(poly.adm2)
#' cluster.info <- clusterInfo(geo = geo,
#' poly.adm1 = poly.adm1,
#' poly.adm2 = poly.adm2,
#' by.adm1 = "NAME_1",
#' by.adm2 = "NAME_2")
#'
#' fit1 <- directEST(data = data, cluster.info = cluster.info, admin = 1)
#' fit2 <- directEST(data = data, cluster.info = cluster.info, admin = 2)
#' ridgePlot(fit1, direction = -1)
#' ridgePlot(fit2, direction = -1)
#'
#' }
#'
#' @export
ridgePlot <- function(x=NULL, nsim = 1000, draws = NULL, year.plot = NULL, year_plot = deprecated(), strata.plot = NULL, strata_plot = deprecated(), by.year = TRUE, ncol = 4, scale = 2, per1000 = FALSE, order = 0, direction = 1, linewidth = 0.5, results = NULL, save.density = FALSE, ...){
if (lifecycle::is_present(year_plot)) {
lifecycle::deprecate_soft("2.0.0", "ridgePlot(year_plot)", "ridgePlot(year.plot)")
year.plot <- year_plot
}
if (lifecycle::is_present(strata_plot)) {
lifecycle::deprecate_soft("2.0.0", "ridgePlot(strata_plot)", "ridgePlot(strata.plot)")
strata.plot <- strata_plot
}
years <- y <- `..x..` <- region <- value <- region.name <- admin2.name.short <- NA
# FOR SURVEYPREV INPUT
if(class(x) %in% c("fhModel", "clusterModel", "directEST")){
x_att <- attributes(x)
domain.names <- x_att$domain.names
# USING SURVEYPREV CLASSES: smoothed model
if(x_att$class %in% c("fhModel", "clusterModel")){
if ("admin2_post" %in% x_att$names){
samples = x$admin2_post
}else{
samples = x$admin1_post
}
# USING SURVEYPREV CLASSES: direct est
}else{
if("res.admin1" %in% names(x)){
domain.names <- x$res.admin1$admin1.name
samples <- matrix(NA, nsim, length(domain.names))
for(i in 1:dim(x$res.admin1)[1]){
samples[, i] <- expit(rnorm(nsim, mean = x$res.admin1$direct.logit.est[i],
sd = (x$res.admin1$direct.logit.prec[i])^(-1/2)))
}
}else{
domain.names <- x$res.admin2$admin2.name.full
samples <- matrix(NA, nsim, length(domain.names))
for(i in 1:dim(x$res.admin2)[1]){
samples[, i] <- expit(rnorm(nsim, mean = x$res.admin2$direct.logit.est[i],
sd = (x$res.admin2$direct.logit.prec[i])^(-1/2)))
}
}
}
# samples is now a nsamp * nregion matrix
samples.long <- data.frame(region.name = rep(domain.names, each = nrow(samples)),
value = as.numeric(samples))
if("res.admin2" %in% names(x)){
upper <- x$res.admin2[, c("admin2.name.full", "admin1.name")]
upper$admin2.name.short <- NA
for(i in 1:dim(upper)[1]){
k <- nchar(as.character(upper$admin1.name[i]))
upper$admin2.name.short[i] <- substr(upper$admin2.name.full[i],
start = k+2,
stop = nchar(upper$admin2.name.full[i]))
}
colnames(upper) <- c("region.name", "group.name", "admin2.name.short")
samples.long <- dplyr::left_join(samples.long, upper)
}
n.levels <- dim(samples)[2]
ridge.max <- max(samples.long$value)*1.03
if(ridge.max>0.95){ridge.max=1}
ridge.min <- min(samples.long$value)*0.97
if(ridge.min<0.05){ridge.min=0}
g <- ggplot2::ggplot(samples.long) +
aes(x = value, y = region.name) +
ggridges::geom_density_ridges_gradient(aes(fill = ggplot2::after_stat(x))) +
ggplot2::scale_fill_viridis_c(lim = c(ridge.min,ridge.max), direction = direction) + ggplot2::theme(legend.position = "none") + xlab("") + ylab("")
if("group.name" %in% colnames(samples.long)){
g <- g + aes(y = admin2.name.short) + ggplot2::facet_wrap(~group.name, scale = "free_y")
}
return(g)
}
if (!isTRUE(requireNamespace("INLA", quietly = TRUE))) {
stop("You need to install the packages 'INLA'. Please run in your R terminal:\n install.packages('INLA', repos=c(getOption('repos'), INLA='https://inla.r-inla-download.org/R/stable'), dep=TRUE)")
}
# If INLA is installed, then attach the Namespace (so that all the relevant functions are available)
if (isTRUE(requireNamespace("INLA", quietly = TRUE))) {
if (!is.element("INLA", (.packages()))) {
attachNamespace("INLA")
}
is.density <- FALSE
if(!is.null(results)){
results <- results$data
region_names <- unique(results$region)
timelabel.yearly <- levels(results$years)
if(order != 0) warning("Plotting pre-calculated densities, order argument is ignored.")
is.density <- "y" %in% colnames(results)
}else{
Amat <- x$Amat
if(is.null(Amat) && is.null(draws)){
region_names <- "All"
region_nums <- 0
}else if(!is.null(draws)){
region_names <- unique(draws$overall$region)
region_nums <- 1:length(region_names)
}else{
region_names <- colnames(Amat)
region_nums <- 1:length(region_names)
}
########################
## Binomial methods
########################
# when posterior draws exist, make that into x
if(!is.null(draws)){
if("draws.est" %in% draws == FALSE && is(x, "list")) stop("draws argument is not correctly specified. It should be the whole output of getSmoothed() function.")
message("Use posterior draws from input.")
x <- draws
# when draws does not exist
}else if(is.null(x)){
# only take draws for cluster-level models
if(!is.null(x$family)){
message("Draws not specified. Use getSmoothed() to calculate posterior draws. Please be aware this does not take into account strata weighting.")
draws <- getSmoothed(x, Amat = Amat, nsim = nsim, save.draws = TRUE)
# overwrite x...
x <- draws
}
}else if(is.null(x$fit)){
stop("Neither fitted object nor posterior draws are provided.")
}
# at this point x is an object from getSmoothed or smoothDirect
##
## If the input is from getSmoothed
##
if((is(x, "list") || is(x, "SUMMERprojlist")) && is.null(x$fit)){
is.density <- FALSE
if(is.null(x$draws.est.overall)){
stop("Posterior draws not found. Please rerun getSmoothed() with save.draws = TRUE.")
}
tmp <- NULL
if(is.null(strata.plot)){
draws.plot <- x$draws.est.overall
}else{
draws.plot <- NULL
counter <- 1
for(i in 1:length(x$draws.est)){
if(x$draws.est[[i]]$strata == draws.plot){
draws.plot[[counter]] <- x$draws.est[[i]]
counter <- counter + 1
}
}
}
# draw.est is ordered
for(i in 1:length(draws.plot)) {
if(draws.plot[[i]]$years %in% tmp) next
tmp <- c(tmp, draws.plot[[i]]$years)
}
year.label <- tmp
timelabel.yearly <- year.label
results <- NULL
for(i in 1:length(timelabel.yearly)){
for(j in 1:length(region_names)){
draw_est_j <- draws.plot[lapply(draws.plot, '[[',"region")==region_names[j]]
draw_est_ij <- draw_est_j[lapply(draw_est_j, '[[', "years")==timelabel.yearly[i]]
tmp <- data.frame(draws = draw_est_ij[[1]]$draws)
tmp$region <- region_nums[j]
tmp$years <- timelabel.yearly[i]
results <- rbind(results, tmp)
}
}
results$years.num <- suppressWarnings(as.numeric(as.character(results$years)))
results$x <- results$draws
if(region_names[1] != "All"){
results$region <- region_names[results$region]
}else{
results$region <- "All"
}
results$years <- factor(results$years, levels = timelabel.yearly)
# reorder areas
if(order != 0 && by.year && length(region_names) > 1){
tmp <- data.frame(region = region_names, median = NA)
for(j in 1:length(region_names)){
tmp$median[j]<- median(results[results$region == region_names[j] & results$years == timelabel.yearly[length(timelabel.yearly)], ]$draws, na.rm = T)
}
tmp <- tmp[order(tmp$median, decreasing = (order > 0)), ]
results$region <- factor(results$region, levels = tmp$region)
}else{
results$region <- factor(results$region, rev(sort(as.character(unique(results$region)))))
}
########################
## Mercer et al. methods
########################
}else{
is.density <- TRUE
is.yearly = x$is.yearly
year.label <- x$year.label
if(is.yearly){
timelabel.yearly <- c(x$year.range[1] : x$year.range[2], year.label)
}else{
timelabel.yearly <- year.label
}
names <- expand.grid(area = region_nums, time = timelabel.yearly)
mod <- x$fit
lincombs.info <- x$lincombs.info
results <- NULL
for(i in 1:length(timelabel.yearly)){
for(j in 1:length(region_names)){
index <- lincombs.info$Index[lincombs.info$District == region_nums[j] & lincombs.info$Year == i]
tmp <- data.frame(INLA::inla.tmarginal(expit, mod$marginals.lincomb.derived[[index]]))
tmp$region <- region_nums[j]
tmp$years <- timelabel.yearly[i]
results <- rbind(results, tmp)
}
}
results$is.yearly <- !(results$years %in% year.label)
results$years.num <- suppressWarnings(as.numeric(as.character(results$years)))
if(region_names[1] != "All"){
results$region <- region_names[results$region]
}else{
results$region <- "All"
}
results$years <- factor(results$years, levels = timelabel.yearly)
# reorder areas
if(order != 0 && by.year && length(region_names) > 1){
tmp <- data.frame(region = region_names, median = NA)
for(j in 1:length(region_names)){
index <- lincombs.info$Index[lincombs.info$District == region_nums[j] & lincombs.info$Year == length(timelabel.yearly)]
tmp$median[j]<- INLA::inla.qmarginal(0.5, mod$marginals.lincomb.derived[[index]])
}
tmp <- tmp[order(tmp$median, decreasing = (order > 0)), ]
results$region <- factor(results$region, levels = tmp$region)
}else{
results$region <- factor(results$region, rev(sort(as.character(unique(results$region)))))
}
}
}
results.plot <- results
if(per1000) results.plot$x <- 1000 * results.plot$x
if(is.null(year.plot)){
year.plot <- year.label
}
# plot calculated density
if(by.year && is.density){
g <- ggplot2::ggplot(subset(results.plot, years %in% year.plot), ggplot2::aes(x = x, y = region, height = y, fill = ..x..))
}else if(is.density){
results.plot$years <- factor(results.plot$years, levels = rev(timelabel.yearly))
g <- ggplot2::ggplot(subset(results.plot, years %in% year.plot), ggplot2::aes(x = x, y = years, height = y, fill = ..x..))
}
if(is.density) g <- g + ggridges::geom_density_ridges_gradient(stat="identity", alpha = 0.5, linewidth = linewidth)
# plot draws
if(by.year && !is.density){
g <- ggplot2::ggplot(subset(results.plot, years %in% year.plot), ggplot2::aes(x = x, y = region))
}else if(!is.density){
results.plot$years <- factor(results.plot$years, levels = rev(timelabel.yearly))
g <- ggplot2::ggplot(subset(results.plot, years %in% year.plot), ggplot2::aes(x = x, y = years))
}
if(!is.density) g <- g + ggridges::geom_density_ridges_gradient(ggplot2::aes(fill = ..x..), scale = scale, alpha = 0.5, linewidth = linewidth)
g <- g + ggplot2::scale_fill_viridis_c(option = "D", direction = direction) + ggplot2::theme_bw() + ggplot2::ylab("") + ggplot2::theme(legend.position = 'none') + ggplot2::xlab("")
if(by.year){
if(length(year.plot) > 1) g <- g + ggplot2::facet_wrap(~years, ncol = ncol)
}else{
if(length(region_names) > 1) g <- g + ggplot2::facet_wrap(~region, ncol = ncol)
}
if(save.density){
return(list(data = results, g = g))
}else{
return(g)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.