R/plotCRT.R
In CRTspat: Workflow for Cluster Randomised Trials with Spillover

Documented in CRTwrite plotCRT

#' Graphical displays of the geography of a CRT
#'
#' \code{plotCRT} returns graphical displays of the geography of a CRT
#' or of the results of statistical analyses of a CRT
#' @param object object of class \code{'CRTanalysis'} produced by \code{CRTanalysis()}
#' @param map logical: indicator of whether a map is required
#' @param distance measure of distance or surround with options: \cr
#' \tabular{ll}{
#' \code{"nearestDiscord"} \tab distance to nearest discordant location (km)\cr
#' \code{"disc"} \tab disc\cr
#' \code{"hdep"} \tab Tukey's half space depth\cr
#' \code{"sdep"} \tab simplicial depth\cr
#' }
#' @param fill fill layer of map with options:
#' \tabular{ll}{
#' \code{'cluster'} \tab cluster assignment \cr
#' \code{'arms'}   \tab arm assignment \cr
#' \code{'nearestDiscord'} \tab distance to the nearest discordant location\cr
#' \code{'disc'} \tab disc measure of surround\cr
#' \code{'hdep'} \tab Tukey's half space depth\cr
#' \code{'sdep'} \tab simplicial depth\cr
#' \code{'prediction'}\tab model prediction of the outcome \cr
#' \code{'none'}\tab No fill \cr
#' }
#' @param showLocations logical: determining whether locations are shown
#' @param showClusterLabels logical: determining whether the cluster numbers are shown
#' @param showClusterBoundaries logical: determining whether cluster boundaries are shown
#' @param showBuffer logical: whether a buffer zone should be overlayed
#' @param cpalette colour palette (to use different colours for clusters this must be at
#' least as long as the number of clusters.
#' @param buffer_width width of buffer zone to be overlayed (km)
#' @param maskbuffer radius of buffer around inhabited areas (km)
#' @param labelsize size of cluster number labels
#' @param legend.position (using \code{ggplot2::themes} syntax)
#' @return graphics object produced by the \code{ggplot2} package
#' @importFrom magrittr %>%
#' @importFrom dplyr distinct group_by summarize
#' @importFrom ggplot2 geom_polygon
#' @importFrom ggplot2 aes
#' @details
#' If \code{map = FALSE} and the input is a trial data frame or a \code{CRTsp} object,
#' containing a randomisation to arms, a stacked bar chart of the outcome
#' grouped by the specified \code{distance} is produced. If the specified \code{distance}
#' has not yet been calculated an error is returned.\cr\cr
#' If \code{map = FALSE} and the input is a \code{CRTanalysis} object a plot of the
#' estimated spillover function is generated. The fitted spillover function is plotted
#' as a continuous blue line against the measure
#' the surround or of the distance to the nearest discordant location. Using the same axes, data summaries are plotted for
#' ten categories of distance from the boundary. Both the
#' average of the outcome and confidence intervals are plotted.
#' \itemize{
#' \item For analyses with logit link function the outcome is plotted as a proportion. \cr
#' \item For analyses with log or cloglog link function the data are plotted on a scale of the Williams mean
#' (mean of exp(log(x + 1))) - 1) rescaled so that the median matches the fitted curve at the midpoint.\cr
#' }
#' If \code{map = TRUE} a thematic map corresponding to the value of \code{fill} is generated.
#' \itemize{
#' \item \code{fill = 'clusters'} or leads to thematic map showing the locations of the clusters
#' \item \code{fill = 'arms'} leads to a thematic map showing the geography of the randomization
#' \item \code{fill = 'distance'} leads to a raster plot of the distance to the nearest discordant location.
#' \item \code{fill = 'prediction'} leads to a raster plot of predictions from an \code{'INLA'} model.
#' }
#' If \code{showBuffer = TRUE} the map is overlaid with a grey transparent layer showing which
#' areas are within a defined distance of the boundary between the arms. Possibilities are:
#' \itemize{
#' \item If the trial has not been randomised or if \code{showBuffer = FALSE} no buffer is displayed
#' \item If \code{buffer_width} takes a positive value then buffers of this width are
#' displayed irrespective of any pre-specified or spillover limits.
#' \item If the input is a \code{'CRTanalysis'} and spillover limits have been estimated by
#' an \code{'LME4'} or \code{'INLA'} model then these limits are used to define the displayed buffer.
#' \item If \code{buffer_width} is not specified and no spillover limits are available, then any
#' pre-specified buffer (e.g. one generated by \code{specify_buffer()}) is displayed.
#' }
#' A message is output indicating which of these possibilities applies.
#' @export
#' @importFrom ggplot2 aes alpha
#' @examples
#' {example <- readdata('exampleCRT.txt')
#' #Plot of data by distance
#' plotCRT(example)
#' #Map of locations only
#' plotCRT(example, map = TRUE, fill = 'none', showLocations = TRUE,
#'            showClusterBoundaries=FALSE, maskbuffer=0.2)
#' #show cluster boundaries and number clusters
#' plotCRT(example, map = TRUE, fill ='none', showClusterBoundaries=TRUE,
#'            showClusterLabels=TRUE, maskbuffer=0.2, labelsize = 2)
#' #show clusters in colour
#' plotCRT(example, map = TRUE, fill = 'clusters', showClusterLabels = TRUE,
#'           labelsize=2, maskbuffer=0.2)
#' #show arms
#' plotCRT(example, map = TRUE,
#' fill = 'arms', maskbuffer=0.2, legend.position=c(0.8,0.8))
#' #spillover plot
#' analysis <- CRTanalysis(example)
#'  plotCRT(analysis, map = FALSE)
#' }
#' @export
plotCRT <- function(object, map = FALSE, distance = "nearestDiscord", fill = "arms", showLocations = FALSE,
    showClusterBoundaries = TRUE, showClusterLabels = FALSE, showBuffer = FALSE,
    cpalette = NULL, buffer_width = NULL, maskbuffer = 0.2, labelsize = 4,
    legend.position = NULL) {

    control_curve <- intervention_curve <- scale_par <- buffer <- g <- NULL
    if (is.null(legend.position)) legend.position <- "none"
    if (!isa(object, what = 'CRTanalysis')) object <- CRTsp(object)
    trial <- object$trial
    if (is.null(trial)) {
        stop("*** No data points for plotting ***")
    }
    if (isa(object, what = 'CRTanalysis')) {
        distance <- object$options$distance
        scale_par <- object$options$scale_par
    } else {
        scale_par <- object$design[[distance]]$scale_par
    }
    distanceText <-  getDistanceText(distance = distance, scale_par = scale_par)
    if (!map) {
        if (isa(object, what = 'CRTanalysis')) {
            # if the object is the output from analysisCRT
            analysis <- object
            if (is.null(analysis$spillover$FittedCurve))
                stop("*** No fitted curve available ***")
            d <- average <- upper <- lower <- spilloverFunction <- NULL
            interval <- analysis$spillover$spillover_limits
            range <- max(analysis$trial[[distance]]) - min(analysis$trial[[distance]])
            data <- group_data(analysis = analysis, grouping = "quintiles")
            FittedCurve <- analysis$spillover$FittedCurve
            fitted_median <- median(c(FittedCurve$control_curve,FittedCurve$intervention_curve),na.rm = TRUE)
            data_median <- median(data$average)
            if (analysis$options$link %in% c('log', 'cloglog')) {
                scale_factor <- fitted_median/data_median
                data_scaled <- data.frame(
                                    d = data$d,
                                    arm = data$arm,
                                    average = data$average * scale_factor,
                                    lower = data$lower * scale_factor,
                                    upper = data$upper * scale_factor)
            } else {
                data_scaled <- data
            }
            g <- ggplot2::ggplot() + ggplot2::theme_bw()
            g <- g + ggplot2::geom_line(data = FittedCurve[!is.na(FittedCurve$control_curve), ],
                            ggplot2::aes(x = d, y = control_curve), linewidth = 2, colour = "#b2df8a")
            g <- g + ggplot2::geom_line(data = FittedCurve[!is.na(FittedCurve$intervention_curve), ],
                            ggplot2::aes(x = d, y = intervention_curve), linewidth = 2, colour = "#0072A7")
            g <- g + ggplot2::geom_point(data = data_scaled, ggplot2::aes(x = d, y = average,
                                        shape=factor(arm)), size = 2)
            g <- g + ggplot2::scale_shape_manual(name = "Arms", values = c(0, 16),
                                                 labels = c("Control", "Intervention"))
            g <- g + ggplot2::theme(legend.position = legend.position)
            g <- g + ggplot2::geom_errorbar(data = data_scaled, mapping = ggplot2::aes(x = d, ymin = upper,
                                        ymax = lower), linewidth = 0.5, width = range/50)
            if (identical(analysis$options$distance, "nearestDiscord")) {
                g <- g + ggplot2::geom_vline(xintercept = 0, linewidth = 1, linetype = "dashed")
                if (analysis$options$cfunc %in% c("L","P","S")) {
                    g <- g + ggplot2::geom_vline(xintercept = interval, linewidth = 1)
                    g <- g + ggplot2::geom_rect(data = NULL, ggplot2::aes(xmin = interval[1], xmax = interval[2],
                                                 ymin = -Inf, ymax = Inf), fill = alpha("#2C77BF", 0.2))
                }
            }
            g <- g + ggplot2::xlab(distanceText)
            g <- g + ggplot2::ylab("Outcome")
        } else {
            if (is.null(object$trial[[distance]])) {
                stop(paste0("*** First use compute_distance() to calculate ", distance, "***"))
            }
            # Plot of data by distance
            dcat <- value <- NULL
            if (is.null(object$trial$num)) {
                return(plot(object$trial))
            }
            analysis <- CRTanalysis(trial = object$trial, method = "EMP")
            data <- group_data(analysis = analysis, distance = distance, grouping = "equalwidth")
            data$dcat <- with(analysis, min(trial[[distance]]) +
                                 (data$cat - 0.5) * (max(trial[[distance]]) - min(trial[[distance]]))/10)
            data <- tidyr::gather(data[, c("dcat", "locations", "total",
                           "positives")], key = 'variable', value = 'value', -dcat, factor_key = TRUE)
            levels(data$variable) <- c("Locations", "Sum of denominators", "Sum of numerators")
            g <- ggplot2::ggplot(data = data) +
                ggplot2::theme_bw() +
                ggplot2::geom_bar(aes(x = dcat, y = value), colour = NA, fill = "lightgrey", stat = "identity") +
                ggplot2::geom_vline(xintercept = 0, linewidth = 1, linetype = "dashed") +
                ggplot2::xlab(distanceText) +
                ggplot2::ylab(ggplot2::element_blank()) +
                ggplot2::facet_wrap( ~ variable, ncol = 1, scales = "free")

        }
    } else {
        colourClusters <- identical(fill, "clusters")
        showArms <- identical(fill, "arms")
        if (isa(object, what = 'CRTanalysis')) {
            analysis <- object
            spillover_limits <- analysis$spillover$spillover_limits
             if(!(fill %in% c("arms", "clusters"))){
                # raster map derived from inla analysis
                x <- y <- prediction <- nearestDiscord <- NULL
                g <- ggplot2::ggplot()
                if (!identical(analysis$options$method, "INLA")) {
                    stop("*** Raster plots only available for outputs from INLA analysis ***")
                } else {
                    pixel <- analysis$inla_mesh$pixel
                    raster <- analysis$inla_mesh$prediction
                    if (identical(fill, "prediction")) distanceText <- "Prediction"
                    if (is.null(raster[[fill]])){
                        stop("*** Requested measure not available for this analysis ***")
                    } else {
                        raster$fill <- raster[[fill]]
                    }
                    g <- g + ggplot2::geom_tile(data = raster, aes(x = x, y = y,
                         fill = fill, width = pixel, height = pixel))
                    g <- g + ggplot2::scale_fill_gradient(name = distanceText,
                                                              low = "blue", high = "orange")
                    g <- g + ggplot2::theme(legend.title = ggplot2::element_text(size = 8),
                            legend.text = ggplot2::element_text(size = 8))
                }
             }
        }
        # vector plot starts here
        arm <- cluster <- x <- y <- NULL
        xlim <- c(min(trial$x - maskbuffer), max(trial$x + maskbuffer))
        ylim <- c(min(trial$y - maskbuffer), max(trial$y + maskbuffer))

        # The plotting routines require unique locations
        CRT <- aggregateCRT(trial)

        # The plotting routines use (x,y) coordinates
        if (is.null(CRT$trial$x)) {
            CRT <- latlong_as_xy(CRT)
        }
        trial <- CRT$trial

        # Adjust the required plots to exclude those for which there is no
        # data or combinations that are too cluttered or overprinted

        if (is.null(trial$cluster)) {
            trial$cluster <- rep(1, nrow(trial))
            showClusterBoundaries <- FALSE
            showClusterLabels <- FALSE
            colourClusters <- FALSE
        }
        if (is.null(trial$arm)) {
            trial$arm <- 0
            showArms <- FALSE
        }
        if (!showClusterBoundaries) {
            showClusterLabels <- FALSE
        }

        totalClusters <- length(unique(trial$cluster))

        if (is.null(cpalette))
            cpalette <- sample(rainbow(totalClusters))
        if (totalClusters == 1)
            cpalette <- c("white")

        if (showBuffer) trial <- modifyBuffer(trial = trial, buffer_width = buffer_width,
                                              spillover_limits = spillover_limits)
        if (is.null(trial$buffer)) showBuffer <- FALSE

        sf_objects <- sf_objects(trial = trial, maskbuffer = maskbuffer)

        if (is.null(g))
            g <- ggplot2::ggplot()
        if (colourClusters) {
            g <- g + ggplot2::geom_sf(data =  sf_objects$clusters, aes(fill = cluster), fill = cpalette,
                alpha = 0.8)
        }
        if (showArms) {
            g <- g + ggplot2::geom_sf(data =  sf_objects$arms, aes(fill = arm))
            # use standard colour-blind compatible palette
            g <- g + ggplot2::scale_fill_manual(name = "Arms", values = c("#b2df8a",
                "#1f78b4"), labels = c("Control", "Intervention"))
        }
        if (showBuffer) {
            # whether the point is within the buffer
            g <- g + ggplot2::geom_sf(data =  sf_objects$buffer, aes(alpha = buffer),
                               color = NA, fill = "black", show.legend = FALSE)
            g <- g + ggplot2::scale_alpha_manual(name = "Buffer", values = c(0, 0.2))
        }
        if (showClusterBoundaries) {
            g <- g + ggplot2::geom_sf(data =  sf_objects$clusters, color = "black", fill = NA)
        }
        g <- g + ggplot2::geom_sf(data = sf_objects$mask, fill = "grey")
        # Labels
        if (showClusterLabels) {
            showLocations <- FALSE
            # Positions of centroids of clusters for locating the labels
            cc <- data.frame(trial %>%
                                 dplyr::group_by(cluster) %>%
                                 dplyr::summarize(x = mean(x), y = mean(y), .groups = "drop"))
            g <- g + ggplot2::geom_text(data = cc, aes(x = x, y = y, label = cluster),
                                        hjust = 0.5, vjust = 0.5, size = labelsize)
        }
        if (showLocations) {
            g <- g + ggplot2::geom_point(data = trial, aes(x = x, y = y), size = 0.5)
        }
        g <- g + ggplot2::theme(legend.position = legend.position)
        g <- g + ggplot2::theme(panel.border = ggplot2::element_blank())
        g <- g + ggplot2::theme(axis.title = ggplot2::element_blank())
        g <- g + ggplot2::coord_sf(expand = FALSE, xlim = xlim, ylim = ylim)
    }
    return(g)
}

# Create simple feature objects either for plotting or export
sf_objects <- function(trial, maskbuffer, crs = "Euclidean",
                       centroid = NULL) {
    clusters <- arms <- buffer <- cluster <- arm <- x <- y <- NULL
    if (!identical(crs, "Euclidean") & !is.null(centroid)) {
    # convert coordinates to radians and apply crs of "WGS84"
    # scalef is is the number of degrees per kilometer
        scalef <- 180/(6371*pi)
        crs <- 4326
        trial$lat <- trial$y*scalef + centroid$lat
        trial$long <- trial$x*scalef + centroid$long
        coords <- c("lat", "long")
        xlim <- c(min(trial$x - maskbuffer * scalef),
                  max(trial$x + maskbuffer * scalef))
        ylim <- c(min(trial$y - maskbuffer * scalef),
                  max(trial$y + maskbuffer * scalef))
        # create pts
        pts <- tidyr::tibble(lat = trial$lat, long = trial$long) %>%
            sf::st_as_sf(coords = coords) %>%
            sf::st_set_crs(crs)
    } else {
        scalef <- 1
#        coords <- c("y", "x")
        coords <- c("x", "y")
        xlim <- c(min(trial$x - maskbuffer), max(trial$x + maskbuffer))
        ylim <- c(min(trial$y - maskbuffer), max(trial$y + maskbuffer))
        # create pts
        pts <- tidyr::tibble(y = trial$y, x = trial$x) %>%
            sf::st_as_sf(coords = coords)
    }
    tr <- sf::st_as_sf(trial, coords = coords) %>%
            sf::st_set_crs(crs)
    # voronoi of pts- if the coordinates are lat long this would generate a
    # warning, but the issue is trivial if the area is small or near the equator
    suppressWarnings(
        vor <- sf::st_voronoi(sf::st_combine(tr)) %>%
        sf::st_collection_extract("POLYGON") %>%
        sf::st_as_sf() %>%
        sf::st_set_crs(crs)
    )
    if (!is.null(trial$cluster)) {
        clusters <- vor %>%
            sf::st_join(tr, sf::st_intersects) %>%
            dplyr::group_by(cluster) %>%
            dplyr::summarize()
    }
    if (!is.null(trial$arm)) {
        arms <- vor %>%
            sf::st_join(tr, sf::st_intersects) %>%
            dplyr::group_by(arm) %>%
            dplyr::summarize()
    }
    # buffer zone
    if (!is.null(trial$buffer)) {
        buffer_tr <- tr[tr$buffer,,drop=FALSE]
        buffer <- vor %>%
            sf::st_join(tr, sf::st_intersects) %>%
            dplyr::group_by(buffer) %>%
            dplyr::summarize()

 #       sf::st_collection_extract("POLYGON") %>%
 #           sf::st_combine() %>%
 #           sf::st_as_sf() %>%
 #           sf::st_set_crs(crs)
    }

    # mask for excluded areas the mask needs to extend outside the plot
    # area
    x0 <- xlim[1] - 0.5 * scalef
    x1 <- xlim[2] + 0.5 * scalef
    y0 <- ylim[1] - 0.5 * scalef
    y1 <- ylim[2] + 0.5 * scalef
    bbox <- sf::st_polygon(list(cbind(x = c(x0, x1, x1, x0, x0), y = c(y0,
                                                                       y0, y1, y1, y0))))
    bbox <- sf::st_sfc(bbox)
    tr <- sf::st_as_sf(trial, coords = coords)
    buf1 <- sf::st_buffer(tr, maskbuffer * scalef)
    buf2 <- sf::st_union(buf1)
    mask <- sf::st_difference(bbox, buf2)
    sf_objects <- list(clusters = clusters,
                      arms = arms,
                      buffer = buffer,
                      mask = mask)
return(sf_objects)
}

#' Export of GIS layer from \code{'CRTsp'}
#'
#' \code{CRTwrite} exports a simple features object in a GIS format
#' @param object object of class \code{'CRTsp'}
#' @param dsn dataset name (relative path) for output objects
#' @param feature feature to be exported, options are:
#' \tabular{ll}{
#' \code{'cluster'}\tab cluster assignments \cr
#' \code{'arms'}\tab arm assignments \cr
#' \code{'buffer'}\tab buffer zone or spillover zone\cr
#' \code{'mask'}\tab mask for areas that are distant from habitations \cr
#' }
#' @param buffer_width width of buffer between discordant locations (km)
#' @param maskbuffer radius of buffer drawn around inhabited areas (km)
#' @param ... other arguments passed to \code{'sf::write_sf'}
#' @return \code{obj}, invisibly
#' @details
#' \code{'sf::write_sf'} is used to format the output. The function returns TRUE on success,
#' FALSE on failure, invisibly. \cr\cr
#' If the input object contains a \code{'centroid'} then this is used to compute lat long
#' coordinates, which are assigned the "WGS84" coordinate reference system.
#' Otherwise the objects have equirectangular co-ordinates with centroid (0,0).\cr\cr
#' If \code{feature = 'buffer'} then buffer width determination is as described under
#' \code{plotCRT()}.
#' \cr\cr
#' The output vector objects are constructed by forming a Voronoi tessellation of polygons around
#' each of the locations and combining these polygons. The polygons on the outside of the study area
#' extend outwards to an external rectangle. The \code{'mask'} is used to mask out the areas of
#' these polygons that are at a distance > \code{maskbuffer} from the nearest location.
#' @examples
#' \donttest{
#'         tmpdir = tempdir()
#'         dsn <- paste0(tmpdir,'/arms')
#'         CRTwrite(readdata('exampleCRT.txt'), dsn = dsn, feature = 'arms',
#'         driver = 'ESRI Shapefile', maskbuffer = 0.2)
#'     }
#' @export
CRTwrite <- function(object, dsn, feature = 'clusters', buffer_width, maskbuffer = 0.2, ...){
    spillover_limits <- NULL
    if (isa(object, what = 'CRTanalysis')) {
        spillover_limits <- ifelse(is.null(object$spillover$spillover_limits), NULL,
                                       object$spillover$spillover_limits)
        object <- object$trial
    }
    object <- CRTsp(object)
    centroid <- object$geom_full$centroid
    trial <- object$trial
    if (identical(feature,"buffer")) {
        trial <- modifyBuffer(trial = trial, buffer_width = buffer_width,
                  spillover_limits = spillover_limits)
        if (is.null(trial$buffer)) stop("No buffer available for export")
    }
    sf_objects <- sf_objects(trial = trial, maskbuffer = maskbuffer,
                             crs = "WGS84", centroid = centroid)
    sf::st_write(sf_objects[[feature]], dsn = dsn, ...)
}

modifyBuffer <- function(trial, buffer_width, spillover_limits){
    if (!is.null(trial$nearestDiscord)){
        if (!is.null(buffer_width)){
            trial$buffer <-  ifelse(dplyr::between(trial$nearestDiscord,
                               -buffer_width, buffer_width), TRUE, FALSE)
    message("Buffer includes locations within ", buffer_width*1000, "m of the opposing arm")
        } else if(!is.null(spillover_limits)) {
            trial$buffer <-  ifelse(dplyr::between(trial$nearestDiscord,
            spillover_limits[1], spillover_limits[2]), TRUE, FALSE)
            message("Buffer corresponds to estimated spillover zone")
        } else {
            if (!is.null(trial$buffer)) {
                message("Buffer corresponds pre-specified buffer zone")
            } else {
                message("No buffer available")
            }
        }
    } else {
        message("No buffer shown: distances to discordant locations are unavailable")
    }
return(trial)
}