Nothing
#' Representative Sequence plot
#'
#' Function for rendering representative sequence plots with
#' \code{\link[ggplot2]{ggplot2}} \insertCite{wickham2016}{ggseqplot} instead of base
#' R's \code{\link[base]{plot}} function that is used by
#' \code{\link[TraMineR:seqplot]{TraMineR::seqplot}} \insertCite{gabadinho2011}{ggseqplot}.
#'
#' @inheritParams ggseqdplot
#' @inheritParams TraMineR::seqrep
#' @param diss pairwise dissimilarities between sequences in \code{seqdata} (see \code{TraMineR::seqdist})
#' @param border if \code{TRUE} bars are plotted with black outline; default is \code{FALSE} (also accepts \code{NULL})
#' @param proportional if \code{TRUE} (default), the sequence heights are
#' displayed proportional to the number of represented sequences
#' @param stats if \code{TRUE} (default), mean discrepancy in each subset
#' defined by all sequences attributed to one representative sequence and the
#' mean distance to this representative sequence are displayed.
#' @param colored.stats specifies if representatives in stats plot should be
#' color coded; only recommended if number of representatives is small;
#' if set to \code{NULL} (default) colors are used if n rep. <= 10;
#' use \code{TRUE} or \code{FALSE} to change manually
#' @param facet_ncol specifies the number of columns in the plot (relevant if !is.null(group))
#'
#' @details
#' The representative sequence plot displays a set of distinct sequences as sequence index plot.
#' The set of representative sequences is extracted from the sequence data by an internal call of
#' \code{\link[TraMineR:seqrep]{TraMineR::seqrep}} according to the criteria listed in the
#' arguments section above.
#'
#' The extracted sequences are plotted by a call of \code{\link[ggseqplot:ggseqiplot]{ggseqiplot}} which uses
#' \code{\link[ggplot2:geom_rect]{ggplot2::geom_rect}} to render the sequences. If \code{stats = TRUE} the
#' index plots are complemented by information on the "quality" of the representative sequences.
#' For further details on representative sequence plots see \insertCite{gabadinho2011a;textual}{ggseqplot}
#' and the documentation of \code{\link[TraMineR:plot.stslist.rep]{TraMineR::plot.stslist.rep}},
#' \code{\link[TraMineR:seqplot]{TraMineR::seqplot}}, and \code{\link[TraMineR:seqrep]{TraMineR::seqrep}}.
#'
#' Note that \code{ggseqrplot} uses \code{\link[patchwork]{patchwork}} to combine the different components
#' of the plot. If you want to adjust the appearance of the composed plot, for instance by changing the
#' plot theme, you should consult the documentation material of \code{\link[patchwork]{patchwork}}.
#'
#' @return A representative sequence plot using \code{\link[ggplot2]{ggplot}}.
#' @export
#' @importFrom patchwork plot_layout
#'
#' @author Marcel Raab
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' library(TraMineR)
#'
#' # Defining a sequence object with the data in columns 10 to 25
#' # (family status from age 15 to 30) in the biofam data set
#' data(biofam)
#' # Use sample of 300 cases
#' set.seed(123)
#' biofam <- biofam[sample(nrow(biofam),150),]
#' biofam.lab <- c("Parent", "Left", "Married", "Left+Marr",
#' "Child", "Left+Child", "Left+Marr+Child", "Divorced")
#' biofam.seq <- seqdef(biofam, 10:25, labels=biofam.lab)
#'
#' # Computing the distance matrix
#' biofam.dhd <- seqdist(biofam.seq, method="DHD")
#'
#' # Representative sequence plot (using defaults)
#' # ... with TraMineR::seqplot
#' seqrplot(biofam.seq, diss = biofam.dhd)
#'
#' # ... with ggseqrplot
#' ggseqrplot(biofam.seq, diss = biofam.dhd)
ggseqrplot <- function(seqdata,
diss,
group = NULL,
criterion = "density",
coverage = .25,
nrep = NULL,
pradius = 0.10,
dmax = NULL,
border = FALSE,
proportional = TRUE,
weighted = TRUE,
stats = TRUE,
colored.stats = NULL,
facet_ncol = NULL) {
if (!inherits(seqdata, "stslist")) {
stop("data are not stored as sequence object, use 'TraMineR::seqdef' to create one")
}
if (!is.null(nrep) && (nrep%%1!=0 || nrep == 0)) {
stop("nrep has to be a positive whole number")
}
border <- border %||% FALSE
group <- group %||% 1
if (length(unique(group)) > 10 && is.null(facet_ncol)) {
facet_ncol <- 5
}
if (length(unique(group)) <= 10 && is.null(facet_ncol)) {
facet_ncol <- c(1,2,3,2,3,3,4,4,5,5)[length(unique(group))]
}
if (inherits(group, "haven_labelled")) {
group_name <- deparse(substitute(group))
group <- haven::as_factor(group)
cli::cli_warn(c("i" = "group vector {.arg {group_name}} is of class {.cls haven_labelled} and has been converted into a factor"))
}
seq.rep <- purrr::map(
sort(unique(group)),
~ TraMineR::seqrep(seqdata[group == .x, ],
diss = diss[group == .x, group == .x],
criterion = criterion,
weighted = weighted,
coverage = coverage,
nrep = nrep,
pradius = pradius,
dmax = dmax
))
nrow.aux <- purrr::map(seq.rep, nrow) |>
unlist() |>
max()
if (is.null(colored.stats) && nrow.aux <=10) colored.stats <- TRUE
if (is.null(colored.stats) && nrow.aux > 10) colored.stats <- FALSE
if (proportional == TRUE) {
for(i in 1:length(unique(group))) {
wgt <- attributes(seq.rep[[i]])$Statistics[1:nrow(seq.rep[[i]]),2]
attributes(seq.rep[[i]])$weights <- wgt
}
}
aux <- purrr::map(seq.rep,
~ ggseqiplot(.x)$data)
ybreaks <- purrr::map(aux,
~(unique(.x$begin) + unique(.x$end)) / 2)
coverage <- purrr::map(seq.rep,
~attributes(.x)$Statistics[nrow(.x) + 1, 4])
coverage <- purrr::map(coverage, ~paste0(round(.x, 1), "%"))
rplotdata <- purrr::map(seq.rep,
~ attributes(.x)$Statistics |>
dplyr::transmute(
id = dplyr::row_number(),
`Discrepancy (mean dist. to center)` = .data$V,
`Mean dist. to representative seq.` = .data$MD
) |>
dplyr::filter(.data$id != max(.data$id)) |>
tidyr::pivot_longer(cols = -"id"))
rplotdata <- purrr::imap(sort(unique(group)),
~rplotdata[[.y]] |>
dplyr::mutate(group = .x, .before = 1))
if (colored.stats == TRUE && stats == TRUE) {
p2 <- purrr::map(rplotdata,
~ .x |>
ggplot(aes(
x = .data$value,
y = "1",
label = .data$id,
fill = factor(.data$id)
)) +
ggrepel::geom_label_repel(
size = 3, colour = "black",
alpha = .8, point.size = NA,
direction = "y"
) +
scale_y_discrete(
labels = NULL,
expand = expansion(add = 1.5)
) +
facet_wrap(vars(.data$name),
nrow = 2,
scales = "fixed"
) +
theme_minimal() +
theme(
legend.position = "none",
strip.text = element_text(size = 10),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_blank()
))
color <- purrr::map(p2,
~ggplot_build(.x)$data[[1]] |>
dplyr::group_by(.data$label) |>
dplyr::summarise(col = dplyr::first(.data$fill)) |>
dplyr::pull("col"))
labs <- purrr::map(color,
~glue::glue("<b style='color:{.x}'>{1:length(.x)}</b>"))
suppressMessages(
p1 <- purrr::map2(seq.rep, 1:length(unique(group)),
~ggseqplot::ggseqiplot(.x, border = border) +
scale_y_continuous(
breaks = ybreaks[[.y]],
labels = labs[[.y]],
guide = guide_axis(check.overlap = TRUE),
expand = expansion(add = 0)
) +
labs(subtitle = glue::glue("Coverage = {coverage[[.y]]}")) +
theme(
axis.title.y = element_blank(),
axis.text.y = ggtext::element_markdown(size = 11), # ggplot2 < 3.5
axis.text.y.left = ggtext::element_markdown(size = 11),
plot.subtitle = element_text(hjust = 0.5)
))
) } else {
suppressMessages(
p1 <- purrr::map2(seq.rep, 1:length(unique(group)),
~ggseqiplot(.x, border = border) +
scale_y_continuous(
breaks = ybreaks[[.y]],
labels = 1:nrow(.x),
guide = guide_axis(check.overlap = TRUE),
expand = expansion(add = c(0, 0))
) +
theme(axis.title.y = element_blank()))
)
if (length(unique(group)) > 1) {
p1 <- purrr::map(1:length(unique(group)),
~p1[[.x]] +
labs(subtitle = glue::glue("Coverage = {coverage[[.x]]}")) +
theme(plot.subtitle = element_text(hjust = 0.5)))
} else {
p1[[1]] <- p1[[1]] +
labs(caption = glue::glue("Coverage = {coverage[[1]]}; Criterion = {criterion}"))
}
p2 <- purrr::map(rplotdata,
~.x |>
ggplot(aes(
x = .data$value,
y = "1",
label = .data$id
)) +
ggrepel::geom_text_repel(
size = 4.5,
fontface = "bold",
point.size = NA,
direction = "y"
) +
scale_y_discrete(
labels = NULL,
expand = expansion(add = 1.5)
) +
facet_wrap(vars(.data$name), nrow = 2, scales = "fixed") +
theme_minimal() +
theme(
legend.position = "none",
strip.text = element_text(size = 12),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_blank()
))
}
if (length(unique(group)) > 1) {
p2 <- purrr::map(1:length(unique(group)),
~p2[[.x]] +
labs(title = sort(unique(group))[[.x]]) +
theme(plot.title = element_text(hjust = .5)))
}
patches <- vector(mode='character')
for (i in 1:facet_ncol) {
facx <- i - 1
idx <- 1:facet_ncol+facet_ncol*facx
if (min(idx) <= length(unique(group))) {
idy <- idx[idx > length(unique(group))]
idx <- idx[idx<=length(unique(group))]
if (stats == TRUE) {
patches <- paste0(patches, " + ", paste0("p2[[",idx,"]]", collapse = " + "))
if (length(idy) > 0) {
patches <- paste0(patches, " + ", paste0(rep("patchwork::plot_spacer()", length(idy)),
collapse = " + "))
}
}
patches <- paste0(patches, " + ", paste0("p1[[",idx,"]]", collapse = " + "))
if (length(idy) > 0) {
patches <- paste0(patches, " + ", paste0( rep("patchwork::plot_spacer()", length(idy)),
collapse = " + "))
}
}
}
if (facet_ncol == 1 && length(unique(group)) > 1) {
patches <- vector(mode='character')
for (i in 1:length(unique(group))) {
patches <- paste0(patches, " + ",
paste0("p2[[",i,"]] + p1[[",i,"]]", collapse = " + "))
}
}
patches <- substring(patches, 4)
prows <- ceiling(length(unique(group))/facet_ncol)
heights <- as.character(rep(c(.75,1),prows))
if (stats == TRUE) {
patches <- glue::glue("{patches} + plot_layout(guides = 'collect', ncol = {facet_ncol}, heights = ")
patches <- paste0(patches, "c(", paste0(heights, collapse = ", "), "))")
} else {
patches <- glue::glue("{patches} + plot_layout(guides = 'collect', ncol = {facet_ncol})")
}
patches <- glue::glue("{patches} + patchwork::plot_annotation(theme = theme(legend.position = 'bottom'))")
if (stats == FALSE && length(unique(group)) > 1) {
p1 <- purrr::map2(p1, sort(unique(group)),
~ .x +
ggtitle(.y) +
theme(plot.title = element_text(hjust = 0.5,
size = 16)))
}
if (colored.stats == FALSE) {
p1 <- purrr::map(p1,
~.x +
theme(axis.text.y = element_text(
color = "black",
size = 12,
face = "bold"
)))
}
rplot <- eval(parse(text = patches))
if (length(unique(group)) > 1) {
rplot <- rplot +
patchwork::plot_annotation(caption = paste("Criterion:", criterion))
}
if (length(unique(group)) > 6) {
cli::cli_inform(c(
"!" = "You are trying to render a representative sequence plot for many groups.",
" " = "The resulting output (if produced at all) might be hard to decipher.",
"i" = "Consider reducing the number of distinct groups."
))
}
if (length(unique(group)) > 3) {
cli::cli_inform(c(
"!" = "You are trying to render a representative sequence plot for many groups using just one column.",
" " = "The resulting output (if produced at all) might be hard to decipher.",
"i" = "Consider reducing the number of distinct groups or increase {.arg facet_ncol}."
))
}
return(rplot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.