Nothing
#' @importFrom ggplot2 ggplot aes aes_string coord_flip xlab ylab labs stat_summary guides position_dodge annotate unit ggtitle
#' @importFrom ggplot2 geom_point geom_step geom_bar geom_rect geom_ribbon geom_line geom_boxplot geom_errorbar geom_label geom_segment geom_vline geom_text geom_tile
#' @importFrom ggplot2 scale_y_discrete scale_y_continuous scale_x_discrete scale_color_manual scale_fill_brewer scale_fill_manual scale_fill_distiller scale_x_continuous scale_x_log10
#' @importFrom ggplot2 theme_classic theme theme_bw theme_minimal element_line element_blank element_text
#' @importFrom ggplot2 scale_color_brewer ggplot_build
NULL
#' @importFrom dplyr mutate filter group_by count arrange summarize left_join summarise
#' @importFrom dplyr group_by_ arrange_
NULL
#' @importFrom survival survexp
NULL
#' @importFrom reshape2 melt
NULL
#' @importFrom utils packageVersion
NULL
#' @importFrom ggpubr ggarrange
NULL
#' Plot Factor Merger
#'
#' @param x object of a class \code{factorMerger}.
#' @param panel Type of panels to be plot. Possible values are \code{c("all", "response", "GIC", "tree")}.
#' All types of plots include the Factor Merger Tree. Apart from the Factor Merger Tree there are
#' also two possible panels: the Response Plot (response summary, specific to the model family),
#' the GIC Plot (GIC vs. loglikelihood/p-value).
#' \itemize{
#' \item \code{"all"} plots all panels and a short summary of the full model,
#' \item \code{"response"} plots the Factor Merger Tree and the Response Plot,
#' \item \code{"GIC"} plots the Factor Merger Tree and the GIC Plot,
#' \item \code{"tree"} plots the Factor Merger Tree only.
#' }
#' @param statistic Statistic to be displayed on the OX axis of the Factor Merger Tree.
#' Possible values are \code{c("loglikelihood", "p-value")}.
#' If \code{"p-value"} is chosen p-value for the Likelihood Ratio Test against the full model is plot on the OX axis.
#' @param nodesSpacing Type of vertical nodes spacing in the Factor Merger Tree). May be chosen from
#' \code{c("equidistant", "effects", "modelSpecific")}. \code{"effects"} arranges nodes according to
#' the model coefficients estimatiors (e.g. in Gaussian case on the OY axis group means are plotted).
#'
#' # TODO: Implement "modelSpecific".
#'
#' @param responsePanel Response panel type -- accepts the following values dependent on the model family:
#' \itemize{
#' \item multi dimensional Gaussian: \code{c("heatmap", "profile")},
#' \item single dimensional Gaussian: \code{c("means", "boxplot","tukey")},
#' \item binomial: \code{c("proportion")},
#' \item survival: \code{c("survival")}
#' }
#'
#' @param colorClusters Boolean. If \code{TRUE}, the default, the Factor Merger Tree is colored according
#' to the optimal factor split (defined by \code{splitStatistic} and \code{splitThreshold} or
#' \code{splitStatistic} and \code{penalty}).
#' @param splitStatistic Statistic used in the optimal split definition. Possible values are:
#' \code{c("GIC", "loglikelihood", "p-value")}. If \code{"GIC"} is chosen, factor is split to minimize GIC
#' with the penalty \code{penalty}. Otherwise, chooses the very last partition whose corresponding statistic
#' (model loglikelihood or p-value for the LRT test) is not lower than \code{splitThreshold}.
#' @param splitThreshold Threshold used in the optimal split definition. Used only with
#' \code{splitStatistic = c("loglikelihood", "p-value")}.
#' @param penalty GIC penalty used for defining the optimal partition with \code{splitStatistic = "GIC"}.
#' The same penalty is used in the GIC plot.
#' @param showSplit Boolean. If \code{TRUE} plots vertical line crossing the optimal split.
#' @param showSignificance Boolean. If \code{TRUE}, the default, marks partitions
#' that are significantly worse than their predecessors on the Factor Merger Tree (uses the Likelihood Ratio Test).
#'
#' Significance codes are: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1.
#' @param chisqQuantile Significance level used if \code{panelGrid = TRUE}.
#'
#' @param palette Color palette used in the Factor Merger Tree and the Response Plot.
#' @param responsePanelPalette Additional color palette used in the Response Plot if
#' palettes for the Factor Merger Tree and the Response Plot are to be different.
#' @param gicPanelColor Color used in the GIC plot.
#' @param title Factor Merger Tree plot's title.
#' @param subtitle Factor Merger Tree plot's subtitle.
#' @param panelGrid Boolean. If \code{TRUE}, each interval on
#' the OX axis of the Factor Merger Tree corresponds to the 1 - \code{chisqQuantile}
#' quantile of chi-square distribution. Otherwise, panel is blank.
#' @param ... Other arguments
#'
#' @export
plot.factorMerger <- function(x, panel = "all",
statistic = "loglikelihood",
nodesSpacing = "equidistant",
colorClusters = TRUE,
splitStatistic = "GIC",
splitThreshold = NULL,
penalty = 2,
showSplit = FALSE,
showSignificance = TRUE,
title = "Factor Merger Tree",
subtitle = " ",
palette = NULL,
responsePanel = NULL,
responsePanelPalette = NULL,
gicPanelColor = NULL,
panelGrid = TRUE,
chisqQuantile = 0.05, ...) {
stopifnot(panel %in% c("all", "response", "GIC", "tree"))
stopifnot(statistic %in% c("loglikelihood", "p-value"))
stopifnot(nodesSpacing %in% c("equidistant", "effects", "modelSpecific"))
stopifnot(splitStatistic == "GIC" | !is.null(splitThreshold))
clusterSplit <- getClusterSplit(splitStatistic, splitThreshold, penalty)
responsePanel <- checkResponsePanel(x, responsePanel)
if (is.null(responsePanelPalette) && !is.null(palette)) {
responsePanelPalette <- palette
}
responsePlot <- plotResponse(x, responsePanel,
colorClusters, clusterSplit,
responsePanelPalette)
# Set colors
if (colorClusters) {
colorsDf <- getOptimalPartitionDf(x,
clusterSplit[[1]],
clusterSplit[[2]])
} else {
colorsDf <- data.frame(
orig = levels(x$factor),
pred = levels(x$factor)
)
}
mergingPathPlot <- plotTree(x, statistic, nodesSpacing,
clusterSplit, showSplit, showSignificance,
chisqQuantile, colorClusters, colorsDf, palette,
title, subtitle, panelGrid)
switch(panel,
"tree" = {
return(mergingPathPlot)
},
"all" = {
anTable <- calculateAnovaTable(x$initialModel)
if(length(x$covariates)>0){
if(class(x)[2]=="gaussianFactorMerger"){
anTable <- t(anTable)
nGroups <- as.numeric(anTable[1,2])
nGroups <- nGroups-dim(x$covariates)[2]
anTable[1,2] <- nGroups
anTable <- t(anTable)
}
}
aovTable <- plotTable(anTable)
return(ggarrange(mergingPathPlot, responsePlot,
plotGIC(x, gicPanelColor,
penalty, statistic),
aovTable,
ncol = 2, nrow = 2, align = "hv",
widths = c(2, 1), heights = c(2, 1)))
},
"response" = {
return(ggarrange(mergingPathPlot, responsePlot,
ncol = 2, align = "hv",
widths = c(2, 1)))
},
"GIC" = {
return(ggarrange(mergingPathPlot,
plotGIC(x, gicPanelColor,
penalty, statistic),
nrow = 2, align = "hv",
heights = c(2, 1)))
})
}
getClusterSplit <- function(splitStatistic, splitThreshold, penalty) {
stopifnot(splitStatistic %in% c("GIC", "loglikelihood", "p-value"))
if (splitStatistic == "GIC") {
return(
list(stat = "GIC",
value = penalty)
)
}
return(
list(stat = splitStatistic,
value = splitThreshold)
)
}
# -------------------
# Factor Merger Tree
treeTheme <- function(panelGrid = TRUE) {
myTheme <- theme_minimal() +
theme(panel.grid.major.x = element_line(color = "lightgrey",
linetype = 2),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 12),
plot.title = element_text(size = 18),
plot.subtitle = element_text(size = 12),
legend.position = "none")
if (!panelGrid) {
myTheme <- myTheme + theme(panel.grid.major.x = element_blank())
}
return(myTheme)
}
#' Plot Tree - Helper Function
#'
#' @param factorMerger object to be plotted
#' @param statistic this parameter will be passed to plotSimpleTree
#' @param nodesSpacing this parameter will be passed to plotSimpleTree
#' @param clusterSplit this parameter will be passed to plotSimpleTree
#' @param markBestModel this parameter will be passed to plotSimpleTree
#' @param markStars this parameter will be passed to plotSimpleTree
#' @param alpha this parameter will be passed to plotSimpleTree
#' @param color this parameter will be passed to plotSimpleTree
#' @param colorsDf this parameter will be passed to plotSimpleTree
#' @param palette this parameter will be passed to plotSimpleTree
#' @param title this parameter will be passed to plotSimpleTree
#' @param subtitle this parameter will be passed to plotSimpleTree
#' @param panelGrid this parameter will be passed to plotSimpleTree
#' @export
plotTree <- function(factorMerger, statistic, nodesSpacing,
clusterSplit, markBestModel, markStars,
alpha, color, colorsDf,
palette = NULL, title, subtitle, panelGrid) {
stopifnot(statistic %in% c("loglikelihood", "p-value"))
if (nodesSpacing == "equidistant") {
return(
plotSimpleTree(factorMerger, statistic, clusterSplit,
markBestModel, markStars,
alpha, color, colorsDf, palette,
title, subtitle, panelGrid)
)
}
return(
plotCustomizedTree(factorMerger, statistic, clusterSplit,
nodesSpacing, markBestModel, markStars,
alpha, color, colorsDf, palette,
groupsStats(factorMerger),
title, subtitle, panelGrid)
)
}
plotSimpleTree <- function(factorMerger, statistic, clusterSplit,
markBestModel, markStars,
alpha, color, colorsDf, palette = NULL,
title, subtitle, panelGrid) {
# We want to have reverse order of variables! TODO
nodesPosition <- as.data.frame(getFinalOrder(factorMerger, TRUE))
mH <- mergingHistory(factorMerger)
noStep <- nrow(mH)
for (step in 1:noStep) {
which <- rownames(nodesPosition) %in% mH[step, ]
newLine <- nodesPosition[which, ] %>%
mean()
nodesPosition <- rbind(nodesPosition, newLine)
rownames(nodesPosition)[nrow(nodesPosition)] <-
paste(mH[step, ], collapse = "")
}
return(plotCustomizedTree(factorMerger, statistic, clusterSplit,
"equidistant", markBestModel, markStars,
alpha, color, colorsDf, palette,
nodesPosition, title, subtitle, panelGrid))
}
#' @importFrom scales hue_pal
plotCustomizedTree <- function(factorMerger, statistic, clusterSplit,
nodesSpacing, markBestModel, markStars,
alpha, color, colorsDf, palette = NULL,
nodesPosition = NULL, title,
subtitle, panelGrid) {
statisticColname <- getStatNameInTable(statistic)
if (nodesSpacing == "modelSpecific") {
nodesPosition <- applyModelTransformation(factorMerger, nodesPosition)
}
segment <- getTreeSegmentDf(factorMerger, statisticColname, nodesPosition)
df <- segment$df
pointsDf <- segment$pointsDf
labelsDf <- segment$labelsDf %>%
arrange(-y1)
showY <- nodesSpacing != "equidistant"
g <- ggplot(df) +
geom_segment(aes(x = x1, y = y1, xend = x2, yend = y2), size = 0.5) +
geom_point(data = pointsDf, aes(x = x1, y = y1), size = 0.75) +
scale_y_continuous(limits = getLimits(labelsDf, showY),
position = "right",
breaks = labelsDf$y1,
labels = getLabels(labelsDf,
factorMerger),
expand = c(0, 0)) +
ylab(getStatisticName(factorMerger)) + xlab(statistic) +
labs(title = title,
subtitle = subtitle) + treeTheme(panelGrid)
if (color) {
g <- addClustersColors(g, segment = segment,
factorMerger = factorMerger,
clusterSplit = clusterSplit,
statistic = statistic,
palette = palette)
} else {
clusterColors <- getClustersColorsNames(palette,
NROW(colorsDf),
factor(labelsDf$label,
levels = labelsDf$label))
g <- g + theme(axis.text.y = element_text(
color = rev(clusterColors),
size = 15))
}
g <- scaleAxis(g, statistic, alpha)
if (markBestModel) {
mark <- markOptimalModel(factorMerger, clusterSplit, statistic, alpha)
g <- g + geom_vline(xintercept = mark$intercept,
col = "mediumorchid3", linetype = "dotted") +
geom_label(x = mark$labelIntercept,
y = getLimits(labelsDf, showY)[1] +
0.1 * (getLimits(labelsDf, showY)[2] -
getLimits(labelsDf, showY)[1]),
label = mark$label,
alpha = 0.5,
col = "mediumorchid3",
angle = 90, size = 3,
fontface = "italic")
}
if (markStars) {
g <- g + geom_text(data = pointsDf, aes(x = x1, y = y1,
label = factor(significance)),
hjust = 1, vjust = 0.25, size = 5)
}
if (!is.null(palette)) {
g <- g + scale_fill_brewer(palette = palette) +
scale_color_brewer(palette = palette)
}
return(g)
}
getClustersColorsNames <- function(palette, nGroups, pred) {
if (is.null(palette)) {
return(factor(pred, labels = hue_pal()(nGroups)) %>%
as.character())
}
# Fake data
df <- data.frame(x = 1:nGroups, y = 1, col = letters[1:nGroups])
g <- ggplot(df, aes_string(x = "x", y = "y", color = "col")) +
geom_point() +
scale_color_brewer(palette = palette)
colors <- ggplot_build(g)$data[[1]]$colour
return(factor(pred, labels = colors) %>%
as.character())
}
addClustersColors <- function(plot, segment,
factorMerger, clusterSplit,
statistic, palette = NULL) {
segmentColoured <- getClustersColors(segment, factorMerger,
clusterSplit, statistic)
plot <- plot +
geom_segment(data = segmentColoured$df,
aes(x = x1, y = y1,
xend = x2, yend = y2,
col = pred),
size = 0.75) +
geom_point(data = segmentColoured$pointsDf,
aes(x = x1, y = y1, col = pred), size = 0.75)
nGroups <- length(unique(segmentColoured$labelsDf$pred))
clusterColors <- getClustersColorsNames(palette,
nGroups,
segmentColoured$labelsDf$pred)
plot <- plot + theme(axis.text.y = element_text(
color = rev(clusterColors),
size = 15))
return(plot)
}
scaleAxis <- function(plot, statistic, alpha) {
if (statistic == "p-value") {
plot <- plot + scale_x_log10()
}
if (statistic == "loglikelihood") {
labBr <- getChisqBreaks(plot$data, alpha)
plot <- plot +
scale_x_continuous(breaks = labBr$breaks, labels = labBr$labels)
}
return(plot)
}
markOptimalModel <- function(factorMerger, clusterSplit, statistic, alpha) {
optimalNSteps <- optimalNumberOfMerges(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]])
mH <- mergingHistory(factorMerger, T, F)
intercept <- mH[optimalNSteps + 1, getStatNameInTable(statistic)]
if (statistic == "p-value") {
label <- paste0("alpha = ", round(intercept, 2))
labelIntercept <- log10(intercept)
}
if (statistic == "loglikelihood") {
label <- paste0("loglikelihood = ", round(intercept))
labelIntercept <- intercept
}
if (clusterSplit[[1]] == "GIC") {
label <- paste0("min GIC")
}
return(list(
label = label,
intercept = intercept,
labelIntercept = labelIntercept
))
}
applyModelTransformation <- function(object, nodesPosition) {
UseMethod("applyModelTransformation", object)
}
applyModelTransformation.default <- function(object, nodesPosition) {
warning("Model specific nodes spacing is not supported yet.")
return(nodesPosition)
}
# -------------------
# Response - top right plot
checkResponsePanel <- function(object, responsePanel) {
UseMethod("checkResponsePanel", object)
}
checkResponsePanel.gaussianFactorMerger <- function(factorMerger,
responsePanel) {
if (NCOL(factorMerger$response) > 1) {
responsePanelSet <- c("profile", "heatmap", "frequency")
} else {
responsePanelSet <- c("means", "boxplot", "frequency", "tukey")
}
warnIfUnexpectedResponsePanel(responsePanelSet, responsePanel)
}
checkResponsePanel.binomialFactorMerger <- function(factorMerger,
responsePanel) {
responsePanelSet <- c("proportion", "frequency")
warnIfUnexpectedResponsePanel(responsePanelSet, responsePanel)
}
checkResponsePanel.survivalFactorMerger <- function(factorMerger,
responsePanel) {
responsePanelSet <- c("survival", "frequency")
warnIfUnexpectedResponsePanel(responsePanelSet, responsePanel)
}
warnIfUnexpectedResponsePanel <- function(responsePanelSet,
responsePanel) {
if (is.null(responsePanel)) {
return(responsePanelSet[1])
}
if (!(responsePanel %in% responsePanelSet)) {
warning(
paste0(
"ResponsePanel '", responsePanel,
", is not supported by supplied model family -- ",
responsePanelSet[1], " used insted."
)
)
return(responsePanelSet[1])
}
return(responsePanel)
}
#' Plot Response - Helper Function
#' @param factorMerger object to be plotted
#' @param responsePanel logical - shall it be plotted
#' @param colorClusters colors
#' @param clusterSplit criteria for splitting
#' @param responsePanelPalette colors for reponse
#' @export
plotResponse <- function(factorMerger, responsePanel,
colorClusters, clusterSplit,
responsePanelPalette) {
switch(responsePanel,
"heatmap" = {
return(plotHeatmap(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"profile" = {
return(plotProfile(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"boxplot" = {
return(plotBoxplot(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"means" = {
return(plotMeansAndConfInt(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"survival" = {
return(plotSurvival(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"proportion" = {
return(plotProportion(factorMerger,
colorClusters, clusterSplit,
responsePanelPalette))
},
"frequency" = {
return(plotFrequency(factorMerger,
FALSE, clusterSplit))
},
"tukey" = {
return(plotTukey(factorMerger,
responsePanelPalette))
})
}
findSimilarities <- function(factorMerger) {
stats <- calculateMeansAndRanks(factorMerger$response,
factorMerger$covariate,
factorMerger$factor)
varsToBePloted <- reshape(stats %>% subset(select = -mean),
idvar = "level",
timevar = "variable",
direction = "wide")
distances <- dist(varsToBePloted[, -1] %>% t(), method = "manhattan")
distances[distances == 0] <- 1e-4
iso <- MASS::isoMDS(distances, k = 1, trace = FALSE)$points[, 1]
iso <- data.frame(var = stats$variable %>% unique(), proj = iso) %>%
arrange(proj)
stats$variable <- factor(stats$variable,
levels = iso$var)
return(stats)
}
#' Heatmap (multi-dimensional Gaussian)
#'
#' @description Plots heatmap for each dimension of the response variable. Vector of means of factor levels for a given
#' dimension is scaled to have mean equal to zero and standard deviation equal to one.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotHeatmap <- function(factorMerger, color, clusterSplit, palette = "Greys") {
levels <- getFinalOrderVec(factorMerger)
factorMerger$factor <- factor(factorMerger$factor, levels = levels)
df <- findSimilarities(factorMerger)
tmp <- tapply(df$mean, df$variable, function(x) scale(x) %>% as.numeric())
df$mean <- (do.call(cbind, tmp) %>% reshape2::melt())$value
df %>% ggplot() +
geom_tile(aes_string(x = "level", y = "variable", fill = "mean")) +
coord_flip() + theme_minimal() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 12),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(size = 18),
plot.subtitle = element_text(size = 12),
legend.position = "none") +
xlab("") + scale_y_discrete(expand = c(0, 0)) +
scale_fill_distiller(palette = palette) +
labs(title = "Heatmap", subtitle = "Group means by variable")
}
#' Profile plot (multi-dimensional Gaussian)
#'
#' @description Plots rank plot - one series is a single factor level and one group
#' on the OX axis is a single dimension of the response.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotProfile <- function(factorMerger, color, clusterSplit, palette = NULL) {
df <- findSimilarities(factorMerger)
profileLevels <- df %>%
filter(variable == levels(df$variable) %>%
head(1)) %>%
arrange(rank) %>%
subset(select = level)
df$group <- factor(df$level, levels = profileLevels$level)
noLevels <- length(levels(df$level))
df$rank <- factor(df$rank, levels = noLevels:1)
if (color) {
df <- df %>% left_join(getOptimalPartitionDf(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]]),
by = c("level" = "orig"))
g <- df %>% ggplot(aes(x = variable, y = rank,
colour = pred, group = group, label = group))
} else {
g <- df %>% ggplot(aes(x = variable, y = rank,
col = group, group = group, label = group)) +
scale_color_manual(values = colorRamps::magenta2green(noLevels))
}
g <- g + geom_line(size = 1) +
geom_text(data =
subset(df,
variable == levels(df$variable) %>% tail(1)),
aes(x = variable),
size = 4, hjust = 0.8, nudge_x = 0.5) +
ylab("") + xlab("") +
labs(title = "Profile plot", subtitle = "Variable means ranks") +
theme_minimal() + theme(legend.position = "none",
plot.title = element_text(size = 18),
axis.text = element_text(size = 12),
plot.subtitle = element_text(size = 12)) +
scale_y_discrete(expand = c(1 / (2 * nrow(df)), 1 / (2 * nrow(df))))
if (!is.null(palette) & color) {
g <- g + scale_color_brewer(palette = palette) +
scale_fill_brewer(palette = palette)
}
return(g)
}
#' Boxplot (single-dimensional Gaussian)
#'
#' @description Plots boxplot with mean as a summary statistic groupping observation by factor levels.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotBoxplot <- function(factorMerger, color, clusterSplit, palette = NULL) {
levels <- getFinalOrderVec(factorMerger)
factorMerger$factor <- factor(factorMerger$factor, levels = levels)
df <- data.frame(group = factorMerger$factor, y = factorMerger$response)
df <- calculateBoxPlotMoments(df)
if (color) {
df <- df %>% left_join(getOptimalPartitionDf(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]]),
by = c("group" = "orig"))
g <- df %>% ggplot(aes(y = y, x = group, group = group, fill = pred))
} else {
g <- df %>% ggplot(aes(y = y, x = group, group = group))
}
g <- g + geom_boxplot(aes(ymin = y0, lower = y25, middle = y50, upper = y75, ymax = y100), coef = 1000) +
coord_flip() + treeTheme() + xlab("") + ylab("") +
theme(axis.text.y = element_blank()) +
labs(title = "Boxplot", subtitle = "Summary statistic: mean")
if (!is.null(palette)) {
g <- g +
scale_color_brewer(palette = palette) +
scale_fill_brewer(palette = palette)
}
return(g)
}
#' Means and standard deviation plot (single-dimensional Gaussian)
#'
#' @description For each factor level plots its mean and interval of the length equal to its standard deviation.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotMeansAndConfInt <- function(factorMerger, color,
clusterSplit, palette = NULL) {
factor <- factor(factorMerger$factor,
levels = getFinalOrderVec(factorMerger))
df <- getMeansAndStds(factorMerger, factor)
if (color) {
part <- getOptimalPartitionDf(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]])
part$orig <- factor(part$orig, levels = getFinalOrderVec(factorMerger))
df <- df %>% left_join(part, by = c("group" = "orig"))
g <- df %>% ggplot(aes(colour = pred, fill = pred,
x = as.factor(group),
y = mean, group = as.factor(group)))
} else {
g <- df %>% ggplot(aes(x = as.factor(group), colour = as.factor(group),
y = mean, group = as.factor(group)))
}
g <- g + geom_errorbar(aes(ymin = left, ymax = right),
width = .5,
position = position_dodge(.5)) + treeTheme() +
geom_point(size = 2) + coord_flip() +
theme(axis.title.x = element_text(),
axis.text.y = element_blank()) +
labs(title = "Group means",
subtitle = "with 95% confidence intervals") +
ylab("")
if (!is.null(palette)) {
g <- g +
scale_color_brewer(palette = palette) +
scale_fill_brewer(palette = palette)
}
return(g)
}
#' Proportion plot (binomial)
#'
#' @description Plots proportion of success for each factor level.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotProportion <- function(factorMerger, color, clusterSplit, palette = NULL) {
levels <- getFinalOrderVec(factorMerger)
factorMerger$factor <- factor(factorMerger$factor, levels = levels)
df <- data.frame(group = factorMerger$factor, y = factorMerger$response)
if (color) {
df <- df %>% group_by(group) %>% summarise(mean = mean(y == 1))
df <- df %>% left_join(getOptimalPartitionDf(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]]),
by = c("group" = "orig"))
g <- df %>% ggplot() +
geom_bar(aes(x = group, y = mean, fill = pred), stat = "identity")
} else {
g <- df %>% ggplot() +
geom_bar(aes(x = group, fill = as.factor(y)),
position = "fill")
}
g <- g + scale_y_continuous(labels = scales::percent, name = "") +
scale_x_discrete(expand = c(0, 0)) +
coord_flip() + treeTheme() +
theme(axis.title.y = element_blank(), axis.text.y = element_blank()) +
labs(title = "Success ratio")
if (!is.null(palette)) {
g <- g +
scale_fill_brewer(palette = palette)
}
return(g)
}
#' Survival plot (survival)
#'
#' @description Plots adjusted survival curves for coxph model for each group. Survival probabilities are calculated from \code{coxph} model.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#' @param palette custom palette
#'
#' @export
plotSurvival <- function(factorMerger, color, clusterSplit, palette = NULL) {
levels <- getFinalOrderVec(factorMerger)
factorMerger$factor <- factor(factorMerger$factor,
levels = levels)
df <- data.frame(response = factorMerger$response, factor = factorMerger$factor)
model <- calculateModel(factorMerger, factorMerger$factor)
pred <- survexp(~factor, data = df, ratetable = model)
curve <- data.frame(time = rep(c(0,pred$time), length(levels)),
variable = factor(rep(levels, each=1+length(pred$time))),
surv = c(rbind(1, pred$surv)))
if (color) {
factorNew <- time <- surv <- NULL
df$factorNew <- cutTree(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]])
keys <- unique(df[,c("factor", "factorNew")])
both <- merge(curve, keys, by.x = "variable", by.y = "factor")
curve <- summarise(
group_by(
both,
variable = factorNew, time),
surv = mean(surv, na.rm=TRUE))
}
g <- ggplot(curve, aes(x = time, y = surv, color=variable)) +
geom_step() +
treeTheme() +
scale_y_continuous(limits = c(0,1)) +
ylab("Survival rate") +
labs(title = "Survival plot",
subtitle = "")
ggpubr::ggpar(g, palette = palette)
}
getGICBreaks <- function(mH) {
return(c(mH$GIC[1], min(mH$GIC), mH$GIC[length(mH$GIC)]) %>%
unique() %>% round())
}
# -------------------
# GIC plot - bottom left plot
#' GIC plot
#'
#' @description Plots Generalized Information Criterion for models on the Factor Merger Tree.
#' @param factorMerger object of a class \code{factorMerger}
#' @param color GIC plot color.
#' @param statistic cluster split statistic
#' @param penalty GIC penalty
#' @importFrom grDevices hcl
#'
#' @export
plotGIC <- function(factorMerger, color, penalty = 2, statistic) {
if (is.null(color)) {
color <- "#762A83"
}
mH <- mergingHistory(factorMerger, T, F)
mH$GIC <- -2 * mH$model + penalty * nrow(mH):1
minGIC <- min(mH$GIC)
yBreaks <- getGICBreaks(mH)
minModel <- mH$model[which.min(mH$GIC)]
g <- mH %>%
ggplot(aes_string(x = getStatNameInTable(statistic),
y = "GIC")) +
geom_line(col = color, size = 1) +
geom_point(col = color, size = 1.5) +
geom_point(x = minModel, y = minGIC,
col = color, size = 2.5) +
geom_ribbon(aes_string(x = getStatNameInTable(statistic),
ymin = "minGIC", ymax = "GIC"),
fill = color, alpha = 0.2) +
treeTheme() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_text(),
panel.grid.major = element_blank()) +
scale_y_continuous(position = "right",
breaks = yBreaks)
annotation <- data.frame(
xpos = max(mH[, getStatNameInTable(statistic)]),
ypos = max(mH[, "GIC"]),
text = paste0("GIC penalty = ", round(penalty, 1)),
hjust = 1, vjust = 1)
g <- g +
geom_label(data = annotation,
aes(x = xpos, y = ypos, hjust = hjust,
vjust = vjust, label = text)) +
ylab("")
if (statistic == "p-value") {
g <- g + scale_x_log10()
}
return(g)
}
ggDefaultPalette <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
#' TukeyHSD Plot
#'
#' @description TODO: Aga
#'
#' @param factorMerger object of a class \code{factorMerger}
#' @param palette RColorBrewer color palette
#'
#' @export
plotTukey <- function(factorMerger, palette = NULL) {
response <- factorMerger$response
factor <- factorMerger$factor
tukeyGroups <- getTukeyGroups(response, factor)
tukeyGroups$level <- rownames(tukeyGroups)
tukeyGroups[tukeyGroups == FALSE] <- NA
levelsOrder <- getFinalOrderVec(factorMerger)
tukeyGroups <- tukeyGroups %>% melt(id.vars = "level") %>%
filter(!is.na(value))
cols <- scales::hue_pal()(length(levels(tukeyGroups$variable)))
tukPlot <- tukeyGroups %>%
ggplot(aes(variable, factor(level, levels = levelsOrder))) +
geom_tile(aes(fill = paste(value, variable), color = "#000000"),
show.legend = FALSE, na.rm = TRUE) +
scale_color_manual(values = "#000000") +
scale_y_discrete(expand = c(0, 0)) +
ggtitle(label = "Tukey HSD test", subtitle = " ") +
xlab("") + treeTheme(FALSE) + theme(axis.text.y = element_blank())
if (is.null(palette)) {
return(tukPlot +
scale_fill_manual(values = rev(cols)))
}
return(tukPlot + scale_fill_brewer(direction = -1, palette = palette))
}
# -------------------
# Frequency plot
#' Frequency plot
#'
#' @description Plots barplot with group frequencies.
#'
#' @param factorMerger object of a class \code{factorMerger}
#' @param color Boolean. If \code{TRUE}, the default, there is added aesthetic group corresponding
#' to the final cluster split.
#' @param clusterSplit final cluster split definition. A list with two fields:
#' \itemize{
#' \item \code{stat} cluster statistic (available statistics are: \code{"loglikelihood"}, \code{"pvalue"}, \code{"GIC"}),
#' \item \code{value} cut threshold / GIC penalty
#' }
#'
#' @export
plotFrequency <- function(factorMerger, color, clusterSplit) {
levels <- getFinalOrderVec(factorMerger)
factorMerger$factor <- factor(factorMerger$factor, levels = levels)
df <- data.frame(group = factorMerger$factor)
df <- df %>% count(group)
if (color) {
df <- df %>% left_join(getOptimalPartitionDf(factorMerger,
clusterSplit[[1]],
clusterSplit[[2]]),
by = c("group" = "orig"))
g <- df %>% ggplot() +
geom_bar(aes(x = group, y = n, fill = pred), stat = "identity")
} else {
g <- df %>% ggplot() +
geom_bar(aes(x = group, y = n), fill = "navy", stat = "identity")
}
g + scale_y_continuous(name = "") +
scale_x_discrete(expand = c(0, 0)) +
coord_flip() +
treeTheme() +
theme(panel.grid.major.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank()) +
xlab(" ") + ylab(" ") +
labs(title = "Groups frequencies", subtitle = " ")
}
plotTable <- function(tab) {
return(ggpubr::ggtexttable(tab, theme =
ggpubr::ttheme(base_colour = "gray50",
base_size = 25,
"classic")))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.