library(shiny)
library(shinyalert)
# Define UI for random distribution app ----
ui <- fluidPage(
# App title ----
titlePanel(tags$h1(tags$b("mixMVPLN:"),"Mixtures of Matrix Variate
Poisson-log Normal With Variational-EM Framework
for Clustering Three-way Count Data")),
# Sidebar layout with input and output definitions ----
sidebarLayout(
# Sidebar panel for inputs ----
sidebarPanel(
tags$p("Description: This is a Shiny App that is part of the mixMVPLN R package
(Silva et al., 2022). Most of the functions available via the package are made
available with Shiny App. The mixMVPLN is an R package for performing
clustering using mixtures of matrix variate Poisson-log normal (mixMVPLN)
distribution provided a three-way count dataset. The observations of the
dataset will be clustered into subgroups. The app permits to model select
using Bayesian information criterion (BIC), Integrated Complete Likelihood (ICL)
criterion, and Akaike Information Criterion (AIC) values. Results will
be available as dotted line plots of information criteria value versus
cluster size, heatmaps of input datset with cluster membership information,
alluvial plot of observations by cluster, and barplot of posterior
probabilities."),
# br() element to introduce extra vertical spacing ----
br(),
br(),
# input
tags$b("Instructions: Below, upload a three-way count dataset
in rds format, and enter or select values to perform
cluster analysis. Default values are shown. Then press
'Run'. Navigate through the different tabs to the right
to explore the results."),
# br() element to introduce extra vertical spacing ----
br(),
br(),
br(),
# input
shinyalert::useShinyalert(), # Set up shinyalert
uiOutput("tab2"),
actionButton(inputId = "data1",
label = "Dataset 1 Details"),
fileInput(inputId = "file1",
label = "Dataset: Select a three-way count dataset to analyze.
File should be in single R object (.rds) format containing
three-way data of units (n), defined by occasions/layers (r) and
the variables/responses (p) save as a list in R. You may
download an example dataset above and explore first.",
accept = c(".rds")),
textInput(inputId = "ngmin",
label = "ngmin: Enter the minimum value of components or clusters
for clustering. This should be a positive integer.", "1"),
textInput(inputId = "ngmax",
label = "ngmax: Enter the maximum value of components or clusters.
for clustering. This should be a positive integer, bigger
than ngmin and less than or equal to number of total units (n)
in the dataset.", "2"),
selectInput(inputId = 'typeinitMethod',
label = 'initMethod: Select the initialization method.',
choices = c("kmeans",
"random",
"medoids",
"clara",
"fanny")),
textInput(inputId = "nInitIterations",
label = "nInitIterations: Enter the number of initial iterations.
This should be a positive integer.", "1"),
selectInput(inputId = 'typenormalize',
label = 'Normalization: Select whether to perform normalization
or not. Currently, normalization is performed by
calculating normalization factors via TMM method of edgeR
package (Robinson, et al., 2010). The option Yes is recommended
for raw RNA sequencing count data.',
choices = c("'Yes' ",
"'No' ")),
# br() element to introduce extra vertical spacing ----
br(),
# actionButton
actionButton(inputId = "button2",
label = "Run"),
# br() element to introduce extra vertical spacing -
br(),
), # End of side pannel
# Main panel for displaying outputs
mainPanel(
# Output: Tabet
tabsetPanel(type = "tabs",
tabPanel("Input Summary",
h3("Instructions: Enter values and click 'Run' at the bottom left side."),
h3("Summary of Count Dataset:"),
br(),
verbatimTextOutput('textOut')),
tabPanel("Cluster Results",
h3("Instructions: Enter values and click 'Run' at the bottom left side."),
h3("Summary of Clustering Results:"),
br(),
verbatimTextOutput('clustering')),
tabPanel("Information Criteria Plot",
h3("Instructions: Enter values and click 'Run' at the bottom left side."),
h3("Model Selection Results:"),
br(),
fluidRow(
splitLayout(cellWidths = c("50%", "50%"), plotOutput('BICvalues'), plotOutput('ICLvalues')),
splitLayout(cellWidths = c("50%", "50%"), plotOutput('AIC3values'), plotOutput('AICvalues')),
)),
tabPanel("Alluvial Plot",
h3("Instructions: Enter values and click 'Run' at the bottom left side."),
h3("Alluvial Plot Showing Observation Memberships by Information Criteria for Input Dataset:"),
h5("Note, below the x-axis values are in the order of BIC, ICL, AIC, AIC3.
Colors are assigned based on cluster membership of model selected via BIC."),
br(),
fluidRow(
splitLayout(cellWidths = c("100%"), plotOutput("alluvialPlot")),
h5("Note, below the x-axis values are in the order of ICL, BIC, AIC, AIC3.
Colors are assigned based on cluster membership of model selected via ICL."),
splitLayout(cellWidths = c("100%"), plotOutput("alluvialPlot2")),
h5("Note, below the x-axis values are in the order of AIC3, ICL, BIC, AIC
Colors are assigned based on cluster membership of model selected via AIC3."),
splitLayout(cellWidths = c("100%"), plotOutput("alluvialPlot3")),
h5("Note, below the x-axis values are in the order of AIC, AIC3, ICL, BIC
Colors are assigned based on cluster membership of model selected via AIC."),
splitLayout(cellWidths = c("100%"), plotOutput("alluvialPlot4")),
)),
tabPanel("Barplot",
h3("Instructions: Enter values and click 'Run' at the bottom left side."),
h3("Barplot of Posterior Probabilities with Cluster Memberships:"),
h5("Note, the plots are in the order of models selected by: BIC (top, left), ICL (top, right) and AIC (bottom, left), AIC3 (bottom, right)."),
br(),
fluidRow(
splitLayout(cellWidths = c("50%", "50%"), plotOutput("barPlotBIC"), plotOutput('barPlotICL')),
splitLayout(cellWidths = c("50%", "50%"), plotOutput("barPlotAIC3"), plotOutput('barPlotAIC'))
))
)
)
)
)
# Define server logic for random distribution app ----
server <- function(input, output) {
# Reactive expression to generate the requested distribution ----
# This is called whenever the inputs change. The output functions
# defined below then use the value computed from this expression
# Step I: save input csv as a reactive
matrixInput <- reactive({
if (! is.null(input$file1))
as.list(readRDS(input$file1$datapath))
})
startclustering <- eventReactive(eventExpr = input$button2, {
withProgress(message = 'Clustering', value = 0, {
# Number of times we'll go through the loop
mixMVPLN::mvplnVGAclus(
dataset = matrixInput(),
membership = "none",
gmin = as.numeric(input$ngmin),
gmax = as.numeric(input$ngmax),
initMethod = as.character(input$typeinitMethod),
nInitIterations = as.numeric(input$nInitIterations),
normalize = "Yes")
})
})
# Textoutput
output$textOut <- renderText({
if (! is.null(startclustering))
a1 <- paste("Number of occasions/layers (r):", nrow(startclustering()$dataset[[1]]), "\n")
a2 <- paste("Number of variables/responses (p):", ncol(startclustering()$dataset[[1]]), "\n")
a3 <- paste("Number of units (n):", length(startclustering()$dataset), "\n")
paste(a1, a2, a3, sep = "\n")
})
# Step II: clustering
output$clustering <- renderText({
if (! is.null(startclustering))
aa <- paste("BIC model selected is:", startclustering()$BICAll$BICmodelselected, "\n")
bb <- paste("ICL model selected is:", startclustering()$ICLAll$ICLmodelselected, "\n")
cc <- paste("AIC model selected is:", startclustering()$AICAll$AICmodelselected, "\n")
dd <- paste("AIC3 model selected is:", startclustering()$AIC3All$AIC3modelselected, "\n")
paste(aa, bb, cc, dd, sep = "\n")
})
# Step III: visualize
# plot ICL value
output$ICLvalues <- renderPlot({
if (! is.null(startclustering))
if (length(startclustering()$loglikelihood) == 1) { # check if only one value
if(as.numeric(input$ngmax) == 1) { # check if only one value is because gmax = 1
plot(c(startclustering()$ICLAll$allICLvalues), type = "p",
xlab = "G", ylab = "ICL value",
main = paste("G vs ICL value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
} else { # check if only one value is because only one model is tested e.g., gmin = 4, gmax = 4
plot(c(rep(NA, as.numeric(input$ngmax) - 1), startclustering()$ICLAll$allICLvalues),
type = "p", xlab = "G", ylab = "ICL value",
main = paste("G vs ICL value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
}
} else { # ff more than one value
plot(x = c(as.numeric(input$ngmin):as.numeric(input$ngmax)),
y = startclustering()$ICLAll$allICLvalues, type = "l",
lty = 2, xlab = "G", ylab = "ICL value",
main = paste("G vs ICL value"), xaxt="n")
axis(1, at = seq(as.numeric(input$ngmin), as.numeric(input$ngmax), by = 1))
}
})
output$BICvalues <- renderPlot({
if (! is.null(startclustering))
if (length(startclustering()$loglikelihood) == 1) { # check if only one value
if(as.numeric(input$ngmax) == 1) { # check if only one value is because gmax = 1
plot(c(startclustering()$BICAll$allICLvalues), type = "p",
xlab = "G", ylab = "BIC value",
main = paste("G vs BIC value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
} else { # check if only one value is because only one model is tested e.g., gmin = 4, gmax = 4
plot(c(rep(NA, as.numeric(input$ngmax) - 1), startclustering()$BICAll$allBICvalues),
type = "p", xlab = "G", ylab = "BIC value",
main = paste("G vs BIC value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
}
} else { # ff more than one value
plot(x = c(as.numeric(input$ngmin):as.numeric(input$ngmax)),
y = startclustering()$BICAll$allBICvalues, type = "l",
lty = 2, xlab = "G", ylab = "BIC value",
main = paste("G vs BIC value"), xaxt="n")
axis(1, at = seq(as.numeric(input$ngmin), as.numeric(input$ngmax), by = 1))
}
})
output$AICvalues <- renderPlot({
if (! is.null(startclustering))
if (length(startclustering()$loglikelihood) == 1) { # check if only one value
if(as.numeric(input$ngmax) == 1) { # check if only one value is because gmax = 1
plot(c(startclustering()$AICAll$allAICvalues), type = "p",
xlab = "G", ylab = "AIC value",
main = paste("G vs AIC value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
} else { # check if only one value is because only one model is tested e.g., gmin = 4, gmax = 4
plot(c(rep(NA, as.numeric(input$ngmax) - 1), startclustering()$AICAll$allAICvalues),
type = "p", xlab = "G", ylab = "AIC value",
main = paste("G vs AIC value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
}
} else { # ff more than one value
plot(x = c(as.numeric(input$ngmin):as.numeric(input$ngmax)),
y = startclustering()$AICAll$allAICvalues, type = "l",
lty = 2, xlab = "G", ylab = "AIC value",
main = paste("G vs AIC value"), xaxt="n")
axis(1, at = seq(as.numeric(input$ngmin), as.numeric(input$ngmax), by = 1))
}
})
output$AIC3values <- renderPlot({
if (! is.null(startclustering))
if (length(startclustering()$loglikelihood) == 1) { # check if only one value
if(as.numeric(input$ngmax) == 1) { # check if only one value is because gmax = 1
plot(c(startclustering()$AIC3All$allAIC3values), type = "p",
xlab = "G", ylab = "AIC3 value",
main = paste("G vs AIC3 value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
} else { # check if only one value is because only one model is tested e.g., gmin = 4, gmax = 4
plot(c(rep(NA, as.numeric(input$ngmax) - 1), startclustering()$AIC3All$allAIC3values),
type = "p", xlab = "G", ylab = "AIC3 value",
main = paste("G vs AIC3 value"), xaxt="n")
axis(1, at = seq(as.numeric(0), as.numeric(input$ngmax), by = 1))
}
} else { # ff more than one value
plot(x = c(as.numeric(input$ngmin):as.numeric(input$ngmax)),
y = startclustering()$AIC3All$allAIC3values, type = "l",
lty = 2, xlab = "G", ylab = "AIC3 value",
main = paste("G vs AIC3 value"), xaxt="n")
axis(1, at = seq(as.numeric(input$ngmin), as.numeric(input$ngmax), by = 1))
}
})
# plot bar - ICL
barPlottingICL <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
if ((as.numeric(input$ngmax) - as.numeric(input$ngmin) + 1) == 1) {
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[1]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
printPlot = FALSE)
} else {
modelSelect <- which(seq(as.numeric(input$ngmin), as.numeric(input$ngmax), 1) == startclustering()$ICLAll$ICLmodelselected)
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[as.numeric(modelSelect)]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
printPlot = FALSE)
}
})
# plot bar - ICL
output$barPlotICL <- renderPlot({
barPlottingICL()
})
# plot bar - BIC
barPlottingBIC <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
if ((as.numeric(input$ngmax) - as.numeric(input$ngmin) + 1) == 1) {
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[1]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
printPlot = FALSE)
} else {
modelSelect <- which(seq(as.numeric(input$ngmin), as.numeric(input$ngmax), 1) == startclustering()$BICAll$BICmodelselected)
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[as.numeric(modelSelect)]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
printPlot = FALSE)
}
})
# plot bar - BIC
output$barPlotBIC <- renderPlot({
barPlottingBIC()
})
# plot bar - AIC3
barPlottingAIC3 <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
if ((as.numeric(input$ngmax) - as.numeric(input$ngmin) + 1) == 1) {
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[1]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
printPlot = FALSE)
} else {
modelSelect <- which(seq(as.numeric(input$ngmin), as.numeric(input$ngmax), 1) == startclustering()$AIC3All$AIC3modelselected)
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[as.numeric(modelSelect)]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
printPlot = FALSE)
}
})
# plot bar - AIC3
output$barPlotAIC3 <- renderPlot({
barPlottingAIC3()
})
# plot bar - AIC
barPlottingAIC <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
if ((as.numeric(input$ngmax) - as.numeric(input$ngmin) + 1) == 1) {
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[1]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
printPlot = FALSE)
} else {
modelSelect <- which(seq(as.numeric(input$ngmin), as.numeric(input$ngmax), 1) == startclustering()$AICAll$AICmodelselected)
MPLNClust::mplnVisualizeBar(
vectorObservations = 1:length(matrixInput()),
probabilities = as.matrix(startclustering()$allResults[[as.numeric(modelSelect)]]$probaPost),
clusterMembershipVector = as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
printPlot = FALSE)
}
})
# plot bar - AIC
output$barPlotAIC <- renderPlot({
barPlottingAIC()
})
# Alluvial plot
alluvialPlotting <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
MPLNClust::mplnVisualizeAlluvial(nObservations = length(matrixInput()),
firstGrouping =
as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
secondGrouping =
as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
thirdGrouping =
as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
fourthGrouping =
as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
fileName = 'alluvial',
printPlot = FALSE)
})
alluvialPlotting2 <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
MPLNClust::mplnVisualizeAlluvial(nObservations = length(matrixInput()),
firstGrouping =
as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
secondGrouping =
as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
thirdGrouping =
as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
fourthGrouping =
as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
fileName = 'alluvial',
printPlot = FALSE)
})
alluvialPlotting3 <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
MPLNClust::mplnVisualizeAlluvial(nObservations = length(matrixInput()),
firstGrouping =
as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
secondGrouping =
as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
thirdGrouping =
as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
fourthGrouping =
as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
fileName = 'alluvial',
printPlot = FALSE)
})
alluvialPlotting4 <- eventReactive(eventExpr = input$button2, {
if (!is.null(startclustering))
MPLNClust::mplnVisualizeAlluvial(nObservations = length(matrixInput()),
firstGrouping =
as.numeric(startclustering()$AICAll$AICmodelselectedLabels),
secondGrouping =
as.numeric(startclustering()$AIC3All$AIC3modelselectedLabels),
thirdGrouping =
as.numeric(startclustering()$ICLAll$ICLmodelselectedLabels),
fourthGrouping =
as.numeric(startclustering()$BICAll$BICmodelselectedLabels),
fileName = 'alluvial',
printPlot = FALSE)
})
# Alluvial Plot
output$alluvialPlot <- renderPlot({
alluvialPlotting()
})
output$alluvialPlot2 <- renderPlot({
alluvialPlotting2()
})
output$alluvialPlot3 <- renderPlot({
alluvialPlotting3()
})
output$alluvialPlot4 <- renderPlot({
alluvialPlotting4()
})
# URLs for downloading data
url2 <- a("Download Example Dataset 1", href="https://drive.google.com/file/d/1Hbk6hH9QQ-3yR5YkgHCIUtlWkGWS5ipS/view?usp=sharing")
output$tab2 <- renderUI({
tagList("Dataset:", url2)
})
observeEvent(input$data1, {
# Show a modal when the button is pressed
shinyalert(title = "Example Dataset 1",
text = "This is a simulated dataset generated from mixtures of multivariate Poisson log-normal
distributions with G = 2 components. It has a size of n = 1000 observations along rows and d = 6
samples along columns. Data was generated January, 2022. To save the file, click on link, then click 'Download' from the top right side.
Citation: Silva, A., S. J. Rothstein, P. D. McNicholas, and S. Subedi (2019). A multivariate Poisson-log normal
mixture model for clustering transcriptome sequencing data. BMC Bioinformatics. 2019;20(1):394. URL https://pubmed.ncbi.nlm.nih.gov/31311497/",
type = "info")
})
}
# Create Shiny app ----
shinyApp(ui, server)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.