# PgR6M
#' @name PgR6M
#' @title PgR6 class with methods.
#' @description PgR6 with Methods. Final users should use \code{\link{pagoo}}
#' instead of this, since is more easy to understand.
#' Inherits: \code{\link[pagoo]{PgR6}}
#' @importFrom R6 R6Class
#' @import ggplot2
#' @import ggfortify
#' @importFrom shiny uiOutput sliderInput selectInput HTML fluidRow tabPanel splitLayout column renderUI observeEvent reactive eventReactive reactiveVal tags runApp
#' @importFrom shinyWidgets pickerInput addSpinner updatePickerInput
#' @importFrom shinydashboard dashboardSidebar dashboardPage dashboardHeader dashboardBody sidebarMenu infoBoxOutput tabBox box renderInfoBox infoBox
#' @importFrom DT DTOutput renderDT datatable formatSignif JS
#' @importFrom plotly plot_ly plotlyOutput renderPlotly layout add_lines subplot
#' @importFrom heatmaply heatmaply
#' @importFrom reshape2 melt
#' @importFrom vegan vegdist
#' @importFrom dendextend seriate_dendrogram
#' @importFrom magrittr `%>%`
#' @export
PgR6M <- R6Class('PgR6M',
inherit = PgR6,
public = list(
#' @description
#' Create a \code{PgR6M} object.
#' @param data A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}} containing at least the
#' following columns: \code{gene} (gene name), \code{org} (organism name to which the gene belongs to),
#' and \code{cluster} (group of orthologous to which the gene belongs to). More columns can be added as metadata
#' for each gene.
#' @param org_meta (optional) A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}}
#' containing additional metadata for organisms. This \code{data.frame} must have a column named "org" with
#' valid organisms names (that is, they should match with those provided in \code{data}, column \code{org}), and
#' additional columns will be used as metadata. Each row should correspond to each organism.
#' @param cluster_meta (optional) A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}}
#' containing additional metadata for clusters. This \code{data.frame} must have a column named "cluster" with
#' valid organisms names (that is, they should match with those provided in \code{data}, column \code{cluster}), and
#' additional columns will be used as metadata. Each row should correspond to each cluster.
#' @param core_level The initial core_level (that's the percentage of organisms a core cluster must be in to be
#' considered as part of the core genome). Must be a number between 100 and 85, (default: 95). You can change it
#' later by using the \code{$core_level} field once the object was created.
#' @param sep A separator. By default is '__'(two underscores). It will be used to
#' create a unique \code{gid} (gene identifier) for each gene. \code{gid}s are created by pasting
#' \code{org} to \code{gene}, separated by \code{sep}.
#' @param verbose \code{logical}. Whether to display progress messages when loading class.
#' @param DF Deprecated. Use \code{data} instead.
#' @param group_meta Deprecated. Use \code{cluster_meta} instead.
#' @return An R6 object of class PgR6M. It contains basic fields and methods for analyzing a pangenome. It also
#' contains additional statistical methods for analyze it, and methods to make basic
#' exploratory plots.
initialize = function(data,
org_meta,
cluster_meta,
core_level = 95,
sep = '__',
verbose = TRUE,
DF,
group_meta){
# Deprecated args
if (!missing(DF)){
data <- DF
warning('Argument "DF" is deprecated. You should use "data" instead. ',
'This still exists for compatibility with previous versions, ',
'but will throw an error in the future.',
immediate. = TRUE, noBreaks. = TRUE)
}
if (!missing(group_meta)){
cluster_meta <- group_meta
warning('Argument "group_meta" is deprecated. You should use "cluster_meta" instead. ',
'This still exists for compatibility with previous versions, ',
'but will throw an error in the future.',
immediate. = TRUE, noBreaks. = TRUE)
}
super$initialize(data = data,
org_meta,
cluster_meta,
core_level = core_level,
sep = sep,
verbose = verbose)
},
#Analyses Methods
#' @description
#' Rarefact pangenome or corgenome. Compute the number of genes which belong to
#' the pangenome or to the coregenome, for a number of random permutations of
#' increasingly bigger sample of genomes.
#' @param what One of \code{"pangenome"} or \code{"coregenome"}.
#' @param n.perm The number of permutations to compute (default: 10).
#' @return A \code{matrix}, rows are the number of genomes added, columns are
#' permutations, and the cell number is the number of genes in each category.
rarefact = function(what = 'pangenome', n.perm = 10){
what <- match.arg(what, c('pangenome', 'coregenome'))
pm <- self$pan_matrix
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
norgs <- dim(self$organisms)[1]
rmat <- matrix(0L, nrow = norgs, ncol = n.perm)
if (what=='pangenome'){
for (i in seq_len(n.perm)){
cm <- apply(pm[sample(norgs), ], 2, cumsum)
rmat[, i] <- rowSums(cm > 0)
}
}else{
sq <- seq_len(norgs)
for (i in seq_len(n.perm)){
cm <- apply(pm[sample(norgs), ], 2, cumsum)
cr <- apply(cm, 2, `==`, sq)
rmat[, i] <- rowSums(cr)
}
}
rownames(rmat) <- seq_len(norgs)
colnames(rmat) <- paste0('permut_', seq_len(n.perm))
rmat
},
#' @description
#' Compute distance between all pairs of genomes. The default dist method is
#' \code{"bray"} (Bray-Curtis distance). Another used distance method is \code{"jaccard"},
#' but you should set \code{binary = FALSE} (see below) to obtain a meaningful result.
#' See \code{\link[vegan]{vegdist}} for details, this is just a wrapper function.
#' @param method The distance method to use. See \link[vegan]{vegdist}
#' for available methods, and details for each one.
#' @param binary Transform abundance matrix into a presence/absence
#' matrix before computing distance.
#' @param diag Compute diagonals.
#' @param upper Return only the upper diagonal.
#' @param na.rm Pairwise deletion of missing observations when
#' computing dissimilarities.
#' @param ... Other parameters. See \link[vegan]{vegdist} for details.
#' @return A \code{dist} object containing all pairwise dissimilarities between genomes.
dist = function(method = 'bray',
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = FALSE,
...){
METHODS <- c("manhattan","euclidean","canberra","bray",
"kulczynski","jaccard","gower","altGower",
"morisita","horn","mountford","raup",
"binomial","chao","cao","mahalanobis")
method <- match.arg(method, METHODS)
if (method == 'jaccard' & binary == FALSE){
warning('It is recommended to set binary = TRUE when running dist(method = "jaccard")')
}
#vegan::vegdist()
vegan::vegdist(self$pan_matrix,
method = method,
diag = diag,
upper = upper,
na.rm = na.rm,
...)
},
#' @description
#' Performs a principal components analysis on the panmatrix
#' @param center a logical value indicating whether the variables should be shifted
#' to be zero centered. Alternately, a vector of length equal the number of columns of x can be
#' supplied. The value is passed to scale.
#' @param scale. a logical value indicating whether the variables should be scaled
#' to have unit variance before the analysis takes place. The default is TRUE.
#' @param ... Other arguments. See \link[stats]{prcomp}
#' @return Returns a list with class "prcomp". See \link[stats]{prcomp} for more information.
pan_pca = function(center = TRUE, scale. = FALSE, ...){
prcomp(self$pan_matrix, center = center, scale. = scale., ...)
},
#' @description
#' Fits a power law curve for the pangenome rarefaction simulation.
#' @param raref (Optional) A rarefaction matrix, as returned by \code{rarefact()}.
#' @param ... Further arguments to be passed to \code{rarefact()}. If \code{raref}
#' is missing, it will be computed with default arguments, or with the ones provided here.
#' @return A \code{list} of two elements: \code{$formula} with a fitted function, and \code{$params}
#' with fitted parameters. An attribute \code{"alpha"} is also returned (If
#' \code{alpha>1}, then the pangenome is closed, otherwise is open.
pg_power_law_fit = function(raref, ...){
# #micropan::heaps()
# heaps(self$pan_matrix ,n.perm = n.perm)
if (missing(raref)){
raref <- self$rarefact(...)
}
rm <- melt(raref)
# Power law linearization:
# y = K * x ^ delta ==> log(y) = log(K) + delta * log(x)
fitHeaps <- lm(log(rm$value) ~ log(rm$Var1))
logK <- summary(fitHeaps)$coef[1]
delta <- summary(fitHeaps)$coef[2]
K <- exp(logK)
alpha <- 1-delta
ret <- list(formula=NULL,
params=NULL)
ret$formula <- function(x) K * x ^ delta
ret$params <- c(K = K, delta = delta)
attr(ret, 'alpha') <- alpha
ret
},
#' @description
#' Fits an exponential decay curve for the coregenome rarefaction simulation.
#' @param raref (Optional) A rarefaction matrix, as returned by \code{rarefact()}.
#' @param pcounts An integer of pseudo-counts. This is used to better fit the function
#' at small numbers, as the linearization method requires to subtract a constant C, which is the
#' coregenome size, from \code{y}. As \code{y} becomes closer to the coregenome size, this operation
#' tends to 0, and its logarithm goes crazy. By default \code{pcounts=10}.
#' @param ... Further arguments to be passed to \code{rarefact()}. If \code{raref}
#' is missing, it will be computed with default arguments, or with the ones provided here.
#' @return A \code{list} of two elements: \code{$formula} with a fitted function, and \code{$params}
#' with fitted intercept and decay parameters.
cg_exp_decay_fit = function(raref, pcounts = 10, ...){
# Exponential decay linearization:
# y = A * exp(K * t) + C ==> y - C = K*t + log(A)
# C == core size
if (missing(raref)){
raref <- self$rarefact(what = 'core', ...)
}
rm <- melt(raref)
C <- min(rm$value)
fitExp <- lm(log(value - C + pcounts) ~ Var1, data= rm)
A <- exp(summary(fitExp)$coef[1])
B <- summary(fitExp)$coef[2]
ret <- list(formula=NULL,
params=NULL)
ret$formula <- function(x) A * exp(B * x) + C
ret$params <- c(A = A, B = B, C = C)
attr(ret, 'pseudo_counts') <- pcounts
ret
},
# @description
# Computes the genomic fluidity, which is a measure of population
# diversity. See \code{\link[micropan]{fluidity}} for more details.
# @param n.sim An integer specifying the number of random samples
# to use in the computations.
# @return A list with two elements, the mean fluidity and its sample standard
# deviation over the n.sim computed values.
# fluidity = function(n.sim = 10){
# #micropan::fluidity()
# fluidity(self$pan_matrix, n.sim = n.sim)
# },
# @description
# Fits binomial mixture models to the data given as a pan-matrix. From the
# fitted models both estimates of pan-genome size and core-genome size are
# available. See \code{\link[micropan]{binomixEstimate}} for more details.
# @param K.range The range of model complexities to explore. The
# vector of integers specify the number of binomial densities to combine in the
# mixture models.
# @param core.detect.prob The detection probability of core genes.
# This should almost always be 1.0, since a core gene is by definition always
# present in all genomes, but can be set fractionally smaller.
# @param verbose Logical indicating if textual output should be
# given to monitor the progress of the computations.
# @return A \code{Binomix} object (\code{micropan} package), which is a small (S3)
# extension of a \code{list} with two components. These two components are named
# \code{BIC.table} and \code{Mix.list}. Refer to the \code{micropan} function
# \code{\link[micropan]{binomixEstimate}} for a detailed explanation of this
# method.
# binomix_estimate = function(K.range = 3:5,
# core.detect.prob = 1,
# verbose = TRUE){
# # micropan::binomxEstimate()
# binomixEstimate(pan.matrix = self$pan_matrix,
# K.range = K.range,
# core.detect.prob = core.detect.prob,
# verbose = verbose)
# },
#Plot Methods
#' @description
#' Plot a barplot with the frequency of genes within the total number of
#' genomes.
#' @return A barplot, and a \code{gg} object (\code{ggplot2} package) invisibly.
gg_barplot = function(){
pm <- self$pan_matrix
pm[which(pm > 0, arr.ind = TRUE)] <- 1L
dd <- as.data.frame(table(colSums(pm)))
dd$Var1 <- as.integer(as.character(dd$Var1))
ggplot(dd, aes(x=Var1, y=Freq)) +
geom_bar(stat='identity') +
ylab('Number of genes') +
xlab('Number of genomes')
},
#' @description
#' Plot a pangenome binary map representing the presence/absence of each
#' gene within each organism.
#' @return A binary map (\code{ggplot2::geom_raster()}), and a \code{gg} object (\code{ggplot2}
#' package) invisibly.
gg_binmap = function(){
tpm <- t(self$pan_matrix)
tpm[which(tpm > 0, arr.ind = TRUE)] <- 1L
bm <- as.data.frame(tpm)
or <- order(rowSums(bm), decreasing = TRUE)
lvls <- rownames(bm)[or]
bm$Cluster <- factor(rownames(bm), levels = lvls)
bm <- melt(bm, 'Cluster')
bm$value <- factor(bm$value, levels = c(1, 0))
colnames(bm)[which(colnames(bm) == 'variable')] <- "Organism"
ggplot(bm, aes(Cluster, Organism, fill=value)) +
geom_raster() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
scale_fill_grey(start = .2, end = .9)
},
#' @description
#' Plot a heatmap showing the computed distance between all pairs of organisms.
#' @param method Distance method. One of "Jaccard" (default), or "Manhattan",
#' see above.
#' @param ... More arguments to be passed to \code{\link[micropan]{distManhattan}}.
#' @return A heatmap (\code{ggplot2::geom_tile()}), and a \code{gg} object (\code{ggplot2}
#' package) invisibly.
gg_dist = function(method = 'bray', ...){
m <- as.matrix(self$dist(method = method, ...))
me <- melt(m)
ggplot(me, aes(Var1, Var2, fill=value)) +
geom_tile()
},
#' @description
#' Plot a scatter plot of a Principal Components Analysis.
#' @param colour The name of the column in \code{$organisms} field from which points will take
#' colour (if provided). \code{NULL} (default) renders black points.
#' @param ... More arguments to be passed to \code{ggplot2::autoplot()}.
#' @return A scatter plot (\code{ggplot2::autoplot()}), and a \code{gg} object (\code{ggplot2}
#' package) invisibly.
gg_pca = function(colour = NULL, ...){
ocol <- colnames(self$organisms)
ocol <- ocol[ocol!='org']
color <- match.arg(colour, c(ocol, NULL))
pca <- self$pan_pca(...)
autoplot(pca,
data = as.data.frame(self$organisms),
colour = colour,
...)
},
#' @description
#' Plot a pie chart showing the number of clusters of each pangenome category: core,
#' shell, or cloud.
#' @return A pie chart (\code{ggplot2::geom_bar() + coord_polar()}), and a \code{gg} object
#' (\code{ggplot2} package) invisibly.
gg_pie = function(){
st <- self$summary_stats[-1, ]
# st <- st[-1, ]
st$Category <- factor(st$Category,
levels = c("Cloud", "Shell", "Core"))
ggplot(as.data.frame(st), aes(x='', y=Number, fill=Category)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0)
},
#' @description
#' Plot pangenome and/or coregenome curves with the fitted functions returned by \code{pg_power_law_fit()}
#' and \code{cg_exp_decay_fit()}. You can add points by adding \code{+ geom_points()}, of ggplot2 package
#' @param what One of \code{"pangenome"} or \code{"coregenome"}.
#' @param ... ????
#' @return A scatter plot, and a \code{gg} object (\code{ggplot2} package) invisibly.
gg_curves = function(what = c('pangenome', 'coregenome'),
...){
what <- match.arg(what, c('pangenome', 'coregenome'), several.ok = TRUE)
names(what) <- what
lrar <- lapply(what, function(x) self$rarefact(what = x))
lfun <- lapply(what, function(x){
if (x == 'pangenome'){
self$pg_power_law_fit(raref = lrar[[x]])#$formula
}else{
self$cg_exp_decay_fit(raref = lrar[[x]])#$formula
}
})
lrarm <- lapply(lrar, melt)
ll <- lapply(what, function(x) {
olst <- list(data = NULL, formula = NULL)
lrarm[[x]]$Category <- x
lrarm[[x]]
})
data <- Reduce(rbind, ll)
#plot
g <- ggplot(data, aes(x=Var1, y=value, colour=Category)) +
xlab('Number of genomes') +
ylab('Number of clusters')
for (i in seq_along(what)){
g <- g +
stat_function(data = data[which(data$Category == what[i]), ],
fun = lfun[[what[i]]]$formula)
}
g
},
#################
## ##
## SHINY APP ##
## ##
#################
#' @description
#' Launch an interactive shiny app. It contains a sidebar
#' with controls and switches to interact with the pagoo
#' object. You can drop/recover organisms from the dataset,
#' modify the core_level, visualize statistics, plots, and
#' browse cluster and gene information. In the main body, it
#' contains 2 tabs to switch between summary statistics plots
#' and core genome information on one side, and accessory
#' genome plots and information on the other.
#'
#' The lower part of each tab contains two tables, side by
#' side. On the "Summary" tab, the left one contain
#' information about core clusters, with one cluster per row.
#' When one of them is selected (click), the one on the right
#' is updated to show information about its genes (if
#' provided), one gene per row. On the "Accessory" tab, a
#' similar configuration is shown, but on this case only
#' accessory clusters/genes are displayed. There is a slider
#' on the sidebar where one can select the accessory
#' frequency range to display.
#'
#' Give it a try!
#'
#' Take into account that big pangenomes can slow down the
#' performance of the app. More than 50-70 organisms often
#' leads to a delay in the update of the plots/tables.
runShinyApp = function(){
pg <- self
app <- list(ui = dashboardPage(
header = dashboardHeader(
title = "Pagoo Shiny App",
titleWidth = 250
),
sidebar = dashboardSidebar(width = 250,
sidebarMenu(
# uiOutput("select_organisms"),
pickerInput(inputId = "organisms",
label = "Organisms",
choices = as.character(pg$.__enclos_env__$private$.organisms$org),
selected = as.character(pg$organisms$org),
multiple = TRUE,
options = list(`actions-box` = TRUE),
width = "100%"),
# uiOutput("select_variable"),
pickerInput(inputId = "variable",
label = "Variable",
choices = c("\a", colnames(pg$.__enclos_env__$private$.organisms)[-1]),
# selected = all_vars[1],
multiple = FALSE,
width = "100%"),
uiOutput("select_category"),
sliderInput("core_level",
"Core Level",
min=85,
max=100,
value=95),
uiOutput(outputId = "accs_slider"),
selectInput("color_meta_selected",
label = "Color by (select metadata):",
choices = c("\a", colnames(pg$.__enclos_env__$private$.organisms)[-1]))
)
),
body = dashboardBody(
tags$head(tags$style(HTML('.info-box {min-height: 45px;}
.info-box-icon {height: 45px; line-height: 45px;}
.info-box-content {padding-top: 0px; padding-bottom: 0px;}
.selectize-dropdown {z-index: 10000 !important;}'))),
tags$script(HTML("$('body').addClass('fixed');")),
fluidRow(
infoBoxOutput("num_orgs"),
infoBoxOutput("num_clus"),
infoBoxOutput("num_gene")
),
fluidRow(
tabBox(width = 12,
tabPanel(
title = "Summary",
fluidRow(
box(
title = "Core number",
status = "primary",
solidHeader = T,
width = 3,
height = 500,
# align = "center",
# DT::DTOutput("summary_counts"),
addSpinner(plotlyOutput("core_evol", width = "auto"))
),
box(
title = "Summary",
status = "primary",
solidHeader = T,
width = 3,
height = 500,
#offset = 0,
addSpinner(plotlyOutput("pie"))
),
box(
title = "Frequency plot",
status = "primary",
solidHeader = T,
width = 3,
height = 500,
#offset = 0,
addSpinner(plotlyOutput("barplot"))
),
box(
title = "Pangenome curves",
status = "primary",
solidHeader = T,
width = 3,
height = 500,
#offset = 0,
addSpinner(plotlyOutput("curves"))
)
),
# hr(),
fluidRow(
box(
title = "Core clusters",
status = "primary",
solidHeader = T,
width = 6,
addSpinner(DT::DTOutput("core_clusters"))
),
box(
title = "Core genes",
status = "primary",
solidHeader = T,
width = 6,
addSpinner(DT::DTOutput("core_genes"))
)
)
), # Ends tabPanel "Summary"
tabPanel(
title = "Accessory",
fluidRow(
box(
title = "Gene presence/absence",
status = "primary",
solidHeader = T,
height = 500,
addSpinner(plotlyOutput("heat_freq", height = 420))
),
box(
title = "PCA",
status = "primary",
solidHeader = T,
height = 500,
splitLayout(
addSpinner(DT::DTOutput("pca_summary")),
column(width = 12,
uiOutput("pca_x"),
uiOutput("pca_y"),
style="z-index:1002;"
),
cellWidths = c("80%", "20%")
),
box(addSpinner(plotlyOutput("pca", height = 250)), width = 12)
),
),
fluidRow(
box(
title = "Accessory clusters",
status = "primary",
solidHeader = T,
width = 6,
addSpinner(DT::DTOutput("accs_clusters"))
),
box(
title = "Accessory genes",
status = "primary",
solidHeader = T,
width = 6,
DT::DTOutput("accs_genes")
)
)
) # Ends tabPanel "Accessory"
)
)
)
# Ends tabPanel
),
server = function(input, output, session){
#############
## OPTIONS ##
#############
dataorg <- as.data.frame(pg$.__enclos_env__$private$.organisms)
all_orgs <- as.character(dataorg$org)
orgs <- reactiveVal(as.character(pg$organisms$org))
all_vars <- colnames(dataorg)[-1]
output$select_category <- renderUI(
pickerInput(inputId = "category",
label = "Category",
choices = "\a",
selected = "\a",
multiple = TRUE,
options = list(`actions-box` = TRUE),
width = "100%")
)
observeEvent(input$organisms, {
orgs(input$organisms)
})
updateOrganisms <- reactive({
ava_orgs <- as.character(pg$organisms[["org"]])
inp_orgs <- orgs()
toDrop <- ava_orgs[! ava_orgs %in% inp_orgs]
toReco <- inp_orgs[! inp_orgs %in% ava_orgs]
pg$drop(toDrop)
pg$recover(toReco)
})
observeEvent(input$variable, {
# updateOrganisms()
meta <- unique(as.character(dataorg[[req(input$variable)]]))
updatePickerInput(session = session,
inputId = "category",
label = "Category",
choices = meta,
selected = meta)
})
observeEvent(input$category, {
# updateOrganisms()
var <- input$variable
# orgs <- as.character(pg$organisms$org)
nw <- all_orgs[dataorg[[var]] %in% req(input$category)]
updatePickerInput(
session = session,
inputId = "organisms",
label = "Organisms",
choices = all_orgs,
selected = nw
)
})
updateCoreLevel <- eventReactive(input$core_level, {
pg$core_level <- input$core_level
pg
})
accs_level <- reactiveVal(value = pg$core_level - 1L)
observeEvent(input$core_level, {
accs_level(input$core_level-1L)
})
output$num_orgs <- renderInfoBox({
updateOrganisms()
x <- dim(pg$organisms)[1]
infoBox("Number of organisms", x)
})
output$num_clus <- renderInfoBox({
updateOrganisms()
x <- dim(pg$clusters)[1]
infoBox("Number of clusters", x, color = "purple")
})
output$num_gene <- renderInfoBox({
updateOrganisms()
x <- sum(pg$pan_matrix)
infoBox("Number of genes", x, color = "yellow")
})
#############
## SUMMARY ##
#############
output$core_evol <- renderPlotly({
updateOrganisms()
pm <- pg$pan_matrix
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
sq <- seq(1, 0.85, -0.01)
cs <- colSums(pm)
norg <- dim(pm)[1]
ev <- sapply(sq, function(x){
length(which( cs >= floor(norg*x) ))
})
data <- data.frame(core_level = sq*100, core_number = ev)
plot_ly(data, x = ~core_level, y = ~core_number,
type = "scatter",
mode = "markers", height = 420) %>%
layout(xaxis = list(title = "Core Level"),
yaxis = list(title = "Core Number"))
})
output$pie <- renderPlotly({
# pg$core_level <- input$core_level
updateCoreLevel()
updateOrganisms()
st <- as.data.frame(pg$summary_stats[-1, ])
st$Category <- factor(st$Category, levels = c("Cloud", "Shell", "Core"))
plot_ly(st, labels = ~Category, values = ~Number,
type = "pie",
showlegend=FALSE,height = 400,
textposition = 'inside') %>%
layout(margin = list(
l = 20,
r = 20,
b = 0,
t = 20,
pad = 4
))
})
# Initialize accs_pm in order to not have to wait until user clicks on
# accessory tab.
pm <- pg$pan_matrix
norgs <- length(pg$organisms$org)
pmb <- pm
pmb[which(pmb>1L, arr.ind = TRUE)] <- 1L
accs_freq <- c(0, pg$core_level-1)
accs_num <- round(accs_freq * norgs / 100)
clsu <- colSums(pmb)
wh <- which( clsu >= min(accs_num) & clsu <= max(accs_num))
colnames(pmb)[wh]
accs_pm_i <- pm[, wh, drop = FALSE]
accs_pm <- reactiveVal(value = accs_pm_i)
observeEvent({
input$accs_freq
input$organisms
},{
updateOrganisms()
pm <- pg$pan_matrix
norgs <- length(pg$organisms$org)
pmb <- pm
pmb[which(pmb>1L, arr.ind = TRUE)] <- 1L
accs_freq <- req(input$accs_freq)
accs_num <- floor(accs_freq * norgs / 100)
clsu <- colSums(pmb)
wh <- which( clsu >= min(accs_num) & clsu <= max(accs_num))
colnames(pmb)[wh]
accs_pm(pm[, wh, drop = FALSE])
}, ignoreNULL = TRUE)
output$barplot <- renderPlotly({
updateOrganisms()
pm <- pg$pan_matrix
pm[which(pm > 0, arr.ind = TRUE)] <- 1L
dd <- as.data.frame(table(colSums(pm)))
colnames(dd) <- c("Count", "Frequency")
plot_ly(dd,
x = ~Count, y = ~Frequency,
type = "bar", height = 420) %>%
layout(xaxis = list(title = "Number of genomes"),
yaxis = list(title = 'Number of genes'))
})
output$curves <- renderPlotly({
updateOrganisms()
updateCoreLevel()
what <- c('pangenome', 'coregenome')
names(what) <- what
lrar <- lapply(what, function(x) pg$rarefact(what = x))
lfun <- lapply(what, function(x){
if (x == 'pangenome'){
pg$pg_power_law_fit(raref = lrar[[x]])#$formula
}else{
pg$cg_exp_decay_fit(raref = lrar[[x]])#$formula
}
})
lrarm <- lapply(lrar, melt)
ll <- lapply(what, function(x) {
lrarm[[x]]$category <- x
lrarm[[x]]
})
data <- Reduce(rbind, ll)
norgs <- dim(pg$organisms)[1]
interv <- (norgs - 1) / 512
interp <- seq(1, norgs, interv)
plot_ly(data,
x=~Var1,
y=~value,
color = ~category,
type="scatter",
mode="markers",
marker = list(opacity = 0.3),
showlegend = FALSE,
height = 420) %>%
add_lines(x = interp, y = lfun$pangenome$formula(interp), color = "pangenome") %>%
add_lines(x = interp, y = lfun$coregenome$formula(interp), color = "coregenome") %>%
layout(xaxis = list(title = "Number of genomes"),
yaxis = list(title = "Number of genes"))
})
output$core_clusters <- DT::renderDT({
updateOrganisms()
updateCoreLevel()
data <- as.data.frame(pg$core_clusters)
datatable(data,
selection = list(mode = "single", selected = 1),
options = list(
rownames = FALSE,
paging = FALSE,
# autoWidth = T,
scrollX = T,
scrollY = "200px",
# scrollCollapse = T,
pageLength = 3,
dom ='ft'
))
})
output$core_genes <- DT::renderDT({
updateOrganisms()
updateCoreLevel()
selected_cluster <- req(input$core_clusters_rows_selected)
data <- as.data.frame(pg$core_genes[[selected_cluster]])
tgt <- which(sapply(data, function(x) max(nchar(as.character(x), allowNA = T, type = "width")) )>=26)
datatable(data,
selection = "none",
options = list(
rownames = FALSE,
paging = FALSE,
scrollX = TRUE,
scrollY = "200px",
dom ='ft',
autoWidth = TRUE,
columnDefs = list(list(
targets = unname(tgt),
render = JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 30 ?",
"'<span title=\"' + data + '\">' + data.substr(0, 26) + '...</span>' : data;",
"}"),
width = "200px"
))
))
})
#############
# ACCESSORY #
#############
output$accs_slider <- renderUI({
# corelev <- input$core_level
sliderInput("accs_freq",
"Accessory frequency (%)",
min = 0,
max = accs_level(),
value=c(0, accs_level()))
})
output$pca_x <- renderUI({
updateOrganisms()
norgs <- dim(pg$organisms)[1]
selectInput("xpc",
label = "X-axis PC",
choices = paste0("PC", seq_len(norgs)),
selected = "PC1",
multiple = FALSE,
width = "100px")
})
output$pca_y <- renderUI({
updateOrganisms()
norgs <- dim(pg$organisms)[1]
selectInput("ypc",
label = "Y-axis PC",
choices = paste0("PC", seq_len(norgs)),
selected = "PC2",
multiple = FALSE,
width = "100px")
})
pca <- reactive({
pm <- accs_pm()
prcomp(pm)
})
output$pca <- renderPlotly({
updateOrganisms()
data <- as.data.frame(pca()$x)
xax <- req(input$xpc)
yax <- req(input$ypc)
data$xx <- data[[xax]]
data$yy <- data[[yax]]
opts <- list()
# color_by <- input$color_meta_selected
if (colorBy() != "\a"){
cls <- pg$organisms[[colorBy()]]
ln <- length(levels(cls))
clrs <- colorSet(ln)
opts <- c(opts, list(color = cls, colors = clrs))
}
opts <- c(opts, list(
data = data,
x = ~xx,
y = ~yy,
text = ~rownames(data),
mode = "markers",
marker = list(size = 12),
type = "scatter"
))
p <- do.call(plot_ly, opts)
p %>% layout(xaxis = list(title = xax), yaxis = list(title = yax))
})
output$pca_summary <- DT::renderDT({
updateOrganisms()
PCA <- pca()
sumpca <- summary(PCA)$importance
datatable(sumpca,
selection = "none",
extensions = "FixedColumns",
options = list(
pageLength = 3,
lengthChange = FALSE,
searching = FALSE,
scrollX = TRUE,
dom = "t",
# bsort = FALSE,
fixedColumns = list(leftColumns = 1),
autoWidth = TRUE,
columnDefs = list(list(width = '150px', targets = 0),
list(width = '40px', targets = 1:dim(sumpca)[2]))
)) %>% formatSignif(columns = T, digits = 3)
}, rownames = TRUE)
colorSet <- function(n) {
colors <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
"#E5C494", "#B3B3B3", "#1B9E77", "#D95F02", "#7570B3", "#E7298A",
"#66A61E", "#E6AB02", "#A6761D", "#666666")
colors[seq_len(n)]
}
colorBy <- reactiveVal("\a")
observeEvent(input$color_meta_selected, {
colorBy(input$color_meta_selected)
})
output$heat_freq <- renderPlotly({
updateOrganisms()
pm <- accs_pm()
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
opts <- list()
# color_by <- input$color_meta_selected
# if (is.null(color_by)) color_by <- "\a"
if (colorBy() != "\a"){
cls <- as.data.frame(pg$organisms[colorBy()])
rownames(cls) <- as.character(pg$organisms$org)
opts <- c(opts, list(row_side_colors = cls, row_side_palette=colorSet))
}
# wh <- updateClusterList()
# pm <- pg$pan_matrix[, wh, drop = FALSE]
# wh <- updateClusterList()
# pm <- pm[, wh, drop=FALSE]
csm <- colSums(pm)
spl <- split(names(csm), csm)
tpm <- t(pm)
norder <- lapply(spl, function(x) {
if (length(x)<2){
x
}else{
d <- vegdist(tpm[x, , drop=F], method = "jaccard", binary = T, na.rm = T)
hc <- hclust(d, "single")
x[hc$order]
}
})
norder <- norder[order(as.integer(names(norder)), decreasing = T)]
forder <- unlist(norder)
pm <- pm[, forder, drop = F]
d <- vegdist(pm, method = "jaccard")
hc <- as.dendrogram(hclust(d))
dend <- dendextend::seriate_dendrogram(hc, d)
## HEATMAP
opts <- c(opts,
list(x = pm,
colors = c("#F7FBFF", "#08306B"),
Colv = FALSE,
Rowv = dend,
margin = c(0,0,0,0),
# distfun = vegan::vegdist,
# dist_method = "jaccard",
hide_colorbar = TRUE,
showticklabels = FALSE))
p1 <- do.call(heatmaply, opts)
## BARPLOT
rsum <- rowSums(pm)
odend <- rev(order.dendrogram(dend))
bar <- data.frame(org = factor(names(rsum),
levels = names(rsum)[odend]),
Count = rsum,
row.names = NULL)
# bar <- rsdata[hc$order, ]
p2 <- plot_ly(#data = bar,
y = ~bar$org,
x = ~bar$Count,
type = "bar",
orientation="h") %>%
layout(xaxis = list(autorange='reversed', showticklabels=FALSE),
yaxis = list(showticklabels = FALSE))
subplot(p2, p1, widths = c(.3,.7))
})
output$accs_clusters <- DT::renderDT({
updateOrganisms()
# wh <- updateClusterList()
wh <- colnames(accs_pm())
clust <- pg$clusters
ma <- match(wh, clust$cluster)
data <- as.data.frame(clust[ma, ,drop=FALSE])
datatable(data,
selection = list(mode = "single", selected = 1),
options = list(
rownames = FALSE,
paging = FALSE,
# autoWidth = T,
scrollX = T,
scrollY = "200px",
# scrollCollapse = T,
pageLength = 3,
dom ='ft'
))
})
output$accs_genes <- DT::renderDT({
updateOrganisms()
accs_rows <- req(input$accs_clusters_rows_selected)
wh <- colnames(accs_pm())
selected_cluster <- wh[accs_rows]
data <- as.data.frame(pg$genes[[selected_cluster]])
# chf <- which(lapply(data, class) %in% c("character", "factor"))
tgt <- which(sapply(data, function(x) max(nchar(as.character(x), allowNA = T, type = "width")) )>=26)
datatable(data,
selection = "none",
options = list(
rownames = FALSE,
paging = FALSE,
scrollX = TRUE,
scrollY = "200px",
dom ='ft',
autoWidth = TRUE,
columnDefs = list(list(
targets = unname(tgt),
render = JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 30 ?",
"'<span title=\"' + data + '\">' + data.substr(0, 26) + '...</span>' : data;",
"}"),
width = "200px"
))
))
})
})
runApp(app)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.