Nothing
#' MixviR Shiny Dashboard
#'
#' Open dashboard to explore mutation data generated with `call_mutations()`.
#' @param muts.df A data frame (produced by `call_mutations()`) storing mutation information for samples to analyze. Must contain columns SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, & DP. Alternatively, the mutation data can be read in from a (tab-separated) file with the `read.muts.from()` argument. See also the *write.mut.table* argument in `call_mutations()`.
#' @param read.muts.from An alternative to *muts.df* for providing mutation input. If a data frame generated by `call_mutations()` was previously written to a (comma separated) file (see *write.mut.table* in `call_mutations()`), the mutation data can be read in from that file by providing its path. The fields "SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP" must be present (additional fields will be ignored).
#' @param dates Path to optional csv file with cols "SAMP_NAME", "LOCATION", and "DATE". Sample names need to match those in the *muts.df* data frame created by `call_mutations()`. Dates should be provided in the format *mmddyyyy*.
#' @param lineage.muts Path to optional csv file with required cols "Gene", "Mutation", and "Lineage" defining mutations associated with lineages of interest. See example file at "https://github.com/mikesovic/MixviR/blob/main/mutation_files/outbreak_20211202.csv". Additional columns will be ignored.
#' @param all.target.muts Logical to indicate whether results for all target mutations were written to the output of the `call_mutations()` function. See *write.all.targets* option in `call_mutations()`. Default FALSE. If TRUE, more informative sequencing depth information can be provided in the dashboard.
#' @keywords shiny
#' @return Shiny Dashboard to Explore Data
#' @importFrom magrittr %>%
#' @export
#' @examples
#' if (interactive()) {explore_mutations(read.muts.from = system.file("extdata",
#' "sample_mutations.csv",
#' package = "MixviR"),
#' lineage.muts = system.file("extdata",
#' "example_lineage_muts.csv",
#' package = "MixviR"))}
#' if (interactive()) {explore_mutations(read.muts.from = system.file("extdata",
#' "sample_mutations.csv",
#' package = "MixviR"),
#' dates = system.file("extdata",
#' "example_location_date.csv",
#' package = "MixviR"),
#' lineage.muts = system.file("extdata",
#' "example_lineage_muts.csv",
#' package = "MixviR"))}
##Which arguments are defined determine what tabs are available in the Shiny dashboard.
##If dates and lineage.def are both defined, get all 4 tabs, with associated lineages in tables.
##If lineage.def is defined but dates is not, get plots for individual samples with all lineages along x-axis and mutation table without dates.
##If dates are defined but lineages aren't, get new mutations table, mutation freq plot, and mutations table (with no lineages).
##If neither are defined, get just mutations table with no lineages.
explore_mutations <- function(muts.df,
dates = NULL,
lineage.muts = NULL,
read.muts.from = NULL,
all.target.muts = FALSE) {
if (is.null(read.muts.from)) {
samp_data <- muts.df %>%
dplyr::select(SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP)
} else {
samp_data <- readr::read_csv(read.muts.from, show_col_types = FALSE) %>%
dplyr::select(SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP)
}
#create function to scale sublineage frequencies if present
sub_scale <- function(x) {
freq_sum <- sum(x)
if (freq_sum > 1) {
freq_sum <- x/freq_sum
}
return(freq_sum)
}
#read in mutations associated with lineages (lineage.muts) if present
#cols 'Gene', 'Mutation', and 'Lineage' are required in the file. 'Sublineage' is optional.
if (!is.null(lineage.muts)) {
lineage_muts <- readr::read_csv(lineage.muts, show_col_types = FALSE)
#check to see if there is a Sublineage column present. If not, add one.
if (!"Sublineage" %in% names(lineage_muts)) {
lineage_muts <- lineage_muts %>% dplyr::mutate("Sublineage" = NA)
}
#clarify column types and merge Gene and Mutation into ALT_ID
lineage_muts <- lineage_muts %>%
dplyr::mutate("Gene" = as.character(Gene),
"Mutation" = as.character(Mutation),
"Lineage" = as.character(Lineage),
"Sublineage" = as.character(Sublineage)) %>%
tidyr::unite("ALT_ID",
Gene, Mutation,
sep = "_")
#get major lineage muts and create indicator column that defines the characteristic mutations (those not occuring in more than one lineage)
maj_lineage_muts <- lineage_muts %>%
dplyr::filter(is.na(Sublineage)) %>%
dplyr::mutate("characteristic" = ifelse(ALT_ID %in% ALT_ID[duplicated(ALT_ID)], "N", "Y"))
#get sublineage muts, remove those shared with the respective major lineage, and create indicator column that defines the sub-lineage characteristic mutations.
maj_lineage_mut_list <- paste0(maj_lineage_muts$Lineage, "_", maj_lineage_muts$ALT_ID)
sub_lineage_muts <- lineage_muts %>%
dplyr::filter(!is.na(Sublineage)) %>%
tidyr::unite("test",
Lineage, ALT_ID,
sep = "_",
remove = FALSE) %>%
dplyr::filter(!test %in% maj_lineage_mut_list) %>%
dplyr::select(ALT_ID, Lineage, Sublineage) %>%
dplyr::group_by(Lineage) %>%
dplyr::mutate("sub_characteristic" = ifelse(ALT_ID %in% ALT_ID[duplicated(ALT_ID)], "N", "Y")) %>%
dplyr::ungroup()
}
#from the above, have
#1) df maj_lineage_muts that has mutations associated with major lineages along with an indicator variable (characteristic) that indicates whether the mutation is unique to the lineage.
#2) df sub_lineage_muts that has mutations that are only associated with sublineages (not present in main lineage lists) and has indicator variable (sub_characteristic) that indicates
#whether the mutation is unique to the sublineage within its respective lineage (in other words, if the sublineage mutation is not shared with other sublineages within the
#respective parent lineage, it is considered characteristic, even if it is associated with a sublineage under another parent lineage).
if (is.null(dates)) {
if (is.null(lineage.muts)) { #no dates or lineage.muts - get only mutations table with no lineage information column in dashboard
ui <- shiny::fluidPage(shiny::tabsetPanel(shiny::tabPanel("View Mutations",
shiny::selectInput(inputId = "Sample_mutTable",
label = "Sample",
choices = unique(samp_data$SAMP_NAME)),
shiny::numericInput(inputId = "DPThresh",
label = "Min Seq Depth",
value = 0),
DT::dataTableOutput("mut_table"))
)
)
server <- function(input, output, session){
output$mut_table <- DT::renderDataTable({
samp_data %>%
dplyr::filter(SAMP_NAME %in% input$Sample_mutTable & DP >= input$DPThresh) %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2"),
"AF" = round(AF, digits = 3)) %>%
dplyr::select(SAMP_NAME, CHR, POS, GENE, MUTATION, AF, DP) %>%
dplyr::rename("FREQ" = "AF",
"SEQ DEPTH" = "DP") %>%
as.data.frame()
})
}
## run Shiny app for no dates/no lineages
shiny::shinyApp(ui = ui, server = server)
} else{ #no dates, but do have lineage defining-mutations - get mutation table and barplot that has all lineages along x-axis.
ui <- shiny::fluidPage(shiny::tabsetPanel(
shiny::tabPanel("Lineages Present",
shiny::sidebarPanel(width = 2,
shiny::selectInput(inputId = "Sample_VarPres",
label = "Sample",
choices = unique(samp_data$SAMP_NAME)),
shiny::sliderInput(inputId = "propThresh",
label = "Presence Threshold",
min = 0, max = 1,
value = 0.5),
shiny::numericInput(inputId = "DPThreshPlot",
label = "Min Seq Depth",
value = 0),
shiny::selectInput(inputId = "scaled",
label = "Frequencies",
choices = c("Scaled", "Unscaled")),
shiny::selectInput(inputId = "metric",
label = "Freq Measure",
choices = c("Mean", "Median"))),
shiny::mainPanel(width = 10,
shiny::verticalLayout(
plotly::plotlyOutput(outputId = "lineages_present"),
plotly::plotlyOutput(outputId = "lineage_proportions"))
)
),
shiny::tabPanel("View Mutations",
shiny::selectInput(inputId = "Sample_MutTable",
label = "Sample",
choices = unique(samp_data$SAMP_NAME)),
shiny::numericInput(inputId = "DPThresh",
label = "Min Seq Depth",
value = 0),
DT::dataTableOutput("mut_table")
)))
server <- function(input, output, session){
output$lineages_present <- plotly::renderPlotly({
#get data for the selected Sample
samp_data <- samp_data %>%
dplyr::filter(SAMP_NAME %in% input$Sample_VarPres)
#get rid of any mutation duplicates, keeping one with highest seq depth
#these generally occur when an amino acid change is caused by two or more SNPs - if so, it's repeated for each variant called
samp_data <- samp_data %>%
dplyr::group_by(ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE)
#join mutations observed in current sample in to lineage_muts df
char_muts <- dplyr::left_join(x = maj_lineage_muts, y = samp_data, by = "ALT_ID")
#summarize by lineage
#pull out the mutations that only occur in a single lineage for analysis of lineages present in sample
#"Proportion Present" represents the proportion of characteristic mutations that are present with AF > 0 (this is necessary in case "write.all.targets" was used) and a seq depth greater than the threshold.
#if "write.all.muts" wasn't used, then all mutations in the dataset have AF > 0, and the only relevant value is the depth threshold, which can possibly be left at zero unless maybe worried about something like index hopping.
#if "write.all.muts" was used, then some mutations are included with AF=0. In this case, since all mutations of interest are reported, can check to see what proportion of them have depths that are reasonable ("Proportion High Depth"). This will be reported as a tooltip if all.target.muts is TRUE.
char_summary <- char_muts %>%
dplyr::filter(characteristic == "Y") %>%
dplyr::group_by(Lineage) %>%
dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"Total Lineage-Characteristic Muts" = dplyr::n(),
"Proportion High Depth" = (sum(DP >= input$DPThreshPlot, na.rm = TRUE))/dplyr::n()) %>%
dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
"Proportion High Depth" = round(`Proportion High Depth`, 3)) %>%
dplyr::distinct()
#generate geom_col plot to represent proportion of lineage-characteristic mutations present for each lineage in the sample
#representing the proportion of characteristic mutations with high depth as a tooltip if all.target.muts = TRUE
if (all.target.muts == FALSE) {
pres_plot <- char_summary %>%
ggplot2::ggplot(ggplot2::aes(x = Lineage,
y = `Proportion Present`,
text = paste("Lineage: ", Lineage, "</br>", "</br>",
"Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>",
"Proportion Present: ", `Proportion Present`))) +
ggplot2::geom_col(fill = "#A6CEE3") +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme_classic() +
ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Sample_VarPres)) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
axis.text = ggplot2::element_text(size = 13),
axis.text.y = ggplot2::element_text(size = 14),
plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
axis.title.y = ggplot2::element_blank()) +
ggplot2::geom_hline(yintercept = input$propThresh, color = "gray", linetype = 2, alpha = 1)
} else {
pres_plot <- char_summary %>%
ggplot2::ggplot(ggplot2::aes(x = Lineage,
y = `Proportion Present`,
text = paste("Lineage: ", Lineage, "</br>", "</br>",
"Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>",
"Proportion Present: ", `Proportion Present`, "</br>",
"Proportion With Seq Depth > Threshold: ", `Proportion High Depth`))) +
ggplot2::geom_col(fill = "#A6CEE3") +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme_classic() +
ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Sample_VarPres)) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
axis.text = ggplot2::element_text(size = 13),
axis.text.y = ggplot2::element_text(size = 14),
plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
axis.title.y = ggplot2::element_blank()) +
ggplot2::geom_hline(yintercept = input$propThresh, color = "gray", linetype = 2, alpha = 1)
}
plotly::ggplotly(pres_plot, tooltip = "text")
})
output$lineage_proportions <- plotly::renderPlotly({
#get data for the selected Sample
samp_data <- samp_data %>%
dplyr::filter(SAMP_NAME %in% input$Sample_VarPres)
#get rid of any mutation duplicates, keeping one with highest DP
#these generally occur when an amino acid change is caused by two or more SNPs - if so, it's repeated for each variant called
samp_data <- samp_data %>%
dplyr::group_by(ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE)
#join mutations observed in current sample in to lineage_muts df
char_muts <- dplyr::left_join(x = maj_lineage_muts, y = samp_data, by = "ALT_ID")
#if defined, join sub lineage mutations observed in current sample in to sub_lineage_muts df
if (nrow(sub_lineage_muts) > 0) {
sub_char_muts <- dplyr::left_join(x = sub_lineage_muts, y = samp_data, by = "ALT_ID")
}
if (input$metric == "Mean") { #Estimated Frequecies to be plotted are dependent on whether they are based on the mean or median of the AF values
#summarize based on main lineages to get proportion characteristic mutations present for each lineage (same metric as above), associated estimated freqs, and mutation lists (muts present and muts absent - appear in tooltip)
char_summary <- char_muts %>%
dplyr::filter(characteristic == "Y") %>%
dplyr::group_by(Lineage) %>%
dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"Estimated Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
"muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
"muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
"lab1" = "Muts Present: ",
"lab2" = "\n Muts Absent: ") %>%
dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
"muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
"muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list),
"muts_list" = gsub(";$", "", muts_list)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
#summarize for sublineages if there are sublineage-associated mutations present
#sublineage info will be shown in tooltip info
if(nrow(sub_lineage_muts) > 0) {
sub_char_summary <- sub_char_muts %>%
dplyr::filter(sub_characteristic == "Y") %>%
dplyr::group_by(Lineage, Sublineage) %>%
dplyr::summarize("sub_Proportion_Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"sub_Estimated_Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
"sub_muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
"sub_muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";")) %>%
dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., `sub_Proportion_Present` < input$propThresh, 0)) %>%
#dplyr::mutate("sub_Proportion_Present" = round(`sub_Proportion_Present`, 3),
# "sub_Estimated_Freq" = round(`sub_Estimated_Freq`, 3)) %>%
#dplyr::mutate("sub_Estimated_Freq" = round(sub_Estimated_Freq, digits = 3)) %>%
dplyr::ungroup()
}
} else {
char_summary <- char_muts %>%
dplyr::filter(characteristic == "Y") %>%
dplyr::group_by(Lineage) %>%
dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"Estimated Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
"muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
"muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
"lab1" = "Muts Present: ",
"lab2" = "\n Muts Absent: ") %>%
dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
"muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
"muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list),
"muts_list" = gsub(";$", "", muts_list)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
#summarize for sublineages if there are sublineage-associated mutations present
if(nrow(sub_lineage_muts) > 0) {
sub_char_summary <- sub_char_muts %>%
dplyr::filter(sub_characteristic == "Y") %>%
dplyr::group_by(Lineage, Sublineage) %>%
dplyr::summarize("sub_Proportion_Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"sub_Estimated_Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
"sub_muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
"sub_muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";")) %>%
dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., `sub_Proportion_Present` < input$propThresh, 0)) %>%
#dplyr::mutate("sub_Proportion_Present" = round(`sub_Proportion_Present`, 3),
# "sub_Estimated_Freq" = round(`sub_Estimated_Freq`, 3)) %>%
#dplyr::mutate("sub_Estimated_Freq" = round(sub_Estimated_Freq)) %>%
dplyr::ungroup()
}
}
#round values
char_summary <- char_summary %>%
dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
"Estimated Freq" = round(`Estimated Freq`, 3))
if(nrow(sub_lineage_muts) > 0) {
sub_char_summary <- sub_char_summary %>%
dplyr::mutate("sub_Proportion_Present" = round(sub_Proportion_Present, 3),
"sub_Estimated_Freq" = round(sub_Estimated_Freq, 3))
}
if (input$scaled == "Scaled") {
freq_sum <- sum(char_summary$`Estimated Freq`)
if (freq_sum > 1) {
scaled_vals <- char_summary$`Estimated Freq`/freq_sum
char_summary <- char_summary %>%
dplyr::mutate("Estimated Freq" = scaled_vals)
}
#scale sublineage frequencies if necessary
if(exists("sub_char_summary")) {
sub_char_summary <- sub_char_summary %>%
dplyr::group_by(Sublineage) %>%
dplyr::mutate("sub_Estimated_Freq" = sub_scale(x = sub_Estimated_Freq)) %>%
dplyr::ungroup()
}
}
#generate geom_col plot to represent proportions of lineages present in the sample.
#only plotted for lineages identified as "present" based on chosen threshold (otherwise, frequencies were all adjusted to zero above).
if(input$metric == "Mean") {
measure <- "Mean"
} else {
measure <- "Median"
}
if(!exists("sub_char_summary")) {
freq_plot <- char_summary %>%
dplyr::mutate("lab" = "samp") %>%
dplyr::filter(`Estimated Freq` > 0) %>% #if a sample has no lineages designated as present (all estimated freqs are 0), the plot will throw an error about aesthetics.
ggplot2::ggplot(ggplot2::aes(x = lab,
y = `Estimated Freq`,
fill = Lineage,
text = paste("Lineage: ", Lineage, "</br>", "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list))) +
ggplot2::geom_bar(stat = "identity",
position = "stack") +
ggplot2::theme_classic() +
ggplot2::scale_fill_brewer(palette = "Paired") +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 13),
plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
axis.text.y = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 12),
legend.title = ggplot2::element_blank()) +
ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
plotly::ggplotly(freq_plot, tooltip = c("text")) %>%
plotly::layout(hoverlabel = list(align = "left"))
} else { #there is sublineage info present that will be included in the plot
#sub_char_summary <- sub_char_summary %>%
# dplyr::select(Lineage, Sublineages_present, Sub_estimated_freqs, sub_muts_present)
sub_char_summary <- sub_char_summary %>%
dplyr::ungroup() %>%
dplyr::group_by(Lineage) %>%
dplyr::summarize("Sublineages_present" = paste0(c(Sublineage[sub_Proportion_Present >= input$propThresh]), collapse = ";"),
"Sub_estimated_freqs" = paste0(c(sub_Estimated_Freq[sub_Proportion_Present >= input$propThresh]), collapse = ";"),
"sub_muts_present" = paste0(c(unique(sub_muts_present)), collapse = ";")) %>%
dplyr::mutate("sub_muts_present" = gsub('(.{1,30})(;|$)', '\\1;\n', sub_muts_present)) %>%
dplyr::ungroup()
char_summary <- dplyr::left_join(x = char_summary, y = sub_char_summary, by = "Lineage")
test_char_summary <- char_summary %>%
dplyr::filter(`Estimated Freq` > 0)
if (nrow(test_char_summary) == 0) { #deal with case where everything is filtered out - otherwise get error related to aesthetics
freq_plot <- data.frame("lab" = "samp",
"EstimatedFreq" = 0,
"Lineage" = unique(char_summary$Lineage)) %>%
ggplot2::ggplot(mapping = ggplot2::aes(x = lab,
y = `EstimatedFreq`,
fill = Lineage)) +
ggplot2::geom_bar(stat = "identity",
position = "stack") +
ggplot2::theme_classic() +
ggplot2::scale_fill_brewer(palette = "Paired") +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 13),
plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
axis.text.y = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 12),
legend.title = ggplot2::element_blank()) +
ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
}
else{
freq_plot <- char_summary %>%
dplyr::mutate("lab" = "samp") %>%
dplyr::filter(`Estimated Freq` > 0) %>%
ggplot2::ggplot(ggplot2::aes(x = lab,
y = `Estimated Freq`,
fill = Lineage,
text = paste("Lineage: ", Lineage, "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list, "</br>", "Sublineages Present: ", Sublineages_present, "</br>", "Sublineage Freqs: ", Sub_estimated_freqs, "</br>", "Sublineage Muts Present: ", sub_muts_present))) +
ggplot2::geom_bar(stat = "identity",
position = "stack") +
ggplot2::theme_classic() +
ggplot2::scale_fill_brewer(palette = "Paired") +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 13),
plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
axis.text.y = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 12),
legend.title = ggplot2::element_blank()) +
ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
}
plotly::ggplotly(freq_plot, tooltip = c("text")) %>%
plotly::layout(hoverlabel = list(align = "left"))
}
})
#generate mutation table that includes column of lineages the mutation is associated with
output$mut_table <- DT::renderDataTable({
#join lineage muatation info into sample data
samp_data <- dplyr::left_join(x = samp_data,
y = lineage_muts,
by = "ALT_ID")
#combine all lineages and sublineages associated with a given mutation - separate with ';'
#this info will be included as a column in the table
samp_data <- samp_data %>%
dplyr::group_by(SAMP_NAME, CHR, POS, ALT_ID, AF, DP) %>%
dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";"),
"Group_sub" = paste(unique(Sublineage[!is.na(Sublineage)]),collapse = ";"),
"GENE" = GENE) %>%
dplyr::ungroup() %>%
dplyr::distinct() %>%
dplyr::mutate("Group_sub" = stringr::str_replace(Group_sub, "(.*?;.*?;.*?)(;.*)", "\\1\\.\\.\\."))
#create table for selected sample
samp_data %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
dplyr::filter(SAMP_NAME %in% input$Sample_MutTable) %>%
dplyr::select(SAMP_NAME, CHR, POS, GENE, MUTATION, AF, Group, Group_sub, DP) %>%
dplyr::filter(DP >= input$DPThresh) %>%
dplyr::mutate("FREQ" = round(AF, digits = 3)) %>%
dplyr::rename("ASSOCIATED LINEAGE(S)" = "Group",
"ASSOCIATED SUBLINEAGE(S)" = "Group_sub",
"SEQ DEPTH" = "DP") %>%
dplyr::select(-AF) %>%
as.data.frame()
})
}
## run Shiny app for lineages but no dates
shiny::shinyApp(ui = ui, server = server)
}
} else { #samples have associated dates
#read in file with locations and dates for samples
dates_df <- readr::read_csv(dates, col_types = "cci")
samp_data <- dplyr::left_join(x = samp_data, y = dates_df, by = "SAMP_NAME") %>%
dplyr::mutate("date" = lubridate::mdy(DATE)) %>%
dplyr::mutate("Location" = as.character(LOCATION)) %>%
dplyr::select(-DATE, -LOCATION)
if (is.null(lineage.muts)) { #have dates but no lineage definitions - output new mutations table, mutation freq plot, and mutations table (without associated lineages column).
ui <- shiny::fluidPage(shiny::tabsetPanel(
shiny::tabPanel("New Mutations",
shiny::selectInput(inputId = "MaxDate",
label = "Mutations First Detected On Or After...",
choices = unique(samp_data$date)),
shiny::numericInput(inputId = "DPThresh",
label = "Minimum Depth",
value = 5),
DT::dataTableOutput("new_muts")),
shiny::tabPanel("Mutation Frequencies",
shiny::textInput(inputId = "Muts",
label = "Mutation(s) (i.e S_D614G or S_del144/144; separate with commas)"),
shiny::selectInput(inputId = "Location_Muts",
label = "Location",
choices = unique(samp_data$Location),
multiple = TRUE),
shiny::plotOutput(outputId = "mut_freqs")),
shiny::tabPanel("View Mutations",
shiny::selectInput(inputId = "Sample_MutTable",
label = "Location",
choices = unique(samp_data$Location)),
shiny::selectInput(inputId = "Date",
label = "Date",
choices = unique(samp_data$date)),
shiny::numericInput(inputId = "DPThresh2",
label = "Minimum Depth",
value = 0),
DT::dataTableOutput("mut_table"))
))
server <- function(input, output, session){
#create table with mutations first observed on or after a selected date
output$new_muts <- DT::renderDataTable({
#filter for mutations observed with a specific sequencing depth
samp_data <- samp_data %>%
dplyr::filter(DP >= input$DPThresh) %>%
dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
#ID the mutations observed prior to the chosen date - these will be filtered out
already_observed <- samp_data %>%
dplyr::filter(date < input$MaxDate) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
all_uniqs <- samp_data %>%
dplyr::select(ALT_ID, AF, SAMP_NAME, Location, date, DP) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#ID mutations first observed on or after selected date
new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
#filter for new mutations
samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
dplyr::rename("DATE" = "date",
"LOCATION" = "Location",
"SEQ DEPTH" = "DP") %>%
dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`)
})
output$mut_freqs <- shiny::renderPlot({
#get set of mutations of interest
#these are entered as comma separated values in text box. Allows for spaces between or not.
target_muts <- stringr::str_split(input$Muts, pattern = ",\\s?") %>% unlist()
#plot frequencies of selected mutations for the selected Sample(s)
#multiple mutations are differentiated by color
#multiple sites are plotted as separate facets
samp_data %>%
dplyr::filter(Location %in% input$Location_Muts) %>%
dplyr::filter(ALT_ID %in% target_muts) %>%
ggplot2::ggplot(ggplot2::aes(x = date, y = AF, color = Location)) +
ggplot2::geom_line(size = 2) +
ggplot2::geom_point(size = 2) +
ggplot2::scale_color_brewer(palette = "Dark2") +
ggplot2::facet_wrap(dplyr::vars(c(ALT_ID))) +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme(legend.text = ggplot2::element_text(size = 14),
legend.title = ggplot2::element_text(size = 15),
strip.text = ggplot2::element_text(size = 15),
axis.text.x = ggplot2::element_text(size = 14),
axis.text.y = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_text(size = 15)) +
ggplot2::labs(y = "Mutation Frequency", x = NULL)
})
#update the "Date" drop-down box to only include dates for which samples exist for the selected Sample
shiny::observeEvent(input$Sample_MutTable, {
date_options <- samp_data %>%
dplyr::filter(Location == input$Sample_MutTable) %>%
dplyr::distinct(date)
date_options <- date_options$date
shiny::updateSelectInput(session, "Date",
label = "Date",
choices = date_options)
})
#create table with all mutations observed in selected sample. Will not include column showing any lineages they have been associated with (don't have lineages info).
output$mut_table <- DT::renderDataTable({
#create table for selected sample/date
samp_data %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
dplyr::filter(Location %in% input$Sample_MutTable) %>%
dplyr::filter(date == input$Date) %>%
dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, DP) %>%
dplyr::filter(DP >= input$DPThresh2) %>%
dplyr::mutate("FREQ" = round(AF, digits = 3)) %>%
dplyr::rename("DATE" = "date",
"LOCATION" = "Location",
"SEQ DEPTH" = "DP") %>%
dplyr::select(-AF) %>%
dplyr::select(SAMP_NAME, LOCATION, DATE, CHR, POS, GENE, MUTATION, FREQ, `SEQ DEPTH`) %>%
as.data.frame()
})
}
## run Shiny app with dates but no lineages
shiny::shinyApp(ui = ui, server = server)
} else { #have both dates and lineage definitions - output all tabs
ui <- shiny::fluidPage(shiny::tabsetPanel(
shiny::tabPanel("Lineages Present",
shiny::sidebarPanel(width = 2,
shiny::selectInput(inputId = "Location",
label = "Location",
choices = unique(samp_data$Location)),
shiny::checkboxGroupInput(inputId = "Lineages",
label = "Lineage",
choices = unique(lineage_muts$Lineage),
selected = unique(lineage_muts$Lineage)),
shiny::selectInput(inputId = "DepthTag",
label = "Calculate Mean Depths On...",
choices = c("All Mutations", "Lineage-Characteristic Mutations Only")),
shiny::sliderInput(inputId = "propThresh",
label = "Presence Threshold",
min = 0, max = 1,
value = 0.5),
shiny::numericInput(inputId = "DPThreshPlot",
label = "Minimum Seq Depth",
value = 0),
shiny::selectInput(inputId = "metric",
label = "Freq Measure",
choices = c("Mean", "Median")),
shiny::selectInput(inputId = "scaled",
label = "Frequencies",
choices = c("Scaled", "Unscaled"))),
shiny::mainPanel(width = 10,
shiny::verticalLayout(
plotly::plotlyOutput(outputId = "lineages_present"),
plotly::plotlyOutput(outputId = "lineage_proportions"))
)
),
shiny::tabPanel("New Mutations",
shiny::selectInput(inputId = "MaxDate",
label = "Mutations First Detected On Or After...",
choices = unique(samp_data$date)),
shiny::numericInput(inputId = "DPThresh",
label = "Minimum Depth",
value = 5),
shiny::checkboxInput(inputId = "maj_sub",
label = "Analyze Sublineages",
value = FALSE),
DT::dataTableOutput("new_muts")
),
shiny::tabPanel("Mutation Frequencies",
shiny::textInput(inputId = "Muts",
label = "Mutation(s) (i.e S_D614G or S_del144/144; separate with commas)"),
shiny::selectInput(inputId = "Location_Muts",
label = "Location",
choices = unique(samp_data$Location),
multiple = TRUE),
shiny::plotOutput(outputId = "mut_freqs")
),
shiny::tabPanel("View Mutations",
shiny::selectInput(inputId = "Location_MutTable",
label = "Location",
choices = unique(samp_data$Location)),
shiny::selectInput(inputId = "Date",
label = "Date",
choices = unique(samp_data$date)),
shiny::numericInput(inputId = "DPThresh2",
label = "Minimum Depth",
value = 0),
shiny::checkboxInput(inputId = "maj_sub_muttab",
label = "Analyze Sublineages",
value = FALSE),
DT::dataTableOutput("mut_table")
)
))
server <- function(input, output, session){
output$lineages_present <- plotly::renderPlotly({
#filter for data for the selected Location
samp_data <- samp_data %>%
dplyr::filter(Location %in% input$Location)
#create master df that will store data for all selected lineages/variants
all_summary <- data.frame()
for (i in 1:length(input$Lineages)){ #loop over each selected lineage
#get the mutations characteristic of the current lineage
muts <- maj_lineage_muts %>%
dplyr::filter(characteristic == "Y") %>%
dplyr::filter(Lineage %in% input$Lineages[[i]]) %>%
dplyr::pull(ALT_ID)
#get the number of mutations characteristic of the current lineage
lineage_n <- length(muts)
#get proportion of characteristic mutations observed on each date for current Location and other summary info
summary <- samp_data %>%
#remove any duplicated mutations, keeping one with highest read depth
dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
dplyr::group_by(SAMP_NAME, date) %>%
dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
"Lineage" = paste0(input$Lineages[[i]], ""),
"Total Lineage-Characteristic Muts" = lineage_n,
#"Lineage_n" = paste0(input$Lineages[[i]], " (", lineage_n, ")"),
"Mean_Coverage_All" = mean(DP[DP >= input$DPThreshPlot], na.rm = TRUE),
"DP_n_all" = sum(!is.na(DP[DP >= input$DPThreshPlot])),
"PropHighDepth" = sum(DP[ALT_ID %in% muts] >= input$DPThreshPlot)/lineage_n) %>%
dplyr::ungroup() %>%
dplyr::mutate("ID" = paste0(SAMP_NAME, "_", date))
#recalculate Mean Coverage based on target mutations only and add in to 'summary' from above
summary2 <- samp_data %>%
dplyr::filter(ALT_ID %in% muts) %>%
dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
dplyr::group_by(SAMP_NAME, date) %>%
dplyr::summarise("Mean Coverage" = mean(DP[DP >= input$DPThreshPlot]),
"DP_n" = sum(!is.na(DP[DP >= input$DPThreshPlot]))) %>%
dplyr::ungroup() %>%
dplyr::mutate("ID" = paste0(SAMP_NAME, "_", date)) %>%
dplyr::select(ID, `Mean Coverage`, DP_n)
summary <- dplyr::left_join(x = summary, y = summary2, by = "ID")
#add data for current lineage to master df
all_summary <- dplyr::bind_rows(all_summary, summary)
}
#use days since earliest sample date as x axis (lubridate interval)
ints <- lubridate::interval(start = min(all_summary$date), end = all_summary$date)
#convert seconds to days
all_summary <- all_summary %>%
dplyr::mutate("days" = lubridate::int_length(ints)/86400)
#get vectors for labeling x-axis
break_dates <- unique(all_summary$date)
break_days <- unique(all_summary$days)
#check which type of mean depth to plot
if (input$DepthTag == "All Mutations") {
#replace Mean Coverage col with data from Mean_Coverage_All if plotting based on all mutations instead of just targert mutations.
all_summary <- all_summary %>%
dplyr::select(-`Mean Coverage`) %>%
dplyr::rename("Mean Coverage" = "Mean_Coverage_All") %>%
dplyr::select(-DP_n) %>%
dplyr::rename("DP_n" = "DP_n_all")
}
#create new column for fill to represent seq depth. Need a constant range for the scale for this to be useful, so capping it at 1000.
all_summary <- all_summary %>%
dplyr::mutate("mean_cov_scaled" = `Mean Coverage`) %>%
dplyr::mutate_at(dplyr::vars(mean_cov_scaled), ~replace(., mean_cov_scaled > 1000, 1000)) %>%
dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
"PropHighDepth" = round(`PropHighDepth`, 3),
"Mean Coverage" = round(`Mean Coverage`, 3))
#generate plot
if (all.target.muts == FALSE) {
pres_plot <- all_summary %>%
ggplot2::ggplot(ggplot2::aes(x = days,
y = `Proportion Present`,
text = paste("Lineage: ", Lineage, "</br>", "</br>",
"Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>",
"Proportion Present: ", `Proportion Present`, "</br>",
"Avg Seq Depth: ", `Mean Coverage`, "</br>",
"Avg Seq Based On ", DP_n, " mutations"))) +
ggplot2::geom_line(ggplot2::aes(colour = Lineage, group = Lineage), size = 1.5) +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme_classic() +
ggplot2::scale_color_brewer(palette = "Paired") +
ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Location)) +
ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, size = 9, vjust = 0.4),
axis.text.y = ggplot2::element_text(size = 9),
plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 9),
legend.title = ggplot2::element_blank()) +
ggplot2::geom_hline(yintercept = input$propThresh, color = "red", linetype = 2, alpha = 0.6) +
ggplot2::geom_point(data = all_summary, mapping = ggplot2::aes(x = days,
y = `Proportion Present`,
fill = mean_cov_scaled), size = 2.5, stroke = 0,
show.legend = FALSE) +
ggplot2::scale_fill_gradient(low = "#56B1F7",
high = "#132B43",
limits = c(0,1000))
plotly::ggplotly(pres_plot, tooltip = c("text"))
} else {
pres_plot <- all_summary %>%
ggplot2::ggplot(ggplot2::aes(x = days,
y = `Proportion Present`,
text = paste("Lineage: ", Lineage, "</br>", "</br>",
"Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>",
"Proportion Present: ", `Proportion Present`, "</br>",
"Proportion Exceeding Seq Depth Threshold: ", PropHighDepth, "</br>",
"Avg Seq Depth: ", `Mean Coverage`, "</br>",
"Avg Seq Based On ", DP_n, " mutations"))) +
ggplot2::geom_line(ggplot2::aes(colour = Lineage, group = Lineage), size = 1.5) +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme_classic() +
ggplot2::scale_color_brewer(palette = "Paired") +
ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Location)) +
ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, size = 9, vjust = 0.4),
axis.text.y = ggplot2::element_text(size = 9),
plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 9),
legend.title = ggplot2::element_blank()) +
ggplot2::geom_hline(yintercept = input$propThresh, color = "red", linetype = 2, alpha = 0.6) +
ggplot2::geom_point(data = all_summary, mapping = ggplot2::aes(x = days,
y = `Proportion Present`,
fill = mean_cov_scaled), size = 2.5, stroke = 0,
show.legend = FALSE) +
ggplot2::scale_fill_gradient(low = "#56B1F7",
high = "#132B43",
limits = c(0,1000))
plotly::ggplotly(pres_plot, tooltip = c("text"))
}
})
output$lineage_proportions <- plotly::renderPlotly({
#filter for data for the selected Location
samp_data <- samp_data %>%
dplyr::filter(Location %in% input$Location)
all_summary <- data.frame()
#loop over each selected lineage
for (i in 1:length(input$Lineages)){
#get the mutations characteristic of the current lineage
muts <- maj_lineage_muts %>%
dplyr::filter(characteristic == "Y") %>%
dplyr::filter(Lineage %in% input$Lineages[[i]]) %>%
dplyr::select(ALT_ID) %>%
unlist()
#get the number of mutations characteristic of the current lineage
lineage_n <- length(muts)
if (input$metric == "Mean") {
#get proportion of characteristic mutations observed in current sample and avg frequency of observed mutations
summary <- samp_data %>%
dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
dplyr::group_by(SAMP_NAME, date) %>%
dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
"Lineage" = paste0(input$Lineages[[i]], ""),
"Estimated Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]),
"muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]))), collapse = ";"),
"muts_absent" = paste0(c(sort(unique(muts[!muts %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
"lab1" = "\nMuts Present: ",
"lab2" = "\n\nMuts Absent: ") %>%
dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
"muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
"muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
} else { #use median
summary <- samp_data %>%
dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
dplyr::arrange(desc(DP), .by_group = TRUE) %>%
dplyr::ungroup() %>%
dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
dplyr::group_by(SAMP_NAME, date) %>%
dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
"Lineage" = paste0(input$Lineages[[i]], ""),
"Estimated Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]),
"muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]))), collapse = ";"),
"muts_absent" = paste0(c(sort(unique(muts[!muts %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
"lab1" = "\nMuts Present: ",
"lab2" = "\n\nMuts Absent: ") %>%
dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
"muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
"muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
}
#add data for current lineage to master df
all_summary <- dplyr::bind_rows(all_summary, summary)
}
if(nrow(sub_lineage_muts) > 0) {
get_sub_props <- function(sampname, dat, lin) {
samp_data_sub <- samp_data %>%
dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
char_sub_muts <- sub_lineage_muts %>%
dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
"DP" = tidyr::replace_na(DP, 0))
char_sub_summary <- char_sub_muts %>%
dplyr::group_by(Sublineage) %>%
dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n()) %>%
dplyr::ungroup() %>%
dplyr::filter(sub_prop_present >= input$propThresh) %>%
dplyr::pull(Sublineage) %>%
paste(collapse = ";")
if (char_sub_summary == "") {char_sub_summary <- NA}
char_sub_summary
}
get_sub_freqs <- function(sampname, dat, lin) {
samp_data_sub <- samp_data %>%
dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
char_sub_muts <- sub_lineage_muts %>%
dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
"DP" = tidyr::replace_na(DP, 0))
if (input$metric == "Mean") {
char_sub_summary <- char_sub_muts %>%
dplyr::group_by(Sublineage) %>%
dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"sub_freqs" = mean(AF[AF > 0 & DP >= input$DPThreshPlot])) %>%
dplyr::ungroup() %>%
dplyr::mutate_at(dplyr::vars(sub_freqs), ~replace(., is.na(.), 0))
if (input$scaled == "Scaled") {
char_sub_summary <- char_sub_summary %>%
dplyr::mutate("sub_freqs" = sub_scale(sub_freqs))
}
char_sub_summary <- char_sub_summary %>%
dplyr::filter(sub_prop_present >= input$propThresh) %>%
dplyr::mutate("sub_freqs" = round(sub_freqs, 3)) %>%
dplyr::pull(sub_freqs) %>%
paste(collapse = ";")
} else { #calculate median
char_sub_summary <- char_sub_muts %>%
dplyr::group_by(Sublineage) %>%
dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"sub_freqs" = median(AF[AF > 0 & DP >= input$DPThreshPlot])) %>%
dplyr::ungroup() %>%
dplyr::mutate_at(dplyr::vars(sub_freqs), ~replace(., is.na(.), 0))
if (input$scaled == "Scaled") {
char_sub_summary <- char_sub_summary %>%
dplyr::mutate("sub_freqs" = sub_scale(sub_freqs))
}
char_sub_summary <- char_sub_summary %>%
dplyr::filter(sub_prop_present >= input$propThresh) %>%
dplyr::mutate("sub_freqs" = round(sub_freqs, 3)) %>%
dplyr::pull(sub_freqs) %>%
paste(collapse = ";")
}
if (char_sub_summary == "") {char_sub_summary <- NA}
char_sub_summary
}
get_sub_muts <- function(sampname, dat, lin) {
samp_data_sub <- samp_data %>%
dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
char_sub_muts <- sub_lineage_muts %>%
dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
"DP" = tidyr::replace_na(DP, 0))
char_sub_summary <- char_sub_muts %>%
dplyr::group_by(Sublineage) %>%
dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
"sub_muts" = unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot])) %>%
dplyr::ungroup() %>%
dplyr::filter(sub_prop_present >= input$propThresh) %>%
dplyr::pull(sub_muts) %>%
paste(collapse = ";")
if (char_sub_summary == "") {char_sub_summary <- NA}
char_sub_summary
}
all_summary <- all_summary %>%
dplyr::rowwise() %>%
dplyr::mutate("sub_Proportion_Present" = get_sub_props(sampname = SAMP_NAME, dat = date, lin = Lineage),
"sub_Estimated_Freq" = get_sub_freqs(sampname = SAMP_NAME, dat = date, lin = Lineage),
"sub_muts_present" = get_sub_muts(sampname = SAMP_NAME, dat = date, lin = Lineage)) %>%
dplyr::ungroup()
}
#use days since earliest sample date as x axis (lubridate interval)
ints <- lubridate::interval(start = min(all_summary$date), end = all_summary$date)
#convert seconds to days
all_summary <- all_summary %>%
dplyr::mutate("days" = lubridate::int_length(ints)/86400)
#get vectors for labeling x-axis
break_dates <- unique(all_summary$date)
break_days <- unique(all_summary$days)
#generate plot
#only plot bars for lineages defined as "present" in the sample based on chosen proportion threshold
if (input$scaled == "Scaled") {
all_summary_sum <- all_summary %>%
dplyr::ungroup() %>%
dplyr::group_by(date) %>%
dplyr::summarise("sum" = sum(`Estimated Freq`)) %>%
dplyr::mutate("over1" = ifelse(sum > 1, "Y", "N"))
all_summary_sum <- dplyr::left_join(x = all_summary, y = all_summary_sum, by = "date")
to_scale <- all_summary_sum %>%
dplyr::filter(over1 == "Y") %>%
dplyr::group_by(date) %>%
dplyr::mutate("Estimated Freq" = `Estimated Freq`/sum(`Estimated Freq`))
unscaled <- all_summary_sum %>%
dplyr::filter(over1 == "N")
all_summary <- dplyr::bind_rows(to_scale, unscaled)
}
if (input$metric == "Mean") {
measure <- "Mean"
} else {
measure <- "Median"
}
all_summary <- all_summary %>%
dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
"Estimated Freq" = round(`Estimated Freq`,3))
if (nrow(sub_lineage_muts) == 0) {
freq_plot <- all_summary %>%
dplyr::filter(`Estimated Freq` > 0) %>%
ggplot2::ggplot(ggplot2::aes(x = days, y = `Estimated Freq`, fill = Lineage,
text = paste("Lineage: ", Lineage, "</br>", "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list))) +
ggplot2::geom_bar(stat = "identity",
position = "stack") +
ggplot2::theme_classic() +
ggplot2::scale_fill_brewer(palette = "Paired") +
ggplot2::ggtitle(paste0("Estimated Frequency of Each Lineage Present: ", input$Location)) +
ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
axis.text = ggplot2::element_text(size = 9),
plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 9),
legend.title = ggplot2::element_blank()) +
ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
} else {
freq_plot <- all_summary %>%
dplyr::filter(`Estimated Freq` > 0) %>%
ggplot2::ggplot(ggplot2::aes(x = days,
y = `Estimated Freq`,
fill = Lineage,
text = paste("Lineage: ", Lineage, "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list, "</br>", "Sublineages Present: ", sub_Proportion_Present, "</br>", "Sublineage Freqs: ", sub_Estimated_Freq, "</br>", "Sublineage Muts Present: ", sub_muts_present))) +
ggplot2::geom_bar(stat = "identity",
position = "stack") +
ggplot2::theme_classic() +
ggplot2::scale_fill_brewer(palette = "Paired") +
ggplot2::ggtitle(paste0("Estimated Frequency of Each Lineage Present: ", input$Location)) +
ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
axis.text = ggplot2::element_text(size = 9),
plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 9),
legend.title = ggplot2::element_blank()) +
ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
}
plotly::ggplotly(p = freq_plot, tooltip = "text") %>%
plotly::layout(hoverlabel = list(align = "left"))
})
#create table with mutations first observed on or after a selected date
output$new_muts <- DT::renderDataTable({
if (input$maj_sub == FALSE) {
#merge in lineage information
samp_data <- dplyr::left_join(x = samp_data,
y = maj_lineage_muts,
by = "ALT_ID") %>%
dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
#filter for mutations observed with a specific sequencing depth
#get all lineages a given mutation is associated with as a ';'-delimited string - will be printed to column
samp_data <- samp_data %>%
dplyr::filter(DP >= input$DPThresh) %>%
dplyr::group_by(SAMP_NAME, date, Location, ALT_ID, AF, DP, CHR, POS, GENE) %>%
dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";")) %>%
dplyr::ungroup()
#ID the mutations observed prior to the chosen date - these will be filtered out
already_observed <- samp_data %>%
dplyr::filter(date < input$MaxDate) %>%
dplyr::select(ALT_ID, AF, SAMP_NAME, Group) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
all_uniqs <- samp_data %>%
dplyr::select(ALT_ID, AF, SAMP_NAME, Group, Location, date, DP) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#ID mutations first observed on or after selected date
new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
dplyr::rename("DATE" = "date",
"LOCATION" = "Location",
"SEQ DEPTH" = "DP",
"ASSOCIATED LINEAGES" = "Group") %>%
dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`, `ASSOCIATED LINEAGES`)
} else{
#merge in lineage information
samp_data <- dplyr::left_join(x = samp_data,
y = sub_lineage_muts,
by = "ALT_ID") %>%
dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
#filter for mutations observed with a specific sequencing depth
#get all lineages a given mutation is associated with as a ';'-delimited string - will be printed to column
samp_data <- samp_data %>%
dplyr::filter(DP >= input$DPThresh) %>%
dplyr::group_by(SAMP_NAME, date, Location, ALT_ID, AF, DP, CHR, POS, GENE) %>%
dplyr::summarize("Group" = paste(unique(Sublineage),collapse = ";")) %>%
dplyr::ungroup()
#ID the mutations observed prior to the chosen date - these will be filtered out
already_observed <- samp_data %>%
dplyr::filter(date < input$MaxDate) %>%
dplyr::select(ALT_ID, AF, SAMP_NAME, Group) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
all_uniqs <- samp_data %>%
dplyr::select(ALT_ID, AF, SAMP_NAME, Group, Location, date, DP) %>%
dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
unlist()
#ID mutations first observed on or after selected date
new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>%
dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
dplyr::rename("DATE" = "date",
"LOCATION" = "Location",
"SEQ DEPTH" = "DP",
"ASSOCIATED SUBLINEAGES" = "Group") %>%
dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`, `ASSOCIATED SUBLINEAGES`)
}
})
output$mut_freqs <- shiny::renderPlot({
#get set of mutations of interest
#these are entered as comma separated values in text box. Allows for spaces between or not.
target_muts <- stringr::str_split(input$Muts, pattern = ",\\s?") %>% unlist()
#plot frequencies of selected mutations for the selected Sample(s)
#multiple mutations are differentiated by color
#multiple sites are plotted as separate facets
samp_data %>%
dplyr::filter(Location %in% input$Location_Muts) %>%
dplyr::filter(ALT_ID %in% target_muts) %>%
ggplot2::ggplot(ggplot2::aes(x = date, y = AF, color = Location)) +
ggplot2::geom_line(size = 2) +
ggplot2::geom_point(size = 2.5) +
ggplot2::scale_color_brewer(palette = "Dark2") +
ggplot2::theme_classic() +
ggplot2::facet_wrap(dplyr::vars(c(ALT_ID))) +
ggplot2::coord_cartesian(ylim = c(0,1)) +
ggplot2::theme(legend.text = ggplot2::element_text(size = 14),
legend.title = ggplot2::element_text(size = 15),
strip.text = ggplot2::element_text(size = 15),
axis.text.x = ggplot2::element_text(size = 14),
axis.text.y = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_text(size = 15)) +
ggplot2::labs(y = "Mutation Frequency", x = NULL)
})
#update the "Date" drop-down box to only incude dates for which samples exist for the selected Sample
shiny::observeEvent(input$Location_MutTable, {
date_options <- samp_data %>%
dplyr::filter(Location == input$Location_MutTable) %>%
dplyr::distinct(date)
date_options <- date_options$date
shiny::updateSelectInput(session, "Date",
label = "Date",
choices = date_options)
})
#create table with all mutations observed in selected sample and any lineages they have been associated with.
output$mut_table <- DT::renderDataTable({
if (input$maj_sub_muttab == FALSE) {
#merge in info on lineages associated with specific mutations
samp_data <- dplyr::left_join(x = samp_data,
y = maj_lineage_muts,
by = "ALT_ID")
#combine all lineages associated with a given mutation - separate with ';'
samp_data <- samp_data %>%
dplyr::group_by(SAMP_NAME, Location, date, CHR, POS, ALT_ID, AF, DP) %>%
dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";")) %>%
dplyr::ungroup()
#create table for selected sample/date
samp_data %>%
tidyr::separate(col = ALT_ID,
into = c("GENE", "MUTATION"),
sep = "_") %>%
dplyr::filter(Location %in% input$Location_MutTable) %>%
dplyr::filter(date == input$Date) %>%
dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, Group, DP) %>%
dplyr::filter(DP >= input$DPThresh2) %>%
dplyr::mutate("AF" = round(AF, digits = 3)) %>%
dplyr::rename("DATE" = "date",
"ASSOCIATED LINEAGE(S)" = "Group",
"FREQ" = "AF",
"SEQ DEPTH" = "DP",
"LOCATION" = "Location") %>%
as.data.frame()
} else {
#merge in info on lineages associated with specific mutations
samp_data <- dplyr::left_join(x = samp_data,
y = sub_lineage_muts,
by = "ALT_ID")
#combine all lineages associated with a given mutation - separate with ';'
samp_data <- samp_data %>%
dplyr::group_by(SAMP_NAME, Location, date, CHR, POS, ALT_ID, AF, DP) %>%
dplyr::summarize("Group" = paste(unique(Sublineage),collapse = ";")) %>%
dplyr::ungroup()
#create table for selected sample/date
samp_data %>%
tidyr::separate(col = ALT_ID,
into = c("GENE", "MUTATION"),
sep = "_") %>%
dplyr::filter(Location %in% input$Location_MutTable) %>%
dplyr::filter(date == input$Date) %>%
dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, Group, DP) %>%
dplyr::filter(DP >= input$DPThresh2) %>%
dplyr::mutate("AF" = round(AF, digits = 3)) %>%
dplyr::rename("DATE" = "date",
"ASSOCIATED SUBLINEAGE(S)" = "Group",
"FREQ" = "AF",
"SEQ DEPTH" = "DP",
"LOCATION" = "Location") %>%
as.data.frame()
}
})
}
## run Shiny app with dates but no lineages
shiny::shinyApp(ui = ui, server = server)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.