R/app.R
In MixviR: Analysis and Exploration of Mixed Microbial Genomic Samples

Documented in explore_mutations

#' MixviR Shiny Dashboard
#'
#' Open dashboard to explore mutation data generated with `call_mutations()`.
#' @param muts.df A data frame (produced by `call_mutations()`) storing mutation information for samples to analyze. Must contain columns SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, & DP. Alternatively, the mutation data can be read in from a (tab-separated) file with the `read.muts.from()` argument. See also the *write.mut.table* argument in `call_mutations()`.
#' @param read.muts.from An alternative to *muts.df* for providing mutation input. If a data frame generated by `call_mutations()` was previously written to a (comma separated) file (see *write.mut.table* in `call_mutations()`), the mutation data can be read in from that file by providing its path. The fields "SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP" must be present (additional fields will be ignored).
#' @param dates Path to optional csv file with cols "SAMP_NAME", "LOCATION", and "DATE". Sample names need to match those in the *muts.df* data frame created by `call_mutations()`. Dates should be provided in the format *mmddyyyy*.
#' @param lineage.muts Path to optional csv file with required cols "Gene", "Mutation", and  "Lineage" defining mutations associated with lineages of interest. See example file at "https://github.com/mikesovic/MixviR/blob/main/mutation_files/outbreak_20211202.csv". Additional columns will be ignored.
#' @param all.target.muts Logical to indicate whether results for all target mutations were written to the output of the `call_mutations()` function. See *write.all.targets* option in `call_mutations()`. Default FALSE. If TRUE, more informative sequencing depth information can be provided in the dashboard.
#' @keywords shiny
#' @return Shiny Dashboard to Explore Data
#' @importFrom magrittr %>%
#' @export
#' @examples
#' if (interactive()) {explore_mutations(read.muts.from = system.file("extdata", 
#'                                                                    "sample_mutations.csv", 
#'                                                                    package = "MixviR"),
#'                                       lineage.muts = system.file("extdata", 
#'                                                                  "example_lineage_muts.csv", 
#'                                                                  package = "MixviR"))}
#' if (interactive()) {explore_mutations(read.muts.from = system.file("extdata", 
#'                                                                    "sample_mutations.csv", 
#'                                                                    package = "MixviR"), 
#'                                       dates = system.file("extdata", 
#'                                                           "example_location_date.csv", 
#'                                                           package = "MixviR"), 
#'                                       lineage.muts = system.file("extdata", 
#'                                                                  "example_lineage_muts.csv", 
#'                                                                  package = "MixviR"))}

##Which arguments are defined determine what tabs are available in the Shiny dashboard.
##If dates and lineage.def are both defined, get all 4 tabs, with associated lineages in tables.
##If lineage.def is defined but dates is not, get plots for individual samples with all lineages along x-axis and mutation table without dates.
##If dates are defined but lineages aren't, get new mutations table, mutation freq plot, and mutations table (with no lineages).
##If neither are defined, get just mutations table with no lineages.

explore_mutations <- function(muts.df,
                              dates = NULL, 
                              lineage.muts = NULL, 
                              read.muts.from = NULL,
                              all.target.muts = FALSE) {
 
  if (is.null(read.muts.from)) {
    samp_data <- muts.df %>%
      dplyr::select(SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP) 
  } else {
    samp_data <- readr::read_csv(read.muts.from, show_col_types = FALSE) %>%
      dplyr::select(SAMP_NAME, CHR, POS, GENE, ALT_ID, AF, DP) 
  }
  
  
  #create function to scale sublineage frequencies if present
  sub_scale <- function(x) {
    freq_sum <- sum(x)
    if (freq_sum > 1) {
      freq_sum <- x/freq_sum
    }
    return(freq_sum)
  }
  
  #read in mutations associated with lineages (lineage.muts) if present
  #cols 'Gene', 'Mutation', and 'Lineage' are required in the file. 'Sublineage' is optional.
  
  if (!is.null(lineage.muts)) {
    lineage_muts <- readr::read_csv(lineage.muts, show_col_types = FALSE)
    
    #check to see if there is a Sublineage column present. If not, add one.
    if (!"Sublineage" %in% names(lineage_muts)) {
      lineage_muts <- lineage_muts %>% dplyr::mutate("Sublineage" = NA) 
    }
    
    #clarify column types and merge Gene and Mutation into ALT_ID
    lineage_muts <- lineage_muts %>%
      dplyr::mutate("Gene" = as.character(Gene),
                    "Mutation" = as.character(Mutation),
                    "Lineage" = as.character(Lineage),
                    "Sublineage" = as.character(Sublineage)) %>%
      tidyr::unite("ALT_ID",
                   Gene, Mutation,
                   sep = "_") 
    
    
    #get major lineage muts and create indicator column that defines the characteristic mutations (those not occuring in more than one lineage)
    maj_lineage_muts <- lineage_muts %>%
      dplyr::filter(is.na(Sublineage)) %>%
      dplyr::mutate("characteristic" = ifelse(ALT_ID %in% ALT_ID[duplicated(ALT_ID)], "N", "Y"))
    
    #get sublineage muts, remove those shared with the respective major lineage, and create indicator column that defines the sub-lineage characteristic mutations.
    maj_lineage_mut_list <- paste0(maj_lineage_muts$Lineage, "_", maj_lineage_muts$ALT_ID)
    
    sub_lineage_muts <- lineage_muts %>%
      dplyr::filter(!is.na(Sublineage)) %>%
      tidyr::unite("test",
                   Lineage, ALT_ID,
                   sep = "_", 
                   remove = FALSE) %>%
      dplyr::filter(!test %in% maj_lineage_mut_list) %>%
      dplyr::select(ALT_ID, Lineage, Sublineage) %>%
      dplyr::group_by(Lineage) %>%
      dplyr::mutate("sub_characteristic" = ifelse(ALT_ID %in% ALT_ID[duplicated(ALT_ID)], "N", "Y")) %>%
      dplyr::ungroup()
  }  
    
#from the above, have 
  #1) df maj_lineage_muts that has mutations associated with major lineages along with an indicator variable (characteristic) that indicates whether the mutation is unique to the lineage.
  #2) df sub_lineage_muts that has mutations that are only associated with sublineages (not present in main lineage lists) and has indicator variable (sub_characteristic) that indicates 
      #whether the mutation is unique to the sublineage within its respective lineage (in other words, if the sublineage mutation is not shared with other sublineages within the 
      #respective parent lineage, it is considered characteristic, even if it is associated with a sublineage under another parent lineage).
  
  
  
  if (is.null(dates)) {
    
    if (is.null(lineage.muts)) { #no dates or lineage.muts - get only mutations table with no lineage information column in dashboard
      
      ui <- shiny::fluidPage(shiny::tabsetPanel(shiny::tabPanel("View Mutations",
              shiny::selectInput(inputId = "Sample_mutTable",
                                 label = "Sample",
                                 choices = unique(samp_data$SAMP_NAME)),
              shiny::numericInput(inputId = "DPThresh",
                                  label = "Min Seq Depth",
                                  value = 0),
              DT::dataTableOutput("mut_table"))
            )
      )
      
      server <- function(input, output, session){
        
        output$mut_table <- DT::renderDataTable({
          
          samp_data %>% 
            dplyr::filter(SAMP_NAME %in% input$Sample_mutTable & DP >= input$DPThresh) %>%
            dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2"),
                          "AF" = round(AF, digits = 3)) %>%
            dplyr::select(SAMP_NAME, CHR, POS, GENE, MUTATION, AF, DP) %>% 
            dplyr::rename("FREQ" = "AF",
                          "SEQ DEPTH" = "DP") %>%
            as.data.frame()
        })
      }  
    
    ## run Shiny app for no dates/no lineages
    shiny::shinyApp(ui = ui, server = server)
      
      
    } else{ #no dates, but do have lineage defining-mutations - get mutation table and barplot that has all lineages along x-axis.
      
      ui <- shiny::fluidPage(shiny::tabsetPanel(
        shiny::tabPanel("Lineages Present",
                        shiny::sidebarPanel(width = 2,
                                            shiny::selectInput(inputId = "Sample_VarPres",
                                                              label = "Sample",
                                                              choices = unique(samp_data$SAMP_NAME)),
                                          shiny::sliderInput(inputId = "propThresh",
                                                              label = "Presence Threshold",
                                                              min = 0, max = 1,
                                                              value = 0.5),
                                          shiny::numericInput(inputId = "DPThreshPlot",
                                                              label = "Min Seq Depth",
                                                              value = 0),
                                          shiny::selectInput(inputId = "scaled",
                                                             label = "Frequencies",
                                                             choices = c("Scaled", "Unscaled")),
                                          shiny::selectInput(inputId = "metric",
                                                             label = "Freq Measure",
                                                             choices = c("Mean", "Median"))),
                        shiny::mainPanel(width = 10,
                                         shiny::verticalLayout(
                                           plotly::plotlyOutput(outputId = "lineages_present"),
                                           plotly::plotlyOutput(outputId = "lineage_proportions"))
                 )
        ),
        
        shiny::tabPanel("View Mutations",
                 
                 shiny::selectInput(inputId = "Sample_MutTable",
                                   label = "Sample",
                                   choices = unique(samp_data$SAMP_NAME)),
                 shiny::numericInput(inputId = "DPThresh",
                                     label = "Min Seq Depth",
                                     value = 0),
                 DT::dataTableOutput("mut_table")
        )))
      
      server <- function(input, output, session){ 
        
        output$lineages_present <- plotly::renderPlotly({
          #get data for the selected Sample
          samp_data <- samp_data %>%
            dplyr::filter(SAMP_NAME %in% input$Sample_VarPres)
          
          #get rid of any mutation duplicates, keeping one with highest seq depth
          #these generally occur when an amino acid change is caused by two or more SNPs - if so, it's repeated for each variant called
          samp_data <- samp_data %>% 
            dplyr::group_by(ALT_ID) %>%
            dplyr::arrange(desc(DP), .by_group = TRUE) %>%
            dplyr::ungroup() %>%
            dplyr::distinct(ALT_ID, .keep_all = TRUE)
          
          #join mutations observed in current sample in to lineage_muts df 
          char_muts <- dplyr::left_join(x = maj_lineage_muts, y = samp_data, by = "ALT_ID") 
          
          #summarize by lineage
          #pull out the mutations that only occur in a single lineage for analysis of lineages present in sample
          #"Proportion Present" represents the proportion of characteristic mutations that are present with AF > 0 (this is necessary in case "write.all.targets" was used) and a seq depth greater than the threshold.
          #if "write.all.muts" wasn't used, then all mutations in the dataset have AF > 0, and the only relevant value is the depth threshold, which can possibly be left at zero unless maybe worried about something like index hopping.
          #if "write.all.muts" was used, then some mutations are included with AF=0. In this case, since all mutations of interest are reported, can check to see what proportion of them have depths that are reasonable ("Proportion High Depth"). This will be reported as a tooltip if all.target.muts is TRUE.
          char_summary <- char_muts %>%
            dplyr::filter(characteristic == "Y") %>%
            dplyr::group_by(Lineage) %>%
            dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                             "Total Lineage-Characteristic Muts" = dplyr::n(),
                             "Proportion High Depth" = (sum(DP >= input$DPThreshPlot, na.rm = TRUE))/dplyr::n()) %>%
            dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
                          "Proportion High Depth" = round(`Proportion High Depth`, 3)) %>%
            dplyr::distinct()
          
          #generate geom_col plot to represent proportion of lineage-characteristic mutations present for each lineage in the sample
          #representing the proportion of characteristic mutations with high depth as a tooltip if all.target.muts = TRUE
          if (all.target.muts == FALSE) {
            pres_plot <- char_summary %>%
              ggplot2::ggplot(ggplot2::aes(x = Lineage, 
                                           y = `Proportion Present`, 
                                           text = paste("Lineage: ", Lineage, "</br>", "</br>", 
                                                        "Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>", 
                                                        "Proportion Present: ", `Proportion Present`))) +
              ggplot2::geom_col(fill = "#A6CEE3") +
              ggplot2::coord_cartesian(ylim = c(0,1)) +
              ggplot2::theme_classic() +
              ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Sample_VarPres)) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                    axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
                    axis.text = ggplot2::element_text(size = 13),
                    axis.text.y = ggplot2::element_text(size = 14),
                    plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
                    axis.title.y = ggplot2::element_blank()) +
              ggplot2::geom_hline(yintercept = input$propThresh, color = "gray", linetype = 2, alpha = 1)
          } else {
            pres_plot <- char_summary %>%
              ggplot2::ggplot(ggplot2::aes(x = Lineage, 
                                           y = `Proportion Present`, 
                                           text = paste("Lineage: ", Lineage, "</br>", "</br>", 
                                                        "Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>", 
                                                        "Proportion Present: ", `Proportion Present`, "</br>", 
                                                        "Proportion With Seq Depth > Threshold: ", `Proportion High Depth`))) +
              ggplot2::geom_col(fill = "#A6CEE3") +
              ggplot2::coord_cartesian(ylim = c(0,1)) +
              ggplot2::theme_classic() +
              ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Sample_VarPres)) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                             axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
                             axis.text = ggplot2::element_text(size = 13),
                             axis.text.y = ggplot2::element_text(size = 14),
                             plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
                             axis.title.y = ggplot2::element_blank()) +
              ggplot2::geom_hline(yintercept = input$propThresh, color = "gray", linetype = 2, alpha = 1)
          }
          
          plotly::ggplotly(pres_plot, tooltip = "text")
        })
        
        output$lineage_proportions <- plotly::renderPlotly({  
          
          #get data for the selected Sample
          samp_data <- samp_data %>%
            dplyr::filter(SAMP_NAME %in% input$Sample_VarPres) 
          
          #get rid of any mutation duplicates, keeping one with highest DP
          #these generally occur when an amino acid change is caused by two or more SNPs - if so, it's repeated for each variant called
          samp_data <- samp_data %>% 
            dplyr::group_by(ALT_ID) %>%
            dplyr::arrange(desc(DP), .by_group = TRUE) %>%
            dplyr::ungroup() %>%
            dplyr::distinct(ALT_ID, .keep_all = TRUE)
          
          #join mutations observed in current sample in to lineage_muts df
          char_muts <- dplyr::left_join(x = maj_lineage_muts, y = samp_data, by = "ALT_ID")
          
          #if defined, join sub lineage mutations observed in current sample in to sub_lineage_muts df
          if (nrow(sub_lineage_muts) > 0) {
            sub_char_muts <- dplyr::left_join(x = sub_lineage_muts, y = samp_data, by = "ALT_ID")
          }
          
          if (input$metric == "Mean") {  #Estimated Frequecies to be plotted are dependent on whether they are based on the mean or median of the AF values
            
            #summarize based on main lineages to get proportion characteristic mutations present for each lineage (same metric as above), associated estimated freqs, and mutation lists (muts present and muts absent - appear in tooltip)
            char_summary <- char_muts %>%
              dplyr::filter(characteristic == "Y") %>%
              dplyr::group_by(Lineage) %>%
              dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                        "Estimated Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
                        "muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
                        "muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
                        "lab1" = "Muts Present: ",
                        "lab2" = "\n Muts Absent: ") %>%
              dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
                            "muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
                            "muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list),
                            "muts_list" = gsub(";$", "", muts_list)) %>%
              dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
              dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))  
            
            #summarize for sublineages if there are sublineage-associated mutations present
            #sublineage info will be shown in tooltip info
            if(nrow(sub_lineage_muts) > 0) { 
              sub_char_summary <- sub_char_muts %>%
                dplyr::filter(sub_characteristic == "Y") %>%
                dplyr::group_by(Lineage, Sublineage) %>% 
                dplyr::summarize("sub_Proportion_Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                                 "sub_Estimated_Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
                                 "sub_muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
                                 "sub_muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";")) %>%
                dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., is.nan(.), 0)) %>%
                dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., `sub_Proportion_Present` < input$propThresh, 0)) %>%
                #dplyr::mutate("sub_Proportion_Present" = round(`sub_Proportion_Present`, 3),
                #              "sub_Estimated_Freq" = round(`sub_Estimated_Freq`, 3)) %>%
                #dplyr::mutate("sub_Estimated_Freq" = round(sub_Estimated_Freq, digits = 3)) %>%
                dplyr::ungroup()
            }
            
            
          } else {
            char_summary <- char_muts %>%
              dplyr::filter(characteristic == "Y") %>%
              dplyr::group_by(Lineage) %>%
              dplyr::summarize("Proportion Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                               "Estimated Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
                               "muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
                               "muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
                               "lab1" = "Muts Present: ",
                               "lab2" = "\n Muts Absent: ") %>%
              dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
                            "muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
                            "muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list),
                            "muts_list" = gsub(";$", "", muts_list)) %>%
              dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
              dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
            
            #summarize for sublineages if there are sublineage-associated mutations present
            if(nrow(sub_lineage_muts) > 0) { 
              sub_char_summary <- sub_char_muts %>%
                dplyr::filter(sub_characteristic == "Y") %>%
                dplyr::group_by(Lineage, Sublineage) %>% 
                dplyr::summarize("sub_Proportion_Present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                                 "sub_Estimated_Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot], na.rm = TRUE),
                                 "sub_muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot]))), collapse = ";"),
                                 "sub_muts_absent" = paste0(c(sort(unique(ALT_ID[!ALT_ID %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";")) %>%
                dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., is.nan(.), 0)) %>%
                dplyr::mutate_at(dplyr::vars(`sub_Estimated_Freq`), ~replace(., `sub_Proportion_Present` < input$propThresh, 0)) %>%
                #dplyr::mutate("sub_Proportion_Present" = round(`sub_Proportion_Present`, 3),
                #              "sub_Estimated_Freq" = round(`sub_Estimated_Freq`, 3)) %>%
                #dplyr::mutate("sub_Estimated_Freq" = round(sub_Estimated_Freq)) %>%
                dplyr::ungroup()
            }
          }
          
          #round values
          char_summary <- char_summary %>%
            dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
                          "Estimated Freq" = round(`Estimated Freq`, 3))
          
          
          if(nrow(sub_lineage_muts) > 0) {
            sub_char_summary <- sub_char_summary %>%
              dplyr::mutate("sub_Proportion_Present" = round(sub_Proportion_Present, 3),
                            "sub_Estimated_Freq" = round(sub_Estimated_Freq, 3))
          }  
          
          if (input$scaled == "Scaled") {
            freq_sum <- sum(char_summary$`Estimated Freq`)
            if (freq_sum > 1) {
              scaled_vals <- char_summary$`Estimated Freq`/freq_sum
              char_summary <- char_summary %>%
                dplyr::mutate("Estimated Freq" = scaled_vals)
            }
            
            #scale sublineage frequencies if necessary
            if(exists("sub_char_summary")) {
                sub_char_summary <- sub_char_summary %>%
                  dplyr::group_by(Sublineage) %>%
                  dplyr::mutate("sub_Estimated_Freq" = sub_scale(x = sub_Estimated_Freq)) %>%
                  dplyr::ungroup()
            }
          }
          
          
          #generate geom_col plot to represent proportions of lineages present in the sample.
          #only plotted for lineages identified as "present" based on chosen threshold (otherwise, frequencies were all adjusted to zero above).
          if(input$metric == "Mean") {
            measure <- "Mean"
          } else {
            measure <- "Median"
          }
        
          
          if(!exists("sub_char_summary")) {    
             freq_plot <- char_summary %>%
                dplyr::mutate("lab" = "samp") %>%
                dplyr::filter(`Estimated Freq` > 0) %>% #if a sample has no lineages designated as present (all estimated freqs are 0), the plot will throw an error about aesthetics.
                ggplot2::ggplot(ggplot2::aes(x = lab, 
                                             y = `Estimated Freq`, 
                                             fill = Lineage, 
                                             text = paste("Lineage: ", Lineage, "</br>", "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list))) +
                ggplot2::geom_bar(stat = "identity",
                                  position = "stack") +
                ggplot2::theme_classic() +
                ggplot2::scale_fill_brewer(palette = "Paired") +
                ggplot2::coord_cartesian(ylim = c(0,1)) +
                ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
                ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                               axis.text.x = ggplot2::element_blank(),
                               axis.text = ggplot2::element_text(size = 13),
                               plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
                               axis.text.y = ggplot2::element_text(size = 14),
                               axis.title.y = ggplot2::element_blank(),
                               legend.text = ggplot2::element_text(size = 12),
                               legend.title = ggplot2::element_blank()) +
                ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
              
            
              plotly::ggplotly(freq_plot, tooltip = c("text")) %>%
                plotly::layout(hoverlabel = list(align = "left"))
              
          } else { #there is sublineage info present that will be included in the plot
            
            #sub_char_summary <- sub_char_summary %>%
            #  dplyr::select(Lineage, Sublineages_present, Sub_estimated_freqs, sub_muts_present)
            
            sub_char_summary <- sub_char_summary %>%
              dplyr::ungroup() %>%
              dplyr::group_by(Lineage) %>%
              dplyr::summarize("Sublineages_present" = paste0(c(Sublineage[sub_Proportion_Present >= input$propThresh]), collapse = ";"),
                               "Sub_estimated_freqs" = paste0(c(sub_Estimated_Freq[sub_Proportion_Present >= input$propThresh]), collapse = ";"),
                               "sub_muts_present" = paste0(c(unique(sub_muts_present)), collapse = ";")) %>%
              dplyr::mutate("sub_muts_present" = gsub('(.{1,30})(;|$)', '\\1;\n', sub_muts_present)) %>%
              dplyr::ungroup()
            
            
            char_summary <- dplyr::left_join(x = char_summary, y = sub_char_summary, by = "Lineage")
            
            test_char_summary <- char_summary %>%
              dplyr::filter(`Estimated Freq` > 0)
            
            if (nrow(test_char_summary) == 0) { #deal with case where everything is filtered out - otherwise get error related to aesthetics
              freq_plot <- data.frame("lab" = "samp",
                                    "EstimatedFreq" = 0,
                                    "Lineage" = unique(char_summary$Lineage)) %>%
                ggplot2::ggplot(mapping = ggplot2::aes(x = lab,
                                              y = `EstimatedFreq`,
                                              fill = Lineage)) +
                ggplot2::geom_bar(stat = "identity",
                                  position = "stack") +
                ggplot2::theme_classic() +
                ggplot2::scale_fill_brewer(palette = "Paired") +
                ggplot2::coord_cartesian(ylim = c(0,1)) +
                ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
                ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                               axis.text.x = ggplot2::element_blank(),
                               axis.text = ggplot2::element_text(size = 13),
                               plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
                               axis.text.y = ggplot2::element_text(size = 14),
                               axis.title.y = ggplot2::element_blank(),
                               legend.text = ggplot2::element_text(size = 12),
                               legend.title = ggplot2::element_blank()) +
                ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
              
            }
            
            else{
              freq_plot <- char_summary %>%
                dplyr::mutate("lab" = "samp") %>%
                dplyr::filter(`Estimated Freq` > 0) %>%
                ggplot2::ggplot(ggplot2::aes(x = lab, 
                                             y = `Estimated Freq`, 
                                             fill = Lineage, 
                                             text = paste("Lineage: ", Lineage, "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list, "</br>", "Sublineages Present: ", Sublineages_present, "</br>", "Sublineage Freqs: ", Sub_estimated_freqs, "</br>", "Sublineage Muts Present: ", sub_muts_present))) +
                ggplot2::geom_bar(stat = "identity",
                                  position = "stack") +
                ggplot2::theme_classic() +
                ggplot2::scale_fill_brewer(palette = "Paired") +
                ggplot2::coord_cartesian(ylim = c(0,1)) +
                ggplot2::ggtitle(paste0("Estimated Frequency of Lineages Present: ", input$Sample_VarPres)) +
                ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                               axis.text.x = ggplot2::element_blank(),
                               axis.text = ggplot2::element_text(size = 13),
                               plot.title = ggplot2::element_text (color = "black", size= 12, face="bold"),
                               axis.text.y = ggplot2::element_text(size = 14),
                               axis.title.y = ggplot2::element_blank(),
                               legend.text = ggplot2::element_text(size = 12),
                               legend.title = ggplot2::element_blank()) +
                ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
            }
            
            plotly::ggplotly(freq_plot, tooltip = c("text")) %>%
              plotly::layout(hoverlabel = list(align = "left"))
          }
        })
        
        #generate mutation table that includes column of lineages the mutation is associated with
        output$mut_table <- DT::renderDataTable({
          
          #join lineage muatation info into sample data
          samp_data <- dplyr::left_join(x = samp_data,
                                        y = lineage_muts,
                                        by = "ALT_ID")
          
          #combine all lineages and sublineages associated with a given mutation - separate with ';'
          #this info will be included as a column in the table
          samp_data <- samp_data %>% 
            dplyr::group_by(SAMP_NAME, CHR, POS, ALT_ID, AF, DP) %>%
            dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";"),
                             "Group_sub" = paste(unique(Sublineage[!is.na(Sublineage)]),collapse = ";"),
                             "GENE" = GENE) %>% 
            dplyr::ungroup() %>%
            dplyr::distinct() %>%
            dplyr::mutate("Group_sub" = stringr::str_replace(Group_sub, "(.*?;.*?;.*?)(;.*)", "\\1\\.\\.\\."))
          
          #create table for selected sample
          samp_data %>% 
            dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
            dplyr::filter(SAMP_NAME %in% input$Sample_MutTable) %>%
            dplyr::select(SAMP_NAME, CHR, POS, GENE, MUTATION, AF, Group, Group_sub, DP) %>% 
            dplyr::filter(DP >= input$DPThresh) %>%
            dplyr::mutate("FREQ" = round(AF, digits = 3)) %>% 
            dplyr::rename("ASSOCIATED LINEAGE(S)" = "Group",
                          "ASSOCIATED SUBLINEAGE(S)" = "Group_sub",
                          "SEQ DEPTH" = "DP") %>%
            dplyr::select(-AF) %>%
            as.data.frame()
        })
    }
    
      ## run Shiny app for lineages but no dates
      shiny::shinyApp(ui = ui, server = server)
    }  
    
  } else { #samples have associated dates

    #read in file with locations and dates for samples
    dates_df <- readr::read_csv(dates, col_types = "cci")
    
    samp_data <- dplyr::left_join(x = samp_data, y = dates_df, by = "SAMP_NAME") %>%
      dplyr::mutate("date" = lubridate::mdy(DATE)) %>%
      dplyr::mutate("Location" = as.character(LOCATION)) %>%
      dplyr::select(-DATE, -LOCATION)
    
    if (is.null(lineage.muts)) { #have dates but no lineage definitions - output new mutations table, mutation freq plot, and mutations table (without associated lineages column).

      ui <- shiny::fluidPage(shiny::tabsetPanel(
        shiny::tabPanel("New Mutations",
                        shiny::selectInput(inputId = "MaxDate",
                             label = "Mutations First Detected On Or After...",
                             choices = unique(samp_data$date)),
                        shiny::numericInput(inputId = "DPThresh",
                              label = "Minimum Depth",
                              value = 5),
                 DT::dataTableOutput("new_muts")),
        shiny::tabPanel("Mutation Frequencies",
                        shiny::textInput(inputId = "Muts",
                           label = "Mutation(s) (i.e S_D614G or S_del144/144; separate with commas)"),
                        shiny::selectInput(inputId = "Location_Muts",
                             label = "Location",
                             choices = unique(samp_data$Location),
                             multiple = TRUE),
                        shiny::plotOutput(outputId = "mut_freqs")),
        shiny::tabPanel("View Mutations",
                       shiny::selectInput(inputId = "Sample_MutTable",
                             label = "Location",
                             choices = unique(samp_data$Location)),
                        shiny::selectInput(inputId = "Date",
                             label = "Date",
                             choices = unique(samp_data$date)),
                        shiny::numericInput(inputId = "DPThresh2",
                                            label = "Minimum Depth",
                                            value = 0),
                 DT::dataTableOutput("mut_table"))
     ))
      
      
      server <- function(input, output, session){
        
        #create table with mutations first observed on or after a selected date
        output$new_muts <- DT::renderDataTable({
          
          #filter for mutations observed with a specific sequencing depth
          samp_data <- samp_data %>%
            dplyr::filter(DP >= input$DPThresh) %>%
            dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
          
          #ID the mutations observed prior to the chosen date - these will be filtered out
          already_observed <- samp_data %>%
            dplyr::filter(date < input$MaxDate) %>%
            dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
            unlist()
          
          #remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
          all_uniqs <- samp_data %>%
            dplyr::select(ALT_ID, AF, SAMP_NAME, Location, date, DP) %>%
            dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
            unlist()
          
          
          #ID mutations first observed on or after selected date
          new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
          
          #filter for new mutations
          samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>% 
            dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
            dplyr::rename("DATE" = "date",
                          "LOCATION" = "Location",
                          "SEQ DEPTH" = "DP") %>%
            dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`) 
        })
        

        output$mut_freqs <- shiny::renderPlot({
          #get set of mutations of interest
          #these are entered as comma separated values in text box. Allows for spaces between or not.
          target_muts <- stringr::str_split(input$Muts, pattern = ",\\s?") %>% unlist()
          
          #plot frequencies of selected mutations for the selected Sample(s)
          #multiple mutations are differentiated by color
          #multiple sites are plotted as separate facets
          samp_data %>%
            dplyr::filter(Location %in% input$Location_Muts) %>%
            dplyr::filter(ALT_ID %in% target_muts) %>%
            ggplot2::ggplot(ggplot2::aes(x = date, y = AF, color = Location)) +
            ggplot2::geom_line(size = 2) +
            ggplot2::geom_point(size = 2) +
            ggplot2::scale_color_brewer(palette = "Dark2") +
            ggplot2::facet_wrap(dplyr::vars(c(ALT_ID))) +
            ggplot2::coord_cartesian(ylim = c(0,1)) +
            ggplot2::theme(legend.text = ggplot2::element_text(size = 14),
                           legend.title = ggplot2::element_text(size = 15),
                           strip.text = ggplot2::element_text(size = 15),
                           axis.text.x = ggplot2::element_text(size = 14),
                           axis.text.y = ggplot2::element_text(size = 14),
                           axis.title.y = ggplot2::element_text(size = 15)) +
            ggplot2::labs(y = "Mutation Frequency", x = NULL)
        })
        
        #update the "Date" drop-down box to only include dates for which samples exist for the selected Sample
        shiny::observeEvent(input$Sample_MutTable, {
          date_options <- samp_data %>%
            dplyr::filter(Location == input$Sample_MutTable) %>%
            dplyr::distinct(date) 
          date_options <- date_options$date
          
          shiny::updateSelectInput(session, "Date",
                            label = "Date",
                            choices = date_options)
        })
        
        #create table with all mutations observed in selected sample. Will not include column showing any lineages they have been associated with (don't have lineages info).
        output$mut_table <- DT::renderDataTable({
          
          #create table for selected sample/date
          samp_data %>% 
            dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
            dplyr::filter(Location %in% input$Sample_MutTable) %>%
            dplyr::filter(date == input$Date) %>%
            dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, DP) %>% 
            dplyr::filter(DP >= input$DPThresh2) %>%
            dplyr::mutate("FREQ" = round(AF, digits = 3)) %>% 
            dplyr::rename("DATE" = "date",
                          "LOCATION" = "Location",
                          "SEQ DEPTH" = "DP") %>%
            dplyr::select(-AF) %>%
            dplyr::select(SAMP_NAME, LOCATION, DATE, CHR, POS, GENE, MUTATION, FREQ, `SEQ DEPTH`) %>%
            as.data.frame()
        })
        
      }  
      
      ## run Shiny app with dates but no lineages
      shiny::shinyApp(ui = ui, server = server)
      
    } else { #have both dates and lineage definitions - output all tabs
      
      ui <- shiny::fluidPage(shiny::tabsetPanel(
        shiny::tabPanel("Lineages Present",
                        shiny::sidebarPanel(width = 2,
                                            shiny::selectInput(inputId = "Location",
                                                               label = "Location",
                                                               choices = unique(samp_data$Location)),
                                            shiny::checkboxGroupInput(inputId = "Lineages",
                                                                      label = "Lineage",
                                                                      choices = unique(lineage_muts$Lineage),
                                                                      selected = unique(lineage_muts$Lineage)),
                                            shiny::selectInput(inputId = "DepthTag",
                                                               label = "Calculate Mean Depths On...",
                                                               choices = c("All Mutations", "Lineage-Characteristic Mutations Only")),
                                            shiny::sliderInput(inputId = "propThresh",
                                                               label = "Presence Threshold",
                                                               min = 0, max = 1,
                                                               value = 0.5),
                                            shiny::numericInput(inputId = "DPThreshPlot",
                                                                label = "Minimum Seq Depth",
                                                                value = 0),
                                            shiny::selectInput(inputId = "metric",
                                                               label = "Freq Measure",
                                                               choices = c("Mean", "Median")),
                                            shiny::selectInput(inputId = "scaled",
                                                               label = "Frequencies",
                                                               choices = c("Scaled", "Unscaled"))),
                        shiny::mainPanel(width = 10,
                                         shiny::verticalLayout(
                                           plotly::plotlyOutput(outputId = "lineages_present"),
                                           plotly::plotlyOutput(outputId = "lineage_proportions"))
                        )
        ),
        
        shiny::tabPanel("New Mutations",
                        shiny::selectInput(inputId = "MaxDate",
                                           label = "Mutations First Detected On Or After...",
                                           choices = unique(samp_data$date)),
                        shiny::numericInput(inputId = "DPThresh",
                                            label = "Minimum Depth",
                                            value = 5),
                        shiny::checkboxInput(inputId = "maj_sub", 
                                             label = "Analyze Sublineages", 
                                             value = FALSE),
                        DT::dataTableOutput("new_muts")
                        
        ),
        shiny::tabPanel("Mutation Frequencies",
                        shiny::textInput(inputId = "Muts",
                                         label = "Mutation(s) (i.e S_D614G or S_del144/144; separate with commas)"),
                        shiny::selectInput(inputId = "Location_Muts",
                                           label = "Location",
                                           choices = unique(samp_data$Location),
                                           multiple = TRUE),
                        shiny::plotOutput(outputId = "mut_freqs")
                        
        ),
        
        shiny::tabPanel("View Mutations",
                        
                        shiny::selectInput(inputId = "Location_MutTable",
                                           label = "Location",
                                           choices = unique(samp_data$Location)),
                        shiny::selectInput(inputId = "Date",
                                           label = "Date",
                                           choices = unique(samp_data$date)),
                        shiny::numericInput(inputId = "DPThresh2",
                                            label = "Minimum Depth",
                                            value = 0),
                        shiny::checkboxInput(inputId = "maj_sub_muttab", 
                                             label = "Analyze Sublineages", 
                                             value = FALSE),
                        DT::dataTableOutput("mut_table")
        )
      )) 
      
      server <- function(input, output, session){
       
        output$lineages_present <- plotly::renderPlotly({
          #filter for data for the selected Location
          samp_data <- samp_data %>%
            dplyr::filter(Location %in% input$Location) 
          
          #create master df that will store data for all selected lineages/variants
          all_summary <- data.frame()
          
          for (i in 1:length(input$Lineages)){  #loop over each selected lineage
            #get the mutations characteristic of the current lineage
            muts <- maj_lineage_muts %>%
              dplyr::filter(characteristic == "Y") %>%
              dplyr::filter(Lineage %in% input$Lineages[[i]]) %>%
              dplyr::pull(ALT_ID)
            
            #get the number of mutations characteristic of the current lineage
            lineage_n <- length(muts)
            
            #get proportion of characteristic mutations observed on each date for current Location and other summary info
            summary <- samp_data %>%
              #remove any duplicated mutations, keeping one with highest read depth
              dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
              dplyr::arrange(desc(DP), .by_group = TRUE) %>%
              dplyr::ungroup() %>%
              dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
              dplyr::group_by(SAMP_NAME, date) %>%
              dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
                               "Lineage" = paste0(input$Lineages[[i]], ""),
                               "Total Lineage-Characteristic Muts" = lineage_n,
                               #"Lineage_n" = paste0(input$Lineages[[i]], " (", lineage_n, ")"),
                               "Mean_Coverage_All" = mean(DP[DP >= input$DPThreshPlot], na.rm = TRUE),
                               "DP_n_all" = sum(!is.na(DP[DP >= input$DPThreshPlot])),
                               "PropHighDepth" = sum(DP[ALT_ID %in% muts] >= input$DPThreshPlot)/lineage_n) %>%
              dplyr::ungroup() %>% 
              dplyr::mutate("ID" = paste0(SAMP_NAME, "_", date))
            
            #recalculate Mean Coverage based on target mutations only and add in to 'summary' from above
            summary2 <- samp_data %>%
              dplyr::filter(ALT_ID %in% muts) %>%
              dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
              dplyr::arrange(desc(DP), .by_group = TRUE) %>%
              dplyr::ungroup() %>%
              dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
              dplyr::group_by(SAMP_NAME, date) %>%
              dplyr::summarise("Mean Coverage" = mean(DP[DP >= input$DPThreshPlot]),
                               "DP_n" = sum(!is.na(DP[DP >= input$DPThreshPlot]))) %>%
              dplyr::ungroup() %>%
              dplyr::mutate("ID" = paste0(SAMP_NAME, "_", date)) %>%
              dplyr::select(ID, `Mean Coverage`, DP_n)
            
            summary <- dplyr::left_join(x = summary, y = summary2, by = "ID")
            
            #add data for current lineage to master df
            all_summary <- dplyr::bind_rows(all_summary, summary)
            
          }  
          
          #use days since earliest sample date as x axis (lubridate interval)
          ints <- lubridate::interval(start = min(all_summary$date), end = all_summary$date)
          
          #convert seconds to days
          all_summary <- all_summary %>%
            dplyr::mutate("days" = lubridate::int_length(ints)/86400)
          
          #get vectors for labeling x-axis
          break_dates <- unique(all_summary$date)
          break_days <- unique(all_summary$days)
          
          #check which type of mean depth to plot
          if (input$DepthTag == "All Mutations") {
            
            #replace Mean Coverage col with data from Mean_Coverage_All if plotting based on all mutations instead of just targert mutations.
            all_summary <- all_summary %>% 
              dplyr::select(-`Mean Coverage`) %>%
              dplyr::rename("Mean Coverage" = "Mean_Coverage_All") %>%
              dplyr::select(-DP_n) %>%
              dplyr::rename("DP_n" = "DP_n_all")
          }
          
          #create new column for fill to represent seq depth. Need a constant range for the scale for this to be useful, so capping it at 1000.
          all_summary <- all_summary %>%
            dplyr::mutate("mean_cov_scaled" = `Mean Coverage`) %>%
            dplyr::mutate_at(dplyr::vars(mean_cov_scaled), ~replace(., mean_cov_scaled > 1000, 1000)) %>%
            dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
                          "PropHighDepth" = round(`PropHighDepth`, 3),
                          "Mean Coverage" = round(`Mean Coverage`, 3))
          
          
          #generate plot
          if (all.target.muts == FALSE) {
            pres_plot <- all_summary %>% 
              ggplot2::ggplot(ggplot2::aes(x = days, 
                                           y = `Proportion Present`,
                                           text = paste("Lineage: ", Lineage, "</br>", "</br>", 
                                                        "Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>", 
                                                        "Proportion Present: ", `Proportion Present`, "</br>",
                                                        "Avg Seq Depth: ", `Mean Coverage`, "</br>",
                                                        "Avg Seq Based On ", DP_n, " mutations"))) +
              ggplot2::geom_line(ggplot2::aes(colour = Lineage, group = Lineage), size = 1.5) +
              ggplot2::coord_cartesian(ylim = c(0,1)) +
              ggplot2::theme_classic() +
              ggplot2::scale_color_brewer(palette = "Paired") +
              ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Location)) +
              ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                             axis.text.x = ggplot2::element_text(angle = 45, size = 9, vjust = 0.4),
                             axis.text.y = ggplot2::element_text(size = 9),
                             plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
                             axis.title.y = ggplot2::element_blank(),
                             legend.text = ggplot2::element_text(size = 9),
                             legend.title = ggplot2::element_blank()) +
              ggplot2::geom_hline(yintercept = input$propThresh, color = "red", linetype = 2, alpha = 0.6) +
              ggplot2::geom_point(data = all_summary, mapping = ggplot2::aes(x = days, 
                                                                             y = `Proportion Present`,
                                                                             fill = mean_cov_scaled), size = 2.5, stroke = 0,
                                  show.legend = FALSE) +
              ggplot2::scale_fill_gradient(low = "#56B1F7", 
                                           high = "#132B43",
                                           limits = c(0,1000))
            
            plotly::ggplotly(pres_plot, tooltip = c("text"))
            
          } else {
            
            pres_plot <- all_summary %>% 
              ggplot2::ggplot(ggplot2::aes(x = days, 
                                           y = `Proportion Present`,
                                           text = paste("Lineage: ", Lineage, "</br>", "</br>", 
                                                        "Total Lineage-Characteristic Muts: ", `Total Lineage-Characteristic Muts`, "</br>", 
                                                        "Proportion Present: ", `Proportion Present`, "</br>",
                                                        "Proportion Exceeding Seq Depth Threshold: ", PropHighDepth, "</br>",
                                                        "Avg Seq Depth: ", `Mean Coverage`, "</br>",
                                                        "Avg Seq Based On ", DP_n, " mutations"))) +
              ggplot2::geom_line(ggplot2::aes(colour = Lineage, group = Lineage), size = 1.5) +
              ggplot2::coord_cartesian(ylim = c(0,1)) +
              ggplot2::theme_classic() +
              ggplot2::scale_color_brewer(palette = "Paired") +
              ggplot2::ggtitle(paste0("Proportion of Lineage-Characteristic Mutations Present: ", input$Location)) +
              ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                             axis.text.x = ggplot2::element_text(angle = 45, size = 9, vjust = 0.4),
                             axis.text.y = ggplot2::element_text(size = 9),
                             plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
                             axis.title.y = ggplot2::element_blank(),
                             legend.text = ggplot2::element_text(size = 9),
                             legend.title = ggplot2::element_blank()) +
              ggplot2::geom_hline(yintercept = input$propThresh, color = "red", linetype = 2, alpha = 0.6) +
              ggplot2::geom_point(data = all_summary, mapping = ggplot2::aes(x = days, 
                                                                             y = `Proportion Present`,
                                                                             fill = mean_cov_scaled), size = 2.5, stroke = 0,
                                  show.legend = FALSE) +
              ggplot2::scale_fill_gradient(low = "#56B1F7", 
                                           high = "#132B43",
                                           limits = c(0,1000))
            
            plotly::ggplotly(pres_plot, tooltip = c("text"))
          }
        }) 
        
        
        output$lineage_proportions <- plotly::renderPlotly({
          #filter for data for the selected Location
          samp_data <- samp_data %>%
            dplyr::filter(Location %in% input$Location) 
          
          all_summary <- data.frame()
          
          #loop over each selected lineage
          for (i in 1:length(input$Lineages)){
            
            #get the mutations characteristic of the current lineage
            muts <- maj_lineage_muts %>%
              dplyr::filter(characteristic == "Y") %>%
              dplyr::filter(Lineage %in% input$Lineages[[i]]) %>%
              dplyr::select(ALT_ID) %>%
              unlist()
            
            #get the number of mutations characteristic of the current lineage
            lineage_n <- length(muts)
            
            if (input$metric == "Mean") {
              #get proportion of characteristic mutations observed in current sample and avg frequency of observed mutations
              summary <- samp_data %>%
                dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
                dplyr::arrange(desc(DP), .by_group = TRUE) %>%
                dplyr::ungroup() %>%
                dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
                dplyr::group_by(SAMP_NAME, date) %>%
                dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
                                 "Lineage" = paste0(input$Lineages[[i]], ""),
                                 "Estimated Freq" = mean(AF[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]),
                                 "muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]))), collapse = ";"),
                                 "muts_absent" = paste0(c(sort(unique(muts[!muts %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
                                 "lab1" = "\nMuts Present: ",
                                 "lab2" = "\n\nMuts Absent: ") %>%
                dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
                              "muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
                              "muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list)) %>%
                dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
                dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
            } else { #use median
              summary <- samp_data %>%
                dplyr::group_by(SAMP_NAME, date, ALT_ID) %>%
                dplyr::arrange(desc(DP), .by_group = TRUE) %>%
                dplyr::ungroup() %>%
                dplyr::distinct(SAMP_NAME, date, ALT_ID, .keep_all = TRUE) %>%
                dplyr::group_by(SAMP_NAME, date) %>%
                dplyr::summarise("Proportion Present" = sum(ALT_ID[AF > 0 & DP >= input$DPThreshPlot] %in% muts)/lineage_n,
                                 "Lineage" = paste0(input$Lineages[[i]], ""),
                                 "Estimated Freq" = median(AF[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]),
                                 "muts_present" = paste0(c(sort(unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot & ALT_ID %in% muts]))), collapse = ";"),
                                 "muts_absent" = paste0(c(sort(unique(muts[!muts %in% ALT_ID[AF > 0 & DP >= input$DPThreshPlot]]))), collapse = ";"),
                                 "lab1" = "\nMuts Present: ",
                                 "lab2" = "\n\nMuts Absent: ") %>%
                dplyr::mutate("muts_list" = glue::glue("{lab1}{muts_present}{lab2}{muts_absent}"),
                              "muts_list" = stringr::str_replace_all(string = muts_list, pattern = "NA\n", replacement = ""),
                              "muts_list" = gsub('(.{1,30})(;|$)', '\\1;\n', muts_list)) %>%
                dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., is.nan(.), 0)) %>%
                dplyr::mutate_at(dplyr::vars(`Estimated Freq`), ~replace(., `Proportion Present` < input$propThresh, 0))
              
              }
        
            #add data for current lineage to master df
            all_summary <- dplyr::bind_rows(all_summary, summary)
        
          }
          
          
          if(nrow(sub_lineage_muts) > 0) { 
           
            get_sub_props <- function(sampname, dat, lin) {
              samp_data_sub <- samp_data %>%
                dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
              
              char_sub_muts <- sub_lineage_muts %>%
                dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
              
              char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
                dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
                              "DP" = tidyr::replace_na(DP, 0))
              
              char_sub_summary <- char_sub_muts %>%
                dplyr::group_by(Sublineage) %>%
                dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n()) %>%
                dplyr::ungroup() %>%
                dplyr::filter(sub_prop_present >= input$propThresh)  %>%
                dplyr::pull(Sublineage) %>%
                paste(collapse = ";")
              
              if (char_sub_summary == "") {char_sub_summary <- NA}
              char_sub_summary
            } 
            
            get_sub_freqs <- function(sampname, dat, lin) {
              
              samp_data_sub <- samp_data %>%
                dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
              
              char_sub_muts <- sub_lineage_muts %>%
                dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
              
              char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
                dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
                              "DP" = tidyr::replace_na(DP, 0))
              
              
              if (input$metric == "Mean") {
                char_sub_summary <- char_sub_muts %>%
                  dplyr::group_by(Sublineage) %>%
                  dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                                   "sub_freqs" = mean(AF[AF > 0 & DP >= input$DPThreshPlot])) %>%
                  dplyr::ungroup() %>%
                  dplyr::mutate_at(dplyr::vars(sub_freqs), ~replace(., is.na(.), 0))
                
                if (input$scaled == "Scaled") {
                  char_sub_summary <- char_sub_summary %>%
                    dplyr::mutate("sub_freqs" = sub_scale(sub_freqs))
                }  
                
                char_sub_summary <- char_sub_summary %>%
                  dplyr::filter(sub_prop_present >= input$propThresh)  %>%
                  dplyr::mutate("sub_freqs" = round(sub_freqs, 3)) %>%
                  dplyr::pull(sub_freqs) %>%
                  paste(collapse = ";")
              
              } else { #calculate median
              
                char_sub_summary <- char_sub_muts %>%
                  dplyr::group_by(Sublineage) %>%
                  dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                                   "sub_freqs" = median(AF[AF > 0 & DP >= input$DPThreshPlot])) %>%
                  dplyr::ungroup() %>%
                  dplyr::mutate_at(dplyr::vars(sub_freqs), ~replace(., is.na(.), 0))
                
                if (input$scaled == "Scaled") {
                    char_sub_summary <- char_sub_summary %>%
                      dplyr::mutate("sub_freqs" = sub_scale(sub_freqs))
                  }  
                
                char_sub_summary <- char_sub_summary %>%
                  dplyr::filter(sub_prop_present >= input$propThresh)  %>%
                  dplyr::mutate("sub_freqs" = round(sub_freqs, 3)) %>%
                  dplyr::pull(sub_freqs) %>%
                  paste(collapse = ";")
                
              }
              if (char_sub_summary == "") {char_sub_summary <- NA}
              char_sub_summary
            }
            
            get_sub_muts <- function(sampname, dat, lin) {
              samp_data_sub <- samp_data %>%
                dplyr::filter(SAMP_NAME %in% sampname & date %in% dat)
              
              char_sub_muts <- sub_lineage_muts %>%
                dplyr::filter(Lineage %in% lin & sub_characteristic == "Y")
              
              char_sub_muts <- dplyr::left_join(x = char_sub_muts, y = samp_data_sub, by = "ALT_ID") %>%
                dplyr::mutate("AF" = tidyr::replace_na(AF, 0),
                              "DP" = tidyr::replace_na(DP, 0))
              
                char_sub_summary <- char_sub_muts %>%
                  dplyr::group_by(Sublineage) %>%
                  dplyr::summarise("sub_prop_present" = (sum(!is.na(SAMP_NAME[AF > 0 & DP >= input$DPThreshPlot])))/dplyr::n(),
                            "sub_muts" = unique(ALT_ID[AF > 0 & DP >= input$DPThreshPlot])) %>%
                  dplyr::ungroup() %>%
                  dplyr::filter(sub_prop_present >= input$propThresh)  %>%
                  dplyr::pull(sub_muts) %>%
                  paste(collapse = ";")
                
                if (char_sub_summary == "") {char_sub_summary <- NA}
                char_sub_summary
              
            }  
            
            all_summary <- all_summary %>%
              dplyr::rowwise() %>%
              dplyr::mutate("sub_Proportion_Present" = get_sub_props(sampname = SAMP_NAME, dat = date, lin = Lineage),
                     "sub_Estimated_Freq" = get_sub_freqs(sampname = SAMP_NAME, dat = date, lin = Lineage),
                     "sub_muts_present" = get_sub_muts(sampname = SAMP_NAME, dat = date, lin = Lineage)) %>%
              dplyr::ungroup()
          }  
          
        
          #use days since earliest sample date as x axis (lubridate interval)
          ints <- lubridate::interval(start = min(all_summary$date), end = all_summary$date)
          
          #convert seconds to days
          all_summary <- all_summary %>%
            dplyr::mutate("days" = lubridate::int_length(ints)/86400)
          
          #get vectors for labeling x-axis
          break_dates <- unique(all_summary$date)
          break_days <- unique(all_summary$days)
          
          #generate plot
          #only plot bars for lineages defined as "present" in the sample based on chosen proportion threshold 
          
          if (input$scaled == "Scaled") {
            all_summary_sum <- all_summary %>% 
              dplyr::ungroup() %>%
              dplyr::group_by(date) %>%
              dplyr::summarise("sum" = sum(`Estimated Freq`)) %>%
              dplyr::mutate("over1" = ifelse(sum > 1, "Y", "N"))
            
            all_summary_sum <- dplyr::left_join(x = all_summary, y = all_summary_sum, by = "date")
            
            to_scale <- all_summary_sum %>%
              dplyr::filter(over1 == "Y") %>% 
              dplyr::group_by(date) %>% 
              dplyr::mutate("Estimated Freq" = `Estimated Freq`/sum(`Estimated Freq`))
            
            unscaled <- all_summary_sum %>%
              dplyr::filter(over1 == "N")
            
            all_summary <- dplyr::bind_rows(to_scale, unscaled)
          }
          
          if (input$metric == "Mean") {
            measure <- "Mean"
          } else {
            measure <- "Median"
          }
          
          all_summary <- all_summary %>%
            dplyr::mutate("Proportion Present" = round(`Proportion Present`, 3),
                          "Estimated Freq" = round(`Estimated Freq`,3))
        
        
          if (nrow(sub_lineage_muts) == 0) {
            freq_plot <- all_summary %>%
              dplyr::filter(`Estimated Freq` > 0) %>%
              ggplot2::ggplot(ggplot2::aes(x = days, y = `Estimated Freq`, fill = Lineage, 
                                           text = paste("Lineage: ", Lineage, "</br>", "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list))) +
              ggplot2::geom_bar(stat = "identity",
                                position = "stack") +
              ggplot2::theme_classic() +
              ggplot2::scale_fill_brewer(palette = "Paired") +
              ggplot2::ggtitle(paste0("Estimated Frequency of Each Lineage Present: ", input$Location)) +
              ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                             axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
                             axis.text = ggplot2::element_text(size = 9),
                             plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
                             axis.title.y = ggplot2::element_blank(),
                             legend.text = ggplot2::element_text(size = 9),
                             legend.title = ggplot2::element_blank()) +
              ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
            
          } else {
            freq_plot <- all_summary %>%
              dplyr::filter(`Estimated Freq` > 0) %>%
              ggplot2::ggplot(ggplot2::aes(x = days, 
                                           y = `Estimated Freq`, 
                                           fill = Lineage, 
                                           text = paste("Lineage: ", Lineage, "</br>", "Estimated Freq: ", `Estimated Freq`, "</br>", muts_list, "</br>", "Sublineages Present: ", sub_Proportion_Present, "</br>", "Sublineage Freqs: ", sub_Estimated_Freq, "</br>", "Sublineage Muts Present: ", sub_muts_present))) +
              ggplot2::geom_bar(stat = "identity",
                                position = "stack") +
              ggplot2::theme_classic() +
              ggplot2::scale_fill_brewer(palette = "Paired") +
              ggplot2::ggtitle(paste0("Estimated Frequency of Each Lineage Present: ", input$Location)) +
              ggplot2::scale_x_continuous(breaks = break_days, labels = break_dates) +
              ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                             axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.4),
                             axis.text = ggplot2::element_text(size = 9),
                             plot.title = ggplot2::element_text (color = "black", size= 11, face="bold"),
                             axis.title.y = ggplot2::element_blank(),
                             legend.text = ggplot2::element_text(size = 9),
                             legend.title = ggplot2::element_blank()) +
              ggplot2::labs(y = paste0("Estimated Frequency (", measure, ")"))
            
          }
          
          plotly::ggplotly(p = freq_plot, tooltip = "text") %>%
            plotly::layout(hoverlabel = list(align = "left"))
        })  
        
        #create table with mutations first observed on or after a selected date
        output$new_muts <- DT::renderDataTable({
          
          if (input$maj_sub == FALSE) {
            #merge in lineage information
            samp_data <- dplyr::left_join(x = samp_data,
                                          y = maj_lineage_muts,
                                          by = "ALT_ID") %>%
              dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
            
            #filter for mutations observed with a specific sequencing depth
            #get all lineages a given mutation is associated with as a ';'-delimited string - will be printed to column
            samp_data <- samp_data %>%
              dplyr::filter(DP >= input$DPThresh) %>%
              dplyr::group_by(SAMP_NAME, date, Location, ALT_ID, AF, DP, CHR, POS, GENE) %>%
              dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";")) %>% 
              dplyr::ungroup() 
            
            #ID the mutations observed prior to the chosen date - these will be filtered out
            already_observed <- samp_data %>%
              dplyr::filter(date < input$MaxDate) %>%
              dplyr::select(ALT_ID, AF, SAMP_NAME, Group) %>%
              dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
              unlist()
            
            #remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
            all_uniqs <- samp_data %>%
              dplyr::select(ALT_ID, AF, SAMP_NAME, Group, Location, date, DP) %>%
              dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
              unlist()
            
            #ID mutations first observed on or after selected date
            new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
            
            samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>% 
              dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
              dplyr::rename("DATE" = "date",
                            "LOCATION" = "Location",
                            "SEQ DEPTH" = "DP",
                            "ASSOCIATED LINEAGES" = "Group") %>%
              dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`, `ASSOCIATED LINEAGES`)
            
          } else{
            
            #merge in lineage information
            samp_data <- dplyr::left_join(x = samp_data,
                                          y = sub_lineage_muts,
                                          by = "ALT_ID") %>%
              dplyr::filter(AF > 0) #this applies if call_mutations was run with write.all.targets = TRUE - don't want to count things with zero reads.
            
            #filter for mutations observed with a specific sequencing depth
            #get all lineages a given mutation is associated with as a ';'-delimited string - will be printed to column
            samp_data <- samp_data %>%
              dplyr::filter(DP >= input$DPThresh) %>%
              dplyr::group_by(SAMP_NAME, date, Location, ALT_ID, AF, DP, CHR, POS, GENE) %>%
              dplyr::summarize("Group" = paste(unique(Sublineage),collapse = ";")) %>% 
              dplyr::ungroup() 
            
            #ID the mutations observed prior to the chosen date - these will be filtered out
            already_observed <- samp_data %>%
              dplyr::filter(date < input$MaxDate) %>%
              dplyr::select(ALT_ID, AF, SAMP_NAME, Group) %>%
              dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
              unlist()
            
            #remove duplicate mutations (occur if multiple underlying mutations give rise to specific amino acid change)
            all_uniqs <- samp_data %>%
              dplyr::select(ALT_ID, AF, SAMP_NAME, Group, Location, date, DP) %>%
              dplyr::distinct(ALT_ID, .keep_all = TRUE) %>%
              unlist()
            
            #ID mutations first observed on or after selected date
            new_mutations <- all_uniqs[!all_uniqs %in% already_observed]
            
            samp_data %>% dplyr::filter(ALT_ID %in% new_mutations) %>% 
              dplyr::mutate("MUTATION" = stringr::str_replace(ALT_ID, "(.+_)(.+)", "\\2")) %>%
              dplyr::rename("DATE" = "date",
                            "LOCATION" = "Location",
                            "SEQ DEPTH" = "DP",
                            "ASSOCIATED SUBLINEAGES" = "Group") %>%
              dplyr::select(SAMP_NAME, DATE, LOCATION, CHR, POS, GENE, MUTATION, AF, `SEQ DEPTH`, `ASSOCIATED SUBLINEAGES`)
            
          }
          
        })
        
        output$mut_freqs <- shiny::renderPlot({
          #get set of mutations of interest
          #these are entered as comma separated values in text box. Allows for spaces between or not.
          target_muts <- stringr::str_split(input$Muts, pattern = ",\\s?") %>% unlist()
          
          #plot frequencies of selected mutations for the selected Sample(s)
          #multiple mutations are differentiated by color
          #multiple sites are plotted as separate facets
          samp_data %>%
            dplyr::filter(Location %in% input$Location_Muts) %>%
            dplyr::filter(ALT_ID %in% target_muts) %>%
            ggplot2::ggplot(ggplot2::aes(x = date, y = AF, color = Location)) +
            ggplot2::geom_line(size = 2) +
            ggplot2::geom_point(size = 2.5) +
            ggplot2::scale_color_brewer(palette = "Dark2") +
            ggplot2::theme_classic() +
            ggplot2::facet_wrap(dplyr::vars(c(ALT_ID))) +
            ggplot2::coord_cartesian(ylim = c(0,1)) +
            ggplot2::theme(legend.text = ggplot2::element_text(size = 14),
                           legend.title = ggplot2::element_text(size = 15),
                           strip.text = ggplot2::element_text(size = 15),
                           axis.text.x = ggplot2::element_text(size = 14),
                           axis.text.y = ggplot2::element_text(size = 14),
                           axis.title.y = ggplot2::element_text(size = 15)) +
            ggplot2::labs(y = "Mutation Frequency", x = NULL)
        }) 
        
        #update the "Date" drop-down box to only incude dates for which samples exist for the selected Sample
        shiny::observeEvent(input$Location_MutTable, {
          date_options <- samp_data %>%
            dplyr::filter(Location == input$Location_MutTable) %>%
            dplyr::distinct(date) 
          date_options <- date_options$date
          
          shiny::updateSelectInput(session, "Date",
                                   label = "Date",
                                   choices = date_options)
        })
        
        
        #create table with all mutations observed in selected sample and any lineages they have been associated with.
        output$mut_table <- DT::renderDataTable({
          
          if (input$maj_sub_muttab == FALSE) {
            #merge in info on lineages associated with specific mutations
            samp_data <- dplyr::left_join(x = samp_data,
                                          y = maj_lineage_muts,
                                          by = "ALT_ID")
            
            #combine all lineages associated with a given mutation - separate with ';'
            samp_data <- samp_data %>% 
              dplyr::group_by(SAMP_NAME, Location, date, CHR, POS, ALT_ID, AF, DP) %>%
              dplyr::summarize("Group" = paste(unique(Lineage),collapse = ";")) %>%
              dplyr::ungroup()
            
            #create table for selected sample/date
            samp_data %>% 
              tidyr::separate(col = ALT_ID,
                              into = c("GENE", "MUTATION"),
                              sep = "_") %>%
              dplyr::filter(Location %in% input$Location_MutTable) %>%
              dplyr::filter(date == input$Date) %>%
              dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, Group, DP) %>% 
              dplyr::filter(DP >= input$DPThresh2) %>%
              dplyr::mutate("AF" = round(AF, digits = 3)) %>% 
              dplyr::rename("DATE" = "date", 
                            "ASSOCIATED LINEAGE(S)" = "Group",
                            "FREQ" = "AF",
                            "SEQ DEPTH" = "DP",
                            "LOCATION" = "Location") %>%
              as.data.frame()
            
          } else {
            
            #merge in info on lineages associated with specific mutations
            samp_data <- dplyr::left_join(x = samp_data,
                                          y = sub_lineage_muts,
                                          by = "ALT_ID")
            
            #combine all lineages associated with a given mutation - separate with ';'
            samp_data <- samp_data %>% 
              dplyr::group_by(SAMP_NAME, Location, date, CHR, POS, ALT_ID, AF, DP) %>%
              dplyr::summarize("Group" = paste(unique(Sublineage),collapse = ";")) %>%
              dplyr::ungroup()
            
            #create table for selected sample/date
            samp_data %>% 
              tidyr::separate(col = ALT_ID,
                              into = c("GENE", "MUTATION"),
                              sep = "_") %>%
              dplyr::filter(Location %in% input$Location_MutTable) %>%
              dplyr::filter(date == input$Date) %>%
              dplyr::select(SAMP_NAME, Location, date, CHR, POS, GENE, MUTATION, AF, Group, DP) %>% 
              dplyr::filter(DP >= input$DPThresh2) %>%
              dplyr::mutate("AF" = round(AF, digits = 3)) %>% 
              dplyr::rename("DATE" = "date", 
                            "ASSOCIATED SUBLINEAGE(S)" = "Group",
                            "FREQ" = "AF",
                            "SEQ DEPTH" = "DP",
                            "LOCATION" = "Location") %>%
              as.data.frame()
            
          }
        })
        
      }
      
        ## run Shiny app with dates but no lineages
        shiny::shinyApp(ui = ui, server = server)
        
    }    
        
  } 
  
}