R/esr_tree.R

Defines functions str_empty prepare_mlst compute_dist compute_tree compute_clustering compute_new_isolate_name rename_isolates_in_dist_matrix compute_leaf_colours colour_nodes_in_tree heatmap_ui phylocanvas_ui phylocanvas_module

if(file.exists("R/global.R")){source("R/global.R")}
reactive_selected_nodes <- reactiveVal(NULL)
reactive_distance_matrix <- reactiveVal(NULL)
reactive_hclust <- reactiveVal(NULL)
current_phylocanvas_tree <- reactiveVal(NULL)

#' Utility function checking if a string is empty or NA or NULL
#'
#' @param s the string to check
#'
#' @return TRUE if the string is empty, else FALSE
#'
#' @export
str_empty <- function(s){
  if(is.null(s)){return(T)}
  if(is.na(s)){return(T)}
  return(str_length(str_trim(s)) == 0)
}

#' Prepare MLST matrix for distance computation. This function subsets the mlst
#' matrix for the selected isolates in the metadata table. It also removes the
#' isolates for which the column defined in `mlst_complete_column` is not
#' `Complete`
#'
#' @param mlst_data the mlst data.frame as exported by the function
#' `split_mlst_and_metadata` from the `esr_mlst_data` module. Needs to be in the
#' same order as the metadata datatable
#' @param selected_rows indices of the selected rows in the metadata datatable (
#' and the mlst data frame)
#'
#' @return a numeric matrix of the mlst allele codes
#'
#' @noRd
prepare_mlst <- function(mlst_data, selected_rows) {
  log_debug("react_wgmlst")
  ret <- reactive({
    data <- mlst_data()
    # browser()
    if (!is.null(data)) {

      # find the selected isolates
      selected_isolates <- c()
      if (!is.null(selected_rows) &&
          length(selected_rows) >= 2) {
        # browser()
        selected_isolates <- data[[main_record_idt_col]]
        selected_isolates <- selected_isolates[selected_isolates %in% selected_rows]
      }

      # the mlst_complete_column needs to be set to complete for the sample
      # to be accepted
      # data <- data[data[[mlst_complete_column]] == "Complete", ]

      # prepare the matrix of mlst data
      wgMLST <- data[, str_starts(names(data), mlst_starts)]
      row.names(wgMLST) <- data[[main_record_idt_col]]

      # subset the data with only the selected isolates
      if (!is.null(selected_rows) &&
          length(selected_rows) >= 2) {
        wgMLST <- wgMLST[row.names(wgMLST) %in% selected_isolates, ]
      }

      #convert into matrix
      wgMLST <- as.matrix(wgMLST)
      if (nrow(wgMLST) > 0) {
        # js$enableTab("PhyloCanvas")
      }
    } else{
      wgMLST <- NULL
    }
    # browser()
    return(wgMLST)
  })
  return(ret)
}

#' Compute a distance matrix by counting the number of differences in the mlst
#' matrix for all alleles having non missing information between two isolates
#'
#' @param mlst the mlst data.frame as exported by the function
#' `split_mlst_and_metadata` from the `esr_mlst_data` module. Needs to be in the
#' same order as the metadata datatable
#' @param selected_rows indices of the selected rows in the metadata datatable (
#' and the mlst data frame)
#'
#' @return a distance matrix
#'
#' @noRd
compute_dist <- function(mlst, selected_rows){
  log_debug("react_dist")
  ret <- reactive({
    #Get the mlst matrix
    mlst_matrix <- prepare_mlst(mlst, selected_rows)()
    #The names of the isolates (aka their ids) are modified in phylocanvas to
    #remove special characters and replace them by '_'
    row.names(mlst_matrix) <- gsub("[-/&'() ]+", "_", row.names(mlst_matrix))
    #compute the distance
    if (!is.null(mlst_matrix) && nrow(mlst_matrix) > 0) {
      nb_differences <- function(x, y) {
        not_na <- !is.na(x) & !is.na(y)
        x <- x[not_na]
        y <- y[not_na]
        return(length(x[(x != y)]))
      }
      m <-
        dist_make(mlst_matrix, nb_differences, "Nb differences")
      reactive_distance_matrix(m)
      return(m)
    } else{
      reactive_distance_matrix(NULL)
      return(NULL)
    }
  })
  return(ret)
}

#' Compute the tree from a distance matrix
#'
#' @param dist_matrix the distance matrix result of `compute_dist`
#' @param sqrt_dist should the distances in the tree be modified using sqrt
#' @param sel_col the column selected for colour. This is not really used in the
#' code but it is mandatory to link the reactives. If not here, the tree is not
#' refreshed when the column is selected
#'
#' @return a tree as a `phylo` object
#'
#' @noRd
compute_tree <- function(dist_matrix, sqrt_dist, sel_col, clust_method){
  log_debug("react_base_tree")
  ret <- reactive({
    selected_column <- sel_col() #for reactive chain
    clustering_method <- clust_method()
    d <- dist_matrix()
    # browser()
    if (!is.null(d)) {
      clust <- compute_clustering(d, clustering_method)
      # dendro <- as.dendrogram(clust)
      mytree <- as.phylo(clust)
      if(sqrt_dist == T){
        mytree$edge.length <- sqrt(mytree$edge.length)
      }
      return(mytree)
    }
    reactive_hclust(NULL)
    return(NULL)
  })
  return(ret)
}

#' Compute a cluster (`hclust` object) from a distance matrix
#'
#' @param d the distance matrix
#' @param clustering_method to use (can be any of the `hclust` methods or `mst`)
#'
#' @return a `hclust` object
#'
#' @noRd
compute_clustering <- function(d, clustering_method){
  if(clustering_method == "mst"){
    clust <- as.hclust(spantree(d))
  }else{
    clust <- fastcluster::hclust(d, method = clustering_method)
  }
  reactive_hclust(clust)
  return(clust)
}

#' The isolate name in the tree changes because the value of the selected column
#' colour is added in the name. This function computes the new name and returns
#' it a tibble. This is a generic reactive function which is used by many other
#'
#' @param metadata the metadata table from which the column is selected
#' @param sel_column the column selected (reactive)
#'
#' @return a tibble with the new name in the column `new_isolate_name`. reactive
#'
#' @noRd
compute_new_isolate_name <- function(metadata, sel_column){
  ret <- reactive({
    data <- metadata()
    selected_column <- sel_column()
    if(is.null(selected_column)){
      selected_column <- default_grouping_col_phylocanvas
      }
    if(is.null(data)){return(data)}
    if(!selected_column %in% names(data)){
      data <- data %>%
        select(!!main_record_idt_col) %>%
        mutate(new_isolate_name = !!main_record_idt_col)
    }else{
      data <- data %>%
        select(!!main_record_idt_col, !!selected_column)
      data$new_isolate_name <- paste0(data[[main_record_idt_col]], phylocanvas_separator, data[[selected_column]])
    }
    return(data)
  })
  return(ret)
}

#' Rename the isolates in the distance matrix. This will cause the rename in the
#' tree. The isolates in the tree are renamed by adding the value of the column
#' selected for colourization of the nodes
#'
#'  @param dist_matrix the originla distance matrix
#'  @param metadata the metadata used for the colour selection
#'  @param sel_colum the metadata column selected
#'
#'  @return the distance matrix with the isolates renamed
#'
#'  @noRd
rename_isolates_in_dist_matrix <- function(dist_matrix, metadata, sel_column){
  ret <- reactive({

    new_names <- compute_new_isolate_name(metadata, sel_column)()
    # browser()
    if(is.null(new_names)){return(dist_matrix())}
    d <- dist_matrix()
    new_names_dist <- c()
    for(isolate in row.names(as.matrix(d))){
      if(isolate %in% new_names[[main_record_idt_col]]){
        nn <- new_names[new_names[[main_record_idt_col]] == isolate, "new_isolate_name"]
        new_names_dist <- c(new_names_dist, nn)
      }else{
        new_names_dist <- c(new_names_dist, isolate)
      }
    }
    # browser()
    d <- dist_setNames(d, new_names_dist)
  })
  return(ret)
}


#' Compute the tree leaves colours based on the metadata table and the selected
#' column
#'
#' @param metadata the metadata table
#' @param sel_column the selected column
#'
#' @return a tibble containing the record identifier, the selected metadata
#' column, the likely name of the isolate in the tree (see comment in function
#' colour nodes in tree), the colour
#'
#' @noRd
compute_leaf_colours <- function(metadata, sel_column){
  ret <- reactive({
    data <- metadata()
    selected_column <- sel_column()

    new_names <- compute_new_isolate_name(metadata, sel_column)()

    #colours
    colourCount <- length(unique(as.character(data[[selected_column]])))
    if(colourCount > 24){
      palette <- as.character(dark.colors(24))
      palette <- colorRampPalette(palette)(colourCount)
    }else{
      #minimum of 3 colours
      palette <- as.character(dark.colors(max(colourCount, 3)))
    }
    data$colours <- as.factor(data[[selected_column]])
    #Because the returned palette has at least 3 colour, it needs to be
    #subseted for the columns having 1 or 2 different values
    levels(data$colours) <- palette[seq(1,nlevels(data$colours))]
    colours <- data %>% select(!!main_record_idt_col, "colours")
    new_names_with_colours <- new_names %>%
      left_join(colours, by = main_record_idt_col)
    return(new_names_with_colours)
  })
  return(ret)
}

#' Colour the leaves of the phylocanvas using the metadata table and the column
#' selected
#'
#' @param pc the phylocanvas object
#' @param metadata the metadata table
#' @param sel_column the selected column
#' @param label_size the size of the labels (has to be set again)
#'
#' @return the modified phylocanvas object
#'
#' @noRd
colour_nodes_in_tree <- function(pc, metadata, sel_column, label_size){
  ret <- reactive({
    log_debug("colour_nodes_in_tree")
    # browser()
    data <- metadata()
    selected_column <- sel_column()
    new_names_with_colours <- compute_leaf_colours(metadata, sel_column)()
    isolates <- ape::read.tree(text=pc$x$tree)$tip.label
    for(isolate in isolates){
      #Because phylocanvas keeps modifying the isolate names, it is hard to
      #keep track of the changes. The else part of the condition splits the
      #isolate name using the phylocanvas separator defined in general to
      #recover the isolate name (works well but slower than the lookup)
      if(isolate %in% new_names_with_colours$new_isolate_name){
        colour <- as.character(new_names_with_colours[new_names_with_colours$new_isolate_name == isolate, "colours"])
      } else{
        rid <- str_split(isolate, phylocanvas_separator, simplify=T)[1]
        if(rid %in% new_names_with_colours[[main_record_idt_col]]){
          colour <- as.character(new_names_with_colours[new_names_with_colours[[main_record_idt_col]] == rid, "colours"])
        }else{
          colour <- "#000000"
        }
      }
      pc <-
        style_node(
          pc,
          isolate,
          # highlighted = T,
          fillcolor = colour,
          labelcolor = colour,
          strokecolor = colour,
          labeltextsize = label_size()
        )
    }
    return(pc)
  })
  return(ret)
}

#' heatmap UI
#' @param id the name of the namesapce
#'
#' @export
heatmap_ui <- function(id){
  ns <- NS(id)
  tagList(
    uiOutput(ns("heatmap_container")) %>%
      withSpinner(type = spinner_type)
  )
}

#' Phylocanvas module UI
#' @param id the name of the namesapce
#'
#' @export
phylocanvas_ui <- function(id) {
  ns <- NS(id)
  tagList(
    div(
      div(
        # legend_phylocanvas_ui(ns("phylocanvas_legend")),
        div(
          phylocanvasOutput(ns("tree_out_phylocanvas"), height = 800) %>%
            withSpinner(type = spinner_type),
          class = "phylocanvas"
        ),
        class = "col-sm-9"
      ),
      div(
        selectInput(
          ns("tree_layout_select"),
          label = "Tree layout:",
          choices = c(
            'rectangular' = 'rectangular',
            'hierarchical' =
              'hierarchical',
            'circular' =
              'circular',
            'diagonal' =
              'diagonal',
            'radial' =
              'radial'
          ),
          selected = default_tree,
          multiple = F
        ),
        selectInput(
          ns("clustering_method_select"),
          label = "Clustering method:",
          choices = c(
            "single" = "single",
            "complete" = "complete",
            "average" = "average",
            "mst" = "mst"
          ),
          selected = default_clustering_method,
          multiple = F
        ),
        sliderInput(
          ns("label_size"),
          label = "Labels size",
          min = 2,
          max = 36,
          step = 1,
          value = default_label_size
        ),
        prettySwitch(
          inputId = ns("align_labels"),
          label = "Align labels",
          fill = TRUE,
          value = TRUE,
          status = "primary"
        ),
        prettySwitch(
          inputId = ns("sqrt_dist"),
          label = "Use Square root of branch length?",
          fill = TRUE,
          value = FALSE,
          status = "primary"
        ),
        uiOutput(ns("column_selector")),
        # uiOutput(ns("level_selector")),
        class = "col-sm-3"
      ),
      class = "row"
    ),
    div(
      h4("Loading information"),
      class = "row"
    ),
    div(
      verbatimTextOutput(ns("nodes_selected_debug")),
      dataTableOutput(ns("nodes_selected")),
      class = "row"
    )
  )
}

#' The phylocanvas module. Build the tree and displays it
#'
#' @param input,output,session standard \code{shiny} boilerplate
#' @param mlst the mlst data.frame as exported by the function
#' `split_mlst_and_metadata` from the `esr_mlst_data` module. Needs to be in the
#' same order as the metadata datatable
#' @param metadata the metadata data.frame as exported by the function
#' `split_mlst_and_metadata` from the `esr_mlst_data` module.
#' @param table_rows_selected indices of the selected rows in the metadata
#' datatable (and the mlst data frame)
#'
#' @export
phylocanvas_module <- function(input,
                               output,
                               session,
                               mlst,
                               metadata,
                               table_rows_selected,
                               nodes) {
  log_debug("react_phylocanvas_module")

  output$column_selector <- renderUI({
    selected_metadata <- metadata() %>%
      select(!!default_grouping_col_phylocanvas, everything())
    cols <- names(selected_metadata)
    columns_metadata <- tibble(Columns=cols, labels=cols) %>%
      filter(!(Columns %in% select_exclude))

    selectizeInput(session$ns('colour_column_input'),
                   "Colour column",
                   choices = columns_metadata$Columns,
                   selected = "SequenceType",
                   multiple = F
    )
  })

  selected_column <- reactive({
    if(!is.null(input$colour_column_input)){
      return(input$colour_column_input)
    } else{
      return(default_grouping_col_phylocanvas)
    }
  })

  selected_clustering_method <- reactive({
    if(!is.null(input$clustering_method_select)){
      return(input$clustering_method_select)
    } else{
      return(default_clustering_method)
    }
  })

  ret <- reactive({
    selected_rows <- table_rows_selected()

    if(input$sqrt_dist == T){
      sqrt_dist = T
    }else{
      sqrt_dist = F # Can be NULL?
    }
    # browser()
    d <- compute_dist(mlst, selected_rows)

    dist_matrix <- rename_isolates_in_dist_matrix(d, metadata, selected_column)

    tree <- compute_tree(dist_matrix, sqrt_dist, selected_column, selected_clustering_method)
    # browser()

    pct <- phylocanvas(
      tree(),
      treetype = input$tree_layout_select,
      width = 1200,
      height = 600,
      nodesize = 2,
      alignlabels = input$align_labels,
      linewidth = 1,
      showcontextmenu = F,
      showhistory = F,
      showscalebar = T,
      textsize = input$label_size
    )
    return(pct)
  })

  label_size <- reactive({
    if(is.null(input$label_size)){return(default_label_size)}
    return(input$label_size)
  })

  pct <- reactive({
    pc <- colour_nodes_in_tree(ret(), metadata, selected_column, label_size)()
    current_phylocanvas_tree(pct)
    return(pc)
  })

  selected_nodes <- reactive({
    selection <- nodes()
    if(is.null(selection)){
      if(!is.null(reactive_selected_nodes())){
        return(reactive_selected_nodes())
      }
      return(NULL)
    }
    reactive_selected_nodes(selection)
    return(selection)
  })

  metadata_tree_selection <- reactive({
    selection <- selected_nodes()
    if(is.null(selection)){return(NULL)}
    data <- metadata()
    if(is.null(selected_column)){
      selected_column <- default_grouping_col_phylocanvas
    }
    isolates <- str_split(selection, phylocanvas_separator, simplify = T)[,1]
    data <- data[data[[main_record_idt_col]] %in% isolates, ]
    return(data)
  })

  output$tree_out_phylocanvas <- renderPhylocanvas({
    print(table_rows_selected())
    log_debug("phylocanvas_out")
    pct()
  })

  output$nodes_selected <- renderDataTable(
    metadata_tree_selection() %>% datatable()
  )

  heatmap_plot <- reactive({
    # browser()
    d <- reactive_distance_matrix()
    if(is.null(d)){return(NULL)}
    if(!is.null(metadata_tree_selection()) && nrow(metadata_tree_selection()) > 1){
      isolates <- row.names(as.matrix(d))
      isolates <- isolates[isolates %in% metadata_tree_selection()[[main_record_idt_col]]]
      if(length(isolates) > 0){
        d <- dist_subset(d, isolates)
      }
    }
    return(d)
  })

  output$heatmap_container <- renderUI({
    d <- heatmap_plot()
    if(!is.null(d)){
      print(nrow(as.matrix(d)))
      size <- min(nrow(as.matrix(d)) * 30, 1200)
      size <- max(size, 200)
      print(size)
      plotlyOutput(session$ns("heatmap"), width = "90%", height = paste0(size,"px")) %>%
        withSpinner(type = spinner_type)
    }
  })


  output$heatmap <- renderPlotly({
    d <- heatmap_plot()
    if(!is.null(d)){
      h <- compute_clustering(d, selected_clustering_method())
      d <- as.matrix(d)
      heatmaply(d,
                Rowv = h,
                Colv = h,
                show_dendrogram = c(F,F),
                dend_hoverinfo = F,
                dynamicTicks = T,
                plot_method = "plotly",
                margins = c(50,50,NA,0),
                grid_color = "#FFFFFF")
    }
  })
}
pydupont/esr.shiny.redcap.modules documentation built on Dec. 25, 2019, 3:20 a.m.