R/inspect.R

Defines functions make_plot inspect

Documented in inspect

# ==================================================================== #
# TITLE                                                                #
# Tools for Data Analysis at Certe                                     #
#                                                                      #
# AUTHORS                                                              #
# Berends MS (m.berends@certe.nl)                                      #
# Meijer BC (b.meijer@certe.nl)                                        #
# Hassing EEA (e.hassing@certe.nl)                                     #
#                                                                      #
# COPYRIGHT                                                            #
# (c) 2019 Certe Medische diagnostiek & advies - https://www.certe.nl  #
#                                                                      #
# LICENCE                                                              #
# This R package is free software; you can redistribute it and/or      #
# modify it under the terms of the GNU General Public License          #
# version 2.0, as published by the Free Software Foundation.           #
# This R package is distributed in the hope that it will be useful,    #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         #
# GNU General Public License for more details.                         #
# ==================================================================== #

make_plot <- function(data,
                      title = deparse(substitute(data)), 
                      subtitle = "",
                      subtitle_colour = "black") {
  
  colour_fill <- "#96E4F6"
  colour_border <- "#4DCFF0"
  data_classes <- c('Date', 'POSIXct', 'POSIXlt')
  character_x_labels <- FALSE
  
  # rsi
  if (any(class(data) == 'rsi') | all(data %in% c('I', 'R', 'S'), na.rm = TRUE)) {
    data <- as.rsi(data)
    par(mar = c(3, 3, 4, 2))
    p <- barplot(as.matrix(table(data)),
                 main = title,
                 beside = FALSE,
                 col = c(S = rgb(162, 245, 150, maxColorValue = 255), 
                         I = rgb(245, 222, 150, maxColorValue = 255),
                         R = rgb(254, 174, 150, maxColorValue = 255)),
                 border = c(S = rgb(162, 245, 150, maxColorValue = 255), 
                            I = rgb(245, 222, 150, maxColorValue = 255),
                            R = rgb(254, 174, 150, maxColorValue = 255)),
                 ylim = c(0, sum(table(data)) + 0.1 * sum(table(data))))
  } else if (any(class(data) == "mic")) {
    par(mar = c(3, 3, 4, 2))
    p <- barplot(data[!is.na(data)],
                 col = colour_fill,
                 border = colour_border)
    

  } else if (any(class(data) == "factor")) {
    par(mar = c(3, 3, 4, 2))
    p <- barplot(table(data), 
                 col = colour_fill,
                 border = colour_border,
                 main = title)
    
    
  } else if (any(class(data) == "logical")) {
    par(mar = c(3, 3, 4, 2))
    p <- barplot(table(data), 
                 main = title,
                 col = c(rgb(254, 174, 150, maxColorValue = 255),
                         rgb(162, 245, 150, maxColorValue = 255)),
                 border = c(rgb(254, 174, 150, maxColorValue = 255),
                         rgb(162, 245, 150, maxColorValue = 255)),
                 ylim = c(0, sum(table(data)) + 0.25 * sum(table(data))))
    
    
  } else if (any(class(data) %in% c('numeric', 'integer', 'double', 'single', 'raw',
                             data_classes, 'hms', 'difftime'))) {
    if (any(class(data) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
      difference_days <- as.integer(max(data, na.rm = TRUE) - min(data, na.rm = TRUE))
      breaks <- dplyr::case_when(
        difference_days <= 30 ~ "week",
        difference_days <= 365 ~ "month",
        TRUE ~ "year"
      )
      frmt <- dplyr::case_when(
        difference_days <= 30 ~ "%e",
        difference_days <= 365 ~ "%b",
        TRUE ~ "%Y"
      )
    } else {
      breaks <- "Sturges" 
      frmt <- NULL
    }
    
    par(mar = c(3.5, 3.5, 4, 2))
    p <- suppressWarnings(
      hist(data, 
           breaks = breaks,
           format = frmt,
           main = title,
           col = colour_fill,
           border = colour_border,
           xlab = "",
           ylab = "",
           las = 2,
           tick = FALSE,
           freq = TRUE))
    
  } else if (any(class(data) == "list")) {
    p <- plot(0,
              type = 'n',
              axes = FALSE,
              main = title)
    
  } else { # if (any(class(data) == "character")) {
    data_tbl <- rev(sort(table(data)))
    if (length(data_tbl) > 6) {
      data_tbl <- c(c(rest = sum(data_tbl[7:length(data_tbl)])),
                    sort(data_tbl[1:6]))
      names(data_tbl)[1] <- paste0("Rest (x", length(table(data)) - 6, ")")
      
    }
    par(mar = c(3, 1, 4, 1))
    # p <- barplot(data_tbl,
    #              horiz = TRUE,
    #              las = 1, # x tekst altijd horizontaal
    #              main = title,
    #              col = colour_fill,
    #              border = colour_border,
    #              xlim = c(0, max(data_tbl) + 0.25 * max(data_tbl)))
    p <- barplot(data_tbl,
                 horiz = TRUE,
                 las = 1, # x tekst altijd horizontaal
                 main = title,
                 col = colour_fill,
                 border = colour_border,
                 xlim = c(0, max(data_tbl) + 0.25 * max(data_tbl)), offset = 0,names.arg = FALSE)
    character_x_labels <- TRUE

  }

  p
  mtext(text = paste0(subtitle, collapse = " > "),
        line = 0.5,
        col = subtitle_colour)
  if (character_x_labels == TRUE) {
    
    axis(side = 4, 
         labels = names(data_tbl),
         at = (1 + length(data_tbl) / 35) * c(1:length(data_tbl)),
         cex.names = 0.7,
         las = 1, 
         tick = FALSE, 
         pos = -50,
         padj = 1.5)
  }
  
}

#' Interactive summary
#'
#' Dit opent een Shiny-applicatie voor interactieve samenvatting van alle data van een tibble of dataframe. Typ de tekst van een object in een document of in de Console en selecteer de optie in het menu Addins (of stel het in als sneltoets, bijvoorbeeld \code{F2}).
#' @param dfname Standaard is leeg. Een object of de naam ervan. Controleert eerst of er een tekst geselecteerd is in een document, of dat er tekst in de Console staat.
#' @export
inspect <- function(dfname = NA) {
  
  # GUI based on exploratory.io
  
  require(shiny)
  require(miniUI)
  #library(ggplot2)
  require(dplyr)
  
  
  if (all(is.na(dfname))) {
    text_document <- rstudioapi::getActiveDocumentContext()$selection[[1]]$text
    text_console <- rstudioapi::getConsoleEditorContext()$contents
    
    if (text_console != "") {
      dfname <- text_console
    } else if (text_document != "") {
      dfname <- text_document
    } else {
      stop('No object found to be analysed.')
    }
    data <- eval(parse(text = dfname))
  } else {
    if (NCOL(dfname) > 1 | NROW(dfname) > 1) {
      data <- dfname
      dfname <- deparse(substitute(dfname))
    } else {
      dfname <- as.character(match.call())[2]
      data <- eval(parse(text = dfname))
    }
  }
  
  # ui ----------------------------------------------------------------------
  title_df <- paste0('Summary of `',
                     dfname,
                     '` (',
                     nrow(data) %>% format2(),
                     ' obs. of ',
                     NCOL(data) %>% format2(),
                     ' variables)')
  ui <- fluidPage(
    headerPanel(div('Summary of ',
                    code(dfname),
                    ':',
                    nrow(data) %>% format2(),
                    ' obs. of ',
                    NCOL(data) %>% format2(),
                    ' variables'),
                br()),
    
    # sidebarPanel(
    #   textAreaInput("filter1", "Filter:", resize = "none", placeholder = "Type new filter..."),
    #   submitButton("Apply filter", icon("play-circle"), width = '100%'),
    #   br(),
    #   actionButton("save", "Save as...", icon("floppy-o"), width = '100%'),
    #   style = 'position: fixed !important',
    #   width = 2),
    mainPanel(
      lapply(1:NCOL(data), function(i) {
        column(width = 2,
               plotOutput(paste0("plot", i), height = "250px"),
               #hr(style = 'margin-top:0px;margin-bottom:10px;border: 0;border-top:10px solid #B3D1D9;'),
               tableOutput(paste0("table", i)) #,
               #hr(style = 'margin-top:30px;margin-bottom:30px;border: 0;border-top:2px solid #D7D7D7;')
        )
      }),
      width = 12 # moet 10 worden als sidebarPanel werkt
    )
  )
  
  # server ------------------------------------------------------------------
  server <- function(input, output, session) {
    
    observe({
      progress <- shiny::Progress$new(min = 1, max = NCOL(data))
      progress$set(message = "Getting data...", value = 0)
      for (i in 1:NCOL(data)) {
        
        local({
          my_i <- i
          
          plotname <- paste("plot", my_i, sep = "")
          tablename <- paste("table", my_i, sep = "")
          
          data_thiscol <- data %>% pull(my_i)
          data_class <- class(data_thiscol)
          
          if (any(data_class == 'rsi')) {
            subtitle_col <- 'purple'
          } else if (any(data_class == 'mic')) {
            subtitle_col <- 'yellow4'
          } else if (any(data_class == 'character')) {
            subtitle_col <- 'blue'
          } else if (any(data_class == 'logical')) {
            subtitle_col <- 'orange3'
          } else if (any(data_class %in% c('factor', 'list', 'matrix', 'vector', 'array'))) {
            subtitle_col <- 'yellow3'
          } else if (any(data_class %in% c('numeric', 'integer', 'double', 'single', 'raw'))) {
            subtitle_col <- 'green4'
          } else if (any(data_class %in% c('hms', 'difftime'))) {
            subtitle_col <- 'orangered1'
          } else if (any(data_class %in% c('Date', 'POSIXct', 'POSIXlt'))) {
            subtitle_col <- 'orangered3'
          } else {
            subtitle_col <- 'darkblue'
          }
          
          output[[tablename]] <- renderTable({
            progress$set(value = my_i,
                         message = 'Analysing...',
                         detail = substr(colnames(data)[my_i], 1, 22))
            
            if (length(data_thiscol[!is.na(data_thiscol)]) > 0) {
              if (any(data_class %in% c('numeric', 'integer', 'double', 'single', 'raw', 'hms', 'difftime'))) {
                # getallen
                t <- tibble(x = c("<code>NA</code>", "Min", "Max", "Median", "Mean"), y = character(1))
                if (any(data_class %in% c('hms', 'difftime'))) {
                  t[2, 2] <- data_thiscol %>% as.POSIXct() %>% min() %>% format2('HH:MM:SS')
                  t[3, 2] <- data_thiscol %>% as.POSIXct() %>% max() %>% format2('HH:MM:SS')
                  t[4, 2] <- data_thiscol %>% as.POSIXct() %>% median() %>% format2('HH:MM:SS')
                  t[5, 2] <- data_thiscol %>% as.POSIXct() %>% mean() %>% format2('HH:MM:SS')
                } else {
                  t[2, 2] <- data_thiscol %>% min() %>% format2()
                  t[3, 2] <- data_thiscol %>% max() %>% format2()
                  t[4, 2] <- data_thiscol %>% median() %>% format2()
                  t[5, 2] <- data_thiscol %>% mean() %>% format2()
                }
              } else if (any(data_class %in% c('Date', 'POSIXct', 'POSIXlt'))) {
                # datums
                t <- tibble(x = c("<code>NA</code>", "Unique", "Oldest", "Newest", "Difference"), y = character(1))
                date_min <- data_thiscol %>% min()
                date_max <- data_thiscol %>% max()
                t[2, 2] <- data_thiscol %>% unique() %>% length() %>% format2()
                t[3, 2] <- date_min %>% format2("d mmm yyyy")
                t[4, 2] <- date_max %>% format2("d mmm yyyy")
                t[5, 2] <- paste0(difftime(date_max, date_min, units = 'auto') %>% as.double() %>% format2(), ' days')
                
              } else if (any(data_class == 'logical')) {
                # logicals
                t <- tibble(x = c("<code>NA</code>", "% TRUE", "% FALSE", "&nbsp;", "&nbsp;"), y = character(1))
                TFs <- length(data_thiscol[!is.na(data_thiscol)])
                Ts <- sum(data_thiscol)
                Fs <- sum(!data_thiscol)
                t[2, 2] <- (Ts / TFs) %>% format2(percent = TRUE)
                t[3, 2] <- (Fs / TFs) %>% format2(percent = TRUE)
                
              } else if (any(data_class == 'rsi') | all(data_thiscol[!is.na(data_thiscol) & data_thiscol != ''] %>% toupper() %in% c('I', 'R', 'S'))) {
                data_thiscol <- as.rsi(data_thiscol)
                # resistentie
                t <- tibble(x = c("<code>NA</code>", "<i>Tested</i>", "% R", "<strong>% S+I</strong>", "% S"), y = character(1))
                t[2, 2] <- n_rsi(data_thiscol) %>% format2()
                t[3, 2] <- portion_R(data_thiscol) %>% format2(percent = TRUE)
                t[4, 2] <- paste0("<strong>", portion_SI(data_thiscol)%>% format2(percent = TRUE), "</strong>")
                t[5, 2] <- portion_S(data_thiscol)%>% format2(percent = TRUE)
                
              } else if (any(data_class == 'factor')) {
                # factors
                t <- tibble(x = c("<code>NA</code>", "Levels", "Lowest value", "Highest value", "&nbsp;"), y = character(1))
                if (any(data_class == 'mic')) {
                  # MIC's
                  t <- tibble(x = c("<code>NA</code>", "Levels", "Lowest MIC", "Highest MIC", "&nbsp;"), y = character(1))
                }
                x <- data_thiscol[!is.na(data_thiscol)]
                n <- x %>% length()
                t[2, 2] <- length(levels(data_thiscol)) %>% format2()
                t[3, 2] <- sort(x)[1] %>% as.character()
                t[4, 2] <- sort(x)[1] %>% as.character()
                
              } else {
                # rest
                t <- tibble(x = c("<code>NA</code>", "<i>Not listed</i>", "Unique", "Min length", "Max length"), y = character(1))
                NAs <- data_thiscol[is.na(data_thiscol)] %>% length()
                if (NAs > 0) {
                  NAs <- 1
                }
                
                t[2, 2] <- max(0, n_distinct(data_thiscol) - 8 - NAs)
                t[3, 2] <- data_thiscol %>% unique() %>% length() %>% format2()
                t[4, 2] <- data_thiscol %>% nchar() %>% min() %>% format2()
                t[5, 2] <- data_thiscol %>% nchar() %>% max() %>% format2()
              }
              
            } else {
              t <- tibble(x = c("<code>NA</code>", rep("&nbsp;", 4)), y = "&nbsp;")
            }
            
            if (my_i == NCOL(data)) {
              progress$close()
            }
            
            NAs <- data_thiscol[is.na(data_thiscol)] %>% length()
            if (NAs == 0) {
              font_colour <- "green"
            } else {
              font_colour <- "#c7254e"
            }
            t[1, 2] <- paste0('<font style="color:', font_colour, ';">', NAs %>% format2(),
                              ' (', (NAs / nrow(data)) %>% format2(percent = TRUE, round = 0), ')</font>')
            t
          },
          sanitize.text.function = function(x){x},
          colnames = FALSE,
          rownames = FALSE,
          align = 'lr',
          spacing = 'xs',
          width = "100%"
          )
          
          output[[plotname]] <- renderPlot({
            
            if (length(data_thiscol[!is.na(data_thiscol)]) > 0) {
              p_title <- colnames(data)[my_i]
              p_title_abname <- p_title %>% gsub('_mic$', '', .)
              p_title_str <- paste0('   ', p_title, ' ')
              drug <- suppressWarnings(AMR::ab_name(p_title_abname, tolower = TRUE))
              if (!is.na(drug)) {
                p_title <- bquote(bold(.(p_title_str)) ~ italic(.(drug))) %>% as.expression()
              } else {
                p_title <- bquote(bold(.(p_title_str))) %>% as.expression()
              }
              p <- suppressWarnings(make_plot(data = data_thiscol,
                                              title = p_title, 
                                              subtitle = data_class, 
                                              subtitle_colour = subtitle_col))
            } else {
              par(mar = c(5, 4, 4, 2) + 0.1)
              p <- plot(0, type='n', axes=FALSE, main = colnames(data)[my_i])
            }
            p
          })
        })
      }
    })
    
    # Listen for 'done'.
    observeEvent(input$done, {
      invisible(stopApp())
    })
    
    # observeEvent(input$save, {
    #   showNotification("Object saved.", type = "message")
    # })
  }
  
  if (Sys.info()['sysname'] == "Linux") {
    suppressWarnings(
      current_resolution <- system("xdpyinfo | awk '/dimensions/{print $2}'", intern = TRUE) %>% 
        strsplit("x") %>%
        unlist() %>%
        as.double()
    )
  } else if (Sys.info()['sysname'] == "Darwin") {
    # macOS
    suppressWarnings(
      current_resolution <- system("system_profiler SPDisplaysDataType |grep Resolution", intern = TRUE) %>% 
        strsplit("x") %>%
        unlist() %>%
        gsub("[^0-9]", "", .) %>%
        as.double()
    )
  } else {
    suppressWarnings(
      current_resolution <- system("wmic path Win32_VideoController get CurrentHorizontalResolution,CurrentVerticalResolution /format:value", intern = TRUE)  %>%
        strsplit("=") %>%
        unlist() %>%
        as.double()
    )
  }
  current_resolution <- current_resolution[!is.na(current_resolution)]
  current_resolution[current_resolution > 1920] <- 1920
  # Use a modal dialog as a viewer.
  viewer <- dialogViewer(dialogName = dfname,
                         width = current_resolution[1] * 0.95,
                         height = current_resolution[2] * 0.95)
  
  suppressMessages(
    # suppressWarnings(
    runGadget(app = ui, server = server, viewer = viewer)
    # )
  )
}

summary_interactive <- inspect
msberends/certedata documentation built on Nov. 26, 2019, 5:19 a.m.