pollucheck: Explore Open-Source Air Pollution Data

library(shiny)
library(Cairo)
library(openair)
library(tidyverse)
library(dplyr)
library(shinyjs)
library(bslib)
library(forecast)
library(biwavelet)
library(readxl)
library(DT)
library(ggplot2)
library(broom)
library(data.table)
library(janitor)
library(nortest)
library(zoo)



ui <- fluidPage(
  shinyjs::useShinyjs(),
  theme = bslib::bs_theme(bootswatch = "pulse"),
  h1("pollucheck - Analyse open-source air-quality data"),
  tags$head(tags$style(
    HTML(
      ".sidebar {
                    height : 10vh; overflow-y : auto; font-size : 13px;
                    background-color: #ffece9;
                    }" ,
      ".shiny-output-error-validation {
                    color : red; font-size : 14px;
                    }"
    )
  )),
  sidebarLayout(
    position = "left",
    sidebarPanel(
      width = 3,
      conditionalPanel(
        condition = "input.tabs1 == 3",
        tags$hr(),
        actionButton("da", "Data availability plot"),
        tags$hr(),
        tags$hr(),
        selectInput("palleInp", "Select parameter",
                    multiple = FALSE,
                    "Select"),
        radioButtons(
          "avg_hour3",
          "Plotting data",
          c("Hourly mean" = "hour3",
            "Daily mean" = "daily3"),
          selected = "hour3"
        ),
        tags$hr(),
        tags$hr(),
        actionButton("ts", "Time-series plot"),
        tags$br(),
        tags$br(),
        textInput("ts_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("ts_y", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        actionButton("box", "Month and Year box plot"),
        tags$br(),
        tags$br(),
        textInput("box_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("box_y", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        actionButton("mbox", "Monthly box plot"),
        tags$br(),
        tags$br(),
        textInput("box_mmt", label = "Edit title of plot",
                  value = "Title"),
        textInput("box_my", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        actionButton("boxt", "Vertical bar plot"),
        tags$br(),
        tags$br(),
        textInput("box_mtt", label = "Edit title of plot",
                  value = "Title"),
        textInput("box_yt", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        radioButtons(
          "diurn",
          "Diurnal plot",
          c("All data" = "all",
            "Monthly" = "mon"),
          selected = "all"
        ),
        radioButtons(
          "diur",
          "Aggregation method",
          c("Mean and Std Dev" = "mesd",
            "Median and IQR" = "mediq"),
          selected = "mesd"
        ),
        actionButton("diurnal", "Diurnal plot"),
        tags$br(),
        tags$br(),
        textInput("diurnal_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("diurnal_y", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$br(),
        downloadButton('download_diurnal', "Download as csv"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 2",
        tags$hr(),
        selectInput(
          "avg",
          label = "Averaging period",
          c(
            "None" = "no",
            "Daily" = "day",
            "Monthly" = "monthly",
            "Month and Year" = "month"
          ),
          multiple = FALSE,
          selected = "None"
        ),
        tags$br(),
        downloadButton('download_stats', "Download as csv"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 6",
        tags$hr(),
        radioButtons(
          "avg_hour1",
          "Plotting data",
          c("Hourly mean" = "hour1",
            "Daily mean" = "daily1"),
          selected = "hour1"
        ),
        tags$hr(),
        tags$hr(),
        selectInput("DepVar",
                    "Dependent variable",
                    multiple = FALSE, "Select"),
        selectInput("InDepVar",
                    "Independent variable",
                    multiple = FALSE, "Select"),
        actionButton("reg", "Linear regression"),
        tags$br(),
        tags$br(),
        textInput("reg_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("reg_x", label = "Edit X axis label",
                  value = "Independent variable"),
        textInput("reg_y", label = "Edit Y axis label",
                  value = "Dependent variable"),
        tags$hr(),
        tags$hr(),
        selectInput("DepVar1",
                    "Dependent variable",
                    multiple = FALSE, "Select"),
        selectInput("InDepVar1",
                    "Independent variable(s)",
                    multiple = TRUE, "Select"),
        actionButton("mulreg", "Multiple linear regression"),
        tags$br(),
        tags$br(),
        textInput("reg_mmt", label = "Edit title of plot",
                  value = "Title"),
        textInput("reg_mx", label = "Edit X axis label",
                  value = "Fitted"),
        textInput("reg_my", label = "Edit Y axis label",
                  value = "Dependent variable"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 4",
        tags$hr(),
        selectInput("palleInp1", "Select parameter",
                    multiple = FALSE, "Select"),
        tags$hr(),
        actionButton("cp", "Calendar Plot"),
        tags$br(),
        tags$br(),
        textInput("cp_mt", label = "Edit title of plot",
                  value = "Title"),
        tags$hr(),
        tags$hr(),
        radioButtons(
          "stat_tv",
          "Plot using",
          c(
            "Median and quantiles" = "median",
            "Mean and 95% confidence intervals" = "mean"
          ),
          selected = "mean"
        ),
        actionButton("tv", "Time variation"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 1",
        tags$hr(),
        radioButtons(
          "type",
          "Data Source",
          choiceNames = list(
            HTML(
              "<a href = 'https://openaq.org/#/countries' target = '_blank'>OpenAQ</a>"
            ),
            HTML(
              "<a href = 'https://www.airnow.gov/international/us-embassies-and-consulates/' target = '_blank'>AirNow - US Embassies</a>"
            ),
            HTML(
              "<a href = 'https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing' target = '_blank'>Pollution Control Board (India)</a>"
            )
          ),
          choiceValues = list("oaq", "an", "cpcb"),
          selected = "cpcb"
        ),
        tags$br(),
        test <-
          a(
            "Input Timezone* (link to supported
                                             timezones)",
            href = "https://en.wikipedia.org/wiki/List_of_tz_database_time_zones",
            style = "font-size:14px; ",
            target = "_blank"
          ),
        selectInput(
          "timezone",
          label = "",
          choices = OlsonNames(),
          selected = "Asia/Kolkata"
        ),
        tags$hr(),
        conditionalPanel(
          condition = "input.type == 'cpcb'",
          radioButtons(
            "file",
            "Input data time-resolution",
            c(
              "15 minutes" = "15 min",
              "30 minutes" = "30 min",
              "60 minutes" = "60 min"
            ),
            selected = "60 min"
          )
        ),
        tags$br(),
        fileInput(
          "file1",
          "Upload data file*",
          multiple = FALSE,
          accept = c('.xlsx', '.csv')
        ),
        tags$hr(),
        checkboxInput('remove_9', 'Remove negative values'),
        checkboxInput('repeated', 'Remove duplicate consecutive values'),
        checkboxInput(
          'exclude',
          'Specify a multiple (x) to remove outliers based on Mean and SD (Mean + x * SD)'
        ),
        checkboxInput('log_op', 'Use log values'),
        conditionalPanel(condition = "input.exclude == true",
                         numericInput("ey", NULL,
                                      value = 3)),
        checkboxInput(
          'percent',
          'Specify % of data completeness for computing daily mean values'
        ),
        conditionalPanel(condition = "input.percent == true",
                         sliderInput(
                           "per",
                           NULL,
                           value = 75,
                           min = 35,
                           max = 100
                         )),
        numericInput("high_number",
                     "Remove PM2.5 and PM10 above",
                     value = 9999),
        tags$hr(),
        radioButtons(
          "avg_hour",
          "Output aggregation",
          c("Hourly mean" = "hour",
            "Daily mean" = "daily"),
          selected = "hour"
        ),
        tags$br(),
        actionButton("hourly", "Show Data"),
        downloadButton('download', "Download as csv"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 7",
        tags$hr(),
        radioButtons(
          "type2",
          "Data Source",
          choiceNames = list(
            HTML(
              "<a href = 'https://openaq.org/#/countries/IN?_k=5ecycz' target = '_blank'>OpenAQ</a>"
            ),
            HTML(
              "<a href = 'https://www.airnow.gov/international/us-embassies-and-consulates/#India' target = '_blank'>AirNow - US Embassies</a>"
            ),
            HTML(
              "<a href = 'https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing' target = '_blank'>Pollution Control Board (India)</a>"
            )
          ),
          choiceValues = list("oaq", "an", "cpcb"),
          selected = "cpcb"
        ),
        tags$br(),
        test <-
          a(
            "Input Timezone* (link to supported
                                             timezones)",
            href = "https://en.wikipedia.org/wiki/List_of_tz_database_time_zones",
            style = "font-size:14px; ",
            target = "_blank"
          ),
        selectInput(
          "timezone1",
          label = "",
          choices = OlsonNames(),
          selected = "Asia/Kolkata"
        ),
        tags$hr(),
        conditionalPanel(
          condition = "input.type2 == 'cpcb'",
          radioButtons(
            "file12",
            "Input data time resolution",
            c(
              "15 minutes" = "15 min",
              "30 minutes" = "30 min",
              "60 minutes" = "60 min"
            ),
            selected = "60 min"
          )
        ),
        tags$br(),
        fileInput(
          "file2",
          "Upload data from another site to compare",
          multiple = FALSE,
          accept = c('.xlsx', '.csv')
        ),
        tags$hr(),
        tags$hr(),
        selectInput(
          "Para",
          "Parameter to plot in original (Site 1) data",
          multiple = FALSE,
          "Select"
        ),
        selectInput(
          "Para1",
          "Parameter to plot in comparision (Site 2) data",
          multiple = FALSE,
          "Select"
        ),
        actionButton("plot_values", "Plot time series"),
        tags$br(),
        tags$br(),
        textInput("comp_mt", label = "Edit title of time-series plot",
                  value = "Title"),
        textInput("comp_ytr", label = "Edit secondary Y axis label (Site 1) of time-series plot",
                  value = "Site 1"),
        textInput("comp_y", label = "Edit primary Y axis label (Site 2) of time-series plot",
                  value = "Site 2"),
        tags$hr(),
        tags$hr(),
        actionButton("plot_val", "Scatter plot"),
        tags$br(),
        tags$br(),
        textInput("reg_mt1", label = "Edit title of scatter plot",
                  value = "Title"),
        textInput("reg_mx1", label = "Edit X axis of scatter plot",
                  value = "Site 2"),
        textInput("reg_my1", label = "Edit Y axis of scatter plot",
                  value = "Site 1"),
        tags$hr(),
        tags$hr(),
        radioButtons(
          "diurn1",
          "Diurnal plot",
          c("All data" = "all",
            "Monthly" = "mon"),
          selected = "all"
        ),
        radioButtons(
          "diur1",
          "Aggregation method",
          c("Mean and Std Dev" = "mesd",
            "Median and IQR" = "mediq"),
          selected = "mesd"
        ),
        actionButton("plot_val_di", "Diurnal plot"),
        tags$br(),
        tags$br(),
        textInput("diurnal_mt1", label = "Edit title of plot",
                  value = "Title"),
        textInput("diurnal_y1", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr()
      ),
      conditionalPanel(
        condition = "input.tabs1 == 5",
        tags$hr(),
        selectInput("palleInp2", "Select parameter",
                    multiple = FALSE, "Select"),
        radioButtons(
          "avg_hour2",
          "Plotting data",
          c("Hourly mean" = "hour2",
            "Daily mean" = "daily2"),
          selected = "hour2"
        ),
        tags$hr(),
        tags$hr(),
        radioButtons(
          "normality",
          "Normality Test",
          c("Anderson-Darling" = "ad",
            "Shapiro-Wilk" = "sh"),
          selected = "ad"
        ),
        tags$hr(),
        tags$hr(),
        actionButton("freq", "Density plot"),
        tags$br(),
        tags$br(),
        textInput("freq_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("freq_x", label = "Edit X axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        actionButton("qq", "Q-Q plot"),
        tags$br(),
        tags$br(),
        textInput("qq_mt", label = "Edit title of plot",
                  value = "Title"),
        textInput("qq_y", label = "Edit Y axis label",
                  value = "Parameter"),
        tags$hr(),
        tags$hr(),
        actionButton("acf", "Autocorrelogram plot (based on monthly mean)"),
        textInput("acf_main", label = "Edit title of plot",
                  value = "Title"),
        tags$hr(),
        tags$hr(),
        actionButton("mk", "Trend Analysis (based on daily mean)"),
        tags$br(),
        tags$br(),
        checkboxInput('avg_month', 'Use monthly means'),
        tags$hr(),
        tags$hr(),
        actionButton("ta", "Wavelet Analysis (based on daily mean)"),
        tags$br(),
        tags$br(),
        textInput("title_bi", label = "Edit title of the plot",
                  value = "Title"),
        tags$hr()
      )
    ),
    mainPanel(
      tags$a(
        img(
          src = 'logo.png',
          align = "right",
          height = 70,
          width = 70
        ),
        href = "https://www.ilklabs.com/",
        target = "_blank"
      ),
      tags$head(tags$style(type = 'text/css',
                           ".nav-tabs {
                               font-size: 17px
                               } ")),
      tabsetPanel(
        id = "tabs1",
        tabPanel(
          value = 1,
          title = "File",
          dataTableOutput("table1")
        ),
        tabPanel(value = 2,
                 title = "Summary",
                 dataTableOutput("table")),
        tabPanel(
          value = 3,
          title = "Summary Plots",
          plotOutput("plot16", width = 800),
          plotOutput("plot1", width = 800),
          plotOutput("plot3", width = 800),
          plotOutput("plot9", width = 800),
          plotOutput("plot10", width = 800),
          plotOutput("plot2", width = 800)
        ),
        tabPanel(
          value = 5,
          title = "Statistical Plots",
          verbatimTextOutput("normality_test"),
          plotOutput("plot6", width = 800),
          plotOutput("plot7", width = 800),
          plotOutput("plot18", width = 800),
          verbatimTextOutput("kendal_test"),
          plotOutput("plot13", width = 800)
        ),
        tabPanel(
          value = 6,
          title = "Linear Regression",
          plotOutput("plot8", width = 800),
          verbatimTextOutput("RegOut"),
          plotOutput("plot11", width = 800),
          verbatimTextOutput(outputId = "IndPrint"),
          verbatimTextOutput(outputId = "DepPrint")
        ),
        tabPanel(
          value = 7,
          title = "Compare",
          plotOutput("plot12", height = 600),
          plotOutput("plot15", height = 600),
          plotOutput("plot17", height = 600)
        ),
        tabPanel(
          value = 4,
          title = "`openair`",
          plotOutput("plot5", height = 600),
          plotOutput("plot4", height = 600)
        ),
        tabPanel(title = "FAQ's",
                 includeMarkdown("WWW/include.md")),
        tabPanel(
          title = "Disclaimer",
          helpText(
            "Wind Direction data is valid only if downloaded at hourly value and used at the same resoluton."
          ),
          helpText(
            "The developers and contributors are not responsible for malfunction or miscalculation or bugs in the code."
          ),
          helpText(
            "The developers and contributors of this application are not the authors of openair package used here."
          )
        )
      )
    )
  )
)


server <- function(input, output, session) {
  # bs_themer()
  options(shiny.maxRequestSize = 30 * 1024 ^ 2,
          shiny.launch.browser = TRUE)
  observeEvent(input$exclude, {
    if (input$exclude)
      shinyjs::enable(id = "log_op")
    else
      shinyjs::disable(id = "log_op")
  })
  observeEvent(input$type, {
    if (input$type == "cpcb")
      shinyjs::disable(id = "timezone")
    else
      shinyjs::enable(id = "timezone")
  })
  observeEvent(input$type2, {
    if (input$type2 == "cpcb")
      shinyjs::disable(id = "timezone1")
    else
      shinyjs::enable(id = "timezone1")
  })
  
  #### Function to remove outliers
  LLD <- function(x, y, z, ey) {
    ifelse(x < (y + (ey * z)) && x > (y - (ey * z)), x, NA)
  }
  
  #### Function to make date time series
  date_ts <- function(file, time_period) {
    ye <- format(file[1, "date"], format = "%Y-%m")
    x1 <- as.POSIXct(paste0(ye, "-01 00:00:00"),
                     format = '%Y-%m-%d %H:%M:%S',
                     tz = input$timezone)
    ye <- format(tail(file$date, n = 3)[1], format = "%Y-%m-%d")
    x2 <- as.POSIXct(paste0(ye, " 23:00:00"),
                     format = '%Y-%m-%d %H:%M:%S',
                     tz = input$timezone)
    date <- seq(
      from = as.POSIXct(x1, tz = input$timezone),
      to = as.POSIXct(x2, tz = input$timezone),
      by = time_period
    )
    date
    return(date)
  }
  
  #### CPCB data overlaps hence break it and make it into a df using this function
  #### using date and also making the first column as the column name
  make_df <- function(y, tseries_df) {
    df <- data.frame(y)
    if (!nrow(df))
    {
      df <- tseries_df
    } else {
      df <- df %>%
        select(everything(),-c(new_set, set))
      names(df) <- as.matrix(df[1,])
      df <- df[-1,]
      df[] <- lapply(df, function(x)
        type.convert(as.character(x)))
      df <- base::Filter(function(x)
        ! all(is.na(x)), df)
      df <- df %>%
        dplyr::select("date" = `To Date`, everything()) %>%
        dplyr::mutate(date = as.POSIXct(date, format = '%d-%m-%Y %H:%M:%S',
                                        tz = input$timezone))
    }
  }
  
  #### How to assign functions returning two variables
  ':=' <- function(lhs, rhs) {
    frame <- parent.frame()
    lhs <- as.list(substitute(lhs))
    if (length(lhs) > 1)
      lhs <- lhs[-1]
    if (length(lhs) == 1) {
      do.call(`=`, list(lhs[[1]], rhs), envir = frame)
      return(invisible(NULL))
    }
    if (is.function(rhs) || is(rhs, 'formula'))
      rhs <- list(rhs)
    if (length(lhs) > length(rhs))
      rhs <- c(rhs, rep(list(NULL), length(lhs) - length(rhs)))
    for (i in 1:length(lhs))
      do.call(`=`, list(lhs[[i]], rhs[[i]]), envir = frame)
    return(invisible(NULL))
  }
  
  #### Function to handle cpcb data
  cpcb <- function(sf, sfd) {
    trial <- read_excel(path = sf,
                        skip = 16,
                        col_types = "text")
    trial$date <-
      gsub(":00", ":00:00", trial$`To Date`, fixed = TRUE)
    trial <- trial %>%
      mutate(new_set = is.na(`From Date`),
             set     = cumsum(new_set)) %>%
      filter(!is.na(`To Date`))
    trial[, c('From Date', 'To Date')] <- list(NULL)
    site_name <- read_excel(path = sf, skip = 5)
    site_name <- as.character(site_name[1, 2])
    dt_s <- split(trial, trial$set)
    PM <- data.frame(dt_s$`0`) %>%
      dplyr::mutate(date = as.POSIXct(date, format = '%d-%m-%Y %H:%M:%S', tz = input$timezone))
    date <- date_ts(PM, sfd)
    tseries_df <- data.frame(date)
    site1_join <- left_join(tseries_df, PM, by = "date") %>%
      select(date, everything(),-c(new_set, set))
    Ben <- make_df(dt_s$`1`, tseries_df)
    Beny <- make_df(dt_s$`2`, tseries_df)
    Bent <- make_df(dt_s$`3`, tseries_df)
    all <-
      list(site1_join, Ben, Beny, Bent) %>% reduce(left_join, by = "date")
    all <- all %>%
      mutate(Location = site_name) %>%
      select(date, Location, everything())
    col_interest <- 3:ncol(all)
    all[, col_interest] <-
      sapply(
        X = all[, col_interest],
        FUN = function(x)
          as.numeric(as.character(x))
      )
    all[, col_interest] <-
      sapply(
        X = all[, col_interest],
        FUN = function(x)
          ifelse(x == -999, NA, x)
      )
    return(list(all, date))
  }
  
  #### Function to handle airnow data
  an <- function(sdw) {
    trial <- read.csv(sdw,
                      header = TRUE,
                      sep = ",",
                      row.names = NULL)
    trial <- trial %>%
      dplyr::select(
        "date" = Date..LT.,
        "PM2.5" = Raw.Conc.,
        "Valid" = QC.Name,
        "Location" = Site
      ) %>%
      dplyr::mutate(date  = as.POSIXct(date, format = '%Y-%m-%d %I:%M %p', tz = input$timezone)) %>%
      dplyr::filter(Valid == "Valid")
    site_name <- as.character(trial[1, "Location"])
    trial$Valid <- NULL
    date <- date_ts(trial, "60 min")
    tseries_df <- data.frame(date)
    all <- left_join(tseries_df, trial, by = "date")
    all <- all %>%
      mutate(Location = site_name) %>%
      select(date, Location, everything())
    col_interest <- 3:ncol(all)
    all[, col_interest] <-
      sapply(
        X = all[, col_interest],
        FUN = function(x)
          as.numeric(as.character(x))
      )
    all[, col_interest] <-
      sapply(
        X = all[, col_interest],
        FUN = function(x)
          ifelse(x == -999, NA, x)
      )
    return(list(all, date))
  }
  
  #### Function to handle openaq data
  openaq <- function(sgf) {
    trial <- read.csv(sgf,
                      header = TRUE,
                      sep = ",",
                      row.names = NULL)
    trial <- trial %>%
      dplyr::select(
        "date" = local,
        "parameter" = parameter,
        "value" = value,
        "Location" = location
      ) %>%
      dplyr::mutate(date  = as.POSIXct(date, format = '%Y-%m-%dT%H:%M:%S', tz = input$timezone))
    site_name <- as.character(trial[1, "Location"])
    trial <- trial[order(trial$date),]
    date <- date_ts(trial, "1 min")
    tseries_df <- data.frame(date)
    trial <- trial %>%
      pivot_wider(
        id_cols = c(date),
        names_from = parameter,
        values_from = value
      )
    all <- full_join(tseries_df, trial, by = "date")
    all$hour <- lubridate::ceiling_date(all$date, "hour")
    all <- all %>%
      group_by(hour) %>%
      summarise_all(list(~ mean(., na.rm = TRUE))) %>%
      dplyr::select(everything(),-date) %>%
      dplyr::select("date" = hour, everything())
    names(all) <- toupper(names(all))
    all <- all %>%
      dplyr::select(everything(), "date" = DATE) %>%
      mutate(Location = site_name)
    if ("PM25" %in% colnames(all))
    {
      all <- all %>%
        dplyr::select(date, Location, everything(), "PM2.5" = PM25)
    }
    if ("NOX" %in% colnames(all))
    {
      all <- all %>%
        dplyr::select(date, Location, everything(), "NOx" = nox)
    }
    date <- all %>%
      select(date)
    return(list(all, date))
  }
  
  #### Function to remove negative values
  neg <- function(site1_join_f1) {
    col_interest <- 4:ncol(site1_join_f1)
    site1_join_f1[, col_interest] <-
      sapply(
        X = site1_join_f1[, col_interest],
        FUN = function(x)
          as.numeric(as.character(x))
      )
    site1_join_f1[, col_interest] <-
      sapply(
        X = site1_join_f1[, col_interest],
        FUN = function(x)
          ifelse(x < 0, NA, x)
      )
    site1_join_f1
  }
  
  #### Function to remove repeated values
  repeat_ed <- function(site1_join_f1) {
    col_interest <- 4:ncol(site1_join_f1)
    site1_join_f1[, col_interest] <-
      sapply(
        X = site1_join_f1[, col_interest],
        FUN = function(j)
          ifelse(c(FALSE, diff(
            as.numeric(j), 1, 1
          ) == 0),
          NA, j)
      )
    site1_join_f1[, col_interest] <-
      sapply(
        X = site1_join_f1[, col_interest],
        FUN = function(x)
          as.numeric(as.character(x))
      )
    site1_join_f1
  }
  
  #### Function to remove values higher than a limit
  remov_99 <- function(site1_join_f1, high_number) {
    site1_join_f1$PM2.5 <-
      ifelse(
        as.numeric(as.character(site1_join_f1$PM2.5)) >
          as.numeric(as.character(high_number)),
        as.numeric(as.character(NA)),
        as.numeric(as.character(site1_join_f1$PM2.5))
      )
    site1_join_f1
  }
  
  #### Function to compute PM2.5/PM10 ratio and check it
  ratio <- function(site1_join_f1, high_number) {
    b <-
      as.numeric(as.character(site1_join_f1$PM10)) > as.numeric(as.character(high_number))
    site1_join_f1$PM10 <- ifelse(b, as.numeric(as.character(NA)),
                                 as.numeric(as.character(site1_join_f1$PM10)))
    site1_join_f1$ratio_PM <-
      as.numeric(as.character(site1_join_f1$PM2.5)) /
      as.numeric(as.character(site1_join_f1$PM10))
    site1_join_f1$PM2.5 <-
      ifelse(
        site1_join_f1$ratio_PM >= 1 & !is.na(site1_join_f1$ratio_PM),
        as.numeric(as.character(NA)),
        as.numeric(as.character(site1_join_f1$PM2.5))
      )
    site1_join_f1$PM10 <-
      ifelse(
        site1_join_f1$ratio_PM >= 1 & !is.na(site1_join_f1$ratio_PM),
        as.numeric(as.character(NA)),
        as.numeric(as.character(site1_join_f1$PM10))
      )
    site1_join_f1
  }
  
  #### Function to remove outliers based on LLD function
  outlier <- function(site1_join_f1, name, date, eq) {
    site1_join_f1 <- site1_join_f1 %>%
      group_by(day, Location) %>%
      dplyr::mutate_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE))) %>%
      ungroup() %>%
      dplyr::select(date, day, Location, everything(),-date_mean,-date_sd)
    tseries_df <- site1_join_f1 %>%
      dplyr::select(date, Location)
    for (i in names(name)) {
      data_list <- site1_join_f1 %>%
        dplyr::select(starts_with(i))
      if (i == "NO") {
        data_list <- data_list %>%
          dplyr::select(-contains(c("NO2", "NOx")))
      } else if (i == "O") {
        data_list <- data_list %>%
          dplyr::select(-contains(c("Ozone")))
      } else {
        data_list
      }
      mean <- paste0(i, "_mean")
      sd <- paste0(i, "_sd")
      x <- as.numeric(as.character(data_list[[i]]))
      x[!is.finite(x)] <- NA
      y <- grep("_mean", colnames(data_list))
      y <- as.numeric(as.character(data_list[[y]]))
      y[!is.finite(y)] <- NA
      z <- grep("_sd", colnames(data_list))
      z <- as.numeric(as.character(data_list[[z]]))
      z[!is.finite(z)] <- NA
      if (!nrow(data_list)) {
        NULL
      } else {
        data_list[[i]] <- mapply(LLD, x, y, z, as.numeric(as.character(eq)))
      }
      tseries_df <- bind_cols(tseries_df, data_list)
    }
    site1_join_f1 <- tseries_df %>%
      dplyr::select(date, Location, everything(),-contains(c("_sd", "_mean")))
    tseries_df <- data.frame(date)
    site1_join_f1 <-
      left_join(tseries_df, site1_join_f1, by = "date") %>%
      dplyr::mutate(day = as.Date(date, format = '%Y-%m-%d', tz = input$timezone))
    site1_join_f1
  }
  
  #### Function to remove values which are incomplete in a day
  compl <- function(name, site1_join_f1, date, fi, per1) {
    tseries_df <- site1_join_f1 %>%
      dplyr::select(date, Location)
    for (i in names(name)) {
      data_list <- site1_join_f1 %>%
        dplyr::select(date, day, starts_with(i))
      if (i == "NO") {
        data_list <- data_list %>%
          dplyr::select(-contains(c("NO2", "NOx")))
      } else if (i == "O") {
        data_list <- data_list %>%
          dplyr::select(-contains(c("Ozone", "O3")))
      } else {
        data_list
      }
      data_list <- data_list %>%
        group_by(day) %>%
        dplyr::mutate_at(vars(contains(i)), list(no_hour = ~ sum(!is.na(.)))) %>%
        dplyr::select(-contains(c("_sd_no_hour", "_mean_no_hour")))
      old_no <- paste0(i, "_no_hour")
      names(data_list)[names(data_list) == old_no] <- 'no_hour'
      if (fi == "15 min") {
        time_avg <- 96
      } else if (fi == "30 min") {
        time_avg <- 48
      } else if (fi == "60 min") {
        time_avg <- 24
      }
      data_list[[i]] <-
        ifelse(data_list$no_hour >= ((per1 / 100) * time_avg), data_list[[i]], NA)
      data_list[, c('date', 'day', 'no_hour')] <- list(NULL)
      tseries_df <- bind_cols(tseries_df, data_list)
    }
    site1_join_f1 <- tseries_df %>%
      dplyr::mutate(day = as.Date(date, format = '%Y-%m-%d', tz = input$timezone))
  }
  
  #### Regression equation function
  reg_eqn <- function(x) {
    int <- round(coef(x)[1], digits = 3)
    slope <- round(coef(x)[2], digits = 3)
    eqn <- paste("y = ", slope, "x + (", int, ")")
    return(eqn)
  }
  
  #### Functions to be applied on the data set in a sequence
  data_file <-
    function(per,
             type,
             file_data_path,
             file_1,
             file,
             remove_9,
             repeated,
             percent,
             ey,
             exclude,
             high_number,
             log_op) {
      if (is.null(file_1)) {
        return(NULL)
      } else {
        if (type == "cpcb") {
          c(all, date) := cpcb(file_data_path, file)
          all <- data.frame(all)
          date <- data.frame(date)
        } else if (type == "an") {
          c(all, date) := an(file_data_path)
          all <- data.frame(all)
          date <- data.frame(date)
        } else if (type == "oaq") {
          c(all, date) := openaq(file_data_path)
          all <- data.frame(all)
          date <- data.frame(date)
        }
        all <- all %>%
          rename_at(vars(ends_with(".x")),
                    ~ str_replace(., "\\..$", "")) %>%
          select_at(vars(-ends_with(c(".y", ".1"))))
        site1_join_f1 <- all %>%
          dplyr::mutate(day = as.Date(date, format = '%Y-%m-%d', tz = input$timezone)) %>%
          dplyr::select(date, day, Location, everything())
        if (remove_9) {
          site1_join_f1 <- neg(site1_join_f1)
        } else {
          site1_join_f1
        }
        
        if (repeated) {
          site1_join_f1 <- repeat_ed(site1_join_f1)
        } else {
          site1_join_f1
        }
        
        name <- site1_join_f1 %>%
          dplyr::select(everything(),-day, -date, -Location)
        site1_join_f1 <- site1_join_f1 %>%
          dplyr::mutate(ratio_PM = NA) %>%
          dplyr::select(date, day, Location, everything())
        col_interest <- 4:ncol(site1_join_f1)
        if (log_op) {
          site1_join_f1[, col_interest] <-
            sapply(
              X = site1_join_f1[, col_interest],
              FUN = function(x)
                log(x)
            )
        } else {
          site1_join_f1
        }
        if (exclude) {
          site1_join_f1 <- outlier(site1_join_f1, name, date, ey)
          if (log_op) {
            site1_join_f1 <- site1_join_f1 %>%
              dplyr::select(date, day, Location, everything())
            col_interest <- 4:ncol(site1_join_f1)
            site1_join_f1[, col_interest] <-
              sapply(
                X = site1_join_f1[, col_interest],
                FUN = function(x)
                  exp(x)
              )
          }
        } else {
          site1_join_f1
        }
        
        site1_join_f1 <- site1_join_f1 %>%
          dplyr::select(date, day, Location, everything())
        col_interest <- 4:ncol(site1_join_f1)
        site1_join_f1[, col_interest] <-
          sapply(
            X = site1_join_f1[, col_interest],
            FUN = function(x)
              as.numeric(as.character(x))
          )
        if (percent) {
          site1_join_f1 <- compl(name, site1_join_f1, date, file, per)
        } else {
          site1_join_f1
        }
        
        if ("PM2.5" %in% colnames(site1_join_f1)) {
          site1_join_f1 <- remov_99(site1_join_f1, high_number)
          if ("PM10" %in% colnames(site1_join_f1))
          {
            site1_join_f1 <- ratio(site1_join_f1, high_number)
          } else {
            site1_join_f1$ratio_PM <- NA
          }
        } else {
          site1_join_f1
        }
        site1_join_f1 <- site1_join_f1 %>%
          dplyr::select(date, day, Location, everything()) %>%
          janitor::remove_empty("cols")
        site1_join_f1
      }
    }
  
  CPCB_f <- reactive({
    if (is.null(input$file1)) {
      data <-
        data_file(
          75,
          "cpcb",
          "tests/shinytest/BWSSB.xlsx",
          "tests/shinytest/BWSSB.xlsx",
          "60 min",
          TRUE,
          TRUE,
          TRUE,
          3,
          TRUE,
          999,
          FALSE
        )
    } else if (!is.null(input$file1)) {
      data <-
        data_file(
          input$per,
          input$type,
          input$file1$datapath,
          input$file1,
          input$file,
          input$remove_9,
          input$repeated,
          input$percent,
          input$ey,
          input$exclude,
          input$high_number,
          input$log_op
        )
    }
  })
  Cmp_f <- reactive({
    if (is.null(input$file2)) {
      data <-
        data_file(
          75,
          "cpcb",
          "tests/shinytest/BWSSB.xlsx",
          "tests/shinytest/BWSSB.xlsx",
          "60 min",
          TRUE,
          TRUE,
          TRUE,
          3,
          TRUE,
          999,
          FALSE
        )
    } else if (!is.null(input$file2)) {
      data <-
        data_file(
          input$per,
          input$type2,
          input$file2$datapath,
          input$file2,
          input$file12,
          input$remove_9,
          input$repeated,
          input$percent,
          input$ey,
          input$exclude,
          input$high_number,
          input$log_op
        )
    }
  })
  
  observeEvent(input$switch_tab, {
    updateTabsetPanel(session, "tabs1", selected = "FAQ's")
    removeModal()
  })
  
  query_modal <- modalDialog(
    title = "What to expect?",
    HTML(
      "First visit here? There is pre-loaded data click on buttons and generate the figures!<br>"
    ),
    footer = tagList(
      actionButton("help_link", "Read Me",
                   onclick = "window.open('https://github.com/adithirgis/pollucheck#app-usage',
                   '_blank')"),
      actionButton("switch_tab", "FAQ's"),
      modalButton("Close")
    ),
    easyClose = F
  )
  
  # Show the model on start up
  showModal(query_modal)
  
  observe({
    if (is.null(input$file1) & is.null(input$file2)) {
      data <- CPCB_f()
      data1 <- Cmp_f()
    } else {
      data <- CPCB_f()
      data1 <- Cmp_f()
    }
    data <- data %>%
      dplyr::select(-date,-day,-Location)
    data1 <- data1 %>%
      dplyr::select(-date,-day,-Location)
    updateSelectInput(session, "Para", choices = names(data))
    updateSelectInput(session, "Para1", choices = names(data1))
  })
  mess <- function(button, da) {
    data <- CPCB_f()
    if (button == da) {
      data <- openair::timeAverage(data, avg.time = "day")
    } else {
      data
    }
  }
  
  data_joined <- eventReactive(input$hourly, {
    data <- CPCB_f()
    if (input$avg_hour == "daily") {
      data <-
        openair::timeAverage(data, avg.time = "day", type = "Location")
    } else {
      data
    }
  })
  data_joined_comp <- eventReactive(input$plot_values, {
    data <- CPCB_f()
    data1 <- Cmp_f()
    if (input$avg_hour == "daily") {
      data <- openair::timeAverage(data, avg.time = "day")
      data1 <- openair::timeAverage(data1, avg.time = "day")
    } else {
      data
      data1
    }
    data <- data %>%
      select(date, "Site 1" = input$Para)
    data1 <- data1 %>%
      select(date, "Site 2" = input$Para1)
    all <- full_join(data, data1, by = "date")
  })
  data_scatter_comp <- eventReactive(input$plot_val, {
    data <- CPCB_f()
    data1 <- Cmp_f()
    if (input$avg_hour == "daily") {
      data <- openair::timeAverage(data, avg.time = "day")
      data1 <- openair::timeAverage(data1, avg.time = "day")
    } else {
      data
      data1
    }
    data <- data %>%
      select(date, "Site 1" = input$Para)
    data1 <- data1 %>%
      select(date, "Site 2" = input$Para1)
    all <- full_join(data, data1, by = "date")
  })
  data_diurnal_comp <- eventReactive(input$plot_val_di, {
    data <- CPCB_f()
    data1 <- Cmp_f()
    data <- data %>%
      select(date, "Site 1" = input$Para)
    data1 <- data1 %>%
      select(date, "Site 2" = input$Para1)
    all <- full_join(data, data1, by = "date")
  })
  trend <- function(data) {
    date_df <- data.frame(date = date_ts(data, "60 min"))
    data <- data %>%
      distinct(date, .keep_all = TRUE) %>%
      right_join(date_df, by = "date")
    data <-
      openair::timeAverage(data, avg.time = "day", type = "Location")
  }
  # Plot for ts of two sites
  output$plot12 <- renderPlot({
    if (is.null(input$file1) & is.null(input$file2)) {
      all <- data_joined_comp()
    }
    else {
      all <- data_joined_comp()
    }
    y_site1 <- as.character(input$comp_ytr)
    y_site2 <- as.character(input$comp_y)
    scaleFactor <-
      max(all$`Site 2`, na.rm = TRUE) / max(all$`Site 1`, na.rm = TRUE)
    ggplot(all, aes(x = as.POSIXct(date))) +
      labs(title = input$comp_mt, x = "") + theme2() +
      geom_line(aes(y = `Site 1`, colour = "Site 1"), size = 0.6) +
      geom_line(aes(y = (`Site 2` / scaleFactor), colour = "Site 2"), size = 0.6) +
      scale_colour_manual(
        name = "",
        labels = c(y_site1, y_site2),
        values = c("#F8766D", "#2596BE")
      ) +
      scale_y_continuous(
        name = input$comp_y,
        sec.axis = sec_axis( ~ . * scaleFactor, name = input$comp_ytr)
      )
  })
  data_da <- eventReactive(input$da, {
    data <- mess(input$avg_hour3, "daily3")
  })
  data_plot <- eventReactive(input$ts, {
    data <- mess(input$avg_hour3, "daily3")
  })
  data_box <- eventReactive(input$box, {
    data <- mess(input$avg_hour3, "daily3")
  })
  data_boxt <- eventReactive(input$boxt, {
    data <- CPCB_f()
    if (input$avg_hour3 == "daily3") {
      data <- openair::timeAverage(data, avg.time = "day")
    } else {
      data
    }
  })
  data_mbox <- eventReactive(input$mbox, {
    data <- mess(input$avg_hour3, "daily3")
  })
  data_tv <- eventReactive(input$tv, {
    data <- CPCB_f()
  })
  data_cp <- eventReactive(input$cp, {
    data <- CPCB_f()
  })
  data_qq <- eventReactive(input$qq, {
    data <- mess(input$avg_hour2, "daily2")
  })
  data_freq <- eventReactive(input$freq, {
    data <- mess(input$avg_hour2, "daily2")
  })
  data_mk <- eventReactive(input$mk, {
    data <- CPCB_f()
    data <- trend(data)
  })
  data_ta <- eventReactive(input$ta, {
    data <- CPCB_f()
    data <- trend(data)
  })
  data_acf <- eventReactive(input$acf, {
    data <- CPCB_f()
    data <- trend(data)
  })
  data_reg <- eventReactive(input$reg, {
    data <- mess(input$avg_hour1, "daily1")
  })
  data_mreg <- eventReactive(input$mulreg, {
    data <- mess(input$avg_hour1, "daily1")
  })
  data_diurnal <- eventReactive(input$diurnal, {
    data <- CPCB_f()
    return(data)
  })
  
  output$download_diurnal <- downloadHandler(filename <-
                                               function() {
                                                 name_file <- paste0(name_file(), "_diurnal")
                                                 paste(name_file, "csv", sep = ".")
                                               },
                                             content <- function(fname) {
                                               data <- data_diurnal()
                                               write.csv(data, fname)
                                             })
  observe({
    if (is.null(input$file1)) {
      data_joined <- CPCB_f()
      data_joined <- data_joined %>%
        dplyr::select(everything(),-date,-day,-Location)
    } else {
      data_joined <- data_joined()
      data_joined <- data_joined %>%
        dplyr::select(everything(),-date,-day,-Location)
    }
    data_joined$Location <- NULL
    updateSelectInput(session, "palleInp", choices = names(data_joined))
    updateSelectInput(session, "palleInp1", choices = names(data_joined))
    updateSelectInput(session, "palleInp2", choices = names(data_joined))
    updateSelectInput(session, "DepVar", choices = names(data_joined))
    updateSelectInput(session, "InDepVar", choices = names(data_joined))
    updateSelectInput(session, "DepVar1", choices = names(data_joined))
    updateSelectInput(session, "InDepVar1", choices = names(data_joined))
  })
  
  name_file <- reactive({
    if (is.null(input$file1)) {
      file_n <- "file"
    } else {
      file_n <- gsub(".xlsx", "", input$file1)
      file_n <- gsub(".csv", "", file_n)
    }
  })
  
  output$table1 <- DT::renderDataTable({
    if (is.null(input$file1)) {
      data_joined <- CPCB_f()
      if (input$avg_hour == "daily") {
        data_joined <-
          openair::timeAverage(data_joined, avg.time = "day", type = "Location")
      } else {
        data_joined
      }
    } else {
      data_joined <- data_joined()
    }
    cols <- names(data_joined)[4:ncol(data_joined)]
    data_joined[, cols] <-
      sapply(
        X = data_joined[, cols],
        FUN = function(x)
          as.numeric(as.character(x))
      )
    setDT(data_joined)
    data_joined[, (cols) := round(.SD, 2), .SDcols = cols]
    if (input$avg_hour == "daily") {
      data_joined <- data_joined %>%
        dplyr::mutate(date = as.Date(date, tz = input$timezone)) %>%
        dplyr::select(date, Location, everything(),-day)
      datatable(data_joined, options = list("pageLength" = 15)) %>% formatDate(1, "toLocaleDateString")
    } else {
      datatable(data_joined, options = list("pageLength" = 15)) %>% formatDate(1, "toLocaleString")
    }
  })
  
  output$download <- downloadHandler(
    filename = function() {
      name_file <- paste0(name_file(), "_average")
      paste(name_file, "csv", sep = ".")
    },
    content = function(file) {
      data_joined <- data_joined()
      write.csv(data_joined, file)
    }
  )
  
  theme2 <- reactive({
    theme2 <- list(
      theme_classic(),
      theme(
        legend.text = element_text(size = 18),
        plot.title = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 20, face = "bold"),
        axis.text = element_text(size = 18, face = "bold"),
        panel.border = element_rect(
          colour = "black",
          fill = NA,
          size = 1.2
        )
      )
    )
  })
  f <- reactive({
    f <- function(x) {
      r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
      names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
      r
    }
  })
  normalilty_t <- reactive({
    data <- data_freq()
    y <- as.numeric(as.character(data[[input$palleInp2]]))
    if (input$normality == "sh") {
      if (length(y) <= 5000) {
        x <- shapiro.test(y)
      } else {
        x <-
          "Sample size is greater than 500, so Shapiro-Wilk test will not be reliable, please use Anderson-Darling test."
      }
    } else {
      x <- ad.test(y)
    }
    x
  })
  kenda <- reactive({
    data <- data_mk()
    if (!input$avg_month) {
      x <- zoo(data[[input$palleInp2]], data$date)
      x <- na.interp(x)
    } else if (input$avg_month) {
      data <- data %>%
        mutate(
          date = format(date, "%Y-%m"),
          date = as.Date(
            paste0(date, "-01"),
            format = "%Y-%m-%d",
            tz = input$timezone
          )
        ) %>%
        group_by(date, Location) %>%
        summarise_all(list(~ mean(., na.rm = TRUE)))
      x <- zoo(data[[input$palleInp2]], data$date)
      x <- na.interp(x)
    } else {
      NULL
    }
  })
  
  output$normality_test <- renderPrint({
    normalilty_t()
  })
  output$kendal_test <- renderPrint({
    # validate(need(try(all(is.na(y)) == TRUE), "Sorry no data!"))
    y <- kenda()
    c <- trend::mk.test(y)
    c
  })
  # Plot ts for one parameter
  output$plot1 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_plot()
    } else {
      data <- data_plot()
    }
    y <- as.numeric(as.character(data[[input$palleInp]]))
    ggplot(data, aes(as.POSIXct(date), y)) +
      labs(y = input$ts_y,
           title = input$ts_mt,
           x = "") + theme2() + geom_line(size = 0.6, color = "seagreen")
  })
  # Diurnal plot for one parameter
  output$plot2 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_diurnal()
    } else {
      data <- data_diurnal()
    }
    data <- data %>%
      dplyr::mutate(hour = format(date, "%H"),
                    month = format(date, "%b"))
    data <- data %>%
      dplyr::select(hour, month, "y" = input$palleInp)
    data$hour <- as.numeric(as.character(data$hour))
    if (input$diur == "mesd" && input$diurn == "all") {
      data <- data %>%
        dplyr::select(hour, y) %>%
        group_by(hour) %>%
        summarise_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE)))
      ggplot(data, aes(hour, mean)) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd),
                                                    color = "seagreen") +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt
        ) +
        theme2() + geom_line(size = 0.6, color = "seagreen")
    } else if (input$diur == "mediq" && input$diurn == "all") {
      data <- data %>%
        dplyr::select(hour, y) %>%
        group_by(hour) %>%
        summarise_all(list(median = ~ median(.x, na.rm = TRUE), p25 = ~ quantile(.x, .25, na.rm = TRUE), 
                           p75 = ~ quantile(.x, .75, na.rm = TRUE)))
      ggplot(data, aes(hour, median)) + geom_errorbar(aes(ymin = p25, ymax = p75),
                                                      color = "seagreen") +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt
        ) +
        theme2() + geom_line(size = 0.6, color = "seagreen")
    } else if (input$diur == "mediq" && input$diurn == "mon") {
      data <- data %>%
        group_by(month, hour) %>%
        summarise_all(list(median = ~ median(.x, na.rm = TRUE), p25 = ~ quantile(.x, .25, na.rm = TRUE), 
                           p75 = ~ quantile(.x, .75, na.rm = TRUE)))
      ggplot(data, aes(hour, median)) + geom_errorbar(aes(ymin = p25, ymax = p75),
                                                      color = "seagreen") +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y,
          x = "hour of the day",
          title = input$diurnal_mt
        ) +
        theme2() + geom_line(size = 0.6, color = "seagreen")  + facet_wrap(. ~
                                                                             month, nrow = 3) +
        theme(axis.text.y = element_text(size = 12))
    } else if (input$diur == "mesd" && input$diurn == "mon") {
      data <- data %>%
        group_by(month, hour) %>%
        summarise_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE)))
      ggplot(data, aes(hour, mean)) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd),
                                                    color = "seagreen") +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y,
          x = "hour of the day",
          title = input$diurnal_mt
        ) +
        theme2() + geom_line(size = 0.6, color = "seagreen") + facet_wrap(. ~
                                                                            month, nrow = 3) +
        theme(axis.text.y = element_text(size = 12))
    }
  })
  # Month-yearly plot for one parameter
  output$plot3 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_box()
    } else {
      data <- data_box()
    }
    y <- as.numeric(as.character(data[[input$palleInp]]))
    ggplot(data, aes(x = reorder(format(date, '%b %Y'), date), y)) +
      stat_summary(
        fun.data = f(),
        colour = "seagreen",
        geom = "boxplot",
        width = 0.4,
        size = 1
      ) +
      labs(y = input$box_y,
           x = "",
           title = input$box_mt) +
      stat_summary(
        aes(y = y),
        fun.y = "mean",
        colour = "seagreen",
        geom = "point",
        size = 4
      )  +
      theme2() + theme(axis.text.x = element_text(
        size = 10,
        face = "bold",
        angle = 90
      ))
  })
  # Monthly plot for one parameter
  output$plot9 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_mbox()
    } else {
      data <- data_mbox()
    }
    y <- as.numeric(as.character(data[[input$palleInp]]))
    month.abb.loc  <- format(ISOdate(2004, 1:12, 1), "%b")
    ggplot(data, aes(x = factor(reorder(format(date, '%b'), date), levels = month.abb.loc), y)) +
      stat_summary(
        fun.data = f(),
        colour = "seagreen",
        geom = "boxplot",
        width = 0.4,
        size = 1
      ) +
      labs(y = input$box_my,
           x = "",
           title = input$box_mmt) +
      stat_summary(
        aes(y = y),
        fun.y = "mean",
        colour = "seagreen",
        geom = "point",
        size = 4
      )  +
      theme2() + theme(axis.text.x = element_text(size = 13, face = "bold"))
  })
  # timeVariation plot for one parameter
  output$plot4 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_tv()
    } else {
      data <- data_tv()
    }
    y <- as.numeric(as.character(data[[input$palleInp1]]))
    if (input$stat_tv == "mean") {
      openair::timeVariation(
        data,
        pollutant = input$palleInp1,
        par.settings = list(fontsize = list(text = 15))
      )
    } else if (input$stat_tv == "median") {
      openair::timeVariation(
        data,
        pollutant = input$palleInp1,
        stati = "median",
        conf.int = c(0.75, 0.99),
        par.settings = list(fontsize = list(text = 15))
      )
    }
  })
  # Calendar plot for one parameter
  output$plot5 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_cp()
    } else {
      data <- data_cp()
    }
    y <- as.numeric(as.character(data[[input$palleInp1]]))
    openair::calendarPlot(
      data,
      pollutant = input$palleInp1,
      main = input$cp_mt,
      cols = openColours(c("seagreen", "yellow", "red"), 10),
      par.settings = list(fontsize = list(text = 15))
    )
  })
  # Density plot for one parameter
  output$plot6 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_freq()
    } else {
      data <- data_freq()
    }
    y <- as.numeric(as.character(data[[input$palleInp2]]))
    ggplot(data, aes(x = y)) +
      geom_density(color = "deepskyblue", fill = "lightblue") +
      labs(y = "density",
           x = input$freq_x,
           title = input$freq_mt) + theme2()
  })
  # qq plot for one parameter
  output$plot7 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_qq()
    } else {
      data <- data_qq()
    }
    y <- as.numeric(as.character(data[[input$palleInp2]]))
    ticks <-
      qnorm(c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99))
    labels <- c(1, 5, 10, 25, 50, 75, 90, 95, 99)
    ggplot(data, aes(sample = log10(y))) +
      stat_qq(size = 2,
              geom = 'point',
              color = "deepskyblue") +
      stat_qq_line(size = 1, linetype = 2) +
      scale_x_continuous(breaks = ticks, labels = labels) +
      labs(x = "Emperical percentiles",
           y =  input$qq_y,
           title = input$qq_mt) + theme2()
  })
  # Linear regression plot for one file
  output$plot8 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_reg()
    } else {
      data <- data_reg()
    }
    y <- as.numeric(as.character(data[[input$DepVar]]))
    x <- as.numeric(as.character(data[[input$InDepVar]]))
    m <- lm(y ~ x, data)
    s <- summary(m)
    r <- round(s$adj.r.squared, digits = 2)
    ggplot(data = data, aes(x = x, y = y)) +
      geom_point(alpha = 0.5, color = "red") +
      geom_smooth(
        method = lm,
        size = 1.2,
        se = FALSE,
        formula = y ~ x,
        color = "deepskyblue"
      ) +
      labs(
        x = input$reg_x,
        y = input$reg_y,
        title = input$reg_mt,
        subtitle = paste0("R square: ", r, "; Equation: ", reg_eqn(s))
      ) +
      theme2()
  })
  # Linear regression plot for two files
  output$plot15 <- renderPlot({
    if (is.null(input$file1) & is.null(input$file2)) {
      data <- data_scatter_comp()
    } else {
      data <- data_scatter_comp()
    }
    y <- as.numeric(as.character(data$`Site 1`))
    x <- as.numeric(as.character(data$`Site 2`))
    m <- lm(y ~ x, data)
    s <- summary(m)
    r <- round(s$adj.r.squared, digits = 2)
    ggplot(data = data, aes(x = x, y = y)) +
      geom_point(alpha = 0.5, color = "red") +
      geom_smooth(
        method = lm,
        size = 1.2,
        se = FALSE,
        formula = y ~ x,
        color = "deepskyblue"
      ) +
      labs(
        x = input$reg_mx1,
        y = input$reg_my1,
        title = input$reg_mt1,
        subtitle = paste0("R square: ", r, "; Equation: ", reg_eqn(s))
      ) +
      theme2()
  })
  # Vertical bar plot for one parameter
  output$plot10 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_boxt()
    } else {
      data <- data_boxt()
    }
    data <- data %>%
      dplyr::mutate(
        date = format(date, "%Y-%m-01"),
        date = as.Date(date, format = "%Y-%m-%d", tz = input$timezone)
      ) %>%
      dplyr::select(date, "y" = input$palleInp) %>%
      group_by(date) %>%
      summarise_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE)))
    ggplot(data, aes(x = reorder(format(date, '%b %Y'), date), mean)) +
      geom_bar(
        position = position_dodge(),
        stat = "identity",
        colour = 'seagreen',
        fill  = 'seagreen'
      ) +
      labs(y = input$box_yt,
           x = "",
           title = input$box_mtt) +
      geom_errorbar(
        aes(ymin = mean - sd, ymax = mean + sd),
        width = .2,
        position =
          position_dodge(.9),
        color = 'seagreen'
      ) +
      theme2() + theme(axis.text.x = element_text(
        size = 10,
        face = "bold",
        angle = 90
      ))
  })
  # biwavelet plot for one parameter
  output$plot13 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_ta()
    } else {
      data <- data_ta()
    }
    data$date <- as.Date(data$date, "%Y-%m-%d", tz = input$timezone)
    data <-
      data.frame(data$date, na.interp(data[[input$palleInp2]]))
    data_wt <- wt(data, dt = 1)
    par(
      mfrow = c(1, 1),
      oma = c(0, 0, 0, 2),
      mar = c(5, 5, 5, 7) + 0.1
    ) #specify the limits of margins to the plot area
    plot.biwavelet(
      data_wt,
      form = "%Y-%m-%d",
      type = "power.corr.norm",
      plot.cb = TRUE,
      lwd.coi = 1,
      col.coi = "black",
      cex.lab = 1.5,
      cex.axis = 1.5,
      cex.main = 1.5,
      plot.sig = 95,
      lwd.sig = 2,
      main = input$title_bi,
      ylab = "Period (days)",
      xlab = ""
    )
  })
  # Data availability plot
  output$plot16 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_da()
    } else {
      data <- data_da()
    }
    data_avail <- data %>%
      dplyr::select(day, everything(),-date) %>%
      group_by(day) %>%
      summarise_all(list(~ mean(., na.rm = TRUE))) %>%
      pivot_longer(-day, names_to = "Parameter", values_to = "Value")
    no_na_df <- data_avail
    no_na_df <- no_na_df[complete.cases(no_na_df),]
    ggplot(no_na_df, aes(x = day, y = Parameter, color = Parameter)) +
      geom_errorbarh(aes(xmax = day, xmin = day), size = 0.25) +
      labs(y = "", title = "Data availability plot", x = "") +
      scale_x_date(date_labels = "%b-%y") +
      theme2() + theme(legend.position = "none")
  })
  # Diurnal for two sites
  output$plot17 <- renderPlot({
    if (is.null(input$file1) & is.null(input$file2)) {
      data <- data_diurnal_comp()
    } else {
      data <- data_diurnal_comp()
    }
    data <- data %>%
      dplyr::mutate(hour = format(date, "%H"),
                    month = format(date, "%b"))
    data <- data %>%
      dplyr::select(hour, month, site1 = `Site 1`, site2 = `Site 2`)
    data$hour <- as.numeric(as.character(data$hour))
    cols <- c("Site 1" = "#F8766D", "Site 2" = "#2596BE")
    if (input$diur1 == "mesd" && input$diurn1 == "all") {
      data <- data %>%
        dplyr::select(hour, site1, site2) %>%
        group_by(hour) %>%
        summarise_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE)))
      ggplot(data, aes(hour, site1_mean)) +
        geom_errorbar(aes(
          ymin = site1_mean - site1_sd,
          ymax = site1_mean + site1_sd,
          colour = "Site 1"
        )) +
        geom_errorbar(aes(
          ymin = site2_mean - site2_sd,
          ymax = site2_mean + site2_sd,
          colour = "Site 2"
        )) +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y1,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt1
        ) +
        theme2() + geom_line(aes(y = site1_mean, colour = "Site 1"), size = 0.6) +
        geom_line(aes(y = site2_mean, colour = "Site 2"), size = 0.6) +
        scale_colour_manual(name = "", values = cols)
    } else if (input$diur1 == "mediq" && input$diurn1 == "all") {
      data <- data %>%
        dplyr::select(hour, site1, site2) %>%
        group_by(hour) %>%
        summarise_all(list(median = ~ median(.x, na.rm = TRUE), p25 = ~ quantile(.x, .25, na.rm = TRUE), 
                           p75 = ~ quantile(.x, .75, na.rm = TRUE)))
      ggplot(data, aes(as.numeric(hour), site1_median)) +
        geom_errorbar(aes(
          ymin = site1_p25,
          ymax = site1_p75,
          colour = "Site 1"
        )) +
        geom_errorbar(aes(
          ymin = site2_p25,
          ymax = site2_p75,
          colour = "Site 2"
        )) +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y1,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt1
        ) +
        theme2() + geom_line(aes(y = site1_median, colour = "Site 1"), size = 0.6) +
        geom_line(aes(y = site2_median, colour = "Site 2"), size = 0.6) +
        scale_colour_manual(name = "", values = cols)
    } else if (input$diur1 == "mediq" && input$diurn1 == "mon") {
      data <- data %>%
        group_by(month, hour) %>%
        summarise_all(list(median = ~ median(.x, na.rm = TRUE), p25 = ~ quantile(.x, .25, na.rm = TRUE), 
                           p75 = ~ quantile(.x, .75, na.rm = TRUE)))
      ggplot(data, aes(as.numeric(hour), site1_median)) +
        geom_errorbar(aes(
          ymin = site1_p25,
          ymax = site1_p75,
          colour = "Site 1"
        )) +
        geom_errorbar(aes(
          ymin = site2_p25,
          ymax = site2_p75,
          colour = "Site 2"
        )) +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y1,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt1
        ) +
        theme2() + geom_line(aes(y = site1_median, colour = "Site 1"), size = 0.6) +
        geom_line(aes(y = site2_median, colour = "Site 2"), size = 0.6) + facet_wrap(. ~
                                                                                       month, nrow = 3) +
        theme(axis.text.y = element_text(size = 12)) +
        scale_colour_manual(name = "", values = cols)
    } else if (input$diur1 == "mesd" && input$diurn1 == "mon") {
      data <- data %>%
        group_by(month, hour) %>%
        summarise_all(list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE)))
      ggplot(data, aes(hour, site1_mean)) +
        geom_errorbar(aes(
          ymin = site1_mean - site1_sd,
          ymax = site1_mean + site1_sd,
          colour = "Site 1"
        )) +
        geom_errorbar(aes(
          ymin = site2_mean - site2_sd,
          ymax = site2_mean + site2_sd,
          colour = "Site 2"
        )) +
        scale_x_continuous(limits = c(-1, 24),
                           breaks = c(0, 6, 12, 18)) +
        labs(
          y = input$diurnal_y1,
          x = "Hour of the day (LT)",
          title = input$diurnal_mt1
        ) +
        theme2() + geom_line(aes(y = site1_mean, colour = "Site 1"), size = 0.6) +
        geom_line(aes(y = site2_mean, colour = "Site 2"), size = 0.6) + facet_wrap(. ~
                                                                                     month, nrow = 3) +
        theme(axis.text.y = element_text(size = 12)) +
        scale_colour_manual(name = "", values = cols)
    }
  })
  # Autocorrelogram plot for one parameter
  output$plot18 <- renderPlot({
    if (is.null(input$file1)) {
      data <- data_acf()
    } else {
      data <- data_acf()
    }
    data <-
      data.frame(data$date, na.interp(data[[input$palleInp2]]))
    names(data) <- c("date", "Parameter")
    month <- as.numeric(nrow(unique(
      data %>%
        mutate(month = format(date, "%Y-%m")) %>%
        select(month)
    ))) - 1
    data <- data %>%
      mutate(month = format(date, "%Y-%m")) %>%
      select(month, Parameter) %>%
      group_by(month) %>%
      summarise_all(list(~ mean(., na.rm = TRUE))) %>%
      select(Parameter)
    plot_acf <-
      acf(data,
          lag.max = month,
          plot = TRUE,
          main = input$acf_main)
    plot_acf
  })
  
  lm_reg <- reactive({
    data <- data_mreg()
    y <-
      lm(as.formula(paste(
        input$DepVar1, " ~ ", paste(input$InDepVar1, collapse = "+")
      )), data)
    y
  })
  # MLR plot
  output$plot11 <- renderPlot({
    if (is.null(input$file1)) {
      data <- broom::augment(lm_reg()) 
    } else {
      data <- broom::augment(lm_reg()) 
    }
    y <- as.numeric(as.character(data[[input$DepVar1]]))
    ggplot(data = data, aes(x = .fitted, y = y)) +
      geom_point(alpha = 0.5, color = "red") +
      geom_smooth(
        method = lm,
        size = 1.2,
        se = FALSE,
        formula = y ~ x,
        color = "deepskyblue"
      ) +
      labs(x = input$reg_mx,
           y = input$reg_my,
           title = input$reg_mmt) + theme2()
  })
  output$RegOut <- renderPrint({
    summary(lm_reg())
  })
  output$DepPrint <-
    renderPrint({
      paste("Dependent variable:", input$DepVar1)
    })
  output$IndPrint <-
    renderPrint({
      paste("Independent variables:", input$InDepVar1)
    })
  
  data_summary <- reactive({
    if (is.null(input$file1)) {
      data <- CPCB_f()
    } else {
      data <- CPCB_f()
    }
    data <- data %>%
      dplyr::select(day, everything(),-date)
    data$Location <- NULL
    if (input$avg == "no") {
      data <- data %>%
        dplyr::select(everything(),-day)
      columns <- 1:ncol(data)
      data[, columns] <-
        lapply(columns, function(x)
          as.numeric(as.character(data[[x]])))
      tmp1 <- do.call(
        data.frame,
        list(
          Mean = apply(data, 2, function(y)
          {
            mean(y, na.rm = TRUE)
          }),
          SD = apply(data, 2, function(y)
          {
            sd(y, na.rm = TRUE)
          }),
          Median = apply(data, 2, median, na.rm = TRUE),
          IQR = apply(data, 2, IQR, na.rm = TRUE),
          Min = apply(data, 2, min, na.rm = TRUE),
          Max = apply(data, 2, max, na.rm = TRUE),
          p1  = apply(
            data,
            2,
            quantile,
            probs = c(0.01),
            na.rm = TRUE
          ),
          p10 = apply(
            data,
            2,
            quantile,
            probs = c(0.1),
            na.rm = TRUE
          ),
          p25 = apply(
            data,
            2,
            quantile,
            probs = c(0.25),
            na.rm = TRUE
          ),
          p75 = apply(
            data,
            2,
            quantile,
            probs = c(0.75),
            na.rm = TRUE
          ),
          p90 = apply(
            data,
            2,
            quantile,
            probs = c(0.9),
            na.rm = TRUE
          ),
          p99 = apply(
            data,
            2,
            quantile,
            probs = c(0.99),
            na.rm = TRUE
          ),
          non_NA = apply(data, 2,
                         function(y)
                         {
                           length(which(!is.na(y)))
                         })
        )
      )
      tmp <- data.frame(tmp1)
      tmp$Mean   <-
        round(as.numeric(as.character(tmp$Mean)), digits = 2)
      tmp$IQR    <-
        round(as.numeric(as.character(tmp$IQR)), digits = 2)
      tmp$Median <-
        round(as.numeric(as.character(tmp$Median)), digits = 2)
      tmp$Min    <-
        round(as.numeric(as.character(tmp$Min)), digits = 2)
      tmp$Max    <-
        round(as.numeric(as.character(tmp$Max)), digits = 2)
      tmp$p10    <-
        round(as.numeric(as.character(tmp$p10)), digits = 2)
      tmp$SD     <-
        round(as.numeric(as.character(tmp$SD)), digits = 2)
      tmp$p90    <-
        round(as.numeric(as.character(tmp$p90)), digits = 2)
      tmp$p75    <-
        round(as.numeric(as.character(tmp$p75)), digits = 2)
      tmp$p99    <-
        round(as.numeric(as.character(tmp$p99)), digits = 2)
      tmp$p1     <-
        round(as.numeric(as.character(tmp$p1)), digits = 2)
      tmp$p25    <-
        round(as.numeric(as.character(tmp$p25)), digits = 2)
      tmp
      tmp <- t(tmp)
    } else {
      data <- data %>%
        dplyr::mutate(month  = format(day, "%b %Y"),
                      monthly = format(day, "%b")) %>%
        dplyr::select(day, month, monthly, everything())
      data$group <- data[[input$avg]]
      data <- data %>%
        group_by(group) %>%
        summarise_if(
          is.numeric,
          list(
            Mean = ~ mean(.x, na.rm = TRUE),
            SD = ~ sd(.x, na.rm = TRUE),
            Median = ~ median(.x, na.rm = TRUE),
            IQR = ~ IQR(.x, na.rm = TRUE),
            Min = ~ min(.x, na.rm = TRUE),
            Max = ~ max(.x, na.rm = TRUE),
            p1 = ~ quantile(.x, .01, na.rm = TRUE),
            p10 = ~ quantile(.x, .1, na.rm = TRUE),
            p25 = ~ quantile(.x, .25, na.rm = TRUE),
            p75 = ~ quantile(.x, .75, na.rm = TRUE),
            p90 = ~ quantile(.x, .9, na.rm = TRUE),
            p99 = ~ quantile(.x, .99, na.rm = TRUE),
            non_NA = ~ sum(!is.na(.))
          )
        )
      columns <- 2:ncol(data)
      data[, columns] <-
        lapply(columns, function(x)
          round(as.numeric(as.character(data[[x]])), digits = 2))
      tmp <- data
    }
    tmp
  })
  output$table <- DT::renderDataTable({
    tmp <- data_summary()
    datatable(tmp, options = list("pageLength" = 13))
  })
  
  output$download_stats <- downloadHandler(filename <- function() {
    name_file <- paste0(name_file(), "_stats")
    paste(name_file, "csv", sep = ".")
  },
  content <- function(fname) {
    data_summary <- data_summary()
    write.csv(data_summary, fname)
  })
  
}
#### Run app
shinyApp(ui, server)
adithirgis/pollucheck documentation built on Dec. 18, 2021, 10:28 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
adithirgis/pollucheck
Explore Open-Source Air Pollution Data

inst/shiny/app.R
In adithirgis/pollucheck: Explore Open-Source Air Pollution Data

R Package Documentation

Browse R Packages

We want your feedback!

adithirgis/pollucheck Explore Open-Source Air Pollution Data

inst/shiny/app.R In adithirgis/pollucheck: Explore Open-Source Air Pollution Data

R Package Documentation

Browse R Packages

We want your feedback!

adithirgis/pollucheck
Explore Open-Source Air Pollution Data

inst/shiny/app.R
In adithirgis/pollucheck: Explore Open-Source Air Pollution Data