trenda: Trendanalysis in climate related data

Documented in trenda_obs

#' Function to calculate trend analyses for Bosch & Partner (without influential observations)
#'
#' @param data_dir directory where the data is stored
#' @param plot_dir directory where the generated plots are saved
#' @param result_name directory and name (without ending) where the result table
#' is stored
#' @param log_trans logical if log-transformed values are used (TRUE leads to
#' the adjustments for plot function)
#' @param res_tab_file result table of the same data of perform_trendanalyse;
#' needed for information about the influential observations which should be
#' left out in the analysis
#' @param calc_einfl_beob logical; the influential observations should be
#' calculated again. Can be useful to turn off as this information is not
#' needed and sometimes the calculation of the influential observations for
#' GLS models does not work
#'
#' @return the function does not return a value but stores the plot and table in
#' the assigned directories
#'
#' @details Since this function needs the information which are influential obser-
#' vations, the function perform_trendanalyse has to be run first on the same
#' data.
#' this function needs the information generated by
#' @export
trenda_obs <- function(data_dir, plot_dir, result_name,
                                                 log_trans = FALSE, res_tab_file,
                                                 calc_einfl_beob = TRUE) {

  source("functions_quadreg_2018.R")

  ## set the directory
  trend_files <- list.files(data_dir)

  ResTab <- utils::read.table("ResTab.csv", sep = ";", header = TRUE)

  # Einlesen der vorherigen Ergebnistabelle, um einflussreiche Beob. ausschließen
  # zu können
  ergebnis_einflussreich <- utils::read.csv2(res_tab_file,
                                      stringsAsFactors = FALSE)

  # Da generalisierter Durbin-Watson-Test zufaellig ist => Seed setzen
  set.seed(1)

  z <- 1

  output <- lapply(trend_files, function(trend_file) {

    print(sprintf("Data:----------%s", trend_file)) # Exceltitle

    ## Daten einlesen
    dat <- utils::read.csv2(sprintf("%s%s", data_dir, trend_file), header = TRUE)
    print(utils::str(dat))

    #dat <- read.xlsx(sprintf("%s%s", data_dir, trend_file),
    #                 sheetName="machine-readable", encoding = "UTF-8",
    #                 stringsAsFactors = FALSE)

    ## Wegen plot-Funktion muessen die Punkte in den Variablennamen ersetzt werden
    names(dat) <- gsub("\\.", "\\_", names(dat))

    ## Alle Variablen ausser ID, Zeit und Jahr sind Umweltindikatoren
    varnames <- setdiff(names(dat), c("ID", "Zeit", "Jahr"))

    ## ID notwendig fuer Korrelationsstruktur fuer gls-Funktion von
    # nlme package (alle Daten stammen jeweils von einer Beobachtungseinheit)
    dat$ID <- 1

    ## Zeit faengt bei 0 an
    dat$Zeit <- dat$Jahr - min(dat$Jahr)

    for(varname in varnames){
      print(varname)

      # Entferne die Beobachtungen, die bei der vorherigen Analyse als einflussreiche
      # Beobachtungen erkannt wurden
      # die Indices sind als string gespeichert; die Zahlen sind per Komma getrennt
      index <- ergebnis_einflussreich[ergebnis_einflussreich$File == trend_file &
                                        ergebnis_einflussreich$Index == varname,
                                      "zu_entfernende_Beobachtungen_Index"]

      if (!is.na(index)) {
        index <- as.character(index)
        index <- strsplit(index, ",")
        index <- as.vector(sapply(index, as.numeric))
      }

      # nur Daten entfernen und Auswertung durchführen, wenn im Index Beobachtungen
      # enthalten sind
      # dies ist der Fall, wenn der Index keine Liste ist oder kein NA ist
      if (!is.list(index) && !is.na(index)) {
        # hier schon die Zeilen mit NA entfernen, da sich darauf der Index
        # der einflussreichen Beobachtungen bezieht
        temp_dat <- stats::na.omit(dat[, c("Zeit", "Jahr", "ID", varname)])
        temp_dat <- temp_dat[-index, ]

        print(utils::str(temp_dat))

        ## Trend berechnen
        res <- compute_trend(temp_dat, varname, log_trans = log_trans,
                             calc_einfl_beob = calc_einfl_beob)

        ## Ergebnisse speichern
        ResTab[z,"File"] <<- trend_file
        ResTab[z,"Index"] <<- varname
        ResTab[z,"Beta0"] <<- round(as.numeric(res$beta[1]),4)
        ResTab[z,"Beta1"] <<- round(as.numeric(res$beta[2]),4)
        ResTab[z,"Beta2"] <<- round(as.numeric(res$beta[3]),4)
        ResTab[z,"Phi1"] <<- res$phi[1]
        ResTab[z,"Phi2"] <<- res$phi[2]
        ResTab[z,"Trend"] <<- res$trend
        ResTab[z,"rQuadrat"] <<- res$rsq

        ## Trendgrafik speichern
        if (log_trans) {
          name_file <- substr(trend_file, start = 6, stop = nchar(trend_file) - 4)
        } else {
          name_file <- abbreviate(trend_file, 8)
        }
        grDevices::jpeg(file = sprintf("%s%s_%s.jpeg", plot_dir,
                            name_file,
                            varname),
             width = 800, height = 800, quality = 640000)
        plot(res$plot)
        grDevices::dev.off()

        ## Fuer state-ind-temperatur-1 und state-ind-niederschlag:
        ## Trendgrafik speichern
        #jpeg(file = sprintf("%s%s_%s.jpeg", plot_dir, abbreviate(trend_file, 8), varname),
        #     width = 1600, height = 900, quality = 640000)
        #plot(res$plot)
        #dev.off()

        z <<- z + 1

      }
    }
  })

  summary(ResTab)

  utils::write.table(ResTab,
              paste0(result_name, format(Sys.Date(), "%Y-%m-%d"), ".csv"),
              sep = ";", dec = ",", row.names = FALSE)

}