R/calc_bioclim.R

Defines functions calc_bioclim_df calc_bioclim

Documented in calc_bioclim calc_bioclim_df

#' Bioclimatic index calculation
#'
#' This function allows you to calculate bioclimatic indices
#' from meteorological timeseries. A wrapper around calc_bioclim_df and calc_bioclim_raster
#' @param data input data, has to inherit either 'data.frame' in which case calc_bioclim_df
#' will be called or 'list' in which case calc_bioclim_raster will be called.
#' @param ... further arguments passed on to calc_bioclim_df or calc_bioclim_raster
#' @keywords bioclimatic index
#' @export
#' @import data.table
#' @examples
#' calc_bioclim(data = dt.agg,
#'   frml = winkler ~ T_mean - 10,
#'   agg.func = sum,
#'   period = 91:304,
#'   set.zero = T)

calc_bioclim <- function(data, ...) {

  if (inherits(data, 'data.frame')) { result <- calc_bioclim_df(dt = data, ...)

  } else if (inherits(data, 'list')) { result <- calc_bioclim_raster(l = data, ...)

  } else { stop('Unknown input data class. Has to inherit either data.frame or list') }

  return(result)
}

#' Bioclimatic index calculation
#'
#' This function allows you to calculate bioclimatic indices
#' from meteorological timeseries.
#' @param dt data.frame or data.table with a column of class 'Date' and the
#' columns mentioned in frml
#' @param id character string, if supplied this string will be appended as a new column
#' of the resulting dataframe
#' @param index.calc string, Name of the index that you want to calculate.
#' See function get_index_params for further details.
#' @param frml A formula object that describes how to calculate the daily index values. The left side of the formula will be used to name the resulting column and the right side to calculate the daily index values.
#' @param agg.func A function that will be used to aggregate the daily values calculated by 'frml' for every year
#' @param period A sequence of numbers that indicates the day of years that should be used to aggregate the index
#' @param set.zero Are values below 0 possible or should they be converted to 0?
#' @param na.action string, what should happen with NAs in the data? If keep, NAs in the
#' data will translate to NAs in the output, if ignore the index is calculated by leaving
#' out the days with NAs and if fill the NAs can be filled up to maxgap by linear interpolation.
#' @param maxgap integer, up to how many consecutive missing values should be filled?
#' @keywords bioclimatic index
#' @export
#' @import data.table
#' @examples
#' calc_bioclim_df(dt = dt.agg,
#'   frml = winkler ~ T_mean - 10,
#'   agg.func = sum,
#'   period = 91:304,
#'   set.zero = T)

calc_bioclim_df <- function(dt, index.calc = 'winkler',
                            frml = NULL, agg.func = NULL,
                            period = NULL, set.zero = NULL,
                            na.action = 'keep', max.gap = 3) {

  if (!inherits(dt, 'data.table')) { dt <- data.table::setDT(dt) }

  #If index name is supplied get parameters from default list
  if (!missing(index.calc)) {
    params <- rebecka::get_index_params(index.calc)
    if (is.null(frml)) frml <- params[['frml']]
    if (is.null(agg.func)) agg.func <- params[['agg.func']]
    if (is.null(period)) period <- params[['period']]
    if (is.null(set.zero)) set.zero <- params[['set.zero']]
  }

  #Get index name and formula string from frml
  index.name <- deparse(frml[[2]])
  frml.string <- deparse(frml[[3]])

  #Check if all elements of frml are in dt
  frml.elements <- all.vars(frml[[3]])
  if (!all(frml.elements %in% colnames(dt))) {

    empty.dt <- data.table::data.table('year' = NA,
                                       'index' = NA)

    data.table::setnames(empty.dt, old = c('index'), new = c(index.name))

    warning('Not all frml elements found in column names of dataframe. NAs are returned')
    return(empty.dt)

  }

  #Check if column of class Date is present and extract its name
  col.classes <- sapply(dt, class)
  datecol <- which(sapply(col.classes, function(x) is.element('Date', x)))

  if (length(datecol) == 0) stop('No column of class Date found in dataframe.')

  datecol.name <- colnames(dt)[datecol]

  data.table::setnames(dt, old = c(datecol.name), new = c('Datetime'))

  #Fill in missing days with rows with NAs by join to a complete datesequence
  year.min <- min(as.numeric(format(dt[['Datetime']], '%Y')))
  year.max <- max(as.numeric(format(dt[['Datetime']], '%Y')))

  complete.dates <- data.table::setDT(list('Datetime' = seq.Date(as.Date(str_c(year.min, '-01-01')),
                                                                 as.Date(str_c(year.max, '-12-31')),
                                                                 by='day')))
  data.table::setkey(complete.dates, Datetime)
  data.table::setkey(dt, Datetime)

  #Perform the join to the complete datesequence
  complete.dt <- dt[complete.dates]

  #Extract only the specified days
  complete.dt[,
              ':='('year' = as.numeric(format(Datetime, '%Y')),
                   'doy' = as.numeric(format(Datetime, '%j')))
              ][doy == 366, doy := 365]#Transform doy values of 366 to 365

  complete.dt <- subset(complete.dt, doy %in% period)

  #Optionally fill NAs up to maxgap
  if (na.action == 'fill') {
    complete.dt[,(frml.elements) := lapply(.SD, zoo::na.approx, maxgap = max.gap, na.rm = F), .SDcols = frml.elements]
  }

  #Calculate the daily index value
  complete.dt[,':='('index' = eval(parse(text = frml.string)))]

  #Optionally set values below 0 to 0
  if (set.zero) {
    complete.dt[index < 0, index := 0]
  }

  na.bool <- FALSE
  if (na.action == 'ignore') na.bool <- TRUE

  #Aggregate to yearly values with the given agg.func
  index.dt <- complete.dt[,.('index' = agg.func(index, na.rm = na.bool)), by = 'year']

  data.table::setnames(index.dt, old = c('index'), new = c(index.name))

  return(index.dt)
}


#' Bioclimatic index calculation raster
#'
#' This function allows you to calculate bioclimatic indices based on the
#' overlay of raster datasets.
#' @param l list, a list of RasterStacks or RasterBricks. The elements of the list
#' have to be named according to the elements in 'frml' and must have the same
#' names() attribute that corresponds to the day the raster values refer to.
#' @param index.calc string, Name of the index that you want to calculate.
#' See function get_index_params for further details.
#' @param frml formula, A formula object that describes how to calculate the daily index values.
#' The left side of the formula will be used to name the resulting RasterLayer
#' and the right side to calculate the daily rasters.
#' @param agg.func function, A function that will be used to aggregate the daily rasters
#' calculated by 'frml' for every year
#' @param period integer, A sequence of numbers that indicates the day of years that
#' should be used to aggregate the index
#' @param set.zero logical, Are values below 0 possible or should they be converted to 0?
#' @param project logical, Should the resulting raster files be projected to another crs?
#' Defaults to TRUE.
#' @param crs character or object of class 'CRS', output coordinate system reference.
#' Only used if project is TRUE. Defaults to WGS84
#' @param res single or vector of two numerics, output resolution. Only used if
#' project is TRUE. Defaults to the resolution of the input rasters.
#' @keywords bioclimatic index, raster
#' @export
#' @import data.table
#' @examples
#' calc_bioclim_raster(l = list('T_max' = tx.stack, 'T_max' = tn.stack),
#'   frml = winkler ~ ((T_max + T_min) / 2) - 10,
#'   agg.func = sum,
#'   period = 91:304,
#'   set.zero = T)

calc_bioclim_raster <- function(l, index.calc = 'winkler',
                                frml = NULL, agg.func = NULL, period = NULL, set.zero = NULL,
                                project = T,
                                crs = "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0",
                                res) {

  #Check if l is a list
  if (!inherits(l, 'list')) { stop('Input is not a list') }

  #Check if both raster bricks are of the same length
  dims <- sapply(l, dim)[3, ]
  if (length(unique(dims)) > 1) {
    stop('The two raster bricks are not of the same length!') }

  #Check if both raster bricks contain the same days
  raster.names <- sapply(l, names)
  if (!all(duplicated(t(raster.names))[-1])) {
    stop('The two raster bricks have not the same raster names') }

  #If index name is supplied get parameters from default list
  if (!missing(index.calc)) {
    params <- rebecka::get_index_params(index.calc)
    if (is.null(frml)) frml <- params[['frml']]
    if (is.null(agg.func)) agg.func <- params[['agg.func']]
    if (is.null(period)) period <- params[['period']]
    if (is.null(set.zero)) set.zero <- params[['set.zero']]
  }

  index.name <- all.vars(frml[[2]])
  frml.string <- deparse(frml[[3]])

  #Check if all elements of frml.string are in l
  frml.elements <- all.vars(frml[[3]])
  if (!all(frml.elements %in% names(l))) {
    stop(paste0('Not all formula elements found in names of input list. You need: ', paste(frml.elements, collapse = ', '))) }

  #Optionally get res from first raster within l
  if (missing(res)) {
    res <- raster::res(l[[1]][[1]])
  }

  raster.names <- names(l[[1]])
  #Regex expression that finds every character between two numbers as well as
  #at the beginning and end of numbers
  #which is then used as separating string for Date generation
  sep.string <- unique(unlist(stringr::str_extract_all(raster.names, '(?<=\\d)([^0-9])(?=\\d)')))
  start.string <- unique(unlist(stringr::str_extract_all(raster.names, '^[^0-9]*')))
  end.string <- unique(unlist(stringr::str_extract_all(raster.names, '[^0-9]+$')))

  #Make sure all the names have the same strings
  if (length(sep.string) > 1 | length(start.string) > 1 | length(end.string) > 1) {
    stop('No unique string found for raster names!') }

  #Transform raster names to dates and extract years and day of years
  date.format <- stringr::str_c(start.string,
                                stringr::str_c('%Y', '%m', '%d', sep = sep.string),
                                end.string)
  names.date <- as.Date(raster.names, format = date.format)
  years <- as.numeric(format(names.date, '%Y'))
  days <- as.numeric(format(names.date, '%j'))
  days[days == 366] <- 365

  #Iterate over the years and calculate the index for every year
  #The results are collected in a stack of raster files
  years.unique <- unique(years)
  output <- vector('list', length = length(years.unique))
  names(output) <- stringr::str_c(index.name, '_', years.unique)
  for (i in seq_along(years.unique)) {

    if (i == 1) { pb <- txtProgressBar(min = 0, max = length(years.unique), style = 3) }

    year <- years.unique[[i]]

    #Extract the index of dates in names.date that are within period
    raster.stack.index <- which(years == year & days %in% period)

    #Skip year if in this year no days during the period are present
    if (length(raster.stack.index) == 0) { next() }

    #Subset the raster bricks to only include the relevant rasters for this year
    l.sub <- lapply(l, raster::subset, raster.stack.index)

    #Calculate the daily rasters by evaluating the formula as string within l.sub
    index.daily <- eval(parse(text=frml.string), envir = l.sub)

    #Optionally set raster values below 0 to 0
    if (set.zero) {
      index.daily <- raster::calc(index.daily, function(x) { x[x < 0] <- 0; return(x) } )
    }

    #Summarize the daily rasters
    index.final <- raster::calc(index.daily, fun = agg.func)

    #Optionally project raster
    if (project) { index.final <- raster::projectRaster(index.final, crs = crs, res = res) }

    output[[i]] <- index.final

    setTxtProgressBar(pb, i)

  }

  #Remove null values from list
  null.index <- which(sapply(output, is.null))
  if (length(null.index) > 0) { output <- output[-null.index] }

  result.stack <- stack(output)

  return(result.stack)
}
sitscholl/rebecka_package documentation built on Aug. 25, 2020, 4:20 a.m.