R/process.R

#' Process valid models
#'
#' This function takes any models that are valid, identifies and characterizes
#' heat waves within projections for each of its ensemble members, writes
#' those dataframes of heat wave projections out to the user-specified output
#' directory, and stores information about the models and grid locations.
#'
#' @param model List with parsed information about the directory structure for
#'    a specific climate model from the user-specified projections directory.
#'    This list is a subset of the list generated by
#'    \code{acquireDirectoryStructure}.
#' @param global An list object created by \code{\link{gen_hw_set}} that
#'    includes user specifications (e.g., the path to the output directory, the
#'    path to the input climate projections, the dataframe with city
#'    locations).
#' @param custom An list object created by \code{\link{gen_hw_set}} that
#'    includes user specifications (e.g., the name of the R function to
#'    use to identify heat waves, alternative upper and lower year boundaries
#'    for the data used to determine threshold temperatures for the heat wave
#'    definition, alternative upper and lower year boundaries
#'    for the projection period of the heat wave datasets being generated).
#' @param accumulators The closure generated by
#'    \code{\link{createAccumulators}} that allows you to append model
#'    information and grid location data as you process through models to
#'    a growing list.
#' @inheritParams gen_hw_set
#'
#' @return A list object with processed projections, as well as various other
#'    elements needed for further processing to identify heat waves.
processModel <- function(model, global, custom, accumulators,
                         dataDirectories){

        # Acquire characteristics of each experiment of the model
        modelName <- model[[1]]
        histens <- model[[2]]
        rcpens <- model[[3]]

        # Add entry to the modelInfoAccumulator
        accumulators(command = "append model information",
                     newElement = data.frame(modelName,
                                             length(histens),
                                             length(rcpens)))

        # Acquire vector of threshold temperatures using the historical
        # ensemble for this model which possesses the name r1i1p1
        thresholdList <- processThresholds(model = model,
                                        global = global,
                                        custom = custom)
        thresholds <- thresholdList$thresholds
        out_locations <- thresholdList$out_locations
        out_locations$model <- modelName

        # Add locations to the locationList accumulator
        accumulators(command = "append location list",
                     newElement = out_locations)

        # If reference period differs from projection period, get data
        # for reference period
        if(custom$createHwDataframe[[1]]){
                referenceEnsemble <- processReference(model, global, custom)
                reference <- referenceEnsemble$series
                reference_dates <- referenceEnsemble$dates
        } else {
                reference <- FALSE
                reference_dates <- FALSE
        }

        # Process the projection data

        # First, check which of the two experiment subdirectories should be
        # used for the projections
        if(custom$getBounds[3] <= dataDirectories[[1]][2]){
                proj_ens <- histens
        } else {
                proj_ens <- rcpens
        }

        # Process the projection data for the files in the relevant directory
        projectionEnsembles <- lapply(proj_ens,
                               processProjections,
                               modelName = modelName,
                               ensembleWriter = createEnsembleWriter(modelName,
                                                                     global,
                                                                     custom),
                               global = global,
                               custom = custom,
                               thresholds = thresholds,
                               accumulators = accumulators,
                               reference = reference,
                               reference_dates = reference_dates)

        return(NULL)
}

#' Calculate threshold temperatures
#'
#' This function calculates the threshold temperatures required to identify
#' heat waves in the climate projection data using the ensemble member specified
#' by \code{threshold_ensemble} in \code{\link{gen_hw_set}}. This threshold is
#' used in later functions to identify heat waves.
#'
#' @inheritParams processModel
#'
#' @return A list with two elements: (1) a vector with one element for
#'    each city included in the user-specified city location
#'    file, with each element value giving the threshold temperature for the
#'    heat wave definition for a city, in the same order that cities
#'    are listed in the user-specified \code{citycsv} file and (2) the
#'    components of the dataframe of climate model grid locations that will
#'    ultimately be output as the climate model grid locations file.
processThresholds <- function(model, global, custom){
        mod_name <- model[[1]]

        # To find the threshold, use the first ensemble member within
        # the relevant directory, historical or rcp
        if(custom$getBounds[1] <= global$dataDirectories[[1]][2]){
                thresholdDirs_model <- model[[2]]
        } else {
                thresholdDirs_model <- model[[3]]
        }
        r1i1p1_i <- sapply(thresholdDirs_model, function(x) x[1]) ==
                global$threshold_ensemble
        thresholdDirs <- thresholdDirs_model[r1i1p1_i][[1]]

        cat("Processing thresholds for", mod_name, "\n")

        # Acquire characteristics of the first historical ensemble
        thresholdEnsemble <- processEnsemble(ensemble = thresholdDirs,
                                              modelName = mod_name,
                                              global = global,
                                              custom = custom,
                                              type = "threshold")

        # Calculate threshold temperatures using the user-specified
        # threshold percentile (default: 0.98)
        thresholds <- apply(thresholdEnsemble$series, 2, stats::quantile,
                            probs = custom$probThreshold)

        out_locations <- thresholdEnsemble$out_locations

        return(list(thresholds = thresholds,
                    out_locations = out_locations))
}

#' Get projection data for reference period
#'
#' This function is only run if the reference period is different from the
#' projection period. In that case, this function will acquire the time series
#' of projected temperatures during the indicated reference period and pass
#' that through to be used in a later function to characterize heat waves.
#'
#' @inheritParams processModel
#'
#' @return A list with, among other elements, a \code{series} element with
#' the time series of projected temperatures for the reference period for each
#' study city.
processReference <- function(model, global, custom){
        name <- model[[1]]

        # To find the threshold, use the first ensemble member within
        # the relevant directory, historical or rcp
        if(custom$processModel[1] <= global$dataDirectories[[1]][2]){
                referenceDirs_model <- model[[2]]
        } else {
                referenceDirs_model <- model[[3]]
        }
        r1i1p1_i <- sapply(referenceDirs_model, function(x) x[1]) ==
                global$threshold_ensemble
        referenceDirs <- referenceDirs_model[r1i1p1_i][[1]]

        cat("Processing references for", name, "\n")

        # Acquire characteristics of the first historical ensemble
        referenceEnsemble <- processEnsemble(ensemble = referenceDirs,
                                             modelName = name,
                                             global = global,
                                             custom = custom,
                                             type = "reference")

        return(referenceEnsemble)
}

#' Create heat wave dataframe for climate projection
#'
#' This function, for each ensemble member, creates the heat wave dataframe and
#' writes it to file. It also stores the locations for each ensemble.
#'
#' @param ensemble List with parsed information about the directory structure for
#'    a specific ensemble member from the user-specified projections directory.
#'    This list is a subset of the list generated by
#'    \code{\link{acquireDirectoryStructure}}.
#' @param ensembleWriter A closure created by \code{createEnsembleWriter} to
#'    write output from this process to a file within the output directory
#'    specified by the user with the \code{out} argument in
#'    \code{\link{gen_hw_set}}.
#' @param thresholds A vector with the thresholds to use within each city
#'    in that city's heat wave definition.These are typically automatically
#'    determine during the run of the \code{gen_hw_set} function.
#' @param reference FALSE, if the user has not specified custom reference
#'    boundaries through the \code{referenceBoundaries} argument in
#'    \code{\link{gen_hw_set}}, otherwise a dataframe with the time series of
#'    projected temperatures for the custom reference period for each study
#'    city.
#' @param reference_dates A numeric vector with the start and
#'    end years to use for the reference period
#' @inheritParams buildStructureModels
#' @inheritParams processModel
#' @inheritParams processThresholds
#'
#' @return This function writes every heat wave dataframe to a .csv and returns
#'    the ensemble member used as the reference.
processProjections <- function(ensemble, modelName, ensembleWriter,
                               thresholds, global, custom, accumulators,
                               reference, reference_dates){

        cat("Processing projections for", modelName, "\n")

        # Acquire desired characteristics of the projection ensemble
        ensembleSeries <- processEnsemble(ensemble = ensemble,
                                    modelName = modelName,
                                    global = global,
                                    custom = custom,
                                    type = "projections")

        ensembleSeries$reference <- reference
        ensembleSeries$reference_dates <- reference_dates

        # Acquire the heat wave dataframes for every ensemble in the
        # model
        hwFrame <- formHwFrame(ensembleSeries = ensembleSeries,
                               thresholds = thresholds,
                               global = global,
                               custom = custom)

        # Write every heat wave frame to a .csv
        ensembleWriter(hwFrame)

        # Return the ensemble used as the reference
        return(NULL)
}

#' Extract projections from ensemble member
#'
#' This function extracts the desired climate data from a single ensemble
#' member, from one of the two experiment subdirectories.
#'
#' @inheritParams processModel
#' @inheritParams buildStructureExperiments
#' @inheritParams buildStructureModels
#' @inheritParams processThresholds
#' @inheritParams processProjections
#' @inheritParams getBounds
#'
#' @return A list object with processed projections, as well as various other
#'    elements needed for further processing to identify heat waves.
#'
#' @note This function calls another function that uses Euclidean distance for
#'    the distance between each city given in the
#'    \code{citycsv} file and the nearest grid point for the climate model
#'    for the specified ensemble member.
processEnsemble <- function(ensemble, modelName, global, custom, type){

        # Read the ensemble data
        cat("Reading --->", ensemble[1], "\n")

        latlong <- readLatLong(ensemble = ensemble, global = global)
        climate_lats <- unique(latlong[ , 1])
        climate_longs <- unique(latlong[ , 2])

        times <- readTimes(ensemble = ensemble, global = global)

        cat("Read operation complete", "\n")

        # Find indices of the closest points of measurement
        locations <- apply(global$cities, 1, closest_point, latlong = latlong)
        out_locations <- cbind(global$cities, latlong[locations, ])
        colnames(out_locations)[4:5] <- c("lat_grid", "long_grid")

        # Acquire the boundaries for the time series
        # Structure: c(start index, end index, # of elements spanning
        # from start to end)
        bounds <- getBounds(times = times,
                            custom = custom,
                            type = type)
        start <- bounds[1]
        end <- bounds[2]

        # Get a vector containing the dates of days we are dealing with
        dates <- formDates(times = times,
                           bounds = bounds)

        # Acquire time series for every city
        tas <- readtas(ensemble = ensemble, global = global, locations = locations)
        series <- data.frame(tas[start:end, ])

        # If the user wants to identify periods below, rather than above,
        # a threshold, make all values in the series negative
        if(global$above_threshold == FALSE){
                series <- -1 * series
        }

        # Prepare return value
        ret <- list(locations = locations,
                    bounds = bounds,
                    series = series,
                    times = times,
                    dates = dates,
                    out_locations = out_locations)
        return(ret)
}

#' Find closest grid point to a city location
#'
#' This function identifies the closest grid points in a climate model to a
#'city based on the city's latitude and longitude using the
#' Euclidean distance.
#'
#' @param city A numeric vector containing the city's ID, latitude, and
#'    longitude, in that order.
#' @param latlong A dataframe of latitude and longitude of each
#'    climate grid cell, with latitudes in the first column and
#'    longitudes in the second column. The \code{\link{readLatLong}} function
#'    re-orders the columns of longitude and latitude to comply with this
#'    format if necessary.
#'
#' @return An index corresponding to the column in the climate projections
#'    dataframe for that climate model that is closest to the city's
#'    coordinates.
closest_point <- function(city, latlong){
        latitude <- as.double(city[2])
        longitude <- as.double(city[3])
        aSQ <- abs(latlong[,1] - latitude) ^ 2
        bSQ <- abs(latlong[,2] - longitude) ^ 2
        cSQ <- aSQ + bSQ
        c_distance <- sqrt(cSQ)
        index <- which.min(c_distance)
        return(index)
}

#' Acquire boundaries of time series data
#'
#' This function acquires the boundaries of time series data.
#'
#' @param times A dataframe containing the time data for one ensemble member's
#'    projections. Each row in this dataframe corresponds to the row with the
#'    same row number in the climate projection dataframe.
#' @param type A character vector giving the type of boundaries that are being
#'   checked. Possible values are "threshold", "projections", and "reference".
#' @inheritParams buildStructureExperiments
#' @inheritParams processModel
#'
#' @return A numeric vector containing the number of days spanned by the
#'    time period, the lower bound of the experiment time
#'    period as an index specifying the relevant row of the climate projection
#'    data; and an upper bound of the experiment time period as an index
#'    specifying the relevant row of the climate projection data.
getBounds <- function(times, custom, type){
        # Set boundaries

        if(type == "threshold"){
                start_time <- custom$getBounds[1]
                end_time <- custom$getBounds[2]
        } else if (type == "projections") {
                start_time <- custom$getBounds[3]
                end_time <- custom$getBounds[4]
        } else if (type == "reference"){
                start_time <- custom$processModel[1]
                end_time <- custom$processModel[2]
        }

        start <- min(which(start_time == times[ , 2]))
        end <- max(which(end_time == times[ , 2]))
        size <- end - (start - 1)

        return(c(start, end, size))
}

#' Create date vector for requested time period
#'
#' This function creates a vector that includes all the dates spanning the
#' requested date range.
#'
#' @param bounds A numeric vector with the desired time boundaries of the
#'    ensemble, in terms of indices referring to the first and last row
#'    in the climate projection data frame within the requested date range.
#' @inheritParams getBounds
#'
#' @return A vector of the desired time boundaries, formatted as year-month-day.
formDates <- function(times, bounds){
        start <- bounds[1]
        end <- bounds[2]
        dateComponents <- data.frame(times[start:end, 2:4])
        dates <- paste(dateComponents[,1],
                       dateComponents[,2],
                       dateComponents[,3],
                       sep = "-")
        dates <- as.Date(dates)
        return(dates)
}
geanders/futureheatwaves documentation built on May 17, 2019, 12:14 a.m.