#' Prepare dekad or pentad data in GeoCLIM format
#'
#' @description `prepare_geoclim` rearranges a data frame to a
#' format suitable for use in GeoCLIM. This data frame can be rearranged and
#' exported to a file or connection with `export_geoclim`.
#'
#' @param data \code{data.frame} The data.frame to calculate from.
#' @param year \code{character(1)} The name of the year column in \code{data}.
#' @param type_col \code{character(1)} The name of the dekad or pentad column in \code{data}.
#' @param element \code{character(1)} The name of the element column in \code{data} to apply the function to.
#' @param station_id \code{character(1)} The name of the station column in \code{metadata}, or \code{data} if \code{metadata = NULL}.
#' @param latitude \code{character(1)} The name of the latitude column in \code{metadata}, or \code{data} if \code{metadata = NULL}.
#' @param longitude \code{character(1)} The name of the longitude column in \code{metadata}, or \code{data} if \code{metadata = NULL}.
#' @param type \code{character(1)} Whether the data is in `dekad` or `pentad` format.
#' @param metadata \code{data.frame} The metadata data.frame to calculate from.
#' @param join_by \code{character} The variable(s) to merge the \code{data} and \code{metadata} data frames.
#' @param add_cols \code{character} Names of additional metadata columns that should be included in the output
#'
#' @return A data.frame formatted for use in GeoCLIM
#' @export
#'
#' @examples
#' # Calculate dekadal summaries for the rainfall column
#' dekad_data <- daily_niger %>% dplyr::mutate(dekad = dekad(date))
#' summary_data <- dekad_data %>% dplyr::group_by(station_name, year, dekad) %>%
#' dplyr::summarise(mean_rain = mean(rain, na.rm = TRUE))
#' prepare_geoclim(data = summary_data, year = "year",
#' station_id = "station_name",
#' type_col = "dekad",
#' element = "mean_rain", metadata = stations_niger,
#' join_by = "station_name",
#' latitude = "lat", longitude = "long")
prepare_geoclim <- function(data, year, type_col, element, station_id,
latitude, longitude, type = c("dekad", "pentad"),
metadata = NULL, join_by = NULL, add_cols = NULL) {
checkmate::assert_data_frame(data)
checkmate::assert_string(year)
assert_column_names(data, year)
type <- match.arg(type)
checkmate::assert_string(type_col)
assert_column_names(data, type_col)
checkmate::assert_string(element)
assert_column_names(data, element)
checkmate::assert_data_frame(metadata, null.ok = TRUE)
checkmate::assert_string(join_by, null.ok = TRUE)
if (is.null(names(join_by))) names(join_by) <- join_by
checkmate::assert_string(station_id)
checkmate::assert_string(latitude)
checkmate::assert_string(longitude)
# data_with_meta is whichever data.frame has the metadata columns in
if (is.null(metadata)) {
data_with_meta <- data
} else {
data_with_meta <- metadata
if (is.null(join_by)) stop("join_by must be specified when metadata is supplied.")
}
assert_column_names(data_with_meta, station_id)
assert_column_names(data_with_meta, latitude)
assert_column_names(data_with_meta, longitude)
unique_types <- unique(data[[type_col]])
if (type == "dekad") ntypes <- 36
else if (type == "pentad") ntypes <- 72
if (setequal(as.character(unique_types), as.character(1:ntypes))) {
data[[type_col]] <- factor(data[[type_col]],
levels = 1:ntypes,
labels = 1:ntypes)
} else {
if (is.factor(data[[type_col]]) && nlevels(data[[type_col]]) == ntypes) {
levels(data[[type_col]]) <- 1:ntypes
} else {
stop("Values in type column are not recognised. ",
"Values must be numbers 1 to ", ntypes,
" or a factor with ", ntypes, " levels.")
}
}
type_levels <- levels(data[[type_col]])
if (!is.null(metadata)) {
# Check that metadata has all stations in data
data_by <- unique(data[[names(join_by)]])
metadata_by <- unique(metadata[[as.vector(join_by)]])
if (!all(data_by %in% metadata_by)) {
stop("metadata is missing some values of '", join_by, "' found in data.")
}
# column names without the joining columns
names_data <- setdiff(names(data), names(join_by))
names_metadata <- setdiff(names(metadata), as.vector(join_by))
# names that are the same other than joining columns
# this will cause renaming issues when merging
same_names <- intersect(names_data, names_metadata)
# rename columns that are the same before joining
# to prevent renaming issues
for (col in c(station_id, latitude, longitude)) {
if (col %in% same_names) {
names(data)[names(data) == col] <- paste0(col, "_x")
}
}
data_with_meta <- dplyr::left_join(data, metadata, by = join_by)
} else {
data_with_meta <- data
}
id_cols <- c(station_id, latitude, longitude, add_cols, year)
data_with_meta[[element]] <- tidyr::replace_na(data_with_meta[[element]], -999)
geoclim_data <-
tidyr::pivot_wider(data_with_meta,
id_cols = tidyselect::all_of(id_cols),
names_from = tidyselect::all_of(type_col),
values_from = tidyselect::all_of(element),
values_fill = -999)
names(geoclim_data)[names(geoclim_data) == station_id] <- "id"
names(geoclim_data)[names(geoclim_data) == latitude] <- "lat"
names(geoclim_data)[names(geoclim_data) == longitude] <- "lon"
names(geoclim_data)[names(geoclim_data) == year] <- "year"
geoclim_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.