#' @title Cube diagnostics
#'
#' @description This function takes two spatiotemporal data cubes and runs diagnostic checks on the cubes to ensure that they can be combined using the unite_cubes function in the CFT package. This function will stop execution and return an error
#' if the given data cubes cannot be combined using the CFT unite_cubes function and will return a data frame containing relevant diagnostic information about the two data cubes if the two cubes can be combined using the CFT unite_cubes function.
#'
#' @param cube1 A spatiotemporal data cube with one dependent variable and some number of independent variables (Data.frame)
#' @param cube2 A spatiotemporal data cube with one dependent variable and some number of independent variables (Data.frame)
#'
#' @importFrom magrittr %>%
#' @importFrom sf
#' @importFrom sp
#' @importFrom dplyr
#' @return Data.frame containing diagnostic information about the two data cubes
#' @export
cube_diagnostics <- function(cube1, cube2){
require(sf)
require(sp)
require(dplyr)
# define a vector of possible column names containing latitude data
ynames <- c('lat', 'latitude', 'Lat', 'Latitude', 'y')
# define a vector of possible column names containing longitude data
xnames <- c('lon', 'long', 'longitude', 'Lon', 'Long', 'Longitude', 'x')
# define a vector of possible column names containing time data
tnames <- c('time', 'day', 'hour', 'month', 'year', 'date')
# check if cube 1 is formatted as a data frame and if not, stop execution with an error
if (is.data.frame(cube1) == FALSE){
stop('Error: Cube 1 is not a dataframe. Please convert Cube 1 to a dataframe and try again.')
}
# check if cube 2 is formatted as a data frame and if not, stop execution with an error
if (is.data.frame(cube2) == FALSE){
stop('Error: Cube 2 is not a dataframe. Please convert Cube 2 to a dataframe and try again.')
}
# check if cube 1 is empty and if so, stop execution with an error
if (nrow(cube1) == 0 || ncol(cube1) == 0){
stop('Error: Cube 1 is empty. Add data to Cube 1 and try again.')
}
# check if cube 2 is empty and if so, stop execution with an error
if (nrow(cube2) == 0 || ncol(cube2) == 0){
stop('Error: Cube 2 is empty. Add data to Cube 2 and try again.')
}
#if (ncol(cube1) > 4){
#stop('Error: Cube 1 contains more than 1 variable. You will need to create separate data cubes (with lat/long/time data) for each variable in Cube 1 and then try again using each subcube separately.')
#}
#if (ncol(cube2) > 4){
#stop('Error: Cube 2 contains more than 1 variable. You will need to create separate data cubes (with lat/long/time data) for each variable in Cube 1 and then try again using each subcube separately.')
#}
# check if cube 1 contains latitude data with one of the previously specified column names and if not, stop execution with an error
if (any(c('lat', 'latitude', 'Lat', 'Latitude', 'y') %in% colnames(cube1)) == FALSE){
stop("Error: Cube 1 does not contain latitude data. Your cube must contain spatial data and your latitude data must be in a column labeled 'lat', 'latitude', 'Lat', or 'Latitude'.")
}
# check if cube 1 contains longitude data with one of the previously specified column names and if not, stop execution with an error
if (any(c('lon', 'long', 'longitude', 'Lon', 'Long', 'Longitude', 'x') %in% colnames(cube1)) == FALSE){
stop("Error: Cube 1 does not contain longitude data. Your cube must contain spatial data and your longitude data must be in a column labeled 'lon', 'long', 'longitude', 'Lon', 'Long', or 'Longitude'.")
}
# check if cube 1 contains time data with one of the previously specified column names and if not, stop execution with an error
if (any(c('time', 'day', 'hour', 'month', 'year', 'date') %in% colnames(cube1)) == FALSE){
stop("Error: Cube 1 does not contain time data. Your cube must contain temporal data and your time data must be in a column labeled 'time', 'day', 'hour', 'month', 'year', or 'date'.")
}
# check if cube 2 contains time data with one of the previously specified column names and if not, stop execution with an error
if (any(c('time', 'day', 'hour', 'month', 'year', 'date') %in% colnames(cube2)) == FALSE){
stop("Error: Cube 2 does not contain time data. Your cube must contain temporal data and your time data must be in a column labeled 'time', 'day', 'hour', 'month', 'year', or 'date'.")
}
# check if cube 2 contains latitude data with one of the previously specified column names and if not, stop execution with an error
if (any(c('lat', 'latitude', 'Lat', 'Latitude', 'y') %in% colnames(cube2)) == FALSE){
stop("Error: Cube 2 does not contain latitude data. Your cube must contain spatial data and your latitude data must be in a column labeled 'lat', 'latitude', 'Lat', or 'Latitude'.")
}
# check if cube 2 contains longitude data with one of the previously specified column names and if not, stop execution with an error
if (any(c('lon', 'long', 'longitude', 'Lon', 'Long', 'Longitude', 'x') %in% colnames(cube2)) == FALSE){
stop("Error: Cube 2 does not contain longitude data. Your cube must contain spatial data and your longitude data must be in a column labeled 'lon', 'long', 'longitude', 'Lon', 'Long', or 'Longitude'.")
}
# find the index of the column containing longitude data in cube 1
c1_xname <- which(colnames(cube1) %in% xnames)
# find the index of the column containing latitude data in cube 1
c1_yname <- which(colnames(cube1) %in% ynames)
# find the index of the column containing time data in cube 1
c1_tname <- which(colnames(cube1) %in% tnames)
# find the index of the column containing longitude data in cube 2
c2_xname <- which(colnames(cube2) %in% xnames)
# find the index of the column containing latitude data in cube 2
c2_yname <- which(colnames(cube2) %in% ynames)
# find the index of the column containing time data in cube 2
c2_tname <- which(colnames(cube2) %in% tnames)
# Dimension Check
# find the x dimension of cube 1, which corresponds to the length of unique longitude values in cube 1
c1_xdim <- length(unique(cube1[,c1_xname]))
# find the y dimension of cube 1, which corresponds to the length of unique latitude values in cube 1
c1_ydim <- length(unique(cube1[,c1_yname]))
# find the z dimension of cube 1, which corresponds to the length of unique time values in cube 1
c1_zdim <- length(unique(cube1[,c1_tname]))
# find the x dimension of cube 2, which corresponds to the length of unique longitude values in cube 2
c2_xdim <- length(unique(cube2[,c2_xname]))
# find the y dimension of cube 2, which corresponds to the length of unique latitude values in cube 2
c2_ydim <- length(unique(cube2[,c2_yname]))
# find the z dimension of cube 2, which corresponds to the length of unique time values in cube 2
c2_zdim <- length(unique(cube2[,c2_tname]))
# check if the x dimension of cube 1 and cube 2 are equal
xdim_check <- c1_xdim == c2_xdim
# check if the y dimension of cube 1 and cube 2 are equal
ydim_check <- c1_ydim == c2_ydim
# check if the z dimension of cube 1 and cube 2 are equal
zdim_check <- c1_zdim == c2_zdim
# find the difference between the x dimension of the smaller cube and the x dimension of the larger cube
xdim_dif <- abs(c1_xdim - c2_xdim)
# find the difference between the y dimension of the smaller cube and the y dimension of the larger cube
ydim_dif <- abs(c1_ydim - c2_ydim)
# find the difference between the z dimension of the smaller cube and the z dimension of the larger cube
zdim_dif <- abs(c1_zdim - c2_zdim)
# use the difference between the x dimension of cube 1 and the x dimension of cube 2 to determine which cube has the larger x dimension
if (c1_xdim - c2_xdim > 0){
xdim_larger <- 'Cube 1'
} else{
xdim_larger <- 'Cube 2'
}
# use the difference between the y dimension of cube 1 and the y dimension of cube 2 to determine which cube has the larger y dimension
if (c1_ydim - c2_ydim > 0){
ydim_larger <- 'Cube 1'
} else{
ydim_larger <- 'Cube 2'
}
# use the difference between the z dimension of cube 1 and the z dimension of cube 2 to determine which cube has the larger z dimension
if (c1_zdim - c2_zdim > 0){
zdim_larger <- 'Cube 1'
} else{
zdim_larger <- 'Cube 2'
}
# Resolution Check
# find the longitudinal resolution of cube 1
c1_xres <- resolution(cube1[,c1_xname])
# find the latitudinal resolution of cube 1
c1_yres <- resolution(cube1[,c1_yname])
# find the temporal resolution of cube 1
c1_tres <- resolution(cube1[,c1_tname])
# find the longitude resolution of cube 2
c2_xres <- resolution(cube2[,c2_xname])
# find the latitude resolution of cube 2
c2_yres <- resolution(cube2[,c2_yname])
# find the temporal resolution of cube 2
c2_tres <- resolution(cube2[,c2_tname])
# check whether the longitudinal resolution of cube 1 is the same as the longitudinal resolution of cube 2
xres_check <- c1_xres == c2_xres
# check whether the latitudinal resolution of cube 1 is the same as the latitudinal resolution of cube 2
yres_check <- c1_yres == c2_yres
# check whether the temporal resolution of cube 1 is the same as the temporal resolution of cube 2
tres_check <- c1_tres == c2_tres
# find the difference between the longitudinal resolution of the larger cube and the longitudinal resolution of the smaller cube
xres_dif <- abs(c1_xres - c2_xres)
# find the difference between the latitudinal resolution of the larger cube and the latitudinal resolution of the smaller cube
yres_dif <- abs(c1_yres - c2_yres)
# find the difference between the temporal resolution of the larger cube and the temporal resolution of the smaller cube
tres_dif <- abs(c1_tres - c2_tres)
# use the difference between the longitudinal resolution of cube 1 and the longitudinal resolution of cube 2 to determine which cube has the larger longitudinal resolution
if (c1_xres - c2_xres > 0){
xres_larger <- 'Cube 1'
} else{
xres_larger <- 'Cube 2'
}
# use the difference between the latitudinal resolution of cube 1 and the latitudinal resolution of cube 2 to determine which cube has the larger latitudinal resolution
if (c1_yres - c2_yres > 0){
yres_larger <- 'Cube 1'
} else{
yres_larger <- 'Cube 2'
}
# use the difference between the temporal resolution of cube 1 and the temporal resolution of cube 2 to determine which cube has the larger temporal resolution
if (c1_tres - c2_tres > 0){
tres_larger <- 'Cube 1'
} else{
tres_larger <- 'Cube 2'
}
# Data Types Check
# create a data frame of NA values with 1 row and the same number of columns as cube 1 to hold the data type of each column in cube 1
c1_dtypes <- data.frame(matrix(NA, nrow = 1, ncol = length(colnames(cube1))))
# iterate over the columns in cube 1, adding the data type of each column in cube 1 to its respective column in the new data frame
for (i in 1:length(colnames(cube1))){
c1_dtypes[1,i] <- typeof(cube1[,i])
}
# set the column names of the data frame to be the column names of cube 1
colnames(c1_dtypes) <- colnames(cube1)
# create a data frame of NA values with 1 row and the same number of columns as cube 2 to hold the data type of each column in cube 2
c2_dtypes <- data.frame(matrix(NA, nrow = 1, ncol = length(colnames(cube2))))
# iterate over the columns in cube 2, adding the data type of each column in cube 2 to its respective column in the new data frame
for (j in 1:length(colnames(cube2))){
c2_dtypes[1,j] <- typeof(cube2[,j])
}
# se the column names of the data frame to the column names of cube 2
colnames(c2_dtypes) <- colnames(cube2)
# Convert to Spatial Objects - this is done to reproject the data to a standard long/lat projection
# convert cube 1 to an SF object using the longitude and latitude columns as the coordinates and a long/lat WGS84 projection
cube1_sf <- st_as_sf(x = cube1, coords = c(colnames(cube1)[c1_xname], colnames(cube1)[c1_yname]), crs = '+proj=longlat +datum=WGS84')
# convert cube 2 to an SF object using the longitude and latitude columns as the coordinates and a long/lat WGS84 projection
cube2_sf <- st_as_sf(x = cube2, coords = c(colnames(cube2)[c2_xname], colnames(cube2)[c2_yname]), crs = '+proj=longlat +datum=WGS84')
# convert cube 1 back into a data frame putting the points from the SF geometry into two columns named lon and lat for longitude and latitude values, respectively
cube1 <- tidyr::extract(cube1_sf, geometry, into = c('lon', 'lat'), '\\((.*),(.*)\\)')
# convert cube 2 back into a data frame putting the points from the SF geometry into two columns named lon and lat for longitude and latitude values, respectively
cube2 <- tidyr::extract(cube2_sf, geometry, into = c('lon', 'lat'), '\\((.*),(.*)\\)')
# Extent Check
# find the minimum and maximum longitude value in cube 1
c1_xmin <- min(cube1$lon)
c1_xmax <- max(cube1$lon)
# find the minimum and maximum latitude value in cube 1
c1_ymin <- min(cube1$lat)
c1_ymax <- max(cube1$lat)
# find the minimum and maximum longitude value in cube 2
c2_xmin <- min(cube2$lon)
c2_xmax <- max(cube2$lon)
# find the minimum and maximum latitude value in cube 2
c2_ymin <- min(cube2$lat)
c2_ymax <- max(cube2$lat)
# check whether the minimum longitude value in cube 1 is equal to the minimum longitude value in cube 2
xmin_check <- c1_xmin == c2_xmin
# check whether the maximum longitude value in cube 1 is equal to the maximum longitude value in cube 2
xmax_check <- c1_xmax == c2_xmax
# check whether the minimum latitude value in cube 1 is equal to the minimum latitude value in cube 2
ymin_check <- c1_ymin == c2_ymin
# check whether the maximum latitude value in cube 1 is equal to the maximum latitude value in cube 2
ymax_check <- c1_ymax == c2_ymax
# find the difference between the minimum longitude value of the larger cube and the minimum longitude value of the smaller cube
xmin_dif <- abs(c1_xmin - c2_xmin)
# find the difference between the maximum longitude value of the larger cube and the maximum longitude value of the smaller cube
xmax_dif <- abs(c1_xmax - c2_xmax)
# find the difference between the minimum latitude value of the larger cube and the minimum latitude value of the smaller cube
ymin_dif <- abs(c1_ymin - c2_ymin)
# find the difference between the maximum latitude value of the larger cube and the maximum latitude value of the smaller cube
ymax_dif <- abs(c1_ymax - c2_ymax)
# determine which cube has a smaller minimum longitude value
if (c1_xmin < c2_xmin){
xmin_larger <- 'Cube 1'
} else{
xmin_larger <- 'Cube 2'
}
# determine which cube has a larger maximum longitude value
if (c1_xmax > c2_xmax){
xmax_larger <- 'Cube 1'
} else{
xmax_larger <- 'Cube 2'
}
# determine which cube has a smaller minimum latitude value
if (c1_ymin < c2_ymin){
ymin_larger <- 'Cube 1'
} else{
ymin_larger <- 'Cube 2'
}
# determine which cube has a larger maximum latitude value
if (c1_ymax > c2_ymax){
ymax_larger <- 'Cube 1'
} else{
ymax_larger <- 'Cube 2'
}
# create a vector of row names for the data frame of diagnostic information about the two data cubes
description <- c('x Dimension', 'y Dimension', 'Time', 'x Resolution', 'y Resolution', 'Temporal Resolution', 'x Min', 'x Max', 'y Min', 'y Max')
# create a vector containing boolean values for all of the checks performed above
check_vals <- c(xdim_check, ydim_check, zdim_check, xres_check, yres_check, tres_check, xmin_check, xmax_check, ymin_check, ymax_check)
# create a vector containing the values of the dimensions, resolutions, and min and max values for cube 1
cube1_vals <- c(c1_xdim, c1_ydim, c1_zdim, c1_xres, c1_yres, c1_tres, c1_xmin, c1_xmax, c1_ymin, c1_ymax)
# create a vector containing the values of the dimensions, resolutions, and min and max values for cube 2
cube2_vals <- c(c2_xdim, c2_ydim, c2_zdim, c2_xres, c2_yres, c2_tres, c2_xmin, c2_xmax, c2_ymin, c2_ymax)
# create a vector containing the absolute value of the difference of each dimension, resolution, and min and max values between the two cubes
dif_vals <- c(xdim_dif, ydim_dif, zdim_dif, xres_dif, yres_dif, tres_dif, xmin_dif, xmax_dif, ymin_dif, ymax_dif)
# create a vector containing strings identifying which cube has the larger dimension, resolution, and min and max value
larger_vals <- c(xdim_larger, ydim_larger, zdim_larger, xres_larger, yres_larger, tres_larger, xmin_larger, xmax_larger, ymin_larger, ymax_larger)
# create a data frame of diagnostic values for the two data cubes containing the vectors defined above
diagnostics <- data.frame(description, check_vals, cube1_vals, cube2_vals, dif_vals, larger_vals)
# set the column names for the data frame of diagnostic values for the two data cubes
colnames(diagnostics) <- c('Description', 'Check', 'Cube 1 Value', 'Cube 2 Value', 'Difference', 'Larger Cube')
# create a list containing the data frame with diagnostic information about the two data cubes, the data frame of data types for the columns in cube 1, and the data frame of data types for the columns in cube 2
vals <- list(diag = diagnostics, c1 = c1_dtypes, c2 = c2_dtypes)
# return the list containing the return values
return(vals)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.