Nothing
################################################################################
#' Scores for FLUXNET reference data in NetCDF format
#' @description This function compares model output against
#' measurements from FLUXNET or from the Baseline Surface Radiation Networking in
#' NetCDF format. The performance of a model is expressed through scores that range
#' from zero to one, where increasing values imply better performance. These scores
#' are computed in five steps:
#' \eqn{(i)} computation of a statistical metric,
#' \eqn{(ii)} nondimensionalization,
#' \eqn{(iii)} conversion to unit interval,
#' \eqn{(iv)} spatial integration, and
#' \eqn{(v)} averaging scores computed from different statistical metrics.
#' The latter includes the bias, root-mean-square error, phase shift,
#' inter-annual variability, and spatial distribution. The corresponding equations
#' are documented in \code{\link{amber-package}}.
#'
#' @param long.name A string that gives the full name of the variable, e.g. 'Gross primary productivity'
#' @param nc.mod A string that gives the path and name of the netcdf file that contains the model output, e.g. '/home/model_gpp.nc'
#' @param nc.ref A string that gives the path and name of the netcdf file that contains the reference data output, e.g. '/home/reference_gpp.nc'
#' @param mod.id A string that identifies the source of the reference data set, e.g. 'CanESM2'
#' @param ref.id A string that identifies the source of the reference data set, e.g. 'MODIS'
#' @param unit.conv.mod A number that is used as a factor to convert the unit of the model data, e.g. 86400
#' @param unit.conv.ref A number that is used as a factor to convert the unit of the reference data, e.g. 86400
#' @param variable.unit A string that gives the final units using LaTeX notation, e.g. 'gC m$^{-2}$ day$^{-1}$'
#' @param score.weights R object that gives the weights of each score (\eqn{S_{bias}}, \eqn{S_{rmse}}, \eqn{S_{phase}}, \eqn{S_{iav}}, \eqn{S_{dist}})
#' that are used for computing the overall score, e.g. c(1,2,1,1,1)
#' @param rotate.me logical: TRUE if you want longitudes to range from -180 to
#' 180 degrees and FALSE if you want longitudes to range from 0 to 360 degrees
#' @param irregular logical: TRUE if data is on an irregular grid and FALSE if
#' data is on a regular grid
#' @param my.projection A string that gives the projection of the irregular grid
#' @param shp.filename A string that gives the coastline shapefile
#' @param my.xlim An R object that gives the longitude range that you wish to
#' plot, e.g. c(-180, 180)
#' @param my.ylim An R object that gives the longitude range that you wish to
#' plot, e.g. c(-90, 90)
#' @param plot.width Number that gives the plot width, e.g. 8
#' @param plot.height Number that gives the plot height, e.g. 4
#' @param numCores An integer that defines the number of cores, e.g. 2
#' @param outputDir A string that gives the output directory, e.g. '/home/project/study'. The output will only be written if the user specifies an output directory.
#' @param phaseMinMax A string (either 'phaseMax' or 'phaseMin') that determines
#' whether to assess the seasonal peak as a maximum or a minimum. The latter may be appropriate for variables
#' that tend to be negative, such as net longwave radiation or net ecosystem exchange.
#' @return (1) Figures in PDF format that show global maps of
#' the model data at the location of FLUXNET sites
#' (mean, \eqn{mod.mean}; interannual-variability, \eqn{mod.iav}; month of annual cycle maximum, \eqn{mod.max.month}),
#' the reference data
#' (mean, \eqn{ref.mean}; interannual-variability, \eqn{ref.iav}; month of annual cycle maximum, \eqn{ref.max.month}),
#' statistical metrics
#' (bias, \eqn{bias}; centralized root mean square error, \eqn{crmse}; time difference of the annual cycle maximum, \eqn{phase}),
#' and scores
#' (bias score, \eqn{bias.score}; root mean square error score, \eqn{rmse.score}; inter-annual variability score \eqn{iav.score}; annual cycle score (\eqn{phase.score}).
#'
#' (2) Four text files: (i) score values and (ii) score inputs for each individual
#' site, and (iii) score values and (iv) score inputs averaged across sites.
#' when averaging over all station.
#' @examples
#' library(amber)
#' library(classInt)
#' library(doParallel)
#' library(foreach)
#' library(Hmisc)
#' library(latex2exp)
#' library(ncdf4)
#' library(parallel)
#' library(raster)
#' library(rgdal)
#' library(rgeos)
#' library(scico)
#' library(sp)
#' library(stats)
#' library(utils)
#' library(viridis)
#' library(xtable)
#'
#' long.name <- 'Gross primary productivity'
#' nc.mod <- system.file('extdata/modelRegular', 'gpp_monthly.nc', package = 'amber')
#' nc.ref <- system.file('extdata/referenceRegular', 'gpp_FLUXNET.nc', package = 'amber')
#' mod.id <- 'CLASSIC' # define a model experiment ID
#' ref.id <- 'FLUXNET' # give reference dataset a name
#' unit.conv.mod <- 86400*1000 # optional unit conversion for model data
#' unit.conv.ref <- 86400*1000 # optional unit conversion for reference data
#' variable.unit <- 'gC m$^{-2}$ day$^{-1}$' # unit after conversion (LaTeX notation)
#' score.weights <- c(1,2,1,1,1) # score weights of S_bias, S_rmse, S_phase, S_iav, S_dist
#' # global plot
#' scores.fluxnet.nc(long.name, nc.mod, nc.ref, mod.id, ref.id, unit.conv.mod,
#' unit.conv.ref, variable.unit, score.weights)
#'
#' \donttest{
#' # regional plot
#' scores.fluxnet.nc(long.name, nc.mod, nc.ref, mod.id, ref.id, unit.conv.mod,
#' unit.conv.ref, variable.unit, score.weights, my.xlim = c(-150, -60), my.ylim = c(20, 60),
#' plot.width = 6, plot.height = 3.8)
#' scores.fluxnet.nc(long.name, nc.mod, nc.ref, mod.id, ref.id, unit.conv.mod,
#' unit.conv.ref, variable.unit, score.weights)
#'
#' # (2) Example for data on a rotated grid
#' nc.mod <- system.file('extdata/modelRotated', 'gpp_monthly.nc', package = 'amber')
#' nc.ref <- system.file('extdata/referenceRegular', 'gpp_FLUXNET.nc', package = 'amber')
#' mod.id <- 'CLASSIC' # define a model experiment ID
#' ref.id <- 'FLUXNET' # give reference dataset a name
#' unit.conv.mod <- 86400*1000 # optional unit conversion for model data
#' unit.conv.ref <- 86400*1000 # optional unit conversion for reference data
#' variable.unit <- 'gC m$^{-2}$ day$^{-1}$' # unit after conversion (LaTeX notation)
#' score.weights <- c(1,2,1,1,1) # score weights of S_bias, S_rmse, S_phase, S_iav, S_dist
#' rotate.me <- FALSE
#' irregular <- TRUE
#' my.projection <-'+proj=ob_tran +o_proj=longlat +o_lon_p=83. +o_lat_p=42.5 +lon_0=263.'
#' # shp.filename <- system.file('extdata/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp',
#' # package = 'amber')
#' shp.filename <- system.file("extdata/ne_110m_land/ne_110m_land.shp", package = "amber")
#' my.xlim <- c(-171, 0) # longitude range that you wish to plot
#' my.ylim <- c(32, 78) # latitude range that you wish to plot
#' plot.width <- 7 # plot width
#' plot.height <- 3.8 # plot height
#' numCores = 2
#'
#'scores.fluxnet.nc(long.name, nc.mod, nc.ref, mod.id, ref.id, unit.conv.mod,
#' unit.conv.ref, variable.unit, score.weights, rotate.me, irregular,
#' my.projection,
#' shp.filename, my.xlim, my.ylim)
#' } #donttest
#' @export
scores.fluxnet.nc <- function(long.name, nc.mod, nc.ref, mod.id, ref.id, unit.conv.mod, unit.conv.ref, variable.unit, score.weights,
rotate.me = TRUE, irregular = FALSE, my.projection = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0",
shp.filename = system.file("extdata/ne_110m_land/ne_110m_land.shp", package = "amber"), my.xlim = c(-180, 180),
my.ylim = c(-60, 85), plot.width = 8, plot.height = 3.8, numCores = 2, outputDir = FALSE, phaseMinMax = "phaseMax") {
# (I) Data preparation -----------------------------------------------------
# (1) Reproject data from an irregular to a regular grid if 'irregular=TRUE'
if (irregular == TRUE) {
regular <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
rotated <- my.projection
# get variable name
nc <- ncdf4::nc_open(nc.mod)
variable.name <- base::names(nc[["var"]])
variable.name <- variable.name[1]
variable.name <- ifelse(variable.name == "burntFractionAll", "burnt", variable.name) # rename burntFractionAll to shorter name
variable.name <- toupper(variable.name) # make variable name upper-case
ncdf4::nc_close(nc)
# process model data
my.nc <- nc.mod
nc <- ncdf4::nc_open(my.nc)
data <- ncdf4::ncvar_get(nc) # get values
lon <- ncdf4::ncvar_get(nc, "lon")
lat <- ncdf4::ncvar_get(nc, "lat")
time <- ncdf4::ncvar_get(nc, "time")
#
nCol <- base::length(lon[, 1])
nRow <- base::length(lon[1, ])
nTime <- base::length(time)
# compute dates
origin <- ncdf4::ncatt_get(nc, "time", attname = "units")[2]
origin <- base::strsplit(origin$value, " ")
origin <- base::unlist(origin)[3]
start.date <- base::as.Date(origin) + time[1] + 30 # the result is consitent with cdo sinfo
dates <- base::seq(as.Date(start.date), by = "month", length = nTime)
start.date <- min(dates)
end.date <- max(dates)
start.date <- format(as.Date(start.date), "%Y-%m")
end.date <- format(as.Date(end.date), "%Y-%m")
lon <- base::matrix(lon, ncol = 1)
lat <- base::matrix(lat, ncol = 1)
lonLat <- base::data.frame(lon, lat)
sp::coordinates(lonLat) <- ~lon + lat
suppressWarnings(raster::projection(lonLat) <- regular)
suppressWarnings(lonLat <- sp::spTransform(lonLat, sp::CRS(rotated)))
myExtent <- raster::extent(lonLat)
# create an empty raster
r <- raster::raster(ncols = nCol, nrows = nRow) # empty raster
# create a raster by looping through all time steps
cl <- parallel::makePSOCKcluster(numCores)
doParallel::registerDoParallel(cl)
myRaster <- foreach::foreach(i = 1:nTime) %dopar% {
myValues <- data[, , i]
myValues <- base::apply(base::t(myValues), 2, rev) # rotate values
r <- raster::setValues(r, myValues) # assign values
raster::extent(r) <- myExtent # extent using the rotated projection
r <- r * 1 # this is necessary for the base::do.call function below
}
myStack <- base::do.call(raster::stack, myRaster)
parallel::stopCluster(cl)
myStack <- raster::setZ(myStack, dates, name = "time")
names(myStack) <- dates
mod <- myStack
start.date.mod <- start.date
end.date.mod <- end.date
# process reference data
nc <- ncdf4::nc_open(nc.ref)
variable.name.ref <- names(nc[["var"]])
variable.name.ref <- variable.name.ref[length(variable.name.ref)] # take the last variable
# get date
time <- nc$dim[[1]] # days since 1850-01-01 00:00:00
n <- length(time$vals) # number of months in file
origin <- strsplit(time$units, " ")
origin <- unlist(origin)[3]
start.date.ref <- as.Date(origin) + time$vals[1] + 30 # the result is consitent with cdo sinfo
dates.ref <- seq(as.Date(start.date.ref), by = "month", length = n)
dates.ref <- format(as.Date(dates.ref), "%Y-%m") # only year and month
start.date.ref <- min(dates.ref)
end.date.ref <- max(dates.ref)
# convert data to data frame
lon <- ncdf4::ncvar_get(nc, varid = "lon")
lat <- ncdf4::ncvar_get(nc, varid = "lat")
lon <- base::matrix(lon, ncol = 1)
lat <- base::matrix(lat, ncol = 1)
# reproject coordinates to rotated grid
lonLat <- base::data.frame(lon, lat)
sp::coordinates(lonLat) <- ~lon + lat
suppressWarnings(raster::projection(lonLat) <- regular)
suppressWarnings(lonLat <- sp::spTransform(lonLat, sp::CRS(rotated)))
lon <- sp::coordinates(lonLat)[, 1]
lat <- sp::coordinates(lonLat)[, 2]
data <- ncdf4::ncvar_get(nc, varid = variable.name.ref)
ncdf4::nc_close(nc)
data <- data.frame(lon, lat, data * unit.conv.ref)
colnames(data) <- c("lon", "lat", dates.ref)
site.id <- paste("site", seq(1, nrow(data), 1), sep = "_")
rownames(data) <- site.id
ref <- data
} else {
# (2) Process data if 'irregular=FALSE' --------------------------------
# model data
nc <- ncdf4::nc_open(nc.mod)
variable.name <- names(nc[["var"]])
ncdf4::nc_close(nc)
variable.name <- variable.name[length(variable.name)] # take the last variable (relevant for CanESM5)
variable.name <- ifelse(variable.name == "burntFractionAll", "burnt", variable.name) # rename burntFractionAll to shorter name
variable.name <- toupper(variable.name) # make variable name upper-case
mod <- raster::brick(nc.mod)
dates.mod <- raster::getZ(mod)
dates.mod <- format(as.Date(dates.mod), "%Y-%m") # only year and month
start.date.mod <- min(dates.mod)
end.date.mod <- max(dates.mod)
myExtent <- c(-180, 180, -90, 90) # used later
# reference data
nc <- ncdf4::nc_open(nc.ref)
variable.name.ref <- names(nc[["var"]])
variable.name.ref <- variable.name.ref[length(variable.name.ref)] # take the last variable
# get date
time <- nc$dim[[1]] # days since 1850-01-01 00:00:00
n <- length(time$vals) # number of months in file
origin <- strsplit(time$units, " ")
origin <- unlist(origin)[3]
start.date.ref <- as.Date(origin) + time$vals[1] + 30 # the result is consitent with cdo sinfo
dates.ref <- seq(as.Date(start.date.ref), by = "month", length = n)
dates.ref <- format(as.Date(dates.ref), "%Y-%m") # only year and month
start.date.ref <- min(dates.ref)
end.date.ref <- max(dates.ref)
# convert data to data frame
lon <- ncdf4::ncvar_get(nc, varid = "lon")
lat <- ncdf4::ncvar_get(nc, varid = "lat")
data <- ncdf4::ncvar_get(nc, varid = variable.name.ref)
ncdf4::nc_close(nc)
data <- data.frame(lon, lat, data * unit.conv.ref)
colnames(data) <- c("lon", "lat", dates.ref)
site.id <- paste("site", seq(1, nrow(data), 1), sep = "_")
rownames(data) <- site.id
ref <- data
}
#---------------------------------------------------------------------------
# find common time period
start.date <- max(start.date.mod, start.date.ref)
end.date <- min(end.date.mod, end.date.ref)
# subset common time period for model data
mod <- mod[[which(format(as.Date(raster::getZ(mod)), "%Y-%m") >= start.date & format(as.Date(raster::getZ(mod)), "%Y-%m") <=
end.date)]]
# subset common time period for reference data get a sequence of common dates
dates <- seq(from = as.Date(paste(start.date, "15", sep = "-")), to = as.Date(paste(end.date, "15", sep = "-")), by = "month")
dates <- format(as.Date(dates), "%Y-%m")
common.dates <- data.frame(dates)
colnames(common.dates) <- "dates"
## add an index to each date
index <- seq(1, length(dates.ref), 1)
dates.ref <- data.frame(index, dates.ref)
colnames(dates.ref) <- c("index", "dates")
## merge common dates and reference dates the index will be used to subset the reference data set if the time covered by the
## reference data exceeds the time covered by the model data
column.id <- merge(dates.ref, common.dates, by = "dates")
column.id <- column.id$index
ref <- data.frame(ref[2 + column.id]) # 2+ because column 1 and 2 are are lon and lat
colnames(ref) <- c(dates)
ref <- intFun.site.points(lon, lat, ref)
# compute statistics for each site by extracting model data for each site
if (rotate.me == TRUE) {
suppressWarnings(mod <- raster::rotate(mod))
}
mod <- mod * unit.conv.mod
mod <- raster::extract(mod, ref, method = "simple")
mod <- data.frame(mod)
rownames(mod) <- site.id
ref <- as.data.frame(ref)
ref <- ref[-c(1, 2)] # drop lon lat column
# check whether number of columns and rows match between mod and ref
stopifnot(ncol(ref) == ncol(mod))
stopifnot(nrow(ref) == nrow(mod))
# make a string that summarizes metadata
meta.data.mod <- paste(variable.name, mod.id, "from", start.date, "to", end.date, sep = "_")
meta.data.ref <- paste(variable.name, ref.id, "from", start.date, "to", end.date, sep = "_")
meta.data.com <- paste(variable.name, mod.id, "vs", ref.id, "from", start.date, "to", end.date, sep = "_")
# statistical analysis the time period of the reference data varies among sites some sites are located in the model ocean
# exclude data in one data set that is not available in the other data set
mask.mod <- (mod - mod + 1)
mask.ref <- (ref - ref + 1)
mask <- mask.mod * mask.ref
mod <- mod * mask
ref <- ref * mask
# get number of years of data coverage of each station
n.months <- apply(mask, 1, sum, na.rm = TRUE)
n.years <- round(n.months/12)
n.years <- intFun.site.points(lon, lat, n.years)
# transpose data, where rows are dates and columns are sites this is necessary for using mapply below
mod <- data.frame(t(mod))
ref <- data.frame(t(ref))
# (1) bias
mod.mean <- apply(mod, 2, mean, na.rm = TRUE) # time mean
ref.mean <- apply(ref, 2, mean, na.rm = TRUE) # time mean
weights <- ref.mean # weights used for spatial integral
bias <- mod.mean - ref.mean # time mean
ref.sd <- apply(ref, 2, sd, na.rm = TRUE) # standard deviation of reference data
epsilon_bias <- abs(bias)/ref.sd
epsilon_bias[epsilon_bias == Inf] <- NA # relative error
bias.score <- exp(-epsilon_bias) # bias score as a function of space
S_bias_not.weighted <- mean(bias.score, na.rm = TRUE) # scalar score (not weighted)
# calculate the weighted scalar score
a <- bias.score * weights
b <- sum(a, na.rm = TRUE) # this is a scalar, the sum of all values
S_bias_weighted <- b/sum(weights, na.rm = TRUE) # scalar score (weighted)
# compute global mean values of score input(s)
mod.mean.scalar <- mean(mod.mean, na.rm = TRUE) # global mean value
ref.mean.scalar <- mean(ref.mean, na.rm = TRUE) # global mean value
bias.scalar <- mean(bias, na.rm = TRUE) # global mean value
bias.scalar.rel <- (mod.mean.scalar - ref.mean.scalar)/abs(ref.mean.scalar) * 100
ref.sd.scalar <- mean(ref.sd, na.rm = TRUE)
epsilon_bias.scalar <- stats::median(epsilon_bias, na.rm = TRUE)
# (2) root mean square error (rmse)
rmse <- mapply(intFun.rmse, mod, ref) # compute rmse
mod.anom <- data.frame(apply(mod, 2, intFun.anom)) # compute anomalies
ref.anom <- data.frame(apply(ref, 2, intFun.anom)) # compute anomalies
crmse <- mapply(intFun.crmse, mod.anom, ref.anom)
epsilon_rmse <- crmse/ref.sd
epsilon_rmse[epsilon_rmse == Inf] <- NA # relative error
rmse.score <- exp(-epsilon_rmse) # rmse score as a function of space
S_rmse_not.weighted <- mean(rmse.score, na.rm = TRUE) # scalar score (not weighted)
# calculate the weighted scalar score
a <- rmse.score * weights # this is a raster
b <- sum(a, na.rm = TRUE) # this is a scalar, the sum up all values
S_rmse_weighted <- b/sum(weights, na.rm = TRUE) # scalar score (weighted)
# compute global mean values of score input(s)
rmse.scalar <- mean(rmse, na.rm = TRUE) # global mean value
crmse.scalar <- mean(crmse, na.rm = TRUE) # global mean value
epsilon_rmse.scalar <- stats::median(epsilon_rmse, na.rm = TRUE)
# (3) phase shift make monthly means month index is the same for mod and ref, given the temporal subset made above
month <- format(as.Date(paste(dates, "15", sep = "-")), format = "%m")
month <- as.numeric(month)
mod <- data.frame(month, mod)
ref <- data.frame(month, ref)
# compute climatological mean monthly values
index <- list(mod$month)
mod.clim.mly <- apply(mod, 2, function(x) {
tapply(x, index, mean, na.rm = TRUE)
})
ref.clim.mly <- apply(ref, 2, function(x) {
tapply(x, index, mean, na.rm = TRUE)
})
mod.clim.mly.mm <- mod.clim.mly # will be used when computing iav further below
ref.clim.mly.mm <- ref.clim.mly # will be used when computing iav further below
# drop 'month' column
mod.clim.mly <- subset(mod.clim.mly, select = -c(month))
ref.clim.mly <- subset(ref.clim.mly, select = -c(month))
# find month of seasonal peak
# In most cases, we are interested in the timing of the seasonal maximum value
# In some cases, however, the seasonal peak is a minimum, e.g. NEE = RECO - GPP
if (phaseMinMax == "phaseMax") {
mod.max.month <- apply(mod.clim.mly, 2, which.max)
ref.max.month <- apply(ref.clim.mly, 2, which.max)
}
if (phaseMinMax == "phaseMin") {
mod.max.month <- apply(mod.clim.mly, 2, which.min)
ref.max.month <- apply(ref.clim.mly, 2, which.min)
}
mod.max.month <- as.numeric(mod.max.month)
mod.max.month <- data.frame(as.numeric(mod.max.month))
ref.max.month <- data.frame(as.numeric(ref.max.month))
# get shortest time distance between these months
abs.diff <- abs(mod.max.month - ref.max.month) # absolute difference from 0 to 12 months
phase <- apply(abs.diff, 2, intFun.theta) # shortest distance from 0 to 6 months (theta)
phase.score <- 0.5 * (1 + cos(2 * pi * phase/12)) # score from 0 (6 months) to 1 (0 months)
S_phase_not.weighted <- mean(phase.score, na.rm = TRUE) # scalar score (not weighted)
# calculate the weighted scalar score
a <- phase.score * weights # this is spatial data
b <- sum(a, na.rm = TRUE) # this is a scalar, the sum up all values
S_phase_weighted <- b/sum(weights, na.rm = TRUE) # scalar score (weighted)
# compute global mean values of score input(s)
phase.scalar <- mean(phase, na.rm = TRUE) # global mean value
mod.max.month.scalar <- mean(mod.max.month[, 1], na.rm = TRUE) # global mean value
ref.max.month.scalar <- mean(ref.max.month[, 1], na.rm = TRUE) # global mean value
# (4) interannual variability
# This approach assumes that all data start in Jan all months after the last Dec will be dropped if data does not end in Dec
mod.anom <- intFun.anom.mly(mod, mod.clim.mly.mm)
ref.anom <- intFun.anom.mly(ref, ref.clim.mly.mm)
mod.iav <- apply(mod.anom, 2, intFun.iav)
ref.iav <- apply(ref.anom, 2, intFun.iav)
# set values close to zero to NA
ref.iav.na <- ref.iav
ref.iav.na[ref.iav.na < 10^(-5)] <- NA
epsilon_iav <- abs((mod.iav - ref.iav))/ref.iav.na
epsilon_iav[epsilon_iav == Inf] <- NA # I changed Eq. 26 so that epsilon_iav > = 0
iav.score <- exp(-epsilon_iav) # iav score as a function of space
S_iav_not.weighted <- mean(iav.score, na.rm = TRUE) # scalar score (not weighted)
# calculate the weighted scalar score
a <- iav.score * weights # this is a raster
b <- sum(a, na.rm = TRUE) # this is a scalar, the sum up all values
S_iav_weighted <- b/sum(weights, na.rm = TRUE) # scalar score (weighted)
# compute global mean values of score input(s)
mod.iav.scalar <- mean(mod.iav, na.rm = TRUE) # global mean value
ref.iav.scalar <- mean(ref.iav, na.rm = TRUE) # global mean value
epsilon_iav.scalar <- stats::median(epsilon_iav, na.rm = TRUE) # global mean value
# (5) dist
mod.sigma.scalar <- sd(mod.mean, na.rm = TRUE) # standard deviation of period mean data
ref.sigma.scalar <- sd(ref.mean, na.rm = TRUE) # standard deviation of period mean data
sigma <- mod.sigma.scalar/ref.sigma.scalar
y <- mod.mean
x <- ref.mean
reg <- stats::lm(y ~ x)
R <- sqrt(summary(reg)$r.squared)
S_dist <- 2 * (1 + R)/(sigma + 1/sigma)^2 # weighting does not apply
# scores
w.bias <- score.weights[1]
w.rmse <- score.weights[2]
w.phase <- score.weights[3]
w.iav <- score.weights[4]
w.dist <- score.weights[5]
# not weighted
S_bias <- S_bias_not.weighted
S_rmse <- S_rmse_not.weighted
S_phase <- S_phase_not.weighted
S_iav <- S_iav_not.weighted
# weight importance of statisitcal metrics and compute overall score
S_overall <- (w.bias * S_bias + w.rmse * S_rmse + w.phase * S_phase + w.iav * S_iav + w.dist * S_dist)/(w.bias + w.rmse +
w.phase + w.iav + w.dist)
scores <- data.frame(variable.name, ref.id, S_bias, S_rmse, S_phase, S_iav, S_dist, S_overall)
scores_not.weighted <- scores
# weighted (except for S_dist)
S_bias <- S_bias_weighted
S_rmse <- S_rmse_weighted
S_phase <- S_phase_weighted
S_iav <- S_iav_weighted
S_overall <- (w.bias * S_bias + w.rmse * S_rmse + w.phase * S_phase + w.iav * S_iav + w.dist * S_dist)/(w.bias + w.rmse +
w.phase + w.iav + w.dist)
scores <- data.frame(variable.name, ref.id, S_bias, S_rmse, S_phase, S_iav, S_dist, S_overall)
scores_weighted <- scores
#
scores <- rbind(scores_not.weighted, scores_weighted)
rownames(scores) <- c("not.weighted", "weighted")
if (outputDir != FALSE) {
utils::write.table(scores, paste(outputDir, "/", "scorevalues", "_", meta.data.com, sep = ""))
}
# get all score values in case you want to compare this run against another run using a significance test
dist.score <- rep(S_dist, length(bias.score))
all.score.values <- data.frame(bias.score, rmse.score, phase.score, iav.score, dist.score)
colnames(all.score.values) <- c("bias.score", "rmse.score", "phase.score", "iav.score", "dist.score")
all.score.values[is.na(all.score.values)] <- NA # converts all NaN to NA
if (outputDir != FALSE) {
utils::write.table(all.score.values, paste(outputDir, "/", "allscorevalues", "-", variable.name, "-", ref.id, sep = ""))
}
# selected score inputs
scoreinputs <- data.frame(long.name, variable.name, ref.id, variable.unit, mod.mean.scalar, ref.mean.scalar, bias.scalar,
bias.scalar.rel, ref.sd.scalar, epsilon_bias.scalar, S_bias_not.weighted, rmse.scalar, crmse.scalar, ref.sd.scalar, epsilon_rmse.scalar,
S_rmse_not.weighted, mod.max.month.scalar, ref.max.month.scalar, phase.scalar, S_phase_not.weighted, mod.iav.scalar,
ref.iav.scalar, epsilon_iav.scalar, S_iav_not.weighted, mod.sigma.scalar, ref.sigma.scalar, sigma, R, S_dist)
if (outputDir != FALSE) {
utils::write.table(scoreinputs, paste(outputDir, "/", "scoreinputs", "_", meta.data.com, sep = ""))
}
# min.max.int
mmi.bias <- intFun.min.max.int.bias(bias)
mmi.bias.score <- c(0, 1, 0.1)
mmi.crmse <- intFun.min.max.int(crmse)
mmi.rmse.score <- c(0, 1, 0.1)
mmi.phase <- c(0, 6, 1)
mmi.phase.score <- c(0, 1, 0.1)
mmi.iav.score <- c(0, 1, 0.1)
# min.max.int.mod.ref
mmi.mean <- intFun.min.max.int.mod.ref(mod.mean, ref.mean)
mmi.max.month <- c(1, 12, 1)
mmi.iav <- intFun.min.max.int.mod.ref(mod.iav, ref.iav)
# add metadata:
# 1. figure title (e.g. Mean_nee_ModID_123_from_1982-01_to_2008-12)
# 2. min, max, interval used in legend (e.g. 0, 1, 0.1)
# 3. legend bar text (e.g. 'score (-)')
attr(mod.mean, "metadata") <- list(paste("Mean", meta.data.mod, sep = "_"), mmi.mean, variable.unit)
attr(ref.mean, "metadata") <- list(paste("Mean", meta.data.ref, sep = "_"), mmi.mean, variable.unit)
attr(bias, "metadata") <- list(paste("Bias", meta.data.com, sep = "_"), mmi.bias, variable.unit)
attr(bias.score, "metadata") <- list(paste("Bias_score", meta.data.com, sep = "_"), mmi.bias.score, "score (-)")
attr(crmse, "metadata") <- list(paste("CRMSE", meta.data.com, sep = "_"), mmi.crmse, variable.unit)
attr(rmse.score, "metadata") <- list(paste("RMSE_score", meta.data.com, sep = "_"), mmi.rmse.score, "score (-)")
attr(mod.max.month, "metadata") <- list(paste("Month_with_max", meta.data.mod, sep = "_"), mmi.max.month, "month")
attr(ref.max.month, "metadata") <- list(paste("Month_with_max", meta.data.ref, sep = "_"), mmi.max.month, "month")
attr(phase, "metadata") <- list(paste("Diff_in_max_month", meta.data.com, sep = "_"), mmi.phase, "month")
attr(phase.score, "metadata") <- list(paste("Seasonality_score", meta.data.com, sep = "_"), mmi.phase.score, "score (-)")
attr(mod.iav, "metadata") <- list(paste("IAV", meta.data.mod, sep = "_"), mmi.iav, variable.unit)
attr(ref.iav, "metadata") <- list(paste("IAV", meta.data.ref, sep = "_"), mmi.iav, variable.unit)
attr(iav.score, "metadata") <- list(paste("IAV_score", meta.data.com, sep = "_"), mmi.iav.score, "score (-)")
# write data to file
stat.metric <- data.frame(lon, lat, mod.mean, ref.mean, bias, bias.score, crmse, rmse.score, mod.max.month, ref.max.month,
phase, phase.score, mod.iav, ref.iav, iav.score)
colnames(stat.metric) <- c("lon", "lat", "mod.mean", "ref.mean", "bias", "bias.score", "crmse", "rmse.score", "mod.max.month",
"ref.max.month", "phase", "phase.score", "mod.iav", "ref.iav", "iav.score")
my.filename <- paste(variable.name, ref.id, sep = "_")
if (outputDir != FALSE) {
utils::write.table(stat.metric, paste(outputDir, my.filename, sep = "/"))
}
land <- intFun.coast(my.xlim, my.ylim, my.projection, shp.filename) # reproject coastline
# loop through all layers of the raster stack
lon2 <- stat.metric$lon
lat2 <- stat.metric$lat
for (i in 3:ncol(stat.metric)) {
data <- stat.metric[i]
cname <- base::names(data)
data <- data.frame(lon2, lat2, data)
data <- stats::na.omit(data)
lon <- data$lon2
lat <- data$lat2
data <- data.frame(data[, cname])
colnames(data) <- cname
values <- data
colnames(values) <- "values"
data <- intFun.site.points(lon, lat, values) # make spatial points
x <- base::get(cname) # get the data
my.attributes <- attributes(x)
meta <- my.attributes$metadata
id <- unlist(meta[1])
my.title <- gsub("_", " ", id)
min.max.int <- unlist(meta[2])
legend.bar.text <- latex2exp::TeX(meta[[3]])
# for legend
min <- min.max.int[1]
max <- min.max.int[2]
interval <- min.max.int[3]
my.breaks <- round(seq(min, max, interval), 3) # breaks of the colors
my.labels <- round(seq(min, max, interval), 3) # locations where to set the labels
my.col <- viridis::viridis(n = length(my.breaks) - 1, direction = -1)
my.col.bias <- scico::scico(n = length(my.breaks) - 1, palette = "vik")
my.col.phase <- grDevices::rainbow(n = length(my.breaks) - 1)
if (i == 5)
{
my.col <- my.col.bias
} # divergent color scheme for bias plots
if (i == 9)
{
my.col <- my.col.phase
} # circular color scheme for phase
if (i == 10)
{
my.col <- my.col.phase
} # circular color scheme for phase
if (i == 11)
{
my.col <- my.col.phase
} # circular color scheme for phase
my.axis.args <- list(at = my.labels, labels = my.labels, cex.axis = 1)
my.legend.args <- list(text = legend.bar.text, side = 2, font = 1, line = 1, cex = 1)
colors <- cut(values$values, breaks = my.breaks, labels = my.col, include.lowest = TRUE)
colors <- toString(colors)
colors <- unlist(strsplit(colors, split = ", "))
# plot
oldpar <- graphics::par(mfrow = c(1, 2))
on.exit(graphics::par(oldpar))
my.plotname <- paste(my.filename, cname, sep = "_")
my.plotname <- gsub("_", "-", my.plotname)
my.plotname <- gsub(".", "-", my.plotname, fixed = TRUE)
if (outputDir != FALSE) {
grDevices::pdf(paste(outputDir, "/", my.plotname, ".pdf", sep = ""), width = plot.width, height = plot.height)
}
graphics::par(mfrow = c(1, 1), font.main = 1, mar = c(3, 3, 3, 4), lwd = 1, cex = 1)
# create a dummy raster layer
if (irregular == TRUE) {
my.xlim <- c(raster::extent(land)[1], raster::extent(land)[2])
my.ylim <- c(raster::extent(land)[3], raster::extent(land)[4])
}
# mod.mean
dummy <- stats::runif(360 * 180, min = min, max = max)
dummy <- matrix(dummy, nrow = 180)
dummy <- raster::raster(dummy)
raster::extent(dummy) <- myExtent
raster::plot(dummy, col = NA, xlim = my.xlim, ylim = my.ylim, main = paste(long.name, my.title, sep = "\n"), axes = FALSE,
legend = FALSE)
raster::plot(land, col = "grey", border = NA, add = TRUE)
graphics::points(data, pch = 16, col = colors)
if (irregular == FALSE) {
graphics::axis(1, labels = TRUE, tcl = 0.3)
graphics::axis(2, labels = TRUE, tcl = 0.3, las = 2)
}
raster::plot(dummy, legend.only = TRUE, col = my.col, breaks = my.breaks, axis.args = my.axis.args, legend.args = my.legend.args,
legend.width = 1.5, legend.shrink = 1, font = 1)
if (outputDir != FALSE) {
grDevices::dev.off()
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.