Nothing
#'Computes the correlation coefficient between an array of forecasts and their corresponding observations
#'
#'Calculates the correlation coefficient (Pearson, Kendall or Spearman) for
#'an array of forecasts and observations. The input should be an array with
#'dimensions c(no. of datasets, no. of start dates, no. of forecast times,
#'no. of lons, no. of lats.), where the longitude and latitude dimensions are
#'optional. The correlations are computed along the poscor dimension which
#'should correspond to the startdate dimension. If compROW is given, the
#'correlations are computed only if rows along the compROW dimension are
#'complete between limits[1] and limits[2], i.e. there are no NAs between
#'limits[1] and limits[2]. This option can be activated if the user wishes to
#'account only for the forecasts for which observations are available at all
#'leadtimes. \cr
#'Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr
#'The confidence interval is computed by a Fisher transformation.\cr
#'The significance level relies on a one-sided student-T distribution.\cr
#'We can modifiy the treshold of the test modifying siglev (default value=0.95).\cr\cr
#'.Corr calculates the correlation between the ensemble mean and the
#'observations, using an N by M matrix (exp) of forecasts and a vector of
#'observations (obs) as input.
#'
#'@param var_exp Array of experimental data.
#'@param var_obs Array of observational data, same dimensions as var_exp
#' except along posloop dimension, where the length can be nobs instead of nexp.
#'@param posloop Dimension nobs and nexp.
#'@param poscor Dimension along which correlation are to be computed (the
#' dimension of the start dates).
#'@param compROW Data taken into account only if (compROW)th row is complete.
#' Default = NULL.
#'@param limits Complete between limits[1] & limits[2]. Default = NULL.
#'@param siglev Significance level. Default = 0.95.
#'@param method Type of correlation: 'pearson', 'spearman' or 'kendall'.
#' Default='pearson'
#'@param conf Whether to compute confidence intervals (default = 'TRUE') or
#' not (FALSE).
#'@param pval Whether to compute statistical significance p-value (default = 'TRUE')
#' or not (FALSE).
#'@param exp N by M matrix of N forecasts from M ensemble members.
#'@param obs Vector of the corresponding observations of length N.
#'
#'@return
#'Corr: Array with dimensions :\cr
#'c(# of datasets along posloop in var_exp, # of datasets along posloop in
#'var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr
#'The third dimension, of length 4 maximum, contains to the lower limit of
#'the 95\% confidence interval, the correlation, the upper limit of the 95\%
#'confidence interval and the 95\% significance level given by a one-sided
#'T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension
#'will be of length 3. If the confidence intervals are disabled via
#'\code{conf = FALSE}, this dimension will be of length 2. If both are
#'disabled, this will be of length 2. \cr\cr
#'.Corr:
#' \itemize{
#' \item{$corr}{The correlation statistic.}
#' \item{$p_val}{Corresponds to the p values for the \code{siglev}\%
#' (only present if \code{pval = TRUE}) for the correlation.}
#' \item{$conf_low}{Corresponds to the upper limit of the \code{siglev}\%
#' (only present if \code{conf = TRUE}) for the correlation.}
#' \item{$conf_high}{Corresponds to the lower limit of the \code{siglev}\%
#' (only present if \code{conf = TRUE}) for the correlation.}
#' }
#'
#'@keywords datagen
#'@author History:\cr
#' 0.1 - 2011-04 (V. Guemas) - Original code\cr
#'1.0 - 2013-09 (N. Manubens) - Formatting to R CRAN\cr
#'1.1 - 2014-10 (M. Menegoz) - Adding siglev argument\cr
#'1.2 - 2015-03 (L.P. Caron) - Adding method argument\cr
#'1.3 - 2017-02 (A. Hunter) - Adapted to veriApply()
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'runmean_months <- 12
#'dim_to_smooth <- 4
#'# Smooth along lead-times
#'smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth)
#'smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth)
#'dim_to_mean <- 2 # Mean along members
#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times
#'leadtimes_per_startdate <- 60
#'corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean),
#' Mean1Dim(smooth_ano_obs, dim_to_mean),
#' compROW = required_complete_row,
#' limits = c(ceiling((runmean_months + 1) / 2),
#' leadtimes_per_startdate - floor(runmean_months / 2)))
#' \donttest{
#'PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation",
#' monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'),
#' listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1),
#' fileout = 'tos_cor.eps')
#' }
#'
#'# The following example uses veriApply combined with .Corr instead of Corr
#' \dontrun{
#'require(easyVerification)
#'Corr2 <- s2dverification:::.Corr
#'corr2 <- veriApply("Corr2",
#' smooth_ano_exp,
#' # see ?veriApply for how to use the 'parallel' option
#' Mean1Dim(smooth_ano_obs, dim_to_mean),
#' tdim = 3, ensdim = 2)
#' }
#'@rdname Corr
#'@importFrom stats cor pt qnorm
#'@export
Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL,
limits = NULL, siglev = 0.95, method = 'pearson',
conf = TRUE, pval = TRUE) {
#
# Remove data along compROW dim if there is at least one NA between limits
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
if (!is.null(compROW)) {
if (is.null(limits)) {
limits <- c(1, dim(var_obs)[compROW])
}
outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits)))
outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW])
var_obs[which(outrows)] <- NA
}
#
# Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd
# pos
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
dimsvar <- dim(var_exp)
for (iind in 1:length(dimsvar)) {
if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) {
stop("var_exp & var_obs must have same dimensions except along posloop")
}
}
if (dimsvar[poscor] < 3 ) {
stop("At least 3 values required to compute correlation")
}
if (method != "kendall" && method != "spearman" && method != "pearson") {
stop("Wrong correlation method")
}
nexp <- dimsvar[posloop]
nobs <- dim(var_obs)[posloop]
var_exp <- Enlarge(var_exp, 10)
var_obs <- Enlarge(var_obs, 10)
posaperm <- numeric(10)
posaperm[1] <- posloop
posaperm[2] <- poscor
posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)]
var_exp <- aperm(var_exp, posaperm)
var_obs <- aperm(var_obs, posaperm)
dimsaperm <- dim(var_exp)
#
# Check the siglev arguments:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (siglev > 1 || siglev < 0) {
stop("siglev need to be higher than O and lower than 1")
}
#
# Loop to compute correlation for each grid point
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
dim_val <- 2
dim_pval <- 4
nvals <- 1 + 2*conf + pval
if (!conf) {
dim_val <- 1
dim_pval <- 2
} else {
conf_low <- (1 - siglev) / 2
conf_high <- 1 - conf_low
}
CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10]))
for (jexp in 1:nexp) {
for (jobs in 1:nobs) {
for (j3 in 1:dimsaperm[3]) {
for (j4 in 1:dimsaperm[4]) {
for (j5 in 1:dimsaperm[5]) {
for (j6 in 1:dimsaperm[6]) {
for (j7 in 1:dimsaperm[7]) {
for (j8 in 1:dimsaperm[8]) {
for (j9 in 1:dimsaperm[9]) {
for (j10 in 1:dimsaperm[10]) {
tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9,
j10]
tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9,
j10]
if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) {
toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method)
CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto
#eno <- min(Eno(tmp2, 1), Eno(tmp1, 1))
if (pval || conf) {
if (method == "kendall" | method == "spearman") {
eno <- Eno(rank(tmp2), 1)
} else if (method == "pearson") {
eno <- Eno(tmp2, 1)
}
}
if (pval) {
#t <- qt(0.95, eno - 2)
# t <- qt(siglev, eno - 2)
# CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9,
# j10] <- sqrt((t * t) / ((t * t) + eno - 2))
t <-sqrt(toto * toto * (eno - 2) / (1 - (toto ^ 2)))
CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9,
j10] <- pt(t, eno - 2, lower.tail = FALSE)
}
if (conf) {
CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9,
j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt(
#j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt(
eno - 3))
CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9,
j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt(
#j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt(
eno - 3))
}
}
}
}
}
}
}
}
}
}
}
}
#
dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)])
#
# Output
# ~~~~~~~~
#
CORR
}
#'@rdname Corr
#'@importFrom stats cor pt qnorm
#'@export
.Corr <- function(exp, obs, siglev = 0.95, method = 'pearson',
conf = TRUE, pval = TRUE) {
# Check 'method'
if (!(method %in% c("kendall", "spearman", "pearson"))) {
stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
# Check 'siglev'
if (siglev > 1 || siglev < 0) {
stop("Parameter 'siglev' must be higher than O and lower than 1.")
}
}
p_val <- NULL
conflow <- NULL
confhigh <- NULL
ens_mean <- rowMeans(exp)
CORR <- cor(obs, ens_mean, use = "pairwise.complete.obs", method = method)
if (pval || conf) {
if (method == "kendall" || method == "spearman") {
eno <- Eno(rank(obs), 1)
} else if (method == "pearson") {
eno <- Eno(obs, 1)
}
}
if (pval && (method == "pearson")) {
t <-sqrt(CORR * CORR * (eno - 2) / (1 - (CORR ^ 2)))
p_val <- pt(t, eno - 2, lower.tail = FALSE)
}
if (conf && (method == "pearson")) {
conf_low <- (1 - siglev) / 2
conf_high <- 1 - conf_low
conf_int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)),
tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno - 3)))
conf_int <- conf_int[!is.na(CORR)]
conflow <- conf_int[1]
confhigh <- conf_int[2]
}
# Output
invisible(result <- list(corr = CORR, p_val = p_val, conf_low = conflow, conf_high = confhigh))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.