#' Correlate a temperature series with atmospheric CO2 and plot the linear fit
#'
#' @param series Name of the temperature-series column in the climate data set.
#' @param start_year,end_year Inclusive year range; years before 1958 use Law
#'   Dome ice-core CO2, 1958+ uses "CO2 Mauna Loa".
#' @param data Optional climate data set; if missing, 'get_climate_data()' is
#'   called with the 'download' & 'baseline' arguments.
#' @param ylab Optional y-axis label expression; a default is built from 'series'.
#' @param main_base Leading text of the plot title.
#' @param text_x,text_y Plot coordinates for the R-squared annotation.
#' @param baseline,download Passed to 'get_climate_data()' when 'data' is missing.
#' @return A list with the series name, the temp/CO2 matrix, and the 'lm' fit.
#' @export
correlate_co2_temperature <- function(series, start_year=1880, end_year=current_year - 1, data, ylab, main_base="Temperature", text_x=380, text_y=-0.4, baseline=FALSE, download=FALSE)
{
if (missing(data))
e <- get_climate_data(download=download, baseline=baseline)
else
e <- data
## Mauna Loa CO2 starts in 1958, so clamp the instrumental portion to 1958+.
dm1 <- data.matrix(e[e$year %in% ifelse(start_year < 1958, 1958, start_year):end_year, c(series, "CO2 Mauna Loa")])
colnames(dm1) <- c("temp", "co2")
## Label rows "year.month" so they can be recovered below as (approximate) numeric dates.
row.names(dm1) <- apply(e[e$year %in% ifelse(start_year < 1958, 1958, start_year):end_year, c("year", "month")], 1, paste, collapse=".")
## Law Dome ice-core CO2 (law2006.txt, shipped with the package) supplies pre-1958 yearly values.
lawPath <- system.file("extdata/co2/law2006.txt", package="climeseries")
law <- read.table(lawPath, header=TRUE, skip=182, nrow=2004)
dm <- dm1
if (start_year < 1958) {
## Splice yearly Law Dome CO2 onto yearly temperature means for the pre-instrumental years.
l <- law[law$YearAD %in% start_year:1957, c("CO2spl")]
flit <- e[e$year %in% start_year:1957, c("year", series)]
flitDm <- tapply(flit[, 2], flit[, 1], mean) # Yearly mean temperature, by year.
dm0 <- data.matrix(data.frame(temp=flitDm, co2=l))
dm <- rbind(dm0, dm1)
}
r <- cor(dm[, 1], dm[, 2], use="pairwise.complete.obs")
#r^2
## Quick exploratory time plots of each variable (row names treated as numeric "year.month").
plot(as.numeric(row.names(dm)), dm[, 1])
plot(as.numeric(row.names(dm)), dm[, 2])
## Build plotmath labels; substitute() injects the runtime values into the expressions.
xlab <- eval(substitute(expression(paste("Atmospheric CO", phantom()[2], " (PPM)", sep=""))))
if (missing(ylab))
ylab <- eval(substitute(expression(paste(series, " Temp. Anomaly (", phantom(l) * degree, "C)", sep="")), list(series=as.symbol(series))))
main <- eval(substitute(expression(paste(main_base, " vs. CO", phantom()[2], " (", startYear, "\u2013", endYear, ")", sep="")), list(endYear=as.symbol(end_year), startYear=as.symbol(start_year), main_base=as.symbol(main_base))))
## Main scatterplot of temperature vs. CO2 with a simple linear fit.
plot(dm[, 2], dm[, 1], ylab=ylab, xlab=xlab, main=main)
m <- lm(dm[, 1] ~ dm[, 2])
#summary(m)
abline(coef(m)[1], coef(m)[2], col="red", lwd=2)
## N.B. sprintf() is called with the format as the named 'fmt' argument; unconventional but valid.
r2Text <- eval(substitute(expression(paste("R", phantom()^2, " = ", v, sep="")), list(v=sprintf(r^2, fmt="%1.2f"))))
text(text_x, text_y, r2Text)
list(series=series, data=dm, model=m)
}
## usage:
# [File: "HadCRUT4-vs-CO2_1850-2015.png"]
# rv <- correlate_co2_temperature("HadCRUT4 Global", 1850)
# [File: "HadCRUT4-vs-CO2_1970-2015.png"]
# rv <- correlate_co2_temperature("HadCRUT4 Global", 1970)
# [File: "GISTEMP-vs-CO2_1880-2015.png"]
# rv <- correlate_co2_temperature("GISTEMP Global", 1880)
# [File: "RATPAC-A 850-300 mb-vs-CO2_1958-2015.png"]
# rv <- correlate_co2_temperature("RATPAC-A 850-300 mb Global", 1958)
# [File: "RSS TLT 3.3-vs-CO2_1979-2015.png"]
# rv <- correlate_co2_temperature("RSS TLT 3.3 -70.0/82.5", 1979)
#' Plot year-to-date running-mean temperatures, one line per year ("horse race")
#'
#' @param series Name of the temperature-series column to plot.
#' @param top_n_years If given, keep only the n warmest years by latest
#'   year-to-date mean; a negative value keeps the n coolest years instead.
#' @param baseline TRUE for the package default baseline, FALSE/NULL for none,
#'   or a vector of years; used for the y-axis label text.
#' @param data Optional climate data set; if missing, 'get_climate_data()' is called.
#' @param size Line width passed to 'ggplot2::geom_line()'.
#' @return A list with the per-year latest-YTD table ('data') and the ggplot
#'   object ('grob'); the plot is also printed.
#' @export
#' @import data.table
plot_horse_race <- function(series, top_n_years = NULL, baseline = TRUE, data, size = 1)
{
if (missing(data))
data <- get_climate_data(download = FALSE, baseline = baseline)
d <- data[, c("year", "month", series)]
d <- subset(d, na_unwrap(d[, series])) # Trim leading/trailing all-NA rows.
## Reshape to wide: one row per year, one column per month.
d1 <- data.table::data.table(dcast(d, year ~ month, value.var = series))
d2 <- data.table::copy(d1)
## Calculate cumulative average by row.
d2[, names(d2[, !1, with = FALSE]) :=
as.list((function(x) { cumsum(as.matrix(x)[1, ]) / seq_along(x) })(.SD)),
.SDcols = names(d2[, !1, with = FALSE]), by = 1:nrow(d2)]
## Melt data set for plotting.
d3 <- dplyr::arrange(data.table::melt(d2, id.vars = c("year"),
variable.name = "month", value.name = "YTD mean temp."), year, month) %>%
as.data.table()
d4 <- data.table::copy(d2)
## For each year, keep the last non-missing YTD mean, i.e. the latest value so far.
d4[, `latest YTD mean temp.` :=
as.list((function(x) { y <- as.matrix(x)[1, ]; tail(y[!is.na(y)], 1) })(.SD)),
.SDcols = names(d4[, !1, with = FALSE]), by = 1:nrow(d4)]
d4 <- dplyr::arrange(d4[, .(year, `latest YTD mean temp.`)], desc(`latest YTD mean temp.`)) %>%
as.data.table()
## Top n warmest years:
if (!is.null(top_n_years)) {
if (top_n_years < 0)
d4 <- tail(d4, -top_n_years) # Negative n: d4 is sorted descending, so tail() gives the n coolest.
else
d4 <- head(d4, top_n_years)
d3 <- d3[year %in% d4$year, ]
}
## Resolve 'baseline' into y-axis label text.
baselineText <- ""
if (is.logical(baseline)) {
if (baseline) baseline <- defaultBaseline
else baseline <- NULL
}
if (!is.null(baseline))
baselineText <- " w.r.t. " %_% min(baseline) %_% "\u2013" %_% max(baseline)
subtitle <- paste(series, " ", min(d$year), "\u2013", max(d$year), sep = "")
ylab <- eval(substitute(expression(paste("Temperature Anomaly (", phantom(l) * degree, "C)", b, sep = "")),
list(b = baselineText)))
## NOTE(review): 'size' for lines is deprecated in newer ggplot2 in favor of 'linewidth' — still works.
g <- ggplot2::ggplot(d3, ggplot2::aes(x = month, y = `YTD mean temp.`, group = factor(year), color = factor(year))) +
ggplot2::theme_bw() +
ggplot2::geom_line(size = size) +
#ggplot2::scale_colour_discrete(guide = "none") +
ggplot2::labs(color = "Year") +
ggplot2::scale_x_discrete(expand = c(0, 1)) +
directlabels::geom_dl(ggplot2::aes(label = year), method = list(directlabels::dl.trans(x = x + 0.2), "last.points", cex = 0.8)) +
ggplot2::labs(title = "Year-to-Date Temperature Anomalies", subtitle = subtitle, y = ylab)
print(g)
return (list(data = d4, grob = g))
}
## usage:
# ytd <- plot_horse_race("GISTEMP Global")
# ytd <- plot_horse_race("UAH TLT 6.0 Global", 10)
# ytd <- plot_horse_race("NCEI US Avg. Temp.", 10) # Use -10 for bottom 10 years.
# print(as.data.frame(ytd$data), digits = 3, row.names = FALSE, right = FALSE)
## Zoom in w/out clipping data:
# ytd <- plot_horse_race("NCEI US Avg. Temp.", 10) # Use -10 for bottom 10 years.
## Or, if the current year is incomplete:
# ytd <- plot_horse_race(data = get_climate_data() %>% dplyr::filter(year < climeseries::current_year), "NCEI US Avg. Temp.", 10) # Use -10 for bottom 10 years.
# ytd$grob + ggplot2::coord_cartesian(ylim = c(-0.5, 2.5))
## Check:
# show_warmest_years(inst0, "NCEI US Avg. Temp.")
#' Fetch a yearly GISTEMP series (e.g. an Internet Archive snapshot) and expand
#' it to the package's monthly layout, with the yearly value stored in June.
#'
#' @param series Column name to give the temperature values.
#' @param uri URL of the whitespace-delimited yearly table.
#' @param skip Rows to skip when parsing the filtered table.
#' @return A data frame with 'year', 'month', 'yr_part', and the series column,
#'   or NULL if the data could not be fetched/parsed.
#' @export
get_yearly_gistemp <- function(series="GISTEMP Met. Stations Oct. 2005", uri="https://web.archive.org/web/20051029130103/http://data.giss.nasa.gov/gistemp/graphs/Fig_A.txt", skip=0L)
{
  Error <- function(e) {
    cat(series %_% " series not available.", fill=TRUE)
  }
  x <- NULL
  gissGlobalMean <- 14.0 # GISS absolute global mean for 1951–1980 (reference only; unused below).
  tryCatch({
    #r <- httr::content(httr::GET(uri, httr::timeout(300)), "text", encoding="ISO-8859-1") # Gives timeout errors ≪ 300 s
    ## Use RCurl with a browser user agent and redirect-following instead.
    curl <- RCurl::getCurlHandle()
    RCurl::curlSetOpt(useragent="Mozilla/5.0", followlocation = TRUE, curl = curl)
    r <- RCurl::getURLContent(uri, curl = curl)
    ## Blank out any line not starting with a 4-digit year (headers, notes), then parse the rest.
    tab <- gsub("^(?!\\s*\\d{4}\\s+).*$", "", strsplit(r, '\n')[[1L]], perl=TRUE)
    x <- read.table(text=tab, header=FALSE, as.is=TRUE, na.strings=c("*", "**", "***", "****"), skip=skip, check.names=FALSE)
  }, error=Error, warning=Error)
  ## If the download or parse failed, 'x' is still NULL; previously execution
  ## fell through and crashed on 'x$V1'. Signal unavailability with NULL instead.
  if (is.null(x))
    return (NULL)
  d <- cbind(data.frame(year=x$V1, month=6, check.names=FALSE, stringsAsFactors=FALSE), temp=x$V2)
  ## Expand to one row per month; months other than June get NA temperatures.
  d <- base::merge(expand.grid(month=1:12, year=d$year), d, by=c("year", "month"), all=TRUE)
  d$yr_part <- d$year + (2 * d$month - 1)/24 # Mid-month fractional year.
  names(d)[names(d) == "temp"] <- series
  return (d)
}
## usage:
# inst <- get_climate_data(download=FALSE, baseline=FALSE)
# allSeries <- list(
# inst,
# get_yearly_gistemp("GISTEMP Global Met. Stations Oct. 2005", "https://web.archive.org/web/20051029130103/http://data.giss.nasa.gov/gistemp/graphs/Fig_A.txt"),
# get_yearly_gistemp("GISTEMP Global Met. Stations Sep. 2015", "https://web.archive.org/web/20150918040726/http://data.giss.nasa.gov/gistemp/graphs_v3/Fig.A.txt"),
# get_yearly_gistemp("GISTEMP Global Met. Stations Apr. 2016", "https://web.archive.org/web/20160419081141/http://data.giss.nasa.gov/gistemp/graphs_v3/Fig.A.txt"),
## N.B. The following won't work any more since changes to NASA's Web site:
# get_yearly_gistemp("GISTEMP Global Met. Stations Current", "http://data.giss.nasa.gov/gistemp/graphs_v3/Fig.A.txt")
# )
# d <- Reduce(merge_fun_factory(all=TRUE, by=c(Reduce(intersect, c(list(climeseries::common_columns), lapply(allSeries, names))))), allSeries)
# d <- recenter_anomalies(d, 1951:1980) # Should be the same baseline, but make sure.
# series <- sapply(allSeries[-1], get_climate_series_names)
# plot_climate_data(d, series=series, start=1994, ma=NULL, lwd=2, conf_int=FALSE, show_trend=TRUE)
## N.B. 'show_trend=TRUE' requires that the column 'met_year' be in the data frame 'x'. Fix this!
#' Fetch an archived monthly GISTEMP table (wide: year × 12 months) and reshape
#' it to the package's long monthly layout, scaling hundredths of a degree to
#' degrees C.
#'
#' @param series Column name to give the temperature values.
#' @param uri URL of the archived GISTEMP table.
#' @param skip Rows to skip when parsing the filtered table.
#' @return A data frame with 'year', 'yr_part', 'month', and the series column,
#'   or NULL if the data could not be fetched/parsed.
#' @export
get_old_monthly_gistemp <- function(series="GISTEMP Global Nov. 2015", uri="http://web.archive.org/web/20151218065405/http://data.giss.nasa.gov/gistemp/tabledata_v3/GLB.Ts+dSST.txt", skip=0L)
{
  Error <- function(e) {
    cat(series %_% " series not available.", fill=TRUE)
  }
  x <- NULL
  gissGlobalMean <- 14.0 # GISS absolute global mean for 1951–1980 (reference only; unused below).
  ## N.B. This tryCatch() was previously commented out, which left 'Error'
  ## unused and let download/parse failures crash the reshape below.
  tryCatch({
    #r <- httr::content(httr::GET(uri, httr::timeout(300)), "text", encoding="ISO-8859-1") # Gives timeout errors ≪ 300 s
    curl <- RCurl::getCurlHandle()
    RCurl::curlSetOpt(useragent="Mozilla/5.0", followlocation = TRUE, curl = curl)
    r <- RCurl::getURLContent(uri, curl = curl)
    ## Shorten the longest missing-value markers so columns stay aligned for read.table().
    r <- gsub("*****", " ****", r, fixed=TRUE)
    r <- gsub("****", " ***", r, fixed=TRUE)
    ## Blank out any line not starting with a digit (headers, notes), then parse.
    r <- gsub("^\\D+.*$", "", strsplit(r, '\n')[[1L]], perl=TRUE)
    x <- read.table(text=r, header=FALSE, as.is=TRUE, na.strings=c("*", "**", "***", "****"), skip=skip, check.names=FALSE)
  }, error=Error, warning=Error)
  ## Signal unavailability with NULL rather than crashing on the reshape.
  if (is.null(x))
    return (NULL)
  ## Wide (year + 12 monthly columns) to long.
  flit <- reshape2::melt(x[, 1L:13L], id.vars="V1", variable.name="month", value.name="temp")
  for (i in names(flit)) flit[[i]] <- as.numeric(flit[[i]])
  flit <- dplyr::arrange(flit, V1, month)
  d <- data.frame(year=flit$V1, yr_part=flit$V1 + (2 * flit$month - 1)/24, month=flit$month, temp=flit$temp, check.names=FALSE, stringsAsFactors=FALSE)
  d$temp <- d$temp / 100 # Table values are hundredths of a degree C.
  names(d)[names(d) == "temp"] <- series
  return (d)
}
## usage:
# inst <- get_climate_data(download=FALSE, baseline=FALSE)
# climeseriesDataPath <- "C:/common/data/climate/climeseries"
# env <- new.env()
# load(paste(climeseriesDataPath, "climate-series_raw_20160711.RData", sep="/"), envir=env) # Get GISTEMP Global data from climeseries archive.
# env$d$`GISTEMP Global May 2016` <- env$d$GISTEMP
# allSeries <- list(
# inst,
# get_old_monthly_gistemp("GISTEMP Global Nov. 2005", "http://web.archive.org/web/20051227031241/http://data.giss.nasa.gov/gistemp/tabledata/GLB.Ts+dSST.txt"),
# env$d[, c(climeseries::common_columns, "GISTEMP Global May 2016")],
# get_old_monthly_gistemp()
# )
# d <- Reduce(merge_fun_factory(all=TRUE, by=c(Reduce(intersect, c(list(climeseries::common_columns), lapply(allSeries, names))))), allSeries)
# d <- recenter_anomalies(d, 1951:1980) # Should be the same baseline, but make sure.
# series <- c("GISTEMP Global Nov. 2005", "GISTEMP Global Nov. 2015", "GISTEMP Global May 2016", "GISTEMP Global")
# plot_climate_data(d, series=series, ma=12, lwd=2, conf_int=FALSE, show_trend=TRUE)
#
## Even older e.g.:
# d1 <- get_old_monthly_gistemp("GISTEMP Global Land Dec. 1998", "http://web.archive.org/web/19990220235952/http://www.giss.nasa.gov/data/gistemp/GLB.Ts.txt")
# d2 <- get_old_monthly_gistemp("GISTEMP Global Dec. 2001", "https://web.archive.org/web/20020122065805/http://www.giss.nasa.gov/data/update/gistemp/GLB.Ts+dSST.txt")
## URL template for the U. Colorado interactive sea-level wizard; the
## '@@LAT@@'/'@@LON@@' placeholders are filled in by get_satellite_slr().
satelliteSlrBaseUrl <- "http://sealevel.colorado.edu/cgi-bin/table.cgi?q=content%2Finteractive-sea-level-time-series-wizard&dlat=@@LAT@@&dlon=@@LON@@&fit=s&smooth=n&days=0"
#' Scrape the satellite sea-level time series for a grid point from the
#' U. Colorado sea-level wizard.
#'
#' @param lat Latitude; positive is N of the equator.
#' @param lon Longitude; negative is W of the prime meridian.
#' @return A data frame with columns "Year" and "Sea Level (mm)", or NULL if
#'   the page could not be fetched/parsed.
#' @export
get_satellite_slr <- function(lat, lon) # +lat N of the equator, -lon W of the prime meridian.
{
  Error <- function(e) {
    ## N.B. This handler previously referenced an undefined variable 'series',
    ## which itself errored whenever the handler ran.
    cat("Satellite SLR series not available.", fill=TRUE)
  }
  skip <- 1L # Skip the header line of the PRE-formatted table.
  x <- NULL
  uri <- sub("@@LAT@@", lat, sub("@@LON@@", lon, satelliteSlrBaseUrl))
  tryCatch({
    ## Scrape Web page for data.
    webPage <- httr::content(httr::GET(uri), "text", encoding="ISO-8859-1")
    webPage <- readLines(tc <- textConnection(webPage)); close(tc)
    ## Qualify with 'XML::' for consistency with the xpathSApply() call below.
    pageTree <- XML::htmlTreeParse(webPage, useInternalNodes=TRUE)
    ## The data table is in a PRE node (the only one, hopefully).
    pre <- XML::xpathSApply(pageTree, "//*/pre", XML::xmlValue)
    ## Prepare the table text.
    tab <- strsplit(pre, '\n')[[1L]]
    tab <- tab[tab != ""]
    x <- read.table(text=tab, header=FALSE, skip=skip, fill=FALSE, check.names=FALSE, stringsAsFactors=FALSE)
  }, error=Error, warning=Error)
  ## If the fetch/parse failed, 'x' is still NULL; previously this crashed on 'x$V2'.
  if (is.null(x))
    return (NULL)
  x$V2 <- as.numeric(x$V2) * 10 # Convert to mm.
  names(x) <- c("Year", "Sea Level (mm)")
  x
}
## usage:
# d <- get_satellite_slr(lat=26, lon=-80) # Off the coast of Miami, FL.
## URL template for PSMSL monthly RLR tide-gauge data; the '@@STATION_ID@@'
## placeholder is filled in by get_tidegauge_slr().
tidegaugeSlrBaseUrl <- "http://www.psmsl.org/data/obtaining/rlr.monthly.data/@@STATION_ID@@.rlrdata"
## List of IDs: http://www.psmsl.org/data/obtaining/
## Other long records not in the PSMSL data set: https://psmsl.org/data/longrecords/
#' Fetch monthly RLR tide-gauge data for a PSMSL station.
#'
#' @param station_id PSMSL station ID (see http://www.psmsl.org/data/obtaining/).
#' @return A data frame with columns "Year" and "Sea Level (mm)", or NULL if
#'   the data could not be fetched.
#' @export
get_tidegauge_slr <- function(station_id)
{
  Error <- function(e) {
    ## N.B. This handler previously referenced an undefined variable 'series',
    ## which itself errored whenever the handler ran.
    cat("Tide-gauge SLR series for station " %_% station_id %_% " not available.", fill=TRUE)
  }
  x <- NULL
  uri <- sub("@@STATION_ID@@", station_id, tidegaugeSlrBaseUrl)
  tryCatch({
    ## PSMSL .rlrdata files are semicolon-separated; read.csv2() uses sep=";".
    x <- read.csv2(uri, header=FALSE, fill=FALSE, check.names=FALSE, stringsAsFactors=FALSE)
  }, error=Error, warning=Error)
  ## If the download failed, 'x' is still NULL; previously this crashed on 'x[, 1:2]'.
  if (is.null(x))
    return (NULL)
  x <- x[, 1:2] # Keep decimal year & sea level; drop the trailing flag columns.
  x$V2 <- as.numeric(x$V2)
  is.na(x$V2) <- x$V2 == -99999 # PSMSL missing-value sentinel.
  names(x) <- c("Year", "Sea Level (mm)")
  x
}
## usage:
# d <- get_tidegauge_slr(station_id=1858) # E.g. Virginia Key, E of Miami, FL.
## Based on the technique described at https://tamino.wordpress.com/2012/01/08/trend-and-cycle-together/.
#' Remove a periodic (e.g. seasonal) cycle from a series by fitting a Fourier
#' series to LOESS residuals, per the technique at
#' https://tamino.wordpress.com/2012/01/08/trend-and-cycle-together/.
#'
#' @param inst Climate data set containing 'series'.
#' @param series Name of the column to de-cycle.
#' @param center TRUE/FALSE to (not) mean-center the result, or a vector of
#'   years to center on.
#' @param period Cycle period in years; NULL to estimate it from the spectrum.
#' @param num_harmonics No. of sin/cos harmonic pairs in the Fourier fit.
#' @param loess... List of overrides for the LOESS-fit arguments.
#' @param unwrap Trim leading/trailing all-NA rows first?
#' @param keep_series,keep_interpolated,keep_loess Keep these columns in the result?
#' @param suffix Suffix for the de-cycled output column.
#' @param is_unc,unc_suffix,fit_unc Internal recursion controls for handling an
#'   accompanying uncertainty series.
#' @param ... Passed to the 'lm()' harmonic fit.
#' @return A data frame with the de-cycled series (and optional extras) added.
#' @export
remove_periodic_cycle <- function(
  inst,
  series,
  center = TRUE,
  period = 1,
  num_harmonics = 4,
  loess... = list(),
  unwrap = TRUE,
  keep_series = TRUE,
  keep_interpolated = FALSE,
  keep_loess = FALSE,
  suffix = " (anomalies)",
  is_unc = FALSE, unc_suffix = "_uncertainty", fit_unc = FALSE,
  ...
)
{
  ## If an uncertainty series accompanies 'series', recurse to process it too.
  uncertaintyDf <- NULL
  if (!is_unc && !is_invalid(inst[[series %_% unc_suffix]])) {
    ## Get all arguments of this function to pass on for recursion.
    recursiveArgs <- get_all_args()
    recursiveArgs$center <- FALSE
    recursiveArgs$is_unc <- TRUE
    uncertaintyDf <- do.call(remove_periodic_cycle, recursiveArgs)
  }
  d <- inst[, c(intersect(common_columns, names(inst)), series %_% ifelse(!is_unc, "", unc_suffix))]
  if (unwrap)
    d <- subset(d, na_unwrap(d[, series %_% ifelse(!is_unc, "", unc_suffix)]))
  if (is_unc && !fit_unc) {
    d[[series %_% suffix %_% unc_suffix]] <- d[[series %_% unc_suffix]]
    return (d)
  }
  ## Interpolate over internal NAs so the LOESS fit (and spectrum) see a complete series.
  d[[series %_% " (interpolated)" %_% ifelse(!is_unc, "", unc_suffix)]] <-
    drop(interpNA(d[, series %_% ifelse(!is_unc, "", unc_suffix)], "fmm"))
  if (is.null(period)) { # Estimate period from data.
    ## N.B. This previously called 'spectrum(y)' on an undefined variable 'y';
    ## use the interpolated series instead.
    spectralDensity <- spectrum(d[[series %_% " (interpolated)" %_% ifelse(!is_unc, "", unc_suffix)]])
    ## '[1]' guards against ties at the spectral peak.
    period <- 1 / spectralDensity$freq[spectralDensity$spec == max(spectralDensity$spec)][1]
  }
  ## Get residuals from LOESS fit.
  loessArgs <- list(
    formula = eval(substitute(s ~ yr_part,
      list(s = as.name(series %_% " (interpolated)" %_% ifelse(!is_unc, "", unc_suffix))))),
    data = d,
    span = 0.2,
    na.action = na.exclude
  )
  loessArgs <- utils::modifyList(loessArgs, loess..., keep.null = TRUE)
  #l <- do.call(stats::loess, loessArgs)
  l <- do.call(LOESS, loessArgs)
  if (keep_loess)
    d[[series %_% " (LOESS fit)" %_% ifelse(!is_unc, "", unc_suffix)]] <- predict(l)
  r <- residuals(l)
  ## Construct model formula for given no. of harmonics.
  fBase <- "r ~ "; f <- NULL
  for (i in seq(num_harmonics))
    f <- c(f, paste0(c("sin", "cos"), paste0("(", 2 * i, " * pi / period * yr_part)")))
  f <- as.formula(paste0(fBase, paste0(f, collapse = " + ")))
  rfit <- lm(f, data = d, na.action = na.exclude, ...)
  ## Subtract the fitted cycle from the original (uninterpolated) series.
  uncycled <- d[[series %_% ifelse(!is_unc, "", unc_suffix)]] - predict(rfit)
  if (is.logical(center))
    d[[series %_% suffix %_% ifelse(!is_unc, "", unc_suffix)]] <-
      scale(uncycled, center = center, scale = FALSE)[, 1]
  else
    ## 'center' is a vector of years: center on the mean over those years instead.
    d[[series %_% suffix %_% ifelse(!is_unc, "", unc_suffix)]] <-
      uncycled - mean(uncycled[d$year %in% center], na.rm = TRUE)
  if (!keep_series) {
    d[[series %_% ifelse(!is_unc, "", unc_suffix)]] <- NULL
    if (!is.null(uncertaintyDf))
      uncertaintyDf[[series %_% unc_suffix]] <- NULL
  }
  if (!keep_interpolated)
    d[[series %_% " (interpolated)" %_% ifelse(!is_unc, "", unc_suffix)]] <- NULL
  ## Merge the recursively processed uncertainty columns back in.
  if (!is.null(uncertaintyDf))
    d <- merge(d, uncertaintyDf[c("yr_part", get_climate_series_names(uncertaintyDf,
      conf_int = TRUE))], all.x = TRUE, by = "yr_part", sort = TRUE)
  d %>% as.data.frame
}
## usage:
# inst <- get_climate_data(download=FALSE, baseline=FALSE)
# series <- "CO2 Mauna Loa"
# d <- remove_periodic_cycle(inst, series, center=2000:2012)
# plot(d$yr_part, d[, series %_% " (anomalies)"], type="o", pch=20, xlim=c(2000, 2012), ylim=c(-15, 15))
# ## Temperature series.
# inst <- get_climate_data(download=FALSE, baseline=TRUE)
# series <- "GISTEMP Global"
# ## To calculate Fourier series only on a specific time range, subset the data first, e.g. 'subset(inst, inst$year %in% 1970:2016)':
# d <- remove_periodic_cycle(subset(inst, inst$year %in% 1970:2016), series, center=climeseries:::defaultBaseline)
# plot(d$yr_part, d[, series %_% " (anomalies)"], type="o", pch=20)
# ## Monthly anomalies are almost the same because trend and cycle are very different in size.
# lines(d$yr_part, d[, series], type="o", pch=20, col="red")
#' Combine several overlapping monthly series into one aggregate by row-wise
#' averaging, optionally interpolating over missing values first.
#'
#' @param x Climate data set.
#' @param var_names Names of the series columns to combine.
#' @param aggregate_name Name of the new aggregate column.
#' @param method Interpolation method passed to 'interpNA()'.
#' @param interpolate Interpolate before and after averaging?
#' @param baseline Optional common baseline for recentering before combining.
#' @param add If TRUE, return 'x' with the new column; otherwise return just
#'   the aggregate vector.
#' @param ... Passed to the final 'interpNA()' call.
#' @export
create_aggregate_variable <- function(
  x,
  var_names,
  aggregate_name = "aggregate_var",
  method = "fmm",
  interpolate = TRUE,
  baseline = NULL,
  add = TRUE,
  ...
)
{
  d <- x[, c(get_climate_series_names(x, invert = FALSE), var_names), drop = FALSE]
  ## Put variables on common baseline before combining them.
  if (!is.null(baseline))
    d <- recenter_anomalies(d, baseline = baseline)
  ## Remove non-monthly-series columns. N.B. 'drop = FALSE' keeps 'd' a data
  ## frame even if only a single series column remains (previously it could
  ## collapse to a vector and break the row-wise apply() below).
  d <- d[, get_climate_series_names(d), drop = FALSE]
  if (interpolate)
    d <- interpNA(d, method = method, unwrap = TRUE)
  ## Row-wise mean over the available series; NA when every series is missing.
  r <- apply(d, 1, function(a) { r <- NA; if (!all(is.na(a))) r <- mean(a, na.rm = TRUE); r })
  if (interpolate)
    r <- drop(interpNA(r, method = "linear", unwrap = TRUE, ...))
  if (!add) return (r)
  x[[aggregate_name]] <- r
  x
}
## usage:
# e <- get_climate_data(download=TRUE, baseline=FALSE)
# e <- create_aggregate_variable(e, c("GISS Stratospheric Aerosol Optical Depth (550 nm) Global", "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global"), "SAOD Aggregate Global", type="head")
#' @export
create_aggregate_co2_variable <- function(x, co2_var_name, merge...=list(), ...)
{
## Splice the Law Dome ice-core CO2 record onto an instrumental CO2 series
## (e.g. Mauna Loa) with a hard cutpoint at the instrumental start year.
## 'merge...': overrides for the merge() calls below; '...' is passed on to
## create_aggregate_variable().
lawPath <- system.file("extdata/co2/law2006.txt", package="climeseries")
l <- read.table(lawPath, header=TRUE, skip=182, nrow=2004)
## Yearly Law Dome values stored in June; expand to the monthly layout.
law <- data.frame(year=l$YearAD, month=6, `CO2 Law Dome`=l$CO2spl, check.rows=FALSE, check.names=FALSE, fix.empty.names=FALSE, stringsAsFactors=FALSE)
law <- base::merge(expand.grid(month=1:12, year=law$year), law, by=c("year", "month"), all=TRUE)
## First year with instrumental CO2 data; Law Dome is used strictly before it.
yearlyInstrumentalCo2 <- as.data.frame(make_yearly_data(x[, c(common_columns, co2_var_name)]))
instrumentalStartYear <- head(yearlyInstrumentalCo2[na_unwrap(yearlyInstrumentalCo2[[co2_var_name]]), ]$year, 1)
mergeArgs = list(
x = x,
y = law[law$year < instrumentalStartYear, ],
by = intersect(common_columns, names(law)),
all.x = TRUE
)
mergeArgs <- utils::modifyList(mergeArgs, merge..., keep.null = TRUE)
## Unlike the other aggregate variables, which use overlap means, the CO2 aggregate series has a distinct cutpoint between paleo and instrumental.
x <- do.call("merge", mergeArgs)
r <- create_aggregate_variable(x, c("CO2 Law Dome", co2_var_name), ...)
## Replace truncated Law Dome series with the full one.
r$`CO2 Law Dome` <- NULL
mergeArgs$x <- r
mergeArgs$y <- law
r <- do.call("merge", mergeArgs)
r
}
## usage:
# e <- get_climate_data(download=FALSE, baseline=FALSE)
# e <- create_aggregate_co2_variable(e, "CO2 Mauna Loa", aggregate_name="CO2 Aggregate Global", type="head") # With interpolation.
## Create a yearly aggregate CO2 variable without any monthly interpolation.
# e <- get_climate_data(download=FALSE, baseline=FALSE)
# e <- create_aggregate_co2_variable(e, "CO2 Mauna Loa", aggregate_name="CO2 Aggregate Global", merge...=list(all=TRUE), interpolate=FALSE)
# g <- make_yearly_data(e[, c(climeseries::common_columns, "CO2 Aggregate Global")])
#' @export
add_default_aggregate_variables <- function(x, co2_instrumental_variable = "CO2 Mauna Loa", use_adjusted_tsi = TRUE, ...) # Use 'interpolate = FALSE' as needed
{
## Add the standard aggregate exogenous variables (ENSO, volcanic aerosols,
## solar TSI, CO2) to the data set 'x'.
x <- create_aggregate_variable(x, c("Extended Multivariate ENSO Index", "Multivariate ENSO Index"), "MEI Aggregate Global", type = "head", ...)
x <- create_aggregate_variable(x, c("GISS Stratospheric Aerosol Optical Depth (550 nm) Global", "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global"), "SAOD Aggregate Global", type = "head", ...)
## TSI
if (use_adjusted_tsi) {
## "PMOD TSI VIRGO A+B (new)" is shaped very much like "TSI Reconstructed" but shifted downwards a bit;
## so, shift it up and fill in the monthly details missing from "TSI Reconstructed".
flit <- make_yearly_data(x[, c(common_columns, "PMOD TSI VIRGO A+B (new)", "TSI Reconstructed")])
## NOTE(review): assumes columns 2 & 3 of the yearly data are PMOD &
## Reconstructed respectively — confirm against make_yearly_data()'s ordering.
tsiDifference <- flit[[2]] - flit[[3]]
x$`PMOD TSI VIRGO A+B (new adj.)` <- x$`PMOD TSI VIRGO A+B (new)` - mean(tsiDifference, na.rm = TRUE)
x <- create_aggregate_variable(x, c("TSI Reconstructed", "PMOD TSI VIRGO A+B (new adj.)"), "TSI Aggregate Global", type = "head", ...)
}
else { # Otherwise, for less monthly detail and less interpolation, just use "Reconstructed" and SORCE.
x <- create_aggregate_variable(x, c("TSI Reconstructed", "PMOD TSI VIRGO A+B (new)"), "TSI Aggregate Global", type = "head", ...)
}
## Interpolated monthly CO2 aggregate plus its log.
aggregateName <- "CO2 Aggregate Global"
x <- create_aggregate_co2_variable(x, co2_instrumental_variable, aggregate_name = aggregateName %_% " (interp.)", type = "head", ...)
#x[["log " %_% aggregateName %_% " (interp.)"]] <- 5.35 * log(x[[aggregateName %_% " (interp.)"]] / 280) # A test.
x[["log " %_% aggregateName %_% " (interp.)"]] <- log(x[[aggregateName %_% " (interp.)"]])
x$`CO2 Law Dome` <- NULL # Drop helper column before recreating the uninterpolated aggregate.
x <- create_aggregate_co2_variable(x, co2_instrumental_variable, aggregate_name = aggregateName, interpolate = FALSE)
x[["log " %_% aggregateName]] <- log(x[[aggregateName]])
x
}
## usage:
# e <- get_climate_data(download=FALSE, baseline=FALSE)
# e <- add_default_aggregate_variables(e)
# plot_climate_data(e, c("Extended Multivariate ENSO Index", "Multivariate ENSO Index", "MEI Aggregate Global"), 1940, lwd = 2)
# plot_climate_data(e, c("GISS Stratospheric Aerosol Optical Depth (550 nm) Global", "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global", "SAOD Aggregate Global"), 1985, lwd = 2)
# plot_climate_data(e, c("TSI Reconstructed", "PMOD TSI VIRGO A+B (new)", "TSI Aggregate Global"), 1985, lwd = 2)
## Create temperature series with the influence of some exogenous factors removed.
## Based on Foster & Rahmstorf 2011, dx.doi.org/10.1088/1748-9326/6/4/044022.
#' @export
remove_exogenous_influences <- function(x, series,
start = NULL, end = NULL,
lags = list(`MEI Aggregate Global` = NULL, `SAOD Aggregate Global` = NULL, `TSI Aggregate Global` = NULL),
aggregate_vars_fun = add_default_aggregate_variables,
period = 1, num_harmonics = 4,
max_lag = 12, bs_df = NULL, bs_degree = 3,
center_on_mean = TRUE,
suffix = " (adj.)")
{
## For each series, regress it on a spline trend, seasonal harmonics, and
## lagged exogenous variables (named in 'lags'), picking the lag combination
## with the lowest AIC; then add a column (series + 'suffix') holding
## residuals + spline trend + intercept, i.e. the series with the exogenous
## influences removed.
if (missing(x))
x <- get_climate_data(download = FALSE, baseline = FALSE)
if (!is.null(aggregate_vars_fun))
x <- aggregate_vars_fun(x)
if (length(lags) == 0)
return (x)
lagsDf <- NULL
for (i in series) {
## Restrict the fit to the series' non-NA span, optionally clipped to [start, end].
startYrPart <- min(x$yr_part[na_unwrap(x[[i]])], na.rm = TRUE)
endYrPart <- max(x$yr_part[na_unwrap(x[[i]])], na.rm = TRUE)
if (!is.null(start)) startYrPart <- max(start, startYrPart)
if (!is.null(end)) endYrPart <- min(end, endYrPart)
## Center the spline's time variable near the middle of the fit range.
yr_part_offset <- trunc(mean(x$yr_part[x$yr_part >= startYrPart & x$yr_part <= endYrPart]))
## This guess is crude, but should work okay for the instrumental temperature series.
rangeYrPart <- endYrPart - startYrPart
if (!is.null(bs_df))
bsDf <- bs_df
else {
bsDf <- 6
if (rangeYrPart > 50)
bsDf <- 8
}
## Construct model formula for given no. of harmonics.
flitSeries <- x[[i]]
x[[i]] <- interpNA(x[[i]], type = "tail") # Temporarily fill tail NAs for fitting; restored below.
fBase <- backtick(i) %_% "~"; form <- NULL
for (j in seq(num_harmonics))
form <- c(form, paste0(c("sin", "cos"), paste0("(", 2 * j, " * pi / period * yr_part)")))
form <- c(paste0("splines::bs(yr_part - yr_part_offset, df = ", bsDf, ", degree=", bs_degree, ")"), backtick(names(lags)), form)
form <- as.formula(paste0(fBase, paste0(form, collapse = " + ")))
y <- x[, c(i, "yr_part", names(lags))]
x[[i]] <- flitSeries
## All candidate lag combinations: 0..max_lag for any lag left NULL.
l <- expand.grid(sapply(lags, function(a) { r <- seq(0, max_lag); if (!is.null(a)) r <- a; r }, simplify = FALSE))
aic <- apply(l, 1,
function(a) {
lr <- as.list(unlist(a))
z <- shift(y, lr, roll = FALSE)
z <- subset(z, z$yr_part >= startYrPart & z$yr_part <= endYrPart)
## Test the lag combinations to find the model with the lowest AIC.
AIC(lm(form, z))
}
)
## Refit using the best (lowest-AIC) lag combination.
lagMinAic <- as.list(unlist(l[which.min(aic)[1], , drop = FALSE]))
z <- shift(y, lagMinAic, roll = FALSE)
z <- subset(z, z$yr_part >= startYrPart & z$yr_part <= endYrPart)
yr_part <- z$yr_part
## Interpolate exogenous variables back in time a little for long lags.
for (j in names(lagMinAic))
z[[j]] <- drop(interpNA(z[[j]], type = "tail"))
m <- lm(form, z)
mf <- model.frame(m)
## Check the fit:
# plot(yr_part, mf[[1]], type="l"); lines(yr_part, m$fitted, type = "l", col = "red"); plot(m$residuals)
## Reconstruct the spline-trend component (partial fit) from its coefficients.
partialCoefsRe <- "bs\\(yr_part"
partialCoefs <- coef(m)[grep(partialCoefsRe, names(coef(m)))]
partialValuesNames <- grep(partialCoefsRe, names(mf), value = TRUE)
partialValuesList <- sapply(partialValuesNames, function(a) data.matrix(mf[[a]]), simplify = FALSE)
partialValues <- Reduce(cbind, partialValuesList)
partial <- (partialValues %*% partialCoefs)[, , drop = TRUE] + coef(m)["(Intercept)"]
## Adjusted series = residuals + trend (+ intercept): exogenous terms removed.
adj <- m$residuals + partial
if (center_on_mean)
adj <- adj - mean(adj)
flit <- dataframe(yr_part = yr_part)
flit[[i %_% suffix]] <- adj
lagsDf <- rbind(lagsDf, dataframe(lagMinAic))
#browser()
x <- merge(x, flit, by = "yr_part", all.x = TRUE)
}
## Report the selected lags (one row per series).
rownames(lagsDf) <- series
cat("Lag values (mos.) of exogenous variables for each series:", fill = TRUE)
print(lagsDf, row.names = TRUE)
cat(fill = TRUE)
x
}
## usage:
# series <- c("GISTEMP Global", "NCEI Global", "HadCRUT4 Global", "RSS TLT 3.3 -70.0/82.5", "UAH TLT 5.6 Global")
# start <- 1979; end <- 2011
# g <- remove_exogenous_influences(series = series, start = start, end = end, max_lag = 12)
# series_all <- as.vector(rbind(series, paste(series, "(adj.)")))
# h <- make_yearly_data(g[, c(climeseries::common_columns, series_all)])
# h <- h[year >= start & year < end]
# ylab <- expression(paste("Temperature Anomaly (", phantom(l) * degree, "C)", sep=""))
# main <- "Adjusted for ENSO, Volcanic, and Solar Influences"
# if (dev.cur() == 1L) # If a graphics device is active, plot there instead of opening a new device.
# dev.new(width = 12.5, height = 7.3) # New default device of 1200 × 700 px at 96 DPI.
# for (i in series) {
# year_range <- paste0(min(h$year), "\u2013", max(h$year))
# plot(h$year, h[[i]], lwd = 2, pch = 19, type = "o", main = paste(i, year_range), xlab = "year", ylab = ylab)
# plot(h$year, h[[i %_% " (adj.)"]], lwd = 2, pch = 19, type = "o", main = paste(i, year_range, main), xlab = "year", ylab = ylab)
# }
#' Convenience wrapper around remove_exogenous_influences() that also plots the
#' adjusted series — either via plot_climate_data() or as yearly base-graphics
#' plots in the style of Foster & Rahmstorf / Tamino.
#'
#' @param series Names of the temperature series to adjust and plot.
#' @param start,end Optional year bounds.
#' @param bs_df Passed to remove_exogenous_influences().
#' @param tamino_style If TRUE, draw paired raw/adjusted yearly plots and
#'   return the yearly data; otherwise plot the adjusted monthly series and
#'   return the full adjusted data set.
#' @param ... Passed to plot_climate_data() when 'tamino_style' is FALSE.
#' @export
easy_exogenous_plot <- function(series, start=NULL, end=NULL, bs_df=NULL, tamino_style=FALSE, ...)
{
    adjData <- remove_exogenous_influences(series=series, start=start, end=end, bs_df=bs_df)
    ## Simple case: plot the adjusted monthly series and hand back the data.
    if (!tamino_style) {
        plot_climate_data(adjData, paste(series, "(adj.)"), start, end, ...)
        return (adjData) # Return the data set for reuse.
    }
    ## Tamino-style: yearly means, paired raw/adjusted base-graphics plots.
    interleavedNames <- as.vector(rbind(series, paste(series, "(adj.)")))
    yearly <- make_yearly_data(adjData[, c(common_columns, interleavedNames)])
    if (!is.null(start)) yearly <- yearly[year >= start]
    if (!is.null(end)) yearly <- yearly[year < end]
    ylab <- expression(paste("Temperature Anomaly (", phantom(l) * degree, "C)", sep=""))
    main <- "Adjusted for ENSO, Volcanic, and Solar Influences"
    ## Open a new default device (1200 × 700 px at 96 DPI) only when none is active.
    if (dev.cur() == 1L)
        dev.new(width=12.5, height=7.3)
    for (s in series) {
        keep <- na_unwrap(yearly[[s %_% " (adj.)"]])
        spanLabel <- paste0(min(yearly$year[keep]), "\u2013", max(yearly$year[keep]))
        plot(yearly$year[keep], yearly[[s]][keep], lwd=2, pch=19, type="o", main=paste(s, spanLabel), xlab="year", ylab=ylab)
        plot(yearly$year[keep], yearly[[s %_% " (adj.)"]][keep], lwd=2, pch=19, type="o", main=paste(s, spanLabel, main), xlab="year", ylab=ylab)
    }
    return (yearly) # Return the data set for reuse.
}
## usage:
# series <- c("GISTEMP Global", "NCEI Global", "HadCRUT4 Global", "Cowtan & Way Krig. Global", "BEST Global (Water Ice Temp.)", "JMA Global", "RSS TLT 3.3 -70.0/82.5", "UAH TLT 6.0 Global")
## N.B. Each of these will likely take a minute or two to run.
# g <- easy_exogenous_plot(series[c(1:3, 7:8)], 1979, 2011, ma=12, lwd=2, show_trend=TRUE, baseline=TRUE)
# g <- easy_exogenous_plot(series, ma=12, lwd=2, baseline=TRUE)
## Similar to Foster & Rahmstorf 2011:
# h <- easy_exogenous_plot(series[c(1:3, 7:8)], 1979, 2011, tamino=TRUE)
# ## Make and save some plots:
# setwd(".") # Set to desired storage location.
# png(filename="tamino-style_%03d.png", width=12.5, height=7.3, units="in", res=600)
# h <- easy_exogenous_plot(series, tamino=TRUE)
# dev.off()
## Similar to plots here: https://tamino.wordpress.com/2017/01/18/global-temperature-the-big-3/.
# setwd(".") # Set to desired storage location.
# png(filename="tamino-big-3/tamino-big-3_%03d.png", width=12.5, height=7.3, units="in", res=600)
# h <- easy_exogenous_plot(series, 1950, tamino=TRUE)
# dev.off()
## Convert Fahrenheit temperatures to Kelvin (vectorized).
#' @export
fahr_to_kelvin <- function(temp)
{
    ## Scale the offset from the Fahrenheit freezing point, then shift to Kelvin.
    degrees_above_freezing <- temp - 32
    degrees_above_freezing * (5/9) + 273.15
}
## Convert Kelvin temperatures to Celsius (vectorized).
#' @export
kelvin_to_celsius <- function(temp)
{
    ## Celsius shares Kelvin's increments, shifted by absolute zero (-273.15 C).
    celsius_offset <- -273.15
    temp + celsius_offset
}
## Convert Fahrenheit temperatures to Celsius (vectorized).
#' @export
fahr_to_celsius <- function(temp)
{
    ## Compose the two existing conversions: F -> K -> C.
    kelvin <- fahr_to_kelvin(temp)
    kelvin_to_celsius(kelvin)
}
## Convert Celsius temperatures to Fahrenheit (vectorized).
#' @export
celsius_to_fahr <- function(temp)
{
    ## Scale by 9/5, then shift by the Fahrenheit freezing point.
    scaled <- temp * (9/5)
    scaled + 32
}
#' @export
convert_hdf4_to_h5 <- function(
hdf4_path = ".", # Can be a single directory or vector of file paths.
h5_path = NULL, # If a list of file paths, must be same length as no. files given/listed by 'hdf4_path'.
re = "^.*?\\.hdf$", # Case ignored unless overridden in 'list.files...'.
list.files... = list(),
converter_path = "h4toh5convert.exe",
overwrite = FALSE,
verbose = TRUE
)
{
## Convert HDF4 files to HDF5 by shelling out to the HDF Group's
## 'h4toh5convert' tool, one call per file. Returns (invisibly) the captured
## output of each conversion command (NA when the conversion was skipped).
hdf4Path <- hdf4_path
## Is 'hdf4_path' a directory?
if (utils::file_test("-d", hdf4_path[1])) { # Keep only 1st element; 'Vectorize()' if needed.
list.filesArgs <- list(
path = hdf4_path[1],
pattern = re,
full.names = TRUE,
recursive = TRUE,
ignore.case = TRUE
)
list.filesArgs <- utils::modifyList(list.filesArgs, list.files..., keep.null = TRUE)
hdf4Path <- do.call(list.files, list.filesArgs)
}
## If 'h5_path' is a directory, build one output path per input file.
h5Path <- h5_path
if (!is.null(h5_path) && utils::file_test("-d", h5_path[1])) { # N.B. Directory must already exist.
h5Path <- paste(h5_path[1], basename(tools::file_path_sans_ext(hdf4Path)) %_% ".h5", sep = "/")
}
r <- sapply(seq_along(hdf4Path),
function (i)
{
if (verbose) {
cat(sprintf("Converting file %s to HD5...", basename(hdf4Path[i]))); utils::flush.console()
}
hdf4File <- hdf4Path[i]
if (is.null(h5_path))
h5File <- "" # Convert in same directory w/ ext "h5".
else
h5File <- h5Path[i]
## Conversion software here: https://portal.hdfgroup.org/display/support/Download+h4h5tools
cmd <- trimws(sprintf("\"%s\" \"%s\" \"%s\"", converter_path, hdf4File, h5File))
rv <- NA
## NOTE(review): when 'h5_path' is NULL, 'h5File' is "" and file.exists("")
## is FALSE, so the 'overwrite' guard can never skip conversion in that case.
if (!file.exists(h5File) || overwrite)
rv <- system(cmd, intern = TRUE)
if (verbose) {
cat(". Done.", fill = TRUE); utils::flush.console()
}
rv
}, simplify = TRUE)
invisible(r)
}
## usage:
## V. https://disc.gsfc.nasa.gov/data-access
## Update AIRS gridded data. From the WSL Bash shell:
# sudo mount -t drvfs v: /mnt/v
# wget --load-cookies ~/.urs_cookies --save-cookies ~/.urs_cookies --auth-no-challenge=on --keep-session-cookies -N -np -r --accept='*.hdf' -P /mnt/v/data/climate/AIRS-Level3 --content-disposition https://acdisc.gesdisc.eosdis.nasa.gov/data/Aqua_AIRS_Level3/AIRS3STM.006/
## Now convert HDF4 files to HD5 in R:
# r <- convert_hdf4_to_h5("V:/data/climate/AIRS-Level3/acdisc.gesdisc.eosdis.nasa.gov/data/Aqua_AIRS_Level3/AIRS3STM.006", "V:/data/climate/AIRS-Level3/h5")
## Or a single file, e.g.
# r <- convert_hdf4_to_h5("V:/data/climate/AIRS-Level3/acdisc.gesdisc.eosdis.nasa.gov/data/Aqua_AIRS_Level3/AIRS3STM.006/2020/AIRS.2020.09.01.L3.RetStd_IR030.v6.0.31.1.G20281103846.hdf", "V:/data/climate/AIRS-Level3/h5/AIRS.2020.09.01.L3.RetStd_IR030.v6.0.31.1.G20281103846.h5")
## Bash shell:
# sudo umount /mnt/v
#####
## New! V7: https://acdisc.gesdisc.eosdis.nasa.gov/data/Aqua_AIRS_Level3/AIRS3STM.7.0/
## Also, AIRS tables! https://data.giss.nasa.gov/gistemp/#tabledata
## Build a monthly AIRS temperature series from a directory of Level-3 HDF5
## files: read each file's grid, anomalize each grid cell against 'baseline',
## then average into a single latitude-weighted global series.
#' @export
create_airs_monthly_data <- function(
data_path = ".", # Directory containing the converted .h5 files.
files_re = "^.*?\\.h5$", # Case ignored unless overridden in 'list.files...'.
list.files... = list(), # Extra arguments merged into the 'list.files()' call below.
group_name = "/ascending/Data Fields/SurfSkinTemp_A", # Or "/descending/Data Fields/SurfSkinTemp_D"
baseline = 2003:2018, # Years used to center the anomalies.
apply_lat_weights = TRUE, # NOTE(review): not referenced anywhere in this function body.
series = "AIRS Surface Skin Global", # Name of the output series column.
save_rdata = FALSE # TRUE to save intermediate objects for debugging.
)
{
list.filesArgs <- list(
path = data_path,
pattern = files_re,
full.names = TRUE,
recursive = FALSE,
ignore.case = TRUE
)
list.filesArgs <- utils::modifyList(list.filesArgs, list.files..., keep.null = TRUE)
files <- do.call(list.files, list.filesArgs)
## Read each HDF5 file: a temperature matrix plus its location/date metadata.
l <- sapply(files,
function(i)
{
latitude <- t(rhdf5::h5read(i, "/location/Data Fields/Latitude"))
longitude <- t(rhdf5::h5read(i, "/location/Data Fields/Longitude"))
m <- rhdf5::h5read(i, "/location/Grid Attributes/Month")[1, 1]
y <- rhdf5::h5read(i, "/location/Grid Attributes/Year")[1, 1]
value <- t(rhdf5::h5read(i, group_name))
attr(value, "metadata") <- list(year = y, month = m, lat = latitude[, 1], lon = longitude[1, ])
value
}, simplify = FALSE)
## One year/month row per file, in file order.
d <- Reduce(rbind, sapply(l, function(x) { m <- attr(x, "metadata"); dataframe(year = m$year, month = m$month) }, simplify = FALSE))
g <- Reduce(function(x, y) abind::abind(x, y, along = 3), l) # lat × lon × month
p <- make_planetary_grid(grid_size = c(1, 1))
i <- get_index_from_element(seq_along(g[, , 1]), g[, , 1])
## Put full time series into each grid cell.
utils::flush.console()
plyr::a_ply(i, 1,
function(x)
{
d$temp <- g[x[1], x[2], ]
is.na(d$temp) <- d$temp == -9999 # -9999 is the missing-data sentinel here.
p[[x[1], x[2]]][[1]] <<- d
}, .progress = "text")
## Keep a deep copy of the unanomalized grid for optional debugging output.
p0 <- rlang::duplicate(p, shallow = FALSE)
## Calculate time-series anomalies for each cell.
flit <- expand.grid(month = 1:12, year = seq(min(d$year), max(d$year)))
utils::flush.console()
plyr::a_ply(i, 1,
function(x)
{
e <- p[[x[1], x[2]]][[1]]
## Pad each cell's series to complete year-month coverage before anomalizing.
e <- merge(flit, e, by = c("year", "month"), all.x = TRUE)
p[[x[1], x[2]]][[1]] <<- recenter_anomalies(e, baseline)
}, .progress = "text")
## Now weight the means zonally (i.e. by latitude grid).
get_mean_series <- function(p)
{
## For each latitude band (grid row), average its cells' series together.
l <- sapply(seq(NROW(p)),
function(x)
{
y <- p[x, ]
w <- attr(y[[1]], "weight")
l <- sapply(names(y), function(i) { r <- y[[i]][[1]]; names(r)[names(r) == "temp"] <- i; r }, simplify = FALSE) # List of time series for this latitude
## Now merge all the series together.
d <- dplyr::arrange(Reduce(function(i, j) merge(i, j, by = c("year", "month"), all = TRUE), l), year, month)
m <- apply(data.matrix(d[, -(1:2)]), 1, function(i) { r <- NA; if(!all(is.na(i))) r <- mean(i, na.rm = TRUE); r })
attr(m, "weight") <- w
attr(m, "time") <- d[, c("year", "month")]
m
}, simplify = FALSE)
## Combine the per-latitude means into one series, weighted by the grid
## rows' "weight" attributes.
w <- sapply(l, attr, which = "weight")
ll <- Reduce(cbind, l)
m <- apply(ll, 1, function(i) { r <- NA; if(!all(is.na(i))) r <- weighted.mean(i, w, na.rm = TRUE); r })
d <- dplyr::arrange(Reduce(function(i, j) merge(i, j, by = c("year", "month"), all = TRUE), sapply(l, attr, which = "time", simplify = FALSE)), year, month)
d[[series]] <- m
d
}
r <- get_mean_series(p)
## This will be fairly inflexible, but it's mostly for debugging.
if (save_rdata) {
airsSaveDirBase <- "."
if (!is.null(getOption("climeseries_data_dir")))
airsSaveDirBase <- getOption("climeseries_data_dir")
airsSaveDir <- paste(airsSaveDirBase, "AIRS", sep = "/")
if (!dir.exists(airsSaveDir))
dir.create(airsSaveDir, recursive = TRUE)
fileName <- paste(stringr::str_replace_all(group_name, "/", "_"), make_current_timestamp(use_seconds = TRUE), sep = "_") %_% ".RData"
save(list = c("l", "d", "g", "p0", "p", "i", "r"), file = paste(airsSaveDir, fileName, sep = "/"))
}
## Data frame of year/month plus the single averaged series column.
r
}
## usage:
#create_airs_monthly_data("E:/Users/priscian/my_documents/oversize/data/climate/AIRS-Level3/acdisc.gesdisc.eosdis.nasa.gov/data/Aqua_AIRS_Level3/AIRS3STM.006/2002/hd5")
#create_airs_monthly_data("V:/data/climate/AIRS-Level3/h5")
## Combine the AIRS ascending- & descending-node monthly series into a single
## weighted series (with the defaults, the simple mean of the two nodes).
#' @export
create_combined_airs_series <- function(
  data_path = NULL,
  ascending = "/ascending/Data Fields/SurfSkinTemp_A",
  descending = "/descending/Data Fields/SurfSkinTemp_D",
  series = "AIRS Surface Skin Global",
  node_weights = 1,
  multiplier = 0.5,
  ...
)
{
  ## Fall back to the package option, then to the working directory.
  if (is.null(data_path)) {
    optDir <- getOption("climeseries_airs_data_dir")
    if (is.null(optDir))
      data_path <- "."
    else
      data_path <- optDir
  }
  asc <- create_airs_monthly_data(data_path = data_path, group_name = ascending, series = series, ...)
  desc <- create_airs_monthly_data(data_path = data_path, group_name = descending, series = series, ...)
  ## Recycle the node weights to exactly two values (ascending, descending).
  wts <- rep(node_weights, length.out = 2)
  combined <- asc
  combined[[series]] <- (wts[1] * asc[[series]] + wts[2] * desc[[series]]) * multiplier
  ## Fractional-year timestamp at mid-month.
  combined$yr_part <- combined$year + (2 * combined$month - 1)/24
  dplyr::arrange(combined, year, month)
}
## Linearly interpolate a climate series backwards for approximate baselining.
## I'll use this mostly for AIRS, but it might be otherwise applicable.
#' @export
interpolate_baseline <- function(
series, # A single column in 'x'
x, # A 'climeseries' data set
baseline = NULL # Baseline years; NULL returns the series with no recentering.
)
{
if (missing(x))
x <- get_climate_data(download = FALSE, baseline = FALSE)
series <- series[1] # Only the first series name is used.
xu <- x[na_unwrap(x[[series]]), c(common_columns, series)]
if (!is.null(baseline)) {
if (min(baseline) < min(xu$year)) {
## Baseline starts before the observed record: extend the series backwards
## with a linear fit, recenter, then blank the synthetic values again.
xx <- x[, c(common_columns, series)] %>%
dplyr::filter(year >= min(baseline))
is_na <- is.na(xx[[series]])
## NOTE(review): the linear model is fit over ALL of 'x' (the full record),
## not just the baseline window — confirm that's intended.
m <- stats::lm(substitute(s ~ yr_part, list(s = as.name(series))), data = x)
## Calculate linear prediction back to start of baseline (don't go back further than about 1970).
xx[[series]][is_na] <- stats::predict(m, dataframe(yr_part = xx$yr_part))[is_na]
xxx <- recenter_anomalies(xx, baseline)
is.na(xxx[[series]]) <- is_na # Drop the synthetic backcast values again.
r <- merge(x[, common_columns], xxx[, c("year", "month", series)], all.x = TRUE)
} else {
r <- recenter_anomalies(x[, c(common_columns, series)], baseline)
}
} else {
r <- x[, c(common_columns, series)]
}
r
}
## First download CMIP5 TAZ, where e.g. "/mnt/v/data/climate/CMIP5-taz" is your directory:
## wget --user-agent=Mozilla --no-check-certificate -r -np -nd -l 1 -A nc,NC -H -P /mnt/v/data/climate/CMIP5-taz -erobots=off https://climexp.knmi.nl/CMIP5/monthly/taz/
## Read the downloaded CMIP5 zonal-mean temperature (taz) NetCDF files into a
## named list of latitude × pressure × time arrays with metadata attributes.
#' @export
create_cmip5_taz_data <- function(
data_path = ".", # Directory containing the downloaded .nc files.
rdata_path = paste(data_path, "cmip5-taz_all-members_lats-all.RData", sep = "/"), # NULL to skip saving.
files_re = "^taz_Amon_ens_rcp(26|45|60|85)_.*?\\.nc$", # Case ignored unless overridden in 'list.files...'.
list.files... = list(), # Extra arguments merged into the 'list.files()' call below.
filter_expr = NULL, # Optional expression evaluated on the 'tidync' object (e.g. a 'hyper_filter()' call).
verbose = TRUE # Print per-file progress messages.
)
{
list.filesArgs <- list(
path = data_path,
pattern = files_re,
full.names = TRUE,
recursive = FALSE,
ignore.case = TRUE
)
list.filesArgs <- utils::modifyList(list.filesArgs, list.files..., keep.null = TRUE)
files <- do.call(list.files, list.filesArgs)
cmip5_taz <- sapply(basename(files),
function(i)
{
f <- paste(data_path, i, sep = "/")
if (verbose) {
cat(sprintf("Processing file %s...", i)); utils::flush.console()
}
## Global attributes are read with RNetCDF; the data array with 'tidync' below.
nc <- RNetCDF::open.nc(f)
#RNetCDF::print.nc(nc)
institute_id <- RNetCDF::att.get.nc(nc, "NC_GLOBAL", "institute_id")
model_id <- RNetCDF::att.get.nc(nc, "NC_GLOBAL", "model_id")
scenario <- RNetCDF::att.get.nc(nc, "NC_GLOBAL", "experiment")
forcing <- RNetCDF::att.get.nc(nc, "NC_GLOBAL", "forcing")
## Time-axis origin date, parsed out of e.g. "days since 1860-01-01".
origin <- sub("^\\s*days since\\s*", "", RNetCDF::att.get.nc(nc, "time", "units"), ignore.case = TRUE)
RNetCDF::close.nc(nc)
x0 <- tidync::tidync(f)
if (!is.null(filter_expr))
x0 <- x0 %>% { eval(filter_expr) }
x <- x0 %>% tidync::hyper_array() %>% `[[`(1L, drop = FALSE)
## Coordinate vectors for the (possibly filtered) selection.
latitude <- x0$transforms$lat %>% dplyr::filter(selected) %>% dplyr::pull(lat)
air_pressure <- x0$transforms$plev %>% dplyr::filter(selected) %>% dplyr::pull(plev)
dates <- x0$transforms$time %>% dplyr::filter(selected) %>% dplyr::pull(time) %>%
as.Date(origin = origin)
dimnames(x) <- list(latitude = latitude, air_pressure = air_pressure, dates = as.character(dates))
attr(x, "latitude") <- latitude
attr(x, "air_pressure") <- air_pressure
attr(x, "dates") <- dates
attr(x, "institute_id") <- institute_id
attr(x, "model_id") <- model_id
attr(x, "scenario") <- scenario
if (verbose) {
cat(". Done.", fill = TRUE); utils::flush.console()
}
x
}, simplify = FALSE)
## Side effect: optionally save the list under the name "cmip5_taz".
if (!is.null(rdata_path))
save(list = c("cmip5_taz"), file = rdata_path)
cmip5_taz
}
## usage:
# cmip5_taz <- create_cmip5_taz_data("V:/data/climate/CMIP5-taz")
#
# cmip5_taz <- create_cmip5_taz_data(
# data_path = "V:/data/climate/CMIP5-taz",
# rdata_path = paste("V:/data/climate/CMIP5-taz", "cmip5-taz_all-members_lats-tropics.RData", sep = "/"),
# filter_expr = expression(tidync::hyper_filter(., lat = lat < 24 & lat > -24))
# )
## V. ftp://ftp.remss.com/msu/weighting_functions
## Read an RSS MSU/AMSU vertical weighting-function file & linearly
## interpolate its weights onto the given air-pressure levels.
#' @export
get_rss_msu_weights <- function(
weights_path, # Path to an RSS "std_atmosphere_wt_function_*" text file.
air_pressure, # Vector of air pressures to base interpolations on
skip = 7 # Header lines before the data table begins.
)
{
## These reads are very specific, but seem to work for all the RSS weighting functions:
colNames <- unlist(read.table(weights_path, skip = skip - 2, header = FALSE, nrows = 1, check.names = FALSE, stringsAsFactors = FALSE))
surface_weight <- read.table(weights_path, skip = skip - 4, header = FALSE, nrows = 1, check.names = FALSE, stringsAsFactors = FALSE)$V3
w <- read.table(weights_path, skip = skip, header = FALSE, check.names = FALSE, stringsAsFactors = FALSE)
colnames(w) <- sub("^weight$", "Weight", colNames, ignore.case = TRUE, perl = TRUE)
## Requested pressure levels absent from the file ('%nin%' is package-local);
## merged in as NA rows so they can be filled by interpolation.
a <- air_pressure[air_pressure %nin% w$`P(pa)`]
z <- merge(w, dataframe(`P(pa)` = a), by = "P(pa)", all = TRUE) %>%
dplyr::arrange(desc(`P(pa)`))
## Linearly interpolate height & weight at the inserted pressure levels.
zz <- z %>% dplyr::select(`P(pa)`, `h(m)`, Weight) %>%
interpNA(method = "linear", unwrap = FALSE) %>% dataframe()
zzz <- zz %>% dplyr::filter(`P(pa)` %in% air_pressure)
attr(zzz, "surface_weight") <- surface_weight
attr(zzz, "original_data") <- w # Keep the untouched table for callers.
zzz
}
## usage:
# weights_path <- system.file("inst/extdata/misc/RSS/std_atmosphere_wt_function_chan_tmt_land.txt", package = "climeseries")
# air_pressure <- c(1e+05, 92500, 85000, 70000, 60000, 50000, 40000, 30000, 25000, 20000, 15000, 10000, 7000, 5000, 3000, 2000, 1000)
# w <- get_rss_msu_weights(weights_path, air_pressure)
## V. http://www.realclimate.org/index.php/archives/2017/03/the-true-meaning-of-numbers/
## Collapse CMIP5 zonal-mean atmospheric temperature (taz) model runs into
## MSU/AMSU-channel-equivalent monthly time series by applying RSS vertical
## weighting functions (blended over land & ocean) to each model's profile.
#' @export
create_cmip5_atmosphere_temps <- function(
taz_archive, # Either the list from 'create_cmip5_taz_data()' or a path to its saved .RData.
channel, # RSS channel name, e.g. "tmt"; lowercased before use.
rdata_path = NULL, # If non-NULL, save the result here under the name "cmip5".
weighting_domain = c("_land", "_ocean"), # These will be blanks for TLS & TTS.
column_integrate = FALSE, # TRUE to use vertical integration instead of a plain weighted mean.
... # Passed through to 'get_rss_msu_weights()'.
)
{
if (is.character(taz_archive)) {
load(taz_archive) # Loads the object 'cmip5_taz'.
taz <- cmip5_taz
cmip5_taz <- NULL # Release the (large) loaded copy.
}
else
taz <- taz_archive
## Land/ocean area fractions estimated from the ETOPO5 elevation grid.
data(etopo5, package = "esd")
land_ocean_weights <- list(land = sum(etopo5 >= 0)/length(etopo5), ocean = sum(etopo5 < 0)/length(etopo5))
## Estimate area-weighted mean temperature
area_mean <- function(x, lat_weights) {
d <- dim(lat_weights)
y <- x * c(lat_weights)
dim(y) <- d
z <- colSums(y, na.rm = TRUE) / sum(lat_weights[, 1], na.rm = TRUE)
z
}
total_weight_between_levels <- function(x) # Where 'x' = data frame from 'get_rss_msu_weights()'.
{
## RSS pseudocode:
# total_wt_between_level_minus_one_and_level_one =
# 0.5 * (wt_function(level) + wt_function(level-1)) * (h(level) - h(level-1))
w <- c(attr(x, "surface_weight"), x$Weight)
h <- c(0.0, x$`h(m)`)
0.5 * (x$Weight + head(w, -1)) * diff(h)
}
## One channel-weighted monthly series per model array in 'taz'.
l <- sapply(names(taz),
function(i)
{
tazi <- taz[[i]]
latitude <- attr(tazi, "latitude")
d <- dim(tazi)
## Cosine-latitude area weights, replicated across pressure levels.
lat_weights <- matrix(rep(cos(pi * latitude/180), d[2]), d[1], d[2])
x <- tazi; dim(x) <- c(d[1] * d[2], d[3])
z <- apply(x, 2, area_mean, lat_weights)
air_pressure <- attr(tazi, "air_pressure")
channel <- tolower(channel)
## NOTE(review): 'system.file("inst/extdata/...")' only resolves under a
## devtools-style load; for an installed package the "inst/" prefix is
## normally dropped — confirm which layout is expected here.
ocean_msu_weights <- get_rss_msu_weights(system.file(sprintf("inst/extdata/misc/RSS/std_atmosphere_wt_function_chan_%s%s.txt", channel, weighting_domain[1]), package = "climeseries"), air_pressure, ...)
land_msu_weights <- get_rss_msu_weights(system.file(sprintf("inst/extdata/misc/RSS/std_atmosphere_wt_function_chan_%s%s.txt", channel, weighting_domain[2]), package = "climeseries"), air_pressure, ...)
## Blend the land & ocean weighting functions by global area fraction.
msu_weights <-
land_ocean_weights$ocean * ocean_msu_weights$Weight +
land_ocean_weights$land * land_msu_weights$Weight
if (!column_integrate)
tt <- apply(z, 2, function(x, w) { sum(x * w, na.rm = TRUE) / sum(w, na.rm = TRUE) }, w = msu_weights)
## Also test out vertical integration of weighted temps.
# ocean_msu_weight_surface <- attr(ocean_msu_weights, "original_data") %>% dplyr::slice(1) %>% dplyr::select(`P(pa)`, `h(m)`, Weight)
# land_msu_weight_surface <- attr(land_msu_weights, "original_data") %>% dplyr::slice(1) %>% dplyr::select(`P(pa)`, `h(m)`, Weight)
total_weights_combined <-
land_ocean_weights$ocean * total_weight_between_levels(ocean_msu_weights) +
land_ocean_weights$land * total_weight_between_levels(land_msu_weights)
total_msu_weights <- ocean_msu_weights %>% dplyr::mutate(Weight = total_weights_combined)
layer_heights <- diff(c(0, total_msu_weights$`h(m)`)) # Unnecessary
# tth <- apply(z, 2, function(x, w) { sum(x * w, na.rm = TRUE) / sum(w, na.rm = TRUE) }, w = total_msu_weights$Weight) # ?
if (column_integrate) {
tt <- apply(z, 2,
function(x, w, h)
{
## V. https://en.wikipedia.org/wiki/Weight_function#Weighted_average
integratex(h, x * w)$value / integratex(h, w)$value
}, w = total_msu_weights$Weight, h = total_msu_weights$`h(m)`)
}
dates <- attr(tazi, "dates")
model_id <- attr(tazi, "model_id")
model_name <- tools::file_path_sans_ext(i)
r0 <- dataframe(year = lubridate::year(dates), month = lubridate::month(dates)) %>%
dplyr::mutate(!!model_name := tt)
r <- data.table::data.table(r0)
r <- as.data.frame(r[, lapply(.SD, mean, na.rm = TRUE), by = .(year, month), .SDcols = names(r0)[3:NCOL(r0)]]) # Remove year/month duplicates by averaging.
attr(r, "institute_id") <- attr(tazi, "institute_id")
attr(r, "model_id") <- attr(tazi, "model_id")
attr(r, "scenario") <- attr(tazi, "scenario")
r
}, simplify = FALSE)
## Align every model onto a complete common year/month grid.
r <- range(c(sapply(l, function(x) range(x$year))))
flit <- expand.grid(month = 1:12, year = seq(r[1], r[2], by = 1))
m <- sapply(l,
function(i)
{
merge(flit, i, by = c("year", "month"), all = TRUE)[[3]]
}, simplify = TRUE)
colnames(m) <- sapply(l, function(x) names(x)[3])
y <- flit %>%
dplyr::mutate(yr_part = year + (2 * month - 1)/24, met_year = NA)
m <- cbind(y, m, stringsAsFactors = FALSE)
## Metadata attributes in the 'climeseries' model-ensemble style.
attr(m, "ensemble") <- "CMIP5"
attr(m, "model_type") <- "taz"
attr(m, "model") <- as.vector(sapply(l, attr, which = "model_id"))
attr(m, "scenario") <- paste("RCP", sprintf("%.1f", as.numeric(sub("^(rcp)(.*)$", "\\2", sapply(l, attr, which = "scenario"), ignore.case = TRUE))))
cmip5 <- m
if (!is.null(rdata_path))
save(list = c("cmip5"), file = rdata_path)
m
}
## Build a daily stratospheric aerosol optical depth (SAOD, 550 nm) series
## from OSIRIS/Odin Level-2 extinction profiles & save it as an .RData file.
#' @export
create_osiris_daily_saod_data <- function(
data_path = ".", # Directory containing the raw OSIRIS NetCDF files.
rdata_path = ".", # Directory for the intermediate & final .RData files.
daily_filename = "OSIRIS-Odin_Stratospheric-Aerosol-Optical_550nm.RData",
planetary_grid = NULL, # Optional pre-built grid; default is 'make_planetary_grid()'.
extract = FALSE # TRUE to (re)extract from NetCDF; FALSE to load the saved extraction.
)
{
if (extract) {
fileNames <- list.files(data_path, pattern = "^AEROSOL-L2-LP-OSIRIS_ODIN-SASK_v7_3-", full.names = TRUE)
## Trailing 6 digits of each file name, presumably "YYYYMM" — v. the regex below.
fileDates <- tools::file_path_sans_ext(basename(fileNames)) %>% stringr::str_extract("\\d{6}$")
## One list element per file, each split into per-day profile sets.
x <- sapply(fileNames,
function(i)
{
cat(" Processing file", basename(i), fill = TRUE); flush.console()
nc0 <- RNetCDF::open.nc(i)
## Time-axis origin date, parsed out of e.g. "days since ...".
origin <- sub("^\\s*days since\\s*", "", RNetCDF::att.get.nc(nc0, "time", "units"), ignore.case = TRUE)
dates <- RNetCDF::var.get.nc(nc0, "time") %>% as.Date(origin = origin)
RNetCDF::close.nc(nc0)
datesC <- dates %>% as.character
nc <- tidync::tidync(i)
varNames <- c("extinction", "altitude", "latitude", "longitude")
x <- sapply(varNames,
function(a)
{
substitute(tidync::hyper_tbl_cube(nc %>% tidync::activate(A))$mets[[B]], list(A = as.name(a), B = a)) %>% eval
}, simplify = FALSE)
## Group the profiles by calendar day; NaN extinctions become NA.
xx <- by(seq_along(datesC), datesC,
function(a)
{
list(
extinction = x$extinction[, a, drop = FALSE] %>% unclass %>% { `[<-`(., is.nan(.), NA) },
alt = x$altitude,
lat = x$latitude[a],
long = x$longitude[a]
)
}, simplify = FALSE) %>% unclass
xx
}, simplify = FALSE)
names(x) <- fileDates
save(x, file = paste(rdata_path, "AEROSOL-L2-LP-OSIRIS_ODIN-SASK_v7_3.RData", sep = "/"))
}
else
load(paste(rdata_path, "AEROSOL-L2-LP-OSIRIS_ODIN-SASK_v7_3.RData", sep = "/")) # Loads 'x'.
### Process the extinction data to calculate monthly SAOD.
saodDaily <- NULL
cat(fill = TRUE)
for (i in names(x)) { # Each 'i' is a "YYYYMM" file-date string.
re <- "(\\d{4})(\\d{2})"
yearMatches <- stringr::str_match(i, re)
yearValue <- as.numeric(yearMatches[, 2L])
monthValue <- as.numeric(yearMatches[, 3L])
saodDailyTemplate <-
data.frame(year = yearValue, month = monthValue, day = NA, saod = NA, check.names = FALSE, stringsAsFactors = FALSE)
for (j in seq_along(x[[i]])) { # Each 'j' indexes one day's set of profiles.
#dayValue <- as.numeric(stringr::str_match(names(x[[i]])[j], ".*?_\\d{4}m\\d{2}(\\d{2})\\..*$")[, 2])
#cat(" Processing object", paste(i, tools::file_path_sans_ext(names(x[[i]])[j]), sep = "/"), fill = TRUE); flush.console()
dayValue <- as.numeric(stringr::str_match(names(x[[i]])[j], "\\d{4}-\\d{2}-(\\d{2})")[, 2])
cat(" Processing object", names(x[[i]])[j], fill = TRUE); flush.console()
extinction <- x[[i]][[j]]$extinction
## Missing values are given as -9999.
#is.na(extinction) <- extinction == -9999
alt <- x[[i]][[j]]$alt
## One column per profile, row names = altitude levels.
extinction <- data.frame(extinction, check.names = FALSE, stringsAsFactors = FALSE)
rownames(extinction) <- alt
lat <- x[[i]][[j]]$lat
long <- x[[i]][[j]]$long
coords <- mapply(function(x, y) c(lat = x, long = y), lat, long, SIMPLIFY = FALSE)
## Get stratospheric subset of extinction values from 15–35 km. (After Sato et al. 1993 and Rieger et al. 2015;
## but v. Ridley et al. 2014 for including some aerosol effects below 15 km.)
keepRows <- alt >= 15 & alt <= 35
e <- subset(extinction, keepRows)
for (k in seq_along(coords)) {
attr(e[[k]], "alt") <- alt[keepRows]
attr(e[[k]], "coords") <- coords[[k]]
}
## Column-integrate each profile into a single SAOD value at 550 nm.
gridSaod <- sapply(e,
function(y)
{
r <- NA
## Calculate vertical column integral of aerosol extinction.
if (!all(is.na(y))) {
r <- integratex(attr(y, "alt"), y)$value
## Boucher - Atmospheric Aerosols--Properties and Climate Impacts (2015), p. 44 (Eq. 3.31):
## τ = τ_r × (λ / λ_r)^-α; λ = 550 nm, λ_r = 750 nm, τ_r is OSIRIS value, α = 2.3 (v. Rieger et al. 2015)
## = τ_r × 2.04, where τ_r is aerosol extinction integrated from 15–35 km
r <- r * 2.04
}
attr(r, "coords") <- attr(y, "coords")
r
}, simplify = FALSE
)
## Create global grid of 5° × 5° squares and bin each SAOD value in the correct square.
if (is.null(planetary_grid))
g <- make_planetary_grid()
else
g <- planetary_grid
dev_null <- sapply(gridSaod,
function(y)
{
coords <- attr(y, "coords")
lat <- coords["lat"]; long <- coords["long"]
rc <- find_planetary_grid_square(g, lat, long)
if (any(is.na(rc))) return ()
## Append this value to its grid square ('<<-' mutates the shared grid).
sq <- g[[rc["row"], rc["col"]]][[1]]
if (all(is.na(sq)))
g[[rc["row"], rc["col"]]][[1]] <<- as.vector(y)
else
g[[rc["row"], rc["col"]]][[1]] <<- c(sq, as.vector(y))
}
)
## From the global grid, create a data frame of mean values for every bin and their corresponding latitude weights.
d <- sapply(g,
function(y)
{
r <- c(value = NA, weight = attr(y, "weight"))
if (all(is.na(y[[1]]))) return (r)
r["value"] <- mean(y[[1]], na.rm = TRUE)
r
}, simplify = FALSE
)
d <- data.matrix(Reduce(rbind, d))
## Latitude-weighted global mean SAOD for this day.
saodToday <- stats::weighted.mean(d[, "value"], d[, "weight"], na.rm = TRUE)
is.na(saodToday) <- is.nan(saodToday)
saodTodayDf <- saodDailyTemplate
saodTodayDf$day <- dayValue
saodTodayDf$saod <- saodToday
saodDaily <- rbind(saodDaily, saodTodayDf, make.row.names = FALSE, stringsAsFactors = FALSE)
}
}
saod_daily <- saodDaily
## Side effect: writes 'saod_daily' to <rdata_path>/<daily_filename>.
save(saod_daily, file = paste(rdata_path, daily_filename, sep = "/"))
}
## Create a monthly-mean OSIRIS stratospheric aerosol optical depth (SAOD)
## series from the saved daily SAOD data (optionally regenerating it first).
#' @export
create_osiris_saod_data <- function(
path = NULL, # Directory holding the daily .RData; falls back to the package option, then ".".
filename = "OSIRIS-Odin_Stratospheric-Aerosol-Optical_550nm.RData",
series_name = "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global",
create_daily = FALSE, # TRUE to rebuild via 'create_osiris_daily_saod_data()' first.
... # Passed through to 'create_osiris_daily_saod_data()'.
)
{
if (is.null(path)) {
if (!is.null(getOption("climeseries_saod_data_dir")))
path <- getOption("climeseries_saod_data_dir")
else
path <- "."
}
if (create_daily)
create_osiris_daily_saod_data(rdata_path = path, daily_filename = filename, ...)
## Loads the object 'saod_daily' into this function's environment.
load(paste(path, filename, sep = "/"), envir = environment())
## Average the daily SAOD values into one row per year/month.
r <- plyr::arrange(Reduce(rbind,
by(saod_daily, list(saod_daily$year, saod_daily$month),
function(x) data.frame(year = x$year[1], month = x$month[1], flit = mean(x$saod, na.rm = TRUE),
check.names = FALSE, stringsAsFactors = FALSE),
simplify = FALSE)), year, month)
r$yr_part <- r$year + (2 * r$month - 1)/24 # Mid-month fractional year.
names(r)[names(r) %in% "flit"] <- series_name
r
}
## usage:
# inst <- get_climate_data(download = FALSE, baseline = FALSE)
# allSeries <- list(
# inst,
# create_osiris_saod_data()
# )
# d <- Reduce(merge_fun_factory(all = TRUE, by = c(Reduce(intersect, c(list(climeseries::common_columns), lapply(allSeries, names))))), allSeries)
# series <- c("GISS Stratospheric Aerosol Optical Depth (550 nm) Global", "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global")
# plot_climate_data(d, series, start = 1985, ylab = "SAOD", main = "Global Mean Stratospheric Aerosol Optical Depth")
## Save only OSIRIS data as CSV file.
# keepRows <- na_unwrap(d$`OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global`)
# write.csv(d[keepRows, c("year", "month", "yr_part", "OSIRIS Stratospheric Aerosol Optical Depth (550 nm) Global")], "./OSIRIS-SAOD_2001.11-2016.7.csv", row.names = FALSE)
## Collapse a monthly 'climeseries' data set to yearly means, averaging the
## "_uncertainty" columns in quadrature (root-mean-square) instead.
#' @export
make_yearly_data <- function(x, na_rm = TRUE, unwrap = TRUE, baseline = FALSE, incomplete_years_to_na = FALSE)
{
if (missing(x))
x <- get_climate_data(download = FALSE)
if (incomplete_years_to_na) {
## Blank every series value falling in a year with fewer than 12 months.
series <- get_climate_series_names(x, conf_int = TRUE)
yearTab <- table(x[, "year"])
incompleteYears <- as.numeric(names(yearTab)[yearTab != 12])
## For incomplete years, make all elements NA.
dev_null <- sapply(series, function(a) { is.na(x[, a]) <<- x[, "year"] %in% incompleteYears; nop() }); rm(dev_null)
}
## This doesn't account for the "_uncertainty" columns, though, whose squares should be averaged then 'sqrt()'ed.
#r0 <- data.table::data.table(x)[, lapply(.SD, function(a) { r <- NA_real_; if (!all(is.na(a))) r <- mean(a, na.rm=na_rm); r }), .SDcols = -common_columns[common_columns %nin% "year"], by = year]
## Use a better mean estimate for the "_uncertainty" columns.
## V. stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation/26647#26647
cnames <- get_climate_series_names(x, conf_int = TRUE)
## Split the columns: plain series vs. their "_uncertainty" companions.
l <- list(cnames[stringr::str_ends(cnames, "_uncertainty", negate = TRUE)], cnames[stringr::str_ends(cnames, "_uncertainty", negate = FALSE)])
## Pair each column set with its yearly aggregator: plain mean for the
## series, root-mean-square for the uncertainties.
r <- list(.vars = tibble::lst(!!l[[1]], !!l[[2]]),
.funs = tibble::lst(
function(a) { r <- NA_real_; if (!all(is.na(a))) r <- mean(a, na.rm = na_rm); r },
function(a) { r <- NA_real_; if (!all(is.na(a))) r <- sqrt(mean(a^2, na.rm = na_rm)); r }
)) %>%
## For applying multiple functions to different columns in 'summarize_at()', see:
## https://stackoverflow.com/questions/41109403/r-dplyr-summarise-multiple-functions-to-selected-variables/53981812#53981812
purrr::pmap(~ x %>% as.data.frame %>% dplyr::group_by(year) %>% dplyr::summarize_at(.x, .y)) %>%
purrr::reduce(dplyr::inner_join, by = "year")
if (unwrap)
r <- r[na_unwrap(r), ] # Trim leading/trailing all-NA rows.
r <- recenter_anomalies(as.data.frame(r), baseline = baseline, by_month = FALSE)
r
}
## usage:
## Reproduce a plot here: https://tamino.wordpress.com/2017/01/01/tony-hellers-snow-job/.
# g <- make_yearly_data(na_rm = FALSE) # Allow NA values for 'mean()'; possibly better for very seasonally sensitive series.
# series <- "Rutgers NH Snow Cover"
# h <- eval(substitute(g[na_unwrap(SERIES)][year >= min(year) & !is.na(SERIES)], list(SERIES = as.name(series))))
# plot(h$year, h[[series]]/1e6, lwd = 2, pch = 19, type = "o")
## Show the 'num_top_years' warmest years for each series, ranked by yearly
## mean anomaly.
#' @export
show_warmest_years <- function(
  x, # A 'climeseries' data set; the local cache is used if missing.
  series, # Character vector of series (column) names to rank.
  num_top_years = 10, # How many of the warmest years to report.
  start_year = NULL, end_year = current_year - 1, # Inclusive range of years considered.
  baseline = FALSE, # Passed to 'get_climate_data()' when 'x' is missing.
  simplify = TRUE # FALSE to include the actual anomaly values, not just the years
)
{
  if (missing(x))
    x <- get_climate_data(download = FALSE, baseline = baseline)
  if (is.null(start_year)) start_year <- min(x$year, na.rm = TRUE)
  xx <- x %>% dplyr::filter(year >= start_year & year <= end_year)
  ## Bug fix: rank the year-filtered data ('xx'), not the full record ('x');
  ## previously 'start_year'/'end_year' had no effect on the result.
  y <- make_yearly_data(oss(xx, series))
  l <- sapply(y[, -1, drop = FALSE],
    function(a) # Renamed from 'x' to avoid shadowing the data set.
    {
      ## Sort this series' yearly means hottest-first & keep the top rows.
      r <- dplyr::arrange(dataframe(year = y$year, temp = a), desc(temp))[seq(num_top_years), ]
      if (simplify) r$year else r
    }, simplify = simplify)
  l
}
## usage:
# series <- c("GISTEMP v4 Global", "NCEI Global", "HadCRUT4 Global", "Cowtan & Way Krig. Global", "BEST Global (Air Ice Temp.)", "JMA Global", "RSS TLT 4.0 -70.0/82.5", "UAH TLT 6.0 Global", "JRA-55 Surface Air Global", "ERA5 Surface Air Global", "NCEP/NCAR R1 Surface Air Global")
# show_warmest_years(series = series)
## Report the anomaly difference & implied decadal rate between two years for
## one or more series (optionally from a LOESS fit), & plot the series.
#' @export
get_yearly_difference <- function(
series, # Character vector of series (column) names.
start, end = current_year - 1, # The two years whose values are differenced.
data, # A 'climeseries' data set; the local cache is used if missing.
digits = 3, # Significant digits for the printed tables.
unit = "\u00b0C", # Unit label used in the printed output.
loess = FALSE, # TRUE to difference LOESS-fit values instead of raw yearly means.
plot_baseline = TRUE, # Passed to 'plot_climate_data()' as its 'baseline' argument.
save_png = FALSE, # Passed to 'plot_climate_data()'.
... # Passed to 'plot_climate_data()'.
)
{
if (missing(data))
data <- get_climate_data(download = FALSE, baseline = FALSE)
# if (loess) data <- add_loess_variables(data, series, ...) # Ends up being a poor fit for yearly data
g <- make_yearly_data(data)
if (loess)
g <- add_loess_variables(g, series)
## Keep just the two rows (start & end years) for the requested columns.
## N.B. '%_%' is a package-local paste operator.
h <- g[c(which(g$year == start), which(g$year == end)), series %_% ifelse(loess, " (LOESS fit)", ""), drop = FALSE] %>%
`rownames<-`(c(start, end))
plot_climate_data(g, series %>% unique, start, end, yearly = FALSE, baseline = plot_baseline, lwd = 2, conf_int = FALSE,
make_standardized_plot_filename... = list(suffix = ""), loess = loess, save_png = save_png, ...)
## N.B. Use e.g. stringi::stri_escape_unicode("°") to get Unicode value(s) easily.
cat("Difference in ", unit ," from ", start, "\u2013", end, sep = "", fill = TRUE)
print(t(h[2, ] - h[1, ]) %>% `colnames<-`("diff"), digits = digits, row.names = FALSE)
cat(fill = TRUE)
cat("Decadal rate in ", unit ,"/dec. from ", start, "\u2013", end, sep = "", fill = TRUE)
print((10 * t(h[2, ] - h[1, ]) / (end - start)) %>% `colnames<-`("rate"), digits = digits, row.names = FALSE)
attr(h, "range") <- c(start = start, end = end)
#browser()
## Two-row data frame of the start/end values (also printed above).
return (h)
}
## usage:
# series <- c("GISTEMP v4 Global", "NCEI Global", "HadCRUT5 Global", "BEST Global (Air Ice Temp.)", "JMA Global")
# ytd <- get_yearly_difference(series, 1880)
# ytd <- get_yearly_difference(series, 1880, loess = TRUE)
# ytd <- get_yearly_difference(series, 1880, loess = TRUE, loess... = list(span = 0.4))
# ytd <- get_yearly_difference(series, 1970)
## Make "cranberry plots" à la http://variable-variability.blogspot.com/2017/01/cherry-picking-short-term-trends.html.
## For each series: the yearly values with a LOESS fit, plus the LOESS
## residuals (with their standard deviation) in a second panel.
#' @export
make_vv_cranberry_plot <- function(x, series, start, end, ylab, span=NULL)
{
  if (missing(x)) g <- make_yearly_data()
  else g <- make_yearly_data(x)
  ## N.B. Data-table-style subsetting — assumes 'g' supports 'g[year >= start]'.
  if (!missing(start)) g <- g[year >= start]
  if (!missing(end)) g <- g[year <= end]
  if (missing(ylab))
    ylab <- expression(paste("Temperature Anomaly (", phantom(l) * degree, "C)", sep=""))
  if (dev.cur() == 1L) # Only the null device is open; otherwise plot to the active device.
    dev.new(width=9, height=8) # New default device.
  for (i in series) {
    h <- g[, c("year", i), with=FALSE]
    h <- h[na_unwrap(h[[i]])]
    layout(matrix(c(1, 2)))
    ## Plot the time series.
    plot(h$year, h[[i]],
      lwd = 2, type = "l", col = "gray",
      ## Bug fix: title each panel with the current series 'i', not the whole
      ## 'series' vector.
      main = i, xlab = "year", ylab = ylab,
      panel.first=(function(){ grid(); abline(h=0.0) })(),
      panel.last=points(g$year, g[[i]], pch=19, col="red"))
    #l <- loess(h[[i]] ~ h$year, span=span)
    l <- LOESS(h[[i]] ~ h$year, span=span) # Package-local LOESS wrapper.
    lines(l$x, l$fitted, lwd=3, col="blue", type="l")
    ## Plot the LOESS residuals.
    plot(l$x, l$residuals, lwd=2,
      type="l", col="gray",
      main = "Standard Deviation = " %_% sprintf(sd(l$residuals, na.rm=TRUE), fmt="%.3f"), xlab = "year", ylab = ylab,
      panel.last=points(l$x, l$residuals, pch=19, col="red"))
  }
  return (nop())
}
## usage:
# series <- c("BEST Global (Water Ice Temp.)", "UAH TLT 6.0 Global")
# g <- remove_exogenous_influences(series=series)
# make_vv_cranberry_plot(g, series[1], start=1880, span=0.3)
# make_vv_cranberry_plot(g, series[1] %_% " (adj.)", start=1880, span=0.3)
# make_vv_cranberry_plot(g, series[2], span=0.9)
# make_vv_cranberry_plot(g, series[2] %_% " (adj.)", span=0.9)
## Basically a "show hottest year" function, but slightly configurable.
#' @export
show_single_value <- function
(
series, # Character vector of series (column) names.
baseline = TRUE, # Passed to 'get_climate_data()' when 'data' is missing.
data, # A 'climeseries' data set; the local cache is used if missing.
fun = which.max, # Picks the reported row per series (e.g. 'which.min' for coldest).
value_name = "temp anom. (\u00b0C)", # Column label for the reported value.
format = "%.3f", # sprintf format for the printed value.
this_year = current_year, # Year whose rank is reported in the last column.
... # Passed to 'fun'.
)
{
if (missing(data))
data <- get_climate_data(download = FALSE, baseline = baseline)
## N.B. Data must have complete year-month pairs for this to be accurate!
## This doesn't work correctly, so check:
## For each year × series, TRUE iff every month of that year is non-NA.
complete <- data %>% dplyr::select(!!series) %>%
dplyr::group_by(data$year) %>%
dplyr::group_map(
function(x, y)
{
x %>% dplyr::mutate_all(function(m) !is.na(m)) %>%
dplyr::summarize_all(all) %>%
dplyr::bind_cols(y, .)
}) %>%
purrr::reduce(dplyr::bind_rows) %>%
dplyr::rename(year = 1)
baseline <- attr(data, "baseline")
g <- make_yearly_data(data)[, c("year", series)]
## One row per series: the year picked by 'fun', its value, & whether that
## year's monthly record is complete.
single <- sapply(series,
function(a)
{
m <- fun(g[[a]], ...)
r <- data.frame(year = g$year[m], check.names = FALSE)
r[[value_name]] <- g[[a]][m]
r[["complete?"]] <- c("no", "yes")[complete[[a]][m] + 1]
r
}, simplify = FALSE) %>%
purrr::reduce(dplyr::bind_rows)
rownames(single) <- series
## Most recent year with all 12 months present, per series.
single[["last complete"]] <- sapply(complete[, -1], function(a) { complete$year[a %>% which %>% max] })
## Rank of 'this_year' among all years, largest value first (NA where the
## sorted value is NA).
this_year_rank <- sapply(g[, -1],
function(a) {
o <- order(a, decreasing = TRUE)
rank_map <- structure(seq(NROW(g)) %>% `is.na<-`(is.na(a[o])), .Names = g$year[o])
rank_map[this_year %>% as.character] %>% as.vector
})
single[[paste(this_year, "rank")]] <- this_year_rank
## Print a copy with the value column formatted; return the numeric version.
print(single %>%
tibble::rownames_to_column() %>%
dplyr::mutate(!!value_name := sprintf(format, .[[value_name]])) %>%
tibble::column_to_rownames()
)
if (!is.null(baseline))
cat("\nBaseline: ", min(baseline), "\u2014", max(baseline), fill = TRUE, sep = "")
attr(single, "baseline") <- baseline
single
}
## usage:
# series <- c("GISTEMP Global", "NCEI Global", "HadCRUT4 Global", "Cowtan & Way Krig. Global", "BEST Global (Water Ice Temp.)", "JMA Global", "RSS TLT 3.3 -70.0/82.5", "UAH TLT 6.0 Global")
# hottest <- show_single_value(series)
## Crudely based on Cowtan et al. 2015, dx.doi.org/10.1002/2015GL064888.
#' Create blended CMIP5 "tas + tos" model data
#'
#' Reads the CMIP5 ".dat" files under 'extdata/cmip5/tas + tos', blends each
#' model/type/RCP group's land-air ("tas land") & sea-surface ("tos") series
#' using 28%/72% land/ocean weights, & merges the results into one table of
#' monthly model anomalies.
#'
#' @param baseline Baseline years for centering anomalies; if NULL, no baseline centering is done.
#' @param save_to_package If TRUE, save the result as "cmip5.RData" & "cmip5_raw.RData" beside the source files.
#' @return Data frame of monthly model series (renamed "m0001", "m0002", ...) with attributes "ensemble", "model_type", "model", & "scenario".
#' @export
create_cmip5_tas_tos_data <- function(baseline=defaultBaseline, save_to_package=FALSE)
{
data_dir <- system.file("extdata", package="climeseries")
ensemble <- "cmip5"
subdir <- "tas + tos"
path <- paste(data_dir, ensemble, subdir, sep="/")
ff <- list.files(path, "^.*?\\.dat$", recursive=TRUE, ignore.case=TRUE, full.names=TRUE)
## Scrape model name, variable, ensemble type, & RCP pathway from each file's
## path & its header comment line ("... from <model>, ...").
modelSummary <- sapply(ff,
function(a) {
modelVariable <- stringr::str_match(a, "\\S(tas|tas land|tos)\\S")[2L]
pathway <- stringr::str_match(a, "rcp(\\d{2})")[2L]
modelType <- stringr::str_match(a, "\\S(all models|all members|one member per model)\\S")[2L]
l <- readLines(a)
re <- "^#.*?from\\s+(.*?),?\\s+(RCP|experiment|model).*$"
modelLine <- grep(re, l, value=TRUE)
model <- stringr::str_match(modelLine, re)[2L]
## One summary row per file; 'RCP' e.g. "85" -> 8.5.
r <- data.frame(model=model, type=tolower(modelType), variable=tolower(modelVariable), RCP=as.numeric(pathway)/10, path=a,
check.rows=FALSE, check.names=FALSE, fix.empty.names=FALSE, stringsAsFactors=FALSE)
r
}, simplify = FALSE
)
modelSummary <- Reduce(rbind, modelSummary)
modelSummary <- subset(modelSummary, modelSummary$variable %in% c("tas land", "tos")) # Not necessary here, but for genericness.
#xtabs(~ model + variable + type + RCP, modelSummary)
## Blend each model/type/RCP group; groups lacking both variables are dropped (return NULL).
l <- dlply(modelSummary, ~ model + type + RCP,
function(a)
{
if (is.null(a) || length(unique(a$variable)) < 2)
return (NULL)
## Earth is approx. 72% water, 28% land.
weightMap <- c(`tas land`=0.28, tos=0.72)
w <- weightMap[a$variable]
## Split each variable's total weight evenly among its files in this group.
w <- w / table(a$variable)[names(w)]
## Now read in the files to be averaged together. Returns a list of the separate time series.
weightedValues <- mapply(a$model, w, a$path, seq(nrow(a)),
FUN = function (model, weight, path, n)
{
## Wide year x 12-month table -> long (year, month, temp) records.
tab <- read.table(path)
flit <- melt(tab[, 1L:13L], id.vars="V1", variable.name="month", value.name="temp")
for (i in names(flit)) flit[[i]] <- as.numeric(flit[[i]])
flit <- arrange(flit, V1, month)
x <- data.frame(year=flit$V1, met_year=NA, yr_part=flit$V1 + (2 * flit$month - 1)/24, month=flit$month, temp=flit$temp, check.names=FALSE, stringsAsFactors=FALSE)
modelDesignation <- "m" %_% sprintf("%04d", n)
x[[modelDesignation]] <- x$temp
if (!is.null(baseline)) {
## Monthly climatology over the baseline years (0.0 where undefined/NaN).
flit <- subset(x, x$year %in% baseline)
bma <- tapply(flit$temp, flit$month, mean, na.rm=TRUE)
x$base <- NA
l_ply(names(bma), function(s) { v <- bma[s]; if (is.nan(v)) v <- 0.0; x$base[x$month == s] <<- v })
## Center anomalies on average baseline-period temperatures.
x[[modelDesignation]] <- round(x$temp - x$base, 3L)
}
x <- x[, c(common_columns, modelDesignation)]
## Weight the series.
x[[modelDesignation]] <- x[[modelDesignation]] * weight
attr(x, "weight") <- weight
attr(x, "model") <- model
x
}, SIMPLIFY = FALSE
)
## Merge the members on the common time columns & sum the weighted members
## into a single blended series named "<model>_<RCP>".
m <- Reduce(merge_fun_factory(all=TRUE, by=common_columns), weightedValues)
modelDesignation <- paste0(unique(a$model), "_", unique(a$RCP))
m[[modelDesignation]] <- rowSums(m[, colnames(m) %nin% common_columns])
m[, c(names(m)[names(m) %in% common_columns], modelDesignation)]
}
)
## Drop groups that couldn't be blended, & merge the rest into one table.
keepElements <- !sapply(l, is.null)
modelDetails <- subset(attr(l,"split_labels"), keepElements)
m <- l[keepElements]
m <- Reduce(merge_fun_factory(all=TRUE, by=common_columns), m)
m <- recenter_anomalies(m, baseline=baseline) # Is this necessary?
## Make similar to previously made model objects.
colNames <- colnames(m)
colnames(m)[colNames %nin% common_columns] <- "m" %_% sprintf("%04d", seq(sum(colNames %nin% common_columns)))
attr(m, "ensemble") <- "CMIP5"
attr(m, "model_type") <- subdir
attr(m, "model") <- modelDetails$model
scenario <- "RCP " %_% sprintf(modelDetails$RCP, fmt="%.1f")
names(scenario) <- colnames(m)[colNames %nin% common_columns]
attr(m, "scenario") <- factor(scenario)
cmip5 <- m
if (save_to_package) {
save(cmip5, file=paste(path, "cmip5.RData", sep="/"))
save(cmip5, file=paste(path, "cmip5_raw.RData", sep="/")) # Not really "raw," but oh well.
}
## To create the package data set:
# m <- create_cmip5_tas_tos_data(save_to_package=TRUE)
cmip5
}
## usage:
# inst <- get_climate_data(download=FALSE, baseline=TRUE)
# cmip5 <- get_models_data(ensemble="cmip5", subdir="tas + tos")
# series <- c("HadCRUT4 Global")
## Like Fig. 4(b) of Cowtan et al. 2015:
# plot_models_and_climate_data(inst, cmip5, series=series, scenario="RCP 8.5", start=1880, end=2020, ma=12, ma_i=12, baseline=1961:1990, center_fun="mean", smooth_envelope=FALSE, envelope_type="range", envelope_text="range", ylim=c(-1.0, 1.5), conf_int_i=FALSE, col_i_fun="topo.colors", col_i_fun...=list())
#' Compute LOESS-smoothed versions of climate series
#'
#' For each series, missing values are first interpolated ('interpNA', "fmm"),
#' a LOESS curve is fit against the time axis, & the fitted values are stored
#' in a new "<series> (LOESS fit)" column. The input's "baseline" attribute is
#' preserved on the result.
#'
#' @param inst Climate-data frame (monthly if it has a "month" column, else yearly).
#' @param series Character vector of series (column) names to smooth.
#' @param loess... List of extra arguments merged into the 'LOESS' call (e.g. 'span').
#' @param unwrap If TRUE, trim rows flagged by 'na_unwrap()' on the series first.
#' @param keep_interpolated If TRUE, keep the intermediate "<series> (interpolated)" columns.
#' @param ... Currently unused.
#' @return Data frame of the common time columns, the input series, & the new LOESS-fit columns.
#' @export
create_loess_variables <- function(inst, series, loess... = list(), unwrap = TRUE, keep_interpolated = FALSE, ...)
{
## Monthly tables are fit against fractional years; yearly tables against "year".
time_var <- if (is.null(inst$month)) "year" else "yr_part"
saved_baseline <- attr(inst, "baseline")
out <- inst[, c(names(inst)[names(inst) %in% common_columns], series)]
if (unwrap)
out <- subset(out, na_unwrap(out[, series]))
for (s in series) {
interp_name <- s %_% " (interpolated)"
out[[interp_name]] <- drop(interpNA(out[, s], "fmm"))
fit_args <- list(
formula = eval(substitute(lhs ~ rhs, list(lhs = as.name(interp_name), rhs = as.name(time_var)))),
data = out,
span = NULL # Perform best fit to data (was 'span = 0.2')
)
fit_args <- utils::modifyList(fit_args, loess..., keep.null = TRUE)
## 'LOESS' removes NAs from the fit, so map its fitted values back onto the
## non-NA positions of the interpolated column.
fit <- do.call(LOESS, fit_args)
smoothed <- out[[interp_name]]
smoothed[!is.na(smoothed)] <- fit$fit
out[[s %_% " (LOESS fit)"]] <- smoothed
if (!keep_interpolated)
out[[interp_name]] <- NULL
}
attr(out, "baseline") <- saved_baseline
out
}
#' Add LOESS-smoothed versions of series to a climate-data frame
#'
#' Thin wrapper around 'create_loess_variables()' that left-joins the new
#' LOESS columns back onto the full input table on the common time columns,
#' preserving the input's "baseline" attribute.
#' @export
add_loess_variables <- function(inst, series, ...)
{
saved_baseline <- attr(inst, "baseline")
## Build the LOESS-fit columns for the requested series.
loess_cols <- create_loess_variables(inst, series, ...)
key_cols <- names(loess_cols)[names(loess_cols) %in% common_columns]
## Left-join everything except the raw series columns back onto 'inst'.
out <- base::merge(inst, loess_cols[, setdiff(names(loess_cols), series)], by = key_cols, all.x = TRUE)
attr(out, "baseline") <- saved_baseline
out
}
## usage:
# series <- c("GISTEMP Zonal 64N-90N", "GISTEMP Zonal 44N-64N", "GISTEMP Zonal 24N-44N", "GISTEMP Zonal EQU-24N", "GISTEMP Zonal 24S-EQU", "GISTEMP Zonal 44S-24S", "GISTEMP Zonal 64S-44S", "GISTEMP Zonal 90S-64S")
# d <- get_climate_data(download=FALSE, baseline=TRUE)
# g <- add_loess_variables(d, series, loess...=list(span=0.4))
# plot_climate_data(g, series %_% " (LOESS fit)")
## Fit segmented linear models to selected climate data.
#' Fit piecewise-linear (segmented) trends to selected climate series
#'
#' For each series, candidate breakpoints are located with
#' 'strucchange::breakpoints()' & then refined by a segmented regression
#' ('segmented::segmented()') seeded at those estimates. If the segmented fit
#' fails with a breakpoint at the series boundary, the boundary-nearest
#' breakpoint is dropped & the fit retried.
#'
#' @param x Climate-data frame (e.g. from 'get_climate_data()').
#' @param series Character vector of series (column) names to fit.
#' @param col Plot colors, one per series; truncated/padded to 'length(series)'.
#' @param start,end Optional time range (truncated to whole years when 'yearly').
#' @param yearly If TRUE, fit yearly averages (via 'make_yearly_data()'); otherwise monthly values on "yr_part".
#' @param breakpoints...,segmented...,seg.control...,make_yearly_data... Lists of extra arguments merged into the respective calls.
#' @param ... Currently unused.
#' @return A list with the input data, series, range, colors, & a "piecewise" element holding per-series results ('col', 'bp', 'breaks', 'lm', 'sm').
#' @export
fit_segmented_model <- function(
x,
series,
col = suppressWarnings(RColorBrewer::brewer.pal(length(series),"Paired")),
start = NULL, end = NULL,
yearly = TRUE,
breakpoints... = list(),
segmented... = list(), seg.control... = list(seed = 100),
make_yearly_data... = list(),
...
)
{
r <- list(data = x, series = series)
r$range <- list(start = start, end = end)
r$col <- col
length(r$col) <- length(r$series); names(r$col) <- r$series
if (!yearly) {
g <- r$data
}
else {
make_yearly_dataArgs <- list(
x = r$data
)
make_yearly_dataArgs <- utils::modifyList(make_yearly_dataArgs, make_yearly_data..., keep.null = TRUE)
g <- as.data.frame(do.call("make_yearly_data", make_yearly_dataArgs))
if (!is.null(start)) start <- trunc(start)
if (!is.null(end)) end <- trunc(end)
}
yearVar <- ifelse(yearly, "year", "yr_part")
r$piecewise <- list()
for (i in r$series) {
r$piecewise[[i]] <- list()
## Bug fix: was 'r$piecewise$col[i]', which is always NULL ('r$piecewise' has
## no "col" element); the per-series colors were stored in 'r$col' above.
r$piecewise[[i]]$col <- r$col[i]
#h <- oss(g, i)[na_unwrap(g[[i]]), , drop = FALSE]
h <- oss(g, i)[na_unwrap(g[, i]), , drop = FALSE] %>% as.data.frame
h <- h[h[[yearVar]] >= ifelse(!is.null(start), start, -Inf) & h[[yearVar]] <= ifelse(!is.null(end), end, Inf), ]
## Locate candidate breakpoints for '<series> ~ <time>'.
breakpointsArgs <- list(
formula = eval(substitute(Y ~ X, list(X = as.name(yearVar), Y = as.name(i)))),
data = h,
breaks = NULL
)
breakpointsArgs <- utils::modifyList(breakpointsArgs, breakpoints..., keep.null = TRUE)
r$piecewise[[i]]$bp <- do.call(strucchange::breakpoints, breakpointsArgs)
r$piecewise[[i]]$breaks <- r$piecewise[[i]]$bp$X[, yearVar][r$piecewise[[i]]$bp$breakpoint]
seg.controlArgs <- list(
#stop.if.error = TRUE,
fix.npsi = TRUE,
K = length(r$piecewise[[i]]$breaks),
n.boot = 250,
random = FALSE,
h = 0.3
)
seg.controlArgs <- utils::modifyList(seg.controlArgs, seg.control..., keep.null = TRUE)
segControl <- do.call(segmented::seg.control, seg.controlArgs)
## Plain linear fit ('x = TRUE, y = TRUE' so downstream code can reuse the data).
r$piecewise[[i]]$lm <- lm(breakpointsArgs$formula, data = h, x = TRUE, y = TRUE)
segmentedArgs <- list(
obj = r$piecewise[[i]]$lm,
seg.Z = as.formula(paste("~", yearVar)),
psi = r$piecewise[[i]]$breaks,
control = segControl
)
segmentedArgs <- utils::modifyList(segmentedArgs, segmented..., keep.null = TRUE)
#r$piecewise[[i]]$sm <- do.call("segmented", segmentedArgs)
## Retry the segmented fit, dropping the breakpoint nearest a boundary on each
## failure (via the "restart" restart), until it succeeds or errors out.
run_segmented <- function()
{
mf <- model.frame(r$piecewise[[i]]$lm)
while (TRUE) {
withRestarts({
sm <- do.call(segmented::segmented, segmentedArgs)
break
},
restart = function() {
## Which breakpoint is closest to the start or end of the time series?
if (length(segmentedArgs$psi) > 1L)
segmentedArgs$psi <<- segmentedArgs$psi[-which.min(pmin(segmentedArgs$psi, NROW(mf) - segmentedArgs$psi + 1))]
})
}
sm
}
tryCatch({
withCallingHandlers({
sm <- run_segmented()
},
error = function(e) {
message("Error: ", e$message)
if (any(grepl("one coef is NA: breakpoint(s) at the boundary", e$message, fixed = TRUE)))
invokeRestart("restart")
}
)
r$piecewise[[i]]$sm <- sm
}, error = function(e) { message("Warning: No breakpoint(s) found") })
}
r
}
#' Find the year & month nearest to a fractional-year value
#'
#' Builds a grid of mid-month fractional years & returns the (year, month)
#' whose mid-month value is nearest 'x', optionally restricted to grid points
#' at/above or at/below 'x'.
#'
#' @param yr_part Optional numeric vector whose range limits the search grid; if missing, the grid is built from 'trunc(x)' itself.
#' @param x Fractional-year value to match (only 'x[1]' is used when 'yr_part' is given).
#' @param nearest_type Match direction: "nearest" (either side), "above" (>= x), or "below" (<= x).
#' @param as_data_frame If TRUE, return a one-row data frame; otherwise a named vector c(year, month).
#' @return The year & month of the grid point nearest 'x'.
#' @export
nearest_year_month_from_numeric <- function(yr_part, x, nearest_type = c("nearest", "above", "below"), as_data_frame = FALSE)
{
nearest_type <- match.arg(nearest_type)
if (missing(yr_part)) {
## Bug fix: the original passed 'by = 1' to 'expand.grid()' (apparently
## copy-pasted from the 'seq(..., by = 1)' call below), which silently added
## a spurious constant "by" column to the grid.
flit <- rev(expand.grid(month = 1:12, year = trunc(x)))
flit$yr_part <- flit$year + (2 * flit$month - 1)/24 # Mid-month fractional years.
}
else {
r <- range(yr_part)
x <- x[1]
flit <- rev(expand.grid(month = 1:12, year = seq(floor(r[1]), floor(r[2]), by = 1)))
flit$yr_part <- flit$year + (2 * flit$month - 1)/24
## Allow fuzzy equality of the start- & endpoints (sometimes necessary).
isEqualStart <- is_equal(flit$yr_part, r[1])
isEqualEnd <- is_equal(flit$yr_part, r[2])
flit <- flit[(isEqualStart | flit$yr_part > r[1]) & (flit$yr_part < r[2] | isEqualEnd), ]
}
## Restrict candidates according to the requested match direction.
isEqual <- is_equal(flit$yr_part, x)
egrid <- switch(nearest_type,
`above` = flit[isEqual | flit$yr_part > x, ],
`below` = flit[flit$yr_part < x | isEqual, ],
flit
)
r <- egrid[nearest(egrid$yr_part, x), c("year", "month")]
if (!as_data_frame)
r <- unlist(r)
r
}
#' Create a time series from a gridded data set (unfinished stub)
#'
#' NOTE(review): this function is an unfinished stub — it resolves its default
#' arguments & then stops at "To be continued!"; it does not yet build or
#' return a time series (the last 'if' statement is the implicit return value).
#'
#' @param x Climate-data frame; defaults to 'get_climate_data(download = FALSE, baseline = FALSE)' when missing.
#' @param sub_lat,sub_long Latitude/longitude bounds of the subgrid (currently unused).
#' @param data_dir Directory for local data files; falls back to 'getwd()' when NULL.
#' @param series_suffix Suffix for the output series name (currently unused).
#' @export
create_timeseries_from_gridded <- function(
x,
sub_lat = c(-90, 90), sub_long = c(-180, 180),
data_dir = getOption("climeseries_data_dir"),
series_suffix = NULL
)
{
if (missing(x))
x <- get_climate_data(download = FALSE, baseline = FALSE)
if (is.null(data_dir)) data_dir <- getwd()
## To be continued!
}
#' Create an area-weighted zonal series from a gridded temperature data set
#'
#' Downloads (or reuses a local copy of) the netCDF grid selected by 'what',
#' computes a cosine-latitude-weighted mean anomaly over the requested
#' lat/long subgrid for every time step, & merges the resulting monthly
#' series into 'x' (or returns it alone when 'x' is NULL).
#'
#' @param x Existing climate-data frame to merge into; when NULL, the new series is returned alone (with 'yr_part' added).
#' @param sub_lat,sub_long Two-element lat/long bounds of the subgrid (degrees; S & W negative).
#' @param what Which gridded data set to use; one of the names of 'metadata'.
#' @param data_dir Download/cache directory; 'getwd()' when NULL.
#' @param metadata Per-data-set URL, netCDF temperature-variable name, & output series name.
#' @param series_suffix Optional suffix for the output series name; when NULL, a subgrid description such as " (90S-90N, 180W-180E)" is used.
#' @param use_local If TRUE, skip the download when the file already exists locally.
#' @return 'x' with the new series merged on (year, month), or the new series alone when 'x' is NULL.
#' @export
create_zonal_data <- function(
x, # series from call to 'get_climate_data()'
sub_lat = c(-90, 90), sub_long = c(-180, 180),
what = c("hadcrut", "hadsst", "crutem", "cw", "be"),
data_dir = getOption("climeseries_data_dir"),
metadata = list( # Names should be same as 'what' options
## HadCRUT4 url: https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT.4.6.0.0.median.nc
hadcrut = list(
url = "https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc",
tempvar = "tas_mean",
series = "HadCRUT5"
),
hadsst = list(
url = "https://www.metoffice.gov.uk/hadobs/hadsst4/data/netcdf/HadSST.4.0.1.0_median.nc",
tempvar = "tos",
series = "HadSST4"
),
crutem = list(
url = "https://crudata.uea.ac.uk/cru/data/temperature/CRUTEM.5.0.1.0.anomalies.nc",
tempvar = "tas",
series = "CRUTEM5"
),
cw = list(
url = "http://www-users.york.ac.uk/~kdc3/papers/coverage2013/had4_krig_v2_0_0.nc.gz",
tempvar = "temperature_anomaly",
series = "Cowtan & Way Krig. Land+SST"
),
be = list(
url = "https://berkeley-earth-temperature.s3.us-west-1.amazonaws.com/Global/Gridded/Land_and_Ocean_LatLong1.nc",
tempvar = "temperature",
series = "BE Land+SST (Air Ice Temp.)"
)
),
series_suffix = NULL,
use_local = FALSE
)
{
what <- match.arg(what)
if (missing(x))
x <- get_climate_data(download = FALSE, baseline = FALSE)
mergeWithOtherData <- TRUE
if (is.null(x))
mergeWithOtherData <- FALSE
if (is.null(data_dir)) data_dir <- getwd()
gurl <- metadata[[what]]$url
tempVar <- metadata[[what]]$tempvar
series <- metadata[[what]]$series
filePathTemplate <- sprintf("%s/%%s", data_dir)
filePath <- sprintf(filePathTemplate, basename(gurl))
## Fetch the netCDF file; the Cowtan & Way grid is gzipped & needs extraction.
filePath <- switch(what,
cw = {
if (!use_local || !file.exists(filePath))
download.file(gurl, filePath, mode = "wb", quiet = TRUE)
R.utils::gunzip(filePath, overwrite = TRUE, remove = FALSE)
flit <- basename(tools::file_path_sans_ext(gurl))
filePath <- sprintf(filePathTemplate, flit)
filePath
},
hadcrut =,
hadsst =,
crutem =,
be = {
if (!use_local || !file.exists(filePath))
download.file(gurl, filePath, mode = "wb", quiet = TRUE)
filePath
}
)
n <- ncdf4::nc_open(filePath) # 'print(n)' or just 'n' for details.
a <- ncdf4::ncvar_get(n, tempVar)
## Structure of 'a' is temperature_anomaly[longitude, latitude, time], 72 × 36 × Inf (monthly since Jan. 1850)
lat <- ncdf4::ncvar_get(n, "latitude")
long <- ncdf4::ncvar_get(n, "longitude")
times <- ncdf4::ncvar_get(n, "time")
tunits <- ncdf4::ncatt_get(n,"time", "units")
ncdf4::nc_close(n)
## Convert the netCDF "time" axis to (year, month): BE uses fractional years
## ("year A.D."), the others days since 1850-01-01 (per 'tunits' below).
if (what == "be") {
tunits
# $value
# [1] "year A.D."
r <- range(trunc(times))
flit <- expand.grid(month = 1:12, year = seq(r[1], r[2], by = 1))
flit$yr_part <- flit$year + (2 * flit$month - 1)/24
flit <- data.table::data.table(flit)
## This data set should have the same NROW as the "time" dimension of 'a':
h <- flit[data.table::data.table(yr_part = times), roll = "nearest", on = "yr_part"] %>%
as.data.frame %>% dplyr::select(year, month)
} else {
tunits
# $value
# [1] "days since 1850-1-1 00:00:00"
dtimes <- as.Date(times, origin = "1850-01-01")
## This data set should have the same NROW as the "time" dimension of 'a':
## NOTE(review): 'year()'/'month()' are presumably lubridate accessors — not visible here; confirm.
h <- dataframe(year = year(dtimes), month = month(dtimes))
}
## Cosine-latitude-weighted mean over the subgrid for each time slice.
flit <- apply(a, 3,
function(y)
{
x <- t(y)
w <- cos(matrix(rep(lat, NCOL(x)), ncol = NCOL(x), byrow = FALSE) * (pi / 180)) # Latitude weights.
## Use only subgrid for calculations.
keepSubGrid <- list(
lat = lat >= sub_lat[1] & lat <= sub_lat[2],
long = long >= sub_long[1] & long <= sub_long[2]
)
x1 <- x[keepSubGrid$lat, keepSubGrid$long, drop = FALSE]
w1 <- w[keepSubGrid$lat, keepSubGrid$long, drop = FALSE]
nlat <- length(lat[keepSubGrid$lat])
temp <- NULL
## NOTE(review): since 'nlat == nrow(x1)' here, this loop appears to run
## exactly once, averaging the whole subgrid in a single step.
for (i in seq(1L, nrow(x1), by = nlat)) {
xi <- data.matrix(x1[i:(i + nlat - 1L), ])
tempi <- stats::weighted.mean(xi, w1, na.rm = TRUE)
temp <- c(temp, tempi)
}
temp
})
is.na(flit) <- is.nan(flit)
## E.g. -90 -> "90S", 90 -> "90N" for the default series-name suffix.
lat_long_to_text <- function(x, sufs) { suf <- sufs[2]; r <- abs(x); if (x < 0) suf <- sufs[1]; r %_% suf }
subLatText <- sapply(sub_lat, lat_long_to_text, sufs = c("S", "N"), simplify = TRUE)
subLongText <- sapply(sub_long, lat_long_to_text, sufs = c("W", "E"), simplify = TRUE)
if (is.null(series_suffix))
seriesOut <- paste0(series, " (", paste(subLatText, collapse = "-"), ", ", paste(subLongText, collapse = "-"), ")")
else
seriesOut <- paste0(series, series_suffix)
h[[seriesOut]] <- flit
if (mergeWithOtherData)
x <- merge(x, h, by = c("year", "month"), all = TRUE)
else
x <- h %>% dplyr::mutate(yr_part = year + (2 * month - 1)/24, .after = "month")
x
}
## usage:
# g <- create_zonal_data(what = "cw", use_local = FALSE)
# series <- c("HadCRUT5 Global", "HadCRUT5 (90S-90N, 180W-180E)")
# series <- c("BEST Global (Air Ice Temp.)", "BEST Global (Water Ice Temp.)", "BE Land+SST (Air Ice Temp.) (90S-90N, 180W-180E)")
# series <- c("Cowtan & Way Krig. Global", "Cowtan & Way Krig. Land+SST (90S-90N, 180W-180E)")
# plot_climate_data(g, series, yearly = TRUE) # These should mostly overlap.
#
# g <- create_zonal_data(what = "be", sub_lat = c(0, 90), use_local = TRUE)
# series <- c("BEST Global (Air Ice Temp.)", "BEST Global (Water Ice Temp.)", "BE Land+SST (Air Ice Temp.) (0N-90N, 180W-180E)")
# plot_climate_data(g, series, yearly = TRUE) # These should mostly overlap.
#
# g <- create_zonal_data(what = "be", sub_lat = c(-90, 0), use_local = TRUE)
# series <- c("BEST Global (Air Ice Temp.)", "BEST Global (Water Ice Temp.)", "BE Land+SST (Air Ice Temp.) (90S-0N, 180W-180E)")
# plot_climate_data(g, series, yearly = TRUE) # These should mostly overlap.
## https://crudata.uea.ac.uk/cru/data/temperature/read_cru_hemi.r
## Read a CRU hemispheric-average file into a data frame.
##
## The file alternates rows: odd rows hold year, 12 monthly values, & the
## annual value (column 14); even rows hold year & 12 monthly coverage values.
## Months with zero coverage are masked to NA.
read_cru_hemi <- function(filename)
{
## Read in whole file as table; 'fill = TRUE' pads the shorter coverage rows.
tab <- read.table(filename, fill = TRUE)
nrows <- nrow(tab)
tempRows <- seq(1, nrows, 2)
coverRows <- seq(2, nrows, 2)
monthTab <- tab[tempRows, 2:13]; colnames(monthTab) <- seq_len(NCOL(monthTab))
coverTab <- tab[coverRows, 2:13]; colnames(coverTab) <- seq_len(NCOL(coverTab))
## 'data.frame()' flattens the sub-tables into "month.1" ... "cover.12" columns.
hemi <- data.frame(
year = tab[tempRows, 1],
annual = tab[tempRows, 14],
month = monthTab,
cover = coverTab
)
## Mask out months with 0 coverage (replaces 12 copy-pasted statements).
for (m in seq_len(12)) {
hemi[[paste0("month.", m)]][which(hemi[[paste0("cover.", m)]] == 0)] <- NA
}
hemi
}
## Based on Foster & Rahmstorf 2011, dx.doi.org/10.1088/1748-9326/6/4/044022
## V. http://www.ysbl.york.ac.uk/~cowtan/applets/trend/trend.js
## & http://www.ysbl.york.ac.uk/~cowtan/applets/trend/trendapp.js
#' @export
#' Correct a monthly trend's uncertainty for autocorrelation
#'
#' Adjusts the t-statistic & p-value of the 'slope_coef' term of 'model' for
#' serial correlation: by default via a data-per-degree-of-freedom factor
#' estimated from lag-1 & lag-2 autocovariances over 'autocorrel_period';
#' with 'santer = TRUE' via the Santer &al 2000 effective-sample-size
#' correction. Falls back to the Santer correction, then to no correction
#' (nu = 1), when the computed 'nu' is invalid or non-positive.
#'
#' @param xdata,ydata Time values & series values (e.g. 'model$x[, slope_coef]' & 'model$y').
#' @param model A fitted 'lm' (ideally fit with 'x = TRUE, y = TRUE').
#' @param autocorrel_period Year range over which to estimate the autocorrelation.
#' @param slope_coef Name of the trend coefficient in 'model'.
#' @param remove_missings Drop NAs before computing autocovariances?
#' @param santer Use the Santer &al 2000 correction directly?
#' @return A list of raw & corrected slope statistics ('sigma_w', 'tval', 'pval', 'sigma_c', 'tval_corrected', 'pval_corrected', 'nu'), plus 'decadal_slope' & 'decadal_2sigma'.
correct_monthly_autocorrelation <- function(
xdata,
ydata,
model,
autocorrel_period = c(1980, 2010),
slope_coef = "yr_part",
remove_missings = TRUE,
santer = FALSE
)
{
## Covariance with lag
## Lag-'j' autocovariance of 'data' around its mean, normalized by 'n'.
autocovariance <- function(data, j, remove_missings)
{
if (remove_missings)
data <- data[!is.na(data)]
n <- length(data); sx <- 0.0; cx <- 0.0
for (i in seq(n))
sx <- sx + data[i]
sx <- sx / n
for (i in seq(n - j))
cx <- cx + (data[i] - sx) * (data[i + j] - sx)
return (cx / n)
}
## Degrees of freedom correction
## Detrends the series over 'autocorrel_period', then derives a months-per-
## effective-degree-of-freedom factor 'nu' from the lag-1 & lag-2 autocorrelations.
data_per_degree_of_freedom <- function(xdata, ydata, autocorrel_period)
{
xy <- dataframe(xdata = xdata, ydata = ydata)
xyac <- xy %>%
dplyr::filter(xdata >= min(autocorrel_period) & xdata <= max(autocorrel_period))
mod_ac <- lm(ydata ~ xdata, data = xyac)
## Redefine 'xdata' & 'ydata'
xdata <- xyac$xdata; ydata <- xyac$ydata
## Remove the fitted linear trend before estimating autocorrelation.
for (i in seq(length(xdata)))
ydata[i] <- ydata[i] - coefficients(mod_ac)["xdata"] * xdata[i]
cov_ <- autocovariance(ydata, 0, remove_missings = remove_missings)
rho1 <- autocovariance(ydata, 1, remove_missings = remove_missings) / cov_
rho2 <- autocovariance(ydata, 2, remove_missings = remove_missings) / cov_
## This sometimes returns negative values; investigate why that's so sometime.
return (1.0 + (2.0 * rho1) / (1.0 - rho2/rho1))
}
## Santer &al 2000 correction dx.doi.org/10.1029/1999JD901105
## Computes 'nu' from the lag-1 residual autocorrelation via an effective
## sample size 'N_eff'; the 'N_eff' attribute is used by 'correct_stats()'.
santer_correct <- function(xdata, ydata, model, slope_coef)
{
if (missing(xdata)) {
xdata = model$x[, slope_coef]
ydata = model$y
}
temp_corr_matrix <- structure(cbind(
ydata,
keystone::shift(ydata, -1L, roll = FALSE)
), .Dimnames = list(as.character(xdata), c("x_t", "x_t-1"))) %>% as.data.frame
temp_corr_matrix_resid <- structure(cbind(
stats::residuals(model),
keystone::shift(stats::residuals(model), 1L, roll = FALSE)
), .Dimnames = list(as.character(xdata), c("x_t", "x_t-1"))) %>% as.data.frame
r <- stats::cor(temp_corr_matrix_resid$x_t, temp_corr_matrix_resid$`x_t-1`, use = "complete.obs")
N <- stats::df.residual(model)
N_eff <- N * ((1 - r)/(1 + r))
#`s_e^2` <- (1/(N_eff - 2)) * sum(stats::residuals(model)^2, na.rm = TRUE)
`s_e^2` <- function(N) (1/N) * sum(stats::residuals(model)^2, na.rm = TRUE)
#`s_b^2` <- `s_e^2`(N_eff - 2)/sum((xdata - mean(xdata, na.rm = TRUE))^2, na.rm = TRUE)
`s_b^2` <- function(N) `s_e^2`(N)/sum((xdata - mean(xdata, na.rm = TRUE))^2, na.rm = TRUE)
t_b <- stats::coef(model)[slope_coef]/sqrt(`s_b^2`(N_eff - 2)) %>% as.vector
#t_b0 <- stats::coef(model)[slope_coef]/sqrt(`s_b^2`(N)) %>% as.vector
## Multiplicative constant for 'correct_stats()' to work is (t_b0/`t_b0_N-2`)^2
## This is a 'nu' value that makes 'tval_corrected' in 'correct_stats()' equal to 't_b' here:
structure((`s_b^2`(N - 2)/`s_b^2`(N)) * (N - 2)/(N_eff - 2), N_eff = N_eff - 2)
}
## Correct t-stat & p-value
## Recomputes the lm coefficient SEs, then inflates the slope SE by sqrt(nu).
## NOTE(review): uses unexported 'stats:::qr.lm'; this appears to mirror the
## internals of 'summary.lm()' — confirm against the current R version.
correct_stats <- function(model, nu, slope_coef)
{
Qr <- stats:::qr.lm(model)
p <- model$rank
p1 <- 1L:p
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
r <- model$residuals
rss <- sum(r^2)
rdf <- model$df.residual
resvar <- rss/rdf
se <- sqrt(diag(R) * resvar)
est <- model$coefficients[Qr$pivot[p1]]
tval <- est/se
sigma_w <- se[names(est) == slope_coef]
N_eff <- attr(nu, "N_eff"); nu <- nu %>% as.vector
sigma_c <- sigma_w * sqrt(nu)
tval_corrected <- est[slope_coef]/(sigma_c)
`Pr(>|t|)` <- 2 * stats::pt(abs(tval), rdf, lower.tail = FALSE) # Two-tailed
## Use the Santer effective DoF for the corrected p-value when available.
`Pr(>|t|) corrected` <-
2 * stats::pt(abs(tval_corrected), ifelse(is.null(N_eff), rdf, N_eff), lower.tail = FALSE) # Two-tailed
list(
sigma_w = sigma_w,
tval = tval[slope_coef], pval = `Pr(>|t|)`[slope_coef],
sigma_c = sigma_c,
tval_corrected = tval_corrected, pval_corrected = `Pr(>|t|) corrected`,
nu = nu
)
}
## Choose the 'nu' estimator; 'santer_correct()' can pull the data from
## 'model' itself when it was fit with 'x = TRUE, y = TRUE'.
if (!santer)
nu <- data_per_degree_of_freedom(xdata, ydata, autocorrel_period)
else {
if (is_invalid(model$x))
nu <- santer_correct(xdata, ydata, model, slope_coef)
else
nu <- santer_correct(model = model, slope_coef = slope_coef)
}
## Fallback 1: Santer correction when the default estimator fails.
if (!santer && (is_invalid(nu) || nu <= 0.0)) {
if (is_invalid(model$x))
nu <- santer_correct(xdata, ydata, model, slope_coef)
else
nu <- santer_correct(model = model, slope_coef = slope_coef)
warning("'nu' value is invalid; Santer &al 2000 correction made for autocorrelation")
}
## Fallback 2: no correction at all.
if (is_invalid(nu) || nu <= 0.0) {
nu <- 1
warning("'nu' value is invalid; no correction made for autocorrelation")
}
cs <- correct_stats(model, nu, slope_coef)
c(
cs,
decadal_slope = (10 * coefficients(model)[slope_coef]) %>% as.vector,
decadal_2sigma = 2 * 10 * cs$sigma_c,
use.names = TRUE
)
}
## usage:
# d <- get_climate_data(download = FALSE, baseline = TRUE)
# series <- "GISTEMP v4 Global"
# r <- plot_climate_data(d, series, 1970, 2020.99, trend = TRUE, save_png = FALSE)
# lmod <- r$trend[[series]]$lm
# correct_monthly_autocorrelation(lmod$x[, "yr_part"], lmod$y, lmod)
## Simulated temperature series generator
#' Generate a synthetic monthly temperature series
#'
#' Builds climatology + seasonal sine cycle + linear warming trend + Gaussian
#' noise & returns a monthly data frame (year, month, yr_part, & the simulated
#' series) compatible with the package's plotting functions.
#' @export
simulate_temp_series <- function(
mean_temperature = 15.0, # Mean temperature/climatology in °C
amplitude = 10.0, # Amplitude of seasonal variation (°C)
trend_rate = 0.2, # Long-term warming trend (°C/decade)
start_year = 1970,
total_years = current_year - start_year,
months_per_year = 12,
noise_sd = 0.3, # Try to match global land+SSTs
var_name = "simulated temperature",
seed = 666 # If NULL, not reproducible
)
{
n_months <- (total_years + 1) * months_per_year
## Mid-month fractional-year time values, one per simulated month.
month_fracs <- (1:months_per_year - 0.5) / months_per_year
yr_part <- as.vector(sapply(start_year:(start_year + total_years), `+`, e1 = month_fracs))
t_index <- seq(n_months)
## Seasonal cycle: one full sine period per year.
seasonal <- amplitude * sin(2 * pi * t_index / months_per_year)
## Linear warming: 'trend_rate' is per decade, hence the /10 & /months_per_year.
warming <- (trend_rate / 10 * t_index) / months_per_year
## Month-to-month Gaussian noise (reproducible unless 'seed' is NULL).
if (!is.null(seed))
set.seed(seed)
noise <- stats::rnorm(n = n_months, mean = 0, sd = noise_sd)
temps <- mean_temperature + seasonal + warming + noise
## Assemble (year, month, yr_part, <var_name>).
d <- dataframe(month = 1:months_per_year, yr_part = yr_part)
d <- dplyr::mutate(d, year = trunc(yr_part), .before = "month")
d <- dplyr::mutate(d, !!var_name := temps)
d
}
## usage:
# r <- plot_climate_data(simulate_temp_series(), "simulated temperature", yearly = TRUE, baseline = TRUE, trend = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.