The ocean heat flux data is essential for Hector calibration. This markdown serves two purposes.
The data being processed by this script is generated by the 1A.tas_nc.R that is set up to run on pic. Because the pic results are rather large the inputs will not be commited the the GitHub Repo.
# Load the required libs library(dplyr) library(tidyr) library(cdoR) library(ggplot2) # And define the required directories DATARAW_DIR <- here::here('data-raw') INPUT_DIR <- file.path(DATARAW_DIR, 'comparison-data', 'cmip6', '1A-out') OUT_DIR <- file.path(DATARAW_DIR, 'comparison-data', 'cmip6', '1B-out') dir.create(OUT_DIR, showWarnings = FALSE)
# Import the raw temperature data. raw_tas_data <- read.csv(file.path(INPUT_DIR, '1A.tas-output.csv'), stringsAsFactors = FALSE)
assertthat::assert_that(!'problem' %in% names(raw_tas_data), msg = 'Some problem occured when processing the netcdf files.') # Right now because there are no temp netcdf files that could not be processed there this section does not have any # code to take a look at what happened. If there problems do arrise the asserthtat will indicate that more code will # need to be added to this section. data1 <- raw_tas_data
Check for unexpected NA values, this has to be done after checking for the problem files
# Unexpected NA values are the worst and can happen when the weighted mean does not remove NA values. assertthat::assert_that(sum(is.na(data1$value)) == 0, msg = 'Unexpected NAs in the value column.')
data1 %>% group_by(year, units, variable, domain, model, experiment) %>% summarise(n_grid = n_distinct(grid)) %>% filter(n_grid != 1) -> dupplicate_grids assertthat::assert_that(nrow(dupplicate_grids) == 0, msg = 'Data reported for mulitple grids.') # Right now because there are no temp netcdf files that contain the same information but contain results from # different grids, so there is no code in this section to hanndle this. If this does occur, which it has for other # CMIP6 variables the assertthat error will indicate that code needs to be added here. data2 <- dupplicate_grids
Sometimes when the results are written out as seperate netcdf files where the new netcdf file starts midyear means that for a single model / experiment / ensemble there can be multiple results returned for a single year.
# Check to see how many observations there are per model / ensemble / domain / year. raw_tas_data %>% group_by(units, variable, model, ensemble, experiment, domain, year) %>% summarise(n_value = n()) %>% ungroup %>% filter(n_value != 1) -> mulitple_years # Most of the time there are going to be 2 values for a year, because of the situtation described above. If # for whatever reason there are more 2 values reported for a single year more code will need to be added. The # assert that call checks for that here. assertthat::assert_that(all(mulitple_years$n_value == 2), msg = 'More than 2 observations per year.') # Calculate the annaul average. raw_tas_data %>% inner_join(mulitple_years, by = c("year", "units", "variable", "domain", "model", "experiment", "ensemble")) %>% group_by(year, units, variable, domain, model, experiment, ensemble, grid) %>% summarise(value = mean(value)) %>% ungroup() -> new_data # Replace the mulitple observations with the mean of the multiple observations. raw_tas_data %>% anti_join(mulitple_years, by = c("year", "units", "variable", "domain", "model", "experiment", "ensemble")) %>% bind_rows(new_data) -> data3 # A final check to make sure that there are no duplicate years. data3 %>% group_by(units, variable, model, ensemble, experiment, domain, year) %>% summarise(n_value = n()) %>% ungroup %>% filter(n_value != 1) -> mulitple_years assertthat::assert_that(nrow(mulitple_years) == 0 , msg = 'More than 2 observations per year in data3.')
Are there any missing years? If so that's unfortunate.
split(data3, interaction(data3$units, data3$variable, data3$model, data3$grid, data3$ensemble, data3$experiment, data3$domain, drop = TRUE)) %>% lapply(function(input){ input %>% arrange(year) %>% pull(year) %>% diff() %>% unique() -> step_size # If the step size is 1 then ther is if(step_size == 1){ TRUE } else { FALSE } }) -> missing_check assertthat::assert_that(unique(missing_check) == TRUE, msg = 'discontinuous data: data3 is missing some values for some years') # This is a common issue but is not occuring here! Hooooray! If the check in this section is triggered with an error message then # code will need to be added here to handle this issue. data4 <- data3
The idealized epxeriment results (the DECK experiments minus the historical experiment) are reported in aboslute years when they should be reported in realitve years. This section will convert time to years since the run start date which is how we are going to want to plot the results.
idealized_exps <- c("1pctCO2", "abrupt-4xCO2", "piControl", "abrupt-0p5xCO2") data4 %>% filter(experiment %in% idealized_exps) -> idealized_results # Determine the start year for each model / ensemble / idealized start year idealized_results %>% group_by(variable, domain, model, experiment, ensemble) %>% summarise(start_year = min(year)) %>% ungroup -> start_years # Remove the start year from the year column. idealized_results %>% left_join(start_years, by = c("variable", "domain", "model", "experiment", "ensemble")) %>% mutate(year = year - start_year) %>% select(-start_year) -> idealized_results_reatlive_yrs
So it turns out that some of the idealzed runs are rather short. Like only contain 5 years of data short. This does not provide us with enough information to help during the calibration process so they will be discarded.
required_length <- 100 idealized_results_reatlive_yrs %>% group_by(model, experiment, ensemble) %>% summarise(end_year = max(year)) %>% ungroup %>% filter(end_year < required_length) %>% select(-end_year) -> to_remove idealized_results_reatlive_yrs %>% anti_join(to_remove, by = c("model", "experiment", "ensemble")) -> good_idealized_results
Idealized runs sanity plots
ggplot(good_idealized_results) + geom_line(aes(year, value, color = model, group = interaction(model, experiment, ensemble))) + facet_wrap('experiment', scales = 'free') + theme(legend.position = 'none') + labs(title = 'Idealized Experiments', y = 'absolute temperature (K)', x = 'year')
data4 %>% filter(!experiment %in% idealized_exps) %>% bind_rows(good_idealized_results) -> data5
tas_data <- data5 %>% select(-datetime, -month, -grid)
Start by calculating the baseline, this the value should be removed from the temperature to calcualte the temperature anomaly.
tas_data %>% filter(year %in% 1850:1860) %>% group_by(variable, domain, model, experiment, ensemble) %>% summarise(baseline = mean(value)) %>% ungroup() -> baseline_temp emission_driven_historical <- filter(baseline_temp, experiment == 'esm-hist') %>% select(-experiment) concentration_driven_historical <- filter(baseline_temp, experiment == 'historical') %>% select(-experiment)
Calculate temp change for the concentration driven runs.
tas_data %>% filter(!grepl(pattern = 'esm', x = experiment)) %>% left_join(concentration_driven_historical, by = c("variable", "domain", "model", "ensemble")) %>% mutate(value = value - baseline, variable = 'Tgav', units = 'deg C') -> conc_tgav
Calculate temp change for the emission driven runs.
tas_data %>% filter(grepl(pattern = 'esm', x = experiment)) %>% left_join(emission_driven_historical, by = c("variable", "domain", "model", "ensemble")) %>% mutate(value = value - baseline, variable = 'Tgav', units = 'deg C') -> emiss_tgav
tgav_data <- na.omit(bind_rows(conc_tgav, emiss_tgav))
tas_data %>% mutate(class = 'Phase CMIP6', class = if_else(experiment %in% idealized_exps, 'DECK Idealized', class)) %>% split(interaction(.$model, .$variable, drop = TRUE)) %>% lapply(function(input){ model <- unique(input$model) input %>% filter(year <= 2150) %>% ggplot() + geom_line(aes(year, value, color = experiment, group = interaction(variable, ensemble, model, experiment))) + facet_wrap('class', scales = 'free') + labs(x = 'Year', y = 'Temp (K)', title = model) }) -> tas_plots invisible(lapply(tas_plots, print))
The temperature anomoly is the ideal way to look at the difference between the models.
tgav_data %>% mutate(class = 'Phase CMIP6', class = if_else(experiment %in% idealized_exps, 'DECK Idealized', class)) %>% split(interaction(.$model, .$variable, drop = TRUE)) %>% lapply(function(input){ model <- unique(input$model) input %>% filter(year <= 2150) %>% ggplot() + geom_line(aes(year, value, color = experiment, group = interaction(variable, ensemble, model, experiment))) + facet_wrap('class', scales = 'free') + labs(x = 'Year', y = 'Tgav (degree C)', title = model) }) -> tgav_plots invisible(lapply(tgav_plots, print))
Save a copy of the tas results.
write.csv(tas_data, file = file.path(OUT_DIR, '1B.tas_quality.csv'))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.