The ocean heat flux data is essential for Hector calibration. This markdown serves two purposes.
# Import required libs library(dplyr) library(tidyr) library(ggplot2) library(knitr)
# Define required directories DATARAW_DIR <- here::here('data-raw') INPUT_DIR <- file.path(DATARAW_DIR, 'comparison-data', 'cmip6', '1A-out') OUT_DIR <- file.path(DATARAW_DIR, 'comparison-data', 'cmip6', '1B-out') dir.create(OUT_DIR, showWarnings = FALSE)
Load the ocean heat flux data that has been processed on pic.
# Import the raw ocean heat flux data. raw_hfds_data <- read.csv(file.path(INPUT_DIR, '1A.hfds-output.csv'), stringsAsFactors = FALSE)
How many files were unable to be processed by the cdo code?
problem_files <- raw_hfds_data[which(raw_hfds_data$problem), ] processed_data <- setdiff(raw_hfds_data, problem_files) message(nrow(problem_files), ' files were not processed.')
What files were not processed?
problem_files %>% select(model, experiment) %>% distinct() %>% kable(format = 'markdown')
Check to see if it is just ensemble memebers that we are missing or if we are missing novel model / experiment data.
req_cols <- c('variable', 'model', 'experiment', 'grid') missing <- distinct(problem_files[ , names(problem_files) %in% req_cols]) have_data <- distinct(processed_data[ , names(processed_data) %in% req_cols]) no_data <- setdiff(have_data, missing) message('We have absolutely no data for ', nrow(no_data), ' models / experiments.')
How many files were we able to process?
n_good <- nrow(processed_data[ , names(processed_data) %in% c('model', 'experiment')]) message('We have data from ', n_good, ' models & experiments.')
processed_data %>% group_by(variable, domain, model, experiment, ensemble) %>% summarise(n_grid = n_distinct(grid)) %>% ungroup %>% filter(n_grid != 1) %>% mutate(duplicate = TRUE) -> duplicate_grids message('Number of models with results from mulitple grids: ', length(unique(duplicate_grids$model)))
If there are instances where we have results from mulitple grids opt to continue to perserve the gn grid because it is the model output in its native resolution.
processed_data %>% mutate(keep = TRUE) %>% left_join(duplicate_grids, by = c("variable", "domain", "model", "experiment", "ensemble")) %>% mutate(keep = if_else(duplicate == TRUE & grid != 'gn', FALSE, keep)) %>% filter(keep) %>% select(-keep, -duplicate, -n_grid, -problem) -> data1
Check to make sure there are no duplicates.
data1 %>% group_by(variable, domain, model, experiment, ensemble) %>% summarise(n_grid = n_distinct(grid)) %>% ungroup %>% filter(n_grid != 1) %>% mutate(duplicate = TRUE) -> duplicate_grids invisible(assertthat::assert_that(nrow(duplicate_grids) == 0, msg = 'duplicate grids in data frame.'))
Mulitple years of data can be an issue whenever the output netcdf files are saved as mulitple netcdfs.
data1 %>% group_by(units, variable, domain, model, experiment, ensemble, grid, year) %>% summarise(n_yr = n_distinct(year)) %>% ungroup() %>% filter(n_yr != 1) -> duplicate_years assertthat::assert_that(nrow(duplicate_years) == 0, msg = 'Multiple observations for a single year of data.') # If it ends up where there are mulitple observations for a single model output year more code will need to be # added but for now this is not an issue. data1 %>% select(year, value, units, variable, domain, model, experiment, ensemble) -> data2
Make sure the idealized runs all start at t = 0.
idealized_exps <- c("1pctCO2", "abrupt-4xCO2", "piControl", "abrupt-0p5xCO2") data2 %>% filter(experiment %in% idealized_exps) -> idealized_exp_results idealized_exp_results %>% group_by(experiment, model, ensemble) %>% summarise(start_year = min(year)) -> start_years idealized_exp_results %>% left_join(start_years, by = c("model", "experiment", "ensemble")) %>% mutate(year = year - start_year) -> idealized_correct_years data2 %>% filter(!experiment %in% idealized_exps) %>% bind_rows(idealized_correct_years) -> data3
data3 %>% mutate(variable = hector::HEAT_FLUX()) -> data4
heatflux_data <- data4
heatflux_data %>% mutate(class = 'Phase CMIP6', class = if_else(experiment %in% idealized_exps, 'DECK Idealized', class)) %>% split(interaction(.$model, .$variable, drop = TRUE)) %>% lapply(function(input){ model <- unique(input$model) input %>% filter(year <= 2150) %>% ggplot() + geom_line(aes(year, value, color = experiment, group = interaction(variable, ensemble, model, experiment))) + facet_wrap('class', scales = 'free') + labs(x = 'Year', y = 'Heat Flux Wm-2', title = model) }) -> heatflux_plots invisible(lapply(heatflux_plots, print))
Save a copy of the tas results.
write.csv(heatflux_data, file = file.path(OUT_DIR, '1B.heatflux_quality.csv'))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.