Nothing
# TITLE: mipplot variable submission check
# OUTLINE: function to verify which data have been submitted for a scenario with IAMC format.
# Developed by: Masahiro SUGIYAMA and Diego SILVA HERRAN.
# Last revision: 2017.10.16 Diego SILVA HERRAN.
# List of revisions:
#--------------------------------------------------------------------
# variable SUBMISSION CHECK
#====================================================================
# Verify whether data of variables included in list template have been
# submitted.
#====================================================================
#' @title variable SUBMISSION CHECK
#' @description Verify whether data of variables included
#' in list template have been submitted.
#' @param D input data table
#' @param V list of variables
#' @param na_name string for N/A
#' @return A dataframe representing variable availabilities.
#' @importFrom stats aggregate complete.cases na.omit
#' @importFrom utils write.csv
#' @export
#'
mipplot_var_submission <- function(D, V, na_name = "N/A") {
if (missing(D)) stop("Input data is missing! Specify input data!")
if (missing(V)) stop("Input list of variables is missing! Specify input data!")
# Filter input table to exclude periods without data for any variables.
# Useful for submissions covering timeframes smaller than 2100 (e.g. until 2050).
D1 <- D
D1$value <- as.numeric(as.character(D$value))
D1 <- D1[complete.cases(D1$value), ]
D <- D[D$period %in% levels(as.factor(D1$period)), ]
V1 <- V
#V1 <- V[,"variable"] # Vector with list of variables to be submitted.
#names(V1) <- "variable" # Edit header name to comply with IAMC DB format.
D[is.na(D$model), "model"] <- "Model_empty" # Fill name of model if empty.
# Add "Member" column (identifier for each unique combination of model/scenario/region).
D$Member <- as.factor(paste(D$model, D$scenario, D$region, sep=" | "))
D1 <- D
#D1_empty <- D[D$value=="",] # DF with empty values in submission.
D1_na <- na.omit(D[D$value==na_name,]) # DF with "NA" values in submission.
D1_empty <- D[!complete.cases(D),] # DF with empty values in submission (includes "empty "N/A" values).
D1 <- na.omit(D[D$value!=na_name,]) # DF with "non-NA" values in submission.
D1$value <- as.numeric(as.character(D1$value)) # Replace non-numerical data by "NA".
D1_invalid <- D1[is.na(D1$value),] # DF with invalid values (i.e. non-numeric AND NOT empty/na) in submission.
D1_valid <- na.omit(D1) # DF with valid values (i.e. numeric) in submission.
D1_valid[D1_valid$value != 0,"value"] <- 1 # Replace all non-zero valid values with 1 for further operations.
#S1 <- aggregate(D1_valid$value, by=D1_valid[c("model","scenario","region","variable","unit")], mean)
S1 <- aggregate(D1_valid$value, by=D1_valid[,c("model","scenario","region","variable","unit")], mean)
D1_zero <- S1[S1$x == 0,] # DF with "all zero" values in submission.
# Select data of variables with values (i.e. not all values are NA).
D2 <- D1_valid[!duplicated(D1_valid[,c("model","scenario","region","variable")]),]
# List of member data (i.e. Model, Scenario, Region).
m_list <- levels(D$Member)
V_list <- list()
#var_list <- levels(as.factor(D$variable))
# Compare variables in submission to template for each member.
# Assume that variables with "NA" are missing; if all values are zero then it is NOT missing.
#i <- m_list[[1]] # For testing!
for(i in m_list){
D3 <- D2[D2$Member == i,]
V2 <- V1
Submitted <-intersect(V1$variable, D3$variable) # List of variables submitted.
Additional <- setdiff(D3$variable, V1$variable) # List of other variables.
V2$Outcome <- "Missing_var" # Create column filled with "Missing" by default.
V2[V2$variable %in% Submitted, "Outcome"] <- "Submitted_var"
# Information on submitted variables excluded in template list.
if (length(Additional) > 0) {
A1 <- D3[D3$variable %in% Additional, "variable"] # Create dataframe from list of variables only found in the data submission.
A1$Outcome <- "Additional_var"
V2 <- rbind(V2,A1) # Append list of additional variables.
}
# Information on irregular and/or missing variables.
# Reports periods with NA/Empty/Invalid entries for each variable and member.
V2$NA_periods <- "None"
V2$Empty_periods <- "None"
V2$Invalid_periods <- "None"
#j <- 1 # For testing purposes!
#j = var_list[1]
#next(j)
var_list <- V2[V2$Outcome=="Submitted_var",]$variable
for (j in var_list) { # Skip loop for variables submitted.
NA_periods <- D1_na[D1_na$Member == i & D1_na$variable == j, ]
if (nrow(NA_periods) > 0) {
#V2[V2$variable %in% NA_periods$variable, "NA_periods"] <- paste(NA_periods$period, collapse=", ")
V2[V2$variable==j, "NA_periods"] <- paste(NA_periods$period, collapse=", ")
}
Empty_periods <- D1_empty[D1_empty$Member == i & D1_empty$variable == j, ]
if (nrow(Empty_periods) > 0) {
#V2[V2$variable %in% Empty_periods$variable, "Empty_periods"] <- paste(Empty_periods$period, collapse=", ")
V2[V2$variable==j, ]$Empty_periods <- paste(Empty_periods$period, collapse = ", ")
}
Invalid_periods <- D1_invalid[D1_invalid$Member == i & D1_invalid$variable == j, ]
if (nrow(Invalid_periods) > 0) {
#V2[V2$variable %in% Invalid_periods$variable, "Invalid_periods"] <- paste(Invalid_periods$period, collapse=", ")
V2[V2$variable == j, "Invalid_periods"] <- paste(Invalid_periods$period, collapse = ", ")
}
}
V2 <- cbind(D3[1, c("model","scenario","region")], V2) # Add columns with Member information.
V_list[[length(V_list) + 1]] <- V2
}
V_out <- do.call(rbind, V_list)
V_missing <- V_out[V_out$Outcome == "Missing_var", c("model", "scenario", "region", "variable","Outcome")]
filename <- sprintf("../data_output/Var_missing_%s.csv", format(Sys.time(), "%Y_%m%d"))
write.csv(V_missing, file = filename, row.names = FALSE)
V_missing_wide <- dcast(V_missing, ...~model+scenario)
filename <- sprintf("../data_output/Var_missing_wide_%s.csv", format(Sys.time(), "%Y_%m%d"))
write.csv(V_missing_wide, file = filename, row.names = FALSE)
filename <- sprintf("../data_output/Var_submission_check_%s.csv", format(Sys.time(), "%Y_%m%d"))
write.csv(V_out, file = filename, row.names = FALSE)
return(V_out)
}
# TEST
# # Select data to be plotted.
# df1 <- data.table::fread("../data/ar5_db_sample05_EMF.csv", header = TRUE)
# D <- melt(df1,
# id.vars=c("model","scenario","region","variable","unit"))
# names(D) <- c("model","scenario","region","variable","unit","period","value")
# D$period <- as.numeric(gsub("X([0-9]+)","\\1",D$period))
# V <- read.csv("../data/Variable_list_ar5_v01.csv", header = TRUE)
#xD1a <- mipplot_var_submission(D,V,na_name="N/A")
#head(Dx1a)
# END
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.