R/check_for_missing_values.R

Defines functions check_missing_values

Documented in check_missing_values

#' @title Check for missing values in the ENTRANCES DATA
#'
#' @description For every requested region and variable, the timetable is
#'   fetched with `get_timetable_for_variable()`, stored nested in a list
#'   column and flagged as `"Missing"` when it contains at least one `NA`,
#'   otherwise as `"Complete"`.
#'
#' @param years vector of years for which the data set should be analyzed;
#'   passed unchanged to `get_timetable_for_variable()`
#' @param var_dataset vector of variable names to check; defaults to every
#'   distinct value of `EntrancesData$variable`
#' @param region character vector of region names; each must be a name of
#'   the list returned by `get_nuts_list()`. When omitted, all regions of
#'   that list are checked.
#'
#' @return data frame with one row per region. The first column `Region`
#'   holds the region name; it is followed, for each variable, by a list
#'   column `"<variable> _data"` with the nested timetable and a character
#'   column `"<variable> _check"` holding `"Missing"` or `"Complete"`.
#' @export
#'
#' @examples
#' df <- check_missing_values(years = c(2018:2019), var_dataset = c("pop"))
check_missing_values <- function(years,
                                 var_dataset = unique(data$variable),
                                 region = c("rhineland", "lusatia", "centralgermany",
                                            "jiuvalley", "uppernitra", "sulcis",
                                            "silesia")) {
  nuts_list <- get_nuts_list()

  # `data` has to exist in this frame before `var_dataset` is first touched:
  # its default, `unique(data$variable)`, is evaluated lazily inside the
  # function and resolves `data` to this local binding.
  data <- EntrancesData
  force(var_dataset)

  # Honour an explicitly supplied `region`; without one, check every region
  # known to the NUTS list.
  if (missing(region)) {
    region <- names(nuts_list)
  } else {
    unknown <- setdiff(region, names(nuts_list))
    if (length(unknown) > 0) {
      stop(
        "Unknown region(s): ", paste(unknown, collapse = ", "),
        ". Available regions: ", paste(names(nuts_list), collapse = ", "),
        call. = FALSE
      )
    }
  }
  region <- as.character(region)

  # Column layout: Region, then a (<var> _data, <var> _check) pair per
  # variable. paste() with its default separator puts a space before the
  # suffix; that spelling is kept so existing column lookups keep working.
  n_vars <- length(var_dataset)
  var_idx <- seq_len(n_vars)
  col_names <- character(2L * n_vars + 1L)
  col_names[1L] <- "Region"
  col_names[2L * var_idx] <- paste(var_dataset[var_idx], "_data")
  col_names[2L * var_idx + 1L] <- paste(var_dataset[var_idx], "_check")

  # Pre-allocate the result: one row per region, all cells NA.
  result <- data.frame(matrix(ncol = length(col_names), nrow = length(region)))
  colnames(result) <- col_names

  for (g in seq_along(region)) {
    nuts_ids <- unlist(nuts_list[region[g]])
    for (i in var_idx) {
      timetable <- get_timetable_for_variable(
        var_dataset[i],
        NUTS_IDs = nuts_ids,
        years = years
      )
      # Even column: the nested timetable; following odd column: its status.
      result[g, 2L * i] <- nest(timetable)
      result[g, 2L * i + 1L] <-
        if (sum(is.na(timetable)) > 0) "Missing" else "Complete"
    }
  }

  result[, 1] <- region

  result
}
THartl1/EntrancesDataPackage documentation built on Dec. 18, 2021, 4:01 p.m.