# R/lissyuse.r
#
# Defines the functions variable_selection_for_lissyuse(), load_datasets(), lissyuse() and subset_datasets().
#
# Documented in: lissyuse

 
#' Load data easily and efficiently with lissyuse
#'
#' @description
#' `lissyuse()` enables LISSY R-users to quickly import whole series of data from a specific country and/or within any time period.
#'
#' @details
#' If an object named `define_path` is visible, the datasets are read from the
#' files under the path it returns. Otherwise only the sample datasets are
#' available (local machine), and `from` and `to` are not used.
#'
#' Besides being returned, the resulting list is assigned in the global
#' environment as `lis_datasets` (or `lws_datasets` when `lws = TRUE`), and a
#' message listing the imported data frames is printed.
#'
#' @param data Character vector selecting what to load. Two-character elements
#'   are matched against the `iso2` country codes (the whole series of that
#'   country is loaded); four-character elements are matched against the
#'   dataset names (`dname`).
#' @param vars Character vector with the names of the variables to import.
#'   If `NULL` (default), all variables are imported.
#' @param subset A string holding a filtering condition. It is parsed with
#'   `rlang::parse_expr()` and applied to the rows of every imported data
#'   frame. If `NULL` (default), no filtering takes place.
#' @param from Lower bound (inclusive) on the year of the datasets to load.
#' @param to Upper bound (inclusive) on the year of the datasets to load.
#' @param lws Logical. `TRUE` selects the LWS database, `FALSE` (default) the
#'   LIS database.
#'
#' @return A list of data frames, returned invisibly. The same list is also
#'   assigned in the global environment (see Details).
#' @export
#'
#' @examples
#' \dontrun{
#' # LIS #
#' lissyuse(data = "it", from = 2010, to = 2016)
#' }
lissyuse <- function(data = NULL, vars = NULL, subset = NULL, from = NULL, to = NULL, lws = FALSE) {

# 0) Define paths and location  ---------------------------------------------------------

  # Evaluated once so that every later step agrees on the environment in use.
  on_lissy <- exists("define_path")

  # Explicit defaults: in the local branch these are never needed, but they
  # are still passed on below, so they must exist.
  path_to_files <- NULL
  location <- NULL

  if (!on_lissy) {
    # local machine ----> only access to sample datasets
    data_to_load <- import_sample_datasets_to_lissyuse(data, lws)

  } else {
    paths <- define_path(lws)
    path_to_files <- paths[[1]]
    location <- paths[[2]]

# 1) Argument {data}  -------------------------------------

    check_empty_data(data, lws)

    check_length_iso2(data)

    check_iso2(data, lws)

    invalid_ccyy_pairs(data, lws)

    # Define data to be loaded
    data_to_load <- load_datasets(data, lws, from, to)
  }

# 2)  Variable-driven selection of files based on argument {vars}  -------------------------------------

  check_invalid_vars(vars, lws)

  # Main function to select columns based on argument {vars}
  selection <- variable_selection_for_lissyuse(data_to_load, path_to_files, vars, lws)

  # Named so that base::message() is not shadowed.
  final_message <- selection$message

# 3)  Filtering of rows based on the condition imposed in argument {subset}  -------------------------------------

  datasets <- if (is.null(subset)) {
    selection$data
  } else {
    subset_datasets(selection$data, lws, rlang::parse_expr(subset))
  }

# 4)  LISSY adjustment  -------------------------------------

  # For location "L" every element is converted to a plain data.frame.
  if (on_lissy && location == "L") {
    datasets <- lapply(datasets, as.data.frame)
  }

# 5) Final assignment  -------------------------------------
  assign(if (lws) "lws_datasets" else "lis_datasets", datasets, envir = .GlobalEnv)

# 6)  Print message on the availability of the list with the datasets and its names ----------
  cat(final_message, "\n")

  # Matches the documented return value; invisible so nothing extra is printed.
  invisible(datasets)
}





# Translate the `data`, `from` and `to` arguments into the character vector of
# dataset names (`dname`) that have to be imported.
#
# data : character vector; two-character entries are matched against `iso2`
#        (whole series of a country), four-character entries against `dname`.
# lws  : TRUE restricts the lookup to the "LWS" database, FALSE to "LIS".
# from : lower bound (inclusive) on `year`, or NULL.
# to   : upper bound (inclusive) on `year`, or NULL.
#
# Stops with an error when no dataset satisfies the criteria.
load_datasets <- function(data = NULL, lws = FALSE, from = NULL, to = NULL) {

  # Separate country codes from individual dataset names by their length.
  code_length <- stringr::str_length(data)
  whole_series_iso2 <- data[code_length == 2]
  single_datasets <- data[code_length == 4]

  # Expand every requested country into the names of all its datasets.
  series_rows <- filter(
    lissyrtools::datasets,
    database == (if (lws) "LWS" else "LIS"),
    iso2 %in% whole_series_iso2
  )
  series_dnames <- pull(unique(select(series_rows, dname)))

  requested_dnames <- c(series_dnames, single_datasets)

  # Keep the rows of the right database, among the requested names (all of
  # them when `data` is NULL) and inside the requested range of years.
  matching_rows <- filter(
    lissyrtools::datasets,
    if (lws) database == "LWS" else database == "LIS",
    if (is.null(data)) TRUE else dname %in% requested_dnames,
    if (is.null(from) && is.null(to)) {
      year > 0
    } else if (is.null(to)) {
      year >= from
    } else if (is.null(from)) {
      year <= to
    } else {
      year >= from & year <= to
    }
  )
  data_to_load <- pull(unique(select(matching_rows, dname)))

  # Hand back a bare vector, free of any attribute carried by the column.
  attributes(data_to_load) <- NULL

  # Probably already caught by the earlier checks, kept as a safety net.
  if (length(data_to_load) == 0) {
    stop(
      glue::glue(
        "No datasets matched the provided criteria. Please check the arguments provided, especially 'data'."
      )
    )
  }

  return(data_to_load)
}








# Select the variables requested through `vars` and prepare the message that
# lissyuse() prints at the end.
#
# data_to_load  : when `define_path` exists, a character vector of dataset
#                 names whose files are read from `path_to_files`; otherwise
#                 a named list of sample data frames already in memory.
# path_to_files : prefix of the .dta files; only used when `define_path` exists.
# vars          : character vector of variable names, or NULL for all of them.
# lws           : TRUE for LWS, FALSE for LIS.
#
# Returns a list with two elements: `data` (list of data frames) and `message`
# (a single string).
#
# Depending on `vars`, the result is:
#   - all variables (vars is NULL): household and person information together;
#   - only household-level variables: household-level data, names suffixed "h";
#   - only person-level variables: person-level data, names suffixed "p";
#   - both kinds: household and person information merged;
#   - only variables present in both files: household-level data, suffix "h".
# An error is raised when `vars` holds no valid variable name.
variable_selection_for_lissyuse <- function(data_to_load, path_to_files, vars = NULL, lws = FALSE) {

  # Checked once; the two cases are mutually exclusive.
  on_lissy <- exists("define_path")

  # Classifying first means invalid `vars` fail before any file is read.
  plan <- .lissyuse_selection_plan(vars, lws)

  # `path_to_files` is only touched in the remote case; callers may leave it
  # undefined when working with the sample datasets.
  list_with_data <- if (on_lissy) {
    .lissyuse_read_files(data_to_load, path_to_files, plan, lws)
  } else {
    .lissyuse_trim_samples(data_to_load, plan)
  }

  list(
    data = list_with_data,
    message = .lissyuse_selection_message(
      dataset_names = names(list_with_data),
      level = plan$level,
      lws = lws,
      sample = !on_lissy
    )
  )
}


# Classify `vars` into one of the levels "all", "household", "person", "both"
# or "shared", and collect the columns needed from each file.
# The variable catalogues are only looked up when `vars` is not NULL.
.lissyuse_selection_plan <- function(vars, lws) {

  if (is.null(vars)) {
    return(list(level = "all"))
  }

  if (lws) {
    household_vars <- lissyrtools::lws_household_variables
    person_vars <- lissyrtools::lws_person_variables
    key_h <- lissyrtools::key_vars_household_lws
    key_p <- lissyrtools::key_vars_person_lws
  } else {
    household_vars <- lissyrtools::lis_household_variables
    person_vars <- lissyrtools::lis_person_variables
    key_h <- lissyrtools::key_vars_household_lis
    key_p <- lissyrtools::key_vars_person_lis
  }

  # Variables found in both files do not count towards either level.
  wants_household <- any(vars %in% setdiff(household_vars, person_vars))
  wants_person <- any(vars %in% setdiff(person_vars, household_vars))

  # Subsetting by the catalogues also drops any invalid name.
  hvars <- vars[vars %in% household_vars]
  pvars <- vars[vars %in% person_vars]

  if (wants_household && wants_person) {
    level <- "both"
  } else if (wants_household) {
    level <- "household"
  } else if (wants_person) {
    level <- "person"
  } else {
    # Nothing is exclusive to one level: the valid names (if any) exist in
    # both files and the household-level file is used by default.
    if (lws) {
      valid <- vars[vars %in% lws_variables]
      shared_vars <- lws_both_hp_variables
    } else {
      valid <- vars[vars %in% lis_variables]
      shared_vars <- lis_both_hp_variables
    }

    if (length(valid) == 0) {
      # already ensured by check_invalid_vars(vars, lws)
      stop(
        sprintf("No valid %s variable names specified.", if (lws) "LWS" else "LIS"),
        call. = FALSE
      )
    }

    assertthat::assert_that(
      all(valid %in% shared_vars),
      msg = "Internal error: variables expected in both the household and person files are not."
    )

    level <- "shared"
    hvars <- valid
  }

  list(level = level, hvars = hvars, pvars = pvars, key_h = key_h, key_p = key_p)
}


# Read the .dta files of every dataset in `data_to_load` according to `plan`.
.lissyuse_read_files <- function(data_to_load, path_to_files, plan, lws) {

  suffix_h <- if (lws) "wh" else "ih"
  suffix_p <- if (lws) "wp" else "ip"
  join_keys <- if (lws) c("hid", "inum") else "hid"
  whole_file <- identical(plan$level, "all")

  # Read one file per dataset, restricted to the wanted and key columns
  # unless the whole file is requested.
  read_level <- function(suffix, wanted, keys) {
    purrr::map(data_to_load, function(dname) {
      file <- paste0(path_to_files, dname, suffix, ".dta")
      if (whole_file) {
        haven::read_dta(file = file)
      } else {
        # The column vector is built inside the call, as tidyselect expects.
        haven::read_dta(file = file, col_select = unique(c(wanted, keys)))
      }
    })
  }

  loaded <- switch(
    plan$level,
    household = ,
    shared = read_level(suffix_h, plan$hvars, plan$key_h),
    person = read_level(suffix_p, plan$pvars, plan$key_p),
    {
      # "all" and "both": merge the household and person files.
      files_h <- read_level(suffix_h, plan$hvars, plan$key_h)
      files_p <- read_level(suffix_p, plan$pvars, plan$key_p)

      purrr::map2(files_h, files_p, function(h, p) {
        merged <- dplyr::inner_join(h, p, by = join_keys)
        # Columns present in both files: keep the household copy only.
        merged <- dplyr::select(merged, -dplyr::ends_with(".y"))
        dplyr::rename_with(
          merged,
          function(column) sub("\\.x$", "", column),
          dplyr::ends_with(".x")
        )
      })
    }
  )

  name_suffix <- .lissyuse_name_suffix(plan$level)
  names(loaded) <- if (is.null(name_suffix)) {
    data_to_load
  } else {
    paste0(data_to_load, name_suffix)
  }

  loaded
}


# Variable selection for the sample datasets used in a local environment.
# `data_to_load` is a named list of data frames.
.lissyuse_trim_samples <- function(data_to_load, plan) {

  if (identical(plan$level, "all")) {
    return(data_to_load)
  }

  trimmed <- switch(
    plan$level,
    person = purrr::map(data_to_load, function(sample_df) {
      dplyr::select(sample_df, unique(c(plan$pvars, plan$key_p)))
    }),
    both = purrr::map(data_to_load, function(sample_df) {
      dplyr::select(
        sample_df,
        unique(c(plan$hvars, plan$pvars, plan$key_h, plan$key_p))
      )
    }),
    # "household" and "shared": one row per household (pid == 1).
    purrr::map(data_to_load, function(sample_df) {
      dplyr::select(
        dplyr::filter(sample_df, pid == 1),
        unique(c(plan$hvars, plan$key_h))
      )
    })
  )

  name_suffix <- .lissyuse_name_suffix(plan$level)
  if (!is.null(name_suffix)) {
    names(trimmed) <- paste0(names(trimmed), name_suffix)
  }

  trimmed
}


# Suffix added to the dataset names: "h" for household-level results, "p" for
# person-level results, NULL when household and person data are combined.
.lissyuse_name_suffix <- function(level) {
  switch(
    level,
    household = ,
    shared = "h",
    person = "p",
    NULL
  )
}


# Build the message shown to the user once the list of datasets is available.
.lissyuse_selection_message <- function(dataset_names, level, lws, sample) {

  list_name <- if (lws) "lws_datasets" else "lis_datasets"

  # Qualifier in back-ticks, e.g. `household-level sample`; empty when neither
  # tag applies.
  tags <- c(
    switch(level, household = "household-level", person = "person-level", NULL),
    if (sample) "sample"
  )
  qualifier <- if (length(tags) > 0) {
    paste0("`", paste(tags, collapse = " "), "` ")
  } else {
    ""
  }

  closing_note <- switch(
    level,
    all = "\nAll variables were imported! We recommend specifying a character vector with the desired variables in the argument `vars`.",
    shared = "\nNOTE: The selected variables are not exclusive to either household or individual-level datasets. The imported datasets have been defaulted to `household-level`.",
    ""
  )

  paste0(
    "The list `", list_name, "` is now available. It contains the following ",
    qualifier, "data frames: ",
    paste(dataset_names, collapse = ", "),
    ".",
    closing_note
  )
}






# Apply the row filter `subset_expr` to every data frame of a list.
#
# intermediate_data_to_filter : list of data frames (named or not).
# lws                         : not used in this function; kept in the
#                               signature for existing callers.
# subset_expr                 : unevaluated filtering expression, e.g. the
#                               result of rlang::parse_expr().
#
# Returns a list of the same length and with the same names as the input.
# A data frame is returned unfiltered (with a message) when it lacks a
# variable used in the condition or when no row satisfies the condition.
subset_datasets <- function(intermediate_data_to_filter, lws = FALSE, subset_expr) {

  dataset_list <- intermediate_data_to_filter

  # Labels used in the messages. Elements are addressed by position further
  # down, so missing, empty or duplicated names cannot drop or mix datasets;
  # nameless elements are labelled with their position.
  dataset_labels <- names(dataset_list)
  if (is.null(dataset_labels)) {
    dataset_labels <- rep("", length(dataset_list))
  }
  nameless <- is.na(dataset_labels) | !nzchar(dataset_labels)
  dataset_labels[nameless] <- as.character(seq_along(dataset_list))[nameless]

  # The variables of the condition do not change between datasets.
  condition_vars <- all.vars(subset_expr)

  # Apply filtering logic
  filtered_datasets <- lapply(seq_along(dataset_list), function(position) {
    data <- dataset_list[[position]]
    dataset_name <- dataset_labels[[position]]

    # Check if all variables in the subset condition exist in the dataset
    missing_vars <- setdiff(condition_vars, names(data))
    if (length(missing_vars) > 0) {
      message(sprintf("Skipping '%s' because it is missing variables: %s",
                      dataset_name, paste(missing_vars, collapse = ", ")))
      return(data) # Return unfiltered dataset
    }

    # Rows satisfying the condition
    matching_rows <- dplyr::filter(data, !!subset_expr)

    if (nrow(matching_rows) == 0) {
      message(sprintf("Skipping filtering for '%s' because no rows match the condition.", dataset_name))
      return(data) # Return unfiltered dataset
    }

    # Percentage of rows deleted; nrow(data) > 0 here since rows matched.
    deleted_percentage <- (nrow(data) - nrow(matching_rows)) / nrow(data) * 100

    # Log the result
    message(sprintf("Applying filtering on '%s'. Rows before: %d, Rows after: %d, Rows deleted: %.2f%%",
                    dataset_name, nrow(data), nrow(matching_rows), deleted_percentage))

    matching_rows
  })

  names(filtered_datasets) <- names(dataset_list)

  filtered_datasets
}
# JosepER/lissyrtools documentation built on Jan. 26, 2025, 10:01 p.m.