R/default_load_functions.R

Defines functions read_glm_flow read_from_excel_error read_from_csv_error read_from_excel read_from_csv

read_from_csv <- function(config, directory, odata_definitions=NULL) {

  if (!is.null(config$parameter_transform)) {
    parameter_transform <- eval(parse(text = config$parameter_transform))
  }
  skip <- 0
  if (!is.null(config$skip)) {
    skip <- config$skip
  }

  data <- as.data.frame(read.csv(
    paste0(directory, config$filename),
    skip = skip
  ))
  names(data) <- paste0("col_", seq_len(ncol(data)))

  data <- data %>%
    dplyr::rename(
      Parameter = paste0("col_", config$parameter_col)
    ) %>%
    mutate(Parameter = parameter_transform(Parameter)) %>%
    select(c("Parameter", paste0("col_", config$value_col)))

  if (!is.null(config$input_units)) {
    data[, 2:ncol(data)] <- mapply("*", data[, 2:ncol(data)], as.numeric(config$input_units))
  }

  if (!is.null(config$date_filter)) {
    date_filter <- eval(parse(text = config$date_filter))
    data <- data %>% filter(date_filter(Parameter))
  }

  if (is.null(config$drop_na) || config$drop_na) {
    data <- drop_na(data)
  }

  if (is.null(config$order_parameter) || config$order_parameter) {
    data <- data %>% arrange(Parameter)
  }

  if (any(config$ResourceID %in% odata_definitions$ResourceID)) {
    metadata <- odata_definitions[which(odata_definitions$ResourceID == config$ResourceID), ]
    data_frame_to_api_helper(directory, config, metadata, data)
    return(NULL)
  }
  return(data_frame_to_data_object_helper(
    directory,
    config,
    data
  ))
}

read_from_excel <- function(config, directory, odata_definitions=NULL) {
  if (!is.null(config$parameter_transform)) {
    parameter_transform <- eval(parse(text = config$parameter_transform))
  }
  skip <- 0
  if (!is.null(config$skip)) {
    skip <- config$skip
  }
  cols_to_read <- c(config$parameter_col, unlist(config$value_col))
  data <- as.data.frame(read_excel(
    paste0(directory, config$filename),
    sheet = config$sheet_number,
    col_names = paste0("col_", min(cols_to_read):max(cols_to_read)),
    range = cell_limits(c(2 + skip, min(cols_to_read)), c(NA, max(cols_to_read)))
  )) %>%
    dplyr::rename(
      Parameter = paste0("col_", config$parameter_col)
    ) %>%
    mutate(Parameter = parameter_transform(Parameter))
  data <- data[, cols_to_read - min(cols_to_read) + 1]

  if (!is.null(config$input_units)) {
    data[, 2:ncol(data)] <- mapply("*", data[, 2:ncol(data)], config$input_units)
  }

  if (!is.null(config$date_filter)) {
    date_filter <- eval(parse(text = config$date_filter))
    data <- data %>% filter(date_filter(Parameter))
  }

  if (is.null(config$drop_na) || config$drop_na) {
    data <- drop_na(data)
  }

  if (is.null(config$order_parameter) || config$order_parameter) {
    data <- data %>% arrange(Parameter)
  }

  if (any(config$ResourceID %in% odata_definitions$ResourceID)) {
    metadata <- odata_definitions[which(odata_definitions$ResourceID == config$ResourceID), ]
    data_frame_to_api_helper(directory, config, metadata, data)
    return(NULL)
  }

  return(data_frame_to_data_object_helper(
    directory,
    config,
    data
  ))
}

read_from_csv_error <- function(config, directory, odata_definitions=NULL) {
  if (!is.null(config$parameter_transform)) {
    parameter_transform <- eval(parse(text = config$parameter_transform))
  }
  skip <- 0
  if (!is.null(config$skip)) {
    skip <- config$skip
  }

  data <- as.data.frame(read.csv(
    paste0(directory, config$filename),
    skip = skip
  ))
  names(data) <- paste0("col_", seq_len(ncol(data)))

  data <- data %>%
    dplyr::rename(
      Parameter = paste0("col_", config$parameter_col)
    ) %>%
    mutate(Parameter = parameter_transform(Parameter))


  if (!is.null(config$error_col)) {

    # don't judge this method too harshly, I am very sleepy
    stopifnot(length(config$value_col) == length(config$error_col))

    data_lower <- data %>% select(paste0("col_", config$value_col)) -
      data %>% select(paste0("col_", config$error_col))
    names(data_lower) <- paste0("col_", config$value_col, "_lower")


    data_upper <- data %>% select(paste0("col_", config$value_col)) +
      data %>% select(paste0("col_", config$error_col))
    names(data_upper) <- paste0("col_", config$value_col, "_upper")

    data <- cbind(data, data_lower, data_upper)

  } else if ((!is.null(config$lower_bound_col) && !is.null(config$upper_bound_col))) {

    stopifnot(length(config$value_col) == length(config$lower_bound_col),
              length(config$value_col) == length(config$upper_bound_col))

    data <- data %>%

      rename_with(~paste0("col_", config$value_col, "_lower"), unlist(config$lower_bound_col)) %>%
      rename_with(~paste0("col_", config$value_col, "_upper"), unlist(config$upper_bound_col))

  }

  data <- data %>% select("Parameter",
                          paste0("col_", config$value_col),
                          paste0("col_", config$value_col, "_lower"),
                          paste0("col_", config$value_col, "_upper"))


  if (!is.null(config$input_units)) {
    data[, 2:ncol(data)] <- mapply("*", data[, 2:ncol(data)], config$input_units)
  }

  if (!is.null(config$date_filter)) {
    date_filter <- eval(parse(text = config$date_filter))
    data <- data %>% filter(date_filter(Parameter))
  }

  if (is.null(config$drop_na) || config$drop_na) {
    data <- drop_na(data)
  }

  # Error bar datasets have not added to Odata API
  if (any(config$ResourceID %in% odata_definitions$ResourceID)) {
    odata_data <- data[, -grep(pattern = "_lower|_upper", colnames(data))]
    metadata <- odata_definitions[which(odata_definitions$ResourceID == config$ResourceID), ]
    data_frame_to_api_helper(directory, config, metadata, odata_data)
    return(NULL)
  }

  return(data_frame_to_data_object_helper_error(
    directory,
    config,
    data %>% arrange(Parameter)
  ))
}

read_from_excel_error <- function(config, directory, odata_definitions=NULL) {
  if (!is.null(config$parameter_transform)) {
    parameter_transform <- eval(parse(text = config$parameter_transform))
  }
  skip <- 0
  if (!is.null(config$skip)) {
    skip <- config$skip
  }


  if (!is.null(config$error_col)) {
    cols_to_read <-  c(config$parameter_col,
                       unlist(config$value_col),
                       unlist(config$error_col))
  } else {
    cols_to_read <-  c(config$parameter_col,
                       unlist(config$value_col),
                       unlist(config$lower_bound_col),
                       unlist(config$upper_bound_col))
  }


  data <- as.data.frame(read_excel(
    paste0(directory, config$filename),
    sheet = config$sheet_number,
    col_names = paste0("col_", min(cols_to_read):max(cols_to_read)),
    range = cell_limits(c(2 + skip, min(cols_to_read)), c(NA, max(cols_to_read)))
  )) %>%
    dplyr::rename(
      Parameter = paste0("col_", config$parameter_col)
    ) %>%
    mutate(Parameter = parameter_transform(Parameter))



  if (!is.null(config$error_col)) {
    stopifnot(length(config$value_col) == length(config$error_col))

    data_lower <- data %>% select(paste0("col_", config$value_col)) -
      data %>% select(paste0("col_", config$error_col))
    names(data_lower) <- paste0("col_", config$value_col, "_lower")


    data_upper <- data %>% select(paste0("col_", config$value_col)) +
      data %>% select(paste0("col_", config$error_col))
    names(data_upper) <- paste0("col_", config$value_col, "_upper")

    data <- cbind(data, data_lower, data_upper)

  } else if ((!is.null(config$lower_bound_col) && !is.null(config$upper_bound_col))) {

    stopifnot(length(config$value_col) == length(config$lower_bound_col),
              length(config$value_col) == length(config$upper_bound_col))

    data <- data %>%
      rename_with(~paste0("col_", config$value_col, "_lower"), unlist(config$lower_bound_col)) %>%
      rename_with(~paste0("col_", config$value_col, "_upper"), unlist(config$upper_bound_col))

  }

  data <- data %>% select("Parameter",
                          paste0("col_", config$value_col),
                          paste0("col_", config$value_col, "_lower"),
                          paste0("col_", config$value_col, "_upper"))




  if (!is.null(config$input_units)) {
    data[, 2:ncol(data)] <- mapply("*", data[, 2:ncol(data)], config$input_units)
  }

  if (!is.null(config$date_filter)) {
    date_filter <- eval(parse(text = config$date_filter))
    data <- data %>% filter(date_filter(Parameter))
  }

  if (is.null(config$drop_na) || config$drop_na) {
    data <- drop_na(data)
  }

  # Error bar datasets have not added to Odata API
  if (any(config$ResourceID %in% odata_definitions$ResourceID)) {
    odata_data <- data[, -grep(pattern = "_lower|_upper", colnames(data))]
    metadata <- odata_definitions[which(odata_definitions$ResourceID == config$ResourceID), ]
    data_frame_to_api_helper(directory, config, metadata, odata_data)
  }

  return(data_frame_to_data_object_helper_error(
    directory,
    config,
    data %>% arrange(Parameter)
  ))
}

read_glm_flow <- function(config, directory, odata_definitions=NULL) {
  if (!is.null(config$parameter_transform)) {
    parameter_transform <- eval(parse(text = config$parameter_transform))
  }
  skip <- 0
  if (!is.null(config$skip)) {
    skip <- config$skip
  }

  data <- read.csv(
    paste0(directory, config$filename),
    skip = skip,
    na.strings = c("NA", "..C")
  ) %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    select(month2, source, new_source, config$series, count) %>%
    rename(parameter = month2, series = config$series)

  # Standardizing the values
  if (config$series == "age") {
    data$series <- gsub(",", " – ", data$series)
  } else if (config$series == "Region") {
    data$series <- gsub(" Region", "", data$series)
  }

  data <- data %>%
    group_by(parameter, source, new_source, series) %>%
    summarise(value = sum(count), .groups = "drop")

  filtered_data <- data %>%
    filter(source == config$indicator_name) %>%
    select(-source) %>%
    arrange(series) %>%
    mutate(parameter = parameter_transform(parameter))

  if (any(config$ResourceID %in% odata_definitions$ResourceID)) {
    metadata <- odata_definitions[which(odata_definitions$ResourceID == config$ResourceID), ]
    data_frame_to_api_helper(directory, config, metadata, filtered_data)
    return(NULL)
  }

  ## to put new_source in third dropdown
  long_data <- pivot_wider(filtered_data, names_from = series, values_from = value)
  output_group <- list()
  update_date <- as.Date(file.info(paste0(directory, config$filename))$mtime, tz = "NZ")

  for (val in unique(long_data$new_source)) {
    data_group <- long_data %>%
      filter(new_source == val) %>%
      select(-new_source)
    group_name <- val
    output_group[[group_name]] <- TimeSeries$new(
      data_group,
      names(data_group)[2:ncol(data_group)],
      update_date
    )
  }
  return(output_group)
}
xaviermiles/portalLite documentation built on Jan. 28, 2022, 9:10 a.m.