R/tarpan2-data.R

#' tarpan2_geography_variables
#'
#' List all available models and variables (model_variable) selected project
#' database
#'
#' @param dbname A project database, found in \code{tarpan2_show_databases}
#'
#' @return a list of available analysis variables for the specified geography
tarpan2_geography_variables <- function(dbname) {
  available_tables <- tarpan2_show_tables(con = dbname)

  project_data_tables <- available_tables[
    grepl("project_tbl_data_", available_tables)
  ]

  gsub("project_tbl_data_", "", project_data_tables)
}

#' tarpan2_index_read
#'
#' List all available GP index calculation results for a given geography,
#' hazard, index class and index variable.
#'
#' @param hazard The hazard code. Currently only "tc" or "drought"
#' @param index_var The index variable to use.
#' @inheritParams tarpan2_geography_variables
#'
#' @return A list all available GP index calculation results
#'
#' @export
tarpan2_index_read <- function(hazard = c("tropical_cyclone", "drought"),
                               index_var, dbname) {
  hazard <- match.arg(hazard)

  if (is.null(dbname)) stop("Tarpan 2.0 requires a database")

  available_tables <- tarpan2_show_tables(con = dbname)
  index_table <- glue("project_tbl_index_{hazard}_{index_var}")

  if (index_table %in% available_tables)
      tarpan2_get_table(con = dbname, table_name = index_table)
  else
      stop("requested table in not available, build it using ",
           "tarpan2_index_write")
}

#' tarpan2_index_write
#'
#' For a given hazard, index_class, index_var, and index_data, write to the
#' database
#'
#' @inheritParams tarpan2_index_read
#' @param index_data The data to write to the index. Can be data.frame or a
#' list
#'
#' @export
tarpan2_index_write <- function(hazard = c("tropical_cyclone", "drought"),
                                index_var, index_data, dbname) {
  available_tables <- tarpan2_show_tables(con = dbname)
  index_table <- glue("project_tbl_index_{hazard}_{index_var}")

  if (!(index_table %in% available_tables)) {
    message("requested table in not available, creating new index table...")
    tarpan2_sql_script(con = dbname, script_name = hazard)
  }

  tarpan2_append_data(con = dbname, table = index_table, data = index_data)

  invisible()
}

#' tarpan2_index_delete
#'
#' Deletes an index for a specified database, hazard, and index_var
#'
#' @inheritParams tarpan2_index_read
#'
#' @export
tarpan2_index_delete <- function(hazard = c("tc", "drought"), index_var,
                                 dbname) {
  index_table <- glue("project_tbl_index_{hazard}_{index_var}")
  tarpan2_delete_table(con = dbname, table_name = index_table)

  invisible()
}

#' tarpan2_variable_models
#'
#' List all models available in the project database.
#'
#' @param dbname A project database, found in \code{tarpan2_show_databases}
#' @param variable Which variable to use when looking for models
#'
#' @return List of all models for a particular variable and project database
#' @export
tarpan2_variable_models <- function(dbname, variable) {
  available_tables <- tarpan2_show_tables(con = dbname)

  available_tables <- available_tables[
    grepl(glue("project_tbl_data_[a-z0-9_]+_{variable}$"), available_tables)
  ]

  available_tables <- gsub("project_tbl_data_", "", available_tables)
  gsub(glue("_{variable}"), "", available_tables)
}

#' tarpan2_model_data
#'
#' Retrieves model data from the postgres server
#'
#' @param geography A geography code, found in \code{tarpan1_geographies}
#' @param dbname A project database, found in \code{tarpan2_show_databases}
#' @param variable Which variable to use when querying data
#' @param model Which model to use when querying data
#' @param start Starting date to query data
#' @param end Ending date to query data
#' @param specified_variables Any specific variables to request from API.
#' Helpful for large queries.
#' @param group_by Name of `specified_variables` to group results by, typically
#' grouped by a time variable
#' @param weight_by Name of column from `project_tbl` that is to be used to
#' group the weights, typically a spatial grouping, such as "sub_iso".
#' @param weight Name of column from `project_tbl` that is to be used to weight
#' the results, typically "branch_book". Must be used in combination with
#' `weight_by` and `group_by`
#'
#' @return Tarpan model data frame
tarpan2_model_data <- function(geography = NULL, variable = NULL, model = NULL,
                               start = NULL, end = NULL,
                               specified_variables = NULL, dbname = NULL,
                               group_by = NULL, weight_by = NULL,
                               weight = NULL, bc_method = NULL) {
  date_format <- "^\\d{4}-\\d{2}-\\d{2}$"

  if (!is.null(start) && !is.null(end))
    if (grepl(date_format, start) && grepl(date_format, end)) {
      where_clause <- glue("initial_time >= '{start}' AND initial_time <= '{end}'")
    } else
      stop("Invalid start or end format. Must match the format yyy-mm-dd")
  else if (is.null(start) && !is.null(end) && grepl(date_format, end))
    where_clause <- glue("initial_time <= '{end}'")
  else if (!is.null(start) && is.null(end) && grepl(date_format, start))
    where_clause <- glue("initial_time >= '{start}'")
  else
    where_clause <- NULL

  #enforce postgres table name length limit
  model_data_table <- strtrim(glue('project_tbl_data_{model}_{variable}'), 63)

  if (is.null(specified_variables) && is.null(weight_by))
    specified_variables <- "*"

  if (!is.null(geography) & is.null(weight_by)) {
    # User can supply a geometry of any of these types:
    geo_columns <- c("geo_id", "fname", "sub_iso", "admin0_country_name",
                     "asciiname")
    # geo_clauses <- paste0(geo_columns, sep = " = \'{geography}\'",
    #                       collapse = " OR ")
    # geo_where_clause <- glue(geo_clauses)
    geo_paste <- paste0(geography,collapse = ",")
    geo_where_clause <- paste0(geo_columns,
                               sep = paste0(" = ANY(\'{",geo_paste,"}\')"),
                               collapse = " OR ")

    geo_id <- tarpan2_get_variables(con = dbname, variables = "geo_id",
                                    table = "project_tbl", schema = "public",
                                    where_clause = geo_where_clause)

    geo_ids <- paste0("'{", paste0(geo_id$geo_id, collapse = ", "), "}'")
  }

  if (is.null(weight_by))
    if (is.null(where_clause)){
      where_clause <- glue("gid = any({geo_ids})")
    } else {
      where_clause <- paste(where_clause, "AND", glue("gid = any({geo_ids})"))
    }
  else
    message("NOTE: geo field is ignored for weight_by queries")

  if (is.null(weight_by)){
    model_data <- tarpan2_get_variables(
      con = dbname,
      variables = specified_variables,
      table = model_data_table,
      schema = "public",
      n_vals = NULL,
      where_clause = where_clause,
      group_by = group_by)
  } else {
    model_data <- tarpan2_get_variables_weighted(
      con = dbname,
      variable = variable,
      variables = specified_variables,
      model = model,
      schema = "public",
      where_clause = where_clause,
      group_by = group_by,
      weight = weight,
      weight_by = weight_by,
      bc_method = bc_method)
  }
  convert_dates(model_data)
}

convert_dates <- function(data_frame) {
  if (!is.data.frame(data_frame))
    stop("data_frame argument is not a data frame. class(data_frame):\n",
         class(data_frame))

  # Only applies to columns that contains time
  converted_data_frame <- mutate_at(data_frame,
                                    vars(contains("time")),
                                    function(column) {
    column_not_na <- column[!is.na(column)]
    first_date <- column_not_na[1]

    parse_formats <- list(ymd = ymd, mdy = mdy, dmy = dmy)

    valid_formats <- suppressWarnings(
      keep(parse_formats, ~!is.na(.x(first_date)))
    )

    if (length(valid_formats) == 0) {
      return(column)
    } else if (length(valid_formats) > 1) {
      valid_formats_column <- suppressWarnings(
        keep(parse_formats, ~all(!is.na(.x(column))))
      )

      if (length(valid_formats_column) == 1)
        return(valid_formats_column[[1]](column))
      else {
        n <- min(length(column), 10)
        warning("Issue processing this column: ", column[1:n])
      }

    } else
      return(valid_formats[[1]](column))
  })

  converted_data_frame
}
GlobalParametrics/taRpan_readonly documentation built on May 13, 2019, 11:23 a.m.