R/weather.R

Defines functions weather_raw remove_sym meta_html weather_html get_html weather_single weather_dl

Documented in weather_dl

#' Download weather data from Environment and Climate Change Canada
#'
#' Downloads data from Environment and Climate Change Canada (ECCC) for one or
#' more stations. For details and units, see the glossary vignette
#' (`vignette("glossary", package = "weathercan")`) or the glossary online
#' <https://climate.weather.gc.ca/glossary_e.html>.
#'
#' @details Data can be returned 'raw' (format = FALSE) or can be formatted.
#'   Formatting transforms dates/times to date/time class, renames columns, and
#'   converts data to numeric where possible. If character strings are contained
#'   in traditionally numeric fields (e.g., weather speed may have values such
#'   as "< 30"), they can be replaced with a character specified by `string_as`.
#'   The default is NA. Formatting also replaces data associated with certain
#'   flags with NA (M = Missing).
#'
#'   Start and end date can be specified, but if not, it will default to the
#'   start and end date of the range (this could result in downloading a lot of
#'   data!).
#'
#'   For hourly data, timezones are always "UTC", but the actual times are
#'   either local time (default; `time_disp = "none"`), or UTC (`time_disp =
#'   "UTC"`). When `time_disp = "none"`, times reflect the local time without
#'   daylight savings. This means that relative measures of time, such as
#'   "nighttime", "daytime", "dawn", and "dusk" are comparable among stations in
#'   different timezones. This is useful for comparing daily cycles. When
#'   `time_disp = "UTC"` the times are transformed into UTC timezone. Thus
#'   midnight in Kamloops would register as 08:00:00 (Pacific time is 8 hours
#'   behind UTC). This is useful for tracking weather events through time, but
#'   will result in odd 'daily' measures of weather (e.g., data collected in the
#'   afternoon on Sept 1 in Kamloops will be recorded as being collected on Sept
#'   2 in UTC).
#'
#'   Files are downloaded from the url stored in
#'   `getOption("weathercan.urls.weather")`. To change this location use
#'   `options(weathercan.urls.weather = "your_new_url")`.
#'
#'   Data is downloaded from ECCC as a series of files which are then bound
#'   together. Each file corresponds to a different month, or year, depending on
#'   the interval. Metadata (station name, lat, lon, elevation, etc.) is
#'   extracted from the start of the most recent file (i.e. most recent dates)
#'   for a given station. Note that important data (i.e. station name, lat, lon)
#'   is unlikely to change between files (i.e. dates), but some data may or may
#'   not be available depending on the date of the file (e.g., station operator
#'   was added as of April 1st 2018, so will be in all data which includes dates
#'   on or after April 2018).
#'
#' @param station_ids Numeric/Character. A vector containing the ID(s) of the
#'   station(s) you wish to download data from. See the \code{\link{stations}}
#'   data frame or the \code{\link{stations_search}} function to find IDs.
#' @param start Date/Character. The start date of the data in YYYY-MM-DD format
#'   (applies to all stations_ids). Defaults to start of range.
#' @param end Date/Character. The end date of the data in YYYY-MM-DD format
#'   (applies to all station_ids). Defaults to end of range.
#' @param interval Character. Interval of the data, one of "hour", "day",
#'   "month".
#' @param trim Logical. Trim missing values from the start and end of the
#'   weather dataframe. Only applies if `format = TRUE`
#' @param format Logical. If TRUE, formats data for immediate use. If FALSE,
#'   returns data exactly as downloaded from Environment and Climate Change
#'   Canada. Useful for dealing with changes by Environment Canada to the format
#'   of data downloads.
#' @param string_as Character. What value to replace character strings in a
#'   numeric measurement with. See Details.
#' @param time_disp Character. Either "none" (default) or "UTC". See details.
#' @param encoding Character. Text encoding for download.
#' @param list_col Logical. Return data as nested data set? Defaults to FALSE.
#'   Only applies if `format = TRUE`
#' @param verbose Logical. Include progress messages
#' @param quiet Logical. Suppress all messages (including messages regarding
#'   missing data, etc.)
#' @param stn DEFUNCT. Now use `stations_dl()` to update internal data and
#'   `stations_meta()` to check the date it was last updated.
#'
#' @return A tibble with station ID, name and weather data.
#'
#' @examplesIf check_eccc()
#'
#' kam <- weather_dl(station_ids = 51423,
#'                   start = "2016-01-01", end = "2016-02-15")
#'
#' stations_search("Kamloops A$", interval = "hour")
#' stations_search("Prince George Airport", interval = "hour")
#'
#' kam.pg <- weather_dl(station_ids = c(48248, 51423),
#'                      start = "2016-01-01", end = "2016-02-15")
#'
#' library(ggplot2)
#'
#' ggplot(data = kam.pg, aes(x = time, y = temp,
#'                           group = station_name,
#'                           colour = station_name)) +
#'        geom_line()
#'
#' @aliases weather
#'
#' @export
weather_dl <- function(station_ids,
                       start = NULL, end = NULL,
                       interval = "hour",
                       trim = TRUE,
                       format = TRUE,
                       string_as = NA,
                       time_disp = "none",
                       stn = NULL,
                       encoding = "UTF-8",
                       list_col = FALSE,
                       verbose = FALSE,
                       quiet = FALSE) {

  # Address as.POSIXct...
  if((!is.null(start) &
      inherits(try(as.Date(start), silent = TRUE), "try-error")) |
     (!is.null(end) &
      inherits(try(as.Date(end), silent = TRUE), "try-error"))) {
    stop("'start' and 'end' must be either a standard date format ",
         "(YYYY-MM-DD) or NULL")
  }

  if(length(interval) > 1) {
    stop("'interval' must be either 'hour', 'day', OR 'month'")
  }

  if(!is.null(stn)){
    stop("`stn` is defunct, to use an updated stations data frame ",
         "use `stations_dl()` to update the internal data, and ",
         "`stations_meta()` to check when it was last updated", call. = FALSE)
  }
  stn <- stations()

  check_int(interval)

  w_all <- data.frame()
  missing <- c()
  end_dates <- c()
  msg_fmt <- dplyr::tibble()

  for(s in station_ids) {
    if(verbose) message("Getting station: ", s)

    stn1 <- stn %>%
      dplyr::filter(.data$station_id %in% s,
                    !is.na(.data$start),
                    .data$interval == !!interval) %>%
      dplyr::arrange(.data$interval)

    ## Check if station missing that interval
    if(nrow(stn1) == 0) {
      if(length(station_ids) > 1) {
        missing <- c(missing, s)
        if(verbose) message("No data for station ", s)
        next
      } else {

        if(!quiet) message(paste0("There are no data for station ", s, " ",
                                  "for this interval (", interval, ")",
                                  "\nAvailable Station Data:\n",
                                  paste0(utils::capture.output(print(
                                    dplyr::filter(stn,
                                                  .data$station_id %in% s,
                                                  !is.na(.data$start)))),
                                    collapse = "\n")))
        return(dplyr::tibble())
      }
    }

    if(!lubridate::is.Date(stn1$start)) {
      stn1 <- dplyr::mutate(stn1,
                            start = lubridate::ymd(as.character(.data$start),
                                                   truncated = 2),
                            start = lubridate::floor_date(.data$start, "year"))
    }
    if(!lubridate::is.Date(stn1$end)) {
      stn1 <- dplyr::mutate(stn1,
                            end = lubridate::ymd(as.character(.data$end),
                                                 truncated = 2),
                            end = lubridate::ceiling_date(.data$end, "year"))
    }
    stn1 <- stn1 %>%
      dplyr::mutate(end = replace(.data$end, .data$end > Sys.Date(), Sys.Date()),
                    int = lubridate::interval(.data$start, .data$end),
                    interval = factor(.data$interval,
                                      levels = c("hour", "day", "month"),
                                      ordered = TRUE))

    if(is.null(start)) {
      s.start <- stn1$start
      msg.start <- "earliest date"
    } else {
      s.start <- as.Date(start)
      msg.start <- start
    }

    if(is.null(end)) s.end <- Sys.Date() else s.end <- as.Date(end)
    msg.end <- as.character(s.end)

    dates <- lubridate::interval(s.start, s.end)

    if(lubridate::int_end(dates) < lubridate::int_start(dates)) {
      if(length(station_ids) > 1) {
        if(!quiet) message("End date earlier than start date for station ", s)
        end_dates <- c(end_dates, s)
        next
      } else {
        if(!quiet) message(paste0("The end date (", msg.end, ") ",
                                  "is earlier than the start date (",
                                  as.character(s.start),
                                  ") for station ", s, " for this interval (",
                                  interval, "), ",
                                  "\nAvailable Station Data:\n",
                                  paste0(utils::capture.output(print(
                                    dplyr::filter(stn,
                                                  .data$station_id %in% s,
                                                  !is.na(.data$start)))),
                                    collapse = "\n")))
        return(dplyr::tibble())
      }
    }

    if(interval == "hour") {
      date_range <- seq(lubridate::floor_date(s.start, unit = "month"),
                        lubridate::floor_date(s.end, unit = "month"),
                        by = dplyr::if_else(interval %in% c("hour"), "month", "year"))
    } else if(interval == "day") {
      date_range <- seq(lubridate::floor_date(s.start, unit = "year"),
                        lubridate::floor_date(s.end, unit = "year"), by = "year")
    } else if(interval == "month") {
      date_range <- lubridate::floor_date(s.start, unit = "year")
    }

    w <- weather_single(date_range, s, interval, encoding)

    # Extract only most recent meta
    meta <-  meta_html(station_id = s, interval = interval) %>%
      meta_raw(encoding = encoding, interval = interval) %>%
      meta_format(s = s)

    ## Format data if requested
    if(format) {
      if(verbose) message("Formatting station data: ", s)

      w <- weather_format(w, meta = meta,
                          stn = stn,
                          interval = interval,
                          s.start = s.start,
                          s.end = s.end,
                          time_disp = time_disp,
                          string_as = string_as,
                          quiet = quiet)

      # Catch messages
      if(nrow(w$msg) > 0) {
        msg_fmt <- dplyr::bind_rows(dplyr::bind_cols(station_id = s,
                                                     w$msg),
                                    msg_fmt)
      }

      # Get formatted data
      w <- w$data

      ## Check if all missing, remove and message
      n <- c("time", "date", "year", "month", "day", "hour")
      temp <- dplyr::select(w, -tidyselect::any_of(n))

      if(nrow(temp) == 0 || all(is.na(temp) | temp == "")) {
        if(length(station_ids) > 1) {
          if(verbose) message("No data for station ", s)
          missing <- c(missing, s)
          next
        } else {
          if(!quiet) message(paste0("There are no data for station ", s, ", ",
                                    "in this time range (", msg.start,
                                    " to ", msg.end, "), for this interval (",
                                    interval, "), ",
                                    "\nAvailable Station Data:\n",
                                    paste0(utils::capture.output(print(
                                      dplyr::filter(stn,
                                                    .data$station_id %in% s,
                                                    !is.na(.data$start)))),
                                      collapse = "\n")))
          return(dplyr::tibble())
        }
      }
    }

    ## Add header info
    if(verbose) message("Adding header data: ", s)
    if(nrow(w) > 0) w <- cbind(meta, w)

    ## Fill missing headers with NA
    w[names(m_names)[!names(m_names) %in% names(w)]] <- NA

    w_all <- rbind(w_all, w)
  }

  if(nrow(w_all) > 0) {
    ## Trim to available data provided it is formatted

    if(trim) w_all <- weather_trim(w_all, format, verbose)

    m <- names(m_names)[names(m_names) %in% names(w_all)]

    ## Arrange
    w_all <- dplyr::select(w_all,
                           dplyr::one_of(m),
                           dplyr::everything())

    ## If list_col is TRUE and data is formatted
    if(list_col && format) {
      w_all <- weather_list_cols(w_all, interval, names = m)
    }
  }

  # Return messages
  if(length(missing) > 0 & !quiet) {
    if(all(station_ids %in% missing)) type <- "all" else type <- "some"

    message(paste0("There are no data for ", type, " stations (",
                   paste0(missing, collapse = ", "), "), ",
                   "in this time range (", msg.start, " to ", msg.end, "), ",
                   "for this interval (", interval, ")",
                   "\nAvailable Station Data:\n",
                   paste0(utils::capture.output(print(
                     dplyr::filter(stn,
                                   .data$station_id %in% missing,
                                   !is.na(.data$start)))),
                     collapse = "\n")))
  }

  if(length(end_dates) > 0 & !quiet) {
    if(all(station_ids %in% missing)) type <- "all" else type <- "some"

    message(paste0("The end dates (", msg.end, ") are earlier than the ",
                   "start dates (", msg.start, ") for ", type, " stations (",
                   paste0(end_dates, collapse = ", "),
                   "), for this interval (", interval, "), ",
                   "\nAvailable Station Data:\n",
                   paste0(utils::capture.output(print(
                     dplyr::filter(stn,
                                   .data$station_id %in% end_dates,
                                   !is.na(.data$start)))),
                     collapse = "\n")))
  }
  ## Return Format messages
  if(!quiet && nrow(msg_fmt) > 0) {
    cols <- paste0(unique(msg_fmt$col), collapse = ", ")
    stations_msg <- paste0(unique(msg_fmt$station_id), collapse = ", ")
    message("Some variables have non-numeric values (", cols,
            "), for stations: ", stations_msg)
    if(all(is.na(msg_fmt$replace) | msg_fmt$replace != "no_replace")) {
      message("  Replaced all non-numeric entries with ",
              msg_fmt$replace[1], ". ",
              "Use 'string_as = NULL' to keep as characters (see ?weather_dl).")
    } else {
      message("  Left all non-numeric entries as characters. ",
              "Couldn't summarize these columns.")
    }

    if(verbose) {
      show <- msg_fmt %>%
        dplyr::select("station_id", "problems")

      if(utils::packageVersion("tidyr") > "0.8.99") {
        show <- tidyr::unnest(show, "problems")
      } else {
        show <- tidyr::unnest(show)
      }

      message("  Examples:  ")
      message(paste0("  ", utils::capture.output(show), collapse = "\n"))
    }
  }

  if(interval == "hour" && !getOption("weathercan.time.message")){
   message("As of weathercan v0.3.0 time display is either local time or UTC\n",
           "See Details under ?weather_dl for more information.\n",
           "This message is shown once per session")
    options("weathercan.time.message" = TRUE)
  }

  dplyr::as_tibble(w_all)
}

weather_single <- function(date_range, s, interval, encoding) {
  w <- dplyr::tibble(date_range = date_range)
  w <- dplyr::mutate(w, html = purrr::map(.data$date_range,
                                    ~ weather_html(station_id = s,
                                                   date = .x,
                                                   interval = interval)))
  w <- dplyr::mutate(w, data = purrr::map(.data$html,
                                    ~ weather_raw(.,
                                                  encoding = encoding,
                                                  header = TRUE)))
  w <- dplyr::select(w, "data")

  if(utils::packageVersion("tidyr") > "0.8.99") {
    w <- tidyr::unnest(w, "data")
  } else w <- tidyr::unnest(w)
  w
}



get_html <- function(station_id,
                     date = NULL,
                     interval = "hour",
                     format = "csv") {

  q <- list(format = format, stationID = station_id,
            timeframe = ifelse(interval == "hour", 1,
                               ifelse(interval == "day", 2,
                                      3)),
            submit = 'Download+Data')

  if(format == "csv" & interval != "month") {
    q['Year'] <- format(date, "%Y")
    q['Month'] <- format(date, "%m")
  }

  get_check(url = getOption("weathercan.urls.weather"),
            query = q, task = "access historical weather data")
}

weather_html <- function(station_id, date, interval = "hour") {
  if(interval == "month") date <- NULL
  get_html(station_id, date, interval, format = "csv")
}

meta_html <- function(station_id, interval = "hour") {
  get_html(station_id, date = NULL, interval, format = "txt")
}

remove_sym <- function(df) {
  to_remove <- "\\u00BB|\\u00BF|\\u00EF|\\u00C2|\\u00B0"
  dplyr::rename_all(df, ~stringr::str_remove_all(., to_remove))
}


weather_raw <- function(html, skip = 0,
                        nrows = Inf,
                        header = TRUE,
                        encoding = "UTF-8") {

  raw <- httr::content(html, type = "raw")

  # Look for and remove BOM
  if(raw[1] == as.raw(0xef) &
     raw[2] == as.raw(0xbb) &
     raw[3] == as.raw(0xbf)) {
    raw <- raw[4:length(raw)]
  }

  # Get number of columns
  ncols <- readr::read_csv(I(raw), n_max = 1, col_names = FALSE,
                           col_types = readr::cols(), progress = FALSE) %>%
    ncol()
  readr::local_edition(1)
  suppressWarnings({ # when some data are missing, final columns not present
    w <- readr::read_csv(I(raw), n_max = nrows, skip = skip,
                         col_types = paste(rep("c", ncols), collapse = ""),
                         progress = FALSE)})
  # Get rid of special symbols right away
  w <- remove_sym(w)

  # For some reason the flags "^" are replaced with "I",
  # change back to match flags on ECCC website
  if(utils::packageVersion("dplyr") > package_version("0.8.0")) {
    w <- dplyr::mutate_at(w, .vars = dplyr::vars(dplyr::ends_with("Flag")),
                          list(~gsub("^I$", "^", .)))
  } else {
    w <- dplyr::mutate_at(w, .vars = dplyr::vars(dplyr::ends_with("Flag")),
                          dplyr::funs(gsub("^I$", "^", .)))
  }
  w
}


weather_trim <- function(w, format, verbose) {

  if(format && nrow(w) > 0) {
    if(verbose) message("Trimming missing values before and after")
    temp <-  dplyr::select(w,
                           -dplyr::any_of(c(names(m_names), "date", "time",
                                            "year", "month",
                                            "day", "hour", "qual")))
    temp <- w$date[which(rowSums(is.na(temp) | temp == "") != ncol(temp))]

    w <- w[w$date >= min(temp) & w$date <= max(temp), ]
  }
  w
}

weather_format <- function(w, stn, meta, interval = "hour", s.start, s.end,
                           string_as = "NA", time_disp = NULL, quiet = FALSE) {

  w <- dplyr::select(w,
                     -dplyr::any_of(c("Station Name", "Climate ID")),
                     -dplyr::contains("Latitude"),
                     -dplyr::contains("Longitude"))

  ## Get names from stored name list
  n <- w_names[[interval]]

  ## Trim to match names in data
  n <- n[n %in% names(w)]

  w <- dplyr::rename(w, !!n)

  if(interval == "day") w <- dplyr::mutate(w, date = as.Date(.data$date))
  if(interval == "month") {
    w <- dplyr::mutate(w, date = as.Date(paste0(.data$date, "-01")))
  }

  ## Get correct timezone
  if(interval == "hour"){
    w <- dplyr::mutate(w, time = as.POSIXct(.data$time, tz = "UTC"))
    if(time_disp == "UTC") {
      offset <- tz_hours(stn$tz[stn$station_id == meta$station_id[1]][1])
      w <- dplyr::mutate(w, time = .data$time + lubridate::hours(offset))
    }
    w <- dplyr::mutate(w, date = lubridate::as_date(.data$time))
  }

  ## Replace some flagged values with NA
  w <- w %>%
    tidyr::gather(key = "variable", value = "value",
                  names(w)[!(names(w) %in% c("date", "year", "month", "day",
                                             "hour", "time", "qual",
                                             "weather"))]) %>%
    tidyr::separate("variable", into = c("variable", "type"),
                    sep = "_flag", fill = "right") %>%
    dplyr::mutate(type = replace(.data$type, .data$type == "", "flag"),
                  type = replace(.data$type, is.na(.data$type), "value")) %>%
    tidyr::spread("type", "value") %>%
    dplyr::mutate(value = replace(.data$value, .data$value == "", NA),  ## No data
                  value = replace(.data$value, .data$flag == "M", NA))  ## Missing

  if("qual" %in% names(w)){
    w <- dplyr::mutate(w,
                       # Convert to ascii
                       qual = stringi::stri_escape_unicode(.data$qual),
                       qual = replace(.data$qual, .data$qual == "\\u2020",
                                      "Only preliminary quality checking"),
                       qual = replace(.data$qual, .data$qual == "\\u2021",
                                      paste0("Partner data that is not subject",
                                             " to review by the National ",
                                             "Climate Archives")))
  }
  w <- w %>%
    tidyr::gather(key = "type", value = "value", "flag", "value") %>%
    dplyr::mutate(variable = replace(.data$variable, .data$type == "flag",
                                     paste0(.data$variable[.data$type == "flag"],
                                            "_flag"))) %>%
    dplyr::select("date", dplyr::everything(), -"type") %>%
    tidyr::spread("variable", "value")

  ## Can we convert to numeric?
  #w$wind_spd[c(54, 89, 92)] <- c(">3", ">5", ">10")

  num <- apply(dplyr::select(
    w, -dplyr::any_of(c("date", "year", "month",
                        "day", "hour", "time", "qual",
                        "weather",
                        grep("flag", names(w), value = TRUE)))),
    MARGIN = 2,
    FUN = function(x) tryCatch(as.numeric(x),
                                          warning = function(w) w))

  warn <- vapply(num,
                 FUN = function(x) methods::is(x, "warning"),
                 FUN.VALUE = TRUE)

  if(any(warn)) {
    m <- paste0(names(num)[warn], collapse = ", ")
    non_num <- dplyr::tibble(col = names(num)[warn])
    for(i in names(num)[warn]) {
      problems <- w[grep("<|>|\\)|\\(", w[[i]]),
                    names(w) %in% c("date", "year", "month",
                                    "day", "hour", "time", i)]
      if(nrow(problems) > 20) rows <- 20 else rows <- nrow(problems)
      non_num$problems <- list(problems[1:rows,])
    }
    if(!is.null(string_as)) {
      non_num$replace <- string_as
      suppressWarnings({
        valid_cols <- c("date", "year", "month", "day",
                        "hour", "time", "qual", "weather",
                        grep("flag", names(w), value = TRUE))

        replacement <- apply(dplyr::select(w, -tidyselect::any_of(valid_cols)),
                             MARGIN = 2,
                             FUN = as.numeric)

        w[!(names(w) %in% valid_cols)] <- as.data.frame(replacement)
      })
    } else {
      m <- paste0(names(num)[warn],
                  collapse = ", ")

      non_num <- dplyr::mutate(non_num, replace = "no_replace")

      replace <- c("date", "year", "month", "day",
                   "hour", "time", "qual", "weather",
                   grep("flag", names(w), value = TRUE),
                   names(num)[warn])

      w[!(names(w) %in% replace)] <- as.data.frame(num[!warn])
    }
  } else {
    non_num <- data.frame()
    w[!(names(w) %in% c("date", "year", "month", "day",
                        "hour", "time", "qual", "weather",
                        grep("flag", names(w), value = TRUE)))] <- as.data.frame(num)
  }

  ## Trim to match date range
  w <- dplyr::filter(w, .data$date >= s.start & .data$date <= s.end)

  list(data = w, msg = non_num)
}

weather_list_cols <- function(w_all, interval, names) {
  w_all <- dplyr::as_tibble(w_all)
  ## Appropriate grouping levels
  if(utils::packageVersion("tidyr") > "0.8.99") {
    col <- dplyr::case_when(interval == "hour" ~ "date",
                            interval == "day" ~ "month",
                            interval == "month" ~ "year")

    w_all <- tidyr::nest(w_all, key = -dplyr::one_of(names, col))

  } else {
    if(interval == "hour"){
      w_all <- tidyr::nest(w_all, -dplyr::one_of(names), -"date")
    }

    if(interval == "day"){
      w_all <- tidyr::nest(w_all, -dplyr::one_of(names), -"month")
    }

    if(interval == "month"){
      w_all <- tidyr::nest(w_all, -dplyr::one_of(names), -"year")
    }
  }
  w_all
}

meta_raw <- function(html, encoding = "UTF-8", interval, return = "meta") {
  split <- httr::content(html, as = "text", encoding = encoding) %>%
    stringr::str_split("\n", simplify = TRUE) %>%
    stringr::str_subset("^\r$", negate = TRUE)

  if(return == "meta") {
    i <- stringr::str_which(split, "If Local Standard Time|Legend")[1] - 1

    r <- httr::content(html, as = "text",
                  type = "text/csv",
                  encoding = encoding) %>%
      stringr::str_replace_all("(\\t)+", "\\\t") %>%
      readr::read_tsv(., n_max = i,
                      col_names = FALSE,
                      col_types = readr::cols(),
                      progress = FALSE)

    if(ncol(r) > 2) {
      stop("Problems parsing metadata. Submit an issue at ",
           "https://github.com/ropensci/weathercan/issues", call. = FALSE)
    }
  } else if(return == "legend") {
    r <- httr::content(html, as = "text",
                       type = "text/csv",
                       encoding = encoding) %>%
      stringr::str_replace_all("(\\t)+", "\\\t") %>%
      stringr::str_remove("\\*https\\:\\/\\/climate.weather.gc.ca\\/FAQ_e.html#Q5") %>%
      readr::read_tsv(., skip = stringr::str_which(split, "Legend") + 1,
                      col_names = FALSE,
                      col_types = readr::cols(),
                      progress = FALSE)
  }
  # Get rid of any special symbols
  remove_sym(r)
}

meta_format <- function(meta, s) {

  m <- paste0("(", paste0(m_names, collapse = ")|("), ")")

  meta <- meta %>%
    dplyr::mutate(X1 = stringr::str_extract(.data$X1, pattern = m)) %>%
    dplyr::filter(!is.na(.data$X1)) %>%
    tidyr::spread("X1", "X2")

  m <- m_names[m_names %in% names(meta)]

  meta %>%
    dplyr::select(dplyr::all_of(m)) %>%
    dplyr::mutate(station_id = s,
                  prov = province[[.data$prov]],
                  lat = as.numeric(as.character(.data$lat)),
                  lon = as.numeric(as.character(.data$lon)),
                  elev = as.numeric(as.character(.data$elev)))
}
steffilazerte/envirocan documentation built on Sept. 25, 2023, 10:16 p.m.