R/download_archives.R
In aire.zmvm: Download Mexico City Pollution, Wind, and Temperature Data

Documented in .clean_archive download_24hr_average download_deposition download_lead download_meteorological download_pollution download_pressure download_radiation

#' Download Pollution Archives
#'
#' Download the pollution files available at
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=Zg==}{Contaminante}
#'
#' @param year a numeric vector containing the years for which to download data
#' (the earliest possible value is 2009)
#' @param progress whether to display a progress bar (TRUE or FALSE).
#' By default it will only display in an interactive session.
#'
#' @return a data.frame with pollution information for the following pollutants
#' "CO", "NO", "NO2", "NOX", "O3", "PM10", "SO2", "PM25", and "PMCO"
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#'
#' @examples
#' \dontrun{
#' head(download_pollution(2017))
#' }
download_pollution <- function(year, progress = interactive()) {
  get_data <- function(year) {
    RAMA <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/",
                   "opendata/anuales_horarios_gz/contaminantes_")
    ## The files from 2012 onwards changed the name of the columns
    ## cve_station and cve_parameter to id_station and id_parameter
    if (year >= 2012)
      df <- read_csv(str_c(RAMA, year, ".csv.gz"),
                     skip = 10, progress = FALSE, col_types = list(
                       date = col_character(),
                       id_station = col_character(),
                       id_parameter = col_character(),
                       value = col_double(),
                       unit = col_integer()
                     ))
    else
      df <- read_csv(str_c(RAMA, year, ".csv.gz"),
                     skip = 10, progress = FALSE, col_types = list(
                       date = col_character(),
                       cve_station = col_character(),
                       cve_parameter = col_character(),
                       value = col_double(),
                       unit = col_integer()
                     ))
    df <- .clean_archive(df, TRUE)
    df$pollutant <- .recode_pollutant(df$pollutant)
    df
  }
  ## Check the year argument is an integer or vector of integers
  if (length(year) < 1)
    stop("year should be an integer in YYYY format")
  for (i in seq_len(length(year)))
    if (is.integer2(year[i]) == FALSE)
      stop("year should be an integer in YYYY format")
  if (min(year) < 1986)
    stop("year must be greater or equal to 1986")
  if (identical(progress, TRUE) && length(year) > 1) {
    p <- progress_bar$new(format = "  downloading [:bar] :percent eta: :eta",
                          total = length(year))
    p$tick(0)
  }
  df <- data.frame()
  for (i in year){
    df <- rbind(df, get_data(i))
    if (identical(progress, TRUE) && length(year) > 1)
      p$tick()
  }
  as.data.frame(df)
}

#' Download Meteorological Data Archives
#'
#' Download the files available at
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=Zw==}{Meteorología}
#'
#' @param year a numeric vector containing the years for which to download data
#' (the earliest possible value is 1986)
#' @param progress whether to display a progress bar (TRUE or FALSE).
#' By default it will only display in an interactive session.
#'
#' @return a data.frame with meterological information:
#' "RH","TMP","WDR","WSP","PBa"
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#'
#' @examples
#' \dontrun{
#' head(download_meteorological(2017))
#' }
download_meteorological <- function(year, progress = interactive()) {
  get_data <- function(year) {
    REDMET <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/",
                     "opendata/anuales_horarios_gz/meteorolog%C3%ADa_")
    ## The files from 2012 onwards changed the name of the columns
    ## cve_station and cve_parameter to id_station and id_parameter
    if (year >= 2012)
      df <- read_csv(str_c(REDMET, year, ".csv.gz"),
                     skip = 10, progress = FALSE, col_types = list(
                       date = col_character(),
                       id_station = col_character(),
                       id_parameter = col_character(),
                       value = col_double(),
                       unit = col_integer()
                     ))
    else
      df <- read_csv(str_c(REDMET, year, ".csv.gz"),
                     skip = 10, progress = FALSE, col_types = list(
                       date = col_character(),
                       cve_station = col_character(),
                       cve_parameter = col_character(),
                       value = col_double(),
                       unit = col_integer()
                     ))
    .clean_archive(df, TRUE)
  }
  ## Check the year argument is an integer or vector of integers
  if (length(year) < 1)
    stop("year should be an integer in YYYY format")
  for (i in seq_len(length(year)))
    if (is.integer2(year[i]) == FALSE)
      stop("year should be an integer in YYYY format")
  if (min(year) < 1986)
    stop("year must be greater or equal to 1986")
  ## Errors in 2016 and 2017 data
  # if (any(year %in% 2016))
  #   warning(paste0("There may be errors in the 2016 wind speed data.",
  #                  " It was incorrectly converted to mph. Use the function",
  #                  " `get_station_data` to download the correct values"),
  #           call. = FALSE)
  if (identical(progress, TRUE) && length(year) > 1) {
    p <- progress_bar$new(format = "  downloading [:bar] :percent eta: :eta",
                          total = length(year))
    p$tick(0)
  }
  df <- data.frame()
  for (i in year){
    df <- rbind(df, get_data(i))
    if (identical(progress, TRUE) && length(year) > 1)
      p$tick()
  }
  as.data.frame(df)
}

#' Download Lead Pollution Archives
#'
#' Download data on lead pollution from the archives available at
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmE=\%27&r=aHR0cDovLzE0OC4yNDMuMjMyLjExMjo4MDgwL29wZW5kYXRhL3JlZF9tYW51YWwvcmVkX21hbnVhbF9wbG9tby5jc3Y=}{Plomo}
#' and
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmE=\%27&r=aHR0cDovLzE0OC4yNDMuMjMyLjExMjo4MDgwL29wZW5kYXRhL3JlZF9tYW51YWwvcmVkX21hbnVhbF9wYXJ0aWN1bGFzX3N1c3AuY3N2}{Partículas suspendidas}
#'
#' @param type type of data to download.
#' \itemize{
#'  \item PbPST
#'  \item PST, PM10, PM25
#' }
#'
#' @return A data.frame with pollution data.
#' @export
#' @importFrom readr read_csv
#' @examples
#' \dontrun{
#' head(download_lead("PbPST"))
#' }
download_lead <- function(type) {
  if (!(identical("PbPST", type) || identical("PST, PM10, PM25", type)))
    stop("type should be 'PbPST', or 'PST, PM10, PM25'")
  if (type == "PbPST")
    REDMA <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/red_manual/",
                    "red_manual_plomo.csv")
  else if (type == "PST, PM10, PM25")
    REDMA <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/red_manual/",
                    "red_manual_particulas_susp.csv")
  df <- read_csv(str_c(REDMA),
                 skip = 8, progress = FALSE, col_types = list(
                   Date = col_character(),
                   cve_station = col_character(),
                   cve_parameter = col_character(),
                   value = col_double(),
                   unit = col_integer()
                 ))
  .clean_archive(df, FALSE)
}

#' Download Acid Rain Measurements Archives
#'
#' Download data on rainfall samples collected weekly during the rainy season, available at
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmE=\%27&r=aHR0cDovLzE0OC4yNDMuMjMyLjExMjo4MDgwL29wZW5kYXRhL3JlZGRhL2RlcG9zaXRvLmNzdg==}{Depósito}
#' and
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmE=\%27&r=aHR0cDovLzE0OC4yNDMuMjMyLjExMjo4MDgwL29wZW5kYXRhL3JlZGRhL2RlcG9zaXRvVC5jc3Y=}{Depósito}
#'
#' @param type type of ion measurement
#' \itemize{
#'  \item DEPOSITO -  ion quantity deposition
#'  \item CONCENTRACION - ion concentration
#' }
#' @param deposition type of deposition to download
#' \itemize{
#'  \item TOTAL - Total deposition (1988-2000)
#'  \item HUMEDO - Wet and dry deposition (1997-)
#' }
#' @return A data.frame with deposition data.
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#' @examples
#' \dontrun{
#' ## Download rainfall in mm
#' df <- download_deposition(deposition = "HUMEDO", type = "CONCENTRACION") %>%
#'         filter(pollutant == "PP")
#' head(df)
#' }
download_deposition <- function(deposition, type) {
  if (!(identical("HUMEDO", deposition) || identical("TOTAL", deposition)))
    stop("deposition should be 'HUMEDO', or 'TOTAL'")
  if (!(identical("DEPOSITO", type) || identical("CONCENTRACION", type)))
    stop("type should be 'DEPOSITO', or 'CONCENTRACION'")
  # Deposito humedo - deposito
  if (deposition == "HUMEDO" & type == "DEPOSITO")
    REDDA <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/redda/deposito.csv"
  # Deposito humedo - concentracion
  if (deposition == "HUMEDO" & type == "CONCENTRACION")
    REDDA <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/redda/concentracion.csv"
  # Deposito total - deposito
  if (deposition == "TOTAL" & type == "DEPOSITO")
    REDDA <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/redda/depositoT.csv"
  # Deposito total - concentracion
  if (deposition == "TOTAL" & type == "CONCENTRACION")
    REDDA <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/redda/concentracionT.csv"
  df <- read_csv(REDDA,
                 skip = 8, progress = FALSE, col_types = list(
                   Date = col_character(),
                   cve_station = col_character(),
                   cve_parameter = col_character(),
                   value = col_double(),
                   unit = col_integer()
                 ))
  .clean_archive(df, FALSE)
}

#' Download Ultraviolet Radiation Archives
#'
#' Download data on UVA and UVB from the pollution archives available at
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=bA==}{Radiación Solar (UVA)}
#' and
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=bQ==}{Radiación Solar (UVB)}
#'
#' @param type type of data to download.
#' \itemize{
#'  \item UVA - long wave ultraviolet A
#'  \item UVB - short wave ultraviolet B
#' }
#' @param year a numeric vector containing the years for which to download data
#' (the earliest possible value is 2000)
#' @param progress whether to display a progress bar (TRUE or FALSE).
#' By default it will only display in an interactive session.
#'
#' @return A data.frame with pollution data. The
#' hours correspond to the
#' \emph{Etc/GMT+6} timezone, with no daylight saving time
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#' @examples
#' \dontrun{
#' head(download_radiation("UVA", 2017))
#' }
download_radiation <- function(type, year, progress = interactive()) {
  get_data <- function(year, type) {
    if (type == "UVA")
      RADIACION <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/radiacion/UVA_"
    else if (type == "UVB")
      RADIACION <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/radiacion/UVB_"
    df <- read_csv(str_c(RADIACION, year, ".csv"),
                   skip = 8, progress = FALSE, col_types = list(
                     Date = col_character(),
                     cve_station = col_character(),
                     parameter = col_character(),
                     value = col_double(),
                     unit = col_integer()
                   ))
    .clean_archive(df, TRUE)
  }
  ## Check the year argument is an integer or vector of integers
  if (length(year) < 1)
    stop("year should be an integer in YYYY format")
  for (i in seq_len(length(year)))
    if (is.integer2(year[i]) == FALSE)
      stop("year should be an integer in YYYY format")
  if (min(year) < 2000)
    stop("year must be greater or equal to 2000")
  if (!(identical("UVB", type) || identical("UVA", type)))
    stop("deposition should be 'UVA', or 'UVB'")
  if (identical(progress, TRUE) && length(year) > 1) {
    p <- progress_bar$new(format = "  downloading [:bar] :percent eta: :eta",
                          total = length(year))
    p$tick(0)
  }
  df <- data.frame()
  for (i in year){
    df <- rbind(df, get_data(i, type))
    if (identical(progress, TRUE) && length(year) > 1)
      p$tick()
  }
  as.data.frame(df)
}

#' Download archives of the 24 hour averages of pollutants
#'
#' Data comes from
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=ag==}{Promedios de 24 horas de partículas suspendidas(PM10 Y PM2.5)} and
#' \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=aQ==}{Promedios de 24 horas de Dióxido azufre}
#'
#' @param type  type of data to download.
#' \itemize{
#'  \item SO2 - Sulfur Dioxide (parts per billion)
#'  \item PS - Suspended solids
#' }
#' @param year a numeric vector containing the years for which to download data
#' (the earliest possible value is 1986 for SO2 and 1995 for PS)
#' @param progress whether to display a progress bar (TRUE or FALSE).
#' By default it will only display in an interactive session.
#'
#' @return A data.frame with pollution data.
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#'
#' @examples
#' \dontrun{
#' head(download_24hr_average("PS", 2017))
#' }
download_24hr_average <- function(type, year, progress = interactive()) {
  get_data <- function(year, type) {
    if (type == "PS")
      base_url <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/",
                         "promedios_diarios/promedios_")
    else if (type == "SO2")
      base_url <- paste0("http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/",
                         "promedios_diarios/promedios_")

    df <- read_csv(str_c(base_url, year, "_", tolower(type), ".csv"),
                   skip = 8, progress = FALSE,
                   col_types = list(
                     date         = col_character(),
                     id_station   = col_character(),
                     id_parameter = col_character(),
                     value        = col_double(),
                     unit         = col_integer()
                   ))
    .clean_archive(df, FALSE)
  }
  ## Check the year argument is an integer or vector of integers
  if (length(year) < 1)
    stop("year should be an integer in YYYY format")
  for (i in seq_len(length(year)))
    if (is.integer2(year[i]) == FALSE)
      stop("year should be an integer in YYYY format")
  if (!(identical("SO2", type) || identical("PS", type)))
    stop("type should be 'SO2', or 'PS'")
  if (min(year) < 1986 & type == "SO2")
    stop("year must be greater or equal to 1986")
  if (min(year) < 1995 & type == "PS")
    stop("year must be greater or equal to 1995")

  if (identical(progress, TRUE) && length(year) > 1) {
    p <- progress_bar$new(format = "  downloading [:bar] :percent eta: :eta",
                          total = length(year))
    p$tick(0)
  }
  df <- data.frame()
  for (i in year){
    df <- rbind(df, get_data(i, type))
    if (identical(progress, TRUE) && length(year) > 1)
      p$tick()
  }
  as.data.frame(df)
}

#' Download Atmospheric Pressure Archives
#'
#' The data comes from \href{http://www.aire.cdmx.gob.mx/default.php?opc=\%27aKBhnmI=\%27&opcion=bg==}{Presión Atmosférica}
#'
#' @param year a numeric vector containing the years for which to download data
#' (the earliest possible value is 2009)
#' @param progress whether to display a progress bar (TRUE or FALSE).
#' By default it will only display in an interactive session.
#'
#' @return A data.frame with atmospheric pressure data.
#' @export
#' @importFrom readr read_csv
#' @importFrom progress progress_bar
#'
#' @examples
#' \dontrun{
#' head(download_pressure(2017))
#' }
download_pressure <- function(year, progress = interactive()) {
  get_data <- function(year) {
    PRESION <- "http://datosabiertos.aire.cdmx.gob.mx:8080/opendata/presion/PA_"
    df <- read_csv(str_c(PRESION, year, ".csv"),
                   skip = 8, progress = FALSE,
                   col_types = list(
                     Date        = col_character(),
                     cve_station = col_character(),
                     parameter   = col_character(),
                     value       = col_double(),
                     unit        = col_integer()
                   ))
    .clean_archive(df, TRUE)
  }
  ## Check the year argument is an integer or vector of integers
  if (length(year) < 1)
    stop("year should be an integer in YYYY format")
  for (i in seq_len(length(year)))
    if (is.integer2(year[i]) == FALSE)
      stop("year should be an integer in YYYY format")
  if (min(year) < 2009)
    stop("year must be equal or greater than 2009")

  if (identical(progress, TRUE) && length(year) > 1) {
    p <- progress_bar$new(format = "  downloading [:bar] :percent eta: :eta",
                          total = length(year))
    p$tick(0)
  }
  df <- data.frame()
  for (i in year){
    df <- rbind(df, get_data(i))
    if (identical(progress, TRUE) && length(year) > 1)
      p$tick()
  }
  as.data.frame(df)
}

#' Clean archive files
#'
#' @param df data.frame to clean
#' @param include_hour is the data hourly
#'
#' @return data.frame
#' @export
#' @importFrom stringr str_replace_all str_sub
#' @keywords internal
.clean_archive <-  function(df, include_hour) {
  names(df) <- c("date", "station_code", "pollutant", "value", "unit")

  df$date <- str_replace_all(df$date, "\t", "")
  if (include_hour)
    df$hour <- as.numeric(str_sub(df$date, 12, 13))
  df$date <- str_c(str_sub(df$date, 7, 10), "-",
                   str_sub(df$date, 4, 5), "-",
                   str_sub(df$date, 1, 2))
  df$value <- as.numeric(df$value)
  df$date <- as.Date(df$date)

  df$unit <- .recode_unit_code(df$unit)

  if (include_hour)
    df <- df[, c("date", "hour", "station_code", "pollutant", "unit", "value")]
  else
    df <- df[, c("date", "station_code", "pollutant", "unit", "value")]

  # sometimes with download_deposition("TOTAL", "DEPOSITO") and
  # download_deposition("TOTAL", "CONCENTRACION") the last line is NAs
  if (is.na(df$date[nrow(df)]))
    df <- df[1:(nrow(df) - 1), ]
  as.data.frame(df)
}

Any scripts or data that you put into this service are public.

aire.zmvm documentation built on June 26, 2024, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

aire.zmvm
Download Mexico City Pollution, Wind, and Temperature Data

R/download_archives.R
In aire.zmvm: Download Mexico City Pollution, Wind, and Temperature Data

Defines functions .clean_archive download_pressure download_24hr_average download_radiation download_deposition download_lead download_meteorological download_pollution

Documented in .clean_archive download_24hr_average download_deposition download_lead download_meteorological download_pollution download_pressure download_radiation

Try the aire.zmvm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aire.zmvm Download Mexico City Pollution, Wind, and Temperature Data

R/download_archives.R In aire.zmvm: Download Mexico City Pollution, Wind, and Temperature Data

Defines functions .clean_archive download_pressure download_24hr_average download_radiation download_deposition download_lead download_meteorological download_pollution

Documented in .clean_archive download_24hr_average download_deposition download_lead download_meteorological download_pollution download_pressure download_radiation

Try the aire.zmvm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aire.zmvm
Download Mexico City Pollution, Wind, and Temperature Data

R/download_archives.R
In aire.zmvm: Download Mexico City Pollution, Wind, and Temperature Data