# data-raw/weather/BOM_Data.R

library(tidyverse)
library(XML)
library(data.table)
library(dtplyr)
library(lubridate)
library(fitzRoy)
base_url <- "http://www.bom.gov.au"
years <- 2017
# setwd("data")

# Get stations ------------------------------------------------------------
tmptxt <- tempfile(fileext = ".txt")
download.file(
  "http://www.bom.gov.au/climate/data/lists_by_element/alphaAUS_136.txt",
  destfile = tmptxt
)
stations <- read.delim(tmptxt, as.is = TRUE)
unlink(tmptxt)
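# Optional sanity check: the raw listing is expected to arrive as one line per
# station, plus header and footer rows
stopifnot(nrow(stations) > 0)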

# Clean by hand: the listing has no delimiters, so read.delim returns the raw
# lines as (effectively) a single column. Drop the non-data rows at the top,
# keep the header line for the column name, and drop the trailing summary lines.
stations <- stations[-c(1, 3), ]
cname <- stations[1]
stations <- data.frame(stations[2:(length(stations) - 3)])
names(stations) <- cname

# Round-trip through a temporary file so the lines can be re-read as fixed-width fields
tmpcsv <- tempfile(fileext = ".csv")
write.csv(stations, tmpcsv, row.names = FALSE, quote = FALSE)
stations <- read.fwf(tmpcsv, widths = c(8, 41, 10, 9, 9, 10, 7, 3, 4))
unlink(tmpcsv)

# Use the header row for the column names, stripping the fixed-width padding.
# apply() goes via a character matrix, which avoids factor-level surprises
# from calling as.character() on a data-frame row.
xstring <- apply(stations[1, ], 1, as.character)
xstring <- str_replace_all(xstring, fixed(" "), "")
stations <- stations[-1, ]
names(stations) <- xstring
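# Quick check: the code below relies on the Site, Start and End columns; the
# exact header names come from the BOM listing itself
stopifnot(all(c("Site", "Start", "End") %in% names(stations)))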
# head(stations)
# View(stations)

# Parse the Start/End columns (e.g. "Jan 1839") into integer years by
# prepending a day-of-month and parsing the result as a full date
parse_year <- function(x) {
  x <- str_replace_all(as.character(x), fixed(" "), "")
  as.integer(format(strptime(paste0("1", x), "%d%b%Y"), "%Y"))
}
stations$StartYear <- parse_year(stations$Start)
stations$EndYear <- parse_year(stations$End)
stations$Site <- as.numeric(as.character(stations$Site))
# active stations
# stations <- subset(stations, StartYear >= 2014 & EndYear == 2016)

# Keep only the stations mapped to AFL venues (duplicates in the original
# list removed; with %in% they were harmless anyway)
stations <- stations[stations$Site %in% c(
  23090, 94029,
  67119,
  40764, 31011,
  86038,
  23034,
  40913,
  87184,
  70351,
  14015,
  66062,
  66212,
  9225,
  # N/A
  91237
), ]


# Download and process ----------------------------------------------------
# Download the "all years" daily-data zip for one station and one observation
# code (136 = rainfall, 122 = max temp, 123 = min temp, 193 = solar exposure).
# Skips the download if the file already exists; returns the local zip path.
download_obs_file <- function(station, obs_code) {
  tmppath <- paste0(obs_code, "_", station, ".zip")
  if (!file.exists(tmppath)) {
    Sys.sleep(0.1) # be polite to the BOM server
    # go to the station page for this observation type
    page <- htmlParse(paste0(
      "http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode=",
      obs_code,
      "&p_display_type=dailyDataFile&p_startYear=&p_c=&p_stn_num=",
      station
    ))
    # find the "all years" data link
    node_title <- c(
      "Data file for daily rainfall data for all years",
      "Data file for daily maximum temperature data for all years",
      "Data file for daily minimum temperature data for all years",
      "Data file for daily global solar exposure data for all years"
    )[match(obs_code, c(136, 122, 123, 193))]
    ziplink <- paste0(
      base_url,
      xpathSApply(page, paste0("//a[@title='", node_title, "']"), xmlGetAttr, "href")
    )

    # download and save the zip; if no matching link was found,
    # ziplink is just base_url and the station is skipped
    if (ziplink != base_url) {
      download.file(ziplink, tmppath, quiet = TRUE, mode = "wb")
    }
    print(station)
  }
  # return the path in both branches so unlink() later sees every file
  tmppath
}
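# Example usage (commented out): fetch the daily rainfall archive for a single
# station; 23090 is one of the venue stations selected above, and the returned
# path should be "136_23090.zip" in the working directory.
# download_obs_file(23090, obs_code = 136)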


# Unzip one downloaded archive and keep the requested years. Columns 2:6 of
# the BOM daily CSV hold the station number, Year, Month, Day and the observed
# value. Archives without exactly one CSV yield the sentinel 0, which rbind()
# recycles into an all-zero row that is filtered out downstream via Year != 0.
process_file <- function(filename, years) {
  temp_dir <- tempdir()
  unzip(filename, exdir = temp_dir)

  # anchor the pattern so only .csv files match (an unescaped ".csv" would
  # also match names like "notes_csv.txt")
  datafilename <- dir(temp_dir, pattern = "\\.csv$", full.names = TRUE)

  if (length(datafilename) == 1) {
    # read csv
    df <- read.csv(datafilename)
    df <- df[df$Year %in% years, 2:6]
  } else {
    df <- 0
  }
  # delete the extracted files so the next archive starts clean
  file.remove(dir(temp_dir, full.names = TRUE))

  return(df)
}
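# Example usage (commented out): extract the 2017 rows from one archive. The
# exact CSV column names come from the BOM headers, so inspect with head().
# one_station <- process_file("136_23090.zip", years = 2017)
# head(one_station)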

# Process -----------------------------------------------------------------
# 136, 122, 123, 193

# Rainfall (obs code 136)
tmp_paths <- lapply(stations$Site, download_obs_file, obs_code = 136)
df <- lapply(dir(pattern = "136_([0-9]{4,5})\\.zip"), process_file, years = years)
data_rain <- do.call("rbind", df)
data_rain <- data_rain[data_rain$Year != 0, ] # drop sentinel rows
rm(df)
unlink(unlist(tmp_paths))

# tmp_paths <- lapply(stations$Site, download_obs_file, obs_code = 122)
# df <- lapply(dir(pattern = "122_([0-9]{4,5}).zip"), process_file, years = years)
# data_temp_max <- do.call("rbind", df)
# data_temp_max <- data_temp_max[data_temp_max$Year != 0, ]
# rm(df)
# unlink(tmp_paths)
#
# tmp_paths <- lapply(stations$Site, download_obs_file, obs_code = 123)
# df <- lapply(dir(pattern = "123_([0-9]{4,5}).zip"), process_file, years = years)
# data_temp_min <- do.call("rbind", df)
# data_temp_min <- data_temp_min[data_temp_min$Year != 0, ]
# rm(df)
# unlink(tmp_paths)

# Solar exposure (obs code 193)
tmp_paths <- lapply(stations$Site, download_obs_file, obs_code = 193)
df <- lapply(dir(pattern = "193_([0-9]{4,5})\\.zip"), process_file, years = years)
data_solar <- do.call("rbind", df)
data_solar <- data_solar[data_solar$Year != 0, ] # drop sentinel rows
rm(df)
unlink(unlist(tmp_paths))
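# Optional check: stations present in one table but not the other get NAs from
# merge(all = TRUE) below; column 1 of each table is the BOM station number.
# setdiff(unique(data_rain[[1]]), unique(data_solar[[1]]))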

# Merge rainfall and solar exposure on their shared station/year/month/day columns
bom_data <- merge(data_rain, data_solar, all = TRUE)
# df <- merge(df, data_temp_min, all = TRUE)
# df <- merge(df, data_temp_max, all = TRUE)
# save(df, file = "BOM.RData")
# devtools::use_data(BOM, overwrite = TRUE)
# save(df, file = "./data-raw/weather/BOM.rda")

# Get match results and merge ----------------------------------------------
results <- fitzRoy::match_results
BOMVenueLookup <- read_csv(file = "data-raw/weather/BOM Mapping.csv")
colnames(BOMVenueLookup) <- c("Venue", "Description", "StationNo")

StationsList <- unique(BOMVenueLookup$StationNo)

colnames(bom_data)[1] <- "StationNo" # BOM station number
colnames(bom_data)[5] <- "Rainfall" # daily rainfall (mm)

# Convert the Year, Month, Day columns to a Date
bom_data$date <- make_date(bom_data$Year, bom_data$Month, bom_data$Day)
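# Optional check: station/date should uniquely identify a row here, otherwise
# the merge with the match results below would duplicate rows.
# stopifnot(!any(duplicated(bom_data[, c("StationNo", "date")])))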

# Merge the AFL data set with the BOM rainfall ----------------------------

AFLDataWithBOMStation <- merge(x = results, y = BOMVenueLookup, by = "Venue", all.x = TRUE)
AFLDataWithBOMStation$Date <- ymd(AFLDataWithBOMStation$Date)

# Rename by name rather than position: merge() moves the "Venue" key to the
# first column, so positional indexing is fragile
names(AFLDataWithBOMStation)[names(AFLDataWithBOMStation) == "Date"] <- "date"
# No missing observations (i.e. lookup table is solid)
# names(AFLDataWithBOMStation)
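# The "no missing observations" note above can be verified directly:
# sum(is.na(AFLDataWithBOMStation$StationNo)) # expected to be 0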

results_weather <- merge(
  x = AFLDataWithBOMStation,
  y = bom_data[, c("StationNo", "date", "Rainfall")],
  by = c("StationNo", "date"),
  all.x = TRUE, all.y = FALSE
)

results_weather <- results_weather[order(results_weather$date), ]
# Write data to data/ (devtools::use_data is deprecated in favour of usethis)
usethis::use_data(results_weather, overwrite = TRUE)
save(results_weather, file = "./data-raw/weather/results_weather.rda")