#' Download PurpleAir PM2.5 data
#'
#' Download PurpleAir PM2.5 data and save it as CSV files (one file per PurpleAir site). Indoor sites are included by default; set \code{indoor = FALSE} to exclude them.
#'
#' @param site.csv a data frame of the site list or an absolute path to the site CSV file (from \code{getPurpleairLst}).
#' @param start.date the beginning date in the format "YYYY-MM-DD".
#' @param end.date the end date in the format "YYYY-MM-DD".
#' @param output.path the path to output CSV files.
#' @param average the averaging period in minutes; valid values are 10, 15, 20, 30, 60, 240, 720, 1440, and "daily". "daily" is not recommended because daily averages can only be calculated in UTC.
#' @param time.zone the time zone specification used for the conversion: "" is the current time zone and "GMT" is UTC (Coordinated Universal Time). Invalid values are most commonly treated as UTC, on some platforms with a warning. For the full list of time zones, see \url{https://www.mathworks.com/help/thingspeak/time-zones-reference.html}.
#' @param indoor whether to include indoor sites (\code{TRUE} by default).
#' @param n.thread number of parallel threads used to download the data (1 by default).
#'
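#' @details Each PurpleAir sensor reports through two channels: Channel A rows in the site list have a missing \code{ParentID}, while Channel B rows carry the ID of their Channel A parent. For every Channel A site, the function downloads the primary and secondary ThingSpeak feeds of both channels, joins them by timestamp, and writes one CSV per site. Sites whose output CSV already exists in \code{output.path} are skipped, so an interrupted download can be resumed by rerunning the function.
#'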
#' @examples
#' \dontrun{
#' purpleairDownload(site.csv = '/absolute/path/to/the/sensorlist.csv',
#'                   start.date = '2017-01-01',
#'                   end.date = '2017-12-31',
#'                   output.path = '/output_path',
#'                   average = 60,
#'                   time.zone = 'America/Los_Angeles')
#' }
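#'
#' # A minimal sketch of passing the site list as a data frame instead of a file
#' # (assumes getPurpleairLst() returns the current site list as a data frame)
#' \dontrun{
#' sites <- getPurpleairLst()
#' purpleairDownload(site.csv = sites,
#'                   start.date = '2017-01-01',
#'                   end.date = '2017-01-07',
#'                   output.path = '/output_path',
#'                   average = 60)
#' }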
#' @export
purpleairDownload <- function(site.csv, start.date, end.date, output.path, average, time.zone = 'GMT', indoor = TRUE, n.thread = 1) {
# Load the required packages, installing any that are missing
for (pkg in c('httpuv', 'foreach', 'doMC')) {
if (!require(pkg, character.only = TRUE)) {
install.packages(pkg)
library(pkg, character.only = TRUE)
}
}
# Register the parallel backend (note: doMC relies on forking, which is unavailable on Windows)
registerDoMC(n.thread)
# Read the latest sensor list
if (is.data.frame(site.csv)) {
sites <- site.csv
} else if (is.character(site.csv)) {
sites <- read.csv(site.csv, as.is = TRUE)
} else {
stop("'site.csv' must be a data frame or a path to a CSV file")
}
# Start date and end date
start_date <- as.Date(start.date)
end_date <- as.Date(end.date)
# Output directory
out.path <- output.path
if (!dir.exists(out.path)) {
dir.create(out.path, recursive = TRUE)
}
# Time zone
timezone <- time.zone
# Averaging interval (avoid 'daily': daily values can only be calculated in UTC)
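# Defensive check on the averaging interval (a sketch; values follow the documented list above)
if (!as.character(average) %in% c('10', '15', '20', '30', '60', '240', '720', '1440', 'daily')) {
stop("'average' must be one of 10, 15, 20, 30, 60, 240, 720, 1440, or 'daily'")
}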
# Field names
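# (CF_1 and CF_ATM below are the two Plantower calibration factors each sensor reports)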
# Primary
fieldnames.pri.A <- c("PM1.0_CF_1_ug/m3_A","PM2.5_CF_1_ug/m3_A","PM10.0_CF_1_ug/m3_A","Uptime_Minutes_A","RSSI_dbm_A","Temperature_F_A","Humidity_%_A","PM2.5_CF_ATM_ug/m3_A")
fieldnames.pri.B <- c("PM1.0_CF_1_ug/m3_B","PM2.5_CF_1_ug/m3_B","PM10.0_CF_1_ug/m3_B","HEAP_B","ADC0_voltage_B","Atmos_Pres_B","Not_Used_B","PM2.5_CF_ATM_ug/m3_B")
# Secondary
fieldnames.sec.A <- c("0.3um/dl_A","0.5um/dl_A","1.0um/dl_A","2.5um/dl_A","5.0um/dl_A","10.0um/dl_A","PM1.0_CF_ATM_ug/m3_A","PM10_CF_ATM_ug/m3_A")
fieldnames.sec.B <- c("0.3um/dl_B","0.5um/dl_B","1.0um/dl_B","2.5um/dl_B","5.0um/dl_B","10.0um/dl_B","PM1.0_CF_ATM_ug/m3_B","PM10_CF_ATM_ug/m3_B")
# Helper: retry a ThingSpeak CSV request until it succeeds
read.csv.retry <- function(url) {
repeat {
dat <- try(read.csv(url), silent = TRUE)
closeAllConnections()
if (!inherits(dat, 'try-error')) return(dat)
}
}
#--------------Run--------------#
# For each site
foreach (i = 1 : nrow(sites)) %dopar% {
if (!file.exists(file.path(out.path, paste0(sites$ID[i], '.csv')))) { # Skip sites whose output file already exists
if ((is.na(sites$ParentID[i])) & (!is.na(sites$DEVICE_LOCATIONTYPE[i]))) { # Keep only Channel A entries (Channel B rows carry a ParentID)
if (indoor | sites$DEVICE_LOCATIONTYPE[i] == 'outside') { # Optionally skip indoor sensors
# --- Site information ---
name <- trimws(sites$Label[i]) # Remove Leading/Trailing Whitespace
Lat <- sites$Lat[i]
Lon <- sites$Lon[i]
Location <- sites$DEVICE_LOCATIONTYPE[i]
Type <- sites$Type[i]
# Channel A (Primary)
ID.A <- sites$ID[i]
channelID.A <- sites$THINGSPEAK_PRIMARY_ID[i]
channelKey.A <- sites$THINGSPEAK_PRIMARY_ID_READ_KEY[i]
channelID.A.sec <- sites$THINGSPEAK_SECONDARY_ID[i]
channelKey.A.sec <- sites$THINGSPEAK_SECONDARY_ID_READ_KEY[i]
# Channel B
ib <- which(sites$ParentID == ID.A)
if (length(ib) > 1) { # If there are multiple Channel B entries
ib.min <- ib[which(sites$AGE[ib] == min(sites$AGE[ib]))] # Prefer the most recently updated entry (smallest AGE)
ib.min <- ib.min[1] # If several share the same AGE, take the first
channelID.B <- sites$THINGSPEAK_PRIMARY_ID[ib.min]
channelKey.B <- sites$THINGSPEAK_PRIMARY_ID_READ_KEY[ib.min]
channelID.B.sec <- sites$THINGSPEAK_SECONDARY_ID[ib.min]
channelKey.B.sec <- sites$THINGSPEAK_SECONDARY_ID_READ_KEY[ib.min]
} else {
channelID.B <- sites$THINGSPEAK_PRIMARY_ID[ib]
channelKey.B <- sites$THINGSPEAK_PRIMARY_ID_READ_KEY[ib]
channelID.B.sec <- sites$THINGSPEAK_SECONDARY_ID[ib]
channelKey.B.sec <- sites$THINGSPEAK_SECONDARY_ID_READ_KEY[ib]
}
print(ID.A)
# --- Channel A & B ---
# Initialization of primary data frame
dat.final <- data.frame()
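# Download one day at a time (the ThingSpeak API caps the number of records returned per request)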
for (j in 0 : as.numeric(end_date - start_date)) {
this.day <- start_date + j
cat(as.character(this.day), '\r')
# --- Time range for a day ---
starttime <- httpuv::encodeURI(paste(this.day, '00:00:00')) # UTC Time !!!
endtime <- httpuv::encodeURI(paste(this.day, '23:59:59')) # UTC Time !!!
# --- URL ---
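# The ThingSpeak feed.csv endpoint takes the channel ID in the path plus query
# parameters: api_key (channel read key), average (minutes), round (decimal
# places), start/end (time range), and timezone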
# Channel A
url.csv.A <- paste0('https://thingspeak.com/channels/', channelID.A, '/feed.csv?api_key=', channelKey.A, '&average=', average, '&round=3&start=', starttime, '&end=', endtime, '&timezone=', timezone)
url.csv.A.sec <- paste0('https://thingspeak.com/channels/', channelID.A.sec, '/feed.csv?api_key=', channelKey.A.sec, '&average=', average, '&round=3&start=', starttime, '&end=', endtime, '&timezone=', timezone)
# Channel B
url.csv.B <- paste0('https://thingspeak.com/channels/', channelID.B, '/feed.csv?api_key=', channelKey.B, '&average=', average, '&round=3&start=', starttime, '&end=', endtime, '&timezone=', timezone)
url.csv.B.sec <- paste0('https://thingspeak.com/channels/', channelID.B.sec, '/feed.csv?api_key=', channelKey.B.sec, '&average=', average, '&round=3&start=', starttime, '&end=', endtime, '&timezone=', timezone)
# --- Load CSV data ---
# Download Channel A (primary & secondary)
dat.A <- read.csv.retry(url.csv.A)
dat.A.sec <- read.csv.retry(url.csv.A.sec)
if (length(ib) != 0) { # If Channel B exists
# Download Channel B (primary & secondary)
dat.B <- read.csv.retry(url.csv.B)
dat.B.sec <- read.csv.retry(url.csv.B.sec)
} else { # No Channel B: build NA placeholders that keep Channel A's timestamps
dat.B <- dat.A
dat.B.sec <- dat.A.sec
if (nrow(dat.B) != 0) {
dat.B[,] <- NA
dat.B.sec[,] <- NA
dat.B$created_at <- dat.A$created_at
dat.B.sec$created_at <- dat.A.sec$created_at
}
}
# Replace the default ThingSpeak field names with descriptive ones
names(dat.A)[2 : ncol(dat.A)] <- fieldnames.pri.A
names(dat.A.sec)[2 : ncol(dat.A.sec)] <- fieldnames.sec.A
names(dat.B)[2 : ncol(dat.B)] <- fieldnames.pri.B
names(dat.B.sec)[2 : ncol(dat.B.sec)] <- fieldnames.sec.B
# Combine Channel A & B (full outer join on the timestamp)
dat <- merge(dat.A, dat.A.sec, by = 'created_at', all = TRUE)
dat <- merge(dat, dat.B, by = 'created_at', all = TRUE)
dat <- merge(dat, dat.B.sec, by = 'created_at', all = TRUE)
# --- Combine data frame ---
dat.final <- rbind(dat.final, dat)
}
# --- Add basic information --- #
if (nrow(dat.final) != 0) {
dat.final$ID <- ID.A
dat.final$Name <- name
dat.final$Lat <- Lat
dat.final$Lon <- Lon
dat.final$Location <- Location
dat.final$Type <- Type
}
# --- Save CSV data ---
file.name <- paste0(ID.A, '.csv')
write.csv(dat.final, file.path(out.path, file.name), row.names = FALSE)
} # if
} # if
} # if
}
#--------------Run--------------#
}