#' Create a data frame of discrete receptor concentrations
#' @description Create a data frame of discrete receptor concentrations using a CALPOST time series outfile file.
#' @param time_series_file a path to a binary time series data file that was generated by CALPOST with time series options set.
#' @param location_name the name of the location in which the receptors reside.
#' @param source_id the ID value for the source emissions.
#' @param pollutant_id the ID value for the emitted pollutant.
#' @param create_hourly_CSV an option to create hourly CSV files describing pollutant concentrations at every receptor.
#' @param create_hourly_rda an option to create hourly R data (.rda) files describing pollutant concentrations at every receptor.
#' @param return_large_df an option to invisibly return a large data frame object.
#' @param resume_from_set_hour an option to resume processing of concentrations from a set hour of the year.
#' @param autoresume_processing
#' @param autoresume_year
#' @export calpost_get_concentrations_from_time_series_file
calpost_get_concentrations_from_time_series_file <- function(time_series_file = NULL,
location_name,
source_id,
pollutant_id,
create_hourly_CSV = TRUE,
create_hourly_rda = TRUE,
return_large_df = FALSE,
resume_from_set_hour = NULL,
autoresume_processing = TRUE,
autoresume_year = NULL){
# Add require statements
require(stringr)
require(lubridate)
# Read in all lines from the CALPOST time series output file
time_series_output <- readLines(time_series_file)
# Get numeric vector of all receptor numbers
time_series_receptor_numbers <-
as.numeric(unlist(str_split(gsub(" ix:", "", time_series_output[8]), " ")))[
!is.na(as.numeric(unlist(str_split(gsub(" ix:", "", time_series_output[8]), " "))))]
# Get numeric vector of all x coordinate values (UTM) in units of km
time_series_x_km <- as.numeric(unlist(str_split(gsub("^.*x......", "", time_series_output[10]), " ")))
# Get numeric vector of all y coordinate values (UTM) in units of km
time_series_y_km <- as.numeric(unlist(str_split(gsub("^.*y......", "", time_series_output[11]), " ")))
# Set a beginning index for the i value of the outer loop data frame
begin <- 15
# Set an index for autoresuming processing if 'resume_from_hour' is NULL and
# 'autoresume_processing' is TRUE
if (is.null(resume_from_set_hour) & autoresume_processing == TRUE &
!is.null(autoresume_year)){
pattern <- paste0(location_name, "--",
source_id, "--",
pollutant_id, "--",
autoresume_year,
"-[0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9].csv")
file_list <- list.files(path = ".",
pattern = pattern)
# Get the highest index number from matched files
highest_index <- max(as.numeric(gsub(".*([0-9][0-9][0-9][0-9]).csv", "\\1", file_list)))
begin <- highest_index + 14
}
# Set a resume hour if the argument 'resume_from_hour' is not NULL and
# 'autoresume_processing is FALSE
if (!is.null(resume_from_set_hour) & autoresume_processing == FALSE){
begin <- resume_from_set_hour + 14
}
# Generate a long data frame containing concentration data for each cell at every timestep
for (i in begin:length(time_series_output)){
# Initialize a data frame object for containing concentration, date, location, source,
# pollutant information
if (i == begin){
concentration_df <- as.data.frame(mat.or.vec(nr = 0, nc = 12))
colnames(concentration_df) <- c("posix_date", "year", "month", "day", "hour",
"recep_number", "recep_x_km", "recep_y_km",
"concentration", "location_name", "source_id",
"pollutant_id")
}
# Extract a string from the 'time_series_output' object, representing data for an hour
time_series_row <-
as.vector(as.numeric(unlist(
str_split_fixed(time_series_output[i],
pattern = "[ ]+",
n = length(time_series_receptor_numbers) + 4))))
# Get the POSIXct date from the string
POSIXdate <- ISOdatetime(year = time_series_row[2],
month = 1, day = 1,
hour = time_series_row[4] / 100,
min = 0, sec = 0, tz = "GMT") + ((time_series_row[3] - 1) * 24 * 3600)
# Get vector object containing concentrations at each receptor for the hour
time_series_conc <- time_series_row[5:length(time_series_row)]
# Create a for structure an inner loop
for (j in 1:length(time_series_conc)){
# Initialize a smaller data frame containing data for all information found in the
# larger data frame, but for a single hour
if (j == 1){
concentration_small_df <- as.data.frame(mat.or.vec(nr = length(time_series_conc), nc = 12))
colnames(concentration_small_df) <- c("posix_date", "year", "month", "day", "hour",
"recep_number", "recep_x_km", "recep_y_km",
"concentration", "location_name", "source_id",
"pollutant_id")
}
# Populate the rows of the smaller data frame (data for a single hour)
concentration_small_df[j,1] <- POSIXdate
concentration_small_df[j,2] <- year(POSIXdate)
concentration_small_df[j,3] <- month(POSIXdate)
concentration_small_df[j,4] <- day(POSIXdate)
concentration_small_df[j,5] <- hour(POSIXdate)
concentration_small_df[j,6] <- j
concentration_small_df[j,7] <- time_series_x_km[j]
concentration_small_df[j,8] <- time_series_y_km[j]
concentration_small_df[j,9] <- time_series_conc[j]
concentration_small_df[j,10] <- location_name
concentration_small_df[j,11] <- source_id
concentration_small_df[j,12] <- pollutant_id
}
# Successively bind the smaller data frame to the larger data frame
if (return_large_df == TRUE){
concentration_df <- rbind(concentration_df, concentration_small_df)
}
# Create a CSV file for the hour if it is requested
if (create_hourly_CSV == TRUE){
write.table(concentration_small_df,
file = paste0(location_name, "--",
source_id, "--",
pollutant_id, "--",
year(POSIXdate), "-",
formatC(month(POSIXdate), width = 2, flag = "0"), "-",
formatC(day(POSIXdate), width = 2, flag = "0"), "-",
formatC(hour(POSIXdate), width = 2, flag = "0"), "-",
formatC((i-14), width = 4, flag = "0"), ".csv"),
sep = ",",
row.names = FALSE)
}
# Create an .rda file for the hour if it is requested
if (create_hourly_rda == TRUE){
save(concentration_small_df,
file = paste0(location_name, "--",
source_id, "--",
pollutant_id, "--",
year(POSIXdate), "-",
formatC(month(POSIXdate), width = 2, flag = "0"), "-",
formatC(day(POSIXdate), width = 2, flag = "0"), "-",
formatC(hour(POSIXdate), width = 2, flag = "0"), "-",
formatC((i-14), width = 4, flag = "0"), ".csv"))
}
}
# Return the 'concentration_df' object invisibly
if (return_large_df == TRUE){
invisible(concentration_df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.