#' Obtain surface meteorology and generate a SURF.DAT input file
#' @description Obtains hourly NCDC surface station data for a CALMET domain and generates a SURF.DAT input file for the specified year.
#' @param location_name an assigned name for the CALMET domain.
#' @param year the year for the SURF.DAT file.
#' @param lat_dec_deg the latitude of the CALMET domain in decimal degrees. The location of this point is defined in the lat_lon_grid_loc argument.
#' @param lon_dec_deg the longitude of the CALMET domain in decimal degrees. The location of this point is defined in the lat_lon_grid_loc argument.
#' @param lat_lon_grid_loc the location of the lat/long inputs in relation to the domain. Choices are: 1 (center), 2 (lower left), 3 (lower right), 4 (upper left), 5 (upper right).
#' @param domain_width_m the width of the meteorological domain in meters.
#' @param domain_height_m the height of the meteorological domain in meters.
#' @param cell_resolution_m the grid cell resolution in meters.
#' @param time_offset the time offset from UTC in hours (e.g., -8 for Pacific Standard Time).
#' @param local_archive_dir a local path containing an archive of gzipped NCDC station data files.
#' @param use_CSV_files option to use local, pre-processed CSV files; this option is not yet implemented.
#' @export calmet_surface_met
#' @examples
#' \dontrun{
#' # Generate a surface station file
#' calmet_surface_met(location_name = 'test',
#' year = 2005,
#' lat_dec_deg = 49.196116,
#' lon_dec_deg = -122.505866,
#' lat_lon_grid_loc = 1,
#' domain_width_m = 117000,
#' domain_height_m = 43250,
#' cell_resolution_m = 500,
#' time_offset = -8)
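#'
#' # A variation that reads NCDC station data from a local archive of
#' # gzipped files instead of downloading (the path shown is illustrative)
#' calmet_surface_met(location_name = 'test',
#' year = 2005,
#' lat_dec_deg = 49.196116,
#' lon_dec_deg = -122.505866,
#' lat_lon_grid_loc = 1,
#' domain_width_m = 117000,
#' domain_height_m = 43250,
#' cell_resolution_m = 500,
#' time_offset = -8,
#' local_archive_dir = "~/ncdc_archive")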
#'}
calmet_surface_met <- function(location_name,
year,
lat_dec_deg = NULL,
lon_dec_deg = NULL,
lat_lon_grid_loc = 1,
domain_width_m = NULL,
domain_height_m = NULL,
cell_resolution_m = 250,
time_offset,
local_archive_dir = NULL,
use_CSV_files = NULL){
# Add require statements
require(lubridate)
require(plyr)
require(raster)
require(rgdal)
# Round the provided width and height of the met domain to the cell resolution
domain_width_m <- round_any(domain_width_m, cell_resolution_m, round)
domain_height_m <- round_any(domain_height_m, cell_resolution_m, round)
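# e.g., with cell_resolution_m = 500, a requested domain_width_m of 117200
# would be snapped to 117000 (round_any(117200, 500) == 117000)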
# Get matrix of longitude and latitude for chosen point
lat_lon_dec_deg <- cbind(lon_dec_deg, lat_dec_deg)
# Determine the UTM zone
UTM_zone <- (floor((lon_dec_deg + 180)/6) %% 60) + 1
# Determine whether domain is in Northern Hemisphere or Southern Hemisphere
UTM_hemisphere <- ifelse(lat_dec_deg >= 0, "N", "S")
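# e.g., lon_dec_deg = -122.505866 yields zone (floor((-122.505866 + 180)/6)
# %% 60) + 1 = 10, and lat_dec_deg = 49.196116 yields hemisphere "N"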
# Define a PROJ.4 projection string for a lat/lon projection
proj_string_longlat <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
# Define a PROJ.4 projection string for a UTM projection, adding the "+south"
# flag for domains in the Southern Hemisphere
proj_string_UTM <- paste0("+proj=utm +zone=",
UTM_zone,
ifelse(UTM_hemisphere == "N", "", " +south"),
" +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
# Project as UTM coordinates from the determined UTM zone, round to cell resolution using the
# 'round_any' function from the 'plyr' package
UTM_location <- project(lat_lon_dec_deg, proj_string_UTM)
UTM_location <- round_any(UTM_location, cell_resolution_m, round)
# Do the width and height values accommodate an integer number of cells of the
# specified resolution? After the rounding above, both checks should pass; these
# checks will later be part of a function for setting domain width and height
is_number_cells_across_x_an_int <- domain_width_m %% cell_resolution_m == 0
is_number_cells_across_y_an_int <- domain_height_m %% cell_resolution_m == 0
# Get the number of cells in the x and y directions, stopping if the domain
# dimensions do not divide evenly into whole cells
if (!is_number_cells_across_x_an_int | !is_number_cells_across_y_an_int){
stop("Domain width and height must be integer multiples of the cell resolution")
}
number_cells_across_x <- domain_width_m/cell_resolution_m
number_cells_across_y <- domain_height_m/cell_resolution_m
# Get the total number of cells
total_cells <- number_cells_across_x * number_cells_across_y
# Generate an output file name for the SURF.DAT file
output_file <- paste0("surf--", location_name, "-",
number_cells_across_x, "x",
number_cells_across_y, "x",
cell_resolution_m, "--",
year, ".txt")
# Get extents of UTM grid (left, right, bottom, top) in meters
left_UTM <- get_grid_extents_UTM(side = "left",
lat_lon_grid_loc = lat_lon_grid_loc,
UTM_location = UTM_location,
domain_width_m = domain_width_m,
domain_height_m = domain_height_m)
right_UTM <- get_grid_extents_UTM(side = "right",
lat_lon_grid_loc = lat_lon_grid_loc,
UTM_location = UTM_location,
domain_width_m = domain_width_m,
domain_height_m = domain_height_m)
bottom_UTM <- get_grid_extents_UTM(side = "bottom",
lat_lon_grid_loc = lat_lon_grid_loc,
UTM_location = UTM_location,
domain_width_m = domain_width_m,
domain_height_m = domain_height_m)
top_UTM <- get_grid_extents_UTM(side = "top",
lat_lon_grid_loc = lat_lon_grid_loc,
UTM_location = UTM_location,
domain_width_m = domain_width_m,
domain_height_m = domain_height_m)
# Create a data frame object for UTM values of LL, LR, UL, and UR
LL_LR_UL_UR_UTM_m_DF <- data.frame("x" = c(left_UTM, right_UTM, left_UTM, right_UTM),
"y" = c(bottom_UTM, bottom_UTM, top_UTM, top_UTM))
# Create a SpatialPoints object for UTM values of LL, LR, UL, and UR
LL_LR_UL_UR_UTM_m_SP <- SpatialPoints(as.matrix(LL_LR_UL_UR_UTM_m_DF),
proj4string = CRS(proj_string_UTM))
# Generate Extent object in UTM
bbox_UTM <- extent(LL_LR_UL_UR_UTM_m_SP)
# Create a RasterLayer object for UTM values
LL_LR_UL_UR_UTM_m_RL <- raster(nrows = number_cells_across_y,
ncols = number_cells_across_x,
ext = bbox_UTM,
crs = proj_string_UTM)
# Create a SpatialPoints object for lat/lon values of LL, LR, UL, and UR through a
# spatial transform
LL_LR_UL_UR_longlat_SP <- spTransform(LL_LR_UL_UR_UTM_m_SP, CRS(proj_string_longlat))
# Generate Extents object in lat/lon projection
bbox_lat_lon <- extent(LL_LR_UL_UR_longlat_SP)
if (is.null(use_CSV_files)){
# Get all surface met data, write CSV files to the working folder, and get a
# vector of the generated CSV files
CSV_files <- calmet_get_ncdc_station_data(year = year,
bbox_lat_lon = bbox_lat_lon,
local_archive_dir = local_archive_dir)
# Include only records that are on the hour
for (i in 1:length(CSV_files)){
the_data <- read.csv(CSV_files[i], header = TRUE, stringsAsFactors = FALSE)
the_data_on_hour <- subset(the_data, MIN == 0)
write.table(the_data_on_hour, file = CSV_files[i], sep = ",", row.names = FALSE)
}
# Keep only those files with at least 5000 hourly records for the year
for (i in 1:length(CSV_files)){
the_data <- read.csv(CSV_files[i], header = TRUE, stringsAsFactors = FALSE)
if (nrow(the_data) < 5000){
file.remove(CSV_files[i])
}
}
# Update the list of CSV files that are acceptable to use
CSV_files <- list.files(pattern = "[0-9]{6}-[0-9]{5}-[0-9]{4}.*.csv$")
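# The retained file names follow the pattern USAF-WBAN-year
# (e.g., "712113-99999-2005.csv")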
# Get additional files to compensate for missing data at either the beginning or end of
# the year (since datasets are standardized to UTC-00:00)
if (time_offset < 0 & year < as.numeric(format(Sys.time(), "%Y"))){
for (i in 1:length(CSV_files)){
calmet_get_ncdc_station_data(data_filename = gsub("([0-9]*-[0-9]*-)[0-9]*.csv",
paste("\\1", year + 1, ".gz", sep = ''),
CSV_files[i]),
local_archive_dir = local_archive_dir)
}
}
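# The gsub call above maps a station's file name for the target year onto the
# archive name for the following year (e.g., "712113-99999-2005.csv" becomes
# "712113-99999-2006.gz")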
# Create vector list of additional CSV files that were downloaded
additional_CSV_files <- list.files(pattern = paste0("[0-9]*-[0-9]*-", year + 1, ".csv"))
# Include only records that are on the hour
for (i in 1:length(additional_CSV_files)){
the_data <- read.csv(additional_CSV_files[i], header = TRUE, stringsAsFactors = FALSE)
the_data_on_hour <- subset(the_data, MIN == 0)
write.table(the_data_on_hour, file = additional_CSV_files[i], sep = ",", row.names = FALSE)
}
# Keep only those files with at least 5000 hourly records for the year
for (i in 1:length(additional_CSV_files)){
the_data <- read.csv(additional_CSV_files[i], header = TRUE, stringsAsFactors = FALSE)
if (nrow(the_data) < 5000){
file.remove(additional_CSV_files[i])
}
}
# Note: compensation for positive time offsets (UTC+) is not yet implemented;
# it would require data from the year preceding the target year
# if (time_offset > 0){
# for (i in seq((start_year - 1), end_year, 1)){
# if (i == start_year) CSV_files <- vector(mode = "character", length = 0)
# CSV_files_year <- list.files(path = ".", pattern = paste("[0-9]*-[0-9]*-", i, ".csv", sep = ''))
# CSV_files <- c(CSV_files, CSV_files_year)
# }
# }
# Derive the set of unique station identifiers from the file names
CSV_files_unique_stations <- unique(gsub("([0-9]*-[0-9]*)-[0-9]*.csv", "\\1", CSV_files))
# Join years of station data together
for (i in 1:length(CSV_files_unique_stations)){
CSV_station_years <- list.files(path = ".",
pattern = paste0(CSV_files_unique_stations[i],
"-.*.csv"))
for (j in 1:length(CSV_station_years)){
if (j == 1){
CSV_all_years_at_station <- read.csv(CSV_station_years[j],
header = TRUE,
stringsAsFactors = FALSE)
}
if (j > 1){
CSV_single_year_at_station <- read.csv(CSV_station_years[j],
header = TRUE,
stringsAsFactors = FALSE)
CSV_all_years_at_station <- rbind(CSV_all_years_at_station,
CSV_single_year_at_station)
}
if (j == length(CSV_station_years))
write.csv(CSV_all_years_at_station,
file = paste0(CSV_files_unique_stations[i], ".csv"),
row.names = FALSE)
}
}
# Clean up the folder by removing the single-year and additional CSV files
file.remove(additional_CSV_files, CSV_files)
# Generate a new list of CSV files, now one per station
CSV_files <- list.files(path = ".", pattern = "^[0-9]+-[0-9]+\\.csv$")
# Define the start and end times and determine number of hours in each year
start_time <- ISOdatetime(year, 1, 1, hour = 0, min = 0, sec = 0, tz = "GMT")
end_time <- ISOdatetime(year, 12, 31, hour = 24, min = 0, sec = 0, tz = "GMT")
time_difference <- difftime(end_time, start_time, units = 'hours')
total_hours <- time_difference[[1]]
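# For a non-leap year this yields 8760 hours (8784 for a leap year)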
# Get key station information as a character vector
for (i in 1:length(CSV_files)){
# Initialize the vector object
if (i == 1) station_information_strings <- vector(mode = "character", length = 0)
# Create new identifiers for the stations
identifier_1 <- formatC(i, digits = 3, flag = "0")
identifier_2 <- formatC(i, digits = 5, flag = "0")
# Obtain the station's latitude and longitude
station_metadata <- read.csv(CSV_files[i], header = TRUE, stringsAsFactors = FALSE)
station_lat <- station_metadata[1,8]
station_lon <- station_metadata[1,9]
# Reproject the lat/lon coordinates as UTM coordinates
station_lat_lon_dec_deg <- cbind(station_lon, station_lat)
station_UTM <- project(station_lat_lon_dec_deg, proj_string_UTM)
station_UTM_easting_km <- round((station_UTM[1,1] / 1000), digits = 3)
station_UTM_northing_km <- round((station_UTM[1,2] / 1000), digits = 3)
# Get the station's time offset and format the value for use in CALMET
station_time_offset <- -(time_offset)
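# CALMET expects station time zones expressed as positive hours behind UTC
# (e.g., UTC-08:00 becomes 8), hence the sign flip above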
# Set a fixed anemometer height of 20 m AGL
station_anemometer_height <- 20
# Format the values into a character string
station_values_string <- paste0(identifier_1, " ",
identifier_2, " ",
station_UTM_easting_km, " ",
station_UTM_northing_km, " ",
station_time_offset, " ",
station_anemometer_height)
station_information_strings <- c(station_information_strings,
station_values_string)
}
# Create the hourly time series as a list of POSIXct time objects
time_series <- as.list(c(1:total_hours))
for (i in 1:total_hours) {
time_series[[i]] <- start_time + (3600 * (i - 1))
}
# Use CSV_files to extract data from specified stations
station_data_frames <- as.list(c(1:length(CSV_files)))
for (i in 1:length(CSV_files)){
station_data_frames[[i]] <- list(read.csv(CSV_files[i], header = TRUE,
stringsAsFactors = FALSE))
}
# For every station, change UTC times to local times using the specified time offset
for (i in 1:length(station_data_frames)){
POSIXdatetime <- ISOdatetime(year = station_data_frames[[i]][[1]]$YR,
month = station_data_frames[[i]][[1]]$M,
day = station_data_frames[[i]][[1]]$D,
hour = station_data_frames[[i]][[1]]$HR,
min = station_data_frames[[i]][[1]]$MIN,
sec = 0,
tz = "GMT")
# Shift POSIXct timestamps by specified time offset
POSIXdatetime <- POSIXdatetime + (time_offset * 3600)
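# e.g., with time_offset = -8, an observation stamped 2005-01-01 08:00 UTC
# becomes 2005-01-01 00:00 local standard time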
# Extract time elements from 'POSIXdatetime' object and replace in df
station_data_frames[[i]][[1]]$YR <- year(POSIXdatetime)
station_data_frames[[i]][[1]]$M <- month(POSIXdatetime)
station_data_frames[[i]][[1]]$D <- day(POSIXdatetime)
station_data_frames[[i]][[1]]$HR <- hour(POSIXdatetime)
station_data_frames[[i]][[1]]$MIN <- minute(POSIXdatetime)
# Trim data frame to only contain hours belonging to the target year
station_data_frames[[i]][[1]] <-
subset(station_data_frames[[i]][[1]], YR == year)
# Obtain new vector of POSIXct timestamps
POSIXdatetime <- ISOdatetime(year = station_data_frames[[i]][[1]]$YR,
month = station_data_frames[[i]][[1]]$M,
day = station_data_frames[[i]][[1]]$D,
hour = station_data_frames[[i]][[1]]$HR,
min = station_data_frames[[i]][[1]]$MIN,
sec = 0,
tz = "GMT")
# Scan for missing hours by comparing against all available hours in year
time_series_vector <- as.POSIXct(unlist(time_series),
origin = "1970-01-01",
tz = "GMT")
station_data_frames[[i]][[1]]$time_series <- POSIXdatetime
missing_times <- time_series_vector[which(!(time_series_vector %in% POSIXdatetime))]
# Create data frame with missing values
missing_usafid <- rep(unique(station_data_frames[[i]][[1]][,1]), length(missing_times))
missing_wban <- rep(unique(station_data_frames[[i]][[1]][,2]), length(missing_times))
missing_yr <- year(missing_times)
missing_m <- month(missing_times)
missing_d <- day(missing_times)
missing_hr <- hour(missing_times)
missing_min <- rep(0, length(missing_times))
missing_lat <- rep(station_data_frames[[i]][[1]][1,8], length(missing_times))
missing_long <- rep(station_data_frames[[i]][[1]][1,9], length(missing_times))
missing_elev <- rep(station_data_frames[[i]][[1]][1,10], length(missing_times))
missing_wind.dir <- rep(999, length(missing_times))
missing_wind.spd <- rep(999.9, length(missing_times))
missing_ceil.hgt <- rep(999.9, length(missing_times))
missing_temp <- rep(999.9, length(missing_times))
missing_dewpoint <- rep(999.9, length(missing_times))
missing_atm.pres <- rep(999.9, length(missing_times))
missing_sky.cover <- rep(999, length(missing_times))
missing_precip.rate <- rep(999, length(missing_times))
missing_precip.rh <- rep(999, length(missing_times))
missing_precip.code <- rep(999, length(missing_times))
# Concatenate the vectors of missing values into a single vector (the POSIXct
# times are coerced to numeric here and restored below)
missing_vectors <- c(missing_usafid, missing_wban, missing_yr, missing_m,
missing_d, missing_hr, missing_min, missing_lat, missing_long,
missing_elev, missing_wind.dir, missing_wind.spd, missing_ceil.hgt,
missing_temp, missing_dewpoint, missing_atm.pres, missing_sky.cover,
missing_precip.rate, missing_precip.rh, missing_precip.code,
missing_times)
# Build a data frame of missing records from a 21-column matrix of those values
missing_df <- as.data.frame(matrix(missing_vectors,
nrow = length(missing_times),
ncol = 21),
stringsAsFactors = FALSE)
# Add column names to the 'missing_df' data frame
colnames(missing_df) <- c("USAFID", "WBAN", "YR", "M", "D", "HR", "MIN",
"LAT", "LONG", "ELEV", "WIND.DIR", "WIND.SPD",
"CEIL.HGT", "TEMP", "DEW.POINT", "ATM.PRES",
"SKY.COVER", "PRECIP.RATE", "RH", "PRECIP.CODE",
"time_series")
missing_df$time_series <- as.POSIXct(missing_df$time_series,
origin = "1970-01-01",
tz = "GMT")
# Row bind missing data frame with the available data frame
station_data_frames[[i]][[1]] <- rbind(station_data_frames[[i]][[1]],
missing_df)
# Reorder data frame by POSIXct timestamp
station_data_frames[[i]][[1]] <-
station_data_frames[[i]][[1]][order(station_data_frames[[i]][[1]]$time_series),]
}
}
if (!is.null(use_CSV_files)){
# Processing of local, pre-processed CSV files is not yet implemented
stop("The 'use_CSV_files' option is not yet supported.")
}
# Validate the individual CSV files, presenting a table with several pieces of
# information related to data availability and data quality:
# 1. Elements for which data is available
# 2. Data completeness for each of the elements
# 3. Whether the station has cloud opacity and ceiling height fields
# 4. A data completeness and data quality (DC/DQ) score (from 1 to 10)
# Save the data frame to disk as a CSV file
# Ask the user which files to use, or ask for the minimum DC/DQ score below
# which stations are excluded
# Generate a list of CSV files that are accepted by the user
# Determine whether the selected stations, taken together, provide the
# minimum data completeness for the SURF.DAT file
# Write out the SURF.DAT file
#
# The following parameters are used:
#
# Name      Description
# --------  -----------
# IYR       Year of data
# IJUL      Julian day
# IHR       Hour (00-23 LST)
# WS        Wind Speed (m/s)
# WD        Wind Direction (degrees)
# ICEIL     Ceiling Height (hundreds of feet)
# ICC       Opaque Sky Cover (tenths)
# TEMPK     Air temperature (K)
# IRH       Relative humidity (percent)
# PRES      Station Pressure (mb)
# IPCODE    Precipitation code
#           (0=no precipitation, 1-18=liquid precipitation, 19-45=frozen precipitation)
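# Each hourly record group written below therefore consists of a time line
# such as (values illustrative)
#   2005 1 0 0 2005 1 1 0
# followed by one line per station giving WS, WD, ICEIL, ICC, TEMPK, IRH,
# PRES, and IPCODE, e.g.
#   3.1 240 75 8 278.2 86 1012.4 0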
# Construct the file header records of the SURF.DAT file
# Initialize file for writing
cat("", file = output_file)
# Add line 1 to file header (dataset name [SURF.DAT], dataset version [2.1],
# dataset message field)
cat("SURF.DAT 2.1 Hour Start and End Times with Seconds",
file = output_file, sep = "\n", append = TRUE)
# Add line 2 to file header (number of comment lines to follow)
cat(as.character(length(station_information_strings) + 2),
file = output_file, sep = "\n", append = TRUE)
# Add line 3 to file header
cat("Produced by PuffR !Do not edit by hand!",
file = output_file, sep = "\n", append = TRUE)
# Add line 4 to file header
cat("Station Information:",
file = output_file, sep = "\n", append = TRUE)
# Add station information to file header
cat(station_information_strings,
file = output_file, sep = "\n", append = TRUE)
# Add map projection keyword [NONE] to file header
cat("NONE",
file = output_file, sep = "\n", append = TRUE)
# Add time zone to file header
cat(paste0("UTC", ifelse(time_offset >= 0, "+", "-"),
formatC(abs(time_offset), width = 2, flag = "0"), "00"),
file = output_file, sep = "\n", append = TRUE)
# Add time and station info to file header (beginning and end times for
# the file, number of met stations)
cat(year(time_series[[1]]),
" ",
yday(time_series[[1]]),
" ",
hour(time_series[[1]]),
" ",
"0 ",
year(time_series[[total_hours]] + 3600),
" ",
yday(time_series[[total_hours]] + 3600),
" ",
hour(time_series[[total_hours]] + 3600),
" ",
"0 ",
length(CSV_files),
file = output_file, sep = '', append = TRUE)
cat("", file = output_file, sep = "\n", append = TRUE)
# Add 5-digit identifiers for met stations used in file
for (i in 1:length(station_data_frames)){
cat(formatC(i, digits = 5, flag = "0"),
file = output_file, sep = "\n", append = TRUE)
}
# Construct the body of the SURF.DAT file using nested loops that write grouped
# time-interval headers followed by ordered surface station parameters
for (i in 1:total_hours){
cat(year(time_series[[i]]),
" ",
yday(time_series[[i]]),
" ",
hour(time_series[[i]]),
" ",
"0 ",
year(time_series[[i]] + 3600),
" ",
yday(time_series[[i]] + 3600),
" ",
hour(time_series[[i]] + 3600),
" ",
"0 ",
file = output_file, append = TRUE)
cat("", file = output_file, sep = "\n", append = TRUE)
for (j in 1:length(station_data_frames)){
cat(# Wind speed, m/s (WS)
format(station_data_frames[[j]][[1]]$WIND.SPD[i],
width = 6, justify = "right"),
" ",
# Wind direction, degrees (WD)
format(station_data_frames[[j]][[1]]$WIND.DIR[i],
width = 6, justify = "right"),
" ",
# Ceiling height, hundreds of feet (ICEIL)
format(station_data_frames[[j]][[1]]$CEIL.HGT[i],
width = 6, justify = "right"),
" ",
# Opaque sky cover, tenths (ICC)
format(station_data_frames[[j]][[1]]$SKY.COVER[i],
width = 6, justify = "right"),
" ",
# Air temperature, K (TEMPK)
format(station_data_frames[[j]][[1]]$TEMP[i],
width = 6, justify = "right"),
" ",
# Relative humidity, % (IRH)
format(station_data_frames[[j]][[1]]$RH[i],
width = 6, justify = "right"),
" ",
# Station pressure, mb (PRES)
format(station_data_frames[[j]][[1]]$ATM.PRES[i],
width = 6, justify = "right"),
" ",
# Precipitation code (IPCODE)
# 0 = no precipitation
# 1-18 = liquid precipitation
# 19-45 = frozen precipitation
format(station_data_frames[[j]][[1]]$PRECIP.CODE[i],
width = 6, justify = "right"),
file = output_file, append = TRUE)
cat("", file = output_file, sep = "\n", append = TRUE)
}
}
}