#' Filter Data by Quality Flag
#'
#' @description If the quality flag from a NOAA dataset is G, I, K, L, N, O, then change the weather value to "NA"
#' Table 2 (Quality Flag/Attribute) from the NOAA documentation:
#' \itemize{
#' \item Blank = did not fail any quality assurance check D = failed duplicate check
#' \item G = failed gap check
#' \item I = failed internal consistency check
#' \item K = failed streak/frequent-value check
#' \item L = failed check on length of multiday period M = failed mega-consistency check
#' \item N = failed naught check
#' \item O = failed climatological outlier check
#' }
#' @param x Dataset with which to evaluate quality
#' @param y Dataset containing quality information
#'
#' @export
Quality_Flag_Function <- function(x, y){
x[which(y=="G" | y=="I" | y=="K" | y=="L" | y=="N" | y=="O")] <- NA
return(x)
}
#' Replace blank values (quality is okay) with "Okay"
#'
#' @param x Dataset
#'
#' @export
Replace_Blank_w_Okay_Function <- function(x){
x[which(x==" ")] <- "Okay"
return(x)
}
#' Format Weather Station Info
#'
#' @param wstations List of weather stations, downloaded from the NOAA NCDC site
#' @param Start_Date Start_Date
#' @param End_Date End_Date
#'
#' @export
formatWeatherStationInfo <- function(wstations, Start_Date = "2014-01-17", End_Date = "2008-01-20") {
# create columns for Start and End Dates of Weather Station
wstations$Start_Date <- sub(" .*", "",
wstations$Date_Range)
wstations$End_Date <- sub(".* ", "",
wstations$Date_Range)
# filter dates to study dates
wstations %<>%
filter(Start_Date <= Start_Date, End_Date >= End_Date) %>%
# merge with list of weather stations for which start/end date is not known
rbind(filter(wstations, Start_Date == "", End_Date >= ""))
return(wstations)
}
#' Merge Weather Data Files and Format Column Names
#'
#' @param climate_file_names Climate dataset
#' @description For each variable, NOAA uses generic column names "Measurement.Flag", "Quality.Flag", "Source.Flag", "Time.of.Observation" to ensure that the appropriate columns are merged together. This function renames these columns by pasting the name with the name of the variable to which it refers e.g., the "Measurement.Flag" column directly after "PRCP" will become "PRCP.Measurement.Flag" for each weather variable, take weather variable name and paste it to the names of the 4 following columns.
#'
#' @export
#' @importFrom plyr rbind.fill
mergeClimateFiles <- function(climate_file_names) {
X <- list()
for (h in 1:length(climate_file_names)) {
# PRCP, TMIN, TMAX name vectors
TMIN_vector <- vector()
TMAX_vector <- vector()
PRCP_vector <- vector()
# pull climate file
Y <- eval(parse(text=climate_file_names[h]))
X[[h]] <- list()
# select weather station info to keep
X[[h]][[1]] <- Y %>% dplyr::select(
.data$STATION,
.data$STATION_NAME,
.data$ELEVATION,
.data$LATITUDE,
.data$LONGITUDE,
.data$DATE
)
# counter for weather variable (have to count because not all files have all weather variables)
k=2 # start at 2 because weather station info goes in [[1]]
# fix PRCP flag column names
if ("PRCP" %in% names(Y)) {
X[[h]][[k]] <- Y[, (which(colnames(Y)=="PRCP")) :
(which(colnames(Y)=="PRCP")+4)]
for (i in 1:4) {
PRCP_vector[i] <- paste(
"PRCP",
sub(
"(.*?)[.].*", "\\1",
names(X[[h]][[k]][which(colnames(X[[h]][[k]])=="PRCP")
+ i])
),
"Flag",
sep="."
)
}
names(X[[h]][[k]])[2:5] <- PRCP_vector
k <- k + 1
}
# fix TMIN flag column names
if ("TMIN" %in% names(Y)) {
X[[h]][[k]] <- Y[, (which(colnames(Y)=="TMIN")) :
(which(colnames(Y)=="TMIN")+4)]
for (i in 1:4) {
TMIN_vector[i] <- paste(
"TMIN",
sub(
"(.*?)[.].*", "\\1",
names(X[[h]][[k]][which(colnames(X[[h]][[k]])=="TMIN") + i])
),
"Flag",
sep="."
)
}
names(X[[h]][[k]])[2:5] <- TMIN_vector
k <- k + 1
}
# fix TMAX flag column names
if ("TMAX" %in% names(Y)) {
X[[h]][[k]] <- Y[, (which(colnames(Y)=="TMAX")) :
(which(colnames(Y)=="TMAX")+4)]
for (i in 1:4) {
TMAX_vector[i] <- paste(
"TMAX",
sub(
"(.*?)[.].*", "\\1",
names(X[[h]][[k]][which(colnames(X[[h]][[k]])=="TMAX") + i])
),
"Flag",
sep="."
)
}
names(X[[h]][[k]])[2:5] <- TMAX_vector
}
}
# merge list of lists of lists
data.array2 <- list()
for (i in 1:length(unique(climate_file_names))) {
data.array2[[i]] = as.data.frame(
mapply(
cbind,
unlist(
X[[i]],
recursive=F
)
)
)
}
do.call(rbind.fill, data.array2)
}
#' Filter data by quality
#'
#' @param climate_data Climate dataset
#' @description Replace data with NA if it is of questionable quality.
#' \itemize{
#' \item Filter data by quality
#' \itemize{
#' \item replace data with NA if it is of questionable quality (see \code{Quality_Flag_Function} function for details)
#' \item replace blank values (quality is okay) with "Okay"
#' \item Replace -999 and blanks with NAs
#' }
#' }
#'
#' @export
filterClimateDataByQuality <- function(climate_data) {
climate_data$PRCP <- with(
climate_data,
Quality_Flag_Function(PRCP, PRCP.Quality.Flag)
)
climate_data$TMIN <- with(
climate_data,
Quality_Flag_Function(TMIN, TMIN.Quality.Flag)
)
climate_data$TMAX <- with(
climate_data,
Quality_Flag_Function(TMAX, TMAX.Quality.Flag)
)
# replace blank values (quality is okay) with "Okay"
climate_data[,c(
"PRCP.Quality.Flag",
"TMIN.Quality.Flag",
"TMAX.Quality.Flag"
)] %<>% apply(.data, 2, Replace_Blank_w_Okay_Function)
# Replace -999 and blanks with NAs
climate_data[,c(
"STATION",
"STATION_NAME",
"ELEVATION",
"LATITUDE",
"LONGITUDE",
"DATE",
"PRCP",
"PRCP.Measurement.Flag",
"PRCP.Quality.Flag",
"PRCP.Source.Flag",
"PRCP.Time.Flag",
"TMIN",
"TMIN.Measurement.Flag",
"TMIN.Quality.Flag",
"TMIN.Source.Flag",
"TMIN.Time.Flag",
"TMAX",
"TMAX.Measurement.Flag",
"TMAX.Quality.Flag",
"TMAX.Source.Flag",
"TMAX.Time.Flag"
)] %<>% apply(.data, 2, NA_Function)
return(climate_data)
}
#' Format and convert weather data
#'
#' @param climate_data Climate dataset
#' @description For each location, compile weather data from the closest weather stations.
#' \itemize{
#' \item Format/convert weather data
#' \itemize{
#' \item convert tenths of Celcius to Celsius
#' \item convert tenths of Celcius to Celsius
#' \item convert PRCP (in tenths of mm) to cm
#' \item replace NA for precip less than 0
#' }
#' }
#'
#' @export
formatconvertClimateData <- function(climate_data) {
climate_data[,c(
"PRCP",
"TMIN",
"TMAX"
)] %<>% apply(.data, 2, as.numeric)
climate_data %<>% mutate(
MinTemp = .data$TMIN/10, # convert tenths of Celcius to Celsius
MaxTemp = .data$TMAX/10, # convert tenths of Celcius to Celsius
Precip = .data$PRCP/100, # convert PRCP (in tenths of mm) to cm
Date = as.Date(as.character(.data$DATE), "%Y%m%d")
)
# replace NAs again just in case
climate_data[,c("MinTemp","MaxTemp","Precip")] %<>% apply(.data, 2, NA_Function)
# replace NA for precip
climate_data$Precip[which(climate_data$Precip < 0)] <- NA
return(climate_data)
}
#' Choose closest weather variable measurement for each Location/Date combo
#'
#' @description For each date and location, get weather data from the closest available weather station.
#' @param Datalist Output (list format) from the \code{findClosestWeatherStations} function.
#' @param Location_list Location_list
#'
#' @export
#' @importFrom plyr join_all
getClimateDataByLocationDate <- function(Datalist, Location_list) {
X <- list()
# for each LOCATION
for (i in 1:length(Location_list)) {
X[[i]] <- list()
# pull climate data for that location and merge with station data to get distance from station to sampling location
data = eval(parse(text=paste(
"Datalist$",
Location_list[i],
"_stations",
sep=""
)))
# PRECIPITATION
P <- data %>% filter(!is.na(.data$Precip))
X[[i]][[1]] <- as.data.frame(matrix(NA, length(unique(P$Date)), 6))
names(X[[i]][[1]]) <- c(
"Precip_STATION_NAME",
"Precip_STATION",
"Precip_STATION_Distance",
"Date",
"Precip",
"Location"
)
# for each DATE
for (j in 1:length(unique(P$Date))) {
# pull climate data associated with that location
X[[i]][[1]][j, ] = P %>%
filter(.data$Date == unique(.data$Date)[j]) %>%
filter(.data$Distance == min(.data$Distance)) %>%
summarise(
Precip_STATION_NAME = .data$STATION_NAME,
Precip_STATION = .data$Station.ID,
Precip_STATION_Distance = .data$Distance,
Date = .data$Date[1],
Precip = .data$Precip
)
}
# MIN TEMPERATURE
Min <- data %>% filter(!is.na(.data$MinTemp))
X[[i]][[2]] <- as.data.frame(
matrix(NA, length(unique(Min$Date)), 6)
)
names(X[[i]][[2]]) <- c(
"MinTemp_STATION_NAME",
"MinTemp_STATION",
"MinTemp_STATION_Distance",
"Date",
"MinTemp",
"Location"
)
# for each DATE
for (j in 1:length(unique(Min$Date))) {
# pull climate data associated with that location
X[[i]][[2]][j, ] = Min %>%
filter(.data$Date == unique(.data$Date)[j]) %>%
filter(.data$Distance == min(.data$Distance)) %>%
summarise(
MinTemp_STATION_NAME = .data$STATION_NAME,
MinTemp_STATION = .data$Station.ID,
MinTemp_STATION_Distance = .data$Distance,
Date = .data$Date,
MinTemp = .data$MinTemp
)
}
# MAX TEMPERATURE
Max <- data %>% filter(!is.na(.data$MaxTemp))
X[[i]][[3]] <- as.data.frame(
matrix(NA, length(unique(Max$Date)), 6)
)
names(X[[i]][[3]]) <- c(
"MaxTemp_STATION_NAME",
"MaxTemp_STATION",
"MaxTemp_STATION_Distance",
"Date",
"MaxTemp",
"Location"
)
# for each DATE
for (j in 1:length(unique(Max$Date))) {
# pull climate data associated with that location
X[[i]][[3]][j, ] = Max %>%
filter(Date == unique(Date)[j]) %>%
filter(Distance == min(Distance)) %>%
summarise(
MaxTemp_STATION_NAME = .data$STATION_NAME,
MaxTemp_STATION = .data$Station.ID,
MaxTemp_STATION_Distance = .data$Distance,
Date = .data$Date,
MaxTemp = .data$MaxTemp
)
}
X[[i]][[1]]$Date %<>% as.Date(origin="1970-01-01")
X[[i]][[2]]$Date %<>% as.Date(origin="1970-01-01")
X[[i]][[3]]$Date %<>% as.Date(origin="1970-01-01")
X[[i]][[1]]$Location = Location_list[i]
X[[i]][[2]]$Location = Location_list[i]
X[[i]][[3]]$Location = Location_list[i]
}
# merge list of lists of lists
data.array2 <- list()
# for each density
for (i in 1:length(Location_list)) {
# for (i in 1:length(unique(patch_data$density))) {
# compress list of statistics to dataframe
data.array2[[i]] = join_all(X[[i]], by="Date", type="full")
}
climate_data = do.call(rbind.fill, data.array2)
}
#' Calculate Growing Degree Days
#'
#' @description I used this website to calculate growing degree days: http://www.ipm.ucdavis.edu/WEATHER/ddretrievetext.html. This function merges them together with the \code{climate_data} dataframe.
#' @param DegreeDay_list list of separate Degree Day files
#' @param climate_data Climate dataset
#'
#' @export
calculateDegreeDays <- function(climate_data, DegreeDay_list) {
X <- list()
for (i in 1:length(DegreeDay_list)) {
X[[i]] <- eval(parse(text=DegreeDay_list[i]))
}
DegreeDays <- do.call(rbind.fill, X)
DegreeDays$Date %<>% as.Date("%Y-%m-%d")
DegreeDays.unique = unique(DegreeDays[, 1:4])
# DegreeDays %>% filter(Date=="2014-01-17")
DegreeDays.merged = merge(
DegreeDays.unique,
climate_data,
by.x=c("Date", "Temp.min", "Temp.max"),
by.y=c("Date", "MinTemp", "MaxTemp"),
all.y=T
)
names(DegreeDays.merged)[2:3] <- c("MinTemp", "MaxTemp")
return(DegreeDays.merged)
}
#' Create Year and Day of Year Variables for Climate Data
#'
#' @param climate_data climate data
#' @description Create Year and Day of Year variables.
#'
#' @export
formatClimateDataYearDayofYear <- function(climate_data) {
climate_data %>%
mutate(
Year = year(.data$Date),
Day_of_year = as.numeric(strftime(.data$Date, format = "%j"))
)
}
#' Calculate climate variables from weather data
#'
#' @param x survey data
#' @param climate_data climate data
#' @param calculate_dates Default is \code{TRUE}. Either \code{x} is a dataframe of survey dates from which to calculate dates for climate variables or \code{x} is a list of pre-determined dates and their locations, in which case \code{calculate_dates} should be \code{FALSE}.
#' @param Dates_dataframe Dates_dataframe
#' @param first.year first.year
#'
#' @export
#' @importFrom dplyr group_by summarise arrange filter
#' @importFrom stats sd
calculateClimateVariables <- function(x, climate_data, calculate_dates="TRUE", Dates_dataframe=NULL, first.year=2009) {
climate_data %<>%
renameLocations(.data) %>%
formatClimateDataYearDayofYear(.data) %>%
mutate(
Precip_Presence = ifelse(.data$Precip>0, 1, 0),
MinTemp_lt_equal_0 = ifelse(.data$MinTemp<=0, 1, 0)
)
# create list of dates
if (calculate_dates=="TRUE") {
# get unique Date and DaysSincePrevSurvey combos
A = as.data.frame(x) %>%
group_by(.data$Date, .data$Location, .data$Species) %>%
# to make sure that this group all share the same Previous_Survey_Date Date
summarise(PrevSurvD = paste(max(.data$Previous_Survey_Date, na.rm=T))) %>%
as.data.frame %>%
arrange(.data$Location, .data$Date)
#----------------- Fill in Missing Previous Survey Date for Survey 1 --#
# Generate fake "PrevSurvD" based on the average number of days
# between surveys
A$PrevSurvD[A$PrevSurvD=="NA" & year(A$Date)==first.year] <-
as.character(A$Date[A$PrevSurvD=="NA" & year(A$Date)==first.year] -
round(mean(x$DaysSincePrevSurvey, na.rm=T)))
A %<>% filter(PrevSurvD!="NA")
A$PrevSurvD %<>% as.Date("%Y-%m-%d")
} else {
A <- Dates_dataframe
# A %<>% mutate(
# previous dates
# PrevSurvD = as.Date(c(NA, Date[-length(Date)]), origin="1970-01-01")
#)
}
# prep dataframe for addition of of climate variables
Names <- c(
"Daily_Precip_mean",
"Daily_Precip_SD",
"Perc_Days_w_Rain",
"Perc_Days_w_Freeze",
"mean_Max_Temp",
"sd_Max_Temp",
"MeanDegreeDay",
"Mean_Consecutive_Days_w_Rain",
"Max_Consecutive_Days_w_Rain",
"sd_Consecutive_Days_w_Rain",
"Mean_Consecutive_Drought_Days",
"Max_Consecutive_Drought_Days",
"sd_Consecutive_Drought_Days",
"Mean_Consecutive_Freezing_Days",
"Max_Consecutive_Freezing_Days",
"sd_Consecutive_Freezing_Days"
)
A1 <- A %>% cbind(
as.data.frame(
matrix(
nrow=dim(A)[1],
ncol=length(Names),
data=NA
)
)
)
names(A1)[ (dim(A)[2] + 1) : (dim(A)[2] + length(Names)) ] <- Names
# for new plants, assign previous visit based on previous visit to location
# calculate climate variables
for (i in 1:dim(A1)[1]) {
# subset data by time period and location
# date is equal to or greater than previous survey date
temp = climate_data[which(
climate_data$Date >= A1$PrevSurvD[i] &
climate_data$Date < A1$Date[i] & # date is less than current date
climate_data$Location==A1$Location[i]
), ] # and pull data for correct location
# Climate Variable Calculations
A1$Daily_Precip_mean[i] <- mean(temp$Precip, na.rm=T)
A1$Daily_Precip_SD[i] <- sd(temp$Precip, na.rm=T)
# length of precip > 0 / length of precip != NA
A1$Perc_Days_w_Rain[i] <-
length(filter(temp, .data$Precip>0)$Precip) / length(!is.na(temp$Precip))
# length of MinTemp <= 0 / length of MinTemp != NA
A1$Perc_Days_w_Freeze[i] <-
length(filter(temp, .data$MinTemp<=0)$MinTemp) /
length(!is.na(temp$MinTemp))
A1$mean_Max_Temp[i] <- mean(temp$MaxTemp, na.rm=T)
A1$sd_Max_Temp[i] <- sd(temp$MaxTemp, na.rm=T)
A1$MeanDegreeDay[i] <- mean(temp$Daily.DD, na.rm=T)
# consecutive days with rain
# count identical consecutive values
r <- rle(temp$Precip_Presence)
# get lengths for value=1
r1 <- r$length[r$values == 1]
A1$Mean_Consecutive_Days_w_Rain[i] <- mean(r1, na.rm=T)
A1$Max_Consecutive_Days_w_Rain[i] <- max(r1, na.rm=T)
A1$sd_Consecutive_Days_w_Rain[i] <- sd(r1, na.rm=T)
# consecutive drought days
r0 <- r$length[r$values == 0]
A1$Mean_Consecutive_Drought_Days[i] <- mean(r0, na.rm=T)
A1$Max_Consecutive_Drought_Days[i] <- max(r0, na.rm=T)
A1$sd_Consecutive_Drought_Days[i] <- sd(r0, na.rm=T)
# Consecutive Freezing Days
# count identical consecutive values
r <- rle(temp$MinTemp_lt_equal_0)
# get lengths for value=1
r1 <- r$length[r$values == 1]
# if there was at least one freezing day:
if (length(r1) > 0) {
A1$Mean_Consecutive_Freezing_Days[i] <- mean(r1, na.rm=T)
A1$Max_Consecutive_Freezing_Days[i] <- max(r1, na.rm=T)
# calculate sd if there is more than one value; otherwise sd=0
A1$sd_Consecutive_Freezing_Days[i] <- ifelse(
length(r1)>1,
sd(r$length[r$values == 1], na.rm=T),
0
)
} else {
A1$Mean_Consecutive_Freezing_Days[i] <- 0
A1$Max_Consecutive_Freezing_Days[i] <- 0
A1$sd_Consecutive_Freezing_Days[i] <- 0
}
}
# replace NAs
A1 %<>% as.data.frame
A1[,c(
"Daily_Precip_mean",
"Daily_Precip_SD",
"Perc_Days_w_Rain",
"Perc_Days_w_Freeze",
"mean_Max_Temp",
"sd_Max_Temp",
"MeanDegreeDay",
"Mean_Consecutive_Days_w_Rain",
"Max_Consecutive_Days_w_Rain",
"sd_Consecutive_Days_w_Rain",
"Mean_Consecutive_Drought_Days",
"Max_Consecutive_Drought_Days",
"sd_Consecutive_Drought_Days",
"Mean_Consecutive_Freezing_Days",
"Max_Consecutive_Freezing_Days",
"sd_Consecutive_Freezing_Days")] %<>%
apply(.data, 2, NA_Function
)
A1 %<>% dplyr::select(-.data$PrevSurvD)
return(A1)
}
#' Fix Erroneous Temperature Data
#'
#' @param climate_data climate data
#' @description If a date's minimum temperature record is greater than or equal to the maximum temperature record, replace both values with NA.
#'
#' @export
fixErroneousTemps <- function(climate_data) {
X = as.vector(which(climate_data$MinTemp >= climate_data$MaxTemp))
if (!is.null(dim(X))) {
climate_data[X, ]$MinTemp <- NA
climate_data[X, ]$MaxTemp <- NA
}
return(climate_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.