############################################################################################
#' @title Execute Radiation Data Quality Testing
#' @author Robert Lee \email{rlee@battelleecology.org}\cr
#' @description Run 4 checks on radiation data quality for TIS sites: (1) Internal Correlation,
#' (2) Tower-Top Sensor Agreement, (3) Variance Stability, and
#' (4) External Correlation with a Non-NEON Site. Test results are written as simple pass/fail entries
#' in the results.csv files, however expanded test resullts are written in a site-specific folder in
#' the save directory.
#'
#' @param site The TIS site of interest, as a 4-letter code.
#' @param save.dir The save directory for data, results, and other files.
#' @param bgn.month The first month of testing, as "YYYY-MM".
#' @param end.month The last month of testing, as "YYYY-MM".
#'
#' @return Site results in 'results.csv', and raw
#' @keywords process quality, data quality, gaps, commissioning
#'
# changelog and author contributions / copyrights
# Robert Lee (2018-01-03)
# Re-wrote from earlier script (original creation)
#
##############################################################################################
## Function start
rad.dq.test=function(site, save.dir, bgn.month, end.month){
# Initiallize some params
startDateTime=NULL
value=NULL
variable=NULL
gloRadMean.000.060=NULL
UTC_DATE=NULL
NEON.rad=NULL
#START
message(paste0("Testing ", site))
#Define directories
domn=Noble::is_site_config$domain[Noble::is_site_config$site.id==site]
site.dir=Noble:::.data.route(site=site, save.dir=save.dir)
rslt.dir=paste0(save.dir, "/Common/")
if(!dir.exists(rslt.dir)){
dir.create(rslt.dir)
}
raw.dir=paste0(site.dir, "/rawData/")
if(!dir.exists(raw.dir)){
dir.create(raw.dir)
}
#set up DP info
test.dp.ids=c("DP1.00014.001",
"DP1.00023.001",
"DP1.00024.001",
"DP1.00066.001"
)
# Add in Primary Pyranometer if Core site
if(Noble::tis_site_config$core.relocatable[Noble::tis_site_config$site.id==site]=="Core"){test.dp.ids=c(test.dp.ids, "DP1.00022.001")}
# Pull and refine data to vairance fields only
lapply(test.dp.ids, function(x)
try(Noble::pull.data(site = site,
dp.id = x,
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = site.dir)
)
) %>%
do.call(what = cbind) %>%
as.data.frame() -> rad.data
########### VARIANCE TESTING ###########
######## ALL AVAILABLE RAD DATA ########
time.indx=grep(x = colnames(rad.data), pattern = "time", ignore.case = T)
extra.times=time.indx[time.indx>3]
rad.data=rad.data[,-extra.times]
empty.cols=which(colSums(is.na(rad.data))==nrow(rad.data))
rad.data=rad.data[,-empty.cols]
var.cols=grep(colnames(rad.data), pattern = "variance", ignore.case = T)
pdf(file = paste0(raw.dir, site, "_var_plots.pdf"), width = 10, height = 7.5, paper = "USr")
out=lapply(var.cols,
function(c) .var.drift.test(single.var.data=rad.data[,c(1,c)],
bgn.month=bgn.month,
end.month=end.month,
raw.dir=raw.dir,
site=site))
grDevices::dev.off()
out %>% do.call(what = rbind) %>% data.frame() %>% `colnames<-`(value=c("stream", "slope")) -> stable.vars
stable.vars=stable.vars[!is.na(stable.vars$slope),]
stable.vars$slope=as.numeric(stable.vars$slope)
stable.vars$overall=rep("Fail", times=nrow(stable.vars))
# stable.vars$test0.1="Fail"
# stable.vars$test0.01="Fail"
# stable.vars$test0.001="Fail"
#
# stable.vars$test0.1[stable.vars$slope<0.1]="Pass"
stable.vars$overall[stable.vars$slope<0.01]="Pass"
# stable.vars$test0.001[stable.vars$slope<0.001]="Pass"
utils::write.csv(x = stable.vars, file = paste0(raw.dir, "variance_stats.csv"))
drift.test="No Test"
if(all(stable.vars$overall=="Pass")){drift.test="Pass"}else if(any(stable.vars$overall=="Fail")){drift.test="Fail"}
print("Variance Stability (Drift test): ", drift.test)
#utils::write.csv(stable.vars, file = paste0(""))
# drift.test=out$drift.test
# var.data=out$var.data
###########################################
########### INTERNAL CONSISTENCY ###########
########### PAR and QL PAR ONLY ############
site.MLs=1:Noble::tis_site_config$num.of.mls[Noble::tis_site_config$site.id==site]
if(Noble::rad_dq_info$classification[Noble::rad_dq_info$Site==site]=="forest"){rho.TH=.65}else{rho.TH=.9}
PAR.pairwise=lapply(site.MLs[-1], function(x) c(x-1, x))
QL.PAR.pairwise=list(c(1,2), c(2,3))
PAR=try(Noble::pull.data(site = site,
dp.id = "DP1.00024.001",
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = site.dir)
)
QL.PAR=try(Noble::pull.data(site = site,
dp.id = "DP1.00066.001",
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = site.dir)
)
PAR=PAR[,grepl(pattern = "^PARMean", x = colnames(PAR))]
QL.PAR=QL.PAR[,grepl(pattern = "linePARMean", x = colnames(QL.PAR))]
PAR.rho=unlist(lapply(PAR.pairwise, function(x) round(stats::cor.test(PAR[,x[1]], PAR[,x[2]], method = "spearman", exact = F)$estimate, digits=2)))
QL.PAR.rho= unlist(lapply(QL.PAR.pairwise, function(x) round(stats::cor.test(QL.PAR[,x[1]], QL.PAR[,x[2]], method = "spearman", exact = F)$estimate, digits = 2)))
names(PAR.rho)=paste0("PAR-", PAR.pairwise)
names(QL.PAR.rho)=c("QL PAR 1-3", "QL PAR 3-5")
# One pair in each direction may fail
if(Noble::rad_dq_info$classification[Noble::rad_dq_info$Site==site]=="forest"){
if(length(PAR.rho<rho.TH)>=(length(PAR.rho)-1)){PAR.rho.test="Pass"}else{PAR.rho.test="Fail"} # ------>PAR rho results####
if(length(QL.PAR.rho<rho.TH)>=(length(PAR.rho)-1)){QL.PAR.rho.test="Pass"}else{QL.PAR.rho.test="Fail"} # ------>QL PAR rho results####
}
if(any(PAR.rho<rho.TH)==F){PAR.rho.test="Pass"}else{PAR.rho.test="Fail"} # ------>PAR rho results####
if(any(QL.PAR.rho<rho.TH)==F){QL.PAR.rho.test="Pass"}else{QL.PAR.rho.test="Fail"} # ------>QL PAR rho results####
### Write stats out ###
raw.stats=data.frame(rho.estimate=append(PAR.rho, QL.PAR.rho))
utils::write.csv(x = raw.stats, file = paste0(raw.dir,"par_rho_stats.csv"), row.names = T)
### Final Results ###
if(PAR.rho.test=="Fail"|QL.PAR.rho.test=="Fail"){
internal.compair.result="Fail"
}else{internal.compair.result="Pass"}
message(
paste0("PAR/QL PAR Internal Comparison Test: ", internal.compair.result) ###############################################################################################
)
########### TOWER-TOP CONSISTENCY ###########
########### Direct & Diffuse ONLY ###########
DirDif=try(Noble::pull.data(site = site,
dp.id = "DP1.00014.001",
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = .data.route(site, save.dir = save.dir))
)
DirDif=data.frame(startDateTime=DirDif[,1],
dirRadMean=DirDif[,grepl(colnames(DirDif), pattern = "dirRadMean")],
difRadMean=DirDif[,grepl(colnames(DirDif), pattern = "difRadMean")],
gloRadMean=DirDif[,grepl(colnames(DirDif), pattern = "gloRadMean")]
)
# Convert to local time
time.zone=Noble::tis_site_config$time.zone[Noble::tis_site_config$site.id==site]
DirDif$startDateTime=as.POSIXct(DirDif$startDateTime, tz="UTC")
DirDif$startDateTime=as.POSIXct(format(DirDif$startDateTime, tz=time.zone, usetz = T), tz=time.zone, usetz = T)
#Only rows with greater than 5 W/m^2 get tested
DirDif$SZA=GeoLight::zenith(GeoLight::solar(tm=DirDif$startDateTime),
lon = Noble::tis_site_config$longitude[Noble::tis_site_config$site.id==site],
lat = Noble::tis_site_config$latitude[Noble::tis_site_config$site.id==site]
)
# DirDif$SZA=RAtmosphere::SZA(timein = DirDif$startDateTime,
# Lat = Noble::tis_site_config$Latitude[Noble::tis_site_config$site.id==site],
# Lon = Noble::tis_site_config$Longitude[Noble::tis_site_config$site.id==site]
# )
# Sum up the direct and diffuse rad
DirDif$total=DirDif$dirRadMean*cos(DirDif$SZA/180*pi)+DirDif$difRadMean
DirDif=DirDif[DirDif$total>5&!is.na(DirDif$total),]
# A. Ratio is within ±8% for solar zenith angle < 75°
set1=DirDif[which(DirDif$SZA<75),]
ratio1=round(sum(set1$total)/sum(set1$gloRadMean), 2)
if(0.92<ratio1&ratio1<1.08){
rat1Result="Pass"
}else(rat1Result="Fail")
# B. Ratio is within ±15% for 93° > solar zenith angle > 75°
set2=DirDif[which(DirDif$SZA>75&&DirDif$SZA<93),]
ratio2="NA"
rat2Result="NA"
if(length(set2[,1]>0)){
ratio2=round(sum(set2$total)/sum(set2$gloRadMean), 2)
if(0.85<ratio1&ratio1<1.15){
rat2Result="Pass"
}else(rat2Result="Fail")
}
if(rat2Result=="Fail"|rat1Result=="Fail"){
ratio.result="Fail"
}else{ratio.result="Pass"}
### Raw data out
tower.top=data.frame(sets=c("SZA<75", "75<SZA<93"), ratio=c(ratio1, ratio2), result=c(rat1Result, rat2Result))
utils::write.csv(x = tower.top, file = paste0(raw.dir,"gloRad_tower_top_comparison.csv"), row.names = F)
message(
paste0("Tower Top (Direct + Diffuse, Global) Consistency Test: ", ratio.result) ##############################################################################################################
)
########### GLOBAL RAD STREAMS AT TOWER TOP ###########
t.top.IDs=test.dp.ids[test.dp.ids %in% c("DP1.00014.001", "DP1.00022.001", "DP1.00023.001")]
raw.ttop.data=lapply(t.top.IDs, function(x)
try(Noble::pull.data(site = site,
dp.id = x,
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = site.dir)
)
)
for(i in 1:length(raw.ttop.data)){
raw.ttop.data[[i]]$startDateTime=as.POSIXct(raw.ttop.data[[i]]$startDateTime, tz="UTC")
}
big.df=data.frame(
startDateTime=as.POSIXct(
Noble::help.time.seq(
from = as.POSIXct(paste0(bgn.month, "-01"), tz = "UTC"),
to=as.POSIXct(last.day.time(end.month = end.month, time.agr = 30), tz = "UTC"),
time.agr = 30), tz="UTC"))
big.df$startDateTime=as.POSIXct(big.df$startDateTime, tz="UTC")
for(i in 1:length(raw.ttop.data)){
big.df=merge(x=big.df, y=raw.ttop.data[[i]])
}
big.df=big.df[,-which(grepl(colnames(big.df), pattern = ".000$"))] #Get rid of soil plots
data.indx=unlist(lapply(c("gloRadMean", "inSWMean", "shortRadMean"), function(x) which(grepl(pattern = x, x=colnames(big.df)))))
if(length(data.indx)==3){
list=list(
pair1=stats::cor.test(x=big.df[,data.indx[1]], y=big.df[,data.indx[2]], method = "spearman", conf.level = 0.95, exact = TRUE),
pair2=stats::cor.test(x=big.df[,data.indx[2]], y=big.df[,data.indx[3]], method = "spearman", conf.level = 0.95, exact = TRUE),
pair3=stats::cor.test(x=big.df[,data.indx[1]], y=big.df[,data.indx[3]], method = "spearman", conf.level = 0.95, exact = TRUE)
)
tower.top.glo.rad="Fail"
if(all(unlist(lapply(list, "[[", "estimate")>.9))){tower.top.glo.rad="Pass"}
}
if(length(data.indx)==2){
list=stats::cor.test(x=big.df[,data.indx[1]], y=big.df[,data.indx[2]], method = "spearman", conf.level = 0.95)
tower.top.glo.rad="Fail"
if(list$estimate>=.9){tower.top.glo.rad="Pass"}
}
utils::write.csv(x = data.frame(unlist(list)), file = paste0(raw.dir, "tower_top_ratios.csv"), row.names = T)
message(paste0("Tower Top (Global Radiation Measurements) Consistency Test: ", tower.top.glo.rad))
# if(site %in% Noble::tis_site_config$site.id[Noble::tis_site_config$core.relocatable=="Core"]){
#
# }
########### EXTERNAL CONSISTENCY ###########
######## ALL GLORAD DATA ########
if(site=="BART"){
time.zone=Noble::tis_site_config$time.zone[Noble::tis_site_config$site.id==site]
offset=difftime(time1 = as.POSIXct("2018-01-01", tz=time.zone), time2=as.POSIXct("2018-01-01", tz="UTC"))
temp=RNRCS::grabNRCS.data(network = "SCAN",
site_id = 2069,
timescale = "hourly",
DayBgn =paste0(bgn.month, "-01"),
DayEnd =stringr::str_sub(string = as.character(Noble::last.day.time(end.month = end.month, time.agr = 1)), start = 1, end = 10)
)
ext.data=temp
ext.data$Date=as.POSIXct(ext.data$Date, tz=time.zone)
int.data=try(Noble::pull.data(site = site,
dp.id = "DP1.00014.001",
bgn.month = bgn.month,
end.month = end.month,
time.agr = 30,
package = "basic",
save.dir = .data.route(site, save.dir = save.dir))
)
int.data$startDateTime=as.POSIXct(int.data$startDateTime, tz="UTC")#-lubridate::hours(offset+2)
int.rad<-data.frame(int.data %>%
dplyr::group_by(startDateTime = cut(startDateTime, breaks="60 min")) %>%
dplyr::summarize(gloRadMean.000.060 = mean(gloRadMean.000.060)))
int.rad$startDateTime=as.POSIXct(int.rad$startDateTime, tz="UTC")
all.data=merge(x=int.rad, y=ext.data, by.y = "Date", by.x = "startDateTime")
#qplot(x=all.data$Solar.Radiation.Average..watt.m2., y=all.data$gloRadMean.000.060)
ext.consist=stats::cor.test(x=all.data$Solar.Radiation.Average..watt.m2., y = all.data$gloRadMean.000.060, conf.level = 0.95, method = "spearman", exact = F)$estimate
#}
}else if(!is.na(Noble::rad_dq_info$nearestUSCRN[Noble::rad_dq_info$Site==site])){
uscrn.site=as.character(Noble::rad_dq_info$nearestUSCRN[Noble::rad_dq_info$Site==site])
if(nchar(uscrn.site)<5){uscrn.site=paste0(0, uscrn.site)}
uscrn.save.loc=paste0(.data.route(site = site, save.dir = save.dir), "/",uscrn.site, "_", bgn.month, "-", end.month, ".csv")
if(!file.exists(uscrn.save.loc)){
#browser()
temp=metget::getUSCRNData(temp_agg = "subhourly",
sid = uscrn.site,
start_date = paste0(bgn.month, "-01"),
end_date = as.character(Noble::last.day.time(end.month = end.month, time.agr = 1))
)
utils::write.csv(x=temp, file = uscrn.save.loc, row.names = F)
}else{
temp=utils::read.csv(file=uscrn.save.loc, stringsAsFactors = F, colClasses = "character")
}
ext.data=temp
ext.data$UTC_DATE=as.POSIXct(paste0(ext.data$UTC_DATE, ext.data$UTC_TIME), format="%Y%m%d%H%M", tz="UTC")
#utils::write.csv(x = ext.data, file = paste0(raw.dir, "USCRN_", uscrn.site, ".csv"), row.names = F)
ext.rad=data.frame(UTC_DATE=ext.data$UTC_DATE, USCRN.rad=ext.data$SOLAR_RADIATION)
int.data=try(Noble::pull.data(site = site,
dp.id = "DP1.00014.001",
bgn.month = bgn.month,
end.month = end.month,
time.agr = 1,
package = "basic",
save.dir = .data.route(site, save.dir = save.dir))
)
int.data$startDateTime=as.POSIXct(int.data$startDateTime, tz="UTC")
int.rad=data.frame(UTC_DATE=as.POSIXct(int.data$endDateTime, format="%Y-%m-%dT%H:%M:%SZ"), NEON.rad=int.data[,grepl(x = colnames(int.data), pattern = "gloRadMean")])
int.rad=data.frame(int.rad[-(1:4),] %>%
dplyr::group_by(UTC_DATE = cut(UTC_DATE, breaks="5 min")) %>%
dplyr::summarize(NEON.rad = mean(NEON.rad)))
int.rad$UTC_DATE=as.POSIXct(int.rad$UTC_DATE, tz="UTC")
ext.rad$UTC_DATE=as.POSIXct(ext.rad$UTC_DATE, tz="UTC")
all.data=merge(x=int.rad, y=ext.rad, by= "UTC_DATE")
all.data=all.data[all.data$USCRN.rad>-5,]
ext.consist=NA
if(!all(is.na(all.data$NEON.rad))&!all(is.na(all.data$USCRN.rad))){
spearman.results=stats::cor.test(all.data$NEON.rad, as.numeric(all.data$USCRN.rad), method = "spearman", conf.level = 0.95, exact = F)
ext.consist=spearman.results$estimate
corr.plot=ggplot2::qplot(x=all.data$NEON.rad, y=all.data$USCRN.rad)+
ggplot2::ggtitle(label = paste0(site, "-", uscrn.site, " Rho: ", round(ext.consist, digits = 2)))+
ggplot2::geom_abline(slope = 1, color='red')+
ggplot2::coord_fixed()
ggplot2::ggsave(filename = paste0(raw.dir, "raw_corr_plot.pdf"), plot = corr.plot, device = "pdf", width = 7.5, height = 10, units = "in", dpi = 300)
utils::write.csv(x = data.frame(unlist(spearman.results)), file = paste0(raw.dir, "external_comparison.csv"), row.names = T)
}
}else{ext.consist=NA}
if(is.na(ext.consist)){external.test="No Test"}else if(ext.consist>0.90){external.test="Pass"}else{external.test="Fail"}
message(
paste0("External Consistency Test: ", external.test)
)
dq.rslt=data.frame(site=site,
begin.month=bgn.month,
end.month=end.month,
variance.stability = drift.test,
internal.consistency=internal.compair.result,
tower.top.consistency=tower.top.glo.rad,
dir.dif.glo.consistency=ratio.result,
external.consistency=external.test,
test.date=as.character(Sys.Date()))
########### WRITE TO RESULTS FILE ###########
if(file.exists(paste(rslt.dir,"results.csv",sep = "/"))){
dq.rpt <- utils::read.csv(file = paste(rslt.dir,"results.csv",sep = "/"), header = T, stringsAsFactors = T)
dq.rpt <- rbind(dq.rpt, dq.rslt)
utils::write.csv(x = dq.rpt, file = paste(rslt.dir,"results.csv",sep = "/"), row.names = F)
}
else{
utils::write.csv(x = dq.rslt, file = paste(rslt.dir,"results.csv",sep = "/"), col.names = T, row.names = F)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.