Nothing
#'Calculates the rmse, mae, bias, correlation over time
#'of a NetCDF file and a dataframe (station data).
#'Designed for the CM SAF R Toolbox.
#'
#'@param var Name of NetCDF variable of NetCDF file (character).
#'@param infile Filename of input NetCDF file. This may include
#' the directory (character).
#'@param data_station Dataframe of RData or csv file (station data);
#' Designed for the CM SAF R Toolbox.
#'@param outfile Filename of output csv file. This may include the directory
#' (character).
#'@param overwrite logical; should existing output file be overwritten?; Default: FALSE
#'@param nc Alternatively to \code{infile} you can specify the input as an
#' object of class `ncdf4` (as returned from \code{ncdf4::nc_open}).
#'
#'@return A csv file including the rmse, mae, bias and correlation over time
#' is written.
#'@export
#'
#'@family metrics
cmsaf.stats.station.data <- function(var, infile, data_station, outfile, overwrite = FALSE,
nc = NULL) {
gc()
calc_time_start <- Sys.time()
if(overwrite){
if(file.exists(outfile)){
unlink(outfile)
}
}
check_variable(var)
if (is.null(nc)) check_infile(infile)
if (!is.null(nc)) nc_in <- nc
else nc_in <- nc_open(infile)
file_data <- cmsafops::read_file(infile, var, nc = nc)
lon <- file_data$dimension_data$x
lat <- file_data$dimension_data$y
if (!is.null(nc)) id <- nc
else id <- ncdf4::nc_open(infile)
data_nc <- ncdf4::ncvar_get(id, var, collapse_degen = FALSE)
date <- ncdf4::ncvar_get(id, "time")
t_unit <- ncdf4::ncatt_get(id, "time", "units")$value
date.time <- format(cmsafops::get_time(t_unit, date))
data2 <- data_station
# Assume this needs to be added as id wasn't closed
if (is.null(nc)) ncdf4::nc_close(id)
list_data_station <- list()
station_all <- NULL
for (i in seq_along(data2$lon)) {
dummy <- paste0("[", round(data2$lon[i], digits = 1), ";", round(data2$lat[i], digits = 1), "]")
station_all <- append(station_all, dummy)
}
station_all_seq <- unique(station_all)
for(index_time in 1:length(date.time)){
a <- data2
min_lon <- min(lon, na.rm = TRUE)
max_lon <- max(lon, na.rm = TRUE)
min_lat <- min(lat, na.rm = T)
max_lat <- max(lat, na.rm = T)
# lon
slider1 <- c(max(round(as.numeric(min_lon)), -180), min(round(as.numeric(max_lon)), 180))
# lat
slider2 <- c(max(round(as.numeric(min_lat)), -90), min(round(as.numeric(max_lat)), 90))
lo_dummy <- c("lon", "longitude", "laenge", "x", "lon_rep")
la_dummy <- c("lat", "latitude", "breite", "y", "lat_rep")
ti_dummy <- c("time", "date", "zeit", "t", "get_time.file_data.time_info.units..file_data.dimension_data.t.")
da_dummy <- c("data", "daten", "z", "element", "result")
dn <- attr(a, "element_name")
if (!is.null(dn)) {
da_dummy <- append(da_dummy, dn)
} else {
dn <- attr(a, "data_name")
if (!is.null(dn)) {
da_dummy <- append(da_dummy, dn)
}
}
instat_names <- names(a)
lo_n <- 0
la_n <- 0
ti_n <- 0
da_n <- 0
for (i in seq_along(instat_names)) {
if (toupper(instat_names[i]) %in% toupper(lo_dummy)) (lo_n <- i)
if (toupper(instat_names[i]) %in% toupper(la_dummy)) (la_n <- i)
if (toupper(instat_names[i]) %in% toupper(ti_dummy)) (ti_n <- i)
if (toupper(instat_names[i]) %in% toupper(da_dummy)) (da_n <- i)
}
if (lo_n > 0 & la_n > 0 & ti_n > 0 & da_n > 0) {
# check monthly or daily
# station
time_station <- a[, ti_n]
if (length(time_station) > 500) (time_station <- time_station[1:500])
mon_station <- format(as.Date(time_station), "%m")
year_station <- format(as.Date(time_station), "%Y")
day_station <- format(as.Date(time_station), "%d")
dummy <- which(mon_station == mon_station[1] & year_station == year_station[1])
mmdm <- "d"
if (length(unique(day_station[dummy])) == 1) {
mmdm <- "m"
}
# satellite
time_sat <- date.time
if (length(time_sat) > 40) (time_sat <- time_sat[1:40])
mon_sat <- format(as.Date(time_sat), "%m")
year_sat <- format(as.Date(time_sat), "%Y")
day_sat <- format(as.Date(time_sat), "%d")
dummy <- which(mon_sat == mon_sat[1] & year_sat == year_sat[1])
mmdm_sat <- "d"
if (length(unique(day_sat[dummy])) == 1) {
mmdm_sat <- "m"
}
# extract data for chosen time step
if (mmdm == "m" & mmdm_sat == "m") {
match_time <- which(format(as.Date(a[, ti_n]), "%Y-%m") == format(as.Date(date.time[index_time]), "%Y-%m"), arr.ind = TRUE)
} else {
match_time <- which(a[, ti_n] == date.time[index_time], arr.ind = TRUE)
}
lon_station <- a[, lo_n][match_time]
lat_station <- a[, la_n][match_time]
data_station <- a[, da_n][match_time]
# delete NAs
dummy <- !is.na(data_station)
data_station <- data_station[dummy]
data_station <- data_station
lon_station <- lon_station[dummy]
lat_station <- lat_station[dummy]
# Extract corresponding data points
data_sat <- c(seq_along(data_station))
result_x <- c()
result_y <- c()
result_x <- rep(lon, length(lat))
for(j in seq_along(lat)){
result_y <- append(result_y, rep(lat[j], length(lon)))
}
A <- cbind(x=result_x, y=result_y)
for (istation in seq_along(data_station)) {
B <- cbind(x=c(lon_station[istation]), y=c(lat_station[istation]))
tree <- SearchTrees::createTree(A)
inds <- SearchTrees::knnLookup(tree, newdat=B, k=1)
lon_coor <- A[inds,1]
lat_coor <- A[inds,2]
data_sat[istation] <- data_nc[which(lon == lon_coor),which(lat == lat_coor), which(date.time == date.time[index_time])]
}
cd <- data.frame(data_sat, data_station, lon_station, lat_station)
}
labs <- NULL
for (i in seq_along(cd$lon_station)) {
dummy <- paste0("[", round(cd$lon_station[i], digits = 1), ";", round(cd$lat_station[i], digits = 1), "]")
labs <- append(labs, dummy)
}
row.names(cd) <- labs
for(i in 1:length(station_all_seq)){
value <- cd[station_all_seq[i],]
if(index_time==1){
list_data_station <- append(list_data_station, list(cd[station_all_seq[i],]))
row.names(list_data_station[[i]]) <- NULL
} else {
list_data_station[[i]] <- rbind(list_data_station[[i]],cd[station_all_seq[i],])
row.names(list_data_station[[i]]) <- NULL
}
}
}
rmse <- c() # RMSE (root mean squared error)
mae <- c() # MAE (mean absolute error)
bias <- c() # BIAS
correlation <- c() # Correlation over time
for(i in 1:length(station_all_seq)) {
diff_nc_station_data <- list_data_station[[i]]$data_sat - list_data_station[[i]]$data_station
rmse <- append(rmse, sqrt(mean((c(diff_nc_station_data))^2, na.rm = TRUE)))
mae <- append(mae, mean(abs(c(diff_nc_station_data)), na.rm = TRUE))
bias <- append(bias, sqrt( mean((c(diff_nc_station_data))^2, na.rm = TRUE) - stats::var(c(diff_nc_station_data), na.rm = TRUE)))
correlation <- append(correlation, stats::cor(list_data_station[[i]]$data_sat, list_data_station[[i]]$data_station, use = "na.or.complete"))
}
location <- station_all_seq
result_stats <- cbind(location, rmse, mae, bias, correlation)
result_stats_df <- as.data.frame(result_stats)
utils::write.csv(x = result_stats_df, file = outfile, row.names = FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.