#' get_DWDdata
#'
#' This is a function to download DWD data based on a buffer distance.
#' The meteorological variables are provide in data.frame per station id.
#' @param lat_center latitude central point for the buffer
#' @param lon_center longitude central point for the buffer
#' @param radius_km for the buffer
#' @param time_lag default "hourly" , it can be change for daily or other timestamp available
#' @param period default "historical", if change to "recent" you get one year lag up to "now".
#' @param start_date select stations with date later than
#' @param end_date select stations with date earlier than
#' @param meteo_var type of meteorological data target in the DWD ftp (see option below)
#' @param var_name variable name target in the the downloaded meteo_var file (see option below)
#' @param by_lag to create a timestamp, default "hour",
#' @return a list with: 1- a data.frame with the selected variable per station id for the set timestamp, and
#' 2- a data.frame with metadata information, such as stations_id, start_date, end_date,
#' station_height, latitude, longitude, stations_name, region, time_lag, variable, period, file, distance, url.
#' data_dir where to save the downlaod data, default temporary. dir.create(file.path(tempdir(), "DWDdir"), showWarnings = FALSE)
#' @examples
#' ##### meteo_var options
#' # "precipitation" "air_temperature" "extreme_temperature" "extreme_wind"
#' # "solar" "wind" "wind_test" "kl"
#' # "more_precip" "weather_phenomena" "soil_temperature" "water_equiv"
#' # "cloud_type" "cloudiness" "dew_point" "moisture"
#' # "pressure" "sun" "visibility" "wind_synop"
#' # "soil" "standard_format"
#'
#' ##### var_name options
#' ### meteo_var = air_temperature
#' # var_name = TT_TU, air temperature at 2m height (Ta)
#' # var_name = RF_TU, relative humidity at 2m height (RH)
#' # meteo_var = "precipitation"
#' # var_name = R1, mm of precipitation (prec_mm)
#' # var_name = RS_IND, occurrence of precipitation, 0 no precipitation / 1 precipitation fell (prec_h)
#' ### meteo_var = "pressure"
#' # var_name = P0 # Pressure at station height (2m)
#' # var_name = P # Pressure at see level
#' ### meteo_var = "wind_synop"
#' # var_name = FF # Average wind speed (ws)
#' # var_name = DD # wind direction (wd)
#' ### meteo_var = "moisture" (atm)
#' # var_name = P_STD Hourly air pressure [hpa]
#' # var_name = RF_STD Hourly values of relative humidity [%]
#' # var_name = TD_STD Dew point temperature at 2m height [°C]
#' # var_name = TF_STD Calculated hourly values of the wet temperature [°C]
#' # var_name = TT_STD Air temperature at 2m height [°C]
#' # var_name = VP_STD calculated hourly values of the vapour pressure [hpa]
#' ### meteo_var = "sun"
#' # var_name = SD_SO # sunshine duration - minutes
#' ### "soil_temperature"
#' # soil.temp.2cm
#' # soil.temp.5cm
#' # soil.temp.10cm
#' # soil.temp.20cm
#' # soil.temp.50cm
#' # soil.temp.100cm
#' ### "visibility"
#' # V_VV # Visibility in meter
#' # V_VV_I # from the observer
#' ### "dew_point"
#' # TT # dry bulb temperature at 2 meter above ground
#' # TD # dew point temperature at 2 meter above ground
#' ### "cloudiness"
#' # V_N # Total coverage - eighth levels # same as cloudness
#'
#' #########
#' Air_temp <- get_DWDdata(lat_center = 52.4537,
#' lon_center = 13.3017,
#' radius_km = 70,
#' time_lag = "hourly",
#' period = "historical",
#' meteo_var = "air_temperature",
#' start_date = "2019-01-01",
#' end_date = "2020-12-31",
#' var_name = "TT_TU");
#'
#' Air_temp[[1]];
#' summary(Air_temp[[1]]);
#'
#' #########
#' solar_radiation <- get_Solardata(lat_center = 52.4537,
#' lon_center = 13.3017,
#' radius_km = 70,
#' time_lag = "hourly",
#' meteo_var = "solar",
#' start_date = "2018-12-31",
#' end_date = "2021-01-01");
#'
#' solar_radiation[[1]][[1]];
#' summary(solar_radiation[[1]][[1]]);
#'
#' library(tidyverse)
#'
#' solar_radiation[[1]][[1]] %>%
#' mutate(Rli = zoo::na.approx(round((ATMO_LBERG*100*100)/(60*60),2)),
#' diffuse_radiation = zoo::na.approx(round((FD_LBERG*100*100)/(60*60),2)),
#' Rin = zoo::na.approx(round((FG_LBERG*100*100)/(60*60),2)),
#' sun_duration = zoo::na.approx(SD_LBERG),
#' sun_zenith_angle = zoo::na.approx(ZENIT),
#' # timestamp = MESS_DATUM_WOZ,
#' timestamp = with_tz(force_tz(MESS_DATUM_WOZ, "CEST"), tz = "UTC")
#' ) %>%
#' select(timestamp, Rin, Rli, diffuse_radiation, sun_duration, sun_zenith_angle, MESS_DATUM) -> solar_radiation_3987
#'
#' ##########
#' SMC_daily <- get_SMCdata(lat_center = 52.4537,
#' lon_center = 13.3017,
#' radius_km = 70,
#' time_lag = "daily",
#' meteo_var = "soil",
#' start_date = "2019-01-01",
#' end_date = "2020-12-31");
#'
#' SMC_daily[[1]][[1]];
#' summary(SMC_daily[[1]][[1]]);
#'
#' @export
get_DWDdata <- function(
lat_center,
lon_center,
radius_km,
time_lag = "hourly",
period,
meteo_var,
start_date,
end_date,
var_name,
by_lag = "hour"
){
###### # stations
stations_loc <- rdwd::nearbyStations(lat = lat_center,
lon = lon_center,
radius = radius_km,
res = time_lag,
per = period,
var = meteo_var,
mindate = as.Date(start_date))
stations_loc <- stations_loc[-1,]
links_data <- rdwd::selectDWD(stations_loc$Stationsname,
res = time_lag,
per = period,
var = meteo_var)
# download file:
data_name <- rdwd::dataDWD(links_data, dir = "DWDdata", read = FALSE)
# read and plot file:
data_set <- rdwd::readDWD(sub(paste0(getwd(),"/"), "", data_name), varnames = FALSE, tz = "UTC") #, format = NULL
delete_staions <- as.vector(stats::na.omit(sapply(1:length(data_set), function(i)
ifelse((as.Date(utils::tail(data_set[[i]]$MESS_DATUM, n = 1)) >= as.Date(end_date)) == FALSE |
(as.Date(utils::head(data_set[[i]]$MESS_DATUM, n = 1)) <= as.Date(start_date)) == FALSE,
i, NA))))
data_set <- data_set[-c(delete_staions)]
stations_loc <- stations_loc[-c(delete_staions),]
delete_staions2 <- as.vector(stats::na.omit(sapply(1:length(data_set), function(i)
ifelse(is.na(utils::head(dplyr::filter(data_set[[i]], lubridate::year(MESS_DATUM) >= lubridate::year(start_date))[var_name], n = 1)) == TRUE |
is.na(utils::tail(dplyr::filter(data_set[[i]], lubridate::year(MESS_DATUM) >= lubridate::year(start_date))[var_name], n = 1)) == TRUE,
i, NA)
)))
# ### if dif than NA
if (class(delete_staions2) == "integer"){
data_set <- data_set[-c(delete_staions2)]
}
if (class(delete_staions2) == "integer"){
stations_loc <- stations_loc[-c(delete_staions2),]
}
# #####
ts <- seq(as.POSIXct(as.Date(start_date), tz = "UTC"), as.POSIXct(as.Date(end_date)+1, tz = "UTC"),
by = by_lag) #"30 min"
ts <- lubridate::force_tz(ts, tz = "UTC")
ts <- data.frame("MESS_DATUM" = ts[24:(length(ts)-2)])
data_set_period_NA <- sapply(1:length(data_set), function(i) dplyr::left_join(ts, data_set[[i]],
by = "MESS_DATUM")[var_name])
data_set_period <- data.frame("MESS_DATUM" = ts)
# the sun duration exclude night time
if (meteo_var == "sun") {
for (i in 1:length(data_set_period_NA)) {
ifelse(is.na(tail(data_set_period_NA[[i]])[3])==TRUE,
data_set_period_NA[[i]][length(data_set_period_NA[[i]])-3] <- round(mean(sapply(data_set_period_NA,tail,4)[1,], na.rm = TRUE),2),
NA)
}
for (i in 1:length(data_set_period_NA)) {
ifelse(is.na(head(data_set_period_NA[[i]])[4])==TRUE,
data_set_period_NA[[i]][4] <- round(mean(sapply(data_set_period_NA,head,4)[4,], na.rm = TRUE),2),
NA)
}
for (i in 1:length(data_set_period_NA)) {
data_set_period[i] <- c(c(0,0,0), zoo::na.approx(data_set_period_NA[[i]]), c(0,0,0))
}
} else {
# otherwise test if the first or last value is NA
for (i in 1:length(data_set_period_NA)) {
ifelse(is.na(tail(data_set_period_NA[[i]],1))==TRUE,
data_set_period_NA[[i]][length(data_set_period_NA[[i]])] <- round(mean(sapply(data_set_period_NA,tail,1), na.rm = TRUE),2),
NA)
}
for (i in 1:length(data_set_period_NA)) {
ifelse(is.na(head(data_set_period_NA[[i]],1))==TRUE,
data_set_period_NA[[i]][1] <- round(mean(sapply(data_set_period_NA,head,1), na.rm = TRUE),2),
NA)
}
for (i in 1:length(data_set_period_NA)) {
data_set_period[i] <- c(as.numeric(zoo::na.approx(data_set_period_NA[[i]])))
}
}
colnames(data_set_period) <- sapply(1:length(data_set_period), function(i) paste0("ID_", data_set[[i]][1,1]))
dataset <- data.frame("timestamp" = ts, data_set_period)
names(stations_loc) <- c("stations_id", "start_date", "end_date", "station_height", "latitude", "longitude",
"stations_name", "region", "time_lag", "variable", "period", "file", "distance", "url")
data_list <- list(dataset, stations_loc)
return(data_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.