# DWD Daten mit R runterladen, Wetter und Klimadaten in R
# Deutscher Wetterdienst R Daten download Klimastationen
# Weather Data Germany download with R, Climate Data Germany
#
#' @title Download data from the DWD CDC FTP Server
#' @description Get climate data from the German Weather Service (DWD) FTP-server.
#' The desired dataset is downloaded into `dir`.
#' If `read=TRUE`, it is also read and processed.\cr
#' `dataDWD` handles vectors of URLs,
#' displays progress bars (if the package `pbapply` is available)
#' and by default does not re-download data already in `dir`
#' (but see argument `force` to update files).\cr
#' To solve "errors in download.file: cannot open URL", see
#' <https://bookdown.org/brry/rdwd/fileindex.html>.\cr
#'
#' @return Presuming downloading and processing were successful:
#' if `read=TRUE`, the desired dataset
#' (as returned by [readDWD()]),
#' otherwise the filename as saved on disc
#' (may have "_n" appended in name, see [berryFunctions::newFilename()]).\cr
#' If length(file)>1, the output is a list of outputs / vector of filenames.\cr
#' The output is always invisible.
#' @author Berry Boessenkool, \email{berry-b@@gmx.de}, Jun-Oct 2016
#' @seealso [selectDWD()]. [readDWD()], [download.file()].\cr
#' <https://bookdown.org/brry/rdwd>\cr
#' Helpful for plotting: [berryFunctions::monthAxis()],
#' see also [berryFunctions::climateGraph()]
#' @keywords data file
#' @importFrom utils tail download.file browseURL
#' @importFrom berryFunctions newFilename owa tmessage twarning tstop truncMessage
#' @importFrom pbapply pblapply
#' @importFrom stats runif
#' @export
#' @examples
#' \dontrun{ ## requires internet connection
#' # find FTP files for a given station name and file path:
#' link <- selectDWD("Fuerstenzell", res="hourly", var="wind", per="recent")
#' # download file:
#' fname <- dataDWD(link, dir=locdir(), read=FALSE) ; fname
#' # dir="DWDdata" is the default directory to store files
#' # unless force=TRUE, already obtained files will not be downloaded again
#'
#' # read and plot file:
#' wind <- readDWD(fname, varnames=TRUE) ; head(wind)
#' metafiles <- readMeta(fname) ; str(metafiles, max.level=1)
#' column_names <- readVars(fname) ; head(column_names)
#'
#' plot(wind$MESS_DATUM, wind$F, main="DWD hourly wind Fuerstenzell", col="blue",
#' xaxt="n", las=1, type="l", xlab="Date", ylab="Hourly Wind speed [m/s]")
#' berryFunctions::monthAxis(1)
#'
#'
#' # current and historical files, keep historical in the overlap time period:
#' link <- selectDWD("Potsdam", res="daily", var="kl", per="hr"); link
#' potsdam <- dataDWD(link, dir=locdir(), hr=4)
#' plot(TMK~MESS_DATUM, data=tail(potsdam,1500), type="l")
#'
#'
#' # With many files (>>50), use sleep to avoid getting kicked off the FTP server
#' #links <- selectDWD(res="daily", var="solar")
#' #sol <- dataDWD(links, sleep=20) # random waiting time after download (0 to 20 secs)
#'
#' # Real life examples can be found in the use cases section of the vignette:
#' # browseURL("https://bookdown.org/brry/rdwd")
#' }
#'
#' @param url Char (vector): complete file URL(s) (including base and filename.zip)
#' as returned by [selectDWD()]. Can be a vector with several FTP URLs.
#' @param base Single char: base URL that will be removed from output file names.
#' DEFAULT: [`dwdbase`]
#' @param joinbf Logical: paste `base` and `file url` together?
#' Needed mostly for data at [`gridbase`].
#' DEFAULT: FALSE (selectDWD returns complete URLs already)
#' @param dir Char: Writeable directory name where to save the downloaded file.
#' Created if not existent. DEFAULT: [locdir()]
#' @param force Logical (vector): always download, even if the file already exists in `dir`?
#' Use NA to force re-downloading files older than 24 hours.
#' Use a numerical value to force after that amount of hours.
#' Use something like `c(Inf, 24)` or `force=c(24*365, 6)`, for per="hr".
#' Note: if `force` is not FALSE, the `overwrite` default is TRUE.
#' DEFAULT: FALSE
#' @param overwrite Logical (vector): if force=TRUE, overwrite the existing file
#' rather than append "_1"/"_2" etc to the filename?
#' DEFAULT: `!isFALSE(force)`, i.e. true when `force` is specified.
#' @param read Logical: read the file(s) with [readDWD()]? If FALSE,
#' only download is performed and the filename(s) returned. DEFAULT: TRUE
#' @param dbin Logical: Download binary file, i.e. add `mode="wb"` to the
#' [download.file()] call?
#' See [Website](https://bookdown.org/brry/rdwd/raster-data.html#binary-file-errors)
#' for details.
#' DEFAULT: TRUE
#' @param method [download.file] `method`. Introduced in version 1.5.25 (2022-05-12)
#' as triggered by <https://github.com/brry/rdwd/issues/34>.
#' DEFAULT: `getOption("download.file.method")`
#' @param dfargs Named list of additional arguments passed to [download.file()]
#' Note that mode="wb" is already passed if `dbin=TRUE`
#' @param sleep Number. If not 0, a random number of seconds between 0 and
#' `sleep` is passed to [Sys.sleep()] after each download
#' to avoid getting kicked off the FTP-Server,
#' see note in [indexFTP()]. DEFAULT: 0
#' @param progbar Logical: present a progress bar with estimated remaining time?
#' If missing and length(file)==1, progbar is internally set to FALSE.
#' Only works if the R package `pbapply` is available. DEFAULT: TRUE (!quiet)
#' @param browse Logical: open repository via [browseURL()] and
#' return URL folder path? If TRUE, no data is downloaded.
#' If file has several values, only unique folders will be opened.
#' DEFAULT: FALSE
#' @param ntrunc Single integer: number of filenames printed in messages
#' before they get truncated with message "(and xx more)". DEFAULT: 2
#' @param file Deprecated since rdwd version 1.3.34, 2020-07-28.
#' @param quiet Logical: suppress message about directory / filenames?
#' DEFAULT: FALSE through [rdwdquiet()]
#' @param \dots Further arguments passed to [readDWD()],
#' like `fread`, `varnames`, `hr`, etc.
#
dataDWD <- function(
url,
base=dwdbase,
joinbf=FALSE,
dir=locdir(),
force=FALSE,
overwrite=!isFALSE(force),
read=TRUE,
dbin=TRUE,
method=getOption("download.file.method"),
dfargs=NULL,
sleep=0,
progbar=!quiet,
browse=FALSE,
ntrunc=2,
file=NULL,
quiet=rdwdquiet(),
...
)
{
if(!is.null(file)) tstop("The argument 'file' has been renamed to 'url' with rdwd version 1.3.34, 2020-07-28")
if(!is.atomic(url)) tstop("url must be a vector, not a ", class(url))
if(!is.character(url)) tstop("url must be char, not ", class(url))
base <- sub("/$","",base) # remove accidental trailing slash
url <- sub("^/","",url) # remove accidental leading slash
if(joinbf) url <- paste0(base,"/",url)
if(missing(progbar) & length(url)==1) progbar <- FALSE
if(any(url==""))
{
tmessage("Removing ", sum(url==""), " empty element(s) from url vector.")
url <- url[url!=""]
}
if(length(url)<1) tstop("The vector of urls to be downloaded is empty.")
# be safe from accidental vector input:
dir <- dir[1]
progbar <- progbar[1]
sleep <- sleep[1]
quiet <- quiet[1]
read <- read[1]
browse <- browse[1]
#
# open URL path(s) in internet browser:
if(browse)
{
folders <- unique(dirname(url))
sapply(folders, browseURL)
return(folders)
}
# create directory to store downloaded data
owd <- dirDWD(dir, quiet=quiet)
on.exit(setwd(owd))
# output file name(s)
hbas <- sub("^ftp://","https://", base) # for https base
outfile <- gsub(paste0(base,"/"), "", url)
outfile <- gsub(paste0(hbas,"/"), "", outfile)
outfile <- gsub("/", "_", outfile)
# force=NA management
if(is.null(force)) tstop("'force' cannot be NULL. Must be TRUE, FALSE, NA or a number.")
force <- rep(force, length=length(outfile)) # recycle vector
fT <- sapply(force, isTRUE)
fF <- sapply(force, isFALSE)
if(any(fT)) force[fT] <- 0
if(any(fF)) force[fF] <- Inf
force[is.na(force)] <- 24
force <- difftime(Sys.time(), file.mtime(outfile), units="h") > force
dontdownload <- file.exists(outfile) & !force
if( any(dontdownload) & !quiet )
{
tmessage(sum(dontdownload), " file", if(sum(dontdownload)>1)"s",
" already existing and not downloaded again: ",
berryFunctions::truncMessage(outfile[dontdownload], ntrunc=ntrunc, prefix=""),
"\nNow downloading ",sum(!dontdownload)," files...")
}
outfile <- newFilename(outfile, quiet=quiet, ignore=dontdownload,
overwrite=overwrite, ntrunc=ntrunc, tellignore=FALSE)
# since berryFunctions 1.15.9 (2017-06-14), outfile is now an absolute path
# Optional progress bar:
if(progbar) lapply <- pbapply::pblapply
# ------------------------------------------------------------------------------
# loop over each filename
dl_results <- lapply(seq_along(url), function(i)
if(!dontdownload[i])
{
# Actual file download:
dfdefaults <- list(url=url[i], destfile=outfile[i], method=method, quiet=TRUE)
if(dbin) dfdefaults <- c(dfdefaults, mode="wb")
e <- try(suppressWarnings(do.call(download.file,
berryFunctions::owa(dfdefaults, dfargs))), silent=TRUE)
# wait some time to avoid FTP bot recognition:
if(sleep!=0) Sys.sleep(runif(n=1, min=0, max=sleep))
return(e)
})
# check for download errors:
iserror <- sapply(dl_results, inherits, "try-error")
if(any(iserror))
{
ne <- sum(iserror)
msg <- paste0(ne, " Download", if(ne>1) "s have" else " has",
" failed (out of ",length(iserror),").",
if(read)" Setting read=FALSE.")
read <- FALSE
msg <- paste0(msg, " download.file error",if(ne>1) "s",":\n")
msg2 <- sapply(dl_results[iserror], function(e)attr(e,"condition")$message)
msg2 <- berryFunctions::truncMessage(msg2, ntrunc=15, prefix="", midfix="", altnix="", sep="\n")
if(any(!substr(url[iserror], 1, 4) %in% c("ftp:","http")))
msg2 <- paste0(msg2, "\n- dataDWD needs urls starting with 'ftp://' or 'https://'. ",
"You can use joinbf=TRUE for relative links.")
if(grepl("cannot open URL", msg2) || grepl("Kann URL .* nicht", msg2))
msg2 <- paste0(msg2, "\n- If files have been renamed on the DWD server, ",
"see https://bookdown.org/brry/rdwd/fileindex.html")
msg <- paste0(msg, msg2)
warning(msg, call.=FALSE)
}
# ------------------------------------------------------------------------------
# Output: Read the file or outfile name:
output <- outfile
if(read) output <- readDWD(file=outfile, quiet=quiet, progbar=progbar, ...)
# output:
return(invisible(output))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.