download data from DWD

Share:

Description

Get climate data from the German Weather Service (DWD) FTP-server. The desired .zip dataset is downloaded into dir, unpacked, read, processed and returned as a data.frame

Usage

1
2
3
4
dataDWD(file,
  base1 = "ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate",
  base2 = "hourly/precipitation/recent", dir = "DWDdata", browse = 0:2,
  meta = 0:2, read = TRUE, format = NA, quiet = FALSE, ...)

Arguments

file

Filename (must be available at the location given by base1 and base2)

base1

Main directory of DWD ftp server (can probably always be left unchanged)

base2

Character string with subdirectory. DEFAULT: "hourly/precipitation/recent"

dir

Writeable directory on your computer. Created if not existent. DEFAULT: "DWDdata" at your current getwd()

browse

Integer specifying whether and how to open repository via browseURL.
0 for regular file download.
1 to open base1.
2 to open base1/base2.
If browse = 1 or 2, no dir is created and no download is performed. DEFAULT: 0

meta

Integer specifying whether to get metadata instead of actual data.
0 for regular file.
1 for meta data of all stations (meta is automatically set to 1 if file ends in ".txt". Column widths for read.fwf are computed internally).
2 for a list of the available files (requires RCurl to be installed. If meta=2, file="" is possible, as it is ignored anyway).
DEFAULT: 0

read

Read the file with readDWD? If FALSE, only download is performed. DEFAULT: TRUE

format

Format used in strptime to convert date/time column, see readDWD. DEFAULT: NA

quiet

Suppress message about directory? DEFAULT: FALSE

...

Further arguments currently ignored

Value

data.frame of the desired dataset (returned by readDWD if meta=0), presuming downloading and processing were successful. Alternatively, links that were opened if browse!=0.

Author(s)

Berry Boessenkool, berry-b@gmx.de, Jun 2016

See Also

readDWD, download.file, monthAxis, climateGraph

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
## Not run:  ## Not run in CRAN checks because of downloading, writing files, etc

# 1. Basic usage ------------------------------------------------------------

prec <- dataDWD(file="stundenwerte_RR_02787_akt.zip")
plot(prec$MESS_DATUM, prec$NIEDERSCHLAGSHOEHE, main="DWD hourly rain Kupferzell", col="blue",
     xaxt="n", las=1, type="l", xlab="Date", ylab="Hourly rainfall  [mm]")
monthAxis(1, ym=T)

prec2 <- dataDWD("stundenwerte_RR_03987_akt.zip") # writes into the same folder


# 2. find certain station ---------------------------------------------------
# Get long term climate records of a certain station (e.g. Kirchberg)

dataDWD("", browse=2, base2="monthly/kl/historical") # open link in browser
# metadata for all existing stations:
stats <- dataDWD("KL_Monatswerte_Beschreibung_Stationen.txt", base2="monthly/kl/historical")
str(stats)  # data.frame with 8 columns (4 int, 2 num, 2 factor), 1053 rows (July 2016)
stats[grep("kirchberg", stats$Stationsname, ignore.case=TRUE), ] 
# identify the station id you need (there may be multiple matches): 02575 


# List of actually available files (needs RCurl):
# install.packages("RCurl")
files <- dataDWD("", meta=2, base2="monthly/kl/historical")
#   files <- strsplit(files, "\n")[[1]]   # needed on linux
clim <- dataDWD(base2="monthly/kl/historical", file=files[grep("_02575_", files)])
# monthly averages/mins/maxs of: wind, clouds, rainfall, sunshine, temperature
head(clim)

# Map of all precipitation stations:
if(FALSE){ # pdf saving works only in berryFunctions source directory
pstats <- dataDWD("RR_Stundenwerte_Beschreibung_Stationen.txt", 
                  base2="hourly/precipitation/historical")
pfiles <- dataDWD("", meta=2, base2="hourly/precipitation/historical")
hasfile <- pstats$Stations_id %in% na.omit(as.numeric(substr(pfiles, 17, 21)))
library("OSMscale")
map <- pointsMap(geoBreite, geoLaenge, data=pstats, fx=0.28, fy=0.06)
pdf("inst/extdata/RainfallStationsMap.pdf")
plot(map)
scaleBar(map, x=0.05, y=0.03, abslen=200)
pp <- projectPoints(geoBreite, geoLaenge, data=pstats, to=posm())
points(pp[!hasfile,], col="red", pch=3)
points(pp[ hasfile,], col="blue", pch=3)
legend("bottomright", c("in metadata only", "file on FTP server"), 
       col=c("red", "blue"), pch=3, bg="white")
title(main="DWD stations: Rainfall data on ftp server", line=3)
dev.off()
}

# 3. Get data for several stations ------------------------------------------
# (do this at your own risk of getting kicked off the FTP)

files <- dataDWD("", meta=2)
#   files <- strsplit(files, "\n")[[1]]   # needed on linux
headtail(sort(files),6)
# Apply the function to several files, create a list of data.frames:
files <- files[grep(".zip", files, fixed=TRUE)]
prec <- lapply(files[1:2], function(f) {Sys.sleep(runif(1,0,5)); dataDWD(f)})
names(prec) <- substr(files[1:2], 14, 21)
str(prec, max.level=1)

# Real life example with data completeness check etc:
browseURL("http://github.com/brry/prectemp/blob/master/Code_example.R")


# 4. Test metadata part of function -----------------------------------------

files <- read.table(as.is=TRUE, text="
#ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate/
daily/kl/historical                KL_Tageswerte_Beschreibung_Stationen.txt
daily/kl/recent                    KL_Tageswerte_Beschreibung_Stationen.txt
daily/more_precip/historical       RR_Tageswerte_Beschreibung_Stationen.txt
daily/more_precip/recent           RR_Tageswerte_Beschreibung_Stationen.txt
daily/soil_temperature/historical  EB_Tageswerte_Beschreibung_Stationen.txt
daily/soil_temperature/recent      EB_Tageswerte_Beschreibung_Stationen.txt
daily/solar                        ST_Beschreibung_Stationen.txt
hourly/air_temperature/historical  TU_Stundenwerte_Beschreibung_Stationen.txt
hourly/air_temperature/recent      TU_Stundenwerte_Beschreibung_Stationen.txt
hourly/cloudiness/historical       N_Stundenwerte_Beschreibung_Stationen.txt
hourly/cloudiness/recent           N_Stundenwerte_Beschreibung_Stationen.txt
hourly/precipitation/historical    RR_Stundenwerte_Beschreibung_Stationen.txt
hourly/precipitation/recent        RR_Stundenwerte_Beschreibung_Stationen.txt
hourly/pressure/historical         P0_Stundenwerte_Beschreibung_Stationen.txt
hourly/pressure/recent             P0_Stundenwerte_Beschreibung_Stationen.txt
hourly/soil_temperature/historical EB_Stundenwerte_Beschreibung_Stationen.txt
hourly/soil_temperature/recent     EB_Stundenwerte_Beschreibung_Stationen.txt
hourly/solar                       ST_Beschreibung_Stationen.txt
hourly/sun/historical              SD_Stundenwerte_Beschreibung_Stationen.txt
hourly/sun/recent                  SD_Stundenwerte_Beschreibung_Stationen.txt
hourly/wind/historical             FF_Stundenwerte_Beschreibung_Stationen.txt
hourly/wind/recent                 FF_Stundenwerte_Beschreibung_Stationen.txt
monthly/kl/historical              KL_Monatswerte_Beschreibung_Stationen.txt
monthly/kl/recent                  KL_Monatswerte_Beschreibung_Stationen.txt
monthly/more_precip/historical     RR_Monatswerte_Beschreibung_Stationen.txt
monthly/more_precip/recent         RR_Monatswerte_Beschreibung_Stationen.txt")
i=1
meta <- dataDWD(file=files[i,2], base2=files[i,1])
colPoints(geoLaenge, geoBreite, Stations_id, data=meta, add=F, asp=1.5)
colPoints(geoLaenge, geoBreite, Stationshoehe, data=meta, add=F, asp=1.5)
meta$von_jahr <- meta$von_datum/1e4
meta$bis_jahr <- meta$bis_datum/1e4
meta$dauer <- meta$bis_jahr - meta$von_jahr
colPoints(geoLaenge, geoBreite, von_jahr, data=meta, add=F, asp=1.5)
colPoints(geoLaenge, geoBreite, bis_jahr, data=meta, add=F, asp=1.5)
colPoints(geoLaenge, geoBreite, dauer, data=meta, add=F, asp=1.5) 
hist(meta$bis_jahr, breaks=50, col="purple")
hist(meta$dauer, breaks=50, col="purple")
sum(meta$dauer>50); mean(meta$dauer>50) 
# 356 (32.7%) stations with more than 50 years of data (according to metadata)

## End(Not run)

Want to suggest features or report bugs for rdrr.io? Use the GitHub issue tracker.