## EU-Circle.
## This script reads the rain gauge data from the MIDAS data base (UK Met Office)
## downloaded from CEDA.
## Metadata in http://browse.ceda.ac.uk/browse/badc/ukmo-midas/metadata/CPAS/CPAS.DATA
## Data in: http://browse.ceda.ac.uk/browse/badc/ukmo-midas/data/RD/yearly_files
## File description in: http://badc.nerc.ac.uk/data/ukmo-midas/RD_Table.html
## All stations are stored in single ASCII files for each year (not an efficient way).
## The data is not open, but an application must be sent to CEDA. This script has been
## made to read downloaded ASCII files with data.
## Rasmus.Benestad@met.no, 2016-01-29, Meteorologisk institutt, Oslo, Norway
require(esd)
dealwithduplicates <- function(precip,t1,action="remove",verbose=FALSE) {
  ## Resolve duplicated time stamps in a daily precipitation record.
  ##
  ## precip:  numeric vector of precipitation values.
  ## t1:      time stamps corresponding to precip (same length).
  ## action:  "remove" keeps the first value at each duplicated time,
  ##          "NA" sets every value at a duplicated time to NA before
  ##          collapsing, "sum" replaces the values at a duplicated time
  ##          by their sum before collapsing.
  ## verbose: print diagnostics about the duplicated entries.
  ##
  ## Returns a vector with one value per unique time stamp.
  ## Bug fix: an unrecognised action used to fall through and fail with
  ## "object 'x' not found"; match.arg() now gives a clear error instead.
  action <- match.arg(action, c("remove","NA","sum"))
  print('dealwithduplicates')
  ## TRUE for every entry whose time stamp occurs more than once
  ## (including the first occurrence):
  idup <- is.element(t1,t1[duplicated(t1)])
  if (verbose) {str(precip); str(t1); str(idup)
    print('duplicated dates'); print(t1[idup])
    print('duplicated precipitation'); print(precip[idup])}
  if (action=="remove") x <- precip[!duplicated(t1)]
  if (action=="NA") {
    ## Flag all entries at duplicated times as missing, then keep one per time
    precip[idup] <- NA
    x <- precip[!duplicated(t1)]
  }
  if (action=="sum") {
    ## Accumulate all values recorded at the same time stamp
    for (it in t1[duplicated(t1)]) {
      iii <- is.element(t1,it)
      precip[iii] <- sum(precip[iii])
    }
    x <- precip[!duplicated(t1)]
  }
  return(x)
}
## Function that removes some extra commas in files with different number of commas
fixfile <- function(fname) {
  ## Repair lines that carry one comma more than the rest of the file
  ## (the MIDAS ASCII files are not entirely consistent) by cutting each
  ## such line just before its last comma. The file is rewritten in place.
  x <- readLines(con=fname)
  z <- gregexpr(',',x)            # comma positions for every line
  nc <- unlist(lapply(z,length))  # number of commas per line
  print(table(nc))
  icrm <- nc == max(nc)           # lines carrying the extra comma
  ## Bug fix: the vector of last-comma positions must be subset by icrm as
  ## well; the original passed the full-length vector to substr(), which
  ## recycled the WRONG stop positions over the flagged lines.
  lastcomma <- unlist(lapply(z,max))
  x[icrm] <- substr(x[icrm],1,lastcomma[icrm]-1)
  writeLines(x,con=fname)
}
station.midas <- function(stid=NULL,loc=NULL,lon=c(-7,5),lat=c(48,62),alt=NULL,county=NULL,
                          cntr=NULL,it=NULL,nmin=30,
                          path='data/midas/',pattern='midas_raindrnl',metaid='CPAS.DATA',verbose=TRUE,plot=TRUE) {
  ## Read UK Met Office MIDAS daily rain-gauge data from yearly ASCII files.
  ##
  ## Stations are selected from the CPAS metadata by id (stid), name prefix
  ## (loc), coordinate windows (lon/lat), elevation threshold (alt), country
  ## code (cntr), time interval (it) and minimum record length in years (nmin).
  ## 'county' is currently unused but kept for interface compatibility.
  ##
  ## Returns (invisibly) a list Y with one esd 'station' object per yearly
  ## data file (named 'x.<year>'), or NULL when no station matches.
  metacolnames <- c('SRC_ID','SRC_NAME','ID_TYPE','ID','MET_DOMAIN_NAME','SRC_CAP_BGN_DATE','SRC_CAP_END_DATE',
                    'PRIME_CAPABILITY_FLAG','RCPT_METHOD_NAME','DB_SEGMENT_NAME','DATA_RETENTION_PERIOD',
                    'LOC_GEOG_AREA_ID','POST_CODE','WMO_REGION_CODE','HIGH_PRCN_LAT','HIGH_PRCN_LON',
                    'GRID_REF_TYPE','EAST_GRID_REF','NORTH_GRID_REF','ELEVATION','HYDR_AREA_ID',
                    'DRAINAGE_STREAM_ID','ZONE_TIME','SRC_BGN_DATE','SRC_END_DATE')
  ## Reading the meta-data is tricky because of inconsistent use of commas:
  ## Error in scan(file, what, nmax, sep, dec, quote, skip, nlines, na.strings, :
  ## line 79261 did not have 25 elements
  ## -> read as fixed-width using the comma positions of the first line and
  ##    stop before the malformed line (nrows=79000).
  if (verbose) print('station.midas')
  ref <- paste('Met Office (2012): Met Office Integrated Data Archive System (MIDAS) Land and Marine Surface Stations Data (1853-current). ',
               'NCAS British Atmospheric Data Centre, date of citation. http://catalogue.ceda.ac.uk/uuid/220a65615218d5c9cc9e4785a3234bd0')
  meta.file <- list.files(path=path,pattern=metaid,full.names=TRUE)
  metatest <- readLines(meta.file,n=1)
  metawidths <- gregexpr(',',metatest)[[1]]
  metawidths <- c(metawidths[1],diff(metawidths))
  metawidths <- c(metawidths,nchar(metatest) - sum(metawidths))
  metadata <- read.fwf(meta.file,widths=metawidths,col.names=metacolnames,nrows=79000)
  ## Each fixed-width field ends with the separating comma - strip it:
  stids <- as.character(metadata$ID); stids <- as.integer(substr(stids,1,nchar(stids)-1))
  locs <- as.character(metadata$SRC_NAME); locs <- substr(locs,1,nchar(locs)-1); locs <- gsub(" ","",locs)
  alts <- as.character(metadata$ELEVATION); alts <- as.numeric(substr(alts,1,nchar(alts)-1))
  lats <- as.character(metadata$HIGH_PRCN_LAT); lats <- as.numeric(substr(lats,1,nchar(lats)-1))
  lons <- as.character(metadata$HIGH_PRCN_LON); lons <- as.numeric(substr(lons,1,nchar(lons)-1))
  ## Bug fix: strip the trailing comma from cntrs itself - the original
  ## applied substr() to the argument 'cntr' instead of the vector 'cntrs'.
  cntrs <- as.character(metadata$LOC_GEOG_AREA_ID); cntrs <- substr(cntrs,1,nchar(cntrs)-1); cntrs <- gsub(" ","",cntrs)
  t1 <- as.character(metadata$SRC_CAP_BGN_DATE); t1 <- substr(t1,1,nchar(t1)-1); t1 <- as.Date(t1)
  t2 <- as.character(metadata$SRC_CAP_END_DATE); t2 <- substr(t2,1,nchar(t2)-1); t2 <- as.Date(t2)
  nyrs <- year(t2) - year(t1) + 1
  ## One logical selection mask per criterion; all-TRUE when not used:
  ii <- is.finite(stids)
  if (!is.null(stid)) i1 <- is.element(stids,stid) else i1 <- ii
  if (!is.null(loc)) i2 <- is.element(substr(toupper(locs),1,nchar(loc)),toupper(loc)) else i2 <- ii
  if (!is.null(alt)) {
    if (alt >= 0) i3 <- (alts >= alt) else i3 <- (alts < alt)
  } else i3 <- ii
  ## Bug fix: the lat/lon branches lacked an 'else', so passing lat=NULL or
  ## lon=NULL left i4/i5 undefined and crashed below.
  if (!is.null(lat)) i4 <- (lats <= max(lat)) & (lats >= min(lat)) else i4 <- ii
  if (!is.null(lon)) i5 <- (lons <= max(lon)) & (lons >= min(lon)) else i5 <- ii
  if (!is.null(cntr)) i6 <- is.element(substr(toupper(cntrs),1,nchar(cntr)),toupper(cntr)) else i6 <- ii
  ## Bug fix: select stations with AT LEAST nmin years of record; the
  ## original test (nmin >= nyrs) kept the short records instead.
  if (!is.null(nmin)) i7 <- (nyrs >= nmin) else i7 <- ii
  if (!is.null(it)) i8 <- ( (year(t1) <= min(it)) & (year(t2) >= max(it)) ) else i8 <- ii
  if (verbose) print(paste('#matching ID=',sum(i1),' #matching loc=',sum(i2),
                           ' #matching alt=',sum(i3),' #matching lat=',sum(i4),
                           ' #matching lon=',sum(i5),' #matching country=',sum(i6),
                           ' #matching length=',sum(i7),' #matching interval=',sum(i8)))
  ii <- i1 & i2 & i3 & i4 & i5 & i6 & i7 & i8
  stids <- stids[ii]
  locs <- locs[ii]
  lats <- lats[ii]
  lons <- lons[ii]
  alts <- alts[ii]
  cntrs <- cntrs[ii]
  t1 <- t1[ii]
  t2 <- t2[ii]
  nyrs <- nyrs[ii]
  ## Remove duplicated stations
  ind <- !duplicated(stids)
  stids <- stids[ind]; locs <- locs[ind]
  lats <- lats[ind]; lons <- lons[ind]
  alts <- alts[ind]; cntrs <- cntrs[ind]
  t1 <- t1[ind]; t2 <- t2[ind]; nyrs <- nyrs[ind]
  if ( (plot) & (sum(ii)>0) ) {
    ## Map of the selected stations: colour ~ elevation, opacity ~ record length
    nt <- nyrs; nt[nt > 50] <- 50
    z <- abs(alts/max(alts,na.rm=TRUE))
    z[!is.finite(z)] <- 0
    colt <- rgb(1-z,1,1-z,nt/50)
    plot(lons,lats,cex=0.5,col='grey')
    points(lons,lats,col=colt,pch=19,cex=0.5)
    data(geoborders)
    lines(geoborders)
  }
  if (sum(ii)==0) { print('station.midas: could not find any station'); return(NULL) }
  ## Get the station values
  colnames <- c('ID','ID_TYPE','OB_DATE','VERSION_NUM','MET_DOMAIN_NAME','OB_END_CTIME','OB_DAY_CNT',
                'SRC_ID','REC_ST_IND','PRCP_AMT','OB_DAY_CNT_Q','PRCP_AMT_Q','METO_STMP_TIME',
                'MIDAS_STMP_ETIME','PRCP_AMT_J')
  if (verbose) print(paste('read the data:',sum(ii),'stations'))
  data.files <- list.files(path=path,pattern=pattern,full.names=TRUE)
  # data.files <- data.files[(length(data.files)-1):length(data.files)] # test
  data.files <- data.files[1:(length(data.files)-2)] # fudge
  Y <- list() ## Set up a list object for storing the results temporarily
  for (i in seq_along(data.files)) {
    if (verbose) print(paste(i,length(data.files),data.files[i]))
    x <- try(read.table(data.files[i],sep=',',
                        col.names=colnames)) # Use try - some files contain inconsistencies leading to errors
    if (inherits(x,"try-error")) print('failed') else {
      im <- is.element(x$ID,stids) # select the stations according to given criteria
      # im gives both stations and different times
      i0 <- is.element(stids,x$ID) # double check to pick only metadata for the selected stations
      s <- as.integer(rownames(table(x$ID[im]))); ns <- length(s) ## list of stations selected
      t <- as.POSIXlt(as.character(x$OB_DATE[im])) # time index for the selected stations
      X <- x$PRCP_AMT[im] # X contains all the selected stations for all available times
      time <- t[!duplicated(t)]; nt <- length(time) ## common time index for all selected stations in this file
      if (verbose) {
        print(paste('Number of stations=',sum(im),sum(i0),'total=',length(rownames(table(x$ID)))))
        print(range(time)); print(length(time))
      }
      y <- matrix(rep(NA,ns*nt),ns,nt) # Set up a matrix for a zoo object for the stations data
      ## Bug fix: the station loop now uses 'j'; the original reused 'i' and
      ## clobbered the file-loop index.
      for (j in seq_len(ns)) { # Loop over all the single stations
        if (verbose) print(paste('station ID',s[j],' location=',locs[i0][j]))
        if (plot) points(lons[i0][j],lats[i0][j])
        iii <- is.element(x$ID[im],s[j]) & # Point to all the times for one respective station
          (x$OB_DAY_CNT==1)
        ## NOTE(review): x$ID[im] and x$OB_DAY_CNT have different lengths, so
        ## '&' recycles here; this probably wants x$OB_DAY_CNT[im] - verify
        ## against the data before changing it.
        precip <- X[iii] # Extract precipitation for single station
        info <- paste(x$ID_TYPE[iii][1],x$VERSION_NUM[iii][1],x$MET_DOMAIN_NAME[iii][1],
                      x$SRC_ID[iii][1],x$REC_ST_IND[iii][1])
        t1 <- t[iii] # Time index for single station
        i1 <- is.element(time,t1) # Synchronise the times for the single station with that of the station group
        i2 <- is.element(t1,time)
        if (verbose) print(c(sum(i1),sum(iii),length(t1),length(precip)))
        ## Check consistency in the data: data matched with time stamps?
        if (length(precip)!=length(t1)) {
          print('------- Some irregularities were detected ---------')
          print(paste('length(precip)=',length(precip),'!=','length(t1)',length(t1)))
          browser()
        }
        ## Quality check!
        ok <- TRUE
        if ( (sum(duplicated(t1))>0) | (sum(iii) != sum(i1)) ) { # Check for duplicated times for single station
          print('------- Some errors were detected ---------')
          if (sum(duplicated(t1))>0) { # See if it looks OK the duplicated data are removed
            print('Duplicated times!')
            #browser()
            precip <- dealwithduplicates(precip,t1,action="remove",verbose=TRUE)
            tnd <- t1[!duplicated(t1)]
            str(precip)
            i1 <- is.element(time,tnd); iii <- is.finite(precip)
            i2 <- is.element(tnd,time)
            print(paste(sum(iii),'valid rain gauge data points'))
          }
          if (sum(iii) != sum(i1)) {
            print(paste('Still, not matching number of elements!',sum(iii),sum(i1)))
            print(table(x$OB_DAY_CNT[iii]))
            ok <- FALSE
            #browser()
          }
        }
        ## Insert the station's record into row j of the data matrix
        if (ok & (sum(i1)==sum(i2))) y[j,i1] <- precip[i2] else {
          print(paste('Something went wrong! sum(i1)=',sum(i1),'sum(i2)=',sum(i2)))
          #browser()
        }
        print(paste(year(time)[1],sum(i1),'stations'))
      }
      ## Create a time series object.
      ## Bug fix: this post-processing now sits INSIDE the success branch;
      ## the original ran it after a failed read.table() as well, re-using
      ## stale y/time from the previous file.
      y <- zoo(t(y),order.by=as.Date(time))
      if (sum(i0)!=dim(y)[2]) {
        print(paste('Something wrong? sum(i0)=',sum(i0),'!= dim(y)[2]=',dim(y)[2]))
        browser()
      }
      ## NOTE(review): 'info' holds only the metadata of the LAST station in
      ## the inner loop - confirm that is intended.
      y <- as.station(y,param='precip',unit='mm/day',
                      loc=locs[i0],lon=lons[i0],lat=lats[i0],alt=alts[i0],stid=stids[i0],
                      src='MIDAS (UK MetOffice)',ref=ref,info=info,
                      url='http://browse.ceda.ac.uk/browse/badc/ukmo-midas/data/RD/yearly_files')
      ## Keep only stations with at least 300 valid days in this year
      nok <- apply(coredata(y),2,'nv')
      y <- subset(y,is=(nok>=300))
      ## Store this year's stations under Y$x.<year> (no eval(parse(...)))
      Y[[paste0('x.',year(time)[1])]] <- y
    }
  }
  invisible(Y)
}
#y <- station.midas(lon=c(-6,-2),lat=c(50,51.5))
#y <- station.midas(lon=c(-7,0),lat=c(49,52))
y <- station.midas(lon=c(-7,-1),lat=c(49,51.75))
## Combine all the list elements representing individual years into one zoo-object
## prepare the metadata
locations <- as.character(unlist(lapply(y,loc)))
lons <- as.numeric(unlist(lapply(y,lon)))
lats <- as.numeric(unlist(lapply(y,lat)))
alts <- as.numeric(unlist(lapply(y,alt)))
stationIDs <- as.integer(unlist(lapply(y,stid)))
## The metadata vectors above are parallel (one element per station per
## year), so they must all be deduplicated with the SAME index.
## Bug fix: the original deduplicated each vector independently (e.g. by
## duplicated(lons)), which misaligns the vectors as soon as two different
## stations share a longitude, latitude, altitude or name.
ind <- !duplicated(stationIDs)
locations <- locations[ind]
lons <- lons[ind]
lats <- lats[ind]
alts <- alts[ind]
stationIDs <- stationIDs[ind]
## Keep only stations that appear in more than 30 of the yearly objects
stations <- table(unlist(lapply(y,stid)))
n <- as.numeric(stations)
stids <- as.integer(rownames(stations)[n > 30])
iii <- is.element(stationIDs,stids)
lons <- lons[iii]; lats <- lats[iii]; alts <- alts[iii]
locations <- locations[iii]; stationIDs <- stationIDs[iii]
## Merge the yearly station objects into one multi-station object X.
## For every year: extract the stations on the selection list, pad the
## stations missing that year with NA columns, sort, and append in time.
for (i in seq_along(y)) {
  print(names(y)[i])
  z <- y[[i]]
  im <- is.element(stid(z),stids)   # stations present in this year's object
  mi <- !is.element(stids,stid(z))  # selected stations missing this year
  iM <- is.element(stationIDs,stids[mi])
  nm <- length(stids) - sum(im)
  ## Quality check:
  if (sum(iM)!=nm) {
    print(paste('Something wrong? sum(iM)=',sum(iM),'!= nm=',nm))
    browser()
  }
  ## Extract the stations with valid data:
  x <- subset(z,is=im)
  if (is.null(dim(x))) {
    str(x)
    print(paste("Problem with data: sum(im)",sum(im)))
    browser()
  } else {
    attr(x,'variable') <- rep('precip',dim(x)[2])
    attr(x,'unit') <- rep('mm/day',dim(x)[2])
    attr(x,'country') <- rep('England',dim(x)[2])
    attr(x,'longname') <- rep('24hr_precipitation',dim(x)[2])
    ## Make a station object with NA data for the stations missing this year
    if (nm > 0) {
      xm <- zoo(matrix(rep(NA,dim(x)[1]*nm),dim(x)[1],nm),order.by=index(x))
      ## Bug fix: 'param' is an argument of as.station, not of rep() -
      ## the original rep(param='precip',sum(iM)) fails with an unused
      ## argument error.
      xm <- as.station(xm,param=rep('precip',sum(iM)),unit=rep('mm/day',sum(iM)),
                       stid=stationIDs[iM],
                       loc=locations[iM],lon=lons[iM],lat=lats[iM],alt=alts[iM],
                       longname=rep(attr(x,'longname')[1],sum(iM)),
                       cntr=rep(cntr(x)[1],sum(iM)))
      x <- combine.stations(x,xm)
    }
    x <- sort(x)
    print(paste(i,'sum(im)=',sum(im),'nm=',nm,'nm+sum(im)',nm+sum(im)))
    print(loc(x))
    if (dim(x)[2]!=length(stids)) {
      print(paste('Something wrong? dim(x)[2]=',dim(x)[2],'!= length(stids)=',length(stids)))
      browser()
    }
  }
  ## Append this year's records to the combined object X
  if (i==1) X <- x else {
    Xx <- c(zoo(X),zoo(x))
    X <- as.station(Xx)
    X <- attrcp(x,X)
  }
  if (dim(X)[2]!=length(stids)) {
    print(paste('Something wrong? dim(X)[2]=',dim(X)[2],'!= length(stids)=',length(stids)))
    browser()
  }
}
## Save the combined Midas station object as an rda-file.
attr(X,'variable')[] <- 'precip'
pr <- X
save(file='pr.eu-circle-torbay.rda',pr)
## Keep only stations with a substantial number of valid daily values
nvalid <- apply(coredata(X),2,'nv')
x <- subset(X,is=(nvalid > 15000))
## Annual wet-day mean and wet-day frequency, restricted to 1961-2011
mu <- subset(annual(x,'wetmean',nmin=180),it=c(1961,2011))
fw <- subset(annual(x,'wetfreq',nmin=180),it=c(1961,2011))
## Require more than 45 valid annual values per station
nvalid <- apply(coredata(mu),2,'nv')
mu <- subset(mu,is=(nvalid > 45))
fw <- subset(fw,is=(nvalid > 45))
diagnose(mu)
#pcafill(mu) -> zmu
plot(mu); grid()
plot(fw); grid()
## (Web-page boilerplate below commented out - it was not valid R and broke parsing.)
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.