fdbk_dt_verif_continuous: Deterministic scores for data.tables from feedback files,...

Description Usage Arguments Value Author(s) Examples

View source: R/fdbk_asdataframe.R

Description

Returns a score data.table with ME, MAE, RMSE, SD, R2 and the number of verification data pairs (LEN). Additionally, the 5th and 95th percentile confidence bounds from bootstrap resampling can be returned. Do not use this function to verify wind direction or similarly unusual data types, because ordinary differences make no sense for them.

Usage

1
fdbk_dt_verif_continuous(DT, strat, bootscores = F, R = 100)

Arguments

DT

the data table (obs and veri_data are required)

strat

character vector of column names to stratify by (e.g. c("varno", "veri_forecast_time"))

bootscores

logical; if TRUE, bootstrap confidence intervals (5th to 95th percentile) are returned as well

R

number of bootstrap iterations (default 100)

Value

a data.table of stratified continuous verification scores (ME, SD, RMSE, R2, LEN), plus the columns CI_L and CI_U if bootstrap confidence intervals were requested

Author(s)

Felix <felix.fundel@dwd.de>

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# EXAMPLE 1 (continuous scores by lead-time)
# Loads observation/forecast pairs for varno 3 and 4, computes the continuous
# scores stratified by variable, lead time and model, and draws one panel per
# score with a line per model/variable combination.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames      <- Sys.glob("~/examplesRfdbk/*/synop/*")
# conditions are R expressions given as strings, named by the variable they use
cond        <- list(
  varno            = "varno%in%c(3,4)",
  veri_description = "grepl('forecast',veri_description)"
)
columnnames <- c("obs", "veri_data", "varno", "veri_model", "veri_forecast_time")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, 20)
DT$varno    <- varno_to_name(DT$varno)
strat       <- c("varno", "veri_forecast_time", "veri_model")
scores      <- fdbk_dt_verif_continuous(DT, strat)

p <- ggplot(
  scores,
  aes(
    x        = veri_forecast_time,
    y        = scores,
    group    = interaction(scorename, varno, veri_model),
    colour   = veri_model,
    linetype = factor(varno)
  )
) +
  geom_line(size = .7) +
  geom_point(size = 1.5) +
  facet_wrap(~scorename, scales = "free") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 70, hjust = 1))
p

# EXAMPLE 2 (talagrand diagram for each variable)
# Loads the entries whose veri_description matches 'Talagrand' and plots a
# histogram of veri_data (bin width 1) in one panel per variable.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames      <- Sys.glob("~/examplesRfdbk/talagrand/*SYNOP*")
cond        <- list(veri_description = "grepl('Talagrand',veri_description)")
columnnames <- c("veri_data", "varno")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, 20)
DT$varno    <- varno_to_name(DT$varno)

p <- ggplot(DT, aes(x = veri_data)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "white") +
  facet_wrap(~varno) +
  theme_bw()
p

# EXAMPLE 3 (TEMP verification)
# Scores non-missing observations on a fixed set of levels, stratified by
# variable and level, and plots each score as a vertical profile.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames      <- Sys.glob("~/examplesRfdbk/fof/*")
cond        <- list(
  obs   = "!is.na(obs)",
  level = "level%in%c(100000,92500,85000,70000,50000,40000,30000,25000,20000,15000,10000,5000,3000,2000,1000)"
)
columnnames <- c("obs", "veri_data", "varno", "level")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, cores = 20)
DT$varno    <- varno_to_name(DT$varno)
strat       <- c("varno", "level")
scores      <- fdbk_dt_verif_continuous(DT, strat)

# sort by score name, variable and level so geom_path() connects the levels in order
setkey(scores, scorename, varno, level)
# the sample size (LEN) is not plotted as a profile
scores      <- scores[!scorename %chin% c("LEN"), ]

p <- ggplot(scores, aes(x = scores, y = level, group = interaction(varno, scorename))) +
  geom_path() +
  facet_wrap(~scorename~varno, scales = "free_x") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
  scale_y_reverse()
p

# EXAMPLE 4 (SATOB verification)
# Bins the observations into 10x10 degree lon/lat boxes, computes the scores
# per box, variable, lead time and station id, and maps R2 for one selection.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames      <- Sys.glob("~/examplesRfdbk/gme/satob/*")
cond        <- list(obs = "!is.na(obs)")
columnnames <- c("veri_data", "varno", "obs", "veri_forecast_time", "statid", "lat", "lon")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, 10)

# replace lon/lat by factors labelled with the centre of their 10 degree bin
DT[, lon := cut(lon, seq(-180, 180, by = 10), labels = seq(-175, 175, by = 10), include.lowest = TRUE)]
DT[, lat := cut(lat, seq(-90, 90, by = 10), labels = seq(-85, 85, by = 10), include.lowest = TRUE)]

strat       <- c("varno", "veri_forecast_time", "statid", "lon", "lat")
scores      <- fdbk_dt_verif_continuous(DT, strat)

# turn the bin-centre factor labels back into numbers for plotting
scores[, lon := as.numeric(levels(lon))[lon]]
scores[, lat := as.numeric(levels(lat))[lat]]
scores[, varno := varno_to_name(varno)]
scores      <- scores[!is.na(scores), ]

# NOTE(review): `breaks` is numeric while the fill values are cut() interval
# labels - confirm that the legend shows the intended keys.
p <- ggplot(
  droplevels(scores[varno == "U" & veri_forecast_time == "10800" & scorename == "R2", ]),
  aes(x = lon, y = lat, fill = cut(scores, seq(0, 1, by = .1)))
) +
  geom_raster() +
  facet_wrap(~varno~statid~scorename) +
  # tim.colors() comes from the 'fields' package, which this example never
  # attaches; the namespace-qualified call works either way
  scale_fill_manual(breaks = seq(0, 1, by = .1), values = fields::tim.colors(10), drop = FALSE) +
  borders()
p

# EXAMPLE 5 (SYNOP score time series)
# Restricts the data to rows available for both models, computes the scores
# per lead time, initial date, variable and model, and plots the RMSE of RH
# as a time series.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames <- Sys.glob("~/examplesRfdbk/*/synop/verSYNOP.*")
cond   <- list(
  obs                = "!is.na(obs)",
  veri_description   = "grepl('forecast',veri_description)",
  veri_forecast_time = "veri_forecast_time%in%c(1200,16800)",
  state              = "state%in%c(0,1)",
  statid             = "!is.na(as.numeric(statid))"
)

# named `columnnames` as in the other examples; the former name `colnames`
# shadowed base::colnames()
columnnames <- c("obs", "veri_data", "veri_forecast_time", "veri_initial_date", "varno", "veri_model", "statid")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, cores = 20)
keep        <- comparableRows(
  DT,
  splitCol  = "veri_model",
  splitVal  = c("GME       ", "ICON      "),
  compareBy = c("veri_forecast_time", "veri_initial_date", "varno", "statid")
)
DT          <- DT[keep]
gc()  # release the memory of the unfiltered table

scores <- fdbk_dt_verif_continuous(
  DT,
  strat = c("veri_forecast_time", "veri_initial_date", "varno", "veri_model")
)
scores$veri_initial_date <- as.POSIXct(scores$veri_initial_date, format = "%Y%m%d%H")
scores$varno             <- varno_to_name(scores$varno)

p <- ggplot(
  scores[varno == "RH" & scorename == "RMSE", ],
  aes(
    x        = veri_initial_date,
    y        = scores,
    color    = factor(veri_forecast_time),
    linetype = veri_model,
    group    = veri_model
  )
) +
  geom_line() +
  facet_grid(~scorename~varno~veri_forecast_time, scales = "free")
p

# EXAMPLE 6 (TEMP time series)
# Computes the scores per lead time, level, variable, valid date and model and
# shows the mean error of T as a level-by-time raster, one panel per
# model/lead time.
library(ggplot2)       # unlike require(), stops right here if a package is missing
library(RColorBrewer)

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed
fnames <- Sys.glob("~/examplesRfdbk/*/temp/verTEMP.*")
LEVELS <- c(100000, 92500, 85000, 70000, 50000, 40000, 30000, 25000, 20000, 15000, 10000, 7000, 5000, 3000, 2000, 1000)

cond <- list(
  # both statid conditions are combined in one entry so that the list has no
  # duplicated name (a lookup by name would only ever find the first one)
  statid             = "!is.na(as.numeric(statid)) & round(as.numeric(statid)/1000)<=10",
  obs                = "!is.na(obs)",
  state              = "state%in%c(0,1,5)",
  veri_run_type      = "veri_run_type%in%c(0,4)",
  # built from LEVELS so the filter and the axis labels below cannot diverge;
  # format() keeps 100000 from being written as 1e+05
  level              = sprintf(
    "level%%in%%c(%s)",
    paste(format(LEVELS, scientific = FALSE, trim = TRUE), collapse = ",")
  ),
  veri_forecast_time = "veri_forecast_time%in%c(0,4800,9600,14400,16800)"
)
columnnames <- c("obs", "veri_data", "veri_forecast_time", "veri_initial_date", "level", "varno", "veri_model")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, cores = 10)

# valid date = initial date + lead time; the factor 36 turns the lead time
# into seconds (presumably it is stored as hhmm, e.g. 4800 = 48 h - confirm)
DT[, valid_date := as.POSIXct(veri_initial_date, format = "%Y%m%d%H%M") + veri_forecast_time * 36]
SCORES <- fdbk_dt_verif_continuous(
  DT,
  strat = c("veri_forecast_time", "level", "varno", "valid_date", "veri_model")
)
SCORES[, varno := varno_to_name(varno)]

# dev.new() opens the platform's default graphics device (x11() is not
# available everywhere)
dev.new(width = 18, height = 6)
ggplot(
  SCORES[scorename == "ME" & varno == "T"],
  aes(
    x    = valid_date,
    y    = as.numeric(factor(level)),
    fill = cut(scores, seq(-10, 10, length.out = 9))
  )
) +
  # geom_raster() has no `limits` parameter, so the former argument was dropped
  geom_raster() +
  facet_wrap(~veri_model~veri_forecast_time~varno, ncol = 5) +
  scale_y_reverse(breaks = seq(length(LEVELS), 1, by = -1), labels = rev(LEVELS)) +
  scale_fill_manual("ME", values = rev(brewer.pal(9, "RdYlBu")), drop = FALSE) +
  theme_bw()

# EXAMPLE 7 (continuous scores by lead-time plus confidence intervals)
# Computes the scores per variable and lead time with bootstrap confidence
# intervals (100 resamples) and plots them with error bars, one panel per score.
library(ggplot2)  # unlike require(), stops right here if ggplot2 is missing

# Sys.glob() expands "~" and the wildcards itself, so no shell / `ls` is needed.
# head() takes at most the first 10 files; `[1:10]` would pad the vector with
# NA when fewer than 10 files match.
fnames      <- head(Sys.glob("~/examplesRfdbk/*/synop/verSYNOP.*"), 10)
cond        <- list(
  varno            = "varno%in%c(3,4)",
  veri_description = "grepl('forecast',veri_description)"
)
columnnames <- c("obs", "veri_data", "varno", "veri_forecast_time")
DT          <- fdbk_dt_multi_large(fnames, cond, columnnames, 20)
DT$varno    <- varno_to_name(DT$varno)
strat       <- c("varno", "veri_forecast_time")
scores      <- fdbk_dt_verif_continuous(DT, strat, bootscores = TRUE, R = 100)

ggplot(scores, aes(x = veri_forecast_time, y = scores, color = varno)) +
  geom_errorbar(aes(ymin = CI_L, ymax = CI_U), width = .1) +
  geom_line() +
  geom_point() +
  theme_bw() +
  # argument spelled out as `scales`; `scale` relied on partial matching
  facet_wrap(~scorename, scales = "free_y", ncol = 6)

rfxf/Rfdbk documentation built on May 27, 2019, 7:22 a.m.