R/TLstatistics.R

Defines functions TLstatistics

Documented in TLstatistics

TLstatistics<-function(func,...,historical=NULL,datasources=NULL){
  require(qualV, quietly = TRUE)
  if(is.null(historical)){
    types<-c("historical","projection")
  } else if(historical==TRUE){
    types<-"historical"
  } else if(historical==FALSE){
    types<-"projection"
  } else {
    stop("'historical argument must be either TRUE, FALSE or NULL.")
  }
  ####################################################
  #Define functions
  ####################################################
  
  #Mann Kendall test
  mk <- function(x) {
    es <- function(x) {
      ln <- length(x)
      mks <- 0
      for (i in 1:(ln-1))
        for (j in (i+1):ln) mks <- mks + ifelse(is.na(x[j]-x[i]),0,sign(x[j]-x[i]))
      return(mks)
    }
    vars <- function(x) {
      ln <- length(x)
      li <- length(which(!is.na(x)))
      g  <- rep(0,ln)
      for (i in 1:ln) g[i] <- max(0,length(which(x == x[i])) - 1)
      ti <- length(which(g>0))
      return((li*(li-1)*(2*li+5)-sum(g*(g-1)*(2*g+5)))/18)
    }
    slope <- function(x) {
      sl <- NULL
      ln <- length(x)
      for (i in 1:(ln-1)) sl <- c(sl,(x[(i+1):ln]-x[i])/(c((i+1):ln)-i))
      return(median(sl,na.rm=T))
    }
    xs <- es(x)
    xv <- vars(x)
    xz <- (xs>0)*(xs-1)/sqrt(xv) + (xs<0)*(xs+1)/sqrt(xv)
    p <- 1-pnorm(abs(xz)) # Signifikanzniveau
    m <- slope(x)         # Trend
    return(c(m=m,p=p))
  }
  
  ####################################################
  #Get the magpie data
  ####################################################
  magpie<-func(...)
  if(is.null(magpie))stop("Couldn't read magpie output from the gdx file")
  if(dim(magpie)[3]>1)stop("Only possible to preform on outputs with one data column")
  if(ncells(magpie)>1)stop("Only possible to preform on global outputs")
  getNames(magpie)<-"magpie"
  
  ####################################################
  #Get the external validation data
  ####################################################
  data<-getData(func=func,...,historical=historical,datasources=datasources)
  if(is.null(data)) stop("No external data for comparison found.")
  #if uncertainty information is present, take the mean as obs and the range as the one sigma interval
  for(hist in names(data[[1]][["data"]])){
    for(coll in names(data[[1]][["data"]][[hist]])){
      i_data<-new.magpie(getCells(data[[1]][["data"]][[hist]][[coll]]),getYears(data[[1]][["data"]][[hist]][[coll]]),c("obs","sigma"))
      tmp<-data[[1]][["data"]][[hist]][[coll]]
      if(length(fulldim(tmp)[[2]][[3]])==3){
        i_data[,,"obs"]<-setNames(dimSums(tmp[,,c("up","lo")],dim=3.1)/2,"obs")
        i_data[,,"sigma"]<-abs(setNames(tmp[,,"up"],"sigma")-setNames(i_data[,,"obs"],"sigma"))      
      } else {
        i_data[,,"obs"]<-setNames(tmp,"obs")
        i_data[,,"sigma"]<-NA
      }
      data[[1]][["data"]][[hist]][[coll]]<-i_data      
    }
  }
  
  
  ####################################################
  #Create the output objects
  ####################################################
  #level test object
  datasets<-unlist(lapply(data[[1]][["data"]],names))
  tests<-c("L_GRI","L_MRSR","L_RD","T_MK_m_annual","T_MK_p","O_MK_m_annual","O_MK_p")
  names<-c(t(outer(datasets,tests,paste,sep=".")))
  #Add information about the quantity being tested
  names<-paste(names,names(data),sep=".")
  level_test<-new.magpie(getCells(magpie),NULL,names)

  ####################################################
  #Do the comparison
  ####################################################
  for(type in types){
    for(dataset in names(data[[1]][["data"]][[type]])){

      #####################################################
      #Level
      #####################################################
      
      #determine MAgPIE reference year
      yref  <- getYears(magpie,as.integer=TRUE)[1]
      #determine data comparison years (10 years around start year)
      ycomp <- getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)
      ycomp <- ycomp[which(-5<=(ycomp-yref)&(ycomp-yref)<=5)]
      
      if(length(ycomp)>0) {
        #get vetors with simulated and observed values
        sim<-rep(as.numeric(magpie[,paste("y",yref,sep=""),]),length(ycomp))      
        obs<-as.vector(data[[1]][["data"]][[type]][[dataset]][,paste("y",ycomp,sep=""),"obs"])
        sigma<-as.vector(data[[1]][["data"]][[type]][[dataset]][,paste("y",ycomp,sep=""),"sigma"])
        
        ##############################################################
        #Leggett Williams
        level_test[,,paste(dataset,"L_GRI",sep=".")]<-GRI(p=sim,o=obs)
        
        ###############################################################
        #MRSR
        #define MRSR
        MRSR<-function(obs,sim,sigma=NA){
          if(length(obs)!=length(sim)) stop("Number of observations and simulations do not fit together")
          num<-sqrt(1/length(obs)*sum((obs-sim)^2))
          if(!any(is.na(sigma))){
            den<-1/length(obs)*sum(sigma)
            if(den==0)return(NA)
          } else {
            den<-sqrt(1/length(obs)*sum((obs-mean(obs))^2))
            if(den==0)return(NA)
          }
          return(num/den)        
        }
        level_test[,,paste(dataset,"L_MRSR",sep=".")]<-MRSR(obs=obs,sim=sim,sigma=sigma)
        
        #############################################3
        #relative difference
        ###############################################
        level_test[,,paste(dataset,"L_RD",sep=".")]<-(mean(sim,na.rm=TRUE)-mean(obs,na.rm=TRUE))/mean(obs,na.rm=TRUE)
      }
      
      #################################################33
      #Short term trend (40 years)
      ##################################################
      i_magpie<-NULL
      if(type=="projection"){
        minx<-max(c(min(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=T)),min(getYears(magpie,as.integer=T))))
        maxx<-min(c(max(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=T)),max(getYears(magpie,as.integer=T))))
        overlap<-sort(unique(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)[which(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)>=minx&getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)<=maxx)]))
        overlap<-overlap[which(abs(overlap-getYears(magpie,as.integer=TRUE)[1])<40)]
        #Do the interpolation
        if(length(overlap)>0){
          i_magpie<-time_interpolate(magpie,interpolated_year = overlap,integrate_interpolated_years = FALSE)
          #get correct data years
          i_data<-data[[1]][["data"]][[type]][[dataset]][,paste("y",overlap,sep=""),]
        }
      } else if(type=="historical"){   
        yhist<-getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)
        #throw away years that are too far in the past
        yhist<-yhist[which(abs(yhist-getYears(magpie,as.integer=TRUE)[1])<40)] 
        if(length(yhist)>0){
          #construct artificial data timeseries starting at magpie start point
          years<-yhist-yhist[1]+getYears(magpie,as.integer=TRUE)[1]
          i_data<-new.magpie(getCells(i_data),years,c("obs","sigma"))
          i_data[,years,]<-setYears(data[[1]][["data"]][[type]][[dataset]][,paste("y",yhist,sep=""),],paste("y",years,sep=""))  
          #interpolate the magpie time series
          i_magpie<-time_interpolate(magpie,interpolated_year =getYears(i_data),integrate_interpolated_years = FALSE)
          #Correct the data trend to MAgPIE start value
          i_data[,,"obs"]<-i_data[,,"obs"]-setYears(i_data[,1,"obs"],NULL)+setYears(i_magpie[,1,],NULL)
        }
      }
      if(!is.null(i_magpie)){
        ###############################################
        #Mann Kendall
        #check if data area temporally equidistant
        if(nyears(i_data)<2){
          level_test[,,paste(dataset,"T_MK_m_annual",sep=".")]<-NA
          level_test[,,paste(dataset,"T_MK_p",sep=".")]<-NA
          dist<-NA  
        } else {
          years<-getYears(i_data,as.integer=TRUE)
          dist<-years[2:length(years)]-years[1:(length(years)-1)]
          dist<-unique(dist)
          if(length(dist)>1) dist<-NA
        }
        # Do the test on the difference between model and data divided by 1995 data value
        if(!is.na(dist)){
          #Difference between timeseries
          tmp<-(as.vector(i_magpie[,paste("y",years,sep=""),])-as.vector(i_data[,paste("y",years,sep=""),"obs"]))
          #Divide by 1995 data value
          tmp<-tmp/as.numeric(dimSums(data[[1]][["data"]][[type]][[dataset]][,paste("y",ycomp,sep=""),"obs"],dim=2)/length(ycomp))
          tmp<-mk(tmp)
          level_test[,,paste(dataset,"T_MK_m_annual",sep=".")]<-tmp["m"]/dist
          level_test[,,paste(dataset,"T_MK_p",sep=".")]<-tmp["p"]
        }
      }
      
      
      #################################################33
      #Overlap trend
      ##################################################  
      minx<-max(c(min(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=T)),min(getYears(magpie,as.integer=T))))
      maxx<-min(c(max(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=T)),max(getYears(magpie,as.integer=T))))
      overlap<-sort(unique(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)[which(getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)>=minx&getYears(data[[1]][["data"]][[type]][[dataset]],as.integer=TRUE)<=maxx)]))
      if(length(overlap)>0){
        #Do the interpolation
        i_magpie<-time_interpolate(magpie,interpolated_year = overlap,integrate_interpolated_years = FALSE)
        #if uncertainty information is present, take the mean as obs and the range as the one sigma interval   
        i_data<-data[[1]][["data"]][[type]][[dataset]][,paste("y",overlap,sep=""),]
        #interpolate the magpie time series
        i_magpie<-time_interpolate(magpie,interpolated_year = getYears(i_data),integrate_interpolated_years = FALSE)
        #Correct the data trend to MAgPIE start value
        i_data[,,"obs"]<-i_data[,,"obs"]-setYears(i_data[,1,"obs"],NULL)+setYears(i_magpie[,1,],NULL)
        ###############################################
        #Mann Kendall
        #check if data area temporally equidistant
        if(nyears(i_data)<2){
          level_test[,,paste(dataset,"O_MK_m_annual",sep=".")]<-NA
          level_test[,,paste(dataset,"O_MK_p",sep=".")]<-NA
          dist<-NA
        } else {
          years<-getYears(i_data,as.integer=TRUE)
          dist<-years[2:length(years)]-years[1:(length(years)-1)]
          dist<-unique(dist)
          if(length(dist)>1) dist<-NA
        }
        if(!is.na(dist)){
          #Difference between timeseries
          tmp<-as.vector(i_magpie[,paste("y",years,sep=""),])-as.vector(i_data[,paste("y",years,sep=""),"obs"])  
          #Divide by 1995 data value
          tmp<-tmp/as.numeric(dimSums(data[[1]][["data"]][[type]][[dataset]][,paste("y",ycomp,sep=""),"obs"],dim=2)/length(ycomp))
          tmp<-mk(tmp)
          level_test[,,paste(dataset,"O_MK_m_annual",sep=".")]<-tmp["m"]/dist
          level_test[,,paste(dataset,"O_MK_p",sep=".")]<-tmp["p"]
        }
      }
    }
  }
  return(level_test)
}
pik-piam/validation documentation built on Nov. 5, 2019, 12:50 a.m.