inst/scripts/EddyUStarFilterDP.R

#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++ R script with sEddyProc methods for ustar filtering +++
#+++ Ustar filtering adapted after the idea in Papale, D. et al. (2006) +++
#+++ Dependencies: Eddy.R
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

sEddyProc$methods(
	sEstUstarThreshold = function(
	  ##title<<
	  ## sEddyProc$sEstUstarThreshold - Estimating ustar threshold
	  ##description<<
	  ## Calling \code{\link{usEstUstarThreshold}} for class data and storing results
		UstarColName = "Ustar"		##<< column name for UStar
		,NEEColName = "NEE"			##<< column name for NEE
		,TempColName = "Tair"		##<< column name for air temperature
		,RgColName = "Rg"			##<< column name for solar radiation for omitting night time data
		, ...						##<< further arguments to \code{\link{usEstUstarThreshold}}		
){
	##author<<
	## TW
	ds <- sDATA[,c("sDateTime", UstarColName,NEEColName,TempColName,RgColName)]
	colnames(ds) <- c("sDateTime","Ustar","NEE","Tair","Rg")
	resEst <- usEstUstarThreshold( ds, ...)
	sUSTAR <<- resEst[c("uStarTh","seasonYear","season","tempInSeason")]
	sDATA$season <<-  resEst$bins$season
	sDATA$tempBin <<-  resEst$bins$tempBin
	sDATA$uStarBin <<-  resEst$bins$uStarBin
	##value<< restult of \code{\link{usEstUstarThreshold}}. In addition the result is stored
	## in class variable sUSTAR and the bins as additional columsn to sDATA
	resEst
})

usEstUstarThreshold = function(
		##title<<
		## sEddyProc$sEstUstarThreshold - Estimating ustar threshold
		##description<<
		## Estimate the Ustar threshold by aggregating the estimates for seasonal and temperature subsets.
		ds 					##<< data.frame with columns "sDateTime", "Ustar", "NEE", "Tair", and "Rg"
		,seasonFactor.v = usCreateSeasonFactorMonth(ds$sDateTime) ##<< factor for subsetting times (see details)
		,seasonFactorsYear = usGetYearOfSeason(seasonFactor.v, ds$sDateTime)   ##<< named integer vector: for each seasonFactor level, get the year (aggregation period) that this season belongs to  
		,ctrlUstarEst.l = usControlUstarEst()			##<< control parameters for estimating uStar on a single binned series, see \code{\link{usControlUstarEst}}
		,ctrlUstarSub.l = usControlUstarSubsetting()	##<< control parameters for subsetting time series (number of temperature and Ustar classes \ldots), see \code{\link{usControlUstarSubsetting}} 
		,fEstimateUStarBinned = usEstUstarThresholdSingleFw2Binned	##<< function to estimate UStar on a single binned series, see \code{\link{usEstUstarThresholdSingleFw2Binned}}
		,isCleaned=FALSE			##<< set to TRUE, if the data was cleaned already, to avoid expensive call to \code{\link{usGetValidUstarIndices}}.
){
	##author<<
	## TW, OM
	##references<< 
	## Ustar filtering following the idea in Papale, D. et al. (2006)  
	## Towards a standardized processing of net ecosystem exchange measured with eddy covariance technique: algorithms and uncertainty estimation.
	## Biogeosciences 3(4): 571-583.
	
	##details<<
	## The threshold for sufficiently turbulent conditions u* (Ustar) 
	## is estimated for different subsets of the time series.
	## From the estimates for each season (each value in \code{seasonFactor.v}) 
	## the maximum of all seasons of one year is reported as estimate for this year.
	## Within each season the time series is split by temperature classes. 
	## Among these Ustar estimates, the median is reported as season value.
	##
	## In order to split the seasons, the uses must provide a vector with argument \code{seasonFactor.v}.
	## All positions with the same factor, belong to
	## the same season. It is conveniently generated by one of the following functions:
	## \itemize{
	## 	\item{ \code{\link{usCreateSeasonFactorMonth}} (default DJF-MAM-JJA-SON with December from previous to January of the year) }
	## 	\item{ \code{\link{usCreateSeasonFactorMonthWithinYear}} (default DJF-MAM-JJA-SON with December from the same year) }
	## 	\item{ \code{\link{usCreateSeasonFactorYday}} for a refined specification of season starts. }
	## 	\item{ \code{\link{usCreateSeasonFactorYdayYear}} for specifying different seasons season between years. }
	## } 
	##
	## The estimation of Ustar on a single binned series can be selected argument \code{fEstimateUStarBinned}.
	## \itemize{
	## 	\item{ \code{\link{usEstUstarThresholdSingleFw1Binned}} }
	## 	\item{ \code{\link{usEstUstarThresholdSingleFw2Binned}} (default) }
	## } 
	##
	## This function is called by
	## \itemize{
	## \item{ \code{\link{sEstUstarThreshold}} which stores the result in the class variables (sUSTAR and sDATA).}
	## \item{ \code{\link{sEstUstarThresholdDistribution}} which additionally estimates median and confidence intervals for each year by bootstrapping the original data within seasons.}
	## } 
	##
	## For inspecting the NEE~uStar relationship plotting is provided by \code{\link{sPlotNEEVersusUStarForSeason}}
	#
	# add index columns to locate which season/tempClass/uStarBin each record belongs
	# cannot directly change sDATA, in EddyProcC, because will be overwritten in each bootstrap 
	if( any(is.na(seasonFactor.v)) ) stop("usEstUstarThreshold: encountered NA in seasonFactor. Need to specify a valid season for each record.")
    ds$season <- as.factor(seasonFactor.v)
	ds$seasonYear <- seasonFactorsYear[seasonFactor.v]
	ds$tempBin <- NA_integer_
	ds$uStarBin <- NA_integer_
	# extract valid (nighttime records)
	if( isCleaned){
		isValidUStar <- TRUE
		dsc <- ds
	} else {
		isValidUStar <- usGetValidUstarIndices( ds,  swThr=ctrlUstarSub.l$swThr) 
		dsc <- 	ds[isValidUStar, ,drop=FALSE] 
	}
	if( nrow(dsc)==0L ) stop("sEstUstarThreshold: no finite records in dataset")
	#
	tdsc <- as.data.frame(table(dsc$season)); colnames(tdsc) <- c("season","nRec")
	#some seasons might be absent in dsc from cleaning, construct vectors that report NA for missing seasons
	nRecValidInSeason <- merge( data.frame( season=levels(ds$season) ), tdsc, all.x=TRUE)
	nRecValidInSeasonYear <- merge(nRecValidInSeason, data.frame(season=names(seasonFactorsYear), seasonYear=seasonFactorsYear), all.x=TRUE)
	nYear <- ddply(nRecValidInSeasonYear, as.quoted('seasonYear'), summarize, nRec=sum(substitute(nRec), na.rm=TRUE) )
	seasonYearsWithFewData <- nYear$year[ nYear$nRec < ctrlUstarSub.l$minRecordsWithinYear ]
	#nRecValidInSeasonYear$seasonAgg <- nRecValidInSeasonYear$season
	#
	#dsi <- subset(dsc, season == 4)
	#dsi <- subset(dsc, season == 0)
	fEstimateUStarSeason <- function(...){
		if( isTRUE(ctrlUstarEst.l$isUsingCPTSeveralT)){
			##details<< \describe{\item{change point detection (CPT) method}{
			## With specifying \code{ctrlUstarEst.l=usControlUstarEst(isUsingCPTSeveralT=TRUE)}
			## change point detection is applied instead of the moving point test (e.g. with Fw2Binned).
			##
			## The sometimes sensitive binning of uStar values within a temperature class is avoided.
			## Further, possible spurious thresholds are avoid by testing that the model with a threshold
			## fits the data better than a model without a threshold using a likelihood ratio test.
			## In addition, with CPT seasons are excluded where a threshold was detected in only less 
			## than ctrlUstarEst.l$minValidUStarTempClassesProp (default 20% ) of the 
			## temperature classes.
			##
			## Note, that this method often gives higher estimates of the u* threshold.
			## }}
			.estimateUStarSeasonCPTSeveralT(...)
		} else .estimateUStarSeason(...)
	}
	UstarSeasonsTempL <- dlply(dsc, .(season), fEstimateUStarSeason, .drop = FALSE, .inform = TRUE
				#, .drop_o = FALSE, .inform = TRUE
				,ctrlUstarSub.l = ctrlUstarSub.l
				,ctrlUstarEst.l = ctrlUstarEst.l
				,fEstimateUStarBinned = fEstimateUStarBinned
		)  
	UstarSeasonsTemp <- laply(UstarSeasonsTempL, "[[", 1L)	# matrix (nSeason x nTemp)
	uStarSeasons <- apply( UstarSeasonsTemp, 1, median, na.rm=TRUE)
	# different to C-version, report NA where threshold was found in less than 20% of temperature classes
	iNonValid <- rowSums(is.finite(UstarSeasonsTemp))/ncol(UstarSeasonsTemp) < ctrlUstarEst.l$minValidUStarTempClassesProp
	uStarSeasons[iNonValid] <- NA_real_
	# extract the temperature and bin indices
	# season <- names(UstarSeasonsTempL)[2]
	for( season in names(UstarSeasonsTempL) ){
		dsc[dsc$season==season,c("tempBin","uStarBin")] <- UstarSeasonsTempL[[season]]$bins.F
	}
	#plot( tempBin ~ Tair, dsc, col=rainbow(8)[ as.factor(dsc$season)] )	# check correct ordering
	#
	resultsSeason <- nRecValidInSeasonYear
	resultsSeason$uStarSeasonEst <- uStarSeasons
	#
	resultsSeasonYear = ddply(resultsSeason, as.quoted('seasonYear'), function(dss){
				data.frame( uStarMaxSeason=if( all(!is.finite(dss$uStarSeasonEst))  ) NA_real_ else max( dss$uStarSeasonEst, na.rm=TRUE), seasonYear=dss$seasonYear[1], nRec=sum(dss$nRec) )
			} )
	resultsSeasonYear$uStarAggr <- resultsSeasonYear$uStarMaxSeason
	#---- for seasonYears with too few records and for seasonYears with no seasonal estimate do a pooled estimate
	##details<< \describe{\item{One-big-season fallback}{
	## If there are too few records within one year, of when no season yielded a finite u*Threshold estimate, then
	## the yearly u*Th is estimated by pooling the data from seasons within one seasonYear.
	## The user can suppress using pooled data on few records by providing option
	## \code{ctrlUstarSub.l$isUsingOneBigSeasonOnFewRecords = FALSE} (see \code{\link{usControlUstarSubsetting}})
	## }}
	seasonYearsPooled <- resultsSeasonYear$seasonYear[ !is.finite(resultsSeasonYear$uStarAggr) & resultsSeasonYear$nRec > 0]
	if( isTRUE(ctrlUstarSub.l$isUsingOneBigSeasonOnFewRecords) )
		seasonYearsPooled <- union( seasonYearsWithFewData, seasonYearsPooled)
	resultsSeasonYearPooled <- if( !length(seasonYearsPooled) ){
				resultsSeasonYearPooled <- data.frame(seasonYear=NA_character_, nRec=NA_integer_ , uStarPooled=NA_real_)[FALSE,]
			} else {
				dscPooled <- dsc[dsc$seasonYear %in% seasonYearsPooled, ,drop=FALSE]
				UstarYearsTempL <- dlply(dscPooled, as.quoted('seasonYear'), fEstimateUStarSeason, .drop = FALSE, .inform = TRUE
						,ctrlUstarSub.l = ctrlUstarSub.l
						,ctrlUstarEst.l = ctrlUstarEst.l
						,fEstimateUStarBinned = fEstimateUStarBinned
					)
				UstarYearsTemp <- laply(UstarYearsTempL, "[[", 1L, .drop=FALSE)	# matrix (nSeason x nTemp)
				uStarYears <- apply( UstarYearsTemp, 1, median, na.rm=TRUE)
				# different to C-version, report NA where threshold was found in less than 20% of temperature classes
				iNonValid <- rowSums(is.finite(UstarYearsTemp))/ncol(UstarYearsTemp) < ctrlUstarEst.l$minValidUStarTempClassesProp
				uStarYears[iNonValid] <- NA_real_
				# omit gettting binning into ds (do not overwrite bins from seasonal estimates)
				resultsSeasonYearPooled <- data.frame(seasonYear=names(uStarYears), nRec=as.vector(table(dscPooled$seasonYear)), uStarPooled=uStarYears)
			}
	resultsSeasonYear <- merge(resultsSeasonYear, resultsSeasonYearPooled, all.x=TRUE)
	isFinitePooled <- is.finite(resultsSeasonYear$uStarPooled)
	resultsSeasonYear$uStarAggr[isFinitePooled] <- resultsSeasonYear$uStarPooled[isFinitePooled]
	#----- overall is the median across years
	uStarMedianYears <- median(resultsSeasonYear$uStarAggr, na.rm=TRUE)
	message(paste("Estimated UStar threshold of: ", signif(uStarMedianYears,2)
					,"by using controls:\n", paste(capture.output(unlist(ctrlUstarSub.l)),collapse="\n")
			))
	#----- propagate aggregate estimates back to NA-slots of years and seaons
	isNonFinite <- !is.finite(resultsSeasonYear$uStarAggr)
	resultsSeasonYear$uStarAggr[isNonFinite] <- uStarMedianYears
	# merge yearly aggregated estimates to season and replace by finite seasonal estimates
	resultsSeason <- merge(resultsSeason, resultsSeasonYear[,c("seasonYear","uStarAggr")], all.x=TRUE)
	isFiniteEst <- (is.finite(resultsSeason$uStarSeasonEst))
	resultsSeason$uStarAggr[isFiniteEst] <- resultsSeason$uStarSeasonEst[isFiniteEst]
	#
	resultsDf <- resultsSeason[,c("season","seasonYear","uStarAggr")]
	resultsDf$season <- as.factor(resultsDf$season)
	resultsDf$aggregationMode <- "season"
	resultsDf <- tmp <- rbind(cbind(data.frame(aggregationMode="year", season=as.factor(NA_integer_)), resultsSeasonYear[,c("seasonYear","uStarAggr")]),resultsDf )
	resultsDf <- tmp <- rbind(cbind(data.frame(aggregationMode="single", season=as.factor(NA), seasonYear=NA_integer_), uStarAggr=uStarMedianYears),resultsDf )
	resultsDf$uStar <- resultsDf$uStarAggr
	# store indices in ds, first remove columns
	ds[isValidUStar,] <- dsc 	
	#plot( tempBin ~ Tair, ds, col=rainbow(8)[ as.factor(ds$season)] )	# check correct ordering
	##value<< A list with entries
	res <- list(
			uStarTh = resultsDf[,c("aggregationMode","seasonYear","season","uStar")]	##<< data.frame with columns "aggregationMode","seasonYear","season","uStar" 
			## with rows for "single": the entire aggregate (median across years)
			##, "seasonYear": each year (maximum across seasons or estimate on pooled data)
			##, "season": each season (median across temperature classes)
			,seasonYear = resultsSeasonYear		##<< data.frame listing results for year with columns "seasonYear"
			## , "uStarMaxSeason" the maximum across seasonal estimates within the year
			## , "uStarPooled" the estimate based on data pooled across the year (only calculated on few valid records or on uStarMaxSeason was nonfinite) 
			## , "nRec" number of valid records  (only if the pooled estimate was calculated) 
			## , "uStarAggr" chosen estimate, corresponding to uStarPooled if this was calculated, or uStarMaxSeason or uStarTh across years if the former was non-finite
			,season =  resultsSeason	##<< data.frame listing results for each season
			## , "nRec" the number of valid records
			## , "uStarSeasonEst" the estimate for based on data within the season (median across temperature classes)
			## , "uStarAggr" chose estimate, corresponding to uStarSeasonEst, or the yearly seasonYear$uStarAggr, if the former was non-finite 
			,tempInSeason=t(UstarSeasonsTemp)		##<< numeric matrix (nTemp x nAggSeason): estimates for each temperature subset for each season
			,bins=ds[,c("season","tempBin","uStarBin")]		##<< columns season, tempBin and uStarBin for each record of input ds 
			## reporting classes of similar environmental conditions that the record belongs to.
	)
	res
}


.tmp.f <- function(){
    Dir.s <- paste(system.file(package='REddyProc'), 'examples', sep='/')
    EddyData.F <- ds <- fLoadTXTIntoDataframe('Example_DETha98.txt', Dir.s)
    EddyDataWithPosix.F <- ds <- fConvertTimeToPosix(EddyData.F, 'YDH', Year.s='Year', Day.s='DoY', Hour.s='Hour')
    EddyProc.C <- sEddyProc$new('DE-Tha', EddyDataWithPosix.F, c('NEE','Rg','Tair','VPD','Ustar'))   
    #ds <- head(ds,2000)
    (Result.L <- EddyProc.C$sEstUstarThreshold())
	seasonI <- "1998008"
	EddyProc.C$sPlotNEEVersusUStarForSeason(seasonI)
	dsSeason <- subset(EddyProc.C$sDATA, season==seasonI)
	# tempBinI <- 4
	for( tempBinI in sort(unique(dsSeason$tempBin))){
		#plot( tempBin ~ Tair, dsc, col=rainbow(8)[ as.factor(dsc$season)] )	# check correct ordering
		uStarTh <- Result.L$UstarSeasonTemp[ tempBinI, seasonI] 
		dss <- subset(dsSeason,  tempBin==tempBinI )
		.plotNEEVersusUStarTempClass(dss, uStarTh )
	}
	(Results.L2 <- EddyProc.C$sEstUstarThreshold(ctrlUstarEst.l=usControlUstarEst(isUsingCPTSeveralT=TRUE)))
}

.plotNEEVersusUStarTempClass <- function(
	### plot NEE versus uStar for data of a subset with estimates
	NEEUStar.F		##<< data.frame with columns of NEE, Ustar and Temperature, and columns 'uStarBin' and 'sDateTime',  
	,uStarTh		##<< value of uStar of an estimated threshold		
	,UstarColName = "Ustar"		##<< column name for UStar
	,NEEColName = "NEE"			##<< column name for NEE
	,TempColName = "Tair"		##<< column name for air temperature
){
	##author<<
	## TW
	dss <- NEEUStar.F[, c(NEEColName, UstarColName, TempColName, "uStarBin","sDateTime")]
	colnames(dss) <- c("NEE","Ustar","Temp","uStarBin","sDateTime")
	##details<< for each uStarBin, mean of NEE and uStar is calculated.
	dssm <- ddply(dss, as.quoted('uStarBin'), summarise, mUStar=mean(substitute(Ustar)), mNEE=mean(substitute(NEE)) )
	plot( NEE ~ Ustar, dss, col=adjustcolor("brown",alpha.f=0.3), ylim=quantile(dss$NEE, c(0.02,0.98))) #col=rainbow(20)[dss$uStarBin] )
	points( mNEE ~ mUStar, dssm, pch="+", cex=1.5)
	abline(v=uStarTh, lty="dashed",col="grey")
	dateRange <- strftime( range(dss$sDateTime), "%d.%m.%y" )
	#\u2103 is degree Centigrade (degree symbol is not ascii) but does not work with some devices
	#\u00B0 is degree only
	legend("topleft",legend=c(
					paste(NEEUStar.F$season[1], " (",dateRange[1],"-",dateRange[2],")",sep="")
					,sprintf(" (%1.1f-%1.1f\u00B0C)",min(dss$Temp),max(dss$Temp))
					,sprintf("u*Thr=%1.2f",uStarTh)
			))
	##value<< side effect of plotting NEE ~ Ustar with indicating Means of the bins, uStarThreshold, Date-Range, and Temperature range
}

.estimateUStarSeason <- function(
		### Estimate uStar threshold for data of a given season
		dsi					##<< dataframe with columns UstarColName, NEEColName, TempColName, and RgColName
		, ctrlUstarSub.l, ctrlUstarEst.l, fEstimateUStarBinned
){
	##author<<
	## TW
	# result in correct format when no quit early 
	resNA <- 	list(
			UstarTh.v=rep(NA_real_, ctrlUstarSub.l$taClasses)	##<< vector of uStar for temperature classes
			,bins.F=data.frame(tempBin=rep(NA_integer_, nrow(dsi)) 
							 , uStarBin=rep(NA_integer_, nrow(dsi)) )			##<< data.frame with columns tempBin, uStarBin for each row in dsi
	)
	if( nrow(dsi) < ctrlUstarSub.l$minRecordsWithinSeason){
		warning("sEstUstarThreshold: too few finite records within season (n=",nrow(dsi),"). Need at least n=",ctrlUstarSub.l$minRecordsWithinSeason,". Returning NA for this Season." )
		return( resNA )
	}
	if( nrow(dsi)/ctrlUstarSub.l$taClasses < ctrlUstarSub.l$minRecordsWithinTemp ){
		warning("sEstUstarThreshold: too few finite records within season (n=",nrow(dsi),") for ",ctrlUstarSub.l$taClasses
				," temperature classes. Need at least n=",ctrlUstarSub.l$minRecordsWithinTemp*ctrlUstarSub.l$taClasses
				,". Returning NA for this Season." )
		return( resNA )
	}
	orderTemp <- order(dsi[,"Tair"])
	uStarBinSortedT <- integer(nrow(dsi))		# default value for methods that do not bin uStar
	dsiSort <- dsi[orderTemp, ,drop=FALSE] 	#sort values in a season by air temperature (later in class by ustar)
	#N <- nrow(dsi ) #number of observations (rows) total, probably can get from elsewhere..
	#T_bin_size <- round(N/ctrlUstarSub.l$taClasses) #set T_bin size so that every bin has equal #values
	#set up vector that contains Ustar values for temperature classes
	UstarTh.v = vector(length=ctrlUstarSub.l$taClasses)
	# twutz 1505: changed temperature binning of records to put equals temperatures into the same bin (compatibility with C code)
	#trace(.binWithEqualValues,recover)		#untrace(.binWithEqualValues)
	TId <- .binWithEqualValuesBalanced(dsiSort[,"Tair"], ctrlUstarSub.l$taClasses)
	#k<-1L
	for (k in 1:ctrlUstarSub.l$taClasses){	# k temperature class
		isCurrentTclass <- TId == k
		dsiSortTclass <- dsiSort[isCurrentTclass,]
		#constraint: u* threshold only accepted if T and u* are not or only weakly correlated..
		Cor1 = suppressWarnings( abs(cor(dsiSortTclass[,"Ustar"],dsiSortTclass[,"Tair"])) ) # maybe too few or degenerate cases
		if( inherits(Cor1,"try-error") ) recover()
		# TODO: check more correlations here? [check C code]
		#      Cor2 = abs(cor(dataMthTsort$Ustar,dataMthTsort$nee))
		#      Cor3 = abs(cor(dataMthTsort$tair,dataMthTsort$nee))
		if( (is.finite(Cor1)) && (Cor1 < ctrlUstarEst.l$corrCheck)){ #& Cor2 < CORR_CHECK & Cor3 < CORR_CHECK){
			if( isTRUE(ctrlUstarEst.l$isUsingCPT) ){
				if( !requireNamespace('segmented') ) stop("Need to install package segmented before using Change Point Decetion for estimation of UStar threshold.")
				resCPT <- try( .fitSeg1(dsiSortTclass[,"Ustar"], dsiSortTclass[,"NEE"]), silent=TRUE )
				UstarTh.v[k] <- if( inherits(resCPT,"try-error") || !is.finite(resCPT["p"]) || resCPT["p"] > 0.05) NA else resCPT["cp"]
				#plot( dsiSortTclass[,"Ustar"], dsiSortTclass[,"NEE"] )	
			} else {
				resBin <- .binUstar(dsiSortTclass[,"NEE"],dsiSortTclass[,"Ustar"],ctrlUstarSub.l$UstarClasses)
				dsiBinnedUstar <- resBin$binAverages
				uStarBinSortedT[isCurrentTclass] <- resBin$uStarBins
				#plot( NEE_avg ~ Ust_avg, dsiBinnedUstar)
				if( any(!is.finite(dsiBinnedUstar[,2])) ){
					stop("Encountered non-finite average NEE for a UStar bin.",
							"You need to provide data with non-finite collumns uStar and NEE for UStar Threshold detection.")
				}
				UstarTh.v[k] <- if( dsiBinnedUstar[1,1] > ctrlUstarEst.l$firstUStarMeanCheck ){
					##details<<
					## If the first mean uStar bin is already large (>ctrlUstarEst.l$firstUStarMeanCheck)
					## Then this temperature class is skipped from estimation
					NA_real_	
				} else {
					fEstimateUStarBinned(  dsiBinnedUstar, ctrlUstarEst.l = ctrlUstarEst.l)
				}
			}
		} else { #correlation between T and u* too high
			#fill respective cell with NA
			UstarTh.v[k] = NA
			#TODO: should a message be printed here to the user??
		}
	}
	# bins of temperature and uStar have been generated on sorted data.frame. Need to assign to original positions
	TIdUnsorted <- uStarBinUnsortedT <- integer(length(orderTemp)); 	
	TIdUnsorted[orderTemp] <- TId	
	uStarBinUnsortedT[orderTemp] <- uStarBinSortedT
	##value<< list with entries
	invisible(list(
			UstarTh.v=UstarTh.v 	##<< vector of uStar for temperature classes
			,bins.F=data.frame(tempBin=TIdUnsorted, uStarBin=uStarBinUnsortedT)			##<< data.frame with columns tempBin, uStarBin for each row in dsi
	))
}


usControlUstarEst <- function(
  ### Default list of parameters for determining UStar of a single binned series	
  #T Classes not needed here?
  # either?
  #,taClasses=7 # set number of ta classes   
  # or?
  #,ctrlUstarSubsetting.l = usControlUstarSubsetting()
  #,taClasses=ctrlUstarSubsetting.l$taClasses  
  #
  #,percentile = 90 #percentile value... double check!
  #,percentile_check = TRUE #enable percentile check\n ... double check!
  ustPlateauFwd = 10 	##<< number of subsequent uStar bin values to compare to in fwd mode
  ,ustPlateauBack = 6	##<< number of subsequent uStar bin values to compare to in back mode  
  ,plateauCrit = 0.95	##<< significant differences between a u* value and the mean of a "plateau"
  ,corrCheck = 0.5 		##<< threshold value for correlation between Tair and u* data
  ,firstUStarMeanCheck=0.2	##<< if first uStar bin average of a class is already larger than this value, the temperature class is skipped.
  ,isOmitNoThresholdBins = TRUE	##<< if TRUE, bins where no threshold was found are ignored. Set to FALSE to report highest uStar bin for these cases
  ,isUsingCPTSeveralT=FALSE		##<< set to TRUE to use changePointDetection without binning uStar but with additionally changed aggregation scheme for several temperature classifications
  ,isUsingCPT=FALSE				##<< set to TRUE to use changePointDetection without binning uStar before in usual aggregation method (good for comparing methods, but not recommended, overruled by isUsingCPTSeveralT=TRUE)
  ,minValidUStarTempClassesProp=0.2 ##<< seasons, in which only less than this proportion of temperature classes a threshold was detected, are excluded from aggregation
  ,minValidBootProp=0.4	##<< minimum proportion of bootstrap samples for which a threshold was detected. Below this proportion NA quantiles are reported.
  ,minNuStarPlateau=3L  ##<< minimum number of records in plateau, threshold must be larger than mean of this many bins 
  #,bt = FALSE 			##<< flag for bootstrapping
  #,btTimes = 100 		##<< number of bootstrap samples
  
  #,method.v = function... #fw2 by default..
  
  #TODO: what does the following param do?
  #define FIRST_Ustar_MEAN_CHECK  		0.2  
  # 4.) const int percentiles[PERCENTILES_COUNT] = { 5, 10, 25, 50, 75, 90, 95};
  
){
	##author<<
	## TW
  ##seealso<< \code{\link{usEstUstarThresholdSingleFw2Binned}}, \code{\link{usControlUstarSubsetting}} 
  ctrl <- list(  
    #taClasses=taClasses
    #,UstarClasses=UstarClasses  
    #percentile = percentile
    #percentile_check = percentile_check #enable percentile check\n ... double check!
    ustPlateauFwd = ustPlateauFwd    #number of subsequent thresholds to compare to in fwd mode
    ,ustPlateauBack = ustPlateauBack #number of subsequent thresholds to compare to in back mode  
    ,plateauCrit = plateauCrit #significant differences between a u* value and the mean of a "plateau"
    ,corrCheck = corrCheck #threshold value for correlation between Tair and u* data
	,firstUStarMeanCheck=firstUStarMeanCheck
	,isOmitNoThresholdBins = isOmitNoThresholdBins
	,isUsingCPT = isUsingCPT
	,isUsingCPTSeveralT = isUsingCPTSeveralT
	,minValidUStarTempClassesProp = minValidUStarTempClassesProp
	,minValidBootProp=minValidBootProp
	,minNuStarPlateau=minNuStarPlateau
	#,seasons = seasons # switch for three different seasonal modes 
    #(seasons or "groupby" may easily extended to an input vector or matrix)
    #,bt = bt #flag for bootstrapping
    #,btTimes = btTimes #number of bootstrap samples
  )
  #display warning message for the following variables that we advise not to be changed
  if (corrCheck != 0.5) warning("WARNING: parameter corrCheck set to non default value!")
  ctrl
}
attr(usControlUstarEst,"ex") <- function(){
	usControlUstarEst()
}

usControlUstarSubsetting <- function(
	### Default list of parameters for subsetting the data for uStarThreshold estimation into classes with similar environmental conditions 	
	taClasses=7 		##<< set number of air temperature classes 
	,UstarClasses=20 	##<< set number of Ustar classes 	
	# seasons param deprecated
  # TODO: add seasons handling to documentation
  #,seasons = 1 # switch for different seasonal modes #TODO: Update?!
	,swThr = 10  		##<< nighttime data threshold for solar radion [Wm-2]
	,minRecordsWithinTemp = 100		##<< integer scalar: the minimum number of Records within one Temperature-class
	,minRecordsWithinSeason = 160	##<< integer scalar: the minimum number of Records within one season
	,minRecordsWithinYear	= 3000	##<< integer scalar: the minimum number of Records within one year
	,isUsingOneBigSeasonOnFewRecords = TRUE ##<< boolean scalar: set to FALSE to avoid aggregating all seasons on too few records
	# 1.) ,selection parameter for which fwd and back modes? fwd2 as default... 
	# 2.) ,MIN_VALUE_PERIOD <<- 3000 # per whole data set... double check C code
	# 3.) ,MIN_VALUE_SEASON <<- 160 #if #number of data points in one any season are smaller than that, merge to one big season
	#define MIN_VALUE_PERIOD    		3000		/* min values for compute u* threshold */
	#define MIN_VALUE_SEASON				160			/* min for seasons */
	#define TA_CLASS_MIN_SAMPLE				100
  ){  
	  ##author<<
	  ## TW
	  ##seealso<< \code{\link{usEstUstarThresholdSingleFw2Binned}}, \code{\link{usControlUstarSubsetting}} 
	ctrl <- list(
    	taClasses=taClasses
		,UstarClasses= UstarClasses
  		#,seasons
  		,swThr = swThr
		,minRecordsWithinTemp = minRecordsWithinTemp
		,minRecordsWithinSeason = minRecordsWithinSeason
		,minRecordsWithinYear = minRecordsWithinYear
		,isUsingOneBigSeasonOnFewRecords = isUsingOneBigSeasonOnFewRecords
)	
  if (ctrl$swThr != 10) warning("WARNING: parameter swThr set to non default value!")
  if (ctrl$taClasses != 7) warning("WARNING: parameter taClasses set to non default value!")	
  if (ctrl$UstarClasses != 20) warning("WARNING: parameter UstarClasses set to non default value!")
  if (ctrl$minRecordsWithinTemp != 100) warning("WARNING: parameter minRecordsWithinTemp set to non default value!")
  if (ctrl$minRecordsWithinSeason != 160) warning("WARNING: parameter minRecordsWithinSeason set to non default value!")
  if (ctrl$minRecordsWithinYear != 3000) warning("WARNING: parameter minRecordsWithinYear set to non default value!")
  ctrl
}
attr(usControlUstarSubsetting,"ex") <- function(){
	usControlUstarSubsetting()
}

usCreateSeasonFactorMonth <- function(
		### calculate factors to denote the season for uStar-Filtering by specifying starting months, with continuous seasons spanning year boundaries
		dates							##<< POSIXct vector of length of the data set to be filled, specifying the center-time of each record				
		, month=as.POSIXlt(dates)$mon+1L   	##<< integer (1-12) vector of length of the data set to be filled, specifying the month for each record
		, year=as.POSIXlt(dates)$year+1900L	##<< integer vector of length of the data set to be filled, specifying the year 
		, startMonth=c(3,6,9,12)		##<< integer vector specifying the starting month for each season, counting from one.
## Default is (Dez,Jan,Feb)(Mar,April,May)(June,July,August),(Sept,Okt,Nov)
){
	##author<<
	## TW
	##seealso<< 
	## \code{\link{usCreateSeasonFactorMonthWithinYear}}, 
	## \code{\link{usCreateSeasonFactorYday}}, 
	## \code{\link{usCreateSeasonFactorYdayYear}}
	##details<< 
	## If Jan is not a starting month, then the first months of each year will be 
	## part of the last period in the year.
	## E.g. with the default the fourth period of the first year consists of Jan,Feb,Dec.
	##
	## REddyProc internally works with a timestamp 15 minutes after the start of each half hour.
	## When providing the \code{dates} argument, user may shift the start time by \code{dates=myDataset$DateTime+15*60}
	#
	if( length(year) == 1L) year <- rep(year, length(month))
	if( length(month) != length(year) ) stop("Month and Year arguments need to have the same length.")
	if( any(month < 1 | month > 12) ) stop("Month out of range 1..12")
	starts <- data.frame(month=sort(unique(startMonth)), year=rep(sort(unique(year)),each=length(startMonth)) )
	if( starts$month[1] != 1L ) starts <- rbind( data.frame(month=1L, year=starts$year[1]),starts )
	seasonFac <- integer(length(month)) # 0L
	starts$startYearMonths <- startYearMonths <- starts$year*1000L + starts$month
	yearMonths <- year*1000L+month
	# i <- 1
	for( i in 1:(length(startYearMonths)-1) ){
		bo <- (yearMonths >= startYearMonths[i]) & (yearMonths < startYearMonths[i+1])
		seasonFac[bo] <- starts$year[i]*1000L + starts$month[i] 
	}
	# last period with no end border defined
	i <- length(startYearMonths)
	bo <- (yearMonths >= startYearMonths[i])
	seasonFac[bo] <- starts$year[i]*1000L + starts$month[i]
	#plot( seasonFac ~ dates )
	as.factor(seasonFac) 	
	##value<<
	## Integer vector length(dates), with each unique value representing one season
}

.tmp.f <- function(){
	pkgDir <- system.file(package='REddyProc')
	if( nzchar(pkgDir) ){
		Dir.s <- paste(pkgDir, 'examples', sep='/')
		EddyData.F <- dss <- fLoadTXTIntoDataframe('Example_DETha98.txt', Dir.s)
		EddyDataWithPosix.F <- ds <- fConvertTimeToPosix(dss, 'YDH', Year.s='Year', Day.s='DoY', Hour.s='Hour')
		(res <- usCreateSeasonFactorMonth(ds$DateTime))
		plot.default( res ~ ds$DateTime, type="p")
	}
}



usCreateSeasonFactorMonthWithinYear <- function(
	### calculate factors to denote the season for uStar-Filtering by specifying starting months, with seasons not spanning year boundaries
	dates							##<< POSIXct vector of length of the data set to be filled, specifying the center-time of each record				
	, month=as.POSIXlt(dates)$mon+1   ##<< integer (1-12) vector of length of the data set to be filled, specifying the month for each record
	, year=as.POSIXlt(dates)$year+1900	##<< integer vector of length of the data set to be filled, specifying the year 
	, startMonth=c(3,6,9,12)		##<< integer vector specifying the starting month for each season, counting from one
		## default is (Dez,Jan,Feb)(Mar,April,May)(June,July,August),(Sept,Okt,Nov)
){
	##author<<
	## TW
	##seealso \code{\link{usCreateSeasonFactorMonth}}
  ##details<< 
  ## If Jan is not a starting month, then the first months of each year will be 
  ## part of the last period in the year.
  ## E.g. with the default the fourth period of the first year consists of Jan,Feb,Dec.
  if( length(year) == 1L) year <- rep(year, length(month))
  if( length(month) != length(year) ) stop("Month and Year arguments need to have the same length.")
  if( any(month < 1 | month > 12) ) stop("Month out of range 1..12")
  startMonth <- sort(unique(startMonth))
  boLastPeriod <- month < startMonth[1]
  # translate month before the first specified beginning month to be after last specified month (1 becomes 13)
  month[ boLastPeriod ] <- month[ boLastPeriod] +12
  startMonthAdd <- c(startMonth, startMonth[1]+12)
  seasonFac <- year*1000L + rep(startMonth[1], length(month) )
  # i <- 2
  for( i in 2:length(startMonth) ){
	  bo <- (month >= startMonthAdd[i]) & (month < startMonthAdd[i+1])
	  seasonFac[bo] <- year[bo]*1000L + startMonth[i]
  }
  table(seasonFac)
  #plot.default( as.factor(seasonFac) ~ as.POSIXlt(dates)$mon+1 ); levels(as.factor(seasonFac))
  as.factor(seasonFac) 	
  ##value<<
  ## Integer vector length(dates), with each unique value representing one season
}

.tmp.f <- function(){
	Dir.s <- paste(system.file(package='REddyProc'), 'examples', sep='/')
	EddyData.F <- dss <- fLoadTXTIntoDataframe('Example_DETha98.txt', Dir.s)
	EddyDataWithPosix.F <- ds <- fConvertTimeToPosix(dss, 'YDH', Year.s='Year', Day.s='DoY', Hour.s='Hour')
	table(res <- usCreateSeasonFactorMonthWithinYear(ds$DateTime-1))  #-1 to move last record of newYear to 1998 
}


usCreateSeasonFactorYday <- function(
	### calculate factors to denote the season for uStar-Filtering by specifying starting day of years
	dates							##<< POSIXct vector of length of the data set to be filled, specifying the center-time of each record				
	, yday=as.POSIXlt(dates)$yday+1L  ##<< integer (1-366) vector of length of the data set to be filled, specifying the day of the year (1..366) for each record
	, year=as.POSIXlt(dates)$year+1900L	##<< integer vector of length of the data set to be filled, specifying the year 
	, startYday=c(335,60,152,244)	 ##<< integer vector (1-366) specifying the starting yearDay for each season in increasing order
){
	##author<<
	## TW
	##seealso \code{\link{usCreateSeasonFactorMonth}}
	##details<<
	## With default parameterization, dates are assumed to denote begin or center of the eddy time period.
	## If working with dates that denote the end of the period, use \code{yday=as.POSIXlt(fGetBeginOfEddyPeriod(dates))$yday}
	starts <- data.frame(yday=sort(unique(startYday)), year=rep(sort(unique(year)),each=length(startYday)) )
	usCreateSeasonFactorYdayYear( dates, yday, year, starts)	
	##value<<
	## Integer vector of nrow ds, each unique class representing one season
}

.tmp.f  <- function(){
	Dir.s <- paste(system.file(package='REddyProc'), 'examples', sep='/')
	EddyData.F <- dss <- fLoadTXTIntoDataframe('Example_DETha98.txt', Dir.s)
	EddyDataWithPosix.F <- ds <- fConvertTimeToPosix(dss, 'YDH', Year.s='Year', Day.s='DoY', Hour.s='Hour')
	table(res <- usCreateSeasonFactorYday(ds$DateTime))
	plot.default( res ~ ds$DateTime)
}

usCreateSeasonFactorYdayYear <- function(
		### calculate factors to denote the season for uStar-Filtering by specifying starting day and year of each season
		dates							##<< POSIXct vector of length of the data set to be filled, specifying the center-time of each record				
		, yday=as.POSIXlt(dates)$yday+1L  ##<< integer (1-366) vector of length of the data set to be filled, specifying the day of the year (1..366) for each record
		, year=as.POSIXlt(dates)$year+1900L	##<< integer vector of length of the data set to be filled, specifying the year 
		, starts	 					 ##<< data.frame with first column specifying the starting yday (integer 1-366) and second column the year (integer e.g. 1998) for each season in increasing order
){
	##author<<
	## TW
	##seealso \code{\link{usCreateSeasonFactorMonth}}
	##details<<
	## With default parameterization, dates are assumed to denote begin or center of the eddy time period.
	## If working with dates that denote the end of the period, use \code{yday=as.POSIXlt(fGetBeginOfEddyPeriod(dates))$yday}
	##
	if( length(year) == 1L) year <- rep(year, length(yday))
	if( length(yday) != length(year) ) stop("Month and Year arguments need to have the same length.")
	if( any(yday < 1 | yday > 366) ) stop("yday out of range 1..366")
	#
	colnames(starts) <- c("yday","year")
	if( starts$yday[1] != 1L ) starts <- rbind( data.frame(yday=1L, year=starts$year[1]),starts )
	seasonFac <- integer(length(yday)) # 0L
	starts$startYearDays <- startYearDays <- starts$year*1000L + starts$yday
	yearDays <- year*1000L+yday
	# i <- 1
	for( i in 1:(length(startYearDays)-1) ){
		bo <- (yearDays >= startYearDays[i]) & (yearDays < startYearDays[i+1])
		seasonFac[bo] <- starts$year[i]*1000L + starts$yday[i] 
	}
	# last period with no defined end border
	i <- length(startYearDays)
	bo <- (yearDays >= startYearDays[i])
	seasonFac[bo] <- starts$year[i]*1000L + starts$yday[i]
	#plot( seasonFac ~ dates ); levels(as.factor(seasonFac))
	as.factor(seasonFac) 	
	##value<<
	## Integer vector of nrow ds, each unique class representing one season
}


usGetYearOfSeason <- function(
		### determine the year of the record of middle of seasons  
		seasonFactor.v		##<< factor vector of length data: for each record which season it belongs to 
		, sDateTime.v		##<< POSIX.t vector of length data: for each record: center of half-hour period (corresponding to sDATA$sDateTime) 
){
	##author<<
	## TW
	originCt <- as.POSIXct("1970-01-01 00:00.00 UTC")
	timezone <- attr(sDateTime.v[1],"tzone")
	#dates <- sDateTime.v[seasonFactor.v == seasonFactor.v[1]]
	res <- tapply(sDateTime.v, seasonFactor.v, FUN=function(dates){
				x <- as.numeric(dates)
				xCenter <- x[1] + (x[length(x)] - x[1])/2
				1900L + as.POSIXlt(xCenter, origin=originCt, tz=timezone)$year
			} )
	##value<<  named integer vector, with names corresponding to seasons
	# need to convert 1d array to vector
	structure(as.vector(res), names=rownames(res))
}
.tmp.f <- function(){
	ds <- eddyProc$sDATA
	sDateTime.v <-ds$sDateTime
	seasonFactor.v <- usCreateSeasonFactorMonth(ds$sDateTime)
	usGetYearOfSeason( seasonFactor.v, sDateTime.v)
	usGetYearOfSeason( seasonFactor.v, sDATA$sDateTime)
}




.binUstar <- function(
	### Bin the NEE for a number of classes of UStar classes
	NEE.v				##<< vector with value of Net Ecosystem exchange
	,Ustar.v 			##<< vector with u* (friction velocity (m2/s)
	,UstarClasses=usControlUstarSubsetting()$UstarClasses	##<< the number of binning classes
	,isUStarSorted=FALSE	##<< set to TRUE, if NEE and Ustar are already sorted by increasin Ustar values (performance gain)

){
	##author<<
	## TW
	ds.F <- ds0.F <- data.frame(NEE=NEE.v,Ustar=Ustar.v)
	#within data frame sort values by Ustar
	if( !isTRUE(isUStarSorted)){
		orderUStar <- order(Ustar.v)
		ds.F <- ds.F[orderUStar, ]
	}else{
		orderUStar <- TRUE
	}
	#
	# twutz 1505: changed binning to take care of equal values in uStar column 
	# when assigning uStar classes, only start a new class when uStar value changes
	ds.F$uClass <- .binWithEqualValuesMinRec(ds.F$Ustar, nBin=UstarClasses, tol = 1e-14)
	binAverages <- ddply( ds.F, as.quoted('uClass'), summarise, Ust_avg=mean(substitute(Ustar),na.rm=TRUE), NEE_avg=mean(substitute(NEE), na.rm=TRUE), nRec=length(substitute(NEE)))[,-1]
	uStarBinsUnsorted <- integer(nrow(ds.F)); uStarBinsUnsorted[orderUStar] <- as.integer(ds.F$uClass)
	# plot( uStarBinsUnsorted ~ ds0.F$Ustar )	# for checking correct ordering
	##value<< list with entries
	list(
			binAverages=binAverages		##<< data.frame with columns Ust_avg   NEE_avg nRec with one row for each bin
			,uStarBins=uStarBinsUnsorted	##<< integer vector reporting the bin for each record in Ustar.v  	
			)
}

.binWithEqualValuesBalanced <- function(
	### create a binning factor so that equal values of x end up in the same bin, with shortening following following bins
	x				##<< sorted numeric vector to sort into bins
	,nBin			##<< intended number of bins
	,tol = 1e-8		##<< distance between successive values of x that are treated to be equal
	,isBinSizeFloorUsed=TRUE	##<< set to FALSE to postpone rounding on start and end values
){
	if( nBin == 1L) return(integer(length(x)))
	binSize <- length(x) / nBin
	##details<< 
	## By not taking the floor, a better distribution of samples across bins is achieved.
	## But here keep it due to compatibility to C-Code.
	if( isBinSizeFloorUsed ) binSize <- floor(binSize)
	breaksX <- which(diff(x) > tol)+1
	binId <- rep(1L,length(x))
	iBreak <- 1L	# index from which to seek next break
	#iClass <- 2L
	for( iClass in 2L:as.integer(nBin)){
		start0 <- round((iClass-1)*binSize)+1
		iBreak <- .whichValueGreaterEqual(breaksX, start0, iStart=iBreak)
		start1 <- breaksX[iBreak]
		#start1Slow <- breaksX[breaksX >= start0][1]	# find next uStar change at or after position start0 
		binId[start1:length(x)] <- iClass
	}
	##value<< integer vector of same length as x, with unique value for each bin
	binId
}

.binWithEqualValuesMinRec <- function(
		### createg a binning factor so that equal values of x end up in the same bin, with shifting following bins
		x				##<< sorted numeric vector to sort into bins
		,nBin			##<< intended number of bins
		,tol = 1e-8		##<< distance between successive values of x that are treated to be equal
){
	##author<<
	## TW
	lengthX <- length(x)
	binId <- integer(lengthX)
	binSize <- as.integer(floor(lengthX / nBin))
	iBreaksX <- which(diff(x) > tol)		# positions in x where value is numerically different from following element
	iBreak <- 0L		# start index in iBreaks, to avoid searching the part of samller elements 
	iEnd <- 0L			# index in x, end of the (previous) period
	iBin <- 0L			# bin Id
	while( iEnd < lengthX ){
		iBin <- iBin + 1L
		iStart <- iEnd +1L
		iEnd <- iEnd+binSize		# same as iStart + binsSize-1, with counting from 1 instead of 0 
		# find the next break after iEnd
		iBreak <- .whichValueGreaterEqual(iBreaksX, iEnd, iBreak+1L)
		if( is.na(iBreak) ){
			# no break was found, set period end to vector end and finish
			# if length of last bin is smaller than 90% of intended binsize, sort records to former bin
			if( (lengthX+1L-iStart) < binSize*0.9 && iBin != 1L)
				iBin <- iBin -1L
			binId[iStart:lengthX] <- iBin
			break
		} else {
			iEnd <- iBreaksX[iBreak]	# update iEnd to position with break after it
			binId[iStart:iEnd] <- iBin
		}
	}
	##value<< integer vector of same length as x, with unique value for each bin.
	## Each bin holds at least floor(length(x)/nBin) records, or more if there were values after the bin that were
	## numerically equal to last value of the bin.
	## The actual number of bins might be differnt from argument nBin due to numericall equal values
	## and is reported with attribute \code{nBin}
	## Because of using floor in bin-width calculation, the last, i.e. nbin, of the bins may hold more values.
	#
	# for C-code compatibility do not use more than nBin classes, and increase the size of the last class
	binId[binId > nBin] <- nBin
	attr(binId,"nBin") <- min(iBin,nBin)
	binId
}

.whichValueGreaterEqual <- function(
	### search first element in an integer vector that is larger 
	x			##<< increasingly sorted numeric vector to search 
	, threshold	##<< integer scalar: searched element will need to be greater or equal as this argument 
	, iStart=1L	##<< index in vector to start search
){
	##author<<
	## TW
	# see tests/test_binWithEqualValues.R
	#which(x >= threshold)[1]
	#iStart-1 + which(x[iStart:length(x)] >= threshold)[1]
	# for performance reasons call a c++ function that loops across the vector
	# 
	# cannot generate C function with dot
	# Rcpp::compileAttributes() generates a function without leading dot, need to adjust by hand afterwards 
	.whichValueGreaterEqualC( as.integer(x), as.integer(threshold), as.integer(iStart) )	# defined in cFunc.R
	##value<< 
	## Scalar integer: first index in x, that is >= iStart, and whose value x[i] is >= threshold.
	## If no index was found, returns NA
}

usEstUstarThresholdSingleFw1Binned <- function(
		### estimate the Ustar threshold for single subset, using FW1 algorithm, relying on binned NEE and Ustar
		Ust_bins.f			##<< data.frame with columns NEE_avg and Ust_avg, of Ustar bins 
		,ctrlUstarEst.l = usControlUstarEst() ##<< parameter list, see \code{\link{usControlUstarEst}} for defaults and description
){
	##author<<
	## TW, OM
	##references<< inspired by Papale 2006
	
	# algorithm to check when plateau is reached
	flag <- FALSE
	#for every u* bin compare to avg of subsequent UST_PLATEAU, until found
	u <- 1
	#TODO: change to for loop 1:ustClasses and then break
	# in order to avoid infinite loop in case of error
	# optimize with Thomas?
	
  while (!flag){ #only stop if threshold is found
		if (!flag & (Ust_bins.f$NEE_avg[u] >= (ctrlUstarEst.l$plateauCrit*mean(Ust_bins.f$NEE_avg[(u+1):(u+ctrlUstarEst.l$ustPlateauFwd)],na.rm=T)))){ #na.rm=T to exclude NAs out of bounds..
			#   NEE_i >= .95*avg(i,i+1,...,i+10)  [FW]    
			UstarThSingle <- Ust_bins.f$Ust_avg[u]        
			flag <- TRUE #set flag for threshold found in this mode
		}          
		#case that no threshold could be found by plateau method, use maximum u* in that T_class...
		if (u==(nrow(Ust_bins.f)-1)){ #FW1: -1 ; FW2: 
			UstarThSingle <- Ust_bins.f$Ust_avg[u+1]        
			break;      
		}
		u <- u+1 #increase index by 1
	}  
	return(UstarThSingle)
}

usEstUstarThresholdSingleFw2Binned <- function(
  ### estimate the Ustar threshold for single subset, using FW2 algorithm  	
  Ust_bins.f							##<< data.frame with column s NEE_avg and Ust_avg, of Ustar bins
  ,ctrlUstarEst.l = usControlUstarEst()	##<< parameter list, see \code{\link{usControlUstarEst}} for defaults and description 
){  
	##author<<
	## TW, OM
	# algorithm to check when plateau is reached
  flag <- FALSE
  #for every u* bin compare to avg of subsequent UST_PLATEAU, until found
  u <- 1
  UstarThSingle <- NA_real_
  ##details<< 
  ## Demand that threshold is higher than \code{ctrlUstarEst.l$minNuStarPlateau} records.
  ## If fewer records  
  umax <- nrow(Ust_bins.f)-max(2L,ctrlUstarEst.l$minNuStarPlateau) # FF2 neads at least two bins after threshold
  while (u <= umax){ 
    if (
		(Ust_bins.f$NEE_avg[u] >= (ctrlUstarEst.l$plateauCrit*mean(Ust_bins.f$NEE_avg[(u+1):(u+ctrlUstarEst.l$ustPlateauFwd)],na.rm=T))) & 
		(Ust_bins.f$NEE_avg[u+1] >= (ctrlUstarEst.l$plateauCrit*mean(Ust_bins.f$NEE_avg[(u+1+1):(u+ctrlUstarEst.l$ustPlateauFwd+1)],na.rm=T)))
	){
      UstarThSingle <- Ust_bins.f$Ust_avg[u]        
	  break
    }
	u = u+1L
  }
  #case that no threshold could be found by plateau method, use maximum u* in that T_class...
  # twutz: 1505: implemented option to return NA, to omit from median over bins (C-compatibility)
  if(is.na(UstarThSingle) & !isTRUE(ctrlUstarEst.l$isOmitNoThresholdBins) ) 
	  UstarThSingle <- Ust_bins.f$Ust_avg[u+1]
  return(UstarThSingle)    
}

.tmp.f <- function(){
	plot( Ust_bins.f$NEE_avg ~ Ust_avg, Ust_bins.f)
}

usGetValidUstarIndices <- function(
		### getremove non-finite cases and omit night time data.
		ds						    ##<< data.frame with columns
		,UstarColName = "Ustar"		##<< column name for UStar
		,NEEColName = "NEE"			##<< column name for NEE
		,TempColName = "Tair"		##<< column name for air temperature
		,RgColName = "Rg"			##<< column name for solar radiation for omitting night time data
		,swThr = usControlUstarSubsetting()$swThr	##<< threshold below which data is acknowledged as night time respiration.
){
	##author<<
	## TW
	bo <- 
			is.finite(ds[,NEEColName]) & 
					is.finite(ds[,TempColName]) & 
					is.finite(ds[,UstarColName]) & 
					is.finite(ds[,RgColName]) 
	bo <- bo & ds[,RgColName] < swThr
	##value<< boolean vector with non-finite cases and cases with radiation < swThr set to FALSE.
	bo
}

usGetAnnualSeasonUStarMappingFromDistributionResult <- function(
		### extract mapping season -> uStar columns from Distribution result 
		uStarTh		##<< result of \code{\link{sEstUstarThresholdDistribution}} or \code{\link{sEstUstarThreshold}}$uStarTh
){
	##author<<
	## TW
	dsYear <- uStarTh[uStarTh$aggregationMode=="year", ,drop=FALSE]
	dsYear$season <- NULL
	dsYear$aggregationMode <- NULL
	# deprecated: done in sEstUstarThreshold
#	##details<<
#	## rows with no threshold get the aggregated value, i.e. the median of columns of other years
#	naLines <- which(apply(dsYear[,-(1) ,drop=FALSE],1,function(x){ all(is.na(x))} ))
#	if( length(naLines) ){
#		medianYear <- apply( dsYear[!naLines,-(1),drop=FALSE], 2, median, na.rm=TRUE )
#		dsYear[naLines,-(1)] <- medianYear
#	}
	dsSeasons <- uStarTh[uStarTh$aggregationMode=="season",c("season","seasonYear"),drop=FALSE]
	res2 <- merge(dsSeasons, dsYear )
	res2$seasonYear <- NULL
	# transform column names of "x%" to "Ux" with leading zeros
	colnames(res2)[-(1:2)] <- (gsub(" ","0",sprintf("U%2s",gsub("%","",colnames(res2)[-(1:2)]))))
	res2
}

usGetSeaonalSeasonUStarMappingFromDistributionResult <- 
usGetSeasonalSeasonUStarMappingFromDistributionResult <- function(
		### extract mapping season -> uStar columns from Distribution result (\code{\link{sEstUstarThresholdDistribution}})
		uStarTh		##<< result of \code{\link{sEstUstarThresholdDistribution}} or \code{\link{sEstUstarThreshold}}$uStarTh
){
	##author<<
	## TW
	# omit aggregation model and seasonYear column
	dsSeasons <- uStarTh[uStarTh$aggregationMode=="season", ,drop=FALSE]
	dsSeasons$seasonYear <- NULL
	dsSeasons$aggregationMode <- NULL
	
	# deprecated: already done in uStar estimation
	##details<<
#	## missing thresholds are replaced by corresponding estimates based on annually aggregated estimates
#	## (\code{\link{usGetAnnualSeasonUStarMappingFromDistributionResult}})
#	naLines <- apply(dsSeasons[,-(1),drop=FALSE],1,function(x){ all(is.na(x))} )
#	if( length(naLines) ){
#		dsYears <- usGetAnnualSeasonUStarMappingFromDistributionResult(uStarTh)
#		dsSeasons[naLines,] <- dsYears[naLines,]
#	}
	##value<< a data frame with first column the season, and other columns different uStar threshold estimates
	# transform column names of "x%" to "Ux" with leading zeros
	colnames(dsSeasons)[-(1:2)] <- (gsub(" ","0",sprintf("U%2s",gsub("%","",colnames(dsSeasons)[-(1:2)]))))
	dsSeasons
}


sEddyProc$methods(
		sEstUstarThresholdDistribution = structure(function(
		### Estimating the distribution of u* threshold by bootstrapping over data
		#ds					    ##<< data.frame with columns see \code{\link{sEstUstarThreshold}}
		ctrlUstarEst.l = usControlUstarEst()			##<< control parameters for estimating uStar on a single binned series, see \code{\link{usControlUstarEst}}
		,ctrlUstarSub.l = usControlUstarSubsetting()	##<< control parameters for subsetting time series (number of temperature and Ustar classes \ldots), see \code{\link{usControlUstarSubsetting}} 
		,UstarColName = "Ustar"		##<< column name for UStar
		,NEEColName = "NEE"			##<< column name for NEE
		,TempColName = "Tair"		##<< column name for air temperature
		,RgColName = "Rg"			##<< column name for solar radiation for omitting night time data
		,...						##<< further arguments to \code{\link{sEstUstarThreshold}}
		,seasonFactor.v = usCreateSeasonFactorMonth(sDATA$sDateTime)   ##<< factor of seasons to split (data is resampled only within the seasons)
		,seasonFactorsYear = usGetYearOfSeason(seasonFactor.v, ds$sDateTime)   ##<< named integer vector: for each seasonFactor level, get the year that this season belongs to  
		,nSample = 100L				##<< the number of repetitions in the bootstrap
        ,probs = c(0.05,0.5,0.95)	##<< the quantiles of the bootstrap sample to return. Default is the 5%, median and 95% of the bootstrap
		,verbose.b=TRUE				##<< set to FALSE to omit printing progress
){
	##author<<
	## TW
	##details<< 
		## The choice of the criterion for sufficiently turbulent conditions (u* > choosen threshold)
		## introduces large uncertainties in calculations based on gap-filled Eddy data.
	    ## Hence, it is good practice to compare derived quantities based on gap-filled data using a range of u* threshold estimates.
		  
		##
		## This method explores the probability density of the threshold by repeating its estimation
		## on a bootstrapped sample.
		## By default it returns the 90% confidence interval (arguement \code{probs}). 
		## For larger intervals the sample number need to be increased (arguement \code{probs}). 
		
		##seealso<< \code{\link{sEstUstarThreshold}}, \code{\link{sMDSGapFillAfterUStarDistr}}
		res0 <- suppressMessages(.self$sEstUstarThreshold(
				UstarColName = UstarColName
				,NEEColName = NEEColName
				,TempColName = TempColName
				,RgColName = RgColName				
				,...
				,ctrlUstarEst.l =ctrlUstarEst.l, ctrlUstarSub.l=ctrlUstarSub.l
				, seasonFactor.v=seasonFactor.v	))
		iPosAgg <- which(res0$uStarTh$aggregationMode=="single")
		iPosYears <- which(res0$uStarTh$aggregationMode=="year")
		iPosSeasons <- which(res0$uStarTh$aggregationMode=="season")
		years0 <- res0$uStarTh$year[iPosYears]
		seasons0 <- res0$uStarTh$season[iPosSeasons]
		ds <- sDATA[,c("sDateTime", UstarColName,NEEColName,TempColName,RgColName)]
		colnames(ds) <- c("sDateTime","Ustar","NEE","Tair","Rg")
		ds$seasonFactor.v <- seasonFactor.v
		fWrapper <- function(iSample, ...){
			dsBootWithinSeason <- ds2 <- ddply(ds, .(seasonFactor.v), function(dss) {
						iSample <- sample.int(nrow(dss),replace=TRUE)
						dss[iSample, ,drop=FALSE]
					} )
			if( isTRUE(verbose.b) ) message(".", appendLF = FALSE)
			res <- usEstUstarThreshold(dsBootWithinSeason, ...
							, seasonFactor.v=seasonFactor.v
							, ctrlUstarEst.l =ctrlUstarEst.l, ctrlUstarSub.l=ctrlUstarSub.l	)
			gc()
			# need to check if years and seasons have been calculated differently due to subsetting with
			# too few values within a season
			# then report NA for those cases
			resAgg <- res$uStarTh$uStar[iPosAgg] 
			years <- res$uStarTh$year[iPosYears]
			resYears <- structure( 
					if( all(years == years0) ) res$uStarTh$uStar[iPosYears] else rep(NA_real_, length(years0))
					, names=as.character(years0) )
			resSeasons <- structure( 
					if( nrow(res$uStarTh) == nrow(res0$uStarTh) && all((seasons <- res$uStarTh$season[iPosSeasons]) == seasons0) ) res$uStarTh$uStar[iPosSeasons] else rep(NA_real_, length(seasons0))
					, names=as.character(seasons0) )
			return(c(aggYears=resAgg, resYears, resSeasons ))
			#return(length(res$UstarSeason$uStar))
			#res$UstarAggr
		}
		Ustar.l0 <- res0$uStarTh$uStar[c(iPosAgg, iPosYears, iPosSeasons)]	
		Ustar.l <- suppressMessages(
				Ustar.l <- lapply(1:(nSample-1), fWrapper,...)
		)
		if( isTRUE(verbose.b) ) message("")	# line break
		stat <- do.call( rbind, c(list(Ustar.l0),Ustar.l))
		##details<< \describe{\item{Quality Assurance}{
		## If more than \code{ctrlUstarEst.l$minValidBootProp} (default 40%) did not report a treshold, 
		## no quantiles (i.e. NA) are reported.
		## }}		
		resQuantiles <-	t(apply( stat, 2, quantile, probs=probs, na.rm=TRUE ))
		iInvalid <- colSums(is.finite(stat))/nrow(stat) < ctrlUstarEst.l$minValidBootProp 
		resQuantiles[iInvalid,] <- NA_real_
		resDf <- cbind(res0$uStarTh, resQuantiles)
		message(paste("Estimated UStar distribution of:\n", paste(capture.output(resDf[resDf$aggregationMode=="single",-(1:3)]),collapse="\n")
						,"\nby using ",nSample,"bootstrap samples and controls:\n", paste(capture.output(unlist(ctrlUstarSub.l)),collapse="\n")
				))
		resDf
		##value<< 
		## A data.frame with columns aggregationMode, year, and UStar estimate based on the unresampled data. 
		## The other columns correpond to the quantiles of Ustar estimate
		## for given probabilities (argument \code{probs}) based on the distribution of estimates using resampled the data.
}))

.tmp.f <-  function(){
		# load the data and generate DateTime column
		Dir.s <- paste(system.file(package='REddyProc'), 'examples', sep='/')
		EddyData.F <- ds <- fLoadTXTIntoDataframe('Example_DETha98.txt', Dir.s)
		EddyDataWithPosix.F <- ds <- fConvertTimeToPosix(EddyData.F, 'YDH', Year.s='Year', Day.s='DoY', Hour.s='Hour')
		EddyProc.C <- sEddyProc$new('DE-Tha', EddyDataWithPosix.F, c('NEE','Rg','Tair','VPD','Ustar'))
		(res <- EddyProc.C$sEstUstarThresholdDistribution(nSample=10))	# for real applications use larger sample size
		usGetAnnualSeasonUStarMappingFromDistributionResult(res)
}

Try the REddyProc package in your browser

Any scripts or data that you put into this service are public.

REddyProc documentation built on May 2, 2019, 5:19 p.m.