R/04a_ms_flat_1tail.R
In fitdistcp: Distribution Fitting with Calibrating Priors for Commonly Used Distributions

Documented in ms_flat_1tail

#' Illustration of Model Selection Among 10 One Tail Distributions from the \code{fitdistcp} Package
#'
#' @description
#' Applies model selection using AIC, WAIC1, WAIC2 and leave-one-out logscore
#' to the input data \eqn{x},
#' for 10 one tailed models in the \code{fitdistcp} package
#' (although for the GPD, the logscore is NA for mathematical reasons).
#'
#' The code is straightforward, and the point is to illustrate what is
#' possible using the model selection outputs from the \code{fitdistcp} routines.
#'
#' The input data may be automatically shifted so that the minimum value is positive.
#'
#' For the Pareto, the data may be further shifted so that the minimum value is slightly greater than 1.
#'
#' @param x 			data vector
#'
#' @details
#' The 10 models are:
#' \code{exp},
#' \code{pareto_k2},
#' \code{halfnorm},
#' \code{lnorm},
#' \code{frechet_k1},
#' \code{weibull},
#' \code{gamma},
#' \code{invgamma},
#' \code{invgauss} and
#' \code{gpd_k1}.

#' @return
#' Plots QQ plots to the screen, for each of the models,
#' and returns a data frame containing
#' \itemize{
#'  \item MLE parameter values
#'	\item AIC scores (times -0.5), AIC weights
#'	\item WAIC1 scores, WAIC1 weights
#'	\item WAIC2 scores, WAIC2 weights
#'	\item logscores, logscore weights
#'	\item maximum likelihood and calibrating prior means
#'	\item maximum likelihood and calibrating prior standard deviations
#' }
#'
#' @author
#' Stephen Jewson \email{stephen.jewson@gmail.com}
#'
#' @example man/examples/example_03_ms_flat_1tail.R
#'
#' @export
#'
ms_flat_1tail=function(x){
#
xx=x
#
# 1 set up labels for the graph
#
models=c(	"exp","pareto_k2","halfnorm","lnorm","frechet_k1",
						"weibull","gamma","invgamma","invgauss","gpd_k1")
nmodels=length(models)
#
# 2 shift the data if necessary
# -since all models require x>0, shift the data if there are negative or zero values
# -for the Pareto, shift the data so that x>1
# -and print a message to the screen to warn that the data has been shifted
#
dd1=0
if(min(xx)<0){
	dd1=0-min(xx)+0.0001
	message("Message from inside modelselection_flat_1tail:")
	message(" I'm increasing the data to eliminate negative or zero values")
	message("  adjustment=",dd1)
	message("  before adjustment:min(xx)     =",min(xx))
	message("  after  adjustment:min(xx+dd1) =",min(xx+dd1))
}
dd2=0
if(min(xx)<1){
	dd2=1-min(xx)+0.0001
	message("Message from inside modelselection_flat_1tail:")
	message(" For the Pareto only, I increase the input data so that it's all >1")
	message(" I fit the model, and then adjust back")
	message("  adjustment=",dd2)
	message("  before Pareto adjustment:min(xx)     =",min(xx))
	message("  after  Pareto adjustment:min(xx) =",min(xx+dd2))
}
#
# 3 fit the quantiles using q***
#
nx=length(xx)
pp1=(c(1:nx)-0.5)/nx
qq1	=qexp_cp				(xx+dd1,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq2	=qpareto_k2_cp	(xx+dd2,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq3	=qhalfnorm_cp		(xx+dd1,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq4	=qlnorm_cp			(xx+dd1,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq5	=qfrechet_k1_cp	(xx+dd1,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq6	=qweibull_cp		(xx+dd1,	pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq7	=qgamma_cp			(xx,			pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq8	=qinvgamma_cp		(xx,			pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq9	=qinvgauss_cp		(xx,			pp1,waicscores=TRUE,logscores=TRUE,means=TRUE)
qq10=qgpd_k1_cp			(xx,			pp1,waicscores=TRUE,means=TRUE,kloc=0)
#
# 4 simulate randoms to calculate sd using r***
#
nran=100000
rr1 =rexp_cp				(nran,xx+dd1)
rr2 =rpareto_k2_cp	(nran,xx+dd2)
rr3 =rhalfnorm_cp		(nran,xx+dd1)
rr4 =rlnorm_cp			(nran,xx+dd1)
rr5 =rfrechet_k1_cp	(nran,xx+dd1)
rr6 =rweibull_cp		(nran,xx+dd1)
rr7 =rgamma_cp			(nran,xx)
rr8 =rinvgamma_cp		(nran,xx)
rr9 =rinvgauss_cp		(nran,xx)
rr10=rgpd_k1_cp			(nran,xx)
#
# 5 make qq plots
# -horiz axis is the data we are trying to model
# -vertical axis in black are quantiles from fitting using maxlik
# -vertical axis in red are quantiles from fitting with parameter uncertainty
# -vertical axis is based on so-called 'Hazen plotting position'
# https://glossary.ametsoc.org/wiki/Plotting_position
#
oldpar=par(no.readonly = TRUE)
on.exit(par(oldpar))
par(mfrow=c(4,3))
sxx=sort(xx)
#
# 6 define the plotting function
#
plot1=function(x,y1,y2,n){
	ymin=min(y1,y2)
	ymax=max(y1,y2)
	plot(x,y2,main=models[n],
		ylim=c(ymin,ymax),
		xlab="data",ylab="model",
		col="black")
	points(x,y1,col="red")
	points(x,x,"l")
}
#
# 7 plot the qq plots using the fitted quantiles
#
plot1(sxx,qq1	$cp_quantiles-dd1,	qq1	$ml_quantiles-dd1		,1)
plot1(sxx,qq2	$cp_quantiles-dd2,	qq2	$ml_quantiles-dd2		,2)
plot1(sxx,qq3	$cp_quantiles-dd1,	qq3	$ml_quantiles-dd1		,3)
plot1(sxx,qq4	$cp_quantiles,			qq4	$ml_quantiles				,4)
plot1(sxx,qq5	$cp_quantiles-dd1,	qq5	$ml_quantiles-dd1		,5)
plot1(sxx,qq5	$cp_quantiles-dd1,	qq6	$ml_quantiles-dd1		,6)
plot1(sxx,qq6	$cp_quantiles,			qq7	$ml_quantiles				,7)
plot1(sxx,qq8	$cp_quantiles,			qq8	$ml_quantiles				,8)
plot1(sxx,qq9	$cp_quantiles,			qq9	$ml_quantiles				,9)
plot1(sxx,qq10$cp_quantiles,			qq10$ml_quantiles				,10)
#
# 8 put mle parameters into a data frame
#
mle=matrix(NA,nmodels,2)
mle[1,1:1]	=qq1	$ml_params
mle[2,1:1]	=qq2	$ml_params
mle[3,1:1]	=qq3	$ml_params
mle[4,1:2]	=qq4	$ml_params
mle[5,1:2]	=qq5	$ml_params
mle[6,1:2]	=qq6	$ml_params
mle[7,1:2]	=qq7	$ml_params
mle[8,1:2]	=qq8	$ml_params
mle[9,1:2]	=qq9	$ml_params
mle[10,1:2]	=qq10	$ml_params
mleparams_df=data.frame(models,mle)
colnames(mleparams_df)=c("models","param 1","param 2")
#
# 10 extract maic, waic and logscores from the qresults
#
maic=matrix(0,nmodels)
maic[ 1]=qq1	$maic[3]
maic[ 2]=qq2	$maic[3]
maic[ 3]=qq3	$maic[3]
maic[ 4]=qq4	$maic[3]
maic[ 5]=qq5	$maic[3]
maic[ 6]=qq6	$maic[3]
maic[ 7]=qq7	$maic[3]
maic[ 8]=qq8	$maic[3]
maic[ 9]=qq9	$maic[3]
maic[10]=qq10	$maic[3]
#
waic1=matrix(0,nmodels)
waic1[ 1]=qq1	$waic1[3]
waic1[ 2]=qq2	$waic1[3]
waic1[ 3]=qq3	$waic1[3]
waic1[ 4]=qq4	$waic1[3]
waic1[ 5]=qq5	$waic1[3]
waic1[ 6]=qq6	$waic1[3]
waic1[ 7]=qq7	$waic1[3]
waic1[ 8]=qq8	$waic1[3]
waic1[ 9]=qq9	$waic1[3]
waic1[10]=qq10$waic1[3]
#
waic2=matrix(0,nmodels)
waic2[ 1]=qq1	$waic2[3]
waic2[ 2]=qq2	$waic2[3]
waic2[ 3]=qq3	$waic2[3]
waic2[ 4]=qq4	$waic2[3]
waic2[ 5]=qq5	$waic2[3]
waic2[ 6]=qq6	$waic2[3]
waic2[ 7]=qq7	$waic2[3]
waic2[ 8]=qq8	$waic2[3]
waic2[ 9]=qq9	$waic2[3]
waic2[10]=qq10$waic2[3]
#
lsc=matrix(0,nmodels)
lsc[ 1]=qq1$cp_oos_logscore
lsc[ 2]=qq2$cp_oos_logscore
lsc[ 3]=qq3$cp_oos_logscore
lsc[ 4]=qq4$cp_oos_logscore
lsc[ 5]=qq5$cp_oos_logscore
lsc[ 6]=qq6$cp_oos_logscore
lsc[ 7]=qq7$cp_oos_logscore
lsc[ 8]=qq8$cp_oos_logscore
lsc[ 9]=qq9$cp_oos_logscore
lsc[10]=NA
#
# 11 calculate 3 sets of model weights (maic, waic1, waic2)
#
lscd=	exp(0.5*(max(lsc,na.rm=TRUE)+lsc))
maicd=exp(0.5*(max(maic)+maic))
waic1d=exp(0.5*(max(waic1)+waic1))
waic2d=exp(0.5*(max(waic2)+waic2))
logscore_weights=	round(100*lscd	/sum(lscd,na.rm=TRUE)	,digits=1)
maic_weights=			round(100*maicd	/sum(maicd),digits=1)
waic1_weights=		round(100*waic1d	/sum(waic1d),digits=1)
waic2_weights=		round(100*waic2d	/sum(waic2d),digits=1)
#
# 12 print weights table to screen
#
maic=maic
modelselection_df=data.frame(models,maic,maic_weights,lsc,logscore_weights,waic1,waic1_weights,waic2,waic2_weights)
colnames(modelselection_df)=c(	"models",
								"maic","weights",
								"logscore","weights",
								"waic1","weights",
								"waic2","weights")
#
# 13 extract means from the q results
# -except for lnorm cp mean, which has to be calculated from randoms
#
ml_means=matrix(0,nmodels)
ml_means[ 1]=qq1	$ml_mean
ml_means[ 2]=qq2	$ml_mean
ml_means[ 3]=qq3	$ml_mean
ml_means[ 4]=qq4	$ml_mean
ml_means[ 5]=qq5	$ml_mean
ml_means[ 6]=qq6	$ml_mean
ml_means[ 7]=qq7	$ml_mean
ml_means[ 8]=qq8	$ml_mean
ml_means[ 9]=qq9	$ml_mean
ml_means[10]=qq10	$ml_mean
#
cp_means=matrix(0,nmodels)
cp_means[ 1]=qq1	$cp_mean
cp_means[ 2]=qq2	$cp_mean
cp_means[ 3]=qq3	$cp_mean
cp_means[ 4]=mean(rr4	$cp_deviates) #for lnorm, have to calculate from randoms
cp_means[ 5]=qq5	$cp_mean
cp_means[ 6]=qq6	$cp_mean
cp_means[ 7]=qq7	$cp_mean
cp_means[ 8]=qq8  $cp_mean
cp_means[ 9]=qq9	$cp_mean
cp_means[10]=qq10	$cp_mean

# calculate sds from the random deviates
# -and set to inf if we know they are inf

ml_sds=matrix(0,nmodels)
ml_sds[ 1]=sd(rr1	$ml_deviates)
ml_sds[ 2]=ifelse(mle[2,1]>1,sd(rr2$ml_deviates),Inf)
ml_sds[ 3]=sd(rr3	$ml_deviates)
ml_sds[ 4]=sd(rr4	$ml_deviates)
ml_sds[ 5]=sd(rr5	$ml_deviates)
ml_sds[ 6]=sd(rr6	$ml_deviates)
ml_sds[ 7]=sd(rr7	$ml_deviates)
ml_sds[ 8]=sd(rr8	$ml_deviates)
ml_sds[ 9]=sd(rr9	$ml_deviates)
ml_sds[10]=sd(rr10$ml_deviates)

cp_sds=matrix(0,nmodels)
cp_sds[ 1]=sd(rr1	$cp_deviates)
cp_sds[ 2]=Inf
cp_sds[ 3]=sd(rr3	$cp_deviates)
cp_sds[ 4]=sd(rr4	$cp_deviates)
cp_sds[ 5]=Inf
cp_sds[ 6]=sd(rr6	$cp_deviates)
cp_sds[ 7]=sd(rr7	$cp_deviates)
cp_sds[ 8]=Inf
cp_sds[ 9]=sd(rr9	$cp_deviates)
cp_sds[10]=Inf
#
# 14 make data frames to return the results for means and sds
#
means_df=data.frame(models,ml_means,cp_means,ml_sds,cp_sds)
colnames(means_df)=c(	"models","ml_means","cp_means","ml_sds","cp_sds")
#
# 15 return model selection results, means and sds
#
return(list(mleparams_df=mleparams_df,
	modelselection_df=modelselection_df,
	means_df=means_df))

}

Any scripts or data that you put into this service are public.

fitdistcp documentation built on June 8, 2025, 1:04 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fitdistcp
Distribution Fitting with Calibrating Priors for Commonly Used Distributions

R/04a_ms_flat_1tail.R
In fitdistcp: Distribution Fitting with Calibrating Priors for Commonly Used Distributions

Defines functions ms_flat_1tail

Documented in ms_flat_1tail

Try the fitdistcp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fitdistcp Distribution Fitting with Calibrating Priors for Commonly Used Distributions

R/04a_ms_flat_1tail.R In fitdistcp: Distribution Fitting with Calibrating Priors for Commonly Used Distributions

Defines functions ms_flat_1tail

Documented in ms_flat_1tail

Try the fitdistcp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fitdistcp
Distribution Fitting with Calibrating Priors for Commonly Used Distributions

R/04a_ms_flat_1tail.R
In fitdistcp: Distribution Fitting with Calibrating Priors for Commonly Used Distributions