knitr::opts_chunk$set(echo = TRUE) library(fitODBOD) library(ggplot2) library(reshape2) library(grid) library(gridExtra) library(bbmle) library(tibble) library(ggthemes)
IT WOULD CLEARLY BE BENEFICIAL FOR YOU TO USE THE RMD FILES IN THE GITHUB DIRECTORY FOR FURTHER EXPLANATION OR UNDERSTANDING OF THE R CODE FOR THE RESULTS OBTAINED IN THE VIGNETTES.
Fitting an Alternate Binomial or Binomial Mixture distribution is the most crucial part of handling Binomial Outcome Data (BOD). Finding the most suitable distribution, one which is similar to the given data, is very important and essential. In order to compare distributions after fitting, we can choose among several measurements. They are namely
The functions given below are used to fit respective distributions when BOD and estimated parameters are given.
- `fitBin` - fitting the Binomial distribution.
- `fitTriBin` - fitting the Triangular Binomial distribution.
- `fitBetaBin` - fitting the Beta-Binomial distribution.
- `fitKumBin` - fitting the Kumaraswamy Binomial distribution.
- `fitGHGBB` - fitting the Gaussian Hyper-geometric Generalized Beta-Binomial distribution.
- `fitMcGBB` - fitting the McDonald Generalized Beta-Binomial distribution.
- `fitGammaBin` - fitting the Gamma Binomial distribution.
- `fitGrassiaIIBin` - fitting the Grassia II Binomial distribution.
- `fitAddBin` - fitting the Additive Binomial distribution.
- `fitBetaCorrBin` - fitting the Beta Correlated Binomial distribution.
- `fitCOMPBin` - fitting the COM Poisson Binomial distribution.
- `fitCorrBin` - fitting the Correlated Binomial distribution.
- `fitMultiBin` - fitting the Multiplicative Binomial distribution.
- `fitLMBin` - fitting the Lovinson Multiplicative Binomial distribution.

All six Alternate Binomial distributions will be fitted to the Alcohol data week 2 and their expected frequencies will be plotted with the actual frequency values. This plot can be used to identify which distribution best suits the Alcohol data week 2.
# Display the Alcohol data set used throughout this section
Alcohol_data

# Binomial random variable values and the observed week 2 frequencies
BinRanVar <- Alcohol_data[["Days"]]
ActFreq <- Alcohol_data[["week2"]]

# Fit the Binomial distribution to the observed frequencies
BinFreq <- fitBin(BinRanVar, ActFreq)

# Show the Binomial fit
print(BinFreq)
# Estimate the Additive Binomial parameters with EstMLEAddBin
Para_AddBin <- EstMLEAddBin(BinRanVar, ActFreq)

# Show the estimated coefficients
coef(Para_AddBin)

# Keep the two estimates under their own names
AddBin_p <- Para_AddBin[["p"]]
AddBin_alpha <- Para_AddBin[["alpha"]]

# Fit the Additive Binomial distribution using those estimates
AddBinFreq <- fitAddBin(
  BinRanVar,
  ActFreq,
  AddBin_p,
  AddBin_alpha
)

# Show the Additive Binomial fit
print(AddBinFreq)
# Estimate the Beta-Correlated Binomial parameters with EstMLEBetaCorrBin,
# called with cov = 0.001, a = 10 and b = 10
Para_BetaCorrBin <- EstMLEBetaCorrBin(
  x = BinRanVar,
  freq = ActFreq,
  cov = 0.001,
  a = 10,
  b = 10
)

# Show the estimated coefficients, then pick them out by position
BetaCorrBin_est <- coef(Para_BetaCorrBin)
BetaCorrBin_est
BetaCorrBin_cov <- BetaCorrBin_est[1]
BetaCorrBin_a <- BetaCorrBin_est[2]
BetaCorrBin_b <- BetaCorrBin_est[3]

# Fit the Beta-Correlated Binomial distribution using those estimates
BetaCorrBinFreq <- fitBetaCorrBin(
  BinRanVar,
  ActFreq,
  BetaCorrBin_cov,
  BetaCorrBin_a,
  BetaCorrBin_b
)

# Show the Beta-Correlated Binomial fit
print(BetaCorrBinFreq)
# Estimate the COM Poisson Binomial parameters with EstMLECOMPBin,
# called with v = 12.1 and p = 0.9
Para_COMPBin <- EstMLECOMPBin(
  x = BinRanVar,
  freq = ActFreq,
  v = 12.1,
  p = 0.9
)

# Show the estimated coefficients, then pick them out by position
COMPBin_est <- coef(Para_COMPBin)
COMPBin_est
COMPBin_p <- COMPBin_est[1]
COMPBin_v <- COMPBin_est[2]

# Fit the COM Poisson Binomial distribution using those estimates
COMPBinFreq <- fitCOMPBin(
  BinRanVar,
  ActFreq,
  COMPBin_p,
  COMPBin_v
)

# Show the COM Poisson Binomial fit
print(COMPBinFreq)
# Estimate the Correlated Binomial parameters with EstMLECorrBin,
# called with cov = 0.0021 and p = 0.19
Para_CorrBin <- EstMLECorrBin(
  x = BinRanVar,
  freq = ActFreq,
  cov = 0.0021,
  p = 0.19
)

# Show the estimated coefficients, then pick them out by position
CorrBin_est <- coef(Para_CorrBin)
CorrBin_est
CorrBin_p <- CorrBin_est[1]
CorrBin_cov <- CorrBin_est[2]

# Fit the Correlated Binomial distribution using those estimates
CorrBinFreq <- fitCorrBin(
  BinRanVar,
  ActFreq,
  CorrBin_p,
  CorrBin_cov
)

# Show the Correlated Binomial fit
print(CorrBinFreq)
# Estimate the Multiplicative Binomial parameters with EstMLEMultiBin,
# called with theta = 21 and p = 0.19
Para_MultiBin <- EstMLEMultiBin(
  x = BinRanVar,
  freq = ActFreq,
  theta = 21,
  p = 0.19
)

# Show the estimated coefficients, then pick them out by position
MultiBin_est <- coef(Para_MultiBin)
MultiBin_est
MultiBin_p <- MultiBin_est[1]
MultiBin_theta <- MultiBin_est[2]

# Fit the Multiplicative Binomial distribution using those estimates
MultiBinFreq <- fitMultiBin(
  BinRanVar,
  ActFreq,
  MultiBin_p,
  MultiBin_theta
)

# Show the Multiplicative Binomial fit
print(MultiBinFreq)
# Estimate the Lovinson Multiplicative Binomial parameters with EstMLELMBin,
# called with phi = 21 and p = 0.19
Para_LMBin <- EstMLELMBin(
  x = BinRanVar,
  freq = ActFreq,
  phi = 21,
  p = 0.19
)

# Show the estimated coefficients, then pick them out by position
LMBin_est <- coef(Para_LMBin)
LMBin_est
LMBin_p <- LMBin_est[1]
LMBin_phi <- LMBin_est[2]

# Fit the Lovinson Multiplicative Binomial distribution using those estimates
LMBinFreq <- fitLMBin(
  BinRanVar,
  ActFreq,
  LMBin_p,
  LMBin_phi
)

# Show the Lovinson Multiplicative Binomial fit
print(LMBinFreq)
# Gather the observed frequencies and the fitted frequencies of the Binomial
# and the six Alternate Binomial distributions into one table for plotting
ABD_Data <- tibble(
  Bin_RV = BinRanVar,
  Actual_Freq = ActFreq,
  EstFreq_BinD = fitted(BinFreq),
  EstFreq_AddBinD = fitted(AddBinFreq),
  EstFreq_BetaCorrBinD = fitted(BetaCorrBinFreq),
  EstFreq_COMPBinD = fitted(COMPBinFreq),
  EstFreq_CorrBinD = fitted(CorrBinFreq),
  EstFreq_MultiBinD = fitted(MultiBinFreq),
  EstFreq_LMBinD = fitted(LMBinFreq)
)

# One point layer and one line layer per series, each in its own colour;
# the coloured text annotations stand in for a legend
ABD_freq <- ggplot(ABD_Data) +
  theme_get() +
  # Actual frequencies
  geom_point(aes(x = Bin_RV, y = Actual_Freq), color = "red", size = 2) +
  geom_line(aes(x = Bin_RV, y = Actual_Freq), color = "red", size = 1) +
  # Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_BinD), color = "darkblue", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_BinD), color = "darkblue", size = 1) +
  # Additive Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_AddBinD),
             color = "forestgreen", size = 3.25) +
  geom_line(aes(x = Bin_RV, y = EstFreq_AddBinD),
            color = "forestgreen", size = 2.25) +
  # Beta Correlated Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_BetaCorrBinD),
             color = "yellow4", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_BetaCorrBinD),
            color = "yellow4", size = 1) +
  # COM Poisson Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_COMPBinD),
             color = "purple", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_COMPBinD),
            color = "purple", size = 1) +
  # Correlated Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_CorrBinD),
             color = "orangered", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_CorrBinD),
            color = "orangered", size = 1) +
  # Multiplicative Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_MultiBinD),
             color = "deeppink", size = 3.25) +
  geom_line(aes(x = Bin_RV, y = EstFreq_MultiBinD),
            color = "deeppink", size = 2.25) +
  # Lovinson Multiplicative Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_LMBinD),
             color = "#008080", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_LMBinD),
            color = "#008080", size = 1) +
  # Titles, theme and axis breaks
  ggtitle("Plot of fitted and actual frequencies of Alcohol BOD week2") +
  xlab("Binomial Random Variables from 0 to 7") +
  ylab("Frequency Values Estimated and Actual") +
  theme_clean() +
  scale_y_continuous(breaks = seq(0, 120, 5)) +
  scale_x_continuous(breaks = seq(0, 7, 1)) +
  # Colour-coded labels, left-hand column
  annotate("text", x = 0, y = 120, label = "Actual", color = "red") +
  annotate("text", x = 0, y = 116, label = "Binomial", color = "darkblue") +
  annotate("text", x = 0.35, y = 112, label = "Additive-Binomial",
           color = "forestgreen") +
  annotate("text", x = 0.55, y = 108, label = "Beta Correlated Binomial",
           color = "yellow4") +
  # Colour-coded labels, right-hand column
  annotate("text", x = 6.45, y = 120, label = "COM Poisson Binomial",
           color = "purple") +
  annotate("text", x = 6.45, y = 116, label = "Correlated Binomial",
           color = "orangered") +
  annotate("text", x = 6.45, y = 112, label = "Multiplicative Binomial",
           color = "deeppink") +
  annotate("text", x = 6.15, y = 108,
           label = "Lovinson Multiplicative Binomial", color = "#008080")

# Build the plotly version of the ggplot object
plotly::plotly_build(ABD_freq)
It is clearly visible that the Additive and Correlated Binomial distributions behave very similarly; both generate the same frequency values. Even so, they are close to the actual frequencies. Comparing the expected frequencies generated by the Binomial distribution with the actual frequencies shows a very large difference, so the Binomial distribution does not suit the Alcohol data of week 2. The Multiplicative and COM Poisson Binomial distributions show values close to the actual frequencies. The Multiplicative and Lovinson Multiplicative distributions behave similarly as a pair.
Finally, the only distribution left is the Beta Correlated Binomial distribution, which is closer still to the actual frequencies. Therefore it is clear that the most suitable distribution for the alcohol data of week 2 is the Beta Correlated Binomial distribution, the second choice is the Multiplicative and COM Poisson Binomial distributions, and the final choice is the Correlated and Additive Binomial distributions.
Of the eight BMD distributions, all except the Uniform Binomial distribution can be used for fitting the Alcohol data of week 2. Here too, as above, a plot was generated to compare the estimated frequencies with the actual frequencies.
# Estimate the Triangular Binomial parameter with EstMLETriBin
Para_TriBin <- EstMLETriBin(BinRanVar, ActFreq)

# Show the estimated coefficient
coef(Para_TriBin)

# Keep the estimated mode under its own name
TriBin_c <- Para_TriBin[["mode"]]

# Fit the Triangular Binomial distribution using that estimate
TriBinFreq <- fitTriBin(
  BinRanVar,
  ActFreq,
  TriBin_c
)

# Show the Triangular Binomial fit
print(TriBinFreq)
# Estimate the Beta-Binomial parameters with EstMLEBetaBin,
# called with a = 10 and b = 10
Para_BetaBin <- EstMLEBetaBin(
  x = BinRanVar,
  freq = ActFreq,
  a = 10,
  b = 10
)

# Show the estimated coefficients, then pick them out by position
BetaBin_est <- coef(Para_BetaBin)
BetaBin_est
BetaBin_a <- BetaBin_est[1]
BetaBin_b <- BetaBin_est[2]

# Fit the Beta-Binomial distribution using those estimates
BetaBinFreq <- fitBetaBin(
  BinRanVar,
  ActFreq,
  BetaBin_a,
  BetaBin_b
)

# Show the Beta-Binomial fit
print(BetaBinFreq)
# Estimate the Kumaraswamy Binomial parameters with EstMLEKumBin,
# called with a = 12.1, b = 0.9 and it = 10000
Para_KumBin <- EstMLEKumBin(
  x = BinRanVar,
  freq = ActFreq,
  a = 12.1,
  b = 0.9,
  it = 10000
)

# Show the estimated coefficients, then pick them out by position
KumBin_est <- coef(Para_KumBin)
KumBin_est
KumBin_a <- KumBin_est[1]
KumBin_b <- KumBin_est[2]
KumBin_it <- KumBin_est[3]

# Fit the Kumaraswamy Binomial distribution using those estimates.
# NOTE(review): the third coefficient is multiplied by 10 before it is passed
# on; the reason is not visible in this file - confirm it is intentional.
KumBinFreq <- fitKumBin(
  BinRanVar,
  ActFreq,
  KumBin_a,
  KumBin_b,
  KumBin_it * 10
)

# Show the Kumaraswamy Binomial fit
print(KumBinFreq)
# Estimate the GHGBB parameters with EstMLEGHGBB,
# called with a = 0.0021, b = 0.19 and c = 0.3
Para_GHGBB <- EstMLEGHGBB(
  x = BinRanVar,
  freq = ActFreq,
  a = 0.0021,
  b = 0.19,
  c = 0.3
)

# Show the estimated coefficients, then pick them out by position
GHGBB_est <- coef(Para_GHGBB)
GHGBB_est
GHGBB_a <- GHGBB_est[1]
GHGBB_b <- GHGBB_est[2]
GHGBB_c <- GHGBB_est[3]

# Fit the GHGBB distribution using those estimates
GHGBBFreq <- fitGHGBB(
  BinRanVar,
  ActFreq,
  GHGBB_a,
  GHGBB_b,
  GHGBB_c
)

# Show the GHGBB fit
print(GHGBBFreq)
# Estimate the McGBB parameters with EstMLEMcGBB,
# called with a = 21, b = 0.19 and c = 0.1
Para_McGBB <- EstMLEMcGBB(
  x = BinRanVar,
  freq = ActFreq,
  a = 21,
  b = 0.19,
  c = 0.1
)

# Show the estimated coefficients, then pick them out by position
McGBB_est <- coef(Para_McGBB)
McGBB_est
McGBB_a <- McGBB_est[1]
McGBB_b <- McGBB_est[2]
McGBB_c <- McGBB_est[3]

# Fit the McGBB distribution using those estimates
McGBBFreq <- fitMcGBB(
  BinRanVar,
  ActFreq,
  McGBB_a,
  McGBB_b,
  McGBB_c
)

# Show the McGBB fit
print(McGBBFreq)
# Estimate the Gamma Binomial parameters with EstMLEGammaBin,
# called with c = 10 and l = 10
Para_GammaBin <- EstMLEGammaBin(
  x = BinRanVar,
  freq = ActFreq,
  c = 10,
  l = 10
)

# Show the estimated coefficients, then pick them out by position
GammaBin_est <- coef(Para_GammaBin)
GammaBin_est
GammaBin_c <- GammaBin_est[1]
GammaBin_l <- GammaBin_est[2]

# Fit the Gamma Binomial distribution using those estimates
GammaBinFreq <- fitGammaBin(
  BinRanVar,
  ActFreq,
  GammaBin_c,
  GammaBin_l
)

# Show the Gamma Binomial fit
print(GammaBinFreq)
# Estimate the Grassia II Binomial parameters with EstMLEGrassiaIIBin,
# called with a = 10 and b = 10
Para_GrassiaIIBin <- EstMLEGrassiaIIBin(
  x = BinRanVar,
  freq = ActFreq,
  a = 10,
  b = 10
)

# Show the estimated coefficients, then pick them out by position
GrassiaIIBin_est <- coef(Para_GrassiaIIBin)
GrassiaIIBin_est
GrassiaIIBin_a <- GrassiaIIBin_est[1]
GrassiaIIBin_b <- GrassiaIIBin_est[2]

# Fit the Grassia II Binomial distribution using those estimates
GrassiaIIBinFreq <- fitGrassiaIIBin(
  BinRanVar,
  ActFreq,
  GrassiaIIBin_a,
  GrassiaIIBin_b
)

# Show the Grassia II Binomial fit
print(GrassiaIIBinFreq)
# Gather the observed frequencies and the fitted frequencies of the Binomial
# and the Binomial Mixture distributions into one table for plotting
BMD_Data <- tibble(
  Bin_RV = BinRanVar,
  Actual_Freq = ActFreq,
  EstFreq_BinD = fitted(BinFreq),
  EstFreq_TriBinD = fitted(TriBinFreq),
  EstFreq_BetaBinD = fitted(BetaBinFreq),
  EstFreq_KumBinD = fitted(KumBinFreq),
  EstFreq_GHGBBD = fitted(GHGBBFreq),
  EstFreq_McGBBD = fitted(McGBBFreq),
  EstFreq_GammaBinD = fitted(GammaBinFreq),
  EstFreq_GrassiaIIBinD = fitted(GrassiaIIBinFreq)
)

# One point layer and one line layer per series, each in its own colour;
# the coloured text annotations stand in for a legend
BMD_freq <- ggplot(BMD_Data) +
  theme_get() +
  # Actual frequencies
  geom_point(aes(x = Bin_RV, y = Actual_Freq), color = "red", size = 2) +
  geom_line(aes(x = Bin_RV, y = Actual_Freq), color = "red", size = 1) +
  # Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_BinD), color = "darkblue", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_BinD), color = "darkblue", size = 1) +
  # Triangular Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_TriBinD),
             color = "forestgreen", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_TriBinD),
            color = "forestgreen", size = 1) +
  # Beta-Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_BetaBinD),
             color = "yellow4", size = 3.75) +
  geom_line(aes(x = Bin_RV, y = EstFreq_BetaBinD),
            color = "yellow4", size = 2.75) +
  # Kumaraswamy Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_KumBinD),
             color = "purple", size = 3) +
  geom_line(aes(x = Bin_RV, y = EstFreq_KumBinD),
            color = "purple", size = 2.25) +
  # GHGBB
  geom_point(aes(x = Bin_RV, y = EstFreq_GHGBBD),
             color = "orangered", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_GHGBBD),
            color = "orangered", size = 1) +
  # McGBB
  geom_point(aes(x = Bin_RV, y = EstFreq_McGBBD),
             color = "deeppink", size = 2.5) +
  geom_line(aes(x = Bin_RV, y = EstFreq_McGBBD),
            color = "deeppink", size = 1.5) +
  # Gamma Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_GammaBinD),
             color = "#7DBDBD", size = 2) +
  geom_line(aes(x = Bin_RV, y = EstFreq_GammaBinD),
            color = "#7DBDBD", size = 1.15) +
  # Grassia II Binomial
  geom_point(aes(x = Bin_RV, y = EstFreq_GrassiaIIBinD),
             color = "#FC92A3", size = 1) +
  geom_line(aes(x = Bin_RV, y = EstFreq_GrassiaIIBinD),
            color = "#FC92A3", size = 0.75) +
  # Titles, theme and axis breaks
  ggtitle("Plot of fitted and actual frequencies of Alcohol BOD week2") +
  xlab("Binomial Random Variables from 0 to 7") +
  ylab("Frequency Values Estimated and Actual") +
  theme_clean() +
  scale_y_continuous(breaks = seq(0, 120, 5)) +
  scale_x_continuous(breaks = seq(0, 7, 1)) +
  # Colour-coded labels, left-hand column
  annotate("text", x = 0, y = 120, label = "Actual", color = "red") +
  annotate("text", x = 0, y = 116, label = "Binomial", color = "darkblue") +
  annotate("text", x = 0.35, y = 112, label = "Triangular Binomial",
           color = "forestgreen") +
  annotate("text", x = 0.25, y = 108, label = "Beta-Binomial",
           color = "yellow4") +
  # Colour-coded labels, right-hand column
  annotate("text", x = 6.5, y = 120, label = "Kumaraswamy Binomial",
           color = "purple") +
  annotate("text", x = 6.95, y = 116, label = "GHGBB", color = "orangered") +
  annotate("text", x = 6.95, y = 112, label = "McGBB", color = "deeppink") +
  annotate("text", x = 6.5, y = 108, label = "Gamma Binomial",
           color = "#7DBDBD") +
  annotate("text", x = 6.5, y = 104, label = "Grassia II Binomial",
           color = "#FC92A3")

# Build the plotly version of the ggplot object
plotly::plotly_build(BMD_freq)
Estimated frequencies from the Triangular Binomial distribution are far better than estimated frequencies from the Binomial distribution. It is clear that the Beta-Binomial, Kumaraswamy Binomial, Gamma Binomial and Grassia II Binomial distributions behave identically for the alcohol data of week 2. The McDonald Generalized Beta-Binomial distribution also behaves equally for the alcohol data of week 2.
Finally, Gaussian Hyper-geometric Generalized Beta-Binomial distribution is best suited and generates more accurate frequencies. Therefore, first choice is GHGBB distribution and second choice would be McGBB distribution.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.