#' Alcohol data
#'
#' Lemmens, Knibbe and Tan (1988) described a study of self-reported alcohol frequencies.
#' The number of days on which alcohol was consumed in each of two reference weeks was
#' self-reported separately by a randomly selected sample of 399 respondents in the
#' Netherlands in 1983.
#' The number of days a given individual consumes alcohol out of the 7 days in a week can be
#' treated as a binomial variable.
#' The collection of all such variables from all respondents is defined as "Binomial Outcome Data".
#'
#' @format A data frame with 3 columns and 8 rows.
#' \describe{
#' \item{\code{Days}}{No of Days Drunk}
#' \item{\code{week1}}{Observed frequencies for week1}
#' \item{\code{week2}}{Observed frequencies for week2}
#' }
#'
#' @examples
#' Alcohol_data$Days # extracting the binomial random variables
#' sum(Alcohol_data$week2) # summing all the frequencies in week2
#'
#' @source
#' Extracted from
#'
#' Manoj, C., Wijekoon, P. & Yapa, R.D., 2013. The McDonald Generalized Beta-Binomial Distribution: A New
#' Binomial Mixture Distribution and Simulation Based Comparison with Its Nested Distributions in Handling
#' Overdispersion. International Journal of Statistics and Probability, 2(2), pp.24-41.
#'
#' Available at: \url{http://www.ccsenet.org/journal/index.php/ijsp/article/view/23491}
#'
"Alcohol_data"
#' Male children data
#'
#' The number of male children among the first 12 children of family size 13 in 6115 families taken
#' from the hospital records in the nineteenth century Saxony (Sokal & Rohlf(1994), Lindsey (1995),
#' p. 59). The thirteenth child is ignored to assuage the effect of families non-randomly stopping
#' when a desired gender is reached.
#'
#' @format A data frame with 2 columns and 13 rows.
#' \describe{
#' \item{\code{No_of_Males}}{No of Male children among first 12 children of family size 13}
#' \item{\code{freq}}{Observed frequencies for corresponding male children}
#' }
#'
#' @examples
#' Male_Children$No_of_Males # extracting the binomial random variables
#' sum(Male_Children$freq) # summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Borges, P., Rodrigues, J., Balakrishnan, N. and Bazan, J., 2014. A COM-Poisson type
#' generalization of the binomial distribution and its properties and applications.
#' Statistics & Probability Letters, 87, pp.158-166.
#'
#' Available at: \url{http://conteudo.icmc.usp.br/CMS/Arquivos/arquivos_enviados/BIBLIOTECA_113_NSE_90.pdf}
#'
"Male_Children"
#' Plant Disease Incidence data
#'
#' Cochran (1936) provided data comprising the number of tomato spotted wilt virus (TSWV) infected
#' tomato plants in field trials in Australia. The field map was divided into 160 'quadrats', with
#' 9 tomato plants in each quadrat. The numbers of TSWV infected tomato plants were then counted in
#' each quadrat.
#' The number of infected plants out of 9 plants per quadrat can be treated as a binomial variable.
#' The collection of all such responses from all 160 quadrats forms "binomial outcome data".
#' The data set provided below is similar to Cochran's plant disease incidence data.
#' Marcus R. (1984): orange trees infected with citrus tristeza virus (CTV) in an orchard in central
#' Israel.
#' The field map was divided into 84 "quadrats" of 4 rows x 3 columns, and the total number
#' (1981 + 1982) of infected trees out of a maximum of n = 12 in each quadrat was counted.
#'
#' @format A data frame with 2 columns and 10 rows
#' \describe{
#' \item{\code{Dis.plant}}{Diseased Plants}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Plant_DiseaseData$Dis.plant # extracting the binomial random variables
#' sum(Plant_DiseaseData$fre) # summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Hughes, G., 1993. Using the Beta-Binomial Distribution to Describe Aggregated Patterns of Disease Incidence.
#' Phytopathology, 83(9), p.759.
#'
#' Available at: \url{http://www.apsnet.org/publications/phytopathology/backissues/Documents/1993Abstracts/Phyto_83_759.htm}.
#'
"Plant_DiseaseData"
#' Course Data
#'
#' The data refer to the numbers of courses taken by a class of 65 students from the first year of the
#' Department of Statistics of Athens University of Economics. The students enrolled in this class
#' attended 8 courses during the first year of their study. The total numbers of successful
#' examinations (including resits) were recorded.
#'
#' @format A data frame with 2 columns and 9 rows
#' \describe{
#' \item{\code{sub.pass}}{subjects passed}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Course_data$sub.pass # extracting the binomial random variables
#' sum(Course_data$fre) # summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Karlis, D. & Xekalaki, E., 2008. The Polygonal Distribution. In Advances in Mathematical and Statistical
#' Modeling. Boston: Birkhäuser Boston, pp. 21-33.
#'
#' Available at: \url{http://dx.doi.org/10.1007/978-0-8176-4626-4_2}.
#'
"Course_data"
#' Chromosome Data
#'
#' Data in this example refer to 337 observations on the secondary
#' association of chromosomes in Brassica; n, which is now the number of
#' chromosomes, equals 3 and X is the number of pairs of bivalents showing association.
#'
#' @format A data frame with 2 columns and 4 rows
#' \describe{
#' \item{\code{No.of.Asso}}{No of Associations}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Chromosome_data$No.of.Asso #extracting the binomial random variables
#' sum(Chromosome_data$fre) #summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Paul, S.R., 1985. A three-parameter generalization of the binomial distribution. Communications in
#' Statistics - Theory and Methods, 14(6), pp.1497-1506.
#'
#' Available at: \url{http://www.tandfonline.com/doi/abs/10.1080/03610928508828990}.
"Chromosome_data"
#' Exam Data
#'
#' In an examination, there were 9 questions set on a particular
#' topic. Each question is marked out of a total of 20 and in assessing the
#' final class of a candidate, particular attention is paid to the total
#' number of questions for which he has an "alpha", i.e., at least 15 out of 20,
#' as well as his total number of marks. His number of alpha's is a rough
#' indication of the "quality" of his exam performance. Thus, the distribution
#' of alpha's over the candidates is of interest. There were 209 candidates
#' attempting questions from this section of 9 questions and a total of 326 alpha's
#' was awarded. So we treat 9 as the "litter size", and the dichotomous response
#' is whether or not he got an alpha on the question.
#'
#' @format A data frame with 2 columns and 10 rows
#' \describe{
#' \item{\code{No.of.alpha}}{No of Alphas}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Exam_data$No.of.alpha #extracting the binomial random variables
#' sum(Exam_data$fre) #summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Paul, S.R., 1985. A three-parameter generalization of the binomial distribution. Communications in
#' Statistics - Theory and Methods, 14(6), pp.1497-1506.
#'
#' Available at: \url{http://www.tandfonline.com/doi/abs/10.1080/03610928508828990}
#'
"Exam_data"
#' Terror Data USA
#'
#' Jenkins and Johnson (1975) compiled a chronology of incidents of international terrorism from 1/1968
#' through 04/1974. During this period 507 incidents are recorded in the world, where 64 incidents occurred
#' in the United States and 65 ones in Argentina.
#'
#' @format A data frame with 2 columns and 9 rows
#' \describe{
#' \item{\code{Incidents}}{No of Incidents Occurred}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Terror_data_USA$Incidents #extracting the binomial random variables
#' sum(Terror_data_USA$fre) #summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Li, X. H., Huang, Y. Y., & Zhao, X. Y. (2011). The Kumaraswamy Binomial Distribution. Chinese Journal
#' of Applied Probability and Statistics, 27(5), 511-521.
#'
"Terror_data_USA"
#' Terror Data ARG
#'
#' Jenkins and Johnson (1975) compiled a chronology of incidents of international terrorism from 1/1968
#' through 04/1974. During this period 507 incidents are recorded in the world, where 64 incidents occurred
#' in the United States and 65 ones in Argentina.
#'
#' @format A data frame with 2 columns and 9 rows
#' \describe{
#' \item{\code{Incidents}}{No of Incidents Occurred}
#' \item{\code{fre}}{Observed frequencies}
#' }
#'
#' @examples
#' Terror_data_ARG$Incidents #extracting the binomial random variables
#' sum(Terror_data_ARG$fre) #summing all the frequencies
#'
#' @source
#' Extracted from
#'
#' Li, X. H., Huang, Y. Y., & Zhao, X. Y. (2011). The Kumaraswamy Binomial Distribution. Chinese Journal
#' of Applied Probability and Statistics, 27(5), 511-521.
#'
"Terror_data_ARG"
#' Family Epidemics
#'
#' In this investigation, families of the same size, two parents and three children, living in different
#' circumstances of domestic overcrowding were visited at fortnightly intervals. The date of onset and the clinical
#' nature of upper respiratory infections experienced by each member of the family were charted on a time scale
#' marked off in days. Family epidemics of acute coryza-or common colds-were thus available for analysis.
#'
#' By inspection of the epidemic time charts, it was possible to identify new or primary introductions of illness
#' into the household by the onset of a cold after a lapse of 10 days since the last such case in the same home.
#' Two such cases occurring on the same or succeeding days were classified as multiple primaries. Thereafter, the
#' links in the epidemic chain of spread were defined by an interval of one day or more between successive cases
#' in the same family. These family epidemics could then be described thus 1-2-1, 1-1-1-0, 2-1-0, etc. It must be
#' emphasized that although this method of classification is somewhat arbitrary, it was completed before the
#' corresponding theoretical distributions were worked out and the interval chosen agrees with the distribution
#' of presumptive incubation periods of the common cold seen in field surveys (e.g. Badger, Dingle, Feller,
#' Hodges, Jordan, and Rammelkamp, 1953).
#'
#' @format A data frame with 6 columns and 5 rows
#' \describe{
#' \item{\code{Cases}}{No of Further Cases}
#' \item{\code{Families}}{No of Families}
#' \item{\code{Father}}{Father with Status of Introducing Cases}
#' \item{\code{Mother}}{Mother with Status of Introducing Cases}
#' \item{\code{SChild}}{School Child with Status of Introducing Cases}
#' \item{\code{PSChild}}{Pre-School Child with Status of Introducing Cases}
#' }
#'
#' @examples
#'
#' Epidemic_Cold$Cases
#' sum(Epidemic_Cold$SChild)
#'
#' @source
#' Extracted from
#'
#' Heasman, M. A. and Reid, D. D. (1961). "Theory and observation in family epidemics of the common cold."
#' Br. J. prev. soc. Med., 15, 12-16.
#'
"Epidemic_Cold"
# Package attach hook: emits a start-up greeting pointing to the package site.
# R looks this hook up as an internal object of the namespace, so it is
# deliberately not tagged for export.
#
# libname, pkgname: arguments of the hook signature; not used by this body.
.onAttach <- function(libname, pkgname) {
  # packageStartupMessage() is used so the greeting can be silenced with
  # suppressPackageStartupMessages().
  packageStartupMessage("Hello, This is Amalan. For more details refer --> https://amalan-constat.github.io/R-fitODBOD/index.html")
}
#' Binomial Data Extraction from Raw data
#'
#' This function converts a vector of raw observations into Binomial Outcome Data, i.e. the
#' distinct values observed and how often each occurs, which is a more presentable form for the user.
#'
#' @usage
#' BODextract(data)
#'
#' @param data vector of observations
#'
#' @details
#' \strong{NOTE} : \code{NA}, \code{NaN} and infinite values in the input are removed
#' before the frequencies are tabulated.
#'
#' @return
#' The output of \code{BODextract} gives a list format consisting
#'
#' \code{RV} binomial random variables in vector form
#'
#' \code{Freq} corresponding frequencies in vector form
#'
#' @examples
#' datapoints <- sample(0:10,340,replace=TRUE) #creating a sample set of observations
#' BODextract(datapoints) #extracting binomial outcome data from observations
#' Random.variable <- BODextract(datapoints)$RV #extracting the binomial random variables
#'
#' @export
BODextract <- function(data) {
  # Tabulate a vector of raw observations into distinct values and counts.
  # Returns list(RV = <distinct values as integers>, Freq = <their counts>).

  # When the input holds NaN, NA or infinite entries, strip them (in that
  # order) before tabulating; otherwise the input is tabulated as supplied.
  contaminated <- any(is.na(data)) || any(is.infinite(data)) || any(is.nan(data))
  observations <- data
  if (contaminated) {
    observations <- observations[!is.nan(observations)]
    observations <- observations[!is.na(observations)]
    observations <- observations[!is.infinite(observations)]
  }

  # One-column matrix of counts; its row names are the distinct values seen.
  count_matrix <- as.matrix(table(observations))
  distinct_values <- as.integer(row.names(count_matrix))

  # Dimension names are dropped so the frequencies come back as a bare vector.
  counts <- unname(count_matrix)[, 1]

  list("RV" = distinct_values, "Freq" = counts)
}
#' Fitting the Binomial Distribution when binomial random variable, frequency and probability
#' value are given
#'
#' The function will fit the Binomial distribution when random variables, corresponding
#' frequencies and probability value are given. It will provide the expected frequencies, chi-squared
#' test statistics value, p value and degree of freedom so that it can be
#' seen if this distribution fits the data.
#'
#' @usage fitBin(x,obs.freq,p=0)
#'
#' @param x vector of binomial random variables.
#' @param obs.freq vector of frequencies.
#' @param p single value for probability; the default \code{p=0} means the probability is estimated from the data.
#'
#' @details
#' \deqn{x = 0,1,2,...}
#' \deqn{0 \le p \le 1}
#' \deqn{obs.freq \ge 0}
#'
#' \strong{NOTE} : If input parameters are not in given domain conditions
#' necessary error messages will be provided to go further.
#'
#' @return
#' The output of \code{fitBin} is a list of class \code{fitB} and \code{fit} consisting of
#'
#' \code{bin.ran.var} binomial random variables.
#'
#' \code{obs.freq} corresponding observed frequencies.
#'
#' \code{exp.freq} corresponding expected frequencies.
#'
#' \code{statistic} chi-squared test statistics value.
#'
#' \code{df} degree of freedom.
#'
#' \code{p.value} probability value by chi-squared test statistic.
#'
#' \code{fitB} fitted probability values of \code{dbinom}.
#'
#' \code{phat} estimated probability value.
#'
#' \code{call} the inputs of the function.
#'
#' @examples
#' No.D.D <- 0:7 #assigning the random variables
#' Obs.fre.1 <- c(47,54,43,40,40,41,39,95) #assigning the corresponding frequencies
#'
#'
#' #fitting when the random variable,frequencies are given.
#' fitBin(No.D.D,Obs.fre.1)
#'
#' @export
fitBin <- function(x, obs.freq, p = 0) {
  # Fit a Binomial distribution with max(x) trials to frequency data and
  # assess the fit with a chi-squared goodness-of-fit test.
  #
  # x        : vector of binomial random variables (outcomes).
  # obs.freq : observed frequency of each outcome in x.
  # p        : single probability in [0, 1]; p = 0 (the default) means the
  #            probability is estimated from the data.
  #
  # Returns a list of class c("fitB", "fit") holding the inputs, expected
  # frequencies, test statistic, degrees of freedom, p-value, fitted
  # probabilities, the probability used and the call.
  # Stops on NA/NaN/infinite input, mismatched lengths, a non-scalar or
  # out-of-range p, or (when p is estimated) non-positive degrees of freedom.

  inputs <- c(x, obs.freq, p)
  if (any(is.na(inputs)) || any(is.infinite(inputs)) || any(is.nan(inputs))) {
    stop("NA or Infinite or NAN values in the Input")
  }
  if (length(p) != 1) {
    stop("p must be a single probability value")
  }
  if (length(x) != length(obs.freq)) {
    stop("x and obs.freq must have the same length")
  }
  # A probability must lie in [0, 1]; either violation is an error.
  if (p < 0 || p > 1) {
    stop("Probability value cannot be less than zero or greater than one")
  }

  # Informs the user when the expected frequencies make the chi-squared
  # approximation questionable (message only; the fit is still returned).
  note_small_expected <- function(expected) {
    smallest <- min(expected)
    if (smallest < 5 && smallest > 0) {
      message("Chi-squared approximation may be doubtful because expected frequency is less than 5")
    }
    if (smallest == 0) {
      message("Chi-squared approximation is not suitable because expected frequency approximates to zero")
    }
  }

  trials <- max(x)          # the largest outcome is used as the number of trials
  total <- sum(obs.freq)

  if (p == 0) {
    # Probability not supplied: estimate it from the data.
    p.hat <- sum(x * obs.freq) / (trials * total)
    est.prob <- stats::dbinom(x, trials, p.hat)
    exp.freq <- round(total * est.prob, 2)

    # Degrees of freedom are validated before they are used for the p-value.
    df <- length(x) - 2
    if (df <= 0) {
      stop("Degrees of freedom cannot be less than or equal to zero")
    }

    # The statistic is computed from the rounded expected frequencies.
    statistic <- sum(((obs.freq - exp.freq)^2) / exp.freq)
    p.value <- 1 - stats::pchisq(statistic, df)

    note_small_expected(exp.freq)

    final <- list("bin.ran.var" = x, "obs.freq" = obs.freq, "exp.freq" = exp.freq,
                  "statistic" = round(statistic, 4), "df" = df,
                  "p.value" = round(p.value, 4), "fitB" = est.prob,
                  "phat" = p.hat, "call" = match.call())
  } else {
    # Probability supplied: test the observed counts against the fitted
    # probabilities with stats::chisq.test().
    est.prob <- stats::dbinom(x, trials, p)
    exp.freq <- round(total * est.prob, 2)
    ans <- stats::chisq.test(x = obs.freq, p = est.prob)

    note_small_expected(exp.freq)

    final <- list("bin.ran.var" = x, "obs.freq" = obs.freq, "exp.freq" = exp.freq,
                  "statistic" = round(ans$statistic, 4), "df" = ans$parameter,
                  "p.value" = round(ans$p.value, 4), "fitB" = est.prob,
                  "phat" = p, "call" = match.call())
  }

  class(final) <- c("fitB", "fit")
  return(final)
}
#' @method fitBin default
#' @export
fitBin.default <- function(x, obs.freq, p = 0) {
  # Thin wrapper around fitBin(). The caller's p is forwarded so that a
  # supplied probability is honoured rather than replaced by 0.
  est <- fitBin(x, obs.freq, p = p)
  return(est)
}
#' @method print fitB
#' @export
print.fitB <- function(x, ...) {
  # Print the call followed by the observed/expected frequencies, the
  # probability used and the chi-squared test results of a "fitB" object.
  # Returns x invisibly, as is conventional for print methods.
  cat("Call: \n")
  print(x$call)
  cat("\nChi-squared test for Binomial Distribution \n\t\nObserved Frequency : ", x$obs.freq,
      "\n\t\nexpected Frequency : ", x$exp.freq,
      "\n\t\nestimated probability value :", x$phat,
      "\n\t\nX-squared :", x$statistic, " ,df :", x$df, " ,p-value :", x$p.value, "\n")
  invisible(x)
}
#' @method summary fitB
#' @export
summary.fitB <- function(object, ...) {
  # Write the call and then a text report of the chi-squared fit stored in a
  # "fitB" object: observed and expected frequencies, the probability used,
  # the test statistic, degrees of freedom and p-value.
  report_header <- "\nChi-squared test for Binomial Distribution \n\t\nObserved Frequency : "

  cat("Call: \n")
  print(object$call)
  cat(
    report_header, object$obs.freq,
    "\n\t\nexpected Frequency : ", object$exp.freq,
    "\n\t\nestimated probability value :", object$phat,
    " \n\t\nX-squared :", object$statistic,
    " ,df :", object$df,
    " ,p-value :", object$p.value,
    "\n"
  )
}
#' @importFrom stats dbinom
#' @importFrom stats chisq.test
#' @importFrom stats pchisq
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.