Nothing
#' TV and Behavior
#'
#' Data regarding aggressive behavior in relation to exposure to violent
#' television programs used in Example 10.5
#'
#' This is data regarding aggressive behavior in relation to exposure to
#' violent television programs from Gibbons (1997) with the following
#' exposition:
#'
#' \ldots{} a group of children are matched as well as possible as regards home
#' environment, genetic factors, intelligence, parental attitudes, and so
#' forth, in an effort to minimize factors other than TV that might influence a
#' tendency for aggressive behavior. In each of the resulting 16 pairs, one
#' child is randomly selected to view the most violent shows on TV, while the
#' other watches cartoons, situation comedies, and the like. The children are
#' then subjected to a series of tests designed to produce an ordinal measure
#' of their aggression factors. (pages 143-144)
#'
#' @name Aggression
#' @docType data
#' @format A data frame with 16 observations on the following 2 variables:
#' \itemize{
#' \item \code{violence} (an integer vector)
#' \item \code{noviolence} (an integer vector)
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Gibbons, J. D. (1997) \emph{Nonparametric Methods for Quantitative
#' Analysis}. American Sciences Press.
#' @keywords datasets
#' @examples
#'
#' with(data = Aggression,
#' wilcox.test(violence, noviolence, paired = TRUE,
#' alternative = "greater"))
#'
NULL
#' Apple Hardness
#'
#' An experiment was undertaken where seventeen recently picked (\code{Fresh})
#' apples were randomly selected and measured for hardness. Seventeen apples
#' were also randomly selected from a warehouse (\code{Warehouse}) where the
#' apples had been stored for one week. Data are used in Example 8.10.
#'
#'
#' @name Apple
#' @docType data
#' @format A data frame with 17 observations on the following 2 variables:
#' \itemize{
#' \item \code{Fresh} (hardness rating measured in \eqn{\texttt{kg}/\texttt{meter}^2})
#' \item \code{Warehouse} (hardness rating measured in \eqn{\texttt{kg}/\texttt{meter}^2})
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' # Figure 8.5
#' attach(Apple)
#' par(pty = "s")
#' Altblue <- "#A9E2FF"
#' Adkblue <- "#0080FF"
#' fresh <- qqnorm(Fresh)
#' old <- qqnorm(Warehouse)
#' plot(fresh, type = "n",ylab = "Sample Quantiles", xlab = "Theoretical Quantiles")
#' qqline(Fresh, col = Altblue)
#' qqline(Warehouse, col = Adkblue)
#' points(fresh, col = Altblue, pch = 16, cex = 1.2)
#' points(old, col = Adkblue, pch = 17)
#' legend(-1.75, 9.45, c("Fresh", "Warehouse"), col = c(Altblue, Adkblue),
#' text.col = c("black","black"), pch = c(16, 17), lty = c(1, 1), bg = "gray95", cex = 0.75)
#' title("Q-Q Normal Plots")
#' detach(Apple)
#' # Trellis approach
#' qqmath(~c(Fresh, Warehouse), type = c("p","r"), pch = c(16, 17),
#' cex = 1.2, col=c("#A9E2FF", "#0080FF"),
#' groups=rep(c("Fresh", "Warehouse"), c(length(Fresh), length(Warehouse))),
#' data = Apple, ylab = "Sample Quantiles", xlab = "Theoretical Quantiles")
#'
NULL
#' Apartment Size
#'
#' Size of apartments in Mendebaldea, Spain and San Jorge, Spain
#'
#'
#' @name AptSize
#' @docType data
#' @format A data frame with 8 observations on the following 2 variables:
#' \describe{ \item{Mendebaldea}{Mendebaldea apartment size in square
#' meters} \item{SanJorge}{San Jorge apartment size in square meters} }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = AptSize,
#' boxplot(Mendebaldea, SanJorge) )
#'
NULL
#' George Herman Ruth
#'
#' Baseball statistics for George Herman Ruth (The Bambino or The Sultan Of
#' Swat)
#'
#'
#' @name Baberuth
#' @docType data
#' @format A data frame with 22 observations on the following 14 variables:
#' \describe{
#' \item{Year}{year in which the season occurred}
#' \item{Team}{team he played for \code{Bos-A}, \code{Bos-N}, or
#' \code{NY-A}} \item{G}{games played} \item{AB}{at bats}
#' \item{R}{runs scored} \item{H}{hits}
#' \item{X2B}{doubles} \item{X3B}{triples}
#' \item{HR}{home runs} \item{RBI}{runs batted in}
#' \item{SB}{stolen bases} \item{BB}{base on balls or walks}
#' \item{BA}{batting average H/AB} \item{SLG}{slugging percentage (total bases/at bats)}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source \url{https://www.baseball-reference.com/about/bat_glossary.shtml}
#' @keywords datasets
#' @examples
#'
#' with(data = Baberuth,
#' hist(RBI))
#'
NULL
#' Blood Alcohol Content
#'
#' Two volunteers each consumed a twelve ounce beer every fifteen minutes for
#' one hour. One hour after the fourth beer was consumed, each volunteer's
#' blood alcohol was measured with a different breathalyzer from the same
#' company. The numbers recorded in data frame \code{Bac} are the sorted blood
#' alcohol content values reported with breathalyzers from company \code{X} and
#' company \code{Y}. Data are used in Example 9.15.
#'
#'
#' @name Bac
#' @docType data
#' @format A data frame with 10 observations on the following 2 variables:
#' \describe{
#' \item{X}{blood alcohol content measured in g/L}
#' \item{Y}{blood alcohol content measured in g/L}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Bac,
#' var.test(X, Y, alternative = "less"))
#'
NULL
#' Lithium Batteries
#'
#' A manufacturer of lithium batteries has two production facilities, A and B.
#' Fifty randomly selected batteries with an advertised life of 180 hours are
#' selected, and tested. The lifetimes are stored in (\code{facilityA}). Fifty
#' randomly selected batteries with an advertised life of 200 hours are
#' selected, and tested. The lifetimes are stored in (\code{facilityB}).
#'
#'
#' @name Battery
#' @docType data
#' @format A data frame with 50 observations on the following 2 variables:
#' \describe{
#' \item{facilityA}{life time measured in hours}
#' \item{facilityB}{life time measured in hours}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Battery,
#' qqnorm(facilityA))
#' with(data = Battery,
#' qqline(facilityA))
#'
NULL
#' Beech Trees
#'
#' Several measurements of 42 beech trees (\emph{Fagus sylvatica}) taken from a
#' forest in Navarra (Spain).
#'
#'
#' @name biomass
#' @docType data
#' @format A data frame with 42 observations on the following 4 variables:
#' \describe{ \item{Dn}{diameter of the stem in centimeters}
#' \item{H}{height of the tree in meters}
#' \item{PST}{weight of the stem in kilograms}
#' \item{PSA}{aboveground weight in kilograms}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source \emph{Gobierno de Navarra} and \emph{Gestion Ambiental Viveros y
#' Repoblaciones de Navarra}, 2006. The data were obtained within the European
#' Project FORSEE.
#' @keywords datasets
#' @examples
#'
#' plot(log(PSA) ~ log(Dn), data = biomass)
#'
NULL
#' Body Fat Composition
#'
#' Values from a study reported in the \emph{American Journal of Clinical
#' Nutrition} that investigated a new method for measuring body composition
#'
#'
#' @name Bodyfat
#' @docType data
#' @format A data frame with 18 observations on the following 3 variables:
#' \describe{
#' \item{age}{age in years}
#' \item{fat}{body fat composition}
#' \item{sex}{a factor with levels \code{F} for female and \code{M} for male}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Mazess, R. B., Peppler, W. W., and Gibbons, M. (1984) \emph{Total
#' Body Composition by Dual-Photon (153 Gd) Absorptiometry}. American Journal
#' of Clinical Nutrition, \bold{40}, \bold{4}: 834-839.
#' @keywords datasets
#' @examples
#'
#' boxplot(fat ~ sex, data = Bodyfat)
#'
NULL
#' Calculus Assessment Scores
#'
#' Mathematical assessment scores for 36 students enrolled in a biostatistics
#' course according to whether or not the students had successfully completed a
#' calculus course prior to enrolling in the biostatistics course
#'
#'
#' @name Calculus
#' @docType data
#' @format A data frame with 18 observations on the following 2 variables:
#' \describe{
#' \item{No.Calculus}{assessment score for students with no prior calculus}
#' \item{Yes.Calculus}{assessment score for students with prior calculus}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Calculus,
#' z.test(x = Yes.Calculus, y = No.Calculus, sigma.x = 5, sigma.y = 12)$conf
#' )
#'
NULL
#' Cars in the European Union (2004)
#'
#' The numbers of cars per 1000 inhabitants (\code{cars}), the total number of
#' known mortal accidents (\code{deaths}), and the country population/1000
#' (\code{population}) for the 25 member countries of the European Union for
#' the year 2004
#'
#'
#' @name Cars2004EU
#' @docType data
#' @format A data frame with 25 observations on the following 4 variables:
#' \describe{
#' \item{country}{a factor with levels \code{Austria},
#' \code{Belgium}, \code{Cyprus}, \code{Czech Republic}, \code{Denmark},
#' \code{Estonia}, \code{Finland}, \code{France}, \code{Germany},
#' \code{Greece}, \code{Hungary}, \code{Ireland}, \code{Italy}, \code{Latvia},
#' \code{Lithuania}, \code{Luxembourg}, \code{Malta}, \code{Netherlands},
#' \code{Poland}, \code{Portugal}, \code{Slovakia}, \code{Slovenia},
#' \code{Spain}, \code{Sweden}, and \code{United Kingdom}}
#' \item{cars}{numbers of cars per 1000 inhabitants}
#' \item{deaths}{total number of known mortal accidents}
#' \item{population}{country population/1000}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' plot(deaths ~ cars, data = Cars2004EU)
#'
NULL
#' Silicon Chips
#'
#' Two techniques of splitting chips are randomly assigned to 28 sheets so that
#' each technique is applied to 14 sheets. The values recorded in \code{Chips}
#' are the number of usable chips from each silicon sheet.
#'
#'
#' @name Chips
#' @docType data
#' @format A data frame with 14 observations on the following 2 variables:
#' \describe{
#' \item{techniqueI}{number of usable chips}
#' \item{techniqueII}{number of usable chips}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' par(mfrow = c(1, 2))
#' with(data = Chips, qqnorm(techniqueI))
#' with(data = Chips, qqline(techniqueI))
#' with(data = Chips, qqnorm(techniqueII))
#' with(data = Chips, qqline(techniqueII))
#' par(mfrow=c(1, 1))
#' # Trellis Approach
#' graph1 <- qqmath(~techniqueI, data = Chips, type=c("p", "r"))
#' graph2 <- qqmath(~techniqueII, data = Chips, type=c("p", "r"))
#' print(graph1, split=c(1, 1, 2, 1), more = TRUE)
#' print(graph2, split=c(2, 1, 2, 1), more = FALSE)
#' rm(graph1, graph2)
#'
NULL
#' Circuit Design Lifetime
#'
#' \code{CircuitDesigns} contains the results from an accelerated life test
#' used to estimate the lifetime of four different circuit designs (lifetimes
#' in thousands of hours).
#'
#'
#' @name CircuitDesigns
#' @docType data
#' @format A data frame with 26 observations on the following 2 variables:
#' \describe{
#' \item{lifetime}{lifetimes in thousands of hours}
#' \item{design}{a factor with levels \code{Design1}, \code{Design2},
#' \code{Design3}, and \code{Design4}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' bwplot(design ~ lifetime, data = CircuitDesigns)
#'
NULL
#' Cosmed Versus Amatek
#'
#' The Cosmed is a portable metabolic system. A study at Appalachian State
#' University compared the metabolic values obtained from the Cosmed to those
#' of a reference unit (Amatek) over a range of workloads from easy to maximal
#' to test the validity and reliability of the Cosmed. A small portion of the
#' results for VO2 (ml/kg/min) measurements taken at a 150 watt workload are
#' stored in \code{CosAma}.
#'
#'
#' @name CosAma
#' @docType data
#' @format A data frame with 14 observations on the following 3 variables:
#' \describe{
#' \item{subject}{subject number}
#' \item{Cosmed}{measured VO2 with Cosmed}
#' \item{Amatek}{measured VO2 with Amatek} }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' bwplot(~(Cosmed - Amatek), data = CosAma)
#'
NULL
#' Butterfat of Cows
#'
#' Random samples of ten mature (five-year-old and older) and ten two-year-old
#' cows were taken from each of five breeds. The average butterfat percentage
#' of these 100 cows is stored in the variable \code{butterfat} with the type
#' of cow stored in the variable \code{breed} and the age of the cow stored in
#' the variable \code{age}.
#'
#'
#' @name Cows
#' @docType data
#' @format A data frame with 100 observations on the following 3 variables:
#' \describe{
#' \item{butterfat}{average butterfat percentage}
#' \item{age}{a factor with levels \code{2 years old} and
#' \code{Mature}}
#' \item{breed}{a factor with levels \code{Ayrshire},
#' \code{Canadian}, \code{Guernsey}, \code{Holstein-Friesian}, and
#' \code{Jersey}}
#' }
#' @references Sokal, R. R. and Rohlf, F. J. (1994) \emph{Biometry}. W. H.
#' Freeman, New York, third edition.
#' @source Canadian record book of purebred dairy cattle.
#' @keywords datasets
#' @examples
#'
#' summary(aov(butterfat ~ breed + age, data = Cows))
#'
NULL
#' Number of Dependent Children for 50 Families
#'
#' Number of dependent children for 50 families.
#'
#'
#' @name Depend
#' @docType data
#' @format A data frame with 50 observations on the following 4 variables:
#' \describe{
#' \item{C1}{a numeric vector}
#' \item{number}{a numeric vector}
#' \item{Count}{a numeric vector}
#' \item{Percent}{a numeric vector} }
#' @source Kitchens, L. J. (2003) \emph{Basic Statistics and Data Analysis}.
#' Duxbury
#' @keywords datasets
#' @examples
#'
#' with(data = Depend, table(C1))
#'
NULL
#' Drosophila Melanogaster
#'
#' \code{Drosophila} contains per diem fecundity (number of eggs laid per
#' female per day for the first 14 days of life) for 25 females from each of
#' three lines of \emph{Drosophila melanogaster}. The three lines are
#' Nonselected (control), Resistant, and Susceptible. Data are used in Example
#' 11.5.
#'
#'
#' @name Drosophila
#' @docType data
#' @format A data frame with 75 observations on the following 2 variables:
#' \describe{
#' \item{Fecundity}{number of eggs laid per female per day for the first 14 days of life}
#' \item{Line}{a factor with levels \code{Nonselected}, \code{Resistant}, and \code{Susceptible}}
#' }
#' @references Sokal, R. R. and Rohlf, F. J. (1994) \emph{Biometry}. W. H.
#' Freeman, New York, third edition.
#' @source The original measurements are from an experiment conducted by R. R.
#' Sokal (Sokal and Rohlf, 1994, p. 237).
#' @keywords datasets
#' @examples
#'
#' summary(aov(Fecundity ~ Line, data = Drosophila))
#'
NULL
#' Engineer Salaries
#'
#' Salaries for engineering graduates 10 years after graduation
#'
#'
#' @name Engineer
#' @docType data
#' @format A data frame with 51 observations on the following 2 variables:
#' \describe{
#' \item{salary}{salary 10 years after graduation in thousands of dollars}
#' \item{university}{one of three different engineering universities}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' boxplot(salary ~ university, data = Engineer, horizontal = TRUE)
#' # Trellis Approach
#' bwplot(university ~ salary, data = Engineer)
#'
NULL
#' Traditional Sitting Position Versus Hamstring Stretch Position
#'
#' Intermediate results from a study to determine whether the traditional
#' sitting position or the hamstring stretch position is superior for
#' administering epidural anesthesia to pregnant women in labor as measured by
#' the number of obstructive (needle to bone) contacts (OC)
#'
#'
#' @name EPIDURALf
#' @docType data
#' @format A data frame with 342 observations on the following 7 variables:
#' \describe{
#' \item{Doctor}{a factor with levels \code{Dr. A}, \code{Dr. B}, \code{Dr. C}, and \code{Dr. D}}
#' \item{kg}{weight in kg of patient}
#' \item{cm}{height in cm of patient}
#' \item{Ease}{a factor with levels \code{Difficult}, \code{Easy}, and \code{Impossible}
#' indicating the physician's assessment of how well bone landmarks can be felt
#' in the patient}
#' \item{Treatment}{a factor with levels \code{Hamstring Stretch} and \code{Traditional Sitting}}
#' \item{OC}{number of obstructive contacts}
#' \item{Complications}{a factor with levels \code{Failure - person got dizzy},
#' \code{Failure - too many OCs}, \code{None}, \code{Paresthesia}, and \code{Wet Tap}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' boxplot(OC ~ Treatment, data = EPIDURALf)
#'
NULL
#' Traditional Sitting Position Versus Hamstring Stretch Position
#'
#' Initial results from a study to determine whether the traditional sitting
#' position or the hamstring stretch position is superior for administering
#' epidural anesthesia to pregnant women in labor as measured by the number of
#' obstructive (needle to bone) contacts (OC)
#'
#'
#' @name EPIDURAL
#' @docType data
#' @format A data frame with 85 observations on the following 7 variables:
#' \describe{
#' \item{Doctor}{a factor with levels \code{Dr. A},
#' \code{Dr. B}, \code{Dr. C}, and \code{Dr. D}}
#' \item{kg}{weight in kg of patient}
#' \item{cm}{height in cm of patient}
#' \item{Ease}{a factor with levels \code{Difficult}, \code{Easy}, and \code{Impossible}
#' indicating the physician's assessment of how well bone landmarks can be felt
#' in the patient}
#' \item{Treatment}{a factor with levels \code{Hamstring Stretch} and \code{Traditional Sitting}}
#' \item{OC}{number of obstructive contacts}
#' \item{Complications}{a factor with levels \code{Failure - person got
#' dizzy}, \code{Failure - too many OCs}, \code{None}, \code{Paresthesia}, and
#' \code{Wet Tap}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' EPIDURAL$Teasy <- factor(EPIDURAL$Ease,
#' levels = c("Easy", "Difficult", "Impossible"))
#' X <- table(EPIDURAL$Doctor, EPIDURAL$Teasy)
#' X
#' par(mfrow = c(2, 2)) # Figure 2.12
#' barplot(X,
#' main = "Barplot where Doctor is Stacked \n within Levels of Palpitation")
#' barplot(t(X),
#' main = "Barplot where Levels of Palpitation \n is Stacked within Doctor")
#' barplot(X, beside = TRUE,
#' main = "Barplot where Doctor is Grouped \n within Levels of Palpitation")
#' barplot(t(X), beside = TRUE,
#' main = "Barplot where Levels of Palpitation \n is Grouped within Doctor")
#' par(mfrow = c(1, 1))
#' rm(X)
#'
NULL
#' European Union Research and Development
#'
#' A random sample of 15 countries' research and development investments for
#' the years 2002 and 2003 is taken and the results in millions of euros are
#' stored in \code{EURD}.
#'
#'
#' @name EURD
#' @docType data
#' @format A data frame with 15 observations on the following 3 variables:
#' \describe{
#' \item{Country}{a factor with levels \code{Bulgaria},
#' \code{Croatia}, \code{Cyprus}, \code{Czech Republic}, \code{Estonia},
#' \code{France}, \code{Hungary}, \code{Latvia}, \code{Lithuania},
#' \code{Malta}, \code{Portugal}, \code{Romania}, \code{Slovakia}, and
#' \code{Slovenia}}
#' \item{RD2002}{research and development investments
#' in millions of euros for 2002}
#' \item{RD2003}{research and development investments in millions of euros for 2003}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' qqmath(~(RD2003 - RD2002), data = EURD, type=c("p", "r"))
#'
NULL
#' Retained Carbon in Beech Trees
#'
#' The carbon retained by leaves measured in kg/ha is recorded for forty-one
#' different plots of mountainous regions of Navarra (Spain), depending on the
#' forest classification: areas with 90\% or more beech trees (\emph{Fagus
#' sylvatica}) are labeled monospecific, while areas with many species of trees
#' are labeled multispecific.
#'
#'
#' @name fagus
#' @docType data
#' @format A data frame with 41 observations on the following 3 variables:
#' \describe{
#' \item{Plot}{plot number}
#' \item{carbon}{carbon retained by leaves measured in kg/ha}
#' \item{type}{a factor with levels \code{monospecific} and \code{multispecific}}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source \emph{Gobierno de Navarra} and \emph{Gestion Ambiental Viveros y
#' Repoblaciones de Navarra}, 2006. The data were obtained within the European
#' Project FORSEE.
#' @keywords datasets
#' @examples
#'
#' boxplot(carbon ~ type, data=fagus)
#'
NULL
#' Fat Cats
#'
#' In a weight loss study on obese cats, overweight cats were randomly assigned
#' to one of three groups and boarded in a kennel. In each of the three
#' groups, the cats' total caloric intake was strictly controlled (1 cup of
#' generic cat food) and monitored for 10 days. The difference between the
#' groups was that group A was given 1/4 of a cup of cat food every six hours,
#' group B was given 1/3 a cup of cat food every eight hours, and group C was
#' given 1/2 a cup of cat food every twelve hours. The weight of the cats at
#' the beginning and end of the study was recorded and the difference in
#' weights (grams) is stored in the variable \code{Weight} of the data frame
#' \code{FCD}. Data are used in Example 11.4.
#'
#'
#' @name FCD
#' @docType data
#' @format A data frame with 36 observations on the following 2 variables:
#' \describe{
#' \item{Weight}{difference in weights (grams)}
#' \item{Diet}{a factor with levels \code{A}, \code{B}, and \code{C}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' # Figure 11.12
#' FCD.aov <- aov(Weight ~ Diet, data = FCD)
#' checking.plots(FCD.aov)
#' rm(FCD.aov)
#'
NULL
#' Cross and Auto Fertilization
#'
#' Plants' heights in inches obtained from two seeds, one obtained by cross
#' fertilization and the other by auto fertilization, in two opposite but
#' separate locations of a pot are recorded.
#'
#'
#' @name Fertilize
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{cross}{height of plant in inches}
#' \item{self}{height of plant in inches}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Darwin, C. (1876) \emph{The Effect of Cross and Self-Fertilization
#' in the Vegetable Kingdom}
#' @keywords datasets
#' @examples
#'
#' with(data = Fertilize,
#' t.test(cross, self))
#'
NULL
#' Carrot Shear
#'
#' Shear measured in kN on frozen carrots from four randomly selected freezers
#'
#'
#' @name food
#' @docType data
#' @format A data frame with 16 observations on the following 2 variables:
#' \describe{
#' \item{shear}{carrot shear measured in kN}
#' \item{freezer}{a factor with levels \code{A}, \code{B}, \code{C},
#' and \code{D}} }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' summary(aov(shear ~ freezer, data = food))
#'
NULL
#' Pit Stop Times
#'
#' Pit stop times for two teams at 10 randomly selected Formula 1 races
#'
#'
#' @name Formula1
#' @docType data
#' @format A data frame with 10 observations on the following 3 variables:
#' \describe{
#' \item{Race}{number corresponding to a race site}
#' \item{Team1}{pit stop times for team one}
#' \item{Team2}{pit stop times for team two}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Formula1,
#' boxplot(Team1, Team2))
#'
NULL
#' Times Until Failure
#'
#' Contains time until failure in hours for a particular electronic component
#' subjected to an accelerated stress test.
#'
#'
#' @name GD
#' @docType data
#' @format A data frame with 100 observations on the following variable:
#' \describe{
#' \item{attf}{times until failure in hours}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = GD,
#' hist(attf, prob = TRUE))
#' with(data = GD,
#' lines(density(attf)))
#' # Trellis Approach
#' histogram(~attf, data = GD, type = "density",
#' panel = function(x, ...) {
#' panel.histogram(x, ...)
#' panel.densityplot(x, col = "blue", plot.points = TRUE, lwd = 2)
#' } )
#'
NULL
#' Blood Glucose Levels
#'
#' Fifteen diabetic patients were randomly selected, and their blood glucose
#' levels were measured in mg/100 ml with two different devices.
#'
#'
#' @name glucose
#' @docType data
#' @format A data frame with 15 observations on the following 3 variables:
#' \describe{
#' \item{Patient}{patient number}
#' \item{Old}{blood glucose level in mg/100 ml using old device}
#' \item{New}{blood glucose level in mg/100 ml using new device}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = glucose,
#' boxplot(Old, New))
#'
NULL
#' GPA and SAT Scores
#'
#' The admissions committee of a comprehensive state university selected at
#' random the records of 200 second semester freshmen. The results, first
#' semester college GPA and SAT scores, are stored in the data frame
#' \code{Grades}. Data are used in Example 12.6.
#'
#'
#' @name Grades
#' @docType data
#' @format A data frame with 200 observations on the following 2 variables:
#' \describe{
#' \item{sat}{SAT score}
#' \item{gpa}{grade point average} }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' # traditional scatterplot
#' plot(gpa ~ sat, data = Grades)
#' # trellis scatterplot
#' xyplot(gpa ~ sat, data = Grades, type=c("p", "smooth"))
#'
NULL
#' Grocery Spending
#'
#' The consumer expenditure survey, created by the U.S. Department of Labor,
#' was administered to 30 households in Watauga County, North Carolina, to see
#' how the cost of living in Watauga county with respect to total dollars spent
#' on groceries compares with other counties. The amount of money each
#' household spent per week on groceries is stored in the variable
#' \code{groceries}. Data are used in Example 8.3.
#'
#'
#' @name Grocery
#' @docType data
#' @format A data frame with 30 observations on the following variable:
#' \describe{
#' \item{groceries}{total dollars spent on groceries}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Grocery,
#' z.test(x = groceries, sigma.x = 30, conf.level = 0.97)$conf)
#'
NULL
#' Mortality and Water Hardness
#'
#' Mortality and drinking water hardness for 61 cities in England and Wales.
#'
#' These data were collected in an investigation of environmental causes of
#' disease. They show the annual mortality rate per 100,000 for males,
#' averaged over the years 1958-1964, and the calcium concentration (in parts
#' per million) in the drinking water supply for 61 large towns in England and
#' Wales. (The higher the calcium concentration, the harder the water.)
#'
#' @name HardWater
#' @docType data
#' @format A data frame with 61 observations on the following 4 variables:
#' \describe{
#' \item{location}{a factor with levels \code{North} and
#' \code{South} indicating whether the town is as north as Derby}
#' \item{town}{the name of the town}
#' \item{mortality}{averaged annual mortality per 100,000 males}
#' \item{hardness}{calcium concentration (in parts per million)}
#' }
#' @source D. J. Hand, F. Daly, A. D. Lunn, K. J. McConway and E. Ostrowski
#' (1994) \emph{A Handbook of Small Datasets}. Chapman and Hall/CRC, London.
#' @keywords datasets
#' @examples
#'
#' plot(mortality ~ hardness, data = HardWater)
#'
NULL
#' House Prices
#'
#' Random sample of house prices (in thousands of dollars) for three
#' bedroom/two bath houses in Watauga County, NC
#'
#'
#' @name House
#' @docType data
#' @format A data frame with 14 observations on the following 2 variables:
#' \describe{
#' \item{Neighborhood}{a factor with levels \code{Blowing Rock}, \code{Cove Creek},
#' \code{Green Valley}, \code{Park Valley}, \code{Parkway}, and \code{Valley Crucis}}
#' \item{Price}{price of house (in thousands of dollars)}
#' }
#' @keywords datasets
#' @examples
#'
#' with(data = House,
#' t.test(Price))
#'
NULL
#' High School Wrestlers
#'
#' The body fat of 78 high school wrestlers was measured using three separate
#' techniques, and the results are stored in the data frame \code{HSwrestler}.
#' The techniques used were hydrostatic weighing (\code{HWFAT}), skin fold
#' measurements (\code{SKFAT}), and the Tanita body fat scale (\code{TANFAT}).
#' Data are used in Examples 10.11, 12.11, and 12.12.
#'
#'
#' @name HSwrestler
#' @docType data
#' @format A data frame with 78 observations on the following 9 variables:
#' \describe{
#' \item{AGE}{age of wrestler in years}
#' \item{HT}{height of wrestler in inches}
#' \item{WT}{weight of wrestler in pounds}
#' \item{ABS}{abdominal fat}
#' \item{TRICEPS}{tricep fat}
#' \item{SUBSCAP}{subscapular fat}
#' \item{HWFAT}{hydrostatic fat}
#' \item{TANFAT}{Tanita fat}
#' \item{SKFAT}{skin fat} }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Data provided by Dr. Alan Utter, Department of Health Leisure and
#' Exercise Science, Appalachian State University.
#' @keywords datasets
#' @examples
#'
#' FAT <- c(HSwrestler$HWFAT, HSwrestler$TANFAT, HSwrestler$SKFAT)
#' GROUP <- factor(rep(c("HWFAT", "TANFAT", "SKFAT"), rep(78, 3)))
#' BLOCK <- factor(rep(1:78, 3))
#' friedman.test(FAT ~ GROUP | BLOCK)
#'
NULL
#' Hubble Telescope
#'
#' The Hubble Space Telescope was put into orbit on April 25, 1990.
#' Unfortunately, on June 25, 1990, a spherical aberration was discovered in
#' Hubble's primary mirror. To correct this, astronauts had to work in space.
#' To prepare for the mission, two teams of astronauts practiced making repairs
#' under simulated space conditions. Each team of astronauts went through 15
#' identical scenarios. The times to complete each scenario were recorded in
#' days.
#'
#'
#' @name Hubble
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{Team1}{days to complete scenario}
#' \item{Team2}{days to complete scenario}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Hubble,
#' qqnorm(Team1 - Team2))
#' with(data = Hubble,
#' qqline(Team1 - Team2))
#' # Trellis Approach
#' qqmath(~(Team1 - Team2), data = Hubble, type=c("p", "r"))
#'
NULL
#' Insurance Quotes
#'
#' Insurance quotes for two insurers of hazardous waste jobs
#'
#'
#' @name InsurQuotes
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{companyA}{quotes from company A in euros}
#' \item{companyB}{quotes from company B in euros}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = InsurQuotes,
#' t.test(companyA, companyB))
#'
NULL
#' Australian Eucalypt Hardwoods
#'
#' The dataset consists of density and hardness measurements from 36 Australian
#' Eucalypt hardwoods.
#'
#' Janka Hardness is an important rating of Australian hardwood timbers. The
#' test measures the force required to imbed a steel ball into a piece of wood.
#'
#' @name janka
#' @docType data
#' @format A data frame with 36 observations on the following 2 variables.
#' \describe{
#' \item{Density}{a measure of density of the timber}
#' \item{Hardness}{the Janka hardness of the timber}
#' }
#' @source Williams, E.J. (1959) \emph{Regression Analysis}. John Wiley &
#' Sons, New York.
#' @keywords datasets
#' @examples
#'
#' with(data = janka, plot(Hardness ~ Density, col = "blue"))
#'
NULL
#' Kindergarten Class
#'
#' The data frame \code{Kinder} contains the height in inches and weight in
#' pounds of 20 children from a kindergarten class. Data are used in Example
#' 12.17.
#'
#'
#' @name Kinder
#' @docType data
#' @format A data frame with 20 observations on the following 2 variables:
#' \describe{
#' \item{ht}{height in inches of child}
#' \item{wt}{weight in pounds of child}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' # Figure 12.10
#' with(data = Kinder,
#' plot(wt, ht))
#' # Trellis Approach
#' xyplot(ht ~ wt, data = Kinder)
#'
NULL
#' LED Diodes
#'
#' The diameter in millimeters for a random sample of 15 diodes from each of
#' the two suppliers is stored in the data frame \code{Leddiode}.
#'
#'
#' @name Leddiode
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{supplierA}{diameter in millimeters of diodes from supplier A}
#' \item{supplierB}{diameter in millimeters of diodes from supplier B}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Leddiode,
#' boxplot(supplierA, supplierB, col = c("red", "blue")))
#'
NULL
#' Lost Revenue Due to Worker Illness
#'
#' Data set containing the lost revenue in dollars/day and number of workers
#' absent due to illness for a metallurgic company
#'
#'
#' @name LostR
#' @docType data
#' @format A data frame with 25 observations on the following 2 variables:
#' \describe{
#' \item{NumberSick}{number of absent workers due to illness}
#' \item{LostRevenue}{lost revenue in dollars}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' xyplot(LostRevenue ~ NumberSick, data = LostR, type=c("p", "r"))
#'
NULL
#' Milk Carton Drying Times
#'
#' A plastics manufacturer makes two sizes of milk containers: half gallon and
#' gallon sizes. The time required for each size to dry is recorded in seconds
#' in the data frame \code{MilkCarton}.
#'
#'
#' @name MilkCarton
#' @docType data
#' @format A data frame with 40 observations on the following 2 variables:
#' \describe{
#' \item{Hgallon}{drying time in seconds for half gallon containers}
#' \item{Wgallon}{drying time in seconds for whole gallon containers}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = MilkCarton,
#' boxplot(Hgallon, Wgallon))
#'
NULL
#' Probability and Statistics with R
#'
#' Data and functions for the book \emph{Probability and Statistics with R}
#'
#' \tabular{ll}{ Package: \tab PASWR\cr Type: \tab Package\cr Version: \tab
#' 1.2\cr Date: \tab 2016-02-24\cr License: \tab GPL (>=2) \cr } Comprehensive
#' and engineering-oriented, \emph{Probability and Statistics with R} provides
#' a thorough treatment of probability and statistics, clear and accessible
#' real-world examples, and fully detailed proofs. The text provides
#' step-by-step explanations for numerous examples in R and S-PLUS for nearly
#' every topic covered, including both traditional and nonparametric
#' techniques. With a wide range of graphs to illustrate complex material as
#' well as a solutions manual, the book also offers an accompanying website
#' that features supporting information, including datasets, functions, and
#' other downloadable material. It is ideal for undergraduate students and for
#' engineers and scientists who must perform statistical analyses.
#'
#' @name PASWR-package
#' @aliases PASWR-package PASWR
#' @docType package
#' @author Alan T. Arnholt
#'
#' Maintainer: <arnholtat@@appstate.edu>
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords package
NULL
#' Phenylketonuria
#'
#' The data frame \code{Phenyl} records the level of Q10 at four different
#' times for 46 patients diagnosed with phenylketonuria. The variable
#' \code{Q10.1} contains the level of Q10 measured in micromoles for the 46
#' patients. \code{Q10.2}, \code{Q10.3}, and \code{Q10.4} are the values
#' recorded at later times respectively for the 46 patients.
#'
#' Phenylketonuria (PKU) is a genetic disorder that is characterized by an
#' inability of the body to utilize the essential amino acid, phenylalanine.
#' Research suggests patients with phenylketonuria have deficiencies in
#' coenzyme Q10.
#'
#' @name Phenyl
#' @docType data
#' @format A data frame with 46 observations on the following 4 variables.
#' \describe{
#' \item{Q10.1}{level of Q10 at time 1 in micromoles}
#' \item{Q10.2}{level of Q10 at time 2 in micromoles}
#' \item{Q10.3}{level of Q10 at time 3 in micromoles}
#' \item{Q10.4}{level of Q10 at time 4 in micromoles} }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Artuch, R., \emph{et al.} (2004) \dQuote{Study of Antioxidant
#' Status in Phenylketonuric Patients.} \emph{Clinical Biochemistry},
#' \bold{37}: 198-203.
#' @keywords datasets
#' @examples
#'
#' with(data = Phenyl,
#' t.test(Q10.1, conf.level = 0.99))
#'
NULL
#' Telephone Call Times
#'
#' \code{Phone} contains times in minutes of long distance telephone calls
#' during a one month period for a small business. Data are used in Example
#' 10.1.
#'
#'
#' @name Phone
#' @docType data
#' @format A data frame with 23 observations on the following variable:
#' \describe{
#' \item{call.time}{time spent on long distance calls in minutes}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Phone,
#' SIGN.test(call.time, md = 2.1))
#'
NULL
#' Rat Blood Pressure
#'
#' Twelve rats were chosen, and a drug was administered to six rats, the
#' treatment group, chosen at random. The other six rats, the control group,
#' received a placebo. The drops in blood pressure (mmHg) for the treatment
#' group (with probability distribution F) and the control group (with
#' probability distribution G) are stored in the variables \code{Treat} and
#' \code{Cont}, respectively. Data are used in Example 10.18.
#'
#'
#' @name Ratbp
#' @docType data
#' @format A data frame with 6 observations on the following 2 variables:
#' \describe{
#' \item{Treat}{drops in blood pressure in mmHg for treatment group}
#' \item{Cont}{drops in blood pressure in mmHg for control group}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source The data is originally from Ott and Mendenhall (1985, problem 8.17).
#' @keywords datasets
#' @examples
#'
#' with(data = Ratbp,
#' boxplot(Treat, Cont))
#'
NULL
#' Rat Survival Time
#'
#' The survival time in weeks of 20 male rats exposed to high levels of
#' radiation.
#'
#'
#' @name Rat
#' @docType data
#' @format A data frame with 20 observations on the following variable:
#' \describe{
#' \item{survival.time}{number of weeks survived}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Lawless, J. (1982) \emph{Statistical Models and Methods for Lifetime
#' Data}. John Wiley, New York.
#' @keywords datasets
#' @examples
#'
#' with(data = Rat,
#' EDA(survival.time))
#'
NULL
#' Refrigerator Energy Consumption
#'
#' Thirty 18 cubic feet refrigerators were randomly selected from a company's
#' warehouse. The first fifteen had their motors modified while the last
#' fifteen were left intact. The energy consumption (kilowatts) for a 24 hour
#' period for each refrigerator was recorded and stored in the data frame
#' \code{Refrigerator}. The refrigerators with the design modification are
#' stored in the variable \code{modelA}, and those without the design
#' modification are stored in the variable \code{modelB}.
#'
#'
#' @name Refrigerator
#' @docType data
#' @format A data frame with 30 observations on the following 2 variables.
#' \describe{
#' \item{modelA}{energy consumption in kilowatts for a 24 hour period}
#' \item{modelB}{energy consumption in kilowatts for a 24 hour period}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Refrigerator,
#' boxplot(modelA, modelB))
#'
NULL
#' Oriental Cockroaches
#'
#' A laboratory is interested in testing a new child friendly pesticide on
#' \emph{Blatta orientalis} (oriental cockroaches). Scientists apply the new
#' pesticide to 81 randomly selected Blatta orientalis oothecae (eggs). The
#' results from the experiment are stored in the data frame \code{Roacheggs} in
#' the variable \code{eggs}. A zero in the variable \code{eggs} indicates that
#' nothing hatched from the egg while a 1 indicates the birth of a cockroach.
#' Data are used in Example 7.16.
#'
#'
#' @name Roacheggs
#' @docType data
#' @format A data frame with 81 observations on the following variable:
#' \describe{
#' \item{eggs}{numeric vector where a 0 indicates nothing
#' hatched while a 1 indicates the birth of a cockroach.}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' p <- seq(0.1, 0.9, 0.001)
#' negloglike <- function(p){
#' -(sum(Roacheggs$eggs)*log(p) + sum(1 - Roacheggs$eggs)*log(1 - p))
#' }
#' nlm(negloglike, 0.2)
#' rm(negloglike)
#'
NULL
#' Surface Water Salinity
#'
#' Surface-water salinity measurements were taken in a bottom-sampling project
#' in Whitewater Bay, Florida. These data are stored in the data frame
#' \code{Salinity}.
#'
#'
#' @name Salinity
#' @docType data
#' @format A data frame with 48 observations on the following variable:
#' \describe{
#' \item{salinity}{surface-water salinity measurements}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Davis, J. (1986) \emph{Statistics and Data Analysis in Geology}.
#' John Wiley, New York.
#' @keywords datasets
#' @examples
#'
#' with(data = Salinity,
#' EDA(salinity))
#'
NULL
#' Fruit Trees
#'
#' To estimate the total surface occupied by fruit trees in 3 small areas (R63,
#' R67, and R68) of Navarra (Spain) in 2001, a sample of 47 square segments has
#' been taken. The experimental units are square segments or quadrats of 4
#' hectares, obtained by random sampling after overlaying a square grid on the
#' study domain. Data are used in Case Study: Fruit Trees, Chapter 12.
#'
#'
#' @name satfruit
#' @docType data
#' @format A data frame with 47 observations on the following 17 variables:
#' \describe{
#' \item{QUADRAT}{number of the sampled segment or quadrat}
#' \item{SArea}{the small area, a factor with levels \code{R63},
#' \code{R67}, and \code{R68}}
#' \item{WH}{area classified as wheat in sampled segment}
#' \item{BA}{area classified as barley in sampled segment}
#' \item{NAR}{area classified as non arable in sampled segment}
#' \item{COR}{area classified as corn in sampled segment}
#' \item{SF}{area classified as sunflower in sampled segment}
#' \item{VI}{area classified as vineyard in sampled segment}
#' \item{PS}{area classified as grass in sampled segment}
#' \item{ES}{area classified as asparagus in sampled segment}
#' \item{AF}{area classified as lucerne in sampled segment}
#' \item{CO}{area classified as rape (\emph{Brassica Napus}) in sampled segment}
#' \item{AR}{area classified as rice in sampled segment}
#' \item{AL}{area classified as almonds in sampled segment}
#' \item{OL}{area classified as olives in sampled segment}
#' \item{FR}{area classified as fruit trees in sampled segment}
#' \item{OBS}{the observed area of fruit trees in sampled segment}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Militino, A. F., \emph{et al.} (2006) \dQuote{Using Small Area
#' Models to Estimate the Total Area Occupied by Olive Trees.} \emph{Journal of
#' Agricultural, Biological and Environmental Statistics}, \bold{11}: 450-461.
#' @keywords datasets
#' @examples
#'
#' with(data = satfruit,
#' pairs(satfruit[ , 15:17]))
#' # Trellis Approach
#' splom(~data.frame(satfruit[ , 15:17]), data = satfruit)
#'
NULL
#' County IQ
#'
#' A school psychologist administered the Stanford-Binet intelligence quotient
#' (IQ) test in two counties. Forty randomly selected, gifted and talented
#' students were selected from each county. The Stanford-Binet IQ test is said
#' to follow a normal distribution with a mean of 100 and standard deviation of
#' 16.
#'
#'
#' @name SBIQ
#' @docType data
#' @format A data frame with 40 observations on the following 2 variables:
#' \describe{
#' \item{County1}{IQ scores for county one}
#' \item{County2}{IQ scores for county two}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = SBIQ, qqnorm(County1))
#' with(data = SBIQ, qqline(County1))
#' # Trellis Approach
#' qqmath(~County1, data = SBIQ, type=c("p", "r"))
#'
NULL
#' Dopamine Activity
#'
#' Twenty-five patients with schizophrenia were classified as psychotic or
#' nonpsychotic after being treated with an antipsychotic drug. Samples of
#' cerebral fluid were taken from each patient and assayed for dopamine
#' b-hydroxylase (DBH) activity. The dopamine measurements for the two groups
#' are in nmol/(ml)(h)/(mg) of protein.
#'
#'
#' @name Schizo
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{nonpsychotic}{dopamine activity level for patients classified nonpsychotic}
#' \item{psychotic}{dopamine activity level for patients classified psychotic} }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Sternberg, D. E., Van Kammen, D. P., and Bunney, W. E. (1982)
#' \dQuote{Schizophrenia: Dopamine b-Hydroxylase Activity and Treatment
#' Response.} \emph{Science}, \bold{216}: 1423-1425.
#' @keywords datasets
#' @examples
#'
#' with(data = Schizo, boxplot(nonpsychotic, psychotic,
#' names = c("nonpsychotic", "psychotic"), col = c("green", "red")))
#'
NULL
#' Standardized Test Scores
#'
#' Standardized test scores from a random sample of twenty college freshmen.
#'
#'
#' @name Score
#' @docType data
#' @format A data frame with 20 observations on the following variable:
#' \describe{
#' \item{scores}{standardized test score}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' qqmath(~scores, data = Score, type=c("p", "r"))
#'
NULL
#' M1 Motorway Times
#'
#' The times recorded are those for 41 successive vehicles travelling
#' northwards along the M1 motorway in England when passing a fixed point near
#' Junction 13 in Bedfordshire on Saturday, March 23, 1985. After subtracting
#' the times, the following 40 interarrival times reported to the nearest
#' second are stored in \code{SDS4} under the variable \code{Times}. Data are
#' used in Example 10.17.
#'
#'
#' @name SDS4
#' @docType data
#' @format A data frame with 40 observations on the following variable:
#' \describe{
#' \item{Times}{interarrival times to the nearest second}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Hand, D. J., \emph{et al.} (1994) \emph{A Handbook of Small Data
#' Sets}. Chapman & Hall, London.
#' @keywords datasets
#' @examples
#'
#' with(data = SDS4, hist(Times))
#'
NULL
#' Simulated Data (Predictors)
#'
#' Simulated data for five variables. Data are used with Example 12.21.
#'
#'
#' @name SimDataST
#' @docType data
#' @format A data frame with 200 observations on the following 5 variables:
#' \describe{
#' \item{Y1}{a numeric vector}
#' \item{Y2}{a numeric vector}
#' \item{x1}{a numeric vector}
#' \item{x2}{a numeric vector}
#' \item{x3}{a numeric vector}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' xyplot(Y1 ~ x1, data = SimDataST, type=c("p", "smooth"))
#'
NULL
#' Simulated Data (Logarithms)
#'
#' Simulated data for four variables. Data are used with Example 12.18.
#'
#'
#' @name SimDataXT
#' @docType data
#' @format A data frame with 200 observations on the following 4 variables:
#' \describe{
#' \item{Y}{a numeric vector}
#' \item{x1}{a numeric vector}
#' \item{x2}{a numeric vector}
#' \item{x3}{a numeric vector}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' xyplot(Y ~ x1, data = SimDataXT, type=c("p", "smooth"))
#'
NULL
#' World Cup Soccer
#'
#' \code{Soccer} contains how many goals were scored in the regulation 90
#' minute periods of World Cup soccer matches from 1990 to 2002. Data are used
#' in Example 4.4.
#'
#' The World Cup is played once every four years. National teams from all over
#' the world compete. In 2002 and in 1998, thirty-two teams were invited;
#' whereas, in 1994 and in 1990, only 24 teams participated. The data frame
#' \code{Soccer} contains three columns: \code{CGT}, \code{Game}, and
#' \code{Goals}. All of the information contained in \code{Soccer} is
#' indirectly available from the FIFA World Cup website, located at
#' \url{https://www.fifa.com/}.
#'
#' @name Soccer
#' @docType data
#' @format A data frame with 575 observations on the following 3 variables:
#' \describe{
#' \item{CGT}{cumulative goal time in minutes}
#' \item{Game}{game in which goals were scored}
#' \item{Goals}{number of goals scored in regulation period} }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Chu, S. (2003) \dQuote{Using Soccer Goals to Motivate the Poisson
#' Process.} \emph{INFORMS Transactions on Education}, \bold{3}(2):
#' 62-68.
#' @keywords datasets
#' @examples
#'
#' with(data = Soccer,
#' table(Goals))
#'
NULL
#' Student Temperature
#'
#' In a study conducted at Appalachian State University, students used digital
#' oral thermometers to record their temperatures each day they came to class.
#' A randomly selected day of student temperatures is provided in
#' \code{StatTemps}. Information is also provided with regard to subject gender
#' and the hour of the day when the students' temperatures were measured.
#'
#'
#' @name StatTemps
#' @docType data
#' @format A data frame with 34 observations on the following 3 variables:
#' \describe{
#' \item{temperature}{temperature in degrees Fahrenheit}
#' \item{gender}{a factor with levels \code{Female} and \code{Male}}
#' \item{class}{a factor with levels \code{8 a.m.} and \code{9 a.m.}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' bwplot(gender ~ temperature, data = StatTemps)
#'
NULL
#' School Satisfaction
#'
#' A questionnaire is randomly administered to 11 students from State School
#' \code{X} and to 15 students from State School \code{Y} (the results have
#' been ordered and stored in the data frame \code{Stschool}). Data are used
#' in Example 9.11.
#'
#'
#' @name Stschool
#' @docType data
#' @format A data frame with 26 observations on the following 4 variables:
#' \describe{
#' \item{X}{satisfaction score}
#' \item{Y}{satisfaction score}
#' \item{Satisfaction}{combined satisfaction scores}
#' \item{School}{a factor with levels \code{X} and \code{Y}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Stschool,
#' t.test(X, Y, var.equal = TRUE))
#'
NULL
#' Workstation Comparison
#'
#' To compare the speed differences between two different brands of
#' workstations (Sun and Digital), the times each brand took to complete
#' complex simulations was recorded. Five complex simulations were selected,
#' and the five selected simulations were run on both workstations. The
#' resulting times in minutes for the five simulations are stored in data frame
#' \code{Sundig}.
#'
#'
#' @name Sundig
#' @docType data
#' @format A data frame with 5 observations on the following 3 variables:
#' \describe{
#' \item{SUN}{time in seconds for a Sun workstation to complete a simulation}
#' \item{DIGITAL}{time in seconds for a Digital
#' workstation to complete a simulation}
#' \item{d}{difference between \code{SUN} and \code{DIGITAL}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Sundig,
#' t.test(SUN, DIGITAL, paired = TRUE)$conf.int)
#'
NULL
#' Sunflower Defoliation
#'
#' Seventy-two field trials were conducted by applying four defoliation
#' treatments (non defoliated control, 33\%, 66\%, and 100\%) at different
#' growth stages (\code{stage}) ranging from pre-flowering (1) to physiological
#' maturity (5) in four different locations of Navarra, Spain: Carcastillo (1),
#' Melida (2), Murillo (3), and Unciti (4). There are two response variables:
#' \code{yield} in kg/ha of the sunflower and \code{numseed}, the number of
#' seeds per sunflower head. Data are stored in the data frame
#' \code{sunflower}. Data are used in Case Study: Sunflower defoliation from
#' Chapter 11.
#'
#'
#' @name sunflower
#' @docType data
#' @format A data frame with 72 observations on the following 5 variables:
#' \describe{
#' \item{location}{a factor with levels \code{A}, \code{B},
#' \code{C}, and \code{D} for locations Carcastillo, Melida, Murillo, and
#' Unciti respectively}
#' \item{stage}{a factor with levels \code{stage1}, \code{stage2}, \code{stage3}, \code{stage4}, and
#' \code{stage5}}
#' \item{defoli}{a factor with levels \code{control},
#' \code{treat1}, \code{treat2}, and \code{treat3}}
#' \item{yield}{sunflower yield in kg/ha}
#' \item{numseed}{number of seeds per sunflower head}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source Muro, J., \emph{et al.} (2001) \dQuote{Defoliation Effects on
#' Sunflower Yield Reduction.} Agronomy Journal, \bold{93}: 634-637.
#' @keywords datasets
#' @examples
#'
#' summary(aov(yield ~ stage + defoli + stage:defoli, data = sunflower))
#'
NULL
#' Surface Area for Spanish Communities
#'
#' Surface area (km\eqn{\mbox{\textasciicircum}}{^}2) for seventeen autonomous
#' Spanish communities.
#'
#'
#' @name SurfaceSpain
#' @docType data
#' @format A data frame with 17 observations on the following 2 variables:
#' \describe{
#' \item{community}{a factor with levels \code{Andalucia},
#' \code{Aragon}, \code{Asturias}, \code{Baleares}, \code{C.Valenciana},
#' \code{Canarias}, \code{Cantabria}, \code{Castilla-La Mancha},
#' \code{Castilla-Leon}, \code{Cataluna}, \code{Extremadura}, \code{Galicia},
#' \code{La Rioja}, \code{Madrid}, \code{Murcia}, \code{Navarra}, and
#' \code{P.Vasco}}
#' \item{surface}{surface area in km\eqn{\mbox{\textasciicircum}}{^}2}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = SurfaceSpain,
#' barplot(surface, names.arg = community, las = 2))
#' # Trellis Approach
#' barchart(community ~ surface, data = SurfaceSpain)
#'
NULL
#' Swim Times
#'
#' Swimmers' improvements in seconds for two diets are stored in the data frame
#' \code{Swimtimes}. The values in \code{highfat} represent the time
#' improvement in seconds for swimmers on a high fat diet, and the values in
#' \code{lowfat} represent the time improvement in seconds for swimmers on a
#' low fat diet. Data are used in Example 10.9.
#'
#' Times for the thirty-two swimmers for the 200 yard individual medley were
#' taken right after the swimmers' conference meet. The swimmers were randomly
#' assigned to follow one of the diets. The group on diet 1 followed a low fat
#' diet the entire year but lost two swimmers along the way. The group on diet
#' 2 followed the high fat diet the entire year and also lost two swimmers.
#'
#' @name Swimtimes
#' @docType data
#' @format A data frame with 14 observations on the following 2 variables:
#' \describe{
#' \item{lowfat}{time improvement in seconds}
#' \item{highfat}{time improvement in seconds}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Swimtimes,
#' wilcox.test(highfat, lowfat))
#'
NULL
#' Speed Detector
#'
#' The Yonalasee tennis club has two systems to measure the speed of a tennis
#' ball. The local tennis pro suspects one system (Speed1) consistently records
#' faster speeds. To test her suspicions, she sets up both systems and records
#' the speeds of 12 serves (three serves from each side of the court). The
#' values are stored in the data frame \code{Tennis} in the variables
#' \code{Speed1} and \code{Speed2}. The recorded speeds are in kilometers per
#' hour.
#'
#'
#' @name Tennis
#' @docType data
#' @format A data frame with 12 observations on the following 2 variables:
#' \describe{
#' \item{Speed1}{speed in kilometers per hour}
#' \item{Speed2}{speed in kilometers per hour}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Tennis,
#' boxplot(Speed1, Speed2))
#'
NULL
#' Statistics Grades
#'
#' Test grades of 29 students taking a basic statistics course
#'
#'
#' @name TestScores
#' @docType data
#' @format A data frame with 29 observations on the following variable:
#' \describe{
#' \item{grade}{test score}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = TestScores,
#' EDA(grade))
#'
NULL
#' Tire Stopping Distances
#'
#' The data frame \code{Tire} has the stopping distances measured to the
#' nearest foot for a standard sized car to come to a complete stop from a
#' speed of sixty miles per hour. There are six measurements of the stopping
#' distance for four different tread patterns labeled A, B, C, and D. The same
#' driver and car were used for all twenty-four measurements. Data are used in
#' Examples 11.1 and 11.2.
#'
#'
#' @name Tire
#' @docType data
#' @format A data frame with 24 observations on the following 2 variables:
#' \describe{
#' \item{StopDist}{stopping distance measured to the nearest foot}
#' \item{tire}{a factor with levels \code{A}, \code{B}, \code{C},
#' and \code{D}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' summary(aov(StopDist ~ tire, data = Tire))
#'
NULL
#' Tire Wear
#'
#' The data frame \code{TireWear} contains measurements for the amount of tread
#' loss after 10,000 miles of driving in thousandths of an inch. Data are used
#' in Example 11.8.
#'
#'
#' @name TireWear
#' @docType data
#' @format A data frame with 16 observations on the following 3 variables:
#' \describe{
#' \item{Wear}{tread loss measured in thousandths of an inch}
#' \item{Treat}{a factor with levels \code{A}, \code{B},
#' \code{C}, and \code{D}}
#' \item{Block}{a factor with levels \code{Car1}, \code{Car2}, \code{Car3}, and \code{Car4}}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' par(mfrow = c(1, 2), cex = 0.8)
#' with(data = TireWear,
#' interaction.plot(Treat, Block, Wear, type = "b", legend = FALSE))
#' with(data = TireWear,
#' interaction.plot(Block, Treat, Wear, type = "b", legend = FALSE))
#' par(mfrow = c(1, 1), cex = 1)
#'
NULL
#' Titanic Survival Status
#'
#' The \code{titanic3} data frame describes the survival status of individual
#' passengers on the Titanic. The \code{titanic3} data frame does not contain
#' information for the crew, but it does contain actual and estimated ages for
#' almost 80\% of the passengers.
#'
#' Thomas Cason of UVa has greatly updated and improved the \code{titanic} data
#' frame using the \emph{Encyclopedia Titanica} and created a new dataset
#' called \code{titanic3}. This dataset reflects the state of data available as
#' of August 2, 1999. Some duplicate passengers have been dropped, many errors
#' have been corrected, many missing ages have been filled in, and new
#' variables have been created.
#'
#' @name titanic3
#' @docType data
#' @format A data frame with 1309 observations on the following 14 variables:
#' \describe{
#' \item{pclass}{a factor with levels \code{1st},
#' \code{2nd}, and \code{3rd}}
#' \item{survived}{Survival (0 = No; 1 = Yes)}
#' \item{name}{Name}
#' \item{sex}{a factor with levels
#' \code{female} and \code{male}}
#' \item{age}{age in years}
#' \item{sibsp}{Number of Siblings/Spouses Aboard}
#' \item{parch}{Number of Parents/Children Aboard}
#' \item{ticket}{Ticket Number}
#' \item{fare}{Passenger Fare}
#' \item{cabin}{Cabin}
#' \item{embarked}{a factor with levels
#' \code{Cherbourg}, \code{Queenstown}, and \code{Southampton}}
#' \item{boat}{Lifeboat}
#' \item{body}{Body Identification Number}
#' \item{home.dest}{Home/Destination} }
#' @references Harrell, F. E. (2001) \emph{Regression Modeling Strategies with
#' Applications to Linear Models, Logistic Regression, and Survival Analysis}.
#' Springer.
#' @source
#' \url{https://hbiostat.org/data/repo/titanic.html}
#' @keywords datasets
#' @examples
#'
#' with(titanic3,
#' table(pclass, sex))
#'
NULL
#' Nuclear Energy
#'
#' Nuclear energy (in TOE, tons of oil equivalent) produced in 12 randomly
#' selected European countries during 2003
#'
#'
#' @name TOE
#' @docType data
#' @format A data frame with 12 observations on the following variable:
#' \describe{
#' \item{energy}{nuclear energy measured in tons of oil equivalent}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(TOE,
#' plot(density(energy)))
#'
NULL
#' Tennis Income
#'
#' \code{Top20} contains data (in millions of dollars) corresponding to the
#' earnings of 15 randomly selected tennis players whose earnings fall
#' somewhere in positions 20 through 100 of ranked earnings.
#'
#'
#' @name Top20
#' @docType data
#' @format A data frame with 15 observations on the following variable:
#' \describe{
#' \item{income}{yearly income in millions of dollars}
#' }
#' @references Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @source \url{https://www.atptour.com/}
#' @keywords datasets
#' @examples
#'
#' with(data = Top20,
#' EDA(income))
#'
NULL
#' Megabytes Downloaded
#'
#' The manager of a URL commercial address is interested in predicting the
#' number of megabytes downloaded, \code{megasd}, by clients according to the
#' number of minutes they are connected, \code{mconnected}. The manager
#' randomly selects (megabyte, minute) pairs, and records the data. The pairs
#' (\code{megasd}, \code{mconnected}) are stored in the data frame
#' \code{URLaddress}.
#'
#'
#' @name URLaddress
#' @docType data
#' @format A data frame with 30 observations on the following 2 variables:
#' \describe{
#' \item{megasd}{megabytes downloaded}
#' \item{mconnected}{number of minutes connected}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' xyplot(mconnected ~ megasd, data = URLaddress, type=c("p", "r"))
#'
NULL
#' Apartments in Vitoria
#'
#' Descriptive information and the appraised total price (in Euros) for
#' apartments in Vitoria, Spain.
#'
#'
#' @name vit2005
#' @docType data
#' @format A data frame with 218 observations on the following 16 variables:
#' \describe{
#' \item{row.labels}{the number of the observation}
#' \item{totalprice}{the market total price (in Euros) of the apartment
#' including garage(s) and storage room(s)}
#' \item{area}{the total living area of the apartment in square meters}
#' \item{zone}{a factor
#' indicating the neighborhood where the apartment is located with levels
#' \code{Z11}, \code{Z21}, \code{Z31}, \code{Z32}, \code{Z34}, \code{Z35},
#' \code{Z36}, \code{Z37}, \code{Z38}, \code{Z41}, \code{Z42}, \code{Z43},
#' \code{Z44}, \code{Z45}, \code{Z46}, \code{Z47}, \code{Z48}, \code{Z49},
#' \code{Z52}, \code{Z53}, \code{Z56}, \code{Z61}, and \code{Z62}.}
#' \item{category}{a factor indicating the condition of the apartment
#' with levels \code{2A}, \code{2B}, \code{3A}, \code{3B}, \code{4A},
#' \code{4B}, and \code{5A}. The factors are ordered so that \code{2A} is the
#' best and \code{5A} is the worst.}
#' \item{age}{age of the apartment}
#' \item{floor}{floor on which the apartment is located}
#' \item{rooms}{total number of rooms including bedrooms, dining room,
#' and kitchen}
#' \item{out}{a factor indicating the percent of the
#' apartment exposed to the elements. The levels \code{E100}, \code{E75},
#' \code{E50}, and \code{E25}, correspond to complete exposure, 75\% exposure,
#' 50\% exposure, and 25\% exposure respectively.}
#' \item{conservation}{is an ordered factor indicating the state of
#' conservation of the apartment. The levels \code{1A}, \code{2A}, \code{2B},
#' and \code{3A} are ordered from best to worst conservation.}
#' \item{toilets}{the number of bathrooms}
#' \item{garage}{the number of garages}
#' \item{elevator}{indicates the absence (0) or
#' presence (1) of elevators.}
#' \item{streetcategory}{an ordered factor
#' from best to worst indicating the category of the street with levels
#' \code{S2}, \code{S3}, \code{S4}, and \code{S5}}
#' \item{heating}{a
#' factor indicating the type of heating with levels \code{1A}, \code{3A},
#' \code{3B}, and \code{4A} which correspond to: no heating, low-standard
#' private heating, high-standard private heating, and central heating
#' respectively.}
#' \item{tras}{the number of storage rooms outside of
#' the apartment}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' modTotal <- lm(totalprice ~ area + as.factor(elevator) +
#' area:as.factor(elevator), data = vit2005)
#' modSimpl <- lm(totalprice ~ area, data = vit2005)
#' anova(modSimpl,modTotal)
#' rm(modSimpl, modTotal)
#'
NULL
#' Waiting Time
#'
#' A statistician records how long he must wait for his bus each morning. Data
#' are used in Example 10.4.
#'
#'
#' @name Wait
#' @docType data
#' @format A data frame with 15 observations on the following variable:
#' \describe{
#' \item{wt}{waiting time in minutes}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Wait,
#' wilcox.test(wt, mu = 6, alternative = "less"))
#'
NULL
#' Washer Diameter
#'
#' Diameter of washers.
#'
#'
#' @name Washer
#' @docType data
#' @format A data frame with 20 observations on the following variable:
#' \describe{
#' \item{diameters}{diameter of washer in cm}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Washer,
#' EDA(diameters))
#'
NULL
#' Sodium Content of Water
#'
#' An independent agency measures the sodium content in 20 samples from source
#' \code{X} and in 10 samples from source \code{Y} and stores them in data
#' frame \code{Water}. Data are used in Example 9.12.
#'
#'
#' @name Water
#' @docType data
#' @format A data frame with 30 observations on the following 4 variables:
#' \describe{
#' \item{X}{sodium content measured in mg/L}
#' \item{Y}{sodium content measured in mg/L}
#' \item{Sodium}{combined sodium content measured in mg/L}
#' \item{Source}{a factor with levels \code{X} and \code{Y}} }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Water,
#' t.test(X, Y, alternative = "less"))
#'
NULL
#' Wisconsin Card Sorting Test
#'
#' The following data are the test scores from a group of 50 patients from the
#' \emph{Virgen del Camino} Hospital (Pamplona, Spain) on the Wisconsin Card
#' Sorting Test.
#'
#' The \dQuote{Wisconsin Card Sorting Test} is widely used by psychiatrists,
#' neurologists, and neuropsychologists with patients who have a brain injury,
#' neurodegenerative disease, or a mental illness such as schizophrenia.
#' Patients with any sort of frontal lobe lesion generally do poorly on the
#' test.
#'
#' @name WCST
#' @docType data
#' @format A data frame with 50 observations on the following variable:
#' \describe{
#' \item{score}{score on the Wisconsin Card Sorting Test}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' densityplot(~score, data = WCST, ref = TRUE)
#'
NULL
#' Weight Gain in Rats
#'
#' The data come from an experiment to study the gain in weight of rats fed on
#' four different diets, distinguished by amount of protein (low and high) and
#' by source of protein (beef and cereal).
#'
#' The design of the experiment is completely randomized, with ten rats on each
#' of the four treatments.
#'
#' @name WeightGain
#' @docType data
#' @format A data frame with 40 observations on the following 3 variables:
#' \describe{
#' \item{ProteinSource}{a factor with levels \code{Beef} and
#' \code{Cereal}}
#' \item{ProteinAmount}{a factor with levels \code{High} and
#' \code{Low}}
#' \item{weightgain}{weight gain in grams}
#' }
#' @source D. J. Hand, F. Daly, A. D. Lunn, K. J. McConway and E. Ostrowski
#' (1994) \emph{A Handbook of Small Datasets}. Chapman and Hall/CRC, London.
#' @keywords datasets
#' @examples
#'
#' aov(weightgain ~ ProteinSource*ProteinAmount, data = WeightGain)
#'
NULL
#' Wheat Surface Area in Spain
#'
#' Seventeen Spanish communities and their corresponding surface area (in
#' hectares) dedicated to growing wheat.
#'
#'
#' @name WheatSpain
#' @docType data
#' @format A data frame with 17 observations on the following 3 variables:
#' \describe{
#' \item{community}{a factor with levels \code{Andalucia},
#' \code{Aragon}, \code{Asturias}, \code{Baleares}, \code{C.Valenciana},
#' \code{Canarias}, \code{Cantabria}, \code{Castilla-La Mancha},
#' \code{Castilla-Leon}, \code{Cataluna}, \code{Extremadura}, \code{Galicia},
#' \code{La Rioja}, \code{Madrid}, \code{Murcia}, \code{Navarra}, and
#' \code{P.Vasco}}
#' \item{hectares}{surface area measured in hectares}
#' \item{acres}{surface area measured in acres}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = WheatSpain,
#' boxplot(hectares))
#'
NULL
#' USA Wheat Surface 2004
#'
#' USA's 2004 harvested wheat surface by state
#'
#'
#' @name wheatUSA2004
#' @docType data
#' @format A data frame with 30 observations on the following 2 variables.
#' \describe{
#' \item{STATES}{a factor with levels \code{AR}, \code{CA},
#' \code{CO}, \code{DE}, \code{GA}, \code{ID}, \code{IL}, \code{IN}, \code{KS},
#' \code{KY}, \code{MD}, \code{MI}, \code{MO}, \code{MS}, \code{MT}, \code{NC},
#' \code{NE}, \code{NY}, \code{OH}, \code{OK}, \code{OR}, \code{Other},
#' \code{PA}, \code{SC}, \code{SD}, \code{TN}, \code{TX}, \code{VA}, \code{WA},
#' and \code{WI}}
#' \item{ACRES}{wheat surface area measured in 1000s of acres}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = wheatUSA2004,
#' hist(ACRES))
#'
NULL
#' Wool Production
#'
#' Random sample of wool production in kilograms on 5 different days at two
#' different locations
#'
#'
#' @name Wool
#' @docType data
#' @format A data frame with 15 observations on the following 2 variables:
#' \describe{
#' \item{textileA}{wool production in thousands of kilograms}
#' \item{textileB}{wool production in thousands of kilograms}
#' }
#' @source Ugarte, M. D., Militino, A. F., and Arnholt, A. T. (2008)
#' \emph{Probability and Statistics with R}. Chapman & Hall/CRC.
#' @keywords datasets
#' @examples
#'
#' with(data = Wool,
#' t.test(textileA, textileB))
#'
NULL
#####################################################################################
#' @import e1071 grid lattice
#' @importFrom graphics abline axis box boxplot dotchart hist legend lines mtext par plot plot.design points polygon segments text title
#' @importFrom stats complete.cases dbinom density dnorm fitted fivenum interaction.plot ks.test median pnorm pt qchisq qnorm qqline qqnorm qt quantile rbinom rnorm rstandard sd setNames shapiro.test var
#' @importFrom utils combn
#' @importFrom MASS stdres
NULL
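# NOTE: the following lines are hosting-site page boilerplate captured along
# with the source, not package content; commented out so the file parses as R.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.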