# R/z.test.R In BSDA: Basic Statistics and Data Analysis

#### Documented in z.test

#' Z-test
#'
#' This function is based on the standard normal distribution and creates
#' confidence intervals and tests hypotheses for both one and two sample
#' problems.
#'
#' If \code{y} is \code{NULL}, a one-sample z-test is carried out with
#' \code{x}.  If \code{y} is not \code{NULL}, a standard two-sample z-test is
#' performed.
#'
#' @param x numeric vector; \code{NA}s and \code{Inf}s are allowed but will be
#' removed.
#' @param y numeric vector; \code{NA}s and \code{Inf}s are allowed but will be
#' removed.
#' @param alternative character string, one of \code{"greater"}, \code{"less"}
#' or \code{"two.sided"}, or the initial letter of each, indicating the
#' specification of the alternative hypothesis. For one-sample tests,
#' \code{alternative} refers to the true mean of the parent population in
#' relation to the hypothesized value \code{mu}. For the standard two-sample
#' tests, \code{alternative} refers to the difference between the true
#' population mean for \code{x} and that for \code{y}, in relation to
#' \code{mu}.
#' @param mu a single number representing the value of the mean or difference
#' in means specified by the null hypothesis
#' @param sigma.x a single number representing the population standard
#' deviation for \code{x}
#' @param sigma.y a single number representing the population standard
#' deviation for \code{y}
#' @param conf.level confidence level for the returned confidence interval,
#' restricted to lie between zero and one
#' @return A list of class \code{htest}, containing the following components:
#' \item{statistic}{the z-statistic, with names attribute \code{"z"}}
#' \item{p.value}{the p-value for the test} \item{conf.int}{is a confidence
#' interval (vector of length 2) for the true mean or difference in means. The
#' confidence level is recorded in the attribute \code{conf.level}.  When
#' alternative is not \code{"two.sided"}, the confidence interval will be
#' half-infinite, to reflect the interpretation of a confidence interval as the
#' set of all values \code{k} for which one would not reject the null
#' hypothesis that the true mean or difference in means is \code{k} . Here
#' infinity will be represented by \code{Inf}.} \item{estimate}{vector of
#' length 1 or 2, giving the sample mean(s) or mean of differences; these
#' estimate the corresponding population parameters. Component \code{estimate}
#' has a names attribute describing its elements.} \item{null.value}{is the
#' value of the mean or difference in means specified by the null hypothesis.
#' This equals the input argument \code{mu}. Component \code{null.value} has a
#' names attribute describing its elements.} \item{alternative}{records the
#' value of the input argument alternative: \code{"greater"}, \code{"less"} or
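#' \code{"two.sided"}.} \item{method}{a character string naming the test that
#' was performed: \code{"One-sample z-Test"} or \code{"Two-sample z-Test"}.}
#' \item{data.name}{a character string (vector of length
#' 1) containing the actual names of the input vectors \code{x} and \code{y}}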
#' @section Null Hypothesis: For the one-sample z-test, the null hypothesis is
#' that the mean of the population from which \code{x} is drawn is \code{mu}.
#' For the standard two-sample z-tests, the null hypothesis is that the
#' population mean for \code{x} less that for \code{y} is \code{mu}.
#'
#' The alternative hypothesis in each case indicates the direction of
#' divergence of the population mean for \code{x} (or difference of means for
#' \code{x} and \code{y}) from \code{mu} (i.e., \code{"greater"},
#' \code{"less"}, \code{"two.sided"}).
#' @author Alan T. Arnholt
#' @references Kitchens, L.J. (2003). \emph{Basic Statistics and Data
#' Analysis}. Duxbury.
#'
#' Hogg, R. V. and Craig, A. T. (1970). \emph{Introduction to Mathematical
#' Statistics, 3rd ed}. Toronto, Canada: Macmillan.
#'
#' Mood, A. M., Graybill, F. A. and Boes, D. C. (1974). \emph{Introduction to
#' the Theory of Statistics, 3rd ed}. New York: McGraw-Hill.
#'
#' Snedecor, G. W. and Cochran, W. G. (1980). \emph{Statistical Methods, 7th
#' ed}. Ames, Iowa: Iowa State University Press.
#' @keywords htest
#' @examples
#'
#' x <- rnorm(12)
#' z.test(x,sigma.x=1)
#'         # Two-sided one-sample z-test where the assumed value for
#'         # sigma.x is one. The null hypothesis is that the population
#'         # mean for 'x' is zero. The alternative hypothesis states
#'         # that it is either greater or less than zero. A confidence
#'         # interval for the population mean will be computed.
#'
#' x <- c(7.8, 6.6, 6.5, 7.4, 7.3, 7., 6.4, 7.1, 6.7, 7.6, 6.8)
#' y <- c(4.5, 5.4, 6.1, 6.1, 5.4, 5., 4.1, 5.5)
#' z.test(x, sigma.x=0.5, y, sigma.y=0.5, mu=2)
#'         # Two-sided standard two-sample z-test where sigma.x
#'         # and sigma.y are both assumed to equal 0.5. The null hypothesis
#'         # is that the population mean for 'x' less that for 'y' is 2.
#'         # The alternative hypothesis is that this difference is not 2.
#'         # A confidence interval for the true difference will be computed.
#'
#' z.test(x, sigma.x=0.5, y, sigma.y=0.5, conf.level=0.90)
#'         # Two-sided standard two-sample z-test where sigma.x and
#'         # sigma.y are both assumed to equal 0.5. The null hypothesis
#'         # is that the population mean for 'x' less that for 'y' is zero.
#'         # The alternative hypothesis is that this difference is not
#'         # zero.  A 90% confidence interval for the true difference will
#'         # be computed.
#' rm(x, y)
#'
#' @export z.test
z.test <- function(x, y = NULL, alternative = "two.sided", mu = 0,
                   sigma.x = NULL, sigma.y = NULL, conf.level = 0.95) {
  # One- or two-sample z-test with known population standard deviation(s).
  #
  # Arguments:
  #   x, y         numeric vectors; non-finite entries (NA, NaN, Inf, -Inf)
  #                are dropped before any computation. `y = NULL` selects the
  #                one-sample test.
  #   alternative  "two.sided", "greater" or "less" (partial matching allowed).
  #   mu           hypothesized mean (one sample) or difference in means
  #                (two samples).
  #   sigma.x      known population standard deviation of x (required).
  #   sigma.y      known population standard deviation of y (required when
  #                y is supplied; ignored otherwise).
  #   conf.level   confidence level of the returned interval, in [0, 1].
  #
  # Returns a list of class "htest" with components statistic, p.value,
  # conf.int, estimate, null.value, alternative, method and data.name.
  #
  # Stops with an error when an argument is malformed, a required sigma is
  # missing, or fewer than three finite observations remain in x (or y).

  # --- argument validation -------------------------------------------------
  choices <- c("two.sided", "greater", "less")
  alternative <- choices[pmatch(alternative, choices)]
  if (length(alternative) != 1L || is.na(alternative)) {
    stop("alternative must be one \"greater\", \"less\", \"two.sided\"")
  }

  if (!is.numeric(mu) || length(mu) != 1L || is.na(mu)) {
    stop("mu must be a single number")
  }

  two_sample <- !is.null(y)

  # Explicit grouping instead of relying on `&&` binding tighter than `||`.
  if (is.null(sigma.x) || (two_sample && is.null(sigma.y))) {
    if (two_sample) {
      stop("You must enter values for both sigma.x and sigma.y")
    }
    stop("You must enter the value for sigma.x")
  }

  # A standard deviation has to be one finite, strictly positive number;
  # anything else would silently yield a vector-valued or infinite statistic.
  is_valid_sd <- function(s) {
    is.numeric(s) && length(s) == 1L && is.finite(s) && s > 0
  }
  if (!is_valid_sd(sigma.x)) {
    stop("sigma.x must be a single positive number")
  }
  if (two_sample && !is_valid_sd(sigma.y)) {
    stop("sigma.y must be a single positive number")
  }

  if (!is.numeric(conf.level) || length(conf.level) != 1L ||
      is.na(conf.level) || conf.level < 0 || conf.level > 1) {
    stop("conf.level must be a number between 0 and 1")
  }

  # --- data label ----------------------------------------------------------
  # Must happen before x / y are reassigned: substitute() only recovers the
  # caller's expression while the argument is still the original promise.
  # Collapsing guarantees a length-1 string even for long expressions.
  dname <- paste(deparse(substitute(x)), collapse = " ")
  if (two_sample) {
    dname <- paste(dname, "and",
                   paste(deparse(substitute(y)), collapse = " "))
  }

  # --- sample summaries ----------------------------------------------------
  # Drop NA / NaN / +-Inf, as promised by the documentation of x and y.
  x <- x[is.finite(x)]
  nx <- length(x)
  if (nx <= 2) {
    stop("not enough x observations")
  }
  mx <- mean(x)

  if (two_sample) {
    y <- y[is.finite(y)]
    ny <- length(y)
    if (ny <= 2) {
      stop("not enough y observations")
    }
    my <- mean(y)
    estimate <- c("mean of x" = mx, "mean of y" = my)
    point <- mx - my
    se <- sqrt(sigma.x^2 / nx + sigma.y^2 / ny)
    method <- "Two-sample z-Test"
    null_label <- "difference in means"
  } else {
    estimate <- c("mean of x" = mx)
    point <- mx
    se <- sigma.x / sqrt(nx)
    method <- "One-sample z-Test"
    null_label <- "mean"
  }

  zobs <- (point - mu) / se

  # --- p-value and confidence interval -------------------------------------
  # lower.tail = FALSE avoids the cancellation in 1 - pnorm(z) for large z.
  # One-sided intervals are half-infinite, so the open end is +-Inf.
  if (alternative == "less") {
    pval <- pnorm(zobs)
    cint <- c(-Inf, point + qnorm(conf.level) * se)
  } else if (alternative == "greater") {
    pval <- pnorm(zobs, lower.tail = FALSE)
    cint <- c(point - qnorm(conf.level) * se, Inf)
  } else {
    pval <- 2 * pnorm(-abs(zobs))
    alpha <- 1 - conf.level
    cint <- point + c(-1, 1) * qnorm(1 - alpha / 2) * se
  }
  attr(cint, "conf.level") <- conf.level

  names(zobs) <- "z"
  names(mu) <- null_label

  structure(
    list(statistic = zobs, p.value = pval, conf.int = cint,
         estimate = estimate, null.value = mu, alternative = alternative,
         method = method, data.name = dname),
    class = "htest"
  )
}


## Try the BSDA package in your browser

# Any scripts or data that you put into this service are public.

# BSDA documentation built on July 30, 2017, 5:01 p.m.