# R/zsum.test.R
# In BSDA: Basic Statistics and Data Analysis
#
# Documented in zsum.test

#' Summarized z-test
#' 
#' This function is based on the standard normal distribution and creates
#' confidence intervals and tests hypotheses for both one and two sample
#' problems based on summarized information the user passes to the function.
#' Output is identical to that produced with \code{z.test}.
#' 
#' If \code{mean.y} is \code{NULL}, a one-sample z-test is carried out using
#' the summarized information for \code{x}. If \code{mean.y} is not
#' \code{NULL}, a standard two-sample z-test is performed.
#' 
#' @param mean.x a single number representing the sample mean of \code{x}
#' @param sigma.x a single number representing the population standard
#' deviation for \code{x}
#' @param n.x a single number representing the sample size for \code{x}
#' @param mean.y a single number representing the sample mean of \code{y}
#' @param sigma.y a single number representing the population standard
#' deviation for \code{y}
#' @param n.y a single number representing the sample size for \code{y}
#' @param alternative is a character string, one of \code{"greater"},
#' \code{"less"} or \code{"two.sided"}, or the initial letter of each,
#' indicating the specification of the alternative hypothesis. For one-sample
#' tests, \code{alternative} refers to the true mean of the parent population
#' in relation to the hypothesized value \code{mu}. For the standard two-sample
#' tests, \code{alternative} refers to the difference between the true
#' population mean for \code{x} and that for \code{y}, in relation to
#' \code{mu}.
#' @param mu a single number representing the value of the mean or difference
#' in means specified by the null hypothesis
#' @param conf.level confidence level for the returned confidence interval,
#' restricted to lie between zero and one
#' @return A list of class \code{htest}, containing the following components:
#' \item{statistic}{the z-statistic, with names attribute \code{z}.}
#' \item{p.value}{the p-value for the test} \item{conf.int }{is a confidence
#' interval (vector of length 2) for the true mean or difference in means. The
#' confidence level is recorded in the attribute \code{conf.level}. When
#' alternative is not \code{"two.sided"}, the confidence interval will be
#' half-infinite, to reflect the interpretation of a confidence interval as the
#' set of all values \code{k} for which one would not reject the null
#' hypothesis that the true mean or difference in means is \code{k}. Here,
#' infinity will be represented by \code{Inf}.} \item{estimate}{vector of
#' length 1 or 2, giving the sample mean(s) or mean of differences; these
#' estimate the corresponding population parameters. Component \code{estimate}
#' has a names attribute describing its elements.} \item{null.value}{the value
#' of the mean or difference in means specified by the null hypothesis. This
#' equals the input argument \code{mu}. Component \code{null.value} has a names
#' attribute describing its elements.} \item{alternative}{records the value of
#' the input argument alternative: \code{"greater"} , \code{"less"} or
#' \code{"two.sided"}. } \item{data.name}{a character string (vector of length
#' 1) containing the names \code{x} and \code{y} for the two summarized
#' samples}
#' @section Null Hypothesis: For the one-sample z-test, the null hypothesis is
#' that the mean of the population from which \code{x} is drawn is \code{mu}.
#' For the standard two-sample z-tests, the null hypothesis is that the
#' population mean for \code{x} less that for \code{y} is \code{mu}.
#' 
#' The alternative hypothesis in each case indicates the direction of
#' divergence of the population mean for \code{x} (or difference of means of
#' \code{x} and \code{y}) from \code{mu} (i.e., \code{"greater"} ,
#' \code{"less"}, \code{"two.sided"} ).
#' @author Alan T. Arnholt
#' @seealso \code{\link{z.test}}, \code{\link{tsum.test}}
#' @references
#' 
#' Kitchens, L. J. (2003). \emph{Basic Statistics and Data Analysis}. Duxbury.
#' 
#' Hogg, R. V. and Craig, A. T. (1970). \emph{Introduction to Mathematical
#' Statistics, 3rd ed}. Toronto, Canada: Macmillan.
#' 
#' Mood, A. M., Graybill, F. A. and Boes, D. C. (1974). \emph{Introduction to
#' the Theory of Statistics, 3rd ed}. New York: McGraw-Hill.
#' 
#' Snedecor, G. W. and Cochran, W. G. (1980). \emph{Statistical Methods, 7th
#' ed}. Ames, Iowa: Iowa State University Press.
#' @keywords htest
#' @examples
#' 
#' zsum.test(mean.x=56/30,sigma.x=2, n.x=30, alternative="greater", mu=1.8)
#'         # Example 9.7 part a. from PASWR.
#' x <- rnorm(12)
#' zsum.test(mean(x),sigma.x=1,n.x=12)
#'         # Two-sided one-sample z-test where the assumed value for
#'         # sigma.x is one. The null hypothesis is that the population
#'         # mean for 'x' is zero. The alternative hypothesis states
#'         # that it is either greater or less than zero. A confidence
#'         # interval for the population mean will be computed.
#'         # Note: returns same answer as:
#' z.test(x,sigma.x=1)
#'         #
#' x <- c(7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8)
#' y <- c(4.5, 5.4, 6.1, 6.1, 5.4, 5.0, 4.1, 5.5)
#' zsum.test(mean(x), sigma.x=0.5, n.x=11 ,mean(y), sigma.y=0.5, n.y=8, mu=2)
#'         # Two-sided standard two-sample z-test where both sigma.x
#'         # and sigma.y are both assumed to equal 0.5. The null hypothesis
#'         # is that the population mean for 'x' less that for 'y' is 2.
#'         # The alternative hypothesis is that this difference is not 2.
#'         # A confidence interval for the true difference will be computed.
#'         # Note: returns same answer as:
#' z.test(x, sigma.x=0.5, y, sigma.y=0.5, mu=2)
#'         #
#' zsum.test(mean(x), sigma.x=0.5, n.x=11, mean(y), sigma.y=0.5, n.y=8,
#' conf.level=0.90)
#'         # Two-sided standard two-sample z-test where both sigma.x and
#'         # sigma.y are both assumed to equal 0.5. The null hypothesis
#'         # is that the population mean for 'x' less that for 'y' is zero.
#'         # The alternative hypothesis is that this difference is not
#'         # zero.  A 90% confidence interval for the true difference will
#'         # be computed.  Note: returns same answer as:
#' z.test(x, sigma.x=0.5, y, sigma.y=0.5, conf.level=0.90)
#' rm(x, y)
#' 
#' @export zsum.test
zsum.test <-
function(mean.x, sigma.x = NULL, n.x = NULL, mean.y = NULL, sigma.y = NULL,
    n.y = NULL, alternative = "two.sided", mu = 0, conf.level = 0.95)
{
    # z-test from summary statistics. A one-sample test is run when `mean.y`
    # is NULL, otherwise a two-sample test of (mean of x) - (mean of y)
    # against `mu`. Returns a list of class "htest" with components
    # statistic, p.value, conf.int, estimate, null.value, alternative,
    # method and data.name.

    # --- Argument validation -------------------------------------------
    # Partial matching ("g", "l", "t") is allowed; anything that does not
    # resolve to exactly one choice is rejected.
    choices <- c("two.sided", "greater", "less")
    alternative <- choices[pmatch(alternative, choices)]
    if (length(alternative) != 1L || is.na(alternative))
        stop("alternative must be one \"greater\", \"less\", \"two.sided\"")

    if (length(mu) != 1L || is.na(mu))
        stop("mu must be a single number")

    if (length(conf.level) != 1L || is.na(conf.level) || conf.level < 0 ||
        conf.level > 1)
        stop("conf.level must be a number between 0 and 1")

    if (is.null(mean.x))
        stop("You must enter the value for mean.x")

    two.sample <- !is.null(mean.y)

    # Every summary value the chosen test needs must be supplied. Checking
    # them as a set covers all combinations of omitted arguments, so a
    # missing value can never reach the arithmetic below.
    required <- if (two.sample) {
        list(sigma.x = sigma.x, n.x = n.x, sigma.y = sigma.y, n.y = n.y)
    } else {
        list(sigma.x = sigma.x, n.x = n.x)
    }
    absent <- names(required)[vapply(required, is.null, logical(1))]
    if (length(absent) == 1L)
        stop("You must enter the value for ", absent)
    if (length(absent) == 2L)
        stop("You must enter the value for both ", absent[1L], " and ",
            absent[2L])
    if (length(absent) > 2L)
        stop("You must enter the values for ", paste(absent, collapse = ", "))

    if (n.x <= 1)
        stop("not enough x observations")

    # --- Test statistic ------------------------------------------------
    # `std.err` (not `stderr`) so that base::stderr() is not shadowed.
    if (two.sample) {
        if (n.y <= 1)
            stop("not enough y observations")
        method <- "Two-sample z-Test"
        dname <- "Summarized x and y"
        estimate <- c(mean.x, mean.y)
        names(estimate) <- c("mean of x", "mean of y")
        point <- mean.x - mean.y
        std.err <- sqrt((sigma.x^2)/n.x + (sigma.y^2)/n.y)
        names(mu) <- "difference in means"
    } else {
        method <- "One-sample z-Test"
        dname <- "Summarized x"
        estimate <- mean.x
        names(estimate) <- "mean of x"
        point <- mean.x
        std.err <- sigma.x/sqrt(n.x)
        names(mu) <- "mean"
    }
    zobs <- (point - unname(mu))/std.err
    names(zobs) <- "z"

    # --- p-value and confidence interval -------------------------------
    # The upper tail is requested directly from pnorm(): 1 - pnorm(z)
    # cancels to exactly 0 for large z. One-sided intervals are
    # half-infinite, with the open end represented by -Inf / Inf.
    if (alternative == "less") {
        pval <- pnorm(zobs)
        cint <- c(-Inf, point + qnorm(conf.level) * std.err)
    } else if (alternative == "greater") {
        pval <- pnorm(zobs, lower.tail = FALSE)
        cint <- c(point - qnorm(conf.level) * std.err, Inf)
    } else {
        pval <- 2 * pnorm(-abs(zobs))
        alpha <- 1 - conf.level
        cint <- point + c(-1, 1) * qnorm(1 - alpha/2) * std.err
    }
    pval <- unname(pval)
    attr(cint, "conf.level") <- conf.level

    rval <- list(statistic = zobs, p.value = pval, conf.int = cint,
        estimate = estimate, null.value = mu, alternative = alternative,
        method = method, data.name = dname)
    class(rval) <- "htest"
    rval
}