PMmccrave1: AMS 597 Final Project

Documented in ekbohm.test kim.t.test lin.stivers.test lj.z.test weighted.z.test

#' Liptak’s weighted Z-test
#' @description Perform two sample weighted Liptak Z-test.
#' @usage weighted.z.test(x, y, alternative = c("two.sided", "less", "greater"))
#' @param x numeric vector
#' @param y numeric vector
#' @param alternative	 character string which specifies the alternative hypothesis.  This can only be set as "two.sided" (default), "greater" or "less".
#' @details
#' This formula useful for the 2-sample tests.  Setting alternative = "greater" provides the alternative that vector x has a larger mean than vector y. If paired = TRUE, both x and y must be specified and the same length. Missing values are removed, which can be done in pairs if paired = TRUE or singularly if paired = FALSE. If var.equal = TRUE  the pooled estimate of the variance is used. By default, if var.equal = FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used. If the input data are effectively constant compared to the larger of the two means, an error is generated.
#' @return p value of the test
#' @return z-statistic
#' @examples
#' vec1 <- c(3,7,NA,2,5,8,NA,5,4,5)
#' vec2 <- c(6,NA,4,7,3,NA,3,4,9,2)
#' weighted.z.test(vec1, vec2, alternative = "greater")
#' @importFrom stats cov pnorm pt qnorm sd t.test var var.test
#' @export

weighted.z.test <- function(x, y, alternative = c("two.sided", "less", "greater")){
  t.paired <- x[(!is.na(x)) & (!is.na(y))]
  n.paired <- y[(!is.na(x)) & (!is.na(y))]
  n1 <- length(t.paired)  # number of paired sample

  t.rest <- x[(is.na(y)) & (!is.na(x))]
  n2 <- length(t.rest)

  n.rest <- y[(is.na(x)) & (!is.na(y))]
  n3 <- length(n.rest)

  p.1i <- t.test(t.paired, n.paired, paired = T, alternative = "greater")$p.value
  p.2i <- t.test(t.rest, n.rest, alternative = "greater")$p.value
  z.1i <- qnorm(1-p.1i)
  z.2i <- qnorm(1-p.2i)
  w1 <- sqrt(2*n1)
  w2 <- sqrt(n2+n3)
  p.ci <- 1 - pnorm((w1*z.1i + w2*z.2i)/sqrt(w1^2 + w2^2))

  p.value <- 0
  if(alternative == "two.sided") {
    if(p.ci < 0.5) {
      p.value <- 2*p.ci
    }else {
      p.value <- 2*(1 - p.ci)
    }
  }else if(alternative == "greater") {
    p.value <- p.ci
  }else if(alternative == "less") {
    p.value <- 1 - p.ci
  }

  cat("    weighted z test\n P value is:", p.value)
}
#####################################################################################################
#####################################################################################################
#####################################################################################################
#####################################################################################################

#' Kim et al.’s modified t-statistic
#' @description Perform two sample Kim et al.’s modified t-statistic on vectors of data.
#' @usage kim.t.test(x, y, alternative = c("two.sided", "less", "greater"))
#' @param x numeric vector
#' @param y numeric vector
#' @param alternative	 character string which specifies the alternative hypothesis.  This can only be set as "two.sided" (default), "greater" or "less".
#' @details
#' This formula useful for the 2-sample tests.  Setting alternative = "greater" provides the alternative that vector x has a larger mean than vector y. If paired = TRUE, both x and y must be specified and the same length. Missing values are removed, which can be done in pairs if paired = TRUE or singularly if paired = FALSE. If var.equal = TRUE  the pooled estimate of the variance is used. By default, if var.equal = FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used. If the input data are effectively constant compared to the larger of the two means, an error is generated.
#' @return p value of the test
#' @return t-statistic.
#' @examples
#' vec1 <- c(3,7,NA,2,5,8,NA,5,4,5)
#' vec2 <- c(6,NA,4,7,3,NA,3,4,9,2)
#' kim.t.test(vec1, vec2, alternative = "greater")
#' @export


kim.t.test <- function(x, y, alternative = c("two.sided", "less", "greater")){

  if(!((all(is.numeric(x)|is.na(x)) & !all(is.na(x))) &
       (all(is.numeric(y)|is.na(y)) & !all(is.na(y))))) {
    stop("Each vector must contain at least one numeric values")
  }

  d <- x-y
  d <- d[!is.na(d)]
  n1 <- length(d)

    t.rest <- x[(is.na(y)) & (!is.na(x))]
  n2 <- length(t.rest)

  n.rest <- y[(is.na(x)) & (!is.na(y))]
  n3 <- length(n.rest)

  d.mu <- mean(d)
  t.mu <- mean(t.rest)
  n.mu <- mean(n.rest)
  n.h <- 2/(1/n2 + 1/n3)

  sd <- sqrt(n1*var(d) + n.h^2*(var(n.rest)/n3 + var(t.rest)/n2))
  diff <- n1*mean(d) + n.h*(mean(t.rest) - mean(n.rest))
  t3 <- diff/sd

  p.value <- 0
  if(alternative == "two.sided") {
    p.value <- 2*(1- pnorm(abs(t3)))
  }else if(alternative == "greater") {
    p.value <- 1 - pnorm(t3)
  }else if(alternative == "less") {
    p.value <- pnorm(t3)
  } else {
    stop("argument should be one of the following: \"two.sided\", \"greater\", \"less\"")
  }

  cat("    Modified t test of Kim et al\n      p-value is:", p.value, "\n       statistic:", t3)
}
#####################################################################################################
#####################################################################################################
#####################################################################################################
#####################################################################################################

#' Looney and Jones' Corrected Z-test
#' @description Perform two sample Looney and Jones' Corrected Z-test on vectors of data.
#' @usage lj.z.test(x, y, alternative = c("two.sided", "less", "greater"))
#' @param x numeric vector
#' @param y numeric vector
#' @param alternative	 character string which specifies the alternative hypothesis.  This can only be set as "two.sided" (default), "greater" or "less".
#' @details
#' This formula useful for the 2-sample tests.  Setting alternative = "greater" provides the alternative that vector x has a larger mean than vector y. If paired = TRUE, both x and y must be specified and the same length. Missing values are removed, which can be done in pairs if paired = TRUE or singularly if paired = FALSE. If var.equal = TRUE  the pooled estimate of the variance is used. By default, if var.equal = FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used. If the input data are effectively constant compared to the larger of the two means, an error is generated.
#' @return p value of the test
#' @return z-statistic.
#' @examples
#' vec1 <- c(3,7,NA,2,5,8,NA,5,4,5)
#' vec2 <- c(6,NA,4,7,3,NA,3,4,9,2)
#' lj.z.test(vec1, vec2, alternative = "greater")
#' @export


lj.z.test <- function(x, y, alternative = c("two.sided", "less", "greater")){

  t.paired <- x[(!is.na(x)) & (!is.na(y))]
  n.paired <- y[(!is.na(x)) & (!is.na(y))]
  n1 <- length(t.paired)  # number of paired sample

  t.rest <- x[(is.na(y)) & (!is.na(x))]
  n2 <- length(t.rest)

  n.rest <- y[(is.na(x)) & (!is.na(y))]
  n3 <- length(n.rest)

  t.mu.star <- mean(c(t.paired,t.rest))
  n.mu.star <- mean(c(n.paired,n.rest))
  t.var.star <- var(c(t.paired,t.rest))
  n.var.star <- var(c(n.paired,n.rest))
  paired.cov <- cov(t.paired,n.paired)

  sd <- sqrt(t.var.star/(n1+n2) + n.var.star/(n1+n3) - 2*n1*paired.cov/((n1+n2)*(n1+n3)))
  z.corr = (t.mu.star - n.mu.star)/sd

  p.value <- 0
  if(alternative == "two.sided") {
    p.value <- 2*(1- pnorm(abs(z.corr)))
  }else if(alternative == "greater") {
    p.value <- 1 - pnorm(z.corr)
  }else if(alternative == "less") {
    p.value <- pnorm(z.corr)
  } else{
    stop("arg should be one of \"two.sided\", \"greater\", \"less\"")
  }

  cat("    Corrected Z-test of Looney and Jones\n          P value is:", p.value, "\n           statistic:", z.corr)
}
#####################################################################################################
#####################################################################################################
#####################################################################################################
#####################################################################################################

#' Lin and Stivers’s MLE based test under heteroscedasticity
#' MLE based test of Lin and Stivers
#' @description Perform two sample Lin and Stivers’s MLE based test under heteroscedasticity on vectors of data.
#' @usage lin.stivers.test(x, y, alternative = c("two.sided", "less", "greater"))
#' @param x numeric vector
#' @param y numeric vector
#' @param alternative	 character string which specifies the alternative hypothesis.  This can only be set as "two.sided" (default), "greater" or "less".
#' @details
#' This formula useful for the 2-sample tests.  Setting alternative = "greater" provides the alternative that vector x has a larger mean than vector y. If paired = TRUE, both x and y must be specified and the same length. Missing values are removed, which can be done in pairs if paired = TRUE or singularly if paired = FALSE. If var.equal = TRUE  the pooled estimate of the variance is used. By default, if var.equal = FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used. If the input data are effectively constant compared to the larger of the two means, an error is generated.
#' @return p value of the test
#' @return t-statistic.
#' @examples
#' vec1 <- c(3,7,NA,2,5,8,NA,5,4,5)
#' vec2 <- c(6,NA,4,7,3,NA,3,4,9,2)
#' lin.stivers.test(vec1, vec2, alternative = "greater")
#' @export


lin.stivers.test <- function(x, y, alternative = c("two.sided", "less", "greater")){
  # get the paired sample, assign it to t.paired and n.paired
  t.paired <- x[(!is.na(x)) & (!is.na(y))]
  n.paired <- y[(!is.na(x)) & (!is.na(y))]
  n1 <- length(t.paired)  # number of paired sample

  # get the rest sample which is in T(not NA) and in N(is NA)
  t.rest <- x[(is.na(y)) & (!is.na(x))]
  n2 <- length(t.rest)

  # get the rest sample which is in N(not NA) and in T(is NA)
  n.rest <- y[(is.na(x)) & (!is.na(y))]
  n3 <- length(n.rest)


  r <- cov(t.paired, n.paired)/(sd(t.paired)*sd(n.paired))
  f <- n1*(n1+n3+n2*cov(t.paired, n.paired)/var(t.paired))/((n1+n2)*(n1+n3) - n2*n3*r*r)
  g <- n1*(n1+n2+n3*cov(t.paired, n.paired)/var(n.paired))/((n1+n2)*(n1+n3) - n2*n3*r*r)

  v1.numerator <- (f^2/n1 + (1-f)^2/n2)*var(t.paired)*(n1-1) + (g^2/n1 + (1-g)^2/n3)*var(n.paired)*(n1-1) - 2*f*g*cov(t.paired,n.paired)*(n1-1)/n1
  v1 <- v1.numerator/(n1-1)
  diff <- f*(mean(t.paired) - mean(t.rest)) - g*(mean(n.paired) - mean(n.rest)) + mean(t.rest) - mean(n.rest)

  z.ls <- diff/sqrt(v1)
  df <- n1

  p.value <- 0
  if(alternative == "two.sided") {
    p.value <- 2*(1- pt(abs(z.ls), df))
  }else if(alternative == "greater") {
    p.value <- 1 - pt(z.ls, df)
  }else if(alternative == "less") {
    p.value <- pt(z.ls, df)
  }

  cat("    lin stiver test\n P value is:", p.value, "\n statistic:", z.ls)
}
#####################################################################################################
#####################################################################################################
#####################################################################################################
#####################################################################################################

#' MLE based t-test of Ekbohm under homoscedasticity
#' MLE based test of Ekbohm
#' @description Perform two sample MLE based t-test of Ekbohm under homoscedasticity on vectors of data.
#' @usage ekbohm.test(x, y, alternative = c("two.sided", "less", "greater"))
#' @param x numeric vector
#' @param y numeric vector
#' @param alternative	 character string which specifys the alternative hypothesis.  This can only be set as "two.sided" (default), "greater" or "less".
#' @details
#' This formula useful for the 2-sample tests.  Setting alternative = "greater" provides the alternative that vector x has a larger mean than vector y. If paired = TRUE, both x and y must be specified and the same length. Missing values are removed, which can be done in pairs if paired = TRUE or singularly if paired = FALSE. If var.equal = TRUE  the pooled estimate of the variance is used. By default, if var.equal = FALSE then the variance is estimated separately for both groups and the Welch modification to the degrees of freedom is used. If the input data are effectively constant compared to the larger of the two means, an error is generated.
#' @return p value of the test
#' @return t-statistic.
#'
#' @examples
#' vec1 <- c(3,7,NA,2,5,8,NA,5,4,5)
#' vec2 <- c(6,NA,4,7,3,NA,3,4,9,2)
#' ekbohm.test(vec1, vec2, alternative = "greater")
#' @export

ekbohm.test <- function(x, y, alternative = "two.sided") {

  # get the paired sample, assign it to t.paired and n.paired
  t.paired <- x[(!is.na(x)) & (!is.na(y))]
  n.paired <- y[(!is.na(x)) & (!is.na(y))]
  n1 <- length(t.paired)  # number of paired sample

  # get the rest sample which is in T(not NA) and in N(is NA)
  t.rest <- x[(is.na(y)) & (!is.na(x))]
  n2 <- length(t.rest)

  # get the rest sample which is in N(not NA) and in T(is NA)
  n.rest <- y[(is.na(x)) & (!is.na(y))]
  n3 <- length(n.rest)

  # Since this Ekbohm test is based on the same variance, we give warning if two samples have different variance at level 0.05
  x.na.rm <- x[!is.na(x)]
  y.na.rm <- y[!is.na(y)]
  f = var.test(x.na.rm, y.na.rm)
  if(f$p.value >= 0.05) {
    warning("Warning: The assumption of Ekbohm test is the same variance of two samples. Since the variance are different between two samples at alpha = 0.05, Lin Stiver test is recommended", noBreaks. = T)
  }

  r <- var(t.paired, n.paired)/(sd(t.paired)*sd(n.paired))
  f <- n1*(n1+n3+n2*r)/((n1+n2)*(n1+n3) - n2*n3*r^2)
  g <- n1*(n1+n2+n3*r)/((n1+n2)*(n1+n3) -n2*n3*r^2)
  var.hat <- (var(t.paired)*(n1-1) + var(n.paired)*(n1-1) + (1+r^2)*(var(t.rest)*(n2-1) + var(n.rest)*(n3-1)))/(2*(n1-1) + (1+r^2)*(n2+n3-2))
  v1.star <- var.hat*((2*n1*(1-r) + (n2+n3)*(1-r^2))/((n1+n2)*(n1+n3) - n2*n3*r^2))

  diff <- f*(mean(t.paired) - mean(t.rest)) - g*(mean(n.paired) - mean(t.rest)) + mean(t.rest) - mean(n.rest)
  z.e <- diff/sqrt(v1.star)
  df <- n1

  p.value <- 0
  if(alternative == "two.sided") {
    p.value <- 2*(1 - pt(abs(z.e), df))
  }else if(alternative == "greater") {
    p.value <- 1 - pt(z.e, df)
  }else if(alternative == "less") {
    p.value <- pt(z.e, df)
  }

  cat("    Ekbohm test\n P value is:", p.value, "\n statistic:", z.e)
}

ekbohm.test <- function(x, y, alternative = c("two.sided", "less", "greater")){

}