# R/tests_wilcoxon.R

#' @title Performs One Sample and Two Sample Wilcoxon Tests on a given dataset
#' 
#' @description Performs One Sample and Two Sample Wilcoxon Tests on a given dataset. 
#' The data can be a mixture of numeric and factor variables; only numeric
#' columns are tested.
#' The results are output as a data frame. 
#' Furthermore the results can be saved as a .csv file to a specified directory.
#' 
#' @details Which tests are run depends on the arguments supplied:
#' \itemize{
#'   \item no response variable, \code{mu = NULL}: a two sample test for every
#'         pair of numeric columns;
#'   \item no response variable, \code{mu} given: a one sample test of every
#'         numeric column against \code{mu};
#'   \item response variable given, \code{mu = NULL}: a two sample test of every
#'         other numeric column against the response variable;
#'   \item response variable given, \code{mu} given: a one sample test of the
#'         response variable against \code{mu}.
#' }
#' 
#' @param dataset A dataset on which Wilcoxon Tests are performed.
#' 
#' @param y_index An integer value, the column index of the response variable, the default is NULL.
#' 
#' @param y_name A character value, the column name of the response variable, the default is NULL.
#'     When both \code{y_index} and \code{y_name} are given, \code{y_name} takes precedence.
#' 
#' @param mu A numeric value specifying the hypothesised median for one sample tests.
#'     The default is NULL, in which case two sample tests are performed.
#' 
#' @param alternative The type of hypothesis being tested; two.sided, greater, less. 
#'     The default is "two.sided"
#' 
#' @param conf.level The level of confidence passed to the Wilcoxon Test, default is 0.95
#' 
#' @param paired Logical value indicating a paired Wilcoxon Test.
#'     Only used by the two sample tests.
#' 
#' @param file_name A character object indicating the file name when saving the data frame.
#'                  The default is NULL.
#'                  The name must include the .csv suffix.
#' 
#' @param directory A character object specifying the directory where the data frame is to be saved as a .csv file.
#'                  The default is NULL, in which case nothing is written.
#'                  
#' @return A data frame with one row per test performed and seven columns:
#'     the names of the two quantities compared, their medians (or \code{mu}),
#'     the test statistic and p-value (both rounded to 4 digits) and the
#'     alternative hypothesis. Rows containing missing values are dropped, so a
#'     data frame with zero rows is returned when no test could be performed.
#' 
#' @export 
#' 
#' @seealso \code{\link{tests_chisq}}, \code{\link{tests_cors}}, \code{\link{tests_ks}}, \code{\link{tests_norm}}, \code{\link{tests_proptest}}, \code{\link{tests_t}}, \code{\link{tests_var}}
#' 
#' @keywords Wilcoxon Test, Mann-Whitney Test, Rank Sum Test
#' 
#' @examples 
#' #-- Example Lung Capacity Data --#
#' 
#' # Perform Wilcoxon tests on the entire dataset
#' tests_wilcoxon(dataset = lungcap)
#' 
#' # Perform Wilcoxon tests in relation to the second column.
#' tests_wilcoxon(dataset = lungcap, y_index = 2)
#' 
#' # Perform Wilcoxon tests in relation to the 'Age' Column.
#' tests_wilcoxon(dataset = lungcap, y_name = "Age")
#' 
tests_wilcoxon <- function(dataset, 
                           y_index = NULL, 
                           y_name = NULL,
                           mu = NULL, 
                           alternative = c("two.sided", "greater", "less"), 
                           conf.level = 0.95, 
                           paired = FALSE, 
                           file_name = NULL, 
                           directory = NULL) 
  {

  # Confirm correct choice for alternative
  alternative <- match.arg(alternative)

  # Convert to a data frame before any column is extracted, so that single
  # column extraction always yields a plain vector (also for tibbles/matrices)
  dataset <- as.data.frame(dataset)

  #-------------------------------------------------------------------------------#
  # Resolve the response variable (if any)                                        #
  #-------------------------------------------------------------------------------#

  # A response variable is requested when either identifier is supplied
  has_response <- !is.null(y_index) || !is.null(y_name)

  if (!is.null(y_name)) {

    # y_name takes precedence over y_index
    y_index <- which(colnames(dataset) == y_name)

    if (length(y_index) != 1L) {
      stop("'y_name' must match exactly one column of 'dataset'",
           call. = FALSE)
    }

  } else if (!is.null(y_index)) {

    y_index_ok <- is.numeric(y_index) && length(y_index) == 1L &&
      !is.na(y_index) && y_index >= 1 && y_index <= ncol(dataset)

    if (!y_index_ok) {
      stop("'y_index' must be a single column index of 'dataset'",
           call. = FALSE)
    }

  }

  #-------------------------------------------------------------------------------#
  # Helpers                                                                       #
  #-------------------------------------------------------------------------------#

  # The second and fourth column are labelled differently when a response
  # variable is used
  if (has_response) {
    col_names <- c("Xi", "Y", "Xim", "Ym", "WCT Stat", "WCT P.V.", "Ha")
  } else {
    col_names <- c("Xi", "Xj", "Xim", "Xjm", "WCT Stat", "WCT P.V.", "Ha")
  }

  # Build one row of the results data frame from a fitted test
  result_row <- function(x_label, y_label, x_centre, y_centre, wct) {
    row <- data.frame(x_label,
                      y_label,
                      x_centre,
                      y_centre,
                      round(unname(wct$statistic), digits = 4),
                      round(wct$p.value, digits = 4),
                      alternative,
                      stringsAsFactors = FALSE)
    colnames(row) <- col_names
    row
  }

  # Two sample Wilcoxon test of x against y
  two_sample <- function(x, y) {
    stats::wilcox.test(x = x,
                       y = y,
                       alternative = alternative,
                       conf.level = conf.level,
                       paired = paired)
  }

  # One sample Wilcoxon test of x against mu
  one_sample <- function(x) {
    stats::wilcox.test(x = x,
                       mu = mu,
                       alternative = alternative,
                       conf.level = conf.level)
  }

  # Median ignoring missing values
  centre <- function(x) {
    stats::median(x = x, na.rm = TRUE)
  }

  #-------------------------------------------------------------------------------#
  # Run the tests                                                                 #
  #-------------------------------------------------------------------------------#

  # Only numeric columns can be tested
  numeric_cols <- unname(which(vapply(dataset, is.numeric, logical(1))))
  col_labels <- colnames(dataset)

  # One single-row data frame per test performed
  rows <- list()

  if (!has_response && is.null(mu)) {

    # Every pair (i, j), i before j, of numeric columns
    for (a in seq_along(numeric_cols)) {

      for (b in seq_along(numeric_cols)[-seq_len(a)]) {

        i <- numeric_cols[[a]]
        j <- numeric_cols[[b]]

        rows[[length(rows) + 1L]] <- result_row(
          col_labels[i],
          col_labels[j],
          centre(dataset[[i]]),
          centre(dataset[[j]]),
          two_sample(dataset[[i]], dataset[[j]])
        )

      }

    }

  } else if (!has_response) {

    # Every numeric column against mu
    for (i in numeric_cols) {

      rows[[length(rows) + 1L]] <- result_row(
        col_labels[i],
        "Mu",
        centre(dataset[[i]]),
        mu,
        one_sample(dataset[[i]])
      )

    }

  } else if (is.null(mu)) {

    # Every other numeric column against the (numeric) response variable
    if (is.numeric(dataset[[y_index]])) {

      for (i in setdiff(numeric_cols, y_index)) {

        rows[[length(rows) + 1L]] <- result_row(
          col_labels[i],
          col_labels[y_index],
          centre(dataset[[i]]),
          centre(dataset[[y_index]]),
          two_sample(dataset[[i]], dataset[[y_index]])
        )

      }

    }

  } else if (is.numeric(dataset[[y_index]])) {

    # The response variable against mu
    rows[[1L]] <- result_row(
      col_labels[y_index],
      "Mu",
      centre(dataset[[y_index]]),
      mu,
      one_sample(dataset[[y_index]])
    )

  }

  #-------------------------------------------------------------------------------#
  # Assemble the results                                                          #
  #-------------------------------------------------------------------------------#

  if (length(rows) > 0L) {

    wilcoxontestdf <- do.call(rbind, rows)

  } else {

    # No test could be performed: return an empty, correctly labelled data frame
    wilcoxontestdf <- data.frame(character(0), character(0),
                                 numeric(0), numeric(0),
                                 numeric(0), numeric(0),
                                 character(0),
                                 stringsAsFactors = FALSE)
    colnames(wilcoxontestdf) <- col_names

  }

  # Remove the incomplete cases
  wilcoxontestdf <- wilcoxontestdf[stats::complete.cases(wilcoxontestdf), ,
                                   drop = FALSE]

  #-------------------------------------------------------------------------------#
  # Write the data frame to the specified directory                               #
  #-------------------------------------------------------------------------------#

  # This must happen before the value is returned, otherwise it is never reached
  if (!is.null(directory)) {

    if (is.null(file_name)) {

      warning("'directory' was given without 'file_name'; no file was written",
              call. = FALSE)

    } else {

      utils::write.csv(x = wilcoxontestdf,
                       file = file.path(directory, file_name),
                       row.names = FALSE)

    }

  }

  # return the wilcoxontestdf
  wilcoxontestdf

}
# oislen/BuenaVista documentation built on May 16, 2019, 8:12 p.m.