# R/extract_variables.R

#' @title Extract Variables of Specific Types from a given Dataset
#'
#' @description This function extracts variables of a specific type from a
#' given dataset. Variables that are either numeric, factor or character can
#' be extracted. Alternatively, all variables except those of the specified
#' type can be extracted. The result is output as a data frame and can
#' optionally be saved to a specified directory as a .csv file.
#'
#' @details The dataset is first coerced with \code{as.data.frame()}. Columns
#' are classified with \code{is.numeric()}, \code{is.factor()} or
#' \code{is.character()}, so integer columns count as numeric. The selected
#' columns keep their original order and names; row names are reset to the
#' default sequence. If no column is selected, a data frame with zero columns
#' and the same number of rows as \code{dataset} is returned.
#'
#' A file is only written when \code{directory} is supplied; \code{file_name}
#' on its own does not trigger a write.
#'
#' @param dataset A dataset from which the variables are extracted.
#'
#' @param type The data type to be extracted from the dataset.
#' One of three; "numeric", "factor", "character".
#' Default is "numeric".
#'
#' @param extract_not A logical object indicating whether all but the
#' specified data type should be extracted.
#' Default is FALSE.
#'
#' @param file_name A character object indicating the file name when saving
#' the data frame.
#' The default is NULL.
#' The name must include the .csv suffix.
#' Required when \code{directory} is supplied.
#'
#' @param directory A character object specifying the directory where the
#' data frame is to be saved as a .csv file.
#' The default is NULL, in which case nothing is written.
#'
#' @return Outputs the extracted variables as a data frame.
#'
#' @export
#'
#' @seealso \code{\link{remove_variables}}, \code{\link{derive_variables}}, \code{\link{impute_variables}}, \code{\link{standardise_variables}}, \code{\link{transform_variables}}
#'
#' @keywords extract variables, numeric, factor, character
#'
#' @examples
#' # Example - titanic
#'
#' # Extract the Numeric Variables
#' extract_variables(dataset = titanic, type = "numeric")
#'
#' # Extract the Factor Variables
#' extract_variables(dataset = titanic, type = "factor")
#'
#' # Extract all but the Character Variables
#' extract_variables(dataset = titanic, type = "character", extract_not = TRUE)
#'
extract_variables <- function(dataset,
                              type = c("numeric", "factor", "character"),
                              extract_not = FALSE,
                              file_name = NULL,
                              directory = NULL) {

  # Coerce the input so that it can be treated as a list of columns
  dataset <- as.data.frame(dataset)

  # Resolve `type` to exactly one of the supported choices
  type <- match.arg(type)

  # Deliberately compare with `==` rather than testing for a strict logical:
  # values such as 0 / 1 have always been accepted here. Anything that equals
  # neither TRUE nor FALSE (NA, NULL, longer vectors, ...) is rejected up
  # front instead of failing later with an unrelated error.
  exclude_type <- isTRUE(extract_not == TRUE)
  if (!exclude_type && !isTRUE(extract_not == FALSE)) {
    stop("`extract_not` must be a single TRUE or FALSE value.", call. = FALSE)
  }

  # Pick the predicate that identifies columns of the requested type
  is_type <- switch(type,
    numeric = is.numeric,
    factor = is.factor,
    character = is.character
  )

  # One flag per column; vapply() yields logical(0) for a zero-column dataset
  matches_type <- vapply(dataset, is_type, logical(1), USE.NAMES = FALSE)

  # Flip the selection when everything but the requested type is wanted
  keep <- xor(matches_type, exclude_type)

  # Select the columns in their original order. An empty selection gives a
  # zero-column data frame rather than a placeholder column of NA values.
  extracted_data <- dataset[keep]

  # `[.data.frame` may make duplicated column names unique; restore the
  # source names so the output labels match the input exactly
  names(extracted_data) <- names(dataset)[keep]

  # The result carries default row names, not those of the input
  rownames(extracted_data) <- NULL

  # Optionally save the extracted data as a .csv file
  if (!is.null(directory)) {

    if (is.null(file_name)) {
      stop("`file_name` must be supplied when `directory` is given.",
           call. = FALSE)
    }

    utils::write.csv(x = extracted_data,
                     file = file.path(directory, file_name),
                     row.names = FALSE)

  }

  # return the extracted dataset
  extracted_data

}
# oislen/BuenaVista documentation built on May 16, 2019, 8:12 p.m.