Nothing
#############################################################################
#
# This file is a part of the R package "gradDescentR".
#
# Author: Dendi Handian
# Co-author: Imam Fachmi Nasrulloh
# Supervisors: Lala Septem Riza, Rani Megasari
# Copyright (c) Department of Computer Science Education, Indonesia University of Education.
#
# This package is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 2 of the License, or (at your option) any later version.
#
# This package is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#############################################################################
#' Standardize every column of a dataset (variance / z-score scaling).
#'
#' Each column of \code{dataSet} is centred on its mean and divided by its
#' sample standard deviation, i.e. \eqn{(x - mean) / sd}, so a scaled column
#' has mean 0 and standard deviation 1. A constant column (standard deviation
#' exactly 0) is scaled to all zeros instead of \code{NaN}; the stored
#' standard deviation stays 0, so \code{\link{varianceDescaling}} still
#' restores the original constant.
#'
#' @title The Variance/Standardization Feature Scaling Function
#'
#' @param dataSet a data.frame representing the dataset to be processed.
#'        It should contain only numbers (integer or float). The package
#'        documentation asks for at least two columns and ten rows, with the
#'        output variable in the last column; this function does not check
#'        that and scales every column in the same way.
#'
#' @examples
#' ##################################
#' ## Feature scaling with Variance Scaling Method
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' ## do variance scaling to dataset
#' featureScalingResult <- varianceScaling(dataSet)
#' ## show result
#' print(featureScalingResult$scaledDataSet)
#' print(featureScalingResult$scalingParameter)
#'
#' @seealso \code{\link{varianceDescaling}}
#'
#' @return a list with two elements: \code{scaledDataSet}, the scaled
#'         data.frame, and \code{scalingParameter}, a two-row matrix with one
#'         column per variable (row 1: mean, row 2: standard deviation).
#'
#' @export
varianceScaling <- function(dataSet){
  varianceParameter <- getMeanStd(dataSet)
  # as.list() guarantees column-wise arithmetic: a data.frame combined with a
  # list of length ncol pairs element j with column j, whereas a plain vector
  # would be recycled down the rows.
  meanValue <- as.list(varianceParameter[1, ])
  stdValue <- as.list(varianceParameter[2, ])
  # divide constant columns by 1 rather than 0 so they become 0, not NaN
  divisor <- lapply(stdValue, function(s) if (isTRUE(s == 0)) 1 else s)
  scaledDataSet <- (dataSet - meanValue) / divisor
  list(scaledDataSet = scaledDataSet, scalingParameter = varianceParameter)
}
#' A function to revert the scaling done by the variance/standardization
#' scaling method.
#'
#' This function takes a dataset scaled by \code{\link{varianceScaling}}
#' and computes \eqn{x \times sd + mean} for every column, using the mean
#' and standard deviation stored in \code{varianceParameter}.
#'
#' @title Variance/Standardization Revert Function
#'
#' @param dataSet a data.frame that representing dataset (\eqn{m \times n}),
#'        where \eqn{m} is the number of instances and \eqn{n} is the number
#'        of variables where the last column is the output variable. It
#'        should contain only numbers (integer or float).
#'
#' @param varianceParameter a two-row matrix with one column per variable of
#'        \code{dataSet}: row 1 holds the mean values and row 2 the standard
#'        deviation values used to restore the original data. Normally this
#'        is the \code{scalingParameter} element returned by
#'        \code{\link{varianceScaling}}; an ordinary numeric matrix of the
#'        same layout is accepted as well.
#'
#' @examples
#' ##################################
#' ## Revert Variance Scaling
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' fsr <- varianceScaling(dataSet)
#' rfsr <- varianceDescaling(fsr$scaledDataSet, fsr$scalingParameter)
#'
#' @seealso \code{\link{varianceScaling}}
#'
#' @return a data.frame representing reverted dataset value
#'
#' @export
varianceDescaling <- function(dataSet, varianceParameter){
  # Rows are turned into lists so that element j is applied to column j.
  # A plain numeric vector would instead be recycled down the rows of the
  # data.frame and pair values with the wrong columns.
  meanValue <- as.list(varianceParameter[1, ])
  stdValue <- as.list(varianceParameter[2, ])
  (dataSet * stdValue) + meanValue
}
#' A function to do feature scaling to dataset with min-max scaling method.
#'
#' Each column of \code{dataSet} is transformed with
#' \eqn{(x - min) / (max - min)}, so the scaled values of a column lie
#' between 0 and 1. A constant column (\eqn{max - min = 0}) is scaled to all
#' zeros instead of \code{NaN}; the stored range stays 0, so
#' \code{\link{minmaxDescaling}} still restores the original constant.
#'
#' @title The Min-Max Feature Scaling Function
#'
#' @param dataSet a data.frame that representing dataset (\eqn{m \times n}),
#'        where \eqn{m} is the number of instances and \eqn{n} is the number
#'        of variables where the last column is the output variable. It
#'        should contain only numbers (integer or float). The package
#'        documentation asks for at least two columns and ten rows; this
#'        function does not check that and scales every column alike.
#'
#' @examples
#' ##################################
#' ## Feature scaling with Min-Max Scaling Method
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' ## do min-max scaling to dataset
#' featureScalingResult <- minmaxScaling(dataSet)
#' ## show result
#' print(featureScalingResult$scaledDataSet)
#' print(featureScalingResult$scalingParameter)
#'
#' @seealso \code{\link{minmaxDescaling}}
#'
#' @return a list with two elements: \code{scaledDataSet}, the scaled
#'         data.frame, and \code{scalingParameter}, a two-row matrix with one
#'         column per variable (row 1: minimum, row 2: maximum minus minimum).
#'
#' @export
minmaxScaling <- function(dataSet){
  minmaxParameter <- getMinMax(dataSet)
  # as.list() guarantees column-wise arithmetic: a data.frame combined with a
  # list of length ncol pairs element j with column j, whereas a plain vector
  # would be recycled down the rows.
  minValue <- as.list(minmaxParameter[1, ])
  rangeValue <- as.list(minmaxParameter[2, ])
  # divide constant columns by 1 rather than 0 so they become 0, not NaN
  divisor <- lapply(rangeValue, function(r) if (isTRUE(r == 0)) 1 else r)
  scaledDataSet <- (dataSet - minValue) / divisor
  list(scaledDataSet = scaledDataSet, scalingParameter = minmaxParameter)
}
#' A function to revert the scaling done by the min-max scaling method.
#'
#' This function takes a dataset scaled by \code{\link{minmaxScaling}} and
#' computes \eqn{x \times (max - min) + min} for every column, using the
#' minimum and range stored in \code{minmaxParameter}.
#'
#' @title Min-Max Scaling Revert Function
#'
#' @param dataSet a data.frame that representing dataset (\eqn{m \times n}),
#'        where \eqn{m} is the number of instances and \eqn{n} is the number
#'        of variables where the last column is the output variable. It
#'        should contain only numbers (integer or float).
#'
#' @param minmaxParameter a two-row matrix with one column per variable of
#'        \code{dataSet}: row 1 holds the minimum values and row 2 the range
#'        (maximum minus minimum) used to restore the original data.
#'        Normally this is the \code{scalingParameter} element returned by
#'        \code{\link{minmaxScaling}}; an ordinary numeric matrix of the same
#'        layout is accepted as well.
#'
#' @examples
#' ##################################
#' ## Revert Min-Max Scaling
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' fsr <- minmaxScaling(dataSet)
#' rfsr <- minmaxDescaling(fsr$scaledDataSet, fsr$scalingParameter)
#'
#' @seealso \code{\link{minmaxScaling}}
#'
#' @return a data.frame representing reverted dataset value
#'
#' @export
minmaxDescaling <- function(dataSet, minmaxParameter){
  # Rows are turned into lists so that element j is applied to column j.
  # A plain numeric vector would instead be recycled down the rows of the
  # data.frame and pair values with the wrong columns.
  minValue <- as.list(minmaxParameter[1, ])
  rangeValue <- as.list(minmaxParameter[2, ])
  (dataSet * rangeValue) + minValue
}
#' A function to split dataset into training and testing data
#'
#' This function splits a dataset into training and testing data. By default
#' it puts 50% of the rows into \code{dataTrain} and the other 50% into
#' \code{dataTest}. You can decide the training data rate by changing the
#' value of \code{dataTrainRate}. For example, to use 80% of the rows for
#' training, set \code{dataTrainRate} to 0.8; the remaining 0.2 becomes the
#' \code{dataTest} rate. The number of training rows is
#' \code{trunc(nrow(dataSet) * dataTrainRate)}; the rows are drawn at random
#' without replacement and every row that was not drawn goes to
#' \code{dataTest}, including the case where no row is drawn at all.
#'
#' @title The Data Splitting Function
#'
#' @param dataSet a data.frame that representing dataset (\eqn{m \times n}),
#'        where \eqn{m} is the number of instances and \eqn{n} is the number
#'        of variables where the last column is the output variable. The
#'        package documentation asks for at least two columns and ten rows
#'        of numbers; this function does not check that.
#'
#' @param dataTrainRate a float number between 0 to 1 representing the
#'        training data rate of given dataset. This parameter has
#'        default value of 0.5.
#'
#' @param seed an integer value for static random. Default value is NULL,
#'        which means the split is drawn from the current state of R's
#'        random number generator. When a seed is given, the generator is
#'        seeded only for this call and its previous state is restored on
#'        exit.
#'
#' @examples
#' ##################################
#' ## Splitting Dataset into Training and Testing Data
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' ## split dataset
#' splitedDataSet <- splitData(dataSet)
#' #show result
#' print(splitedDataSet$dataTrain)
#' print(splitedDataSet$dataTest)
#'
#' @return a list contains data.frame of training data (\code{dataTrain})
#'         and testing data (\code{dataTest}).
#'
#' @export
splitData <- function(dataSet, dataTrainRate=0.5, seed=NULL){
  if (!is.null(seed)) {
    # Seed only for the duration of this call: remember the caller's
    # generator state and put it back on exit, so a seeded split neither
    # fixes nor re-randomises the random numbers the caller draws afterwards.
    globalEnv <- globalenv()
    hadState <- exists(".Random.seed", envir = globalEnv, inherits = FALSE)
    if (hadState) {
      previousState <- get(".Random.seed", envir = globalEnv, inherits = FALSE)
    }
    on.exit({
      if (hadState) {
        assign(".Random.seed", previousState, envir = globalEnv)
      } else if (exists(".Random.seed", envir = globalEnv, inherits = FALSE)) {
        rm(list = ".Random.seed", envir = globalEnv)
      }
    }, add = TRUE)
    set.seed(seed)
  }
  rowCount <- nrow(dataSet)
  # randomize and set dataTrain index
  dataTrainIndex <- sample.int(rowCount, trunc(rowCount * dataTrainRate))
  # A logical mask is used for the test rows because negative indexing with
  # an empty index (dataSet[-integer(0), ]) selects no rows at all.
  isTrainRow <- seq_len(rowCount) %in% dataTrainIndex
  list(
    dataTrain = dataSet[dataTrainIndex, , drop = FALSE],
    dataTest = dataSet[!isTrainRow, , drop = FALSE]
  )
}
#' A function to predict testing data with built gradient descent model
#'
#' This function predicts an output value for every row of
#' \code{dataTestInput}, which holds the input variables only. The
#' \code{model} parameter is the coefficients produced by a
#' gradient-descent-based learning function. A column of ones is put in
#' front of the inputs and the result is multiplied by the transposed
#' \code{model}, so the first coefficient acts as the intercept. The result
#' is \code{dataTestInput} combined with the prediction as the last column.
#'
#' @title Predicting Function for Linear Model
#'
#' @param model a matrix of coefficients used as a linear model to predict
#'        testing data input. This parameter exclusively produced by the
#'        gradient-descent-based learning function.
#'
#' @param dataTestInput a data.frame represented dataset with input variables
#'        only (\eqn{m \times n-1}), where \eqn{m} is the number of instances
#'        and \eqn{n-1} is the number of input variables. A plain vector is
#'        accepted too and is treated as a single input variable.
#'
#' @examples
#' ##################################
#' ## Predict Testing Data Using GD Model
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' ## do variance scaling to dataset
#' featureScalingResult <- varianceScaling(dataSet)
#' ## split dataset
#' splitedDataSet <- splitData(featureScalingResult$scaledDataSet)
#' ## built model using GD
#' model <- GD(splitedDataSet$dataTrain)
#' ## separate testing data with input only
#' dataTest <- splitedDataSet$dataTest
#' dataTestInput <- dataTest[, seq_len(ncol(dataTest) - 1), drop = FALSE]
#' ## predict testing data using GD model
#' predictionData <- prediction(model, dataTestInput)
#' ## show result()
#' predictionData
#'
#'
#' @return a data.frame of testing data input variables and prediction variables.
#'
#' @seealso \code{\link{GD}}, \code{\link{MBGD}}, \code{\link{SGD}}, \code{\link{SAGD}},
#' \code{\link{MGD}}, \code{\link{AGD}}, \code{\link{ADAGRAD}}, \code{\link{ADADELTA}},
#' \code{\link{RMSPROP}}, \code{\link{ADAM}}
#'
#' @export
prediction <- function(model, dataTestInput){
  # `[` drops a single selected column to a bare vector, which has no
  # ncol(); wrap it so it is handled as a one-column input.
  if (is.null(dim(dataTestInput))) {
    dataTestInput <- data.frame(dataTestInput)
  }
  # convert the input into a plain matrix, filled column by column
  inputMatrix <- matrix(
    unlist(dataTestInput),
    ncol = ncol(dataTestInput),
    byrow = FALSE
  )
  # leading column of ones pairs with the first coefficient of the model
  designMatrix <- cbind(1, inputMatrix)
  predicted <- as.data.frame(designMatrix %*% t(model))
  # append the prediction after the input variables
  cbind(dataTestInput, predicted)
}
#' A function to calculate error using Root-Mean-Square-Error
#'
#' This function calculates the error between two variables.
#' \code{outputData} is the first parameter of this function representing
#' the real output value. \code{prediction} is the second parameter of
#' this function representing the prediction value. The result is
#' \eqn{\sqrt{\sum (outputData - prediction)^2 / m}}, where \eqn{m} is the
#' number of instances.
#'
#' @title RMSE Calculator Function
#'
#' @param outputData a data.frame represented dataset with output variable
#'        only (\eqn{m \times 1}), where \eqn{m} is the number of instances
#'        has one variable, which is the output.
#'
#' @param prediction a data.frame represented prediction data with output
#'        variable only (\eqn{m \times 1}), where \eqn{m} is the number of
#'        instances has one variable, which is the output.
#'
#' @examples
#' ##################################
#' ## Calculate Error using RMSE
#' ## load R Package data
#' data(gradDescentRData)
#' ## get z-factor Data
#' dataSet <- gradDescentRData$CompressilbilityFactor
#' ## do variance scaling to dataset
#' featureScalingResult <- varianceScaling(dataSet)
#' ## split dataset
#' splitedDataSet <- splitData(featureScalingResult$scaledDataSet)
#' ## built model using GD
#' model <- GD(splitedDataSet$dataTrain)
#' ## separate testing data with input only
#' dataTest <- splitedDataSet$dataTest
#' dataTestInput <- dataTest[, seq_len(ncol(dataTest) - 1), drop = FALSE]
#' ## predict testing data using GD model
#' predictionData <- prediction(model, dataTestInput)
#' ## calculate error using rmse
#' errorValue <- RMSE(dataTest[, ncol(dataTest)],
#' predictionData[, ncol(predictionData)])
#' ## show result
#' errorValue
#'
#' @return a float value: the root of the mean squared difference between
#'         \code{outputData} and \code{prediction}
#'
#' @export
RMSE <- function(outputData, prediction){
  # number of instances; cbind() gives a row count for vectors as well as
  # for data.frames
  rowLength <- nrow(cbind(outputData, prediction))
  squaredError <- (outputData - prediction)^2
  # the mean is taken inside the square root: sqrt(SSE / m), not sqrt(SSE) / m
  sqrt(sum(squaredError) / rowLength)
}
# Internal helper: mean and sample standard deviation of every column.
#
# dataSet: a data.frame; each column is flattened with unlist() and passed
#          to mean() and sd().
# Returns a 2 x ncol(dataSet) matrix of mode list whose cells hold single
# numbers: row 1 is the column mean, row 2 the column standard deviation.
# The list mode is kept on purpose: taking a row of it yields a list, and
# data.frame arithmetic applies a list of length ncol column by column,
# which is what the scaling functions in this file rely on.
getMeanStd <- function(dataSet){
  columnCount <- ncol(dataSet)
  # start from empty cells instead of copying the data columns into them
  result <- matrix(
    vector("list", 2L * columnCount),
    nrow = 2,
    ncol = columnCount
  )
  # seq_len() makes the loop a no-op for a dataset without columns
  for (column in seq_len(columnCount)) {
    columnValues <- unlist(dataSet[column])
    result[1, column] <- mean(columnValues)
    result[2, column] <- sd(columnValues)
  }
  result
}
# Internal helper: minimum and range of every column.
#
# dataSet: a data.frame; each column is flattened with unlist() and passed
#          to min() and max().
# Returns a 2 x ncol(dataSet) matrix of mode list whose cells hold single
# numbers: row 1 is the column minimum, row 2 the column range
# (maximum minus minimum).
# The list mode is kept on purpose: taking a row of it yields a list, and
# data.frame arithmetic applies a list of length ncol column by column,
# which is what the scaling functions in this file rely on.
getMinMax <- function(dataSet){
  columnCount <- ncol(dataSet)
  # start from empty cells instead of copying the data columns into them
  result <- matrix(
    vector("list", 2L * columnCount),
    nrow = 2,
    ncol = columnCount
  )
  # seq_len() makes the loop a no-op for a dataset without columns
  for (column in seq_len(columnCount)) {
    columnValues <- unlist(dataSet[column])
    lowest <- min(columnValues)
    result[1, column] <- lowest
    result[2, column] <- max(columnValues) - lowest
  }
  result
}
# Internal helper: draw random starting coefficients.
#
# columnLength: number of coefficients to draw.
# minTheta, maxTheta: bounds of the uniform distribution passed to runif().
# seed: NULL (default) draws from the current state of R's random number
#       generator; otherwise the generator is seeded with this value for
#       the draw only and its previous state is restored on exit.
# Returns a 1 x columnLength numeric matrix.
getTheta <- function(columnLength, minTheta=0, maxTheta=1, seed=NULL){
  if (!is.null(seed)) {
    # Seed only for the duration of this call: remember the caller's
    # generator state and put it back on exit, so the generator is not left
    # pinned to `seed` for whatever the caller draws next.
    globalEnv <- globalenv()
    hadState <- exists(".Random.seed", envir = globalEnv, inherits = FALSE)
    if (hadState) {
      previousState <- get(".Random.seed", envir = globalEnv, inherits = FALSE)
    }
    on.exit({
      if (hadState) {
        assign(".Random.seed", previousState, envir = globalEnv)
      } else if (exists(".Random.seed", envir = globalEnv, inherits = FALSE)) {
        rm(list = ".Random.seed", envir = globalEnv)
      }
    }, add = TRUE)
    set.seed(seed)
  }
  thetaValues <- runif(columnLength, min = minTheta, max = maxTheta)
  # single-row matrix, one column per coefficient
  matrix(thetaValues, nrow = 1, ncol = columnLength, byrow = FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.