```r
knitr::opts_chunk$set(echo = TRUE)
```
StatComp21088 is a simple R package that implements three variants of the gradient descent algorithm: batch gradient descent (BGD), stochastic gradient descent (SGD), and mini-batch gradient descent (MBGD).
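All three variants fit a linear model by minimizing the same squared-error loss. The update rule below is written here only for orientation; it is not taken from the package documentation, but it matches the per-sample steps implemented in the code further down:

$$
L(\theta) = \frac{1}{n}\sum_{i=1}^{n}\bigl(y_i - x_i^\top\theta\bigr)^2, \qquad
\theta \leftarrow \theta + \alpha \cdot \frac{1}{|B|}\sum_{i \in B} x_i\bigl(y_i - x_i^\top\theta\bigr),
$$

where the batch B is the whole sample for BGD, a single observation for SGD, and a subset of size batch_size for MBGD.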
The function Shuffle_sequence shuffles the row order of the input data (a matrix whose first column is filled with ones). Its output is a random permutation of the integers 1 to n, where n is the number of rows of the input data.
```r
# library(StatComp21088)
Shuffle_sequence <- function(data){
  n <- nrow(data)
  random_sequence <- sample(1:n, n)
  return(random_sequence)
}
n <- 10
A <- matrix(rep(1, n * n), n, n)
shuffle_sequence <- Shuffle_sequence(A)
print(shuffle_sequence)
```
Before the algorithms are used to train the model, the first column of the data should be filled with ones so that an intercept term can be estimated; the function Addo does this.
```r
Addo <- function(data){
  n <- nrow(as.matrix(data))
  c <- rep(1, n)
  new_data <- as.matrix(data.frame(c, data))
  return(new_data)
}
A <- matrix(c(1, 1, 1, 1, 1, 2, 3, 4), 4, 2)
x <- Addo(A)
print(x)
```
Batch gradient descent (BGD) uses all of the samples in each iteration to update the parameters. The function BGD performs one such iteration. Its inputs are the design matrix input_data, the response real_result, the learning rate alpha, and the current parameter value theta; the output is theta after one update.
```r
BGD <- function(input_data, real_result, alpha, theta){
  n0 <- nrow(input_data)
  p0 <- ncol(input_data)
  gradient_increasment <- matrix(NA, nrow = n0, ncol = p0)
  # each row stores one sample's contribution to the update direction
  for (i in 1:n0){
    g <- as.numeric(input_data[i, ] * as.vector(real_result[i] - input_data[i, ] %*% theta))
    gradient_increasment[i, ] <- g
  }
  # average over all samples, then take one step of size alpha
  avg_g <- colMeans(gradient_increasment)
  theta <- theta + alpha * avg_g
  return(theta)
}
x <- seq(0.1, 10, 0.002)
n <- length(x)
y <- 2 * x + 5 + rnorm(n)
z <- as.matrix(data.frame(rep(1, n), x))
theta <- BGD(z, y, 0.002, c(1, 1))
print(theta)
```
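For readers who prefer matrix notation, the per-sample loop in BGD computes the same quantity as a single vectorized expression. The following check is only an illustration (theta0 is introduced here for the example and is not part of the package); it reuses z and y from the chunk above.

```r
# Sanity check (illustrative only): the averaged per-sample gradient equals
# t(X) %*% (y - X %*% theta) / n, so one vectorized step should reproduce
# the result of BGD(z, y, 0.002, c(1, 1)) above.
theta0 <- c(1, 1)
avg_g_vec <- as.numeric(t(z) %*% (y - z %*% theta0)) / nrow(z)
print(theta0 + 0.002 * avg_g_vec)
```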
Stochastic gradient descent (SGD) updates the parameters once for every sample. The function SGD shuffles the data and performs one full pass over it (one iteration of training). Its inputs are input_data, real_result, the learning rate alpha, and the current parameter value theta; the output is theta after the pass.
```r
SGD <- function(input_data, real_result, alpha, theta){
  # shuffle the rows so that the samples are visited in random order
  shuffle_sequence <- Shuffle_sequence(input_data)
  x <- input_data[shuffle_sequence, ]
  y <- real_result[shuffle_sequence]
  n0 <- nrow(x)
  # one parameter update per sample
  for (i in 1:n0){
    g <- as.numeric(x[i, ] * as.vector(y[i] - x[i, ] %*% theta))
    theta <- theta + alpha * g
  }
  return(theta)
}
x <- seq(0.1, 10, 0.002)
n <- length(x)
y <- 2 * x + 5 + rnorm(n)
z <- as.matrix(data.frame(rep(1, n), x))
theta <- SGD(z, y, 0.002, c(1, 1))
print(theta)
```
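Because a single call to SGD already performs n parameter updates, it usually moves much closer to the underlying coefficients than a single BGD step. As an illustration only (theta_sgd is introduced here for the example), one could run a few extra passes over the same z and y from the chunk above:

```r
# Illustrative sketch: a few extra SGD passes (epochs) over the same data
theta_sgd <- c(1, 1)
for (epoch in 1:5){
  theta_sgd <- SGD(z, y, 0.002, theta_sgd)
}
# with y simulated as 2*x + 5 + noise, this should land roughly near c(5, 2),
# up to the stochastic noise of the per-sample updates
print(theta_sgd)
```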
Mini-batch gradient descent (MBGD) updates the parameters once per batch of batch_size samples. The function MBGD shuffles the data and performs one full pass over it, batch by batch. Its inputs are input_data, real_result, the batch size batch_size, the learning rate alpha, and the current parameter value theta; the output is theta after the pass.
```r
MBGD <- function(input_data, real_result, batch_size, alpha, theta){
  # shuffle the rows, then sweep over them in batches of size batch_size
  shuffle_sequence <- Shuffle_sequence(input_data)
  x <- input_data[shuffle_sequence, ]
  y <- real_result[shuffle_sequence]
  n <- nrow(x)
  for (start in seq(1, n, batch_size)){
    end <- min(c(start + batch_size - 1, n))
    # drop = FALSE keeps a one-row batch as a matrix
    Mini_train_data <- x[start:end, , drop = FALSE]
    Mini_train_result <- y[start:end]
    n0 <- nrow(Mini_train_data)
    p0 <- ncol(Mini_train_data)
    gradient_increasment <- matrix(NA, nrow = n0, ncol = p0)
    for (i in 1:n0){
      g <- as.numeric(Mini_train_data[i, ] * as.vector(Mini_train_result[i] - Mini_train_data[i, ] %*% theta))
      gradient_increasment[i, ] <- g
    }
    # average over the batch, then take one step of size alpha
    avg_g <- colMeans(gradient_increasment)
    theta <- theta + alpha * avg_g
  }
  return(theta)
}
x <- seq(0.1, 10, 0.002)
n <- length(x)
y <- 2 * x + 5 + rnorm(n)
z <- as.matrix(data.frame(rep(1, n), x))
theta <- MBGD(z, y, 100, 0.002, c(1, 1))
print(theta)
```
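The batch size interpolates between the two previous algorithms: with batch_size equal to the number of rows, one MBGD pass performs the same averaged update as a single BGD step, while batch_size = 1 makes every batch a single sample, as in SGD. A small illustrative check of the first case, reusing z and y from the chunk above:

```r
# Illustrative check: with batch_size = nrow(z), one MBGD pass reduces to one BGD step
theta_full <- MBGD(z, y, nrow(z), 0.002, c(1, 1))
theta_one  <- BGD(z, y, 0.002, c(1, 1))
print(theta_full)
print(theta_one)  # the two results should agree up to floating-point rounding
```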
The function Cost records the loss function of the model: it returns the mean squared error between the predictions and the observed responses for a particular theta.
```r
Cost <- function(input_data, real_result, theta){
  predict <- input_data %*% theta
  cost <- predict - real_result
  cost <- mean(cost^2)
  return(cost)
}
x <- seq(0.1, 10, 0.002)
n <- length(x)
y <- 2 * x + 5 + rnorm(n)
z <- as.matrix(data.frame(rep(1, n), x))
cost <- Cost(z, y, c(1, 1))
print(cost)
```
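Since y is simulated as 2*x + 5 plus standard normal noise, the loss evaluated at the data-generating coefficients c(5, 2) (intercept first, then slope) should be close to the noise variance of 1. A small illustrative check, reusing z and y from the chunk above:

```r
# Illustrative check: the loss at the data-generating coefficients should be near 1
print(Cost(z, y, c(5, 2)))
```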
The function train_BGD trains the model for iter iterations of the batch gradient descent algorithm and records the loss function value of the model after every iteration.
```r
train_BGD <- function(input_data, real_result, iter, alpha, theta){
  # prepend the column of ones, then iterate BGD and record the loss
  input_data <- Addo(input_data)
  cost <- numeric(iter)
  for (i in 1:iter){
    theta <- BGD(input_data, real_result, alpha, theta)
    cost[i] <- Cost(input_data, real_result, theta)
  }
  return(list(theta = theta, cost = cost))
}
x <- seq(0.1, 10, 0.01)
n <- length(x)
z <- rnorm(n)
y <- 2 * x + 5 + z
theta <- train_BGD(x, y, 500, 0.02, c(1, 1))
print(theta)
```
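The recorded cost vector makes it easy to inspect convergence. A minimal sketch using base R plotting (not part of the package), applied to the theta object returned above:

```r
# Illustrative sketch: plot the loss after each BGD iteration to check convergence
plot(theta$cost, type = "l", xlab = "iteration", ylab = "loss")
```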
The function train_SGD trains the model for iter iterations of the stochastic gradient descent algorithm and records the loss function value of the model after every iteration.
```r
train_SGD <- function(input_data, real_result, iter, alpha, theta){
  # prepend the column of ones, then run iter full SGD passes and record the loss
  input_data <- Addo(input_data)
  cost <- numeric(iter)
  for (i in 1:iter){
    theta <- SGD(input_data, real_result, alpha, theta)
    cost[i] <- Cost(input_data, real_result, theta)
  }
  return(list(theta = theta, cost = cost))
}
x <- seq(0.1, 10, 0.01)
n <- length(x)
z <- rnorm(n)
y <- 2 * x + 5 + z
theta <- train_SGD(x, y, 200, 0.02, c(1, 1))
print(theta)
```
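Because each SGD iteration ends at whatever point the last shuffled sample left it, the recorded loss typically fluctuates around its floor rather than decreasing monotonically. One way to see this, using the theta object returned above:

```r
# Illustrative sketch: the tail of the SGD loss trace usually fluctuates near its floor
print(tail(theta$cost, 5))
```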
The function train_MBGD trains the model for iter iterations of the mini-batch gradient descent algorithm and records the loss function value of the model after every iteration.
```r
train_MBGD <- function(input_data, real_result, iter, batch_size, alpha, theta){
  # prepend the column of ones, then run iter full MBGD passes and record the loss
  input_data <- Addo(input_data)
  cost <- numeric(iter)
  for (i in 1:iter){
    theta <- MBGD(input_data, real_result, batch_size, alpha, theta)
    cost[i] <- Cost(input_data, real_result, theta)
  }
  return(list(theta = theta, cost = cost))
}
x <- seq(0.1, 10, 0.01)
n <- length(x)
z <- rnorm(n)
y <- 2 * x + 5 + z
theta <- train_MBGD(x, y, 500, 200, 0.01, c(1, 1))
print(theta)
```
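To compare the three variants on the same data, one can fit each and look at its last recorded loss. The following sketch is illustrative only (the objects fit_bgd, fit_sgd and fit_mbgd are introduced here for the example); it reuses x and y from the chunk above together with the same tuning values used earlier in this vignette:

```r
# Illustrative comparison: final loss of each variant on the same simulated data
fit_bgd  <- train_BGD(x, y, 500, 0.02, c(1, 1))
fit_sgd  <- train_SGD(x, y, 200, 0.02, c(1, 1))
fit_mbgd <- train_MBGD(x, y, 500, 200, 0.01, c(1, 1))
# each final loss should drop well below the starting loss and settle near the
# noise variance of about 1 (SGD may sit slightly above it because of its
# constant step size)
print(c(BGD  = tail(fit_bgd$cost, 1),
        SGD  = tail(fit_sgd$cost, 1),
        MBGD = tail(fit_mbgd$cost, 1)))
```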