#' Linear model iterations with square loss
#'
#' Training by using gradient descent on a linear model with square loss.
#' Returns a matrix with one weight vector per iteration, up to the max
#' iteration. Features are standardized (mean 0, sd 1) internally for a
#' stable descent; the returned weights are transformed back to the
#' original feature scale.
#'
#' @param X.mat train feature matrix of size [n x p]
#' @param y.vec train label vector of size [n x 1]
#' @param max.iterations integer scalar greater than 1
#' @param step.size numeric scalar, the gradient descent step size
#'
#' @return W.mat matrix of weight vectors of size [(p + 1) x max.iterations].
#' Row 1 is the intercept on the original (unscaled) feature space, so a
#' prediction can be obtained by cbind(1, X.mat) %*% W.mat.
#' @export
#'
#' @examples
#' data(ozone, package = "ElemStatLearn")
#' y.vec <- ozone[, 1]
#' X.mat <- as.matrix(ozone[, -1])
#' W.mat <- LMSquareLossIterations(X.mat, y.vec, max.iterations = 5L)
LMSquareLossIterations <-
  function(X.mat, y.vec, max.iterations, step.size = 0.01) {
    # Validate types and dimensions before doing any work.
    if (!all(is.matrix(X.mat), is.numeric(X.mat))) {
      stop("X.mat must be a numeric matrix.")
    }
    if (!all(is.vector(y.vec),
             is.numeric(y.vec),
             length(y.vec) == nrow(X.mat))) {
      stop("y.vec must be a numeric vector of the same number of rows as X.mat.")
    }
    if (!all(is.integer(max.iterations),
             max.iterations > 1,
             length(max.iterations) == 1)) {
      stop("Input max.iterations must be a greater than 1 integer scalar number.")
    }
    if (!all(is.numeric(step.size), length(step.size) == 1)) {
      stop("step.size must be a numeric scalar value.")
    }
    num.train <- nrow(X.mat)
    num.feature <- ncol(X.mat)
    # Standardize each column to mean 0, sd 1 (population sd, divide by n),
    # obtaining X.scaled.mat from the original X.mat.
    X.mean.vec <- colMeans(X.mat)
    X.std.vec <-
      sqrt(rowSums((t(X.mat) - X.mean.vec) ^ 2) / num.train)
    # Guard zero-variance columns: their scaled values become 0 instead of
    # NaN, so their learned (scaled) weights stay zero.
    X.std.vec[X.std.vec == 0] <- 1
    # nrow= guards the p == 1 case, where diag(scalar) would otherwise
    # build an identity matrix of size round(scalar).
    X.std.mat <- diag(1 / X.std.vec, nrow = num.feature)
    X.scaled.mat <- t((t(X.mat) - X.mean.vec) / X.std.vec)
    # One column of slopes (and one intercept) per iteration, starting at 0.
    slope.mat <- matrix(0, num.feature, max.iterations)
    intercept.vec <- rep(0, max.iterations)
    prev.slope.vec <- rep(0, num.feature)
    prev.intercept <- 0
    # Gradient descent: each iteration steps from the previous weights.
    for (iter.index in seq_len(max.iterations)) {
      residual.vec <-
        X.scaled.mat %*% prev.slope.vec + prev.intercept - y.vec
      # Gradients of the mean square loss w.r.t. slope and intercept.
      mean.loss.slope.vec <-
        2 * t(X.scaled.mat) %*% residual.vec / num.train
      mean.loss.intercept <- 2 * sum(residual.vec) / num.train
      prev.slope.vec <- prev.slope.vec - step.size * mean.loss.slope.vec
      prev.intercept <- prev.intercept - step.size * mean.loss.intercept
      slope.mat[, iter.index] <- prev.slope.vec
      intercept.vec[iter.index] <- prev.intercept
    }
    # Transform weights back to the original (unscaled) feature space:
    # w.orig = w.scaled / sd, b.orig = b.scaled - sum(mean * w.scaled / sd).
    intercept <-
      -X.mean.vec %*% X.std.mat %*% slope.mat + t(intercept.vec) # 1 x m
    slope <- X.std.mat %*% slope.mat # p x m
    W.mat <- rbind(intercept, slope) # (p + 1) x m
    return(W.mat)
  }
#' Linear model iterations with logistic loss
#'
#' Training by using gradient descent on a linear model with logistic loss.
#' Returns a matrix with one weight vector per iteration, up to the max
#' iteration. Features are standardized (mean 0, sd 1) internally; the
#' returned weights are transformed back to the original feature scale.
#'
#' @param X.mat train feature matrix of size [n x p]
#' @param y.vec train label vector of size [n x 1], with labels in {-1, 1}
#' @param max.iterations positive integer scalar, number of descent steps
#' @param step.size a numeric scalar greater than zero
#'
#' @return W.mat matrix of weight vectors of size [(p + 1) x max.iterations].
#' Row 1 is the intercept, so predictions are cbind(1, X.mat) %*% W.mat.
#'
#' @export
#'
#' @examples
#'
#' data(spam, package = "ElemStatLearn")
#' X.mat <- as.matrix(spam[, 1:57])
#' y.vec <- ifelse(spam$spam == "spam", 1, -1)
#' W.mat <- LMLogisticLossIterations(X.mat, y.vec, max.iterations = 100L, step.size = 0.5)
#' (W.mat)
LMLogisticLossIterations <-
  function(X.mat, y.vec, max.iterations, step.size) {
    # Validate types and dimensions before doing any work.
    if (!all(is.numeric(X.mat), is.matrix(X.mat))) {
      stop("X.mat must be a numeric matrix")
    }
    if (!all(is.numeric(y.vec),
             is.vector(y.vec),
             length(y.vec) == nrow(X.mat))) {
      stop("y.vec must be a numeric vector of length nrow(X.mat)")
    }
    if (!all(
      is.numeric(max.iterations),
      is.integer(max.iterations),
      length(max.iterations) == 1,
      max.iterations >= 1
    )) {
      stop("max.iterations must be a positive integer scalar")
    }
    if (!all(is.numeric(step.size), length(step.size) == 1, step.size > 0)) {
      stop("step.size must be a numeric scalar value.")
    }
    n.train <- nrow(X.mat)
    n.features <- ncol(X.mat)
    # Standardize features to mean 0, sd 1 (population sd, divide by n).
    feature.mean.vec <- colMeans(X.mat)
    feature.sd.vec <-
      sqrt(rowSums((t(X.mat) - feature.mean.vec) ^ 2) / n.train)
    # Zero-variance columns would give NaN when scaling; map them to sd 1
    # so their scaled values (and hence their learned weights) are zero.
    feature.sd.vec[feature.sd.vec == 0] <- 1
    # nrow= guards the p == 1 case, where diag(scalar) would otherwise
    # build an identity matrix of size round(scalar), corrupting unscaling.
    feature.sd.mat <- diag(1 / feature.sd.vec, nrow = n.features)
    X.scaled.mat <- t((t(X.mat) - feature.mean.vec) / feature.sd.vec)
    # One column of weights (and one intercept) per iteration, start at 0.
    W.mat <- matrix(0, nrow = n.features, ncol = max.iterations)
    beta.vec <- rep(0, length.out = max.iterations)
    W.temp.vec <- W.mat[, 1]
    beta.temp <- 0
    # Gradient descent with an explicit intercept term.
    for (n.iterations in seq_len(max.iterations)) {
      # Common factor of both gradients:
      # y_i / (1 + exp(y_i * (x_i' w + beta))) for each training example.
      margin.factor.vec <-
        y.vec / (1 + exp(y.vec * (X.scaled.mat %*% W.temp.vec + beta.temp)))
      W.gradient.vec <- -t(X.scaled.mat) %*% margin.factor.vec / n.train
      beta.gradient <- -sum(margin.factor.vec) / n.train
      # Take one descent step from the previous weights.
      W.mat[, n.iterations] <- W.temp.vec - step.size * W.gradient.vec
      beta.vec[n.iterations] <- beta.temp - step.size * beta.gradient
      W.temp.vec <- W.mat[, n.iterations]
      beta.temp <- beta.vec[n.iterations]
    }
    # Transform weights back to the original (unscaled) feature space.
    intercept.vec <-
      -feature.mean.vec %*% feature.sd.mat %*% W.mat + beta.vec
    W.mat <- feature.sd.mat %*% W.mat
    W.mat <- rbind(intercept.vec, W.mat)
    return(W.mat)
  }
# NOTE(review): the two lines below are boilerplate text scraped from the
# documentation website, not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.