# R/linear_regression.R In EvanWie/linearRegress: What the Package Does (One Line, Title Case)

#### Documented in back_selectlin_model

```Rcpp::sourceCpp('src/beta.cpp')

#' Linear regression model
#'
#' Performs linear regression using Ordinary Least Squares (or Weighted Least Squares when weights are specified)
#'
#' @param formula Model formula using specified columns of DataFrame 'data'. Can include interactions and select no intercept
#' with -1
#' @param data Dataframe from which model variables are pulled.
#' @param weights Vector of weights to be used in Weighted Least Squares regression estimates. Default = NULL.
#' @return List of coefficients, estimate standard errors, test statistics, and p-values
#'
#' @export
lin_model = function(formula, data, weights= NULL){

if(length(formula[[2]]) != 1){
stop("y must be univariate")
}

modframe = model.frame(formula = formula, data = data)

y <- model.response(modframe, "numeric")

x = model.matrix(formula, data = data)

n = length(y)

if(is.null(weights)){
#beta = solve(t(x)%*%x)%*%t(x)%*%y
beta = betaRcpp(y,x)

res = y - x%*%beta

sigma_sq = sum(res^2)/(n-ncol(x))

varcov_beta = solve(t(x)%*%x)*sigma_sq
} else {

if(!is.vector(weights) | length(weights) != n){
stop("Weights must be written as a vector with length equal to length y")
}

w = diag(weights)
beta = solve(t(x) %*% w %*% x) %*% t(x) %*% w %*% y
#beta = betaRcpp(y,x)

res = y - x%*%beta

sigma_sq <- sum(weights*res^2)/(n - ncol(x))

varcov_beta = sigma_sq* solve(t(x)%*%w%*%x)
}

se_beta =sqrt(diag(varcov_beta))

test_stat = beta/se_beta

p = 2*pt(-abs(test_stat), n-1)

results = cbind.data.frame(coefficients = round(beta,6), St.Error = round(se_beta,6),
test_statistic = round(test_stat,3), p_value = p)
row.names(results) = colnames(x)

return(results)
}

#' Backwards selcection of linear regression model
#'
#' Performs backwards selection of model parameters. Removes parameter with greatest p-value above "prem" threshold.
#' P-values are calculated using Ordinary Least Squares (no weighting option).
#'
#' @param formula Model formula using specified columns of DataFrame 'data'. Can include interactions and select no intercept
#' with -1.
#' @param data Dataframe from which model variables are pulled.
#' @param prem Threshold at which a parameter will be removed from the model if it has the highest p-value above the threshold.
#' The default value is .1.
#' @return Dataframe of coefficients, estimate standard errors, test statistics, and p-values
#' for parameters remaining in the model following backwards selection.
#'
#' @export
back_select = function(formula, data, prem = .1){
if(length(formula[[2]]) != 1){
stop("y must be univariate")
}

modframe = model.frame(formula = formula, data = data)

y <- model.response(modframe, "numeric")

x = model.matrix(formula, data = data)

n = length(y)

beta = betaRcpp(y,x)

res = y - x%*%beta

q = ifelse(is.matrix(x), ncol(x), 1)
sigma_sq = sum(res^2)/(n-q)

varcov_beta = solve(t(x)%*%x)*sigma_sq

se_beta =sqrt(diag(varcov_beta))

test_stat = beta/se_beta

p = 2*pt(-abs(test_stat), n-1)

removed_vars = c()

while(max(p[2:max(2,q)]) > prem){
rem_col = which(p == max(p[2:ncol(x)]))

removed_vars = c(removed_vars, colnames(x)[rem_col])

x = x[,-rem_col]

beta = beta = solve(t(x) %*% x) %*% t(x) %*% y

res = y - x%*%beta

q = ifelse(is.matrix(x), ncol(x), 1)
sigma_sq = sum(res^2)/(n-q)

varcov_beta = solve(t(x)%*%x)*sigma_sq

se_beta =sqrt(diag(varcov_beta))

test_stat = beta/se_beta

p = 2*pt(-abs(test_stat), n-1)

if(q == 1){break}
}

results = cbind.data.frame(coefficients = round(beta,6), St.Error = round(se_beta,6),
test_statistic = round(test_stat,3), p_value = p)
row.names(results) = c("Intercept",colnames(x)[2:max(2,q)])

cat("Removed variables:\n", removed_vars, "\n")

return(results)
}

#x = rnorm(10)
#x2 = rnorm(10)
#x3 = rnorm(10)
#y = rnorm(10)
#df = data.frame(y, x, x2, x3)
#lin_model(y~x*x2,df)

#back_select(y~x*x2,df,.5)

#summary(lm(y~x*x2, data = df))
#olsrr::ols_regress(y~x*x2, data = df)
#result = bench::mark(lin_model(y~x*x2,df)[1], as.vector(olsrr::ols_regress(y~x*x2, data = df)\$beta)[1])
#plot(result)
#result2 = bench::mark(lin_model(y~x*x2,df)[1], as.vector(lm(y~x*x2, data = df)\$coefficients[1]))
#plot(result2)

#result3 = bench::mark(lin_model(y~x*x2,df)[1], lin_model2(y~x*x2,df)[1])
#plot(result3)
```
EvanWie/linearRegress documentation built on Nov. 20, 2019, 12:02 a.m.