# Chunk options for the rendered document: show code and output together,
# prefix console output with "#>", and use small centered figures.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 4,
  fig.height = 2,
  fig.align = "center"
)
library(cpp)
library(dplyr)
library(ggplot2)
library(bench)

# Helper: return the benchmark table with readable expression labels.
print_bench <- function(bm) {
  bm %>% mutate(expression = as.character(expression))
}

# Helper: print the benchmark table and draw the default bench autoplot.
show_bm <- function(bm) {
  print(print_bench(bm))
  autoplot(bm)
}

# The aim of this Monte Carlo experiment is to show that the estimate of a
# Classical Linear Regression model (i.e. when all the classical assumptions
# of the linear regression model are satisfied) is consistent.
# To run the experiment we benchmark the Gradient Descent R function against
# its C++ version.

# Sample sizes and number of Monte Carlo replications.
n <- c(10, 25, 50, 100, 500, 1000)
nsim <- 1000

# True parameter vector, and storage for the per-replication estimates.
beta0 <- c(5, 0.5, 0.2, 0.1)
beta_hat <- matrix(NA, nrow = length(beta0), ncol = nsim)

# Stopping tolerance: compared to the maximum difference between successive
# parameter estimates inside the gradient-descent routine.
tolerance <- 1e-8
# Maximum number of iterations, so the routine terminates even when the
# tolerance criterion is never met.
maxit <- 1000
# Learning rate (step size) of the gradient descent.
stepsize <- 1e-5

# Matrices collecting, for each sample size, the mean estimate across the
# nsim replications (one column per sample size, one row per coefficient).
consistency_r <- matrix(
  NA,
  nrow = length(beta0), ncol = length(n),
  dimnames = list(seq_along(beta0), paste0("n=", n))
)
consistency_cpp <- matrix(
  NA,
  nrow = length(beta0), ncol = length(n),
  dimnames = list(seq_along(beta0), paste0("n=", n))
)

# Run the full Monte Carlo experiment with a given gradient-descent
# estimator (the R or the C++ implementation) and return the matrix of
# mean estimates per sample size. Extracted so the two benchmarked
# expressions do not duplicate the whole simulation loop.
run_mc <- function(estimator) {
  set.seed(1)
  std <- 5
  beta <- rnorm(length(beta0))  # random starting values for the descent
  consistency <- matrix(
    NA,
    nrow = length(beta0), ncol = length(n),
    dimnames = list(seq_along(beta0), paste0("n=", n))
  )
  for (i in seq_along(n)) {
    for (j in seq_len(nsim)) {
      x1 <- rnorm(n[i]); x2 <- rnorm(n[i]); x3 <- rnorm(n[i])
      x <- cbind(rep(1, n[i]), x1, x2, x3)
      # BUG FIX: the original wrote as.numeric(x %*% beta0 + rnorm(n[i]),
      # sd = std) — `sd = std` was passed to as.numeric(), where it is
      # silently ignored, so the noise had sd = 1 instead of the intended
      # sd = 5. The sd argument belongs to rnorm().
      y <- as.numeric(x %*% beta0 + rnorm(n[i], sd = std))
      beta_hat[, j] <- estimator(beta, x, y, tolerance, maxit, stepsize)
    }
    consistency[, i] <- rowMeans(beta_hat)
  }
  consistency
}

# Benchmark the R implementation of the Gradient Descent estimator against
# the C++ one (simple formulation) in terms of computation time. bench::mark
# also checks that both expressions return equal results.
bench::mark(
  SD_r_fun = {
    consistency_r <- run_mc(betahat_GD_R)
    consistency_r
  },
  SD_cpp_fun = {
    consistency_cpp <- run_mc(betahat_GD_Cpp)
    consistency_cpp
  }
) %>% show_bm()  # print the benchmark table and show the plot

# The C++ function is remarkably faster than the R version, as expected,
# since it is implemented in a low-level compiled language.

# Consistency results: the mean estimates approach beta0 as n grows.
consistency_r
consistency_cpp
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.