inst/extdata/doc/regression.md

Regression

Enjoy this brief demonstration of the regression module

First we simulate some data

# Create normal distributed data with mean = 0 and standard deviation = 1
## r = 0.5
data <- MASS::mvrnorm(n=100,
                      mu=c(0, 0),
                      Sigma=matrix(c(1, 0.5, 0.5, 1), 2),
                      empirical=TRUE)
# Add names
colnames(data) <- c("x","y")

Check the correlation and regression results using frequentist methods

# Correlation
stats::cor(data)[2]
#> [1] 0.5

# Regression
summary(stats::lm(y ~ x, data=data.frame(data)))
#> 
#> Call:
#> stats::lm(formula = y ~ x, data = data.frame(data))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -2.0482 -0.5273  0.0887  0.5984  1.8607 
#> 
#> Coefficients:
#>                           Estimate             Std. Error t value   Pr(>|t|)    
#> (Intercept) 0.00000000000000000416 0.08704326862109958152    0.00          1    
#> x           0.50000000000000022204 0.08748177652797066439    5.72 0.00000012 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.87 on 98 degrees of freedom
#> Multiple R-squared:  0.25,   Adjusted R-squared:  0.242 
#> F-statistic: 32.7 on 1 and 98 DF,  p-value: 0.000000118

Then the regression results using the Bayesian model

mcmc <- bfw::bfw(project.data = data,
            y = "y",
            x = "x",
            saved.steps = 50000,
            jags.model = "regression",
            jags.seed = 100,
            silent = TRUE)
# Print the results            
round(mcmc$summary.MCMC[,3:7],3)
#>                        Mode   ESS  HDIlo HDIhi   n
#> beta0[1]: Intercept  -0.008 50000 -0.172 0.173 100
#> beta[1]: Y vs. X      0.492 51970  0.330 0.674 100
#> sigma[1]: Y vs. X     0.863 28840  0.760 1.005 100
#> zbeta0[1]: Intercept -0.008 50000 -0.172 0.173 100
#> zbeta[1]: Y vs. X     0.492 51970  0.330 0.674 100
#> zsigma[1]: Y vs. X    0.863 28840  0.760 1.005 100
#> R^2 (block: 1)        0.246 51970  0.165 0.337 100

Now we add some noise

# Create noise with mean = 10 / -10 and sd = 1
## r = -1.0
noise <- MASS::mvrnorm(n=2,
                       mu=c(10, -10),
                       Sigma=matrix(c(1, -1, -1, 1), 2),
                       empirical=TRUE)
# Combine data
biased.data <- rbind(data,noise)

Repeat

# Correlation
stats::cor(biased.data)[2]
#> [1] -0.498

# Regression
summary(stats::lm(y ~ x, data=data.frame(biased.data)))
#> 
#> Call:
#> stats::lm(formula = y ~ x, data = data.frame(biased.data))
#> 
#> Residuals:
#>    Min     1Q Median     3Q    Max 
#> -5.272 -0.815  0.027  0.822  3.108 
#> 
#> Coefficients:
#>             Estimate Std. Error t value    Pr(>|t|)    
#> (Intercept)  -0.0983     0.1487   -0.66        0.51    
#> x            -0.4984     0.0867   -5.75 0.000000098 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.49 on 100 degrees of freedom
#> Multiple R-squared:  0.248,  Adjusted R-squared:  0.241 
#> F-statistic: 33.1 on 1 and 100 DF,  p-value: 0.0000000975

Finally, using Bayesian model with robust estimates

mcmc.robust <- bfw::bfw(project.data = biased.data,
            y = "y",
            x = "x",
            saved.steps = 50000,
            jags.model = "regression",
            jags.seed = 100,
            run.robust = TRUE,
            silent = TRUE)
# Print the results            
round(mcmc.robust$summary.MCMC[,3:7],3)
#>                        Mode   ESS  HDIlo HDIhi   n
#> beta0[1]: Intercept  -0.026 29844 -0.204 0.141 102
#> beta[1]: Y vs. X      0.430 29549  0.265 0.604 102
#> sigma[1]: Y vs. X     0.671 16716  0.530 0.842 102
#> zbeta0[1]: Intercept  0.138 28442  0.042 0.244 102
#> zbeta[1]: Y vs. X     0.430 29549  0.265 0.604 102
#> zsigma[1]: Y vs. X    0.392 16716  0.310 0.492 102
#> R^2 (block: 1)        0.236 29549  0.145 0.331 102


oeysan/bfw documentation built on March 27, 2022, 7:41 p.m.