inst/doc/Resampling.R

## ----setup, include = FALSE-----------------------------------------
library(mosaic)
require(grDevices) 
require(datasets) 
require(stats) 
require(lattice)
require(grid) 
require(mosaic) 
require(mosaicData) 
trellis.par.set(theme=col.mosaic(bw=FALSE))
trellis.par.set(fontsize=list(text=9))
options(keep.blank.line=FALSE) 
options(width=70)
require(vcd)
require(knitr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align="center",
  fig.show="hold"
)
options('mosaic:parallelMessage'=FALSE)

## ----eval=FALSE, echo=TRUE------------------------------------------
#  install.packages("mosaic")

## ----moreSetup------------------------------------------------------
require(mosaic)
options(digits = 3)

## -------------------------------------------------------------------
library(mosaic)
data(Mustangs)

## ----dot, fig.width=4, fig.height=2---------------------------------
gf_histogram( ~ Price, data = Mustangs)

## ----meanmust-------------------------------------------------------
mean( ~ Price, data = Mustangs)

## ----eval=FALSE-----------------------------------------------------
#  mean(Mustangs$Price)

## ----eval=FALSE-----------------------------------------------------
#  mean( ~ Price, data = Mustangs)

## -------------------------------------------------------------------
simple = c(1, 2, 3, 4, 5)
resample(simple)
resample(simple)
resample(simple)

## ----results="hide"-------------------------------------------------
resample(Mustangs)

## ----echo=FALSE-----------------------------------------------------
set.seed(104)
head(resample(Mustangs))
cat("... and so on")

## -------------------------------------------------------------------
mean( ~ Price, data = resample(Mustangs))

## -------------------------------------------------------------------
mean( ~ Price, data = resample(Mustangs))

## -------------------------------------------------------------------
do(5) * mean( ~ Price, data = resample(Mustangs))

## -------------------------------------------------------------------
mustangs_price_boot <- do(2000) * mean( ~ Price, data = resample(Mustangs))

## ----hist, fig.width=4, fig.height=2--------------------------------
gf_histogram( ~ mean, data = mustangs_price_boot, 
  xlab="Mean Mustang Price (in thousand dollars)")

## ----confint--------------------------------------------------------
confint(mustangs_price_boot, level = 0.90, method = "quantile")
confint(mustangs_price_boot, level = 0.90, method = "stderr")

## ----qdata----------------------------------------------------------
qdata( ~ mean, c(.05, .95), data = mustangs_price_boot)
# alternative
cdata(~ mean, 0.90, data = mustangs_price_boot)

## ----tstar----------------------------------------------------------
tstar <- qt(.95, df = 24)
zstar <- qnorm(.95)

## ----margin---------------------------------------------------------
tstar * sd( ~ mean, data = mustangs_price_boot)
zstar * sd( ~ mean, data = mustangs_price_boot)

## ----proptable------------------------------------------------------
prop( ~ rbinom(1000, prob = 0.5, size = 428) >= 240)

## ----proptable2-----------------------------------------------------
prop( ~ rbinom(1000, prob = 0.5, size = 428) >= 240)

## ----pbinom---------------------------------------------------------
xpbinom(239, prob = 0.5, size = 428)

## -------------------------------------------------------------------
binom.test(x = 240, n = 428)

## ----coinflip-------------------------------------------------------
do(1) * rflip(428)

## ----flips1, fig.width=4, fig.height=3------------------------------
nfl_null <- do(2000) * rflip(428)
prop( ~ heads >= 240, data = nfl_null)

## ----flips, fig.width=4, fig.height=2-------------------------------
gf_histogram( ~ heads, fill = ~ (heads >= 240), data = nfl_null)

## ----fetchsleep-----------------------------------------------------
data(Sleep) 

## ----obsmean--------------------------------------------------------
mean(Words ~ Group, data = Sleep)
obs <- diff(mean(Words ~ Group, data = Sleep))
obs

## ----fig.width=4, fig.height=2--------------------------------------
gf_boxplot(Words ~ Group, data = Sleep)

## -------------------------------------------------------------------
diff(mean(Words ~ shuffle(Group), data = Sleep))

## -------------------------------------------------------------------
diff(mean(Words ~ shuffle(Group), data = Sleep))

## ----setseed134, echo=FALSE-----------------------------------------
set.seed(134) # make sure the result below is "typical"

## ----sleep, tidy=FALSE, fig.width=4, fig.height=2-------------------
sleep_null <- do(2000) * diff(mean(Words ~ shuffle(Group), data = Sleep))
gf_histogram( ~ Sleep, fill = ~ (Sleep >= obs), data = sleep_null, 
  binwidth = 0.4,
  xlab = "Distribution of difference in means under the null hypothesis")

## -------------------------------------------------------------------
cor(Price ~ Miles, data = Mustangs)

## -------------------------------------------------------------------
mustangs_cor_boot <- do(2000) * cor(Price ~ Miles, data = resample(Mustangs))
quantiles <- qdata( ~ cor, c(.025, .975), data = mustangs_cor_boot)
quantiles

## ----cor, fig.width=4, fig.height=2, tidy=FALSE---------------------
mustangs_hist <- mutate(mustangs_cor_boot, 
  colorval = cut(cor, c(-Inf, quantiles, Inf),
    labels = c("Lower", "Middle", "Upper")))
gf_histogram( ~ cor, data = mustangs_hist, fill = ~ colorval, n = 50) 
confint(mustangs_cor_boot)

## ----price-mileage-graph, fig.width=4, fig.height=3-----------------
gf_point(Price ~ Miles, data = Mustangs) %>%
  gf_lm()

## ----mustangregression----------------------------------------------
lm(Price ~ Miles, data = Mustangs)

## -------------------------------------------------------------------
mean( ~ Price, data = Mustangs)

## -------------------------------------------------------------------
lm(Price ~ 1, data = Mustangs)

## -------------------------------------------------------------------
mean(Price ~ 1, data = Mustangs)

## -------------------------------------------------------------------
mean(Words ~ 1, data = Sleep)

## -------------------------------------------------------------------
mean(Words ~ Group, data = Sleep)

## -------------------------------------------------------------------
lm(Words ~ Group, data = Sleep)

## -------------------------------------------------------------------
diffmean(Words ~ Group, data = Sleep) 

## -------------------------------------------------------------------
prop(homeless ~ 1, data = HELPrct)

## -------------------------------------------------------------------
prop(homeless ~ sex, data = HELPrct)

## -------------------------------------------------------------------
diffprop(homeless ~ sex, data = HELPrct)

## -------------------------------------------------------------------
lm(homeless=="homeless" ~ 1, data = HELPrct)

## -------------------------------------------------------------------
lm(homeless=="homeless" ~ sex, data = HELPrct)

## -------------------------------------------------------------------
mustangs_lm_boot <- do(2000) * lm(Price ~ Miles, data = resample(Mustangs))
confint(mustangs_lm_boot)
# compare to large sample estimate
confint(lm(Price ~ Miles, data = Mustangs))

## -------------------------------------------------------------------
# large sample estimate
msummary(lm(age ~ sex, data = HELPrct))
# compare to permutation test
HELPrct_null <- do(2000) * lm(age ~ shuffle(sex), data = HELPrct)
prop(~ (abs(sexmale) > 0.7841), data = HELPrct_null) 

## -------------------------------------------------------------------
mustangs_boot1 <- do(2000) * lm(Price ~ Age, data = resample(Mustangs))
mustangs_boot2 <- do(2000) * lm(Price ~ Miles, data = resample(Mustangs))
mustangs_boot3 <- do(2000) * lm(Price ~ Miles + Age, data = resample(Mustangs))


## -------------------------------------------------------------------
confint(mustangs_boot1)

## -------------------------------------------------------------------
confint(mustangs_boot2)

## -------------------------------------------------------------------
confint(mustangs_boot3)

## -------------------------------------------------------------------
confint(lm(Price ~ Miles + Age, data = Mustangs))

## ----results = "asis", echo = FALSE---------------------------------
ds <- data.frame(
  y = c(234,324,231), residual = c(25,-23,-22))
set.seed(104)

ds <- ds %>%
  mutate(
    predicted = y - residual,
    resamp_residual = resample(residual),
    resamp_observed = predicted + resamp_residual) %>%
  select(y, predicted, residual, resamp_residual, resamp_observed)
knitr::kable(ds)

## -------------------------------------------------------------------
set.seed(104)

mustang_mod <- lm(Price ~ Miles + Age, data = Mustangs)
mustang_relm_boot <- do(2000) * relm(mustang_mod)

## -------------------------------------------------------------------
confint(mustang_relm_boot)
gf_histogram( ~ Miles, data = mustang_relm_boot)

## -------------------------------------------------------------------
anova(lm(Price ~ Miles + Age, data = Mustangs))

## -------------------------------------------------------------------
anova(lm(Price ~ Age + Miles, data = Mustangs))

## -------------------------------------------------------------------
do(1) * lm(Price ~ Miles, data = Mustangs)
do(1) * lm(Price ~ Miles + Age, data = Mustangs)

## -------------------------------------------------------------------
mustangs_age_boot <- do(2000) * lm(Price ~ Miles + shuffle(Age), data = Mustangs )
favstats(~ r.squared, data = mustangs_age_boot)
cdata(~ r.squared, .95, data = mustangs_age_boot)

## -------------------------------------------------------------------
mustangs_miles_boot <- do(2000) * lm(Price ~ shuffle(Miles) + Age, data = Mustangs)
favstats(~ r.squared, data = mustangs_miles_boot)
cdata(~ r.squared, .95, data = mustangs_miles_boot)

## -------------------------------------------------------------------
chisq.test(tally( ~ homeless + sex, 
                   data = HELPrct, margins = FALSE))

## -------------------------------------------------------------------
pval(chisq.test(tally( ~ homeless + sex, 
                   data = HELPrct, margins = FALSE)) )

## -------------------------------------------------------------------
pval(chisq.test(tally( ~ shuffle(homeless) + sex, 
                         data = HELPrct, margins = FALSE)))

## -------------------------------------------------------------------
chisq_null <- do(2000)* pval(chisq.test(tally( ~ shuffle(homeless) + sex, 
                         data = HELPrct, margins = FALSE)))

## ----fig.width=4, fig.height=2--------------------------------------
prop( ~ (p.value < 0.05), data = chisq_null)
gf_histogram( ~ p.value, data = chisq_null, binwidth = 0.1, center = 0.05)

## ----fig.width=4, fig.height=2--------------------------------------
qqmath( ~ p.value, data = chisq_null, dist = qunif)

## -------------------------------------------------------------------
HELP_logistic_boot <- do(2000) * 
   glm(homeless=="homeless" ~ age + sex, 
     data = resample(HELPrct), family = "binomial")
confint(HELP_logistic_boot)

Try the mosaic package in your browser

Any scripts or data that you put into this service are public.

mosaic documentation built on Nov. 10, 2023, 1:11 a.m.