inst/doc/introduction_to_the_connected_package.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align="center",
  fig.height = 6,
  fig.width = 6
)

## ----setup--------------------------------------------------------------------
library(connected)
data_fernando

## -----------------------------------------------------------------------------
set.seed(42)
data_fernando <- transform(data_fernando, y = rnorm(nrow(data_fernando), mean=100))
m1 <- lm(y ~ gen + herd, data=data_fernando)
m1

## -----------------------------------------------------------------------------
con_view(data_fernando, y ~ gen * herd, main="data_fernando")

## -----------------------------------------------------------------------------
con_concur(data_fernando, y ~ gen/herd, main="data_fernando")

## -----------------------------------------------------------------------------
data_eccleston

## -----------------------------------------------------------------------------
## library(reshape2)
## acast(data_eccleston, row~col, value.var='trt')
##    1  2  3  4 
##  1 A1 B2 E5 F6
##  2 C3 D4 G7 H8
##  3 H8 F6 A1 C3
##  4 G7 E5 B2 D4

## -----------------------------------------------------------------------------
con_check(data_eccleston, ~ trt + col)
# Here is how to add group membership to the data.  Or use cbind().
# data_eccleston %>% mutate(.grp = con_check(. , ~trt+col))

## -----------------------------------------------------------------------------
con_check(data_eccleston, ~ trt + row)

## -----------------------------------------------------------------------------
con_check(data_eccleston, ~ trt + row + col)

## -----------------------------------------------------------------------------
set.seed(42)
data_eccleston <- transform(data_eccleston,
                            y = rnorm(nrow(data_eccleston), mean=100))
m1 <- lm(y ~ trt + row + col, data=data_eccleston)
m1

## -----------------------------------------------------------------------------
X <- model.matrix(m1)
rcond(X)
.Machine$double.eps

## -----------------------------------------------------------------------------
tab <- data.frame(gen=c("G1","G1","G1","G1", "G2","G2","G2", "G3"),
                  state=c("S1","S2","S3","S4", "S1","S2","S4", "S1"))
library(janitor) # For tabyl
tab %>% tabyl(state,gen)

## -----------------------------------------------------------------------------
subset(tab, gen != "G3") %>% tabyl(state,gen)
dplyr::filter(tab, gen != "G3") %>% tabyl(state,gen)

## -----------------------------------------------------------------------------
# Read this as "2 state per gen"
tab2 <- con_filter(tab, ~ 2 * state / gen)
tab2 %>% tabyl(state,gen)

## -----------------------------------------------------------------------------
con_filter(tab2, ~ 2 * gen / state) %>% tabyl(state, gen)

## -----------------------------------------------------------------------------
  set.seed(42)
  orch <- OrchardSprays
  orch[runif(nrow(orch)) > .5 , "decrease"] <- NA
  head(orch)

## -----------------------------------------------------------------------------
subset(orch, !is.na(decrease)) %>% tabyl(rowpos, colpos) 

## -----------------------------------------------------------------------------
# Read: decrease has at least 2 colpos per rowps
orch2 <- con_filter(orch, decrease ~ 2 * colpos / rowpos)
tabyl(orch2, rowpos, colpos) # Column 1 has only 1 observation

# Read: decrease has at least 2 rowpos per colpos
orch2 <- con_filter(orch2, decrease ~ 2 * rowpos / colpos)
tabyl(orch2, rowpos, colpos)

## -----------------------------------------------------------------------------
library(connected)
library(janitor)
test1 <- matrix( c("G1", "IA", "2020", # gen has 1 state, 1 yr,
                   "G2", "IA", "2020", # gen has 1 state, 2 yr
                   "G2", "IA", "2021",
                   "G3", "NE", "2020", # 2 states, 1 yr
                   "G3", "IA", "2020",
                   "G4", "KS", "2020", # state has 1 gen, 1 yr
                   "G5", "MO", "2020", # state has 1 gen, 2yr
                   "G5", "MO", "2021",
                   "G6", "IL", "2020", # state has 2 gen, 1yr
                   "G7", "IL", "2020",
                   "G8", "AR", "2019", # year has 1 gen 1 state
                   "G9", "IN", "2018", # year has 1 gen, 2 state
                   "G9", "OH", "2018",
                   "G10", "MN", "2017", # year has 2 gen, 1 state
                   "G11", "MN", "2017",
                   "G12", "MD", "2010", # gen has 2 state, 2 yr, 2 reps
                   "G12", "MD", "2010",
                   "G12", "GA", "2011",
                   "G12", "GA", "2011"), byrow=TRUE, ncol=3)
test1 <- as.data.frame(test1)
colnames(test1) <- c("gen","state","year")

set.seed(42)
test1$y <- round( runif(nrow(test1)), 2)
head(test1)

## -----------------------------------------------------------------------------
con_filter(test1, y ~ 2 * gen / state:year) |>
  transform(stateyr=paste0(state,"_",year)) |>
  tabyl(gen,stateyr)

## -----------------------------------------------------------------------------
con_filter(test1, y ~ 2 * gen / state:year, returndropped=TRUE)

## -----------------------------------------------------------------------------
library(agridat)
library(dplyr)

dat0 <- agridat::minnesota.barley.yield
if(nrow(dat0) < 2083) stop("Please update the agridat package.")
dat0 <- mutate(dat0, gen=factor(gen), site=factor(site), year=factor(year))
library(lme4)
m0 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) + 
             (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year),
           data=dat0)
summary(m0)

# Note, the asreml package gives the same estimates.
# library(asreml)
# m0a <- asreml(yield ~ 1, data=dat0, random  = ~ gen*site*year, workspace="1GB")
# lucid::lucid(summary(m0a)$varcomp)
##               component std.error z.ratio bound %ch
## site             42.2       29.1     1.45     P   0
## year             38.1       15.7     2.42     P   0
## gen              46          5.74    8.01     P   0
## site:year        89         12.1     7.34     P   0
## gen:site          0.826      0.55    1.5      P   0
## gen:year          9.88       1.67    5.93     P   0
## gen:site:year    24.6        2.32   10.6      P   0
## units!R           2.66       1.88    1.41     P   0

## -----------------------------------------------------------------------------
# Keep the original data in dat0 and pruned data in dat1
dat1 <- dat0

con_view(dat1, yield~site*year, cluster=FALSE,
         xlab="site", ylab="year", main="Minnesota Barley")

## -----------------------------------------------------------------------------
# Require 2 sites per year
dat2 <- filter(dat1, !is.na(yield), n_distinct(site) >= 2, .by=year)
dat1 <- con_filter(dat1, yield ~ 2*site/year)
con_view(dat1, yield~gen*year, cluster=FALSE,
         xlab="genotype", ylab="year")

## -----------------------------------------------------------------------------
# Require 2 year per gen
dat2 <- filter(dat2, n_distinct(year) >= 2, .by=gen)
dat1 <- con_filter(dat1, yield~ 2*year/gen)
con_view(dat1, yield~gen*year, cluster=FALSE, xlab="gen", ylab="year")

## -----------------------------------------------------------------------------
con_view(dat1, yield~gen*site, cluster=FALSE, xlab="genotype", ylab="site")

## -----------------------------------------------------------------------------
# Drop genotypes tested in only 1 site
dat2 <- filter(dat2, n_distinct(site) >= 2, .by=gen)
dat1 <- con_filter(dat1, yield~ 2*site/gen)
con_view(dat1, yield~gen*site, cluster=FALSE, xlab="genotype", ylab="site")

## -----------------------------------------------------------------------------
con_view(dat1, yield ~ gen*site, cluster=FALSE, xlab="gen", ylab="site")
con_view(dat1, yield ~ gen*year, cluster=FALSE, xlab="gen", ylab="year")
con_view(dat1, yield ~ site*year, cluster=FALSE, xlab="site", ylab="year")

## -----------------------------------------------------------------------------
library(lme4)
m1 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) + 
          (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year), data=dat1)
summary(m1)

# asreml converges to the same estimated values, so lmer is just finnicky
# m1a <- asreml(yield ~ 1, data=dat1, random  = ~ gen*site*year, workspace="1GB")
# lucid::vc(m1a)

## -----------------------------------------------------------------------------
# Require at least 3 year per genotype
dat3 <- filter(dat1, n_distinct(year) >= 3, .by=gen)
dat2 <- con_filter(dat1, yield ~ 3*year / gen)
m2 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) +
             (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year),
           data=dat2)
summary(m2)

# asreml gives the same estimated variance parameters. Not shown.
# m2a <- asreml(yield ~ 1, data=dat2, random  = ~ gen*site*year, workspace="1GB")
# summary(m2a)$varcomp

## -----------------------------------------------------------------------------
library(lucid)
full_join( select(as.data.frame(VarCorr(m0)), grp, vcov), 
           select(as.data.frame(VarCorr(m1)), grp, vcov), 
           by="grp", suffix=c(".0",".1")) %>%
  full_join(select(as.data.frame(VarCorr(m2)), grp, vcov), 
           by="grp", suffix=c(".0",".2")) %>% lucid

## -----------------------------------------------------------------------------
library(connected)
dat <- data_fernando
dat <- transform(dat, y = rnorm(nrow(dat)))
tmp <- con_view(dat, y ~ gen*herd)
tmp$x.limits[[1]]
tmp$y.limits[[1]]

## -----------------------------------------------------------------------------
dd1 <- data.frame(x=rep(c("E1","E2"),2),
                  y=rep(c("G1","G2"), each=2),
                  z1=c(1,2,1.5,2.5))
dd2 <- data.frame(x=rep(c("E3","E2"),2),
                  y=rep(c("G3","G2"), each=2),
                  z2=c(3,4,3.5,4.5))
dd1
dd2

## -----------------------------------------------------------------------------
dd0 <- expand.grid(x=sort(unique(c(dd1$x, dd2$x))),
                   y=sort(unique(c(dd1$y, dd2$y))) )

## -----------------------------------------------------------------------------
dd0 <- merge(dd0, dd1, by=c("x","y"), all.x=TRUE)
dd0 <- merge(dd0, dd2, by=c("x","y"), all.x=TRUE)
library(connected)
con_view(dd0, z1 ~ x * y, cluster=FALSE, dropNA=FALSE, main="dd1")
con_view(dd0, z2 ~ x * y, cluster=FALSE, dropNA=FALSE, main="dd2")

Try the connected package in your browser

Any scripts or data that you put into this service are public.

connected documentation built on April 12, 2025, 2:24 a.m.