Nothing
# functions.R
# functions: con_check, con_concur, con_filter, con_view
# Idea: allow con_view to work without a response variable. Maybe by coloring all cells the same color?
# Idea cluster only 1 of the 2 directions
## ---------------------------------------------------------------------------
#' @importFrom grDevices colorRampPalette
RedGrayBlue <- colorRampPalette(c("firebrick", "lightgray", "#375997"))
## ---------------------------------------------------------------------------
#' @title Check connectedness of multiple factors in a dataframe
#'
#' @description
#' Multiple factors in a dataframe are said to be connected if a model matrix
#' based on those factors is full rank.
#'
#' This function provides a formula interface to the lfe::compfactor() function
#' to check for connectedness of the factors.
#'
#' @param data A dataframe
#'
#' @param formula A formula with multiple factor names in the dataframe,
#' like \code{y ~ f1 + f2 + f3}
#'
#' @param WW Pass-through argument to `compfactor`
#'
#' @param dropNA If TRUE, observed data that are `NA` will be dropped.
#'
#' @return A vector with integers representing the group membership of each observation.
#'
#' @author Kevin Wright
#'
#' @examples
#' # In the data_eccleston dataframe, each pair of factors is connected.
#' con_check(data_eccleston, ~ row + trt)
#' con_check(data_eccleston, ~ col + trt)
#' con_check(data_eccleston, ~ row + col)
#' # But all three factors are COMPLETELY disconnected into 16 groups.
#' con_check(data_eccleston, ~ row + col + trt)
#'
#' @references
#' None
#'
#' @export
con_check <- function(data=NULL, formula=NULL, WW=TRUE, dropNA=TRUE) {
formula <- deparse(formula)
.resp <- trimws( strsplit(formula, "~")[[1]][1] ) # left of tilde
rhs <- strsplit(formula, "~")[[1]][2] # right of tilde
factor_vars <- trimws( strsplit(rhs, "[+*]")[[1]] )
# If there is a response, drop rows that have missing values in response
if(.resp != "" & dropNA) data = data[ !is.na(data[[.resp]]), ]
# Make sure the data are factors
out <- lfe::compfactor(lapply(data[factor_vars], as.factor), WW=TRUE)
return(out)
}
## ---------------------------------------------------------------------------
#' @title View connectedness of two factors in a dataframe using a levelplot
#'
#' @description
#' If there is replication for the treatment combination cells in a
#' two-way table, the replications are averaged together (or counted)
#' before constructing the heatmap.
#'
#' By default, rows and columns are clustered using the 'incidence' matrix of 0s and 1s.
#'
#' The function checks to see if the cells in the heatmap form a connected
#' set. If not, the data is divided into connected subsets and the subset
#' group number is shown within each cell.
#'
#' By default, missing values in the response are deleted.
#'
#' Factor levels are shown along the left and bottom sides.
#'
#' The number of cells in each column/row is shown along the top/right sides.
#'
#' If the 2 factors are disconnected, the group membership ID is shown in
#' each cell.
#'
#' @param data A dataframe
#'
#' @param formula A formula with two (or more) factor names in the dataframe
#' like \code{yield ~ f1 *f2}
#'
#' @param fun.aggregate The function to use for aggregating data in cells. Default is mean.
#'
#' @param xlab Label for x axis
#'
#' @param ylab Label for y axis
#'
#' @param cex.num Disjoint group number.
#'
#' @param cex.x Scale factor for x axis tick labels. Default 0.7.
#'
#' @param cex.y Scale factor for y axis tick labels Default 0.7.
#'
#' @param col.regions Function for color regions. Default RedGrayBlue.
#'
#' @param cluster If "incidence", cluster rows and columns by the
#' incidence matrix. If FALSE, no clustering is performed.
#'
#' @param dropNA If TRUE, observed data that are `NA` will be dropped.
#'
#' @param ... Other parameters passed to the levelplot() function.
#'
#' @author Kevin Wright
#'
#' @return A lattice graphics object
#'
#' @examples
#' require(lattice)
#' bar = transform(lattice::barley, env=factor(paste(site,year)))
#' set.seed(123)
#' bar <- bar[sample(1:nrow(bar), 70, replace=TRUE),]
#' con_view(bar, yield ~ variety * env, cex.x=1, cex.y=.3, cluster=FALSE)
#'
#' # Create a heatmap of cell counts
#' w2b = colorRampPalette(c('wheat','black'))
#' con_view(bar, yield ~ variety * env, fun.aggregate=length,
#' cex.x=1, cex.y=.3, col.regions=w2b, cluster=FALSE)
#'
#' # Example from paper by Fernando et al. (1983).
#' set.seed(42)
#' data_fernando = transform(data_fernando,
#' y=stats::rnorm(9, mean=100))
#' con_view(data_fernando, y ~ gen*herd, cluster=FALSE,
#' main = "Fernando unsorted")
#' con_view(data_fernando, y ~ gen*herd, cluster=TRUE,
#' main = "Fernando unsorted")
#'
#' # Example from Searle (1971), Linear Models, p. 325
#' dat2 = transform(data_searle,
#' y=stats::rnorm(nrow(data_searle)) + 100)
#'
#' con_view(dat2, y ~ f1*f2, cluster=FALSE, main="data_searle unsorted")
#' con_view(dat2, y ~ f1*f2, main="data_searle clustered")
#'
#' @import lattice
#' @importFrom lfe compfactor
#' @importFrom reshape2 acast
#' @importFrom stats aggregate as.dendrogram as.dist
#' @importFrom stats dist hclust na.omit order.dendrogram
#' @export
#'
con_view <- function(data, formula,
fun.aggregate=mean,
xlab="", ylab="",
cex.num=0.75,
cex.x=0.7, cex.y=0.7,
col.regions=RedGrayBlue,
cluster="incidence",
dropNA=TRUE,
...){
# Check that the formula has valid names (in the data)
if(any(c('.resp','.fx','.fy') %in% names(data)))
stop(".resp, .fx, .fy are reserved names")
formula <- deparse(formula)
.resp <- trimws( strsplit(formula, "~")[[1]][1] ) # left of tilde
rhs <- strsplit(formula, "~")[[1]][2] # right of tilde
leftstar <- strsplit(rhs, "\\*")[[1]][1]
rightstar <- strsplit(rhs, "\\*")[[1]][2]
if(is.na(leftstar) | is.na(rightstar) )
stop("Incorrect formula")
if( grepl("[:+]", leftstar) & grepl("[:+]", rightstar) ) {
# Example: f1:f2 / f3:f4
stop("Only 3 terms can be included in the formula")
}
if( grepl("[:+]", leftstar) & !grepl("[:+]", rightstar) ) {
# Ex: f1:f2 / f3
method="21"
f1a <- trimws( strsplit(leftstar, ":+")[[1]][1] )
f1b <- trimws( strsplit(leftstar, ":+")[[1]][2] )
f2 <- trimws( rightstar )
}
if( !grepl("[:+]", leftstar) & grepl("[:+]", rightstar) ) {
# Ex: f1 / f2:f3
method="12"
f1 <- trimws(leftstar)
f2a <- trimws( strsplit(rightstar, ":+")[[1]][1] )
f2b <- trimws( strsplit(rightstar, ":+")[[1]][2] )
}
if( !grepl("[:+]", leftstar) & !grepl("[:+]", rightstar) ) {
# Ex: f1 / f2
method="11"
f1 <- trimws(leftstar)
f2 <- trimws(rightstar)
}
# Drop rows that have missing values in response
if(dropNA) data = data[ !is.na(data[[.resp]]), ]
# Merge f1a & f1b or else f2a & f2b into a single item
if(method=="11") {
.fx <- f1
.fy <- f2
}
if(method=="12") {
.fx <- f1
.fy <- paste0(f2a, "_", f2b)
data[[.fy]] <- paste0(data[[f2a]], "_", data[[f2b]])
}
if(method=="21") {
.fx <- paste0(f1a, "_", f1b)
.fy <- f2
data[[.fx]] <- paste0(data[[f1a]], "_", data[[f1b]])
}
# Guarantee fx and fy to be factors so that 'levels' will work later.
data[[.fx]] <- factor(data[[.fx]])
data[[.fy]] <- factor(data[[.fy]])
# flag for fun.aggregate=length
agglen <- FALSE
if( "fun.aggregate" %in% names(as.list(sys.call())) &&
as.character(as.list(sys.call())$fun.aggregate)=="length" )
agglen <- TRUE
# Aggregate reps together
# Syntax found at http://stackoverflow.com/questions/27235088
if(agglen) {
# 'length' does not accept na.rm argument so use an anonymous function
data <- aggregate(data[.resp], by=data[c(.fx,.fy)],
FUN=function(x) length(na.omit(x)))
} else {
# aggregate by mean
data <- aggregate(data[.resp], by=data[c(.fx,.fy)], mean, na.rm=TRUE)
}
#browser()
# Identify connected sets
data$.grp <- lfe::compfactor(data[c(.fx,.fy)])
#data$.grp <- lfe::compfactor(list(data[[.fx]], data[[.fy]]))
data$.grp <- as.numeric(data$.grp)
# Clustering needs a matrix. We have already aggregated.
datm <- reshape2::acast(data, paste(.fx,"~",.fy), value.var=.resp)
# If we aggregate by length (number of reps), then change
# 0 to NA so that row/column counts of cells will be right
if(agglen) {
datm[datm==0L] <- NA
datm[is.nan(datm)] <- NA
}
if(cluster=="incidence"){
# use row and column clustering of the _incidence_ matrix
tab <- !is.na(datm)
class(tab) <- 'matrix'
if(nrow(tab) > 2){
hcr <- hclust(dist(tab))
ddr <- as.dendrogram(hcr)
ixr <- order.dendrogram(ddr)
# need to re-order datm for cell counts
datm <- datm[ixr, ]
data[[.fx]] <- factor(data[[.fx]], levels=levels(data[[.fx]])[ixr])
}
if(ncol(tab)>2){
hcc <- hclust(dist(t(tab)))
ddc <- as.dendrogram(hcc)
ixc <- order.dendrogram(ddc)
datm <- datm[ , ixc]
data[[.fy]] <- factor(data[[.fy]], levels=levels(data[[.fy]])[ixc])
}
# Convert to 0/1
tab <- 1*tab
n.common.env <- crossprod(tab)
n.common.gen <- tcrossprod(tab)
}
# Change axis text size
nc <- ncol(datm)
nr <- nrow(datm)
# Note, the 'par' function opens a graphics window, which causes
# knitr to create an empty pdf file
pctMiss <- sum(is.na(datm))/(nc * nr)
pctMiss <- round(100*pctMiss, 1)
subtitle <- paste("(", pctMiss, "% missing)", sep="")
tab <- !is.na(datm)
cells.per.row <- apply(tab, 1, sum)
cells.per.col <- apply(tab, 2, sum)
myxscale <- function(...) {
ans <- xscale.components.default(...)
ans$top <- ans$bottom
ans$top$labels$labels <- as.character(cells.per.row)
ans
}
myyscale <- function(...) {
ans <- yscale.components.default(...)
ans$right <- ans$left
ans$right$labels$labels <- as.character(cells.per.col)
ans
}
# if there is only 1 group, don't show it
if( all(data$.grp<2) ) {
data$.grp <-""
}
# if there are 2+ groups, warn the user
max.grp.num = max(data$.grp)
if (max.grp.num > 1)
warning("There are ", max.grp.num, " groups")
formula2 <- eval(parse(text=paste0(.resp, "~", .fx, "*", .fy)))
# We need "data" instead of "datm" for the group information
levelplot(formula2, data=data,
aspect="fill",
xlab=xlab, ylab=ylab,
col.regions=col.regions,
# Note, relation=free is required for top/right axes!
scales=list(
x=list(cex=cex.x, relation="free", rot=90),
y=list(cex=cex.y, relation="free", rot=0)),
xscale.components = myxscale,
yscale.components = myyscale,
# more space between panel/legend
par.settings=list(layout.heights=list(key.axis.padding=4),
layout.widths=list(axis.key.padding=5)),
panel=function(x,y,z,...){
panel.levelplot(x,y,z,...)
panel.text(x,y,data$.grp, cex=cex.num)
} ,
...)
}
## ---------------------------------------------------------------------------
#' @title Filter a dataframe using two-way criteria to increase connectedness
#'
#' @description
#' Traditional filtering (subsetting) of data is typically performed via
#' some criteria based on the *columns* of the data.
#'
#' In contrast, this function performs filtering of data based on the
#' *joint* rows and columns of a matrix-view of two factors.
#'
#' Conceptually, the idea is to re-shape two or three columns of a dataframe
#' into a matrix, and then delete entire rows (or columns) of the matrix if
#' there are too many missing cells in a row (or column).
#'
#' The two most useful applications of two-way filtering are to:
#'
#' 1. Remove a factor level that has few interactions with another factor.
#' This is especially useful in linear models to remove rare factor
#' combinations.
#'
#' 2. Remove a factor level that has any missing interactions with another
#' factor. This is especially useful with biplots of a matrix to remove
#' rows or columns that have missing values.
#'
#' A formula syntax is used to specify the two-way filtering criteria.
#'
#' Some examples may provide the easiest understanding.
#'
#' dat <- data.frame(state=c("NE","NE", "IA", "NE", "IA"),
#' year=c(1,2,2,3,3), value=11:15)
#'
#' When the 'value' column is re-shaped into a matrix it looks like:
#'
#' state/year | 1 | 2 | 3 |
#' NE | 11 | 12 | 14 |
#' IA | | 13 | 15 |
#'
#' Drop states with too much missing combinations.
#' Keep only states with "at least 3 years per state"
#' con_filter(dat, ~ 3 * year / state)
#' NE 1 11
#' NE 2 12
#' NE 3 14
#'
#' Keep only years with "at least 2 states per year"
#' con_filter(dat, ~ 2 * state / year)
#' NE 2 12
#' IA 2 13
#' NE 3 14
#' IA 3 15
#'
#' If the constant number in the formula is less than 1.0, this is
#' interpreted as a *fraction*.
#' Keep only states with "at least 75% of years per state"
#' con_filter(dat, ~ .75 * year / state)
#'
#' It is possible to include another factor on either side of the slash "/".
#' Suppose the data had another factor for political party called "party".
#' Keep only states with "at least 2 combinations of party:year per state"
#' con_filter(dat, ~ 2 * party:year / state)
#'
#' If the formula contains a response variable, missing values are dropped
#' first, then the two-way filtering is based on the factor combinations.
#' con_filter(dat, value ~ 2 * state / year)
#'
#' @param data A dataframe
#'
#' @param formula A formula with two factor names in the dataframe
#' that specifies the criteria for filtering,
#' like \code{y ~ 2 * f1 / f2}
#'
#' @param verbose If TRUE, print some diagnostic information about what data
#' is being deleted. (Similar to the 'tidylog' package).
#'
#' @param returndropped If TRUE, return the dropped rows instead of the
#' kept rows. Default is FALSE.
#'
#' @return
#' The original dataframe is returned, minus rows that are filtered out.
#'
#' @author Kevin Wright
#'
#' @examples
#' dat <- data.frame(
#' gen = c("G3", "G4", "G1", "G2", "G3", "G4", "G5",
#' "G1", "G2", "G3", "G4", "G5",
#' "G1", "G2", "G3", "G4", "G5",
#' "G1", "G2", "G3", "G4", "G5"),
#' env = c("E1", "E1", "E1", "E1", "E1", "E1", "E1",
#' "E2", "E2", "E2", "E2", "E2",
#' "E3", "E3", "E3", "E3", "E3",
#' "E4", "E4", "E4", "E4", "E4"),
#' yield = c(65, 50, NA, NA, 65, 50, 60,
#' NA, 71, 76, 80, 82,
#' 90, 93, 95, 102, 97,
#' 98, 102, 105, 130, 135))
#'
#' # How many observations are there for each combination of gen*env?
#' with( subset(dat, !is.na(yield)) , table(gen,env) )
#'
#' # Note, if there is no response variable, the two-way filtering is based
#' # only on the presence of the factor combinations.
#' dat1 <- con_filter(dat, ~ 4*env / gen)
#'
#' # If there is a response variable, missing values are dropped first,
#' # then the two-way filtering is based on the factor combinations.
#'
#' dat1 <- con_filter(dat, yield ~ 4*env/gen)
#' dat1 <- con_filter(dat, yield ~ 5*env/ gen)
#' dat1 <- con_filter(dat, yield ~ 6*gen/ env)
#' dat1 <- con_filter(dat, yield ~ .8 *env / gen)
#' dat1 <- con_filter(dat, yield ~ .8* gen / env)
#' dat1 <- con_filter(dat, yield ~ 7 * env / gen)
#'
#' @references
#' None.
#'
#' @export
con_filter <- function(data, formula, verbose=TRUE, returndropped=FALSE) {
formula <- deparse(formula)
.resp <- trimws( strsplit(formula, "~")[[1]][1] ) # response left of tilde
if(.resp != "" &
!(.resp %in% names(data))) stop(.resp, " not found in data")
rhs <- strsplit(formula, "~")[[1]][2] # right of tilde
leftslash <- strsplit(rhs, "/")[[1]][1]
rightslash <- strsplit(rhs, "/")[[1]][2]
# peel off the threshold
thresh <- as.numeric(strsplit(leftslash, "\\*")[[1]][1])
leftslash <- trimws(strsplit(leftslash, "\\*")[[1]][2])
if( grepl("[:+]", leftslash) & grepl("[:+]", rightslash) ) {
# Example: f1:f2 / f3:f4
stop("Only 3 terms can be included in the formula")
}
if( grepl("[:+]", leftslash) & !grepl("[:+]", rightslash) ) {
# Ex: f1:f2 / f3
method="21"
f1a <- trimws( strsplit(leftslash, ":+")[[1]][1] )
f1b <- trimws( strsplit(leftslash, ":+")[[1]][2] )
f2 <- trimws( rightslash )
}
if( !grepl("[:+]", leftslash) & grepl("[:+]", rightslash) ) {
# Ex: f1 / f2:f3
method="12"
f1 <- trimws(leftslash)
f2a <- trimws( strsplit(rightslash, ":+")[[1]][1] )
f2b <- trimws( strsplit(rightslash, ":+")[[1]][2] )
}
if( !grepl("[:+]", leftslash) & !grepl("[:+]", rightslash) ) {
# Ex: f1 / f2
method="11"
f1 <- trimws(leftslash)
f2 <- trimws(rightslash)
}
# If we have a response variable, omit rows with missing values
if(.resp != ""){
dropix <- is.na(data[[.resp]])
data <- data[!dropix,]
}
n0 <- nrow(data)
# Merge f1a & f1b or else f2a & f2b into a single item
if(method=="11") {
f1vec <- data[[f1]]
f2vec <- data[[f2]]
}
if(method=="12") {
f1vec <- data[[f1]]
f2vec <- paste0(data[[f2a]], data[[f2b]])
f2 <- paste0(f2a,":",f2b)
}
if(method=="21") {
f1vec <- paste0(data[[f1a]], data[[f1b]])
f2vec <- data[[f2]]
f1 <- paste0(f1a,":",f1b)
}
# Unique combinations of f1 f2 (in case there are multiple reps per combination)
uni <- unique(data.frame(f1=f1vec, f2=f2vec))
# table() output includes unused factor levels, so get rid of those
uni <- droplevels(uni)
f1.nlev <- length(unique( uni$f1 ))
f2.nlev <- length(unique( uni$f2 ))
# Delete levels of one factor
f1.per.f2 <- table( uni$f2 )
if(thresh < 1)
f2.keep.levs <- names(f1.per.f2)[f1.per.f2/f1.nlev >= thresh]
else
f2.keep.levs <- names(f1.per.f2)[f1.per.f2 >= thresh]
f2.drop.levs <- setdiff(names(f1.per.f2), f2.keep.levs)
# Delete rows with the f2 drop levels
if(length(f2.drop.levs)>0) {
if(verbose){
message("Dropping these ", length(f2.drop.levs),
" of ", f2.nlev,
" levels of ",f2,":")
print(f2.drop.levs)
}
keepix <- f2vec %in% f2.keep.levs
f1vec <- f1vec[keepix]
f2vec <- f2vec[keepix]
if(returndropped)
data <- data[ !keepix, ]
else
data <- data[ keepix, ]
data <- droplevels(data)
uni <- droplevels(uni[uni$f2 %in% f2.keep.levs,])
}
n2 <- nrow(data)
if(verbose)
cat("Deleted", n0-n2, "of", n0, "rows of data.\n")
if(verbose){
tab1 <- table( uni$f1 )
if(any(tab1 < 2))
warning("Some ",f1," have only 1 ", f2, ".")
tab2 <- table( uni$f2 )
if(any(tab2 < 2))
warning("Some ",f2," have only 1 ", f1,".")
}
if(nrow(data)==0L) warning("No data remains.")
data <- droplevels(data)
return(data)
}
## ---------------------------------------------------------------------------
#' @title View concurrence of two factors in a dataframe using a matrix plot.
#'
#' @description Draws a concurrence plot of 2 factors in a dataframe.
#' For example, in a multi-environment yield trial (testing multiple crop
#' varieties in multple environments) it is interesting to examine the
#' balance of the testing pattern.
#' For each pair of environments, how many genotypes are tested
#' in both environments? The concurrence plot shows the amount of
#' connectedness (number of varieties) of the environments with each other.
#'
#' By default, missing values in the response are deleted.
#'
#' Replicated combinations of the two factors are ignored.
#' (This could be changed if someone has a need.)
#'
#' @param data A dataframe
#'
#' @param formula A formula with multiple factor names in the dataframe,
#' like \code{y ~ f1 / f2}.
#'
#' @param dropNA If TRUE, observed data that are `NA` will be dropped.
#'
#' @param xlab Label for x axis
#'
#' @param ylab Label for y axis
#'
#' @param cex.x Scale factor for x axis tick labels. Default 0.7.
#'
#' @param cex.y Scale factor for y axis tick labels Default 0.7.
#'
#' @param ... Other parameters passed to the levelplot() function.
#'
#' @return A lattice graphics object
#'
#' @author Kevin Wright
#'
#' @examples
#' require(lattice)
#' bar = transform(lattice::barley, env=factor(paste(site,year)))
#' set.seed(123)
#' bar <- bar[sample(1:nrow(bar), 70, replace=TRUE),]
#' con_concur(bar, yield ~ variety / env, cex.x=0.75, cex.y=.3)
#'
#' @references
#' None
#' @export
con_concur <- function(data, formula,
dropNA=TRUE,
xlab="", ylab="", cex.x=.7, cex.y = 0.7, ...){
# Check that the formula has valid names (in the data)
if(any(c('.resp','.fx','.fy') %in% names(data)))
stop(".resp, .fx, .fy are reserved names")
formula <- deparse(formula)
.resp <- trimws( strsplit(formula, "~")[[1]][1] ) # left of tilde
rhs <- strsplit(formula, "~")[[1]][2] # right of tilde
leftslash <- strsplit(rhs, "\\/")[[1]][1]
rightslash <- strsplit(rhs, "\\/")[[1]][2]
if(is.na(leftslash) | is.na(rightslash) )
stop("Incorrect formula")
.fx <- trimws(leftslash)
.fy <- trimws(rightslash)
# If there is a response, drop rows that have missing values in response
if(.resp != "" & dropNA) data = data[ !is.na(data[[.resp]]), ]
if(missing(xlab)) xlab <- paste0("Number of concurrent ", .fx, " per ", .fy)
# Need unique() in case there are multiple observations
dat2 <- unique(data[c(.fx,.fy)])
# Add indicator, convert to incidence matrix
dat2 <- cbind(dat2, ind=1)
dat2 <- acast(dat2, paste(.fx,"~",.fy), value.var="ind", fill=0)
dat2 <- crossprod(dat2)
levelplot(dat2, scales=list(x=list(cex=cex.x, rot=90), y=list(cex=cex.y)),
xlab=xlab, ylab=ylab,
...)
}
## ---------------------------------------------------------------------------
if(FALSE){
# Explore the syntax tree of the formula.
# Note: I decided to parse the formula by myself.
ff <- formula(y~ 2 * gen/site:yr)
deparse(ff[[1]]) # ~
deparse(ff[[2]]) # y
deparse(ff[[3]]) # 2*gen/site:yr
deparse(ff[[3]][[1]]) # /
deparse(ff[[3]][[2]]) # 2*gen
deparse(ff[[3]][[3]]) # site:yr
deparse(ff[[3]][[3]][[1]]) # :
deparse(ff[[3]][[3]][[2]]) # site
deparse(ff[[3]][[3]][[3]]) # yr
all.vars(ff)
lobstr::ast(y~ 2 * gen/site:yr)
}
if(FALSE){
libs(janitor)
test1 <- matrix( c("G1", "IA", "2020", # gen has 1 state, 1 yr,
"G2", "IA", "2020", # gen has 1 state, 2 yr
"G2", "IA", "2021",
"G3", "NE", "2020", # 2 states, 1 yr
"G3", "IA", "2020",
"G4", "KS", "2020", # state has 1 gen, 1 yr
"G5", "MO", "2020", # state has 1 gen, 2yr
"G5", "MO", "2021",
"G6", "IL", "2020", # state has 2 gen, 1yr
"G7", "IL", "2020",
"G8", "AR", "2019", # year has 1 gen 1 state
"G9", "IN", "2018", # year has 1 gen, 2 state
"G9", "OH", "2018",
"G10", "MN", "2017", # year has 2 gen, 1 state
"G11", "MN", "2017",
"G12", "MD", "2010", # gen has 2 state, 2 yr, 2 reps
"G12", "MD", "2010",
"G12", "GA", "2011",
"G12", "GA", "2011"), byrow=TRUE, ncol=3)
test1 <- as.data.frame(test1)
colnames(test1) <- c("gen","state","year")
set.seed(42)
test1$y <- round( runif(nrow(test1)), 2)
head(test1)
con_filter(test1, y~ 2 * f1:f2 / f3:f4) # Errs, as it should
#con_filter(test1, y ~ 2 * gen / state) |> with(data=_, table(gen,state))
con_filter(test1, y ~ 2 * gen / state) |> tabyl(gen,state)
con_filter(test1, y ~ 2 * gen / state, returndropped=TRUE)
#con_filter(test1, y ~ 2 * gen / year) |> with(data=_, table(gen,year))
con_filter(test1, y ~ 2 * gen / year) |> tabyl(gen,year)
con_view(test1, y~ gen * state:year)
con_filter(test1, y ~ 2 * gen / state:year) |> transform(stateyr=paste(state,year)) |> with(table(gen,stateyr))
con_filter(test1, y ~ 2 * gen / state:year) %>% arrange(state,year)
con_filter(test1, y ~ 2 * gen / state:year, returndropped=TRUE)
con_view(test1, y~ state:year * gen)
con_filter(test1, y ~ 2 * state:year / gen) %>% arrange(gen)
con_filter(test1, y ~ 2 * state:year / gen, returndropped=TRUE)
library(agridat)
dat <- lin.unbalanced
head(dat)
con_view(dat, yield ~ gen*loc)
# Maybe this could be a function con_table
with(dat2, table(gen,yl)) %>% apply(2, \(x) sum(x>0)) %>% sort()
with(dat2, table(year,gl)) %>% apply(2, \(x) sum(x>0)) %>% sort() # Problem here!
with(dat2, table(gen,yl)) %>% apply(2, \(x) sum(x>0)) %>% sort()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.