#'@title
#' Make and assign strata based on two input columns, typically at least one is numeric
#'
#'
#'@description
#' Make and assign strata based on two input attributes and some parameters
#' functions are makeStrata and assignStrata
#'
#'@details
#' <Delete and Replace>
#'
#'\cr
#'Revision History
#' \tabular{ll}{
#'1.0 \tab 2019 01 24 created \cr
#'1.1 \tab 2020 06 25 updated to allow more complex inputs \cr
#'}
#'
#'@author
#'
#'Jacob Strunk <Jstrunk@@fs.fed.us>
#'
#'@param data input data
#'@param x1 name of input x field
#'@param x2 name of input y field
#'@param split_x1 how to split - equal interval or quantile based
#'@param split_x2 how to split - equal interval or quantile based
#'@param nest_x2 should x2 be nested in x1, or should x1 and x2 be split independently
#'@param n1 number of strata for x1
#'@param n2 number of strata for x2
#'@param type_x1 is x1 numeric or factor
#'@param type_x2 is x2 numeric or factor
#'@param minRecs what is the minimum number of records in a stratum before collapse
#'@param precision what precision is retained for cutting numeric values into strata
#'
#'
#'@return
#' makeStrata returns a data.frame of strata boundaries
#' assignStrata takes a data.frame and appends a column with the stratum name for each record
#'
#'@examples
#'
#'
#'
#' #prepare pseudo data
#' n=500
#' hts = sample(1:150,n,T)
#' dbhs = abs(hts/5 + rnorm(#' )*2)
#' dat_test = data.frame(height = hts , dbh = dbhs, height_cat = cut(hts,10) , dbh_cat = cut(dbhs,10))
#' plot(dbhs,hts)
#'
#' #example 1 factor and numeric
#' str_test = makeStrata(
#' dat_test
#' , x1="height_cat"
#' , x2="dbh"
#' , split_x1 =c("qt","eq")[1]
#' , split_x2 =c("qt","eq")[1]
#' , nest_x2 = T
#' , n1 = 10
#' , n2 = 10
#' , type_x1 = "factor"
#' , type_x2 = "numeric"
#' , minRecs = 7
#' , precision = 0
#' )
#'
#'
#' res = assignStrata(
#' str_test
#' ,dat_test
#' )
#'
#' res$stratum
#'
#' summary(lm(height ~ dbh , data = res))
#' summary(lm(height ~ dbh_cat , data = res))
#' summary(lm(height ~ stratum , data = res))
#'
#' #example 2 numeric and numeric
#' str_test1 = makeStrata(
#' dat_test
#' , x1="height"
#' , x2="dbh"
#' , split_x1 =c("qt","eq")[1]
#' , split_x2 =c("qt","eq")[1]
#' , nest_x2 = T
#' , n1 = 10
#' , n2 = 10
#' , type_x1 = "numeric"
#' , type_x2 = "numeric"
#' , minRecs = 7
#' , precision = 0
#' )
#'
#' res1 = assignStrata(
#' str_test1
#' ,dat_test
#' )
#'
#' #example 3 numeric and factor
#' str_test2 = makeStrata(
#' dat_test
#' , x1="height"
#' , x2="dbh_cat"
#' , split_x1 =c("qt","eq")[1]
#' , split_x2 =c("qt","eq")[1]
#' , nest_x2 = T
#' , n1 = 10
#' , n2 = 10
#' , type_x1 = "numeric"
#' , type_x2 = "factor"
#' , minRecs = 7
#' , precision = 0
#' )
#'
#' res2 = assignStrata(
#' str_test2
#' ,dat_test
#' )
#'
#'
#'
#'
#'@export
#
#@seealso \code{\link{another_function}}\cr \code{\link{yet_another_function}}\cr
#Desired upgrades to this function:
#
#
# x = function(x){}
#copy function arguments and use this code to format arguments
##writeClipboard(paste(gsub("^[[:space:]]*[,]*","#'@param ",gsub("=.*"," ?",readClipboard())),collapse="\n"))
#function takes a factor at the first level and then cuts based upon
# a numeric vector as the second level
makeStrata = function(
data
, x1="PlotStrata1"
, x2="Elev.P95"
, split_x1=c("qt","eq")
, split_x2=c("qt","eq")
, nest_x2 = T
, n1 = 10
, n2 = 10
, type_x1 = c("factor","numeric")[1]
, type_x2 = c("numeric","factor")[1]
, minRecs = 7
, precision = 0
, collapse=T
){
x1_in = x1
if( !type_x1 == "numeric"){
str_levels_x1 = levels(as.factor(data[,x1]))
dfStr0 = data.frame(stratum_x1 = str_levels_x1, x1.from=NA , x1.to=NA)
}
if( type_x1 == "numeric"){
dfStr0 = read.csv(text=paste("stratum_x1,x1.from,x1.to,n_x1"))
r_x1 = range(data[,x1])
dr_x1 = diff(r_x1)
if(split_x1[1] == "qt" ){
str_levels_x1 = c(
r_x1[1]-dr_x1
,quantile(data[,x1] ,seq(1/n1,1-1/n1,1/n1))
,r_x1[1]+dr_x1
)
}
if(split_x1[1] == "eq" ){
str_levels_x1 = c(
r_x1[1] - dr_x1
,seq(r_x1[1],r_x1[2],dr_x1/n1)[-c(1,n1)]
,r_x1[1]+ dr_x1
)
}
#prep description of x1 stratum
str_levels_x1 = unique(str_levels_x1)
str_levels_x1[-c(1,length(str_levels_x1))] = round(str_levels_x1[-c(1,length(str_levels_x1))],precision)
str_levels_x1 = unique(str_levels_x1)
#prepare new factor version of attribute
x1_in = paste(x1,"_ctg",sep="")
data[,x1_in] = cut(data[,x1] , str_levels_x1 , labels = 1:(length(str_levels_x1)-1) )
#assign bins
dfStr0[1:(length(str_levels_x1)-1),"stratum_x1"] = 1:(length(str_levels_x1)-1)
dfStr0[, "x1.from"] = str_levels_x1[-length(str_levels_x1)]
dfStr0[, "x1.to"] = str_levels_x1[-1]
#dfStr0[, "n_x1"] = table(data[,c(x1_in)])
}
if( nest_x2 & type_x2 == "numeric"){
spl1 = split(data,data[,x1_in])
if( type_x2 == "numeric"){
for(i in 1:length(spl1)){
dfStri = read.csv(text=paste("stratum,stratum_x1,stratum_x2,x2.from,x2.to"))
dati = spl1[[i]]
if(nrow(dati) / n2 < minRecs ) n2i = max(floor(nrow(dati) / minRecs),1)
else n2i = n2
ri = range(dati[,x2])
dri = diff(ri)
if(split_x2[1] == "qt" ){
str_levels_x2 = c(
ri[1]-dri
,quantile(dati[,x2] ,seq(1/n2i,1-1/n2i,1/n2i))
,ri[2]+dri
)
}
if(split_x2[1] == "eq" ){
str_levels_x2 = c(
ri[1]-dri
,seq(ri[1],ri[2],dri/n2i)[-c(1,n2i)]
,ri[2]+dri
)
}
#prep description of x2 stratum
str_levels_x2 = unique(str_levels_x2)
str_levels_x2[-c(1,length(str_levels_x2))] = round(str_levels_x2[-c(1,length(str_levels_x2))],precision)
str_levels_x2 = unique(str_levels_x2)
dfStri[1:(length(str_levels_x2)-1),"stratum_x2"] = 1:(length(str_levels_x2)-1)
dfStri[, "stratum_x1" ] = dati[1,x1_in]
dfStri[, "x2.from"] = str_levels_x2[-length(str_levels_x2)]
dfStri[, "x2.to"] = str_levels_x2[-1]
dfStri[,"stratum"] = apply(dfStri[,c("stratum_x1","stratum_x2")],1,paste,collapse=".")
df_ct = as.data.frame(table(as.factor(cut(dati[,x2], round(str_levels_x2,precision) , labels = F ))))
if(i == 1) dfStr1 = merge(x=dfStri,df_ct,by.x="stratum_x2",by.y="Var1",all=T)
if(i > 1) dfStr1 = plyr::rbind.fill(dfStr1, merge(x=dfStri,df_ct,by.x="stratum_x2",by.y="Var1",all=T))
}
}
}
if(!nest_x2 & type_x2 == "numeric"){
dfStr1 = read.csv(text=paste("stratum_x2,x2.from,x2.to,n_x2"))
r_x2 = range(data[,x2])
dr_x2 = diff(r_x2)
if(split_x2[1] == "qt" ){
str_levels_x2 = c(
r_x2[1]-dr_x2
,quantile(data[,x2] ,seq(1/n1,1-1/n1,1/n1))
,r_x2[1]+dr_x2
)
}
if(split_x2[1] == "eq" ){
str_levels_x2 = c(
r_x2[1] - dr_x2
,seq(r_x2[1],r_x2[2],dr_x2/n1)[-c(1,n1)]
,r_x2[1]+ dr_x2
)
}
#prep description of x2 stratum
str_levels_x2 = unique(str_levels_x2)
str_levels_x2[-c(1,length(str_levels_x2))] = round(str_levels_x2[-c(1,length(str_levels_x2))],precision)
str_levels_x2 = unique(str_levels_x2)
#prepare new factor version of attribute
x2_in = paste(x2,"_ctg",sep="")
data[,x2_in] = cut(data[,x2] , str_levels_x2 , labels = 1:(length(str_levels_x2)-1) )
#assign bins
str_levels_x1b = levels(as.factor(dfStr0[,"stratum_x1"]))
str_levels_x2b = levels(as.factor(data[,x2_in]))
dfStr1 = data.frame(stratum=NA,stratum_x1 = sort(rep(str_levels_x1b,length(str_levels_x2b))) , stratum_x2 = rep(str_levels_x2b , length(str_levels_x1b)) )
dfStr1[,"stratum"] = gsub(" ","",apply(dfStr1[,c("stratum_x1","stratum_x2")],1,paste,collapse="."))
dfStr1[, "x2.from"] = rep(str_levels_x2[-length(str_levels_x2)],length(str_levels_x1b))
dfStr1[, "x2.to"] = rep(str_levels_x2[-1],length(str_levels_x1b))
}
if( type_x2 == "factor"){
str_levels_x1b = levels(as.factor(dfStr0[,"stratum_x1"]))
str_levels_x2 = levels(as.factor(data[,x2]))
dfStr1 = data.frame(stratum=NA,stratum_x1 = sort(rep(str_levels_x1b,length(str_levels_x2))) , stratum_x2 = rep(str_levels_x2 , length(str_levels_x1b)) )
dfStr1[,"stratum"] = gsub(" ","",apply(dfStr1[,c("stratum_x1","stratum_x2")],1,paste,collapse="."))
dfStr1[,"x2.from"] = NA
dfStr1[,"x2.to"] = NA
}
#prep final data
dfStr2 = merge(x=dfStr0, dfStr1 , by = 'stratum_x1',all.y=T)
dfStr2[, "nm_x1"] = x1
dfStr2[, "nm_x2"] = x2
#collapse strata ?
if(collapse){
pdstrat_in = assignStrata(dfStr2,data[,c(x1,x2)])$stratum
tbtest = table(data$strat_in)
nmtest = names(tbtest)
missing_idx = !dfStr2$stratum %in% pdstrat_in
if( sum(missing_idx) > 0 ){
warning("currently 'collapse=T' uses a cheap fix - eventually add something smarter")
idx_bad = which(missing_idx)
idx_update = which(missing_idx) - 1
idx_update1 = which(missing_idx) + 1
idx_update[idx_update < 1] = idx_update1[idx_update < 1]
idx_update[idx_update %in% idx_bad] = idx_update1[idx_update %in% idx_bad]
dfStr2[idx_update,c("x1.to","x2.to")] = dfStr2[idx_bad,c("x1.to","x2.to")]
dfStr2 = dfStr2[-idx_bad,]
#eventually add something smarter:
if(type_x1 == "numeric" & type_x2 == "numeric"){
}
if(type_x1 != "numeric" & type_x2 == "numeric"){
}
if(type_x1 == "numeric" & type_x2 != "numeric"){
}
}
}
return( dfStr2[,c('stratum','stratum_x1','stratum_x2','x1.from','x1.to','x2.from','x2.to',"nm_x1","nm_x2")])
}
#'@export
assignStrata = function(strata,data){
x1numeric = !is.na(strata[1,"x1.from"])
x2numeric = !is.na(strata[1,"x2.from"])
for(i in 1:nrow(strata)){
#two numeric inputs
if(x1numeric & x2numeric){
id_i =
( data[,strata[i,"nm_x1"]] > strata[i,"x1.from"] ) &
( data[,strata[i,"nm_x1"]] <= strata[i,"x1.to"] ) &
( data[,strata[i,"nm_x2"]] > strata[i,"x2.from"] ) &
( data[,strata[i,"nm_x2"]] <= strata[i,"x2.to"] )
id_i[is.na(id_i)] = F
data[ id_i , "stratum" ] = strata[ i , "stratum" ]
}
#numeric x1, factor x2
if(x1numeric & !x2numeric){
id_i =
( data[,strata[i,"nm_x2"]] == strata[i,"stratum_x2"] ) &
( data[,strata[i,"nm_x1"]] > strata[i,"x1.from"] ) &
( data[,strata[i,"nm_x1"]] <= strata[i,"x1.to"] )
id_i[is.na(id_i)] = F
data[ id_i , "stratum" ] = strata[ i , "stratum" ]
}
#numeric x2, factor x1
if(!x1numeric & x2numeric){
id_i =
( data[,strata[i,"nm_x1"]] == strata[i,"stratum_x1"] ) &
( data[,strata[i,"nm_x2"]] > strata[i,"x2.from"] ) &
( data[,strata[i,"nm_x2"]] <= strata[i,"x2.to"] )
id_i[is.na(id_i)] = F
data[ id_i , "stratum" ] = strata[ i , "stratum" ]
}
}
data
}
if(F){
#stratify on vegetation type and height
n=500
hts = sample(1:150,n,T)
dbhs = abs(hts/5 + rnorm(n)*2)
dat_test = data.frame(height = hts , dbh = dbhs, height_cat = cut(hts,10) , dbh_cat = cut(dbhs,10))
plot(dbhs,hts)
str_test = makeStrata(
dat_test
, x1="height_cat"
, x2="dbh"
, split_x1 =c("qt","eq")[1]
, split_x2 =c("qt","eq")[1]
, nest_x2 = T
, n1 = 10
, n2 = 10
, type_x1 = "factor"
, type_x2 = "numeric"
, minRecs = 7
, precision = 0
)
res = assignStrata(
str_test
,dat_test
)
res$stratum
summary(lm(height ~ dbh , data = res))
summary(lm(height ~ dbh_cat , data = res))
summary(lm(height ~ stratum , data = res))
str_test1 = makeStrata(
dat_test
, x1="height"
, x2="dbh"
, split_x1 =c("qt","eq")[1]
, split_x2 =c("qt","eq")[1]
, nest_x2 = T
, n1 = 10
, n2 = 10
, type_x1 = "numeric"
, type_x2 = "numeric"
, minRecs = 7
, precision = 0
)
res1 = assignStrata(
str_test1
,dat_test
)
str_test2 = makeStrata(
dat_test
, x1="height"
, x2="dbh_cat"
, split_x1 =c("qt","eq")[1]
, split_x2 =c("qt","eq")[1]
, nest_x2 = T
, n1 = 10
, n2 = 10
, type_x1 = "numeric"
, type_x2 = "factor"
, minRecs = 7
, precision = 0
)
res2 = assignStrata(
str_test2
,dat_test
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.