# setup library(knitr, quietly=TRUE) # need to do this in order to access general-purpose functions library(soilReports, quietly=TRUE) # package options opts_knit$set(message=FALSE, warning=FALSE, verbose=FALSE, progress=FALSE) # chunk options opts_chunk$set(message=FALSE, warning=FALSE, background='#F7F7F7', fig.align='center', fig.retina=2, dev='png', antialias='cleartype', tidy=FALSE) # R session options options(width=100, stringsAsFactors=FALSE) # load required packages library(MASS, quietly=TRUE) library(plyr, quietly=TRUE) library(reshape2, quietly=TRUE) library(latticeExtra, quietly=TRUE) library(cluster, quietly=TRUE) library(clhs, quietly=TRUE) library(randomForest, quietly=TRUE) library(ggplot2, quietly = TRUE) library(kableExtra, quietly = TRUE) library(RColorBrewer, quietly = TRUE) ## load report-specific functions source('custom.R') ## load local configuration source('config.R')
# load raster samples load(prism.path) load(geomorphons.path) load(nlcd.path) load(soil.path) load(namrad.path) load(pop2015.path) # monthly data load(monthly.pet.path) load(monthly.ppt.path) # subset to requested MLRA mlra.prism.data <- subset(mlra.prism.data, subset=mlra %in% mu.set) pet.prism.data <- subset(pet.prism.data, subset=mlra %in% mu.set) ppt.prism.data <- subset(ppt.prism.data, subset=mlra %in% mu.set) mlra.geomorphons.data <- subset(mlra.geomorphons.data, subset=mlra %in% mu.set) mlra.nlcd.data <- subset(mlra.nlcd.data, subset=mlra %in% mu.set) mlra.soil.data <- subset(mlra.soil.data, subset=mlra %in% mu.set) mlra.namrad.data <- subset(mlra.namrad.data, subset=mlra %in% mu.set) mlra.pop2015.data <- subset(mlra.pop2015.data, subset=mlra %in% mu.set) ## post-processing # compute monthly PPT - PET wb.prism.data <- data.frame(mlra=ppt.prism.data$mlra, ppt.prism.data[, -1] - pet.prism.data[, -1]) # set factor levels to order in config.R mlra.prism.data$mlra <- factor(mlra.prism.data$mlra, levels=mu.set) ppt.prism.data$mlra <- factor(ppt.prism.data$mlra, levels=mu.set) pet.prism.data$mlra <- factor(pet.prism.data$mlra, levels=mu.set) wb.prism.data$mlra <- factor(wb.prism.data$mlra, levels=mu.set) mlra.geomorphons.data$mlra <- factor(mlra.geomorphons.data$mlra, levels=mu.set) mlra.nlcd.data$mlra <- factor(mlra.nlcd.data$mlra, levels=mu.set) mlra.soil.data$mlra <- factor(mlra.soil.data$mlra, levels=mu.set) mlra.namrad.data$mlra <- factor(mlra.namrad.data$mlra, levels=mu.set) mlra.pop2015.data$mlra <- factor(mlra.pop2015.data$mlra, levels=mu.set) # cast some data to long format for plotting mlra.prism.data.long <- melt(mlra.prism.data, id.vars = 'mlra') mlra.soil.data.long <- melt(mlra.soil.data, id.vars = 'mlra') mlra.namrad.data.long <- melt(mlra.namrad.data, id.vars = 'mlra') # convert monthly data into long format ppt.prism.data.long <- melt(ppt.prism.data, id.vars = 'mlra') pet.prism.data.long <- melt(pet.prism.data, id.vars = 'mlra') wb.prism.data.long <- melt(wb.prism.data, id.vars = 'mlra') # fix months: assuming correct ordering via column order levels(ppt.prism.data.long$variable) <- c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') levels(pet.prism.data.long$variable) <- c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') levels(wb.prism.data.long$variable) <- c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') # nice colors # 7 or fewer classes, use high-constrast colors if(length(mu.set) <= 7) { cols <- brewer.pal(9, 'Set1') # remove light colors cols <- cols[c(1:5,7,9)] } else { # otherwise, use 12 paired colors cols <- brewer.pal(12, 'Paired') } gc(reset = TRUE)
r paste(mu.set, collapse = ", ")
r .report.version
r format(Sys.time(), "%Y-%m-%d")
This report is designed to provide statistical summaries of the environmental properties for one or more MLRA Summaries are based on raster data extracted from fixed-density sampling of map unit polygons. Percentiles are used as robust metrics of distribution central tendency and spread.
Whiskers extend from the 5th to 95th percentiles, the body represents the 25th through 75th percentiles, and the dot is the 50th percentile.
Suggested usage:
tps <- list(box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), box.dot=list(cex=0.5), plot.symbol=list(col=rgb(0.1, 0.1, 0.1, alpha = 0.25, maxColorValue = 1), cex=0.25)) # NOTE: notches rely on effective sampling size bwplot(mlra ~ value | variable, data=mlra.prism.data.long, scales=list(y=list(alternating=3), x=list(relation='free', tick.number=10)), as.table=TRUE, col='black', strip=strip.custom(bg=grey(0.85)), xlab='', par.settings=tps, layout=c(1, length(unique(mlra.prism.data.long$variable))), panel=function(...) { # make a grid panel.grid(h=0, v=-1, col='grey', lty=3) panel.abline(h=1:length(unique(mlra.prism.data.long$mlra)), col='grey', lty=3) # boxplots panel.bwplot(..., stats=custom.bwplot) })
Aggregate soil properties developed from SSURGO/STATSGO at 800m resolution.
tps <- list(box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), box.dot=list(cex=0.5), plot.symbol=list(col=rgb(0.1, 0.1, 0.1, alpha = 0.25, maxColorValue = 1), cex=0.25)) # NOTE: notches rely on effective sampling size bwplot(mlra ~ value | variable, data=mlra.soil.data.long, scales=list(y=list(alternating=3), x=list(relation='free', tick.number=10)), as.table=TRUE, col='black', strip=strip.custom(bg=grey(0.85)), xlab='', par.settings=tps, layout=c(1, length(unique(mlra.soil.data.long$variable))), panel=function(...) { # make a grid panel.grid(h=0, v=-1, col='grey', lty=3) panel.abline(h=1:length(unique(mlra.soil.data.long$mlra)), col='grey', lty=3) # boxplots panel.bwplot(..., stats=custom.bwplot) })
tps <- list(box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), box.dot=list(cex=0.5), plot.symbol=list(col=rgb(0.1, 0.1, 0.1, alpha = 0.25, maxColorValue = 1), cex=0.25)) # NOTE: notches rely on effective sampling size bwplot(mlra ~ value | variable, data=mlra.namrad.data.long, scales=list(y=list(alternating=3), x=list(relation='free', tick.number=10)), as.table=TRUE, col='black', strip=strip.custom(bg=grey(0.85)), xlab='', par.settings=tps, layout=c(1, length(unique(mlra.namrad.data.long$variable))), panel=function(...) { # make a grid panel.grid(h=0, v=-1, col='grey', lty=3) panel.abline(h=1:length(unique(mlra.namrad.data.long$mlra)), col='grey', lty=3) # boxplots panel.bwplot(..., stats=custom.bwplot) })
tps <- list(box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), box.dot=list(cex=0.5), plot.symbol=list(col=rgb(0.1, 0.1, 0.1, alpha = 0.25, maxColorValue = 1), cex=0.25)) # NOTE: notches rely on effective sampling size bwplot(mlra ~ pop2015, data=mlra.pop2015.data, subset=pop2015 > 0, xlab='People / 1km Grid Cell\nNASA 2015', title='2015 Population Density', scales=list(alternating=3, x=list(log=10)), col='black', par.settings=tps, xscale.components=xscale.components.log10ticks, panel=function(...) { # make a grid panel.grid(h=0, v=-1, col='grey', lty=3) panel.abline(h=1:length(unique(mlra.pop2015.data$mlra)), col='grey', lty=3) # boxplots panel.bwplot(..., stats=custom.bwplot) })
These plots are a smooth alternative (denisty estimation) to the classic "binned" (histogram) approach to visualizing distributions. Peaks correspond to values that are most frequent within a data set. Each data set (ID / variable) are rescaled to {0,1} so that the y-axis can be interpreted as the "relative proportion of samples". Note that density estimates are constrained to the range defined by the 1--99 percentiles.
Suggested usage:
tps <- list(superpose.line=list(col=cols, lwd=2, lend=2)) # dynamic setting of columns in legend n.cols <- ifelse(length(mu.set) <= 4, length(mu.set), 5) ## TODO: this is simple syntax but very wasteful memory management # compute densities and re-scale to {0,1} density.plot.data <- ddply(mlra.prism.data.long, c('mlra', 'variable'), scaled.density) # split into list zzz <- split(density.plot.data, density.plot.data$variable) lapply(zzz, function(i) { xyplot(y ~ x | variable, groups=mlra, data=i, xlab='', ylab='Relative Proportion', scales=list(relation='free', x=list(tick.number=10), y=list(at=NULL)), plot.points=FALSE, strip=strip.custom(bg=grey(0.85)), as.table=TRUE, auto.key=list(lines=TRUE, points=FALSE, columns=n.cols), par.settings=tps, type=c('l','g')) }) # cleanup rm(density.plot.data, zzz)
Aggregate soil properties developed from SSURGO/STATSGO at 800m resolution.
tps <- list(superpose.line=list(col=cols, lwd=2, lend=2)) # dynamic setting of columns in legend n.cols <- ifelse(length(mu.set) <= 4, length(mu.set), 5) # compute densities and re-scale to {0,1} density.plot.data <- ddply(mlra.soil.data.long, c('mlra', 'variable'), scaled.density) # split into list zzz <- split(density.plot.data, density.plot.data$variable) lapply(zzz, function(i) { xyplot(y ~ x | variable, groups=mlra, data=i, xlab='', ylab='Relative Proportion', scales=list(relation='free', x=list(tick.number=10), y=list(at=NULL)), plot.points=FALSE, strip=strip.custom(bg=grey(0.85)), as.table=TRUE, auto.key=list(lines=TRUE, points=FALSE, columns=n.cols), par.settings=tps, type=c('l','g')) }) # cleanup rm(density.plot.data, zzz)
tps <- list(superpose.line=list(col=cols, lwd=2, lend=2)) # dynamic setting of columns in legend n.cols <- ifelse(length(mu.set) <= 4, length(mu.set), 5) # compute densities and re-scale to {0,1} density.plot.data <- ddply(mlra.namrad.data.long, c('mlra', 'variable'), scaled.density) # split into list zzz <- split(density.plot.data, density.plot.data$variable) lapply(zzz, function(i) { xyplot(y ~ x | variable, groups=mlra, data=i, xlab='', ylab='Relative Proportion', scales=list(relation='free', x=list(tick.number=10), y=list(at=NULL)), plot.points=FALSE, strip=strip.custom(bg=grey(0.85)), as.table=TRUE, auto.key=list(lines=TRUE, points=FALSE, columns=n.cols), par.settings=tps, type=c('l','g')) }) # cleanup rm(density.plot.data, zzz)
Median PPT vs. PET, bounded by 25th and 75th percentile.
# combine PPT and PET into the same long-format DF # this only works because data and structures are identical # # all(ppt.prism.data.long$mlra == pet.prism.data.long$mlra) # all(ppt.prism.data.long$variable == pet.prism.data.long$variable) g <- ppt.prism.data.long names(g)[3] <- 'PPT' g$PET <- pet.prism.data.long$value # compute univariate quantiles monthly.data <- ddply(g, c('mlra', 'variable'), .fun = plyr::summarise, PPT.q25=quantile(PPT, na.rm=TRUE, probs=0.25), PPT.q50=quantile(PPT, na.rm=TRUE, probs=0.5), PPT.q75=quantile(PPT, na.rm=TRUE, probs=0.75), PET.q25=quantile(PET, na.rm=TRUE, probs=0.25), PET.q50=quantile(PET, na.rm=TRUE, probs=0.5), PET.q75=quantile(PET, na.rm=TRUE, probs=0.75) ) tps <- list(superpose.line=list(col=cols, lwd=2, lend=2), superpose.symbol=list(pch=15, col=cols, cex=1)) # plot 2D median and IQR xyplot(PPT.q50 ~ PET.q50 | variable, data=monthly.data, groups=mlra, as.table=TRUE, scales=list(alternating=3, relation='free', tick.number=8, y=list(rot=0)), xlab='25-50-75th Percentiles PET (mm)', ylab='25-50-75th Percentiles PPT (mm)', par.settings=tps, strip=strip.custom(bg=grey(0.85)), auto.key=list(lines=TRUE, points=FALSE, title='MLRA', columns=length(mu.set)), prepanel=function(left=monthly.data$PET.q25, right=monthly.data$PET.q75, top=monthly.data$PPT.q75, bottom=monthly.data$PPT.q25, groups=groups, subscripts=subscripts, ...) { x <- c(left[subscripts], right[subscripts]) y <- c(bottom[subscripts], top[subscripts]) ord <- order(as.numeric(x)) dx <- diff(as.numeric(x[ord])) dy <- diff(as.numeric(y[ord])) list(xlim = lattice:::scale.limits(x), ylim = lattice:::scale.limits(y), dx = dx, dy = dy, xat = if (is.factor(x)) sort(unique(as.numeric(x))) else NULL, yat = if (is.factor(y)) sort(unique(as.numeric(y))) else NULL) }, panel=function(x=x, y=y, left=monthly.data$PET.q25, right=monthly.data$PET.q75, top=monthly.data$PPT.q75, bottom=monthly.data$PPT.q25, groups=groups, subscripts=subscripts, ...) { panel.grid(-1, -1) panel.xyplot(x, y, groups=groups, subscripts=subscripts, ...) d <- data.frame(groups=groups[subscripts], x=x, y=y, top=top[subscripts], bottom=bottom[subscripts], right=right[subscripts], left=left[subscripts]) s <- split(d, d$groups) grp.colors <- cols lapply(s, function(i) { # get current color color <- grp.colors[match(i$groups, levels(groups))] # PPT panel.segments(i$left, i$y, i$right, i$y, col=color, lwd=2) # PET panel.segments(i$x, i$bottom, i$x, i$top, col=color, lwd=2) }) }) rm(g, monthly.data) gc(reset = TRUE)
Modified box-whisker comparisons by month.
# ## bwplot side/side # h <- ggplot(ppt.prism.data.long, aes(variable, value)) # # h + geom_boxplot(aes(color=mlra), outlier.shape = NA) + # xlab('') + ylab('PPT (mm)') + # ylim(c(quantile(ppt.prism.data.long$value, probs = 0.05, na.rm = TRUE), quantile(ppt.prism.data.long$value, probs = 0.95, na.rm = TRUE))) + # scale_color_manual(values=alpha(cols, 0.75)) + # theme_bw() ## this is much simpler to look at h <- ggplot(ppt.prism.data.long, aes(mlra, value)) h + geom_boxplot(aes(color=mlra), outlier.shape = NA) + facet_wrap(vars(variable)) + xlab('') + ylab('PPT (mm)') + ylim(c(quantile(ppt.prism.data.long$value, probs = 0.05, na.rm = TRUE), quantile(ppt.prism.data.long$value, probs = 0.95, na.rm = TRUE))) + scale_color_manual(values=alpha(cols, 0.75)) + theme_bw() h <- ggplot(pet.prism.data.long, aes(mlra, value)) h + geom_boxplot(aes(color=mlra), outlier.shape = NA) + facet_wrap(vars(variable)) + xlab('') + ylab('PET (mm)') + ylim(c(quantile(pet.prism.data.long$value, probs = 0.05, na.rm = TRUE), quantile(pet.prism.data.long$value, probs = 0.95, na.rm = TRUE))) + scale_color_manual(values=alpha(cols, 0.75)) + theme_bw() h <- ggplot(wb.prism.data.long, aes(mlra, value)) h + geom_boxplot(aes(color=mlra), outlier.shape = NA) + facet_wrap(vars(variable), scales = 'free_y') + geom_abline(intercept=0, slope = 0, lty=2) + xlab('') + ylab('PPT - PET (mm)') + scale_color_manual(values=alpha(cols, 0.75)) + theme_bw()
Monthly inter-quartile range.
## time-series of IQR monthly.data <- ddply(ppt.prism.data.long, c('mlra', 'variable'), .fun = plyr::summarise, q25=quantile(value, na.rm=TRUE, probs=0.25), q50=quantile(value, na.rm=TRUE, probs=0.5), q75=quantile(value, na.rm=TRUE, probs=0.75) ) h <- ggplot(monthly.data, aes(x = variable, group=mlra)) h + geom_ribbon(aes(ymin = q25, ymax = q75, fill=mlra)) + geom_line(aes(variable, q25)) + geom_line(aes(variable, q75)) + geom_abline(intercept=0, slope = 0, lty=2) + xlab('') + ylab('PPT (mm)') + ggtitle('Monthly IQR') + scale_fill_manual(values=alpha(cols, 0.75)) + theme_bw() # ggplot(monthly.data, aes(x = variable, y = q50, group=mlra, color=mlra)) + # geom_line(size=1.25) + # xlab('') + ylab('PPT (mm)') + # ggtitle('Monthly Median') + # scale_color_manual(values=alpha(cols, 0.75)) + # # geom_dl(aes(label = mlra), method = list(dl.combine("first.points", "last.points"))) + # theme_bw() monthly.data <- ddply(pet.prism.data.long, c('mlra', 'variable'), .fun = plyr::summarise, q25=quantile(value, na.rm=TRUE, probs=0.25), q75=quantile(value, na.rm=TRUE, probs=0.75) ) h <- ggplot(monthly.data, aes(x = variable, group=mlra)) h + geom_ribbon(aes(ymin = q25, ymax = q75, fill=mlra)) + geom_line(aes(variable, q25)) + geom_line(aes(variable, q75)) + xlab('') + ylab('PET (mm)') + ggtitle('Monthly IQR') + geom_abline(intercept=0, slope = 0, lty=2) + scale_fill_manual(values=alpha(cols, 0.75)) + theme_bw() monthly.data <- ddply(wb.prism.data.long, c('mlra', 'variable'), .fun = plyr::summarise, q25=quantile(value, na.rm=TRUE, probs=0.25), q75=quantile(value, na.rm=TRUE, probs=0.75) ) h <- ggplot(monthly.data, aes(x = variable, group=mlra)) h + geom_ribbon(aes(ymin = q25, ymax = q75, fill=mlra)) + geom_line(aes(variable, q25)) + geom_line(aes(variable, q75)) + geom_abline(intercept=0, slope = 0, lty=2) + xlab('') + ylab('PPT - PET (mm)') + ggtitle('Monthly IQR') + scale_fill_manual(values=alpha(cols, 0.75)) + theme_bw()
Table of select percentiles, by variable. In these tables, headings like "Q5" can be interpreted as the the "5th percentile"; 5% of the data are less than this value. The 50th percentile ("Q50") is the median.
# summarize raster data for tabular output mu.stats <- ddply(mlra.prism.data.long, c('variable', 'mlra'), f.summary, p=p.quantiles) # print medians dg <- c(0, rep(2, times=length(unique(mu.stats$variable)))) mu.stats.wide <- dcast(mu.stats, mlra ~ variable, value.var = 'Q50') kable_styling( kable( mu.stats.wide, row.names=FALSE, caption = 'Median Values', align = 'r', digits=dg, col.names=c('MLRA', names(mu.stats.wide)[-1]) ), font_size = 12 )
# iterate over variables and print smaller tables # note: https://github.com/yihui/knitr/issues/886 l_ply(split(mu.stats, mu.stats$variable), function(i) { # remove variable column var.name <- unique(i$variable) i$variable <- NULL dg <- c(0, rep(2, times=length(p.quantiles)), 3) print( kable_styling( kable(i, caption = var.name, row.names=FALSE, align = 'r', digits=dg, col.names=c('MLRA', names(i)[-1])) , font_size = 12, full_width = FALSE) ) })
Proportion of samples within each map unit that correspond to 1 of 10 possible landform positions, as generated via geomorphon algorithm. Landform classification by this method is scale-invariant and is therefore not affected by computational window size selection.
Suggested usage:
# make some colors, and set style cols.geomorphons <- c('grey', brewer.pal(9, 'Spectral')) tps <- list(superpose.polygon=list(col=cols.geomorphons, lwd=2, lend=2)) # cast to wide format for clustering mlra.geomorphons.data.wide <- dcast(mlra.geomorphons.data, mlra ~ geomorphons, value.var = 'Freq') # clustering of proportions only works with >1 group if(length(unique(mlra.geomorphons.data$mlra)) > 1) { # cluster proportions x.d <- as.hclust(diana(daisy(mlra.geomorphons.data.wide[, -1]))) # re-order MU labels levels based on clustering mlra.geomorphons.data$mlra <- factor(mlra.geomorphons.data$mlra, levels=mlra.geomorphons.data.wide$mlra[x.d$order]) # musym are re-ordered according to clustering trellis.par.set(tps) barchart(mlra ~ Freq, groups=geomorphons, data=mlra.geomorphons.data, horiz=TRUE, stack=TRUE, xlab='Proportion of Samples', scales=list(cex=1.5), key=simpleKey(space='top', columns=5, text=levels(mlra.geomorphons.data$geomorphons), rectangles = TRUE, points=FALSE), legend=list(right=list(fun=dendrogramGrob, args=list(x = as.dendrogram(x.d), side="right", size=10)))) } else { trellis.par.set(tps) barchart(mlra ~ Freq, groups=geomorphons, data=mlra.geomorphons.data, horiz=TRUE, stack=TRUE, xlab='Proportion of Samples', scales=list(cex=1.5), key=simpleKey(space='top', columns=5, text=levels(mlra.geomorphons.data$geomorphons), rectangles = TRUE, points=FALSE)) }
# print and truncate to 2 decimal places kable_styling( kable(mlra.geomorphons.data.wide, digits = 3, caption = 'Geomorphon Proportions') , font_size = 12, full_width = FALSE )
These values are from the 2011 NLCD (30m) database.
# These are from the NLCD 2011 metadata nlcd.leg <- structure(list(ID = c(0L, 11L, 12L, 21L, 22L, 23L, 24L, 31L, 41L, 42L, 43L, 51L, 52L, 71L, 72L, 73L, 74L, 81L, 82L, 90L, 95L ), name = c("nodata", "Open Water", "Perennial Ice/Snow", "Developed, Open Space", "Developed, Low Intensity", "Developed, Medium Intensity", "Developed, High Intensity", "Barren Land (Rock/Sand/Clay)", "Deciduous Forest", "Evergreen Forest", "Mixed Forest", "Dwarf Scrub", "Shrub/Scrub", "Grassland/Herbaceous", "Sedge/Herbaceous", "Lichens", "Moss", "Pasture/Hay", "Cultivated Crops", "Woody Wetlands", "Emergent Herbaceous Wetlands"), col = c("#000000", "#476BA0", "#D1DDF9", "#DDC9C9", "#D89382", "#ED0000", "#AA0000", "#B2ADA3", "#68AA63", "#1C6330", "#B5C98E", "#A58C30", "#CCBA7C", "#E2E2C1", "#C9C977", "#99C147", "#77AD93", "#DBD83D", "#AA7028", "#BAD8EA", "#70A3BA")), .Names = c("ID", "name", "col"), row.names = c(NA, -21L), class = "data.frame") # These are from the NLCD 2011 metadata # get colors for only those classes in this data cols.nlcd.classes <- nlcd.leg$col[match(levels(mlra.nlcd.data$nlcd), nlcd.leg$name)] tps <- list(superpose.polygon=list(col=cols.nlcd.classes, lwd=2, lend=2)) # no re-ordering of musym trellis.par.set(tps) barchart(mlra ~ Freq, groups=nlcd, data=mlra.nlcd.data, horiz=TRUE, stack=TRUE, xlab='Proportion of Samples', scales=list(cex=1.5), key=simpleKey(space='top', columns=3, text=levels(mlra.nlcd.data$nlcd), rectangles = TRUE, points=FALSE))
# print and truncate to 2 decimal places mlra.nlcd.data.wide <- dcast(mlra.nlcd.data, mlra ~ nlcd, value.var = 'Freq') kable_styling( kable(mlra.nlcd.data.wide, digits = 2, caption = 'Landcover Proportions') , font_size = 10, full_width = FALSE )
The following "ordination" summarizes environmental variables by MLRA. The flattening of multivariate data (16 dimensions) onto an optimal 2D projection is performed using principal coordinates. Ellipses represent 50% probability contours via multivariate homogeneity of group dispersions. MLRA delineations with more than 1,000 samples are (sub-sampled via cLHS). MLRA with very low variation in environmental variables can result in tightly clustered points in the ordination. See this chapter, from the new Statistics for Soil Scientists NEDS course, for an soils-specific introduction to these concepts.
Suggested usage:
## NOTES: # 1. this section will fail if there are NA in the samples: this is usually caused by raster extent not covering MU extent ## TODO: # 1. combine median of continuous, geomorphons proportions, and NLCD proportions for dendrogram # join soil + PRISM annual + limited PRISM monthly data # Jan, June, Oct PPT and PET # cbind() works because data structures are identical mlra.data <- cbind(mlra.prism.data, mlra.soil.data[, -1], ppt.prism.data[, c(2, 7, 11)], pet.prism.data[, c(2, 7, 11)]) # find variables with low SD, by group # leave-out ID vars mlra.data.vars <- findSafeVars(mlra.data, id = 'mlra') ## NOTE: this should be a lot faster with C++ clhs # only sub-sample if there are "a lot" of samples if(nrow(mlra.data) > 1000) { # sub-sample via LHS: this takes time # first column is ID # n: this is the number of sub-samples / map unit # non.id.vars: this is an index to non-ID columns d.sub <- ddply(mlra.data, 'mlra', cLHS_subset, n=50, non.id.vars=mlra.data.vars) } else { d.sub <- mlra.data } # remove NA d.sub <- na.omit(d.sub) ## NOTE: data with very low variability will cause warnings # eval numerical distance, removing ID columns d.dist <- daisy(d.sub[, mlra.data.vars], stand=TRUE) ## map distance matrix to 2D space via principal coordinates d.betadisper <- vegan::betadisper(d.dist, group=d.sub$mlra, bias.adjust = TRUE, sqrt.dist = FALSE, type='median') d.scores <- vegan::scores(d.betadisper) sub.text <- sprintf('MLRA %s', paste(mu.set, collapse = ', ')) # plot plot( d.betadisper, hull=FALSE, ellipse=TRUE, conf=0.5, col=cols, main='Ordination of Raster Samples\n50% Probability Ellipse', sub = sub.text, las = 1, xlab = '', ylab= '' )
Pair-wise comparisons at the 90% level of confidence.
## pair-wise comparisons of variance par(mar=c(4.5, 5.5, 4.5, 1)) plot(TukeyHSD(d.betadisper, conf.level = 0.9), las=1)
The following figure highlights shared information among raster data sources based on Spearman's Ranked Correlation coefficient. Branch height is associated with the degree of shared information between raster data.
Suggested usage:
par(mar=c(2,5,2,2)) ## note that we don't load the Hmisc package as it causes many NAMESPACE conflicts ## This requires 3 or more variables if(ncol(d.sub[, mlra.data.vars]) > 3) { try(plot(Hmisc::varclus(as.matrix(d.sub[, mlra.data.vars]))), silent=TRUE) } else print('This plot requires three or more raster variables, apart from aspect, curvature class, and geomorphons.')
The following figure ranks raster data sources in terms of how accurately each can be used to discriminate between map unit concepts.
Suggested usage:
# this will only work with >= 2 map units if(length(levels(d.sub$mlra)) >= 2) { # use supervised classification to empirically determine the relative importance of each raster layer # TODO: include geomorphons and curvature classes # TODO: consider using party::cforest() for conditional variable importance-- varimp m <- randomForest(x=d.sub[, mlra.data.vars], y=d.sub$mlra, importance = TRUE) # variable importance # TODO: how to interpret raw output from importance: # http://stats.stackexchange.com/questions/164569/interpreting-output-of-importance-of-a-random-forest-object-in-r/164585#164585 varImpPlot(m, scale=TRUE, type=1, main='Mean Decrease in Accuracy') # kable(importance(m, scale=FALSE, type=2), digits = 3) ## TODO: join with output SHP via sid } else { # print message about not enough map unit s print('This plot requires two or more map units.') }
Report configuration and source code are hosted on GitHub.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.