inst/reports/southwest/shiny-pedon-summary/util.R

#util.R

loadReportData <- function() {
  if(!loaded) {
    if(!any(file.exists(c("pedon_cache.Rda"))) | !cache_data) {
      
      if(!inherits(pedons_raw, 'try-error')) {
        
        if(!inherits(rasters, 'try-error') && length(rasters) > 0) {
          
          # transform to raster coordinate reference 
          #  system (assumes common between all rasters)
          pedons_sf <- sf::st_transform(pedons_sf, sf::st_crs(rasters[[1]]))
          
          # extract from raster stack at points
          l.res <- lapply(lapply(rasters[1:2], terra::extract, pedons_sf), function(x) x[, 2])
          
          # attach raster data to site-level data of SPC
          l.res <- as.data.frame(l.res, stringsAsFactors=FALSE)
          l.res$peiid <- pedons_sf$peiid
          
          site(pedons) <- l.res
          
          if(!is.null(pedons$gis_geomorphons)) {
            # assign geomorphon labels
            geomorphon.labels <- c('flat', 'summit', 'ridge', 'shoulder', 
                                   'spur', 'slope', 'hollow', 'footslope',
                                   'valley', 'depression')
            pedons$gis_geomorphons <- factor(pedons$gis_geomorphons, 
                                             levels=1:10, 
                                             labels=geomorphon.labels)
          }
        }
      }
      
      if(cache_data) {
        #if there was no cache but caching is on, save the data for next time
        save(pedons,file = "pedon_cache.Rda") 
      }
      
    } else if (file.exists("pedon_cache.Rda")) { 
      #if there is a cache...
      if(cache_data) { 
        
        # and caching is ON, load the data from file rather than NASIS
        load("pedon_cache.Rda")
        print("Loaded data from cache.")
        
      } else { 
        
        # and caching is OFF, make it a backup 
        file.rename(from = "pedon_cache.Rda",
                    to = "pedon_cache.Rda.bak")
      }
    }
    loaded <<- TRUE
  }
  return(list(pedons=pedons, pedons_sf=pedons_sf))
}


getPedonsByPattern <- function(input, s.pedons, musym, 
                               compname, upid,
                               pedon_list, taxon_kind,
                               phasename) {
  
  if(!is.null(isolate(input))) {
    #print(isolate(input))
    #print(paste0("musym:",musym))
    if(is.null(musym))
      musym <- ".*"
    musymmatch <- grepl(pattern=musym, s.pedons$musym)
    #print(musymmatch)
    
    if(is.null(upid))
      upid <- ".*"
    upidmatch <- grepl(pattern=upid, s.pedons$upedonid)
    
    taxname <- as.character(s.pedons$taxonname)
    taxname[is.na(s.pedons$taxonname)]  <- ""
    
    if(is.null(compname) | !length(compname))
      compname <- ".*"
    compmatch <- grepl(pattern=compname, taxname)
    
    localphase <- as.character(s.pedons$localphase)
    localphase[is.na(s.pedons$localphase)]  <- ""
    
    if(is.null(phasename))
      phasename <- ".*"
    phasematch <- grepl(pattern=phasename, localphase)
    
    if(is.null(taxon_kind))
      taxon_kind <- ".*"
    if(taxon_kind == "any")
      taxon_kind = ".*"
    taxkindmatch <- grepl(pattern=taxon_kind, s.pedons$taxonkind)
    
    # idx.match <- rep(TRUE, length(s.pedons$upedonid))
    # # If there has been a list of pedons specified, 
    # #  use that in lieu of all other patterns
    # if(pedon_list != "." & pedon_list != "") { 
    #   
    #   #TODO: use regex and handle whitespace before or after comma in list?
    #   plist <- strsplit(pedon_list,",",fixed=T) 
    #   
    #   if(length(plist[[1]]) >= 1) {
    #     # length should always be 1 or more?
    #     idx.match <- (s.pedons$upedonid %in% plist[[1]]) 
    #     # should this allow for regex too? 
    #   }
    #} else {
      idx.match <- which(musymmatch & upidmatch & compmatch & phasematch & taxkindmatch)
    #}
    
    if(inherits(s.pedons, 'SoilProfileCollection'))
      peds <- s.pedons[idx.match,] 
    else stop("not a spc")
    
      # TODO: assign ghz
    return(peds)
  } else {
    return(numeric(0))
  }
}

diaghzplot2 <- function (f, v, grid.label = "upedonid") 
{
  id <- idname(f)
  s <- site(f)
  v <- names(s)[na.omit(match(v, names(s)))]
  m <- s[, v]
  m <- as.data.frame(lapply(m, factor, levels = c("FALSE", "TRUE")))
  m.plot <- t(as.matrix(as.data.frame(lapply(m, as.numeric))))
  n.vars <- ncol(m)
  n.profiles <- nrow(m)
  if(n.vars > 0 & n.profiles > 0) {
    par(mar = c(1, 6, 6, 1))
    image(x = 1:n.vars, y = 1:n.profiles, z = m.plot, axes = FALSE, col = c(grey(0.9), "RoyalBlue"), 
          xlab = "", ylab = "", ylim = c(0.5, n.profiles + 0.5))
    axis(side = 2, at = 1:n.profiles, labels = s[[grid.label]], 
         las = 1, cex.axis = 1, tick = FALSE)
    axis(side = 3, at = 1:n.vars, labels = v, las = 2, 
         cex.axis = 1)
    abline(h = 1:(n.profiles + 1) - 0.5)
    abline(v = 1:(n.vars + 1) - 0.5)
  }
}


#########Dylan functions

tps.standard <- list(plot.symbol=list(col=1, cex=1, pch=1), 
                     plot.line=list(col=1, lwd=2, alpha=0.75), 
                     strip.background=list(col=grey(c(0.85,0.75))), 
                     layout.heights=list(strip=1.5),
                     box.umbrella=list(col=1, lty=1), 
                     box.rectangle=list(col=1), 
                     box.dot=list(col=1, cex=0.5)
)


##
## functions 
##


# return a table of proportions, including missing data, along with sample size as data.frame
# 'x' must be a factor with levels set in logical order
categorical.prop.table <- function(x, digits=2) {
  # table of proportions, including missing data, with rounding
  x.table <- round(prop.table(table(x, useNA='always')), digits)
  # convert to data.frame for pretty-printing
  x.table.df <- data.frame(t(as.numeric(x.table)), stringsAsFactors=FALSE)
  # transfer names
  names(x.table.df) <- names(x.table)
  # fix NA heading, always the last item
  names(x.table.df)[ncol(x.table.df)] <- 'No.Data'
  # add number of pedons
  x.table.df$N.pedons <- length(x)
  # done
  return(x.table.df)
}


## TODO: finish this function
# generate dendrogram or Sammon mapping plot and color by musym
dend.by.musym <- function(f.i, v=c('clay','total_frags_pct')) {
  
  # lower depth for comparison
  max.depth.dissimilarity <- max(f.i, 'clay')
  if(is.na(max.depth.dissimilarity))
    max.depth.dissimilarity <- 200
  
  # if we have more than 3 profiles, eval dissimilarity
  if(length(f.i) > 3) {
    ## evaluate this: would make more sense to include site-level data too
    # generate between-profile dissimilarity for plot-order
    # note that this breaks when we include profiles without any data
    # generate an index to non-R|Cr|Cd horizons
    filter.idx <- grep('R|Cr|Cd', f.i$genhz, invert=TRUE)
    d <- profile_compare(f.i, vars=v, max_d=max.depth.dissimilarity, k=0, filter=filter.idx, rescale.result=TRUE)
    
    # reduce to 2D: fudge 0-distances by adding a little bit
    s <- isoMDS(d+0.001)
    # generate plotting order as Euclidean distance to origin
    d.order <- order(sqrt(rowSums(sweep(s$points, MARGIN=2, STATS=c(0, 0), FUN='-')^2)))
  }
  
}


## TODO: which quantile "type" is most appropriate?
##    see: http://stackoverflow.com/questions/95007/explain-the-quantile-function-in-r
##    test with: x <- sample(1:100, size=20, replace=TRUE) ; round(t(sapply(1:9, function( i ) quantile(x, type=i))), 2)
##    type = 1: standard interpretation, no interpolation
##    type = 7: (default)
## TODO: this cannot be applied to circular data!
##  data and return as L-RV-H
l.rv.h.summary <- function(i, p=getOption('p.low.rv.high'), qt=getOption('q.type'), sep='-') {
  q <- round(quantile(i, probs=p, na.rm=TRUE, type=qt))
  paste(q, collapse='-')
}

## TODO: which quantile "type" is most appropriate?
##    see: http://stackoverflow.com/questions/95007/explain-the-quantile-function-in-r
##    test with: x <- sample(1:100, size=20, replace=TRUE) ; round(t(sapply(1:9, function( i ) quantile(x, type=i))), 2)
##    type = 1: standard interpretation, no interpolation
##    type = 7: (default)
#  data by generalized horizon
# note: conditional evaluation of rounding: phfield rounded to 1 dec. place, 
# all others to 0 dec. places
conditional.l.rv.h.summary <- function(x, p=getOption('p.low.rv.high'), qt=getOption('q.type')) {
  precision.table <- c(1, 2)
  precision.vars <- c('phfield', 'albedo')
  variable <- unique(x$variable)
  v <- na.omit(x$value) # extract column, from long-formatted input data
  n <- length(v) # get length of actual data
  ci <- quantile(v, na.rm=TRUE, probs=p, type=qt)
  mn <- suppressWarnings(min(v, na.rm=TRUE))
  mx <- suppressWarnings(max(v, na.rm=TRUE))
  low <- ci[1] # low
  rv <- ci[2] #  median
  high <- ci[3] # high
  precision <- if(variable %in% precision.vars) precision.table[match(variable, precision.vars)] else 0
  s <- round(c(mn, low, rv, high, mx), precision)
  # combine essentials into DF
  d <- data.frame(n=n, min=mn, max=mx, low=low, rv=rv, high=high, stringsAsFactors=FALSE) 
  
  # add 'range' column for pretty-printing
  
  #d$range <- with(d, paste0('<div style="width=100%;"><span style="font-size:70%; width:10%; padding:2px">' , s[1], '</span>','<span style="font-size:90%; width:15%; padding:2px">' , s[2], '</span>', '<span style="font-size:100%; width:25%; padding:2px">', s[3], '</span><span style="font-size:90%; width:15%; padding:2px">', s[4], '</span><span style="font-size:70%; width:10%; padding:2px">', s[5], '</span>')) 
  # add the number of data points
  #d$range <- paste0(d$range, '<br><span style="width: 25%;">(', n, ')</span><div>')
  
  d$range <- paste0(s[1],"-",s[2],"-",s[3],"-",s[4],"-",s[5],"(",d$n,")")
  
  # remove NA, Inf, and -Inf
  d$range <- gsub(pattern='NA', replacement='*', x=d$range, fixed=TRUE)
  d$range <- gsub(pattern='-Inf', replacement='*', x=d$range, fixed=TRUE)
  d$range <- gsub(pattern='Inf', replacement='*', x=d$range, fixed=TRUE)
  return(d)
}

## TODO integrate into single summary -> L-RV-H function
diagnostic.hz.summary <- function(x, p, qt=7) {
  # get number of records involved
  n <- nrow(x)
  
  # get low,RV,high values for diagnostic hz boundaries
  f.top <- quantile(x$featdept, probs=p, na.rm=TRUE, type=qt)
  f.bottom <- quantile(x$featdepb, probs=p, na.rm=TRUE, type=qt)
  f.thick <- quantile(x$featdepb - x$featdept, probs=p, na.rm=TRUE, type=qt)
  
  # convert into low-RV-high notation
  r.top <- paste(round(f.top), collapse='-')
  r.bottom <- paste(round(f.bottom), collapse='-')
  r.thick <- paste(round(f.thick), collapse='-')
  
  # combine into DF and return
  d <- data.frame(n=n, top=r.top, bottom=r.bottom, thick=r.thick, stringsAsFactors=FALSE)
  return(d)
}


# from ?toupper
capwords <- function(s, strict = FALSE) {
  cap <- function(s) paste(toupper(substring(s,1,1)),
                           {s <- substring(s,2); if(strict) tolower(s) else s},
                           sep = "", collapse = " " )
  sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}

## custom bwplot function: note that we are altering the interpretation of a bwplot!
## "outliers" removed
custom.bwplot <- function(x, coef=1.5, do.out=FALSE) {
  stats <- quantile(x, p=c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm=TRUE)
  n <- length(na.omit(x))
  # out.low <- x[which(x < stats[1])]
  # out.high <- x[which(x > stats[5])]
  return(list(stats=stats, n=n, conf=NA, out=c(NA, NA)))
}


## summarise surface frags
## TODO: low-rv-high values are hard-coded!
summarise.pedon.surface.frags <-  function(d) {
  
  # compute low-rv-high values... result is L-RV-H
  vars <- c('surface_gravel', 'surface_cobbles', 'surface_stones', 'surface_boulders', 'surface_channers', 'surface_flagstones', 'surface_paragravel', 'surface_paracobbles')
  
  d.summary <- colwise(l.rv.h.summary)(d[, vars])
  # done
  return(d.summary)
}


## summarise GIS data embedded in our SPC (d) as site-level data
## TODO: low-rv-high values are hard-coded!
## TODO: proper summary of aspect data
summarise.pedon.gis.data <- function(d) {
  
  # keep only those variables with gis_* prefix, 'slope_field'
  nm <- names(d)
  nm.idx <- grep('gis_', nm, ignore.case=TRUE)
  v <- c('slope_field', nm[nm.idx])
  # note: drop=FALSE will not convert to vector when gis_ vars are missing
  d <- d[, v, drop=FALSE]
  
  # TODO: this is a hack remove gis_aspect and gis_geomorphons if present
  d$gis_aspect <- NULL
  d$gis_geomorphons <- NULL
  
  # compute low-rv-high values, by column
  # result is L-RV-H
  d.summary <- colwise(l.rv.h.summary)(d)
  
  # done
  return(d.summary)
}


##  texture class
summarize.texture.class <- function(i) {
  # # texcl or inlieu -- no modifiers
  # toi <- i$texcl
  # toi.idx <- is.na(toi) & !is.na(i$lieutex)
  # toi[toi.idx] <- i$lieutex[toi.idx] 
  tt <- sort(round(prop.table(table(i$texture)), 2), decreasing=TRUE)
  tt.formatted <- paste(paste(toupper(names(tt)), ' (', tt, ')', sep=''), collapse=', ' )
  
  # return blank when there are no values
  if(length(tt) == 0)
    return('')
  
  return(tt.formatted)
}


# f.i: subset of the SPC containing only pedons to aggregate
# comp: the name of the current component
summarize.component <- function(f.i) {
  ## TODO: this is wasteful, as we only need 'upedonid' from @site
  # extract horizon+site as data.frame
  h.i <- as(f.i, 'data.frame')
  
  # add a new column: horizon thickness
  h.i$thick <- with(h.i, hzdepb - hzdept) 
  
  # convert dry Munsell Hue into Albedo
  # Soil Sci. Soc. Am. J. 64:1027-1034 (2000)
  h.i$albedo <- (0.069 * h.i$d_value) - 0.114
  
  # extract site data
  site.i <- site(f.i)
  
  ## pedon GIS summaries
  pedon.gis.table <- summarise.pedon.gis.data(site.i)
  
  ## surface fragment summary
  pedon.surface.frags.table <- summarise.pedon.surface.frags(site.i)
  
  #make not-used placeholder values na
  h.i$genhz[h.i$genhz == 'not-used'] <- NA
  
  # ## check for missing genhz labels by pedon
  missing.all.genhz.IDs <- ddply(h.i, 'upedonid', function(i) all(is.na(i$genhz)))
  missing.some.genhz.IDs <- ddply(h.i, 'upedonid', function(i) any(is.na(i$genhz)))
  missing.genhz.IDs <- join(missing.all.genhz.IDs, missing.some.genhz.IDs, by='upedonid')
  names(missing.genhz.IDs) <- c('upedonid', 'missing.all', 'missing.some')
  
  # determine type of missing data
  missing.genhz.IDs$missing.genhz <- apply(missing.genhz.IDs[, -1], 1, function(i) {
    if(all(is.na(i)))
      return('<span style="color:red; font-weight:bold;">all</span>')
    
    if(any(i) & !all(i))
      return('<span style="color:yellow; font-weight:bold;">some</span>')
    
    if(all(i))
      return('<span style="color:red; font-weight:bold;">all</span>')
    
    return('<span style="color:green;">none</span>')
  })

  ## check generalized hz classification
  gen.hz.classification.table <- table(h.i$genhz, h.i$hzname)
  
  # variables to summarize
  vars <- c('claytotest', 'sandtotest', 'phfield', 'gravel', 'paragravel', 'cobbles', 'paracobbles', 'stones', 'channers', 'total_frags_pct', 'albedo')
  # better names, used in final tables / figures
  var.names <- c('HZ', 'claytotest', 'sandtotest', 'pH', 'GR', 'PGR', 'CB', 'PCB', 'ST', 'CN', 'Total RF', 'Albedo')
  
  
  # wide -> long, inclide musym for plotting
  h.i.long <- melt(h.i, id.vars=c('genhz', 'musym'), measure.vars=vars)
  
  # drop values not associated with a generalized horizon label
  h.i.long <- subset(h.i.long, subset=!is.na(genhz))
  h.i.long <- subset(h.i.long, subset=!(genhz=='not-used'))
  
  # summary by variable / generalized hz label
  s.i <- ddply(h.i.long, c('variable', 'genhz'), .fun=conditional.l.rv.h.summary, p=getOption('p.low.rv.high'), qt=getOption('q.type'))
  
  ## tables
  # long -> wide
  s.i$genhz <- factor(s.i$genhz, levels=guessGenHzLevels(f.i)$levels)
  prop.by.genhz.table <- dcast(s.i, genhz ~ variable, value.var='range')
  names(prop.by.genhz.table) <- var.names
  
  # texture class tables
  texture.table <- ddply(h.i, c('genhz'), summarize.texture.class)
  names(texture.table) <- c('Generalized HZ', 'Texture Classes')
  
  # diagnostic hz tables
  diag.hz.table <- data.frame(t(c(1,1,1,1,1)))[0,]
  if(nrow(diagnostic_hz(f.i)))
    diag.hz.table <- ddply(diagnostic_hz(f.i), c('featkind'), .fun=diagnostic.hz.summary, p=getOption('p.low.rv.high'), qt=getOption('q.type'))
  names(diag.hz.table) <- c('kind', 'N', 'top', 'bottom', 'thick')
  
  ## ML-horizonation
  # aggregate
  
  gen.hz.aggregate <- slab(f.i, ~ genhz, cpm=1)
  
  # extract original horizon designations and order
  original.levels <- attr(gen.hz.aggregate, 'original.levels')
  
  #drop not-used level
  #original.levels <- original.levels[original.levels != 'not-used']
  
  # melt into long format, accomodating illegal column names (e.g. 2Bt)
  gen.hz.aggregate.long <- melt(gen.hz.aggregate, id.vars='top', measure.vars=make.names(original.levels))
  
  # replace corrupted horizon designations with original values
  gen.hz.aggregate.long$variable <- factor(gen.hz.aggregate.long$variable, 
                                           levels=levels(gen.hz.aggregate.long$variable), 
                                           labels=original.levels)
  
  # replace very small probabilities with NA
  gen.hz.aggregate.long$value[gen.hz.aggregate.long$value < 0.001] <- NA
  
  # remove NA probabilities
  gen.hz.aggregate.long <- na.omit(gen.hz.aggregate.long)
  
  
  ## EXPERIMENTAL: smooth horizon probability depth functions for simpler interpretation
  ## a smoothing parameter of 0.65 seens to be a good compromise
  ## this can create over-shoots
  ## ideally we would use a PO-model for this
  ## note: smoothing is only possible >= 4 unique values, in this case no smoothing is applied
  gen.hz.aggregate.long <- ddply(gen.hz.aggregate.long, 'variable', function(i) {
    if(length(unique(i$value)) >= 4)
      s <- smooth.spline(i$value, spar=getOption('ml.profile.smoothing'), keep.data=FALSE)$y
    else
      s <- i$value
    # combine into a data.frame
    d <- data.frame(top=i$top, value=s)
    return(d)
  } )
  
  
  # generate ML hz table
  gen.hz.ml <- get.ml.hz(gen.hz.aggregate, original.levels)
  
  # combine all genhz levels with those in the ML hz table
  gzml <- data.frame(hz=factor(original.levels, levels=original.levels))	
  gzml <- join(gzml, gen.hz.ml, by='hz')
  
  # paste together columns, into the format: genhz top-bottom [pseudo.brier]
  labels.for.plot <- with(gzml, paste0(hz, ' ', top, '-', bottom, ' [', round(pseudo.brier, 3), ']'))
  labels.for.plot <- gsub(pattern='NA', replacement='*', labels.for.plot)
  
  # reset factor labels, using data from the ML hz table
  gen.hz.aggregate.long$variable <- factor(gen.hz.aggregate.long$variable, levels=original.levels, labels=labels.for.plot)
  
  # get a reasonable lower depth for the plot
  y.max <- pmax(155, max(f.i, 'clay'))
  
  # plot genhz probabilities vs. depth
  gen.hz.aggregate.plot <- xyplot(top ~ value, groups=variable, data=gen.hz.aggregate.long, type=c('l'), ylim=c(y.max, -5), xlim=c(-0.1, 1.1), auto.key=list(cex=1, space='right', columns=1, padding.text=3, points=FALSE, lines=TRUE), ylab='Depth (cm)', xlab='Horizon Probability', scales=list(alternating=3, y=list(tick.number=15)), panel=function(...) {
    panel.abline(v=seq(0, 1, by=0.1), h=seq(0, 150, by=10), col=gray(0.75), lty=3)
    panel.xyplot(...)
  })
  
  ## properties by musym / genhz
  # throw-out non-soil horizons
  h.i.long.sub <- h.i.long[grep('R|Cr|Cd', h.i.long$genhz, ignore.case=TRUE, invert=TRUE), ]
  
  # bwplot
  prop.by.musym.and.genhz <- bwplot(musym ~ value | variable + genhz, data=h.i.long.sub, as.table=TRUE, xlab='', scales=list(x=list(relation='free')), drop.unused.levels=TRUE, par.settings=tps.standard, stats=custom.bwplot, par.strip.text=list(cex=0.75), panel=function(...) {
    panel.grid(-1, -1)
    panel.bwplot(...)
  })
  
  # convert second paneling dimension to outer strips
  prop.by.musym.and.genhz <- useOuterStrips(prop.by.musym.and.genhz)
  
  # pack into list and return
  return(list(n=length(f.i), pg=pedon.gis.table, mgz=missing.genhz.IDs, ct=gen.hz.classification.table, rt=prop.by.genhz.table, dt=diag.hz.table, tt=texture.table, ml.hz=gen.hz.ml, ml.hz.plot=gen.hz.aggregate.plot, sf=pedon.surface.frags.table, pmg=prop.by.musym.and.genhz))
}
ncss-tech/soilReports documentation built on March 6, 2025, 1:15 a.m.