Nothing
.get_phlabresults_data_from_NASIS_db <- function(SS=TRUE, dsn = NULL) {
# hacks to make R CMD check --as-cran happy:
sampledepthbottom <- NULL
sampledepthtop <- NULL
phiidref <- NULL
# test_ph <- NULL
q <- "SELECT peiidref AS peiid, phiid, phl.seqnum, sampledepthtop, sampledepthbottom, sampleid, datacollector, claytotmeasured, claycarbmeasured, silttotmeasured, siltfinemeasured, siltcomeasured, sandtotmeasured, sandtotmethod, sandvcmeasured, sandcomeasured, sandmedmeasured, sandfinemeasured, sandvfmeasured, sandvfmethod, textureclfieldlab, fiberrubbedpct, fiberunrubbedpct, ph1to1h2o, ph01mcacl2, phnaf, phoxidized, phdeltah2o2, liquidlimitmeasured, plasticlimitmeasured, pi, atterbergsampcond, cole, esttotpotacidityetpa, camgmeh2, potassiummeh2, camgsatpaste, extractaciditykcl, basesatmeh2, cec7, cec82, ecec, phosphatephos, nitratenitrogen, ecmeasured, ecdeterminemeth, ec15, caco3equivmeasured, gypsumequiv, sodium, sar, gypsumreq, humiccolor, fulviccolor, humicfulviccolor, alummeasured, pyrophoshue, pyrophosvalue, pyrophoschroma, melanicindex
FROM
phorizon_View_1 ph
LEFT OUTER JOIN phlabresults_View_1 phl on phl.phiidref = ph.phiid
ORDER BY peiidref, phiid, sampledepthtop;"
channel <- dbConnectNASIS(dsn)
if (inherits(channel, 'try-error'))
return(data.frame())
# toggle selected set vs. local DB
if (SS == FALSE) {
q <- gsub(pattern = '_View_1', replacement = '', x = q, fixed = TRUE)
}
# exec query
d.phlabresults <- dbQueryNASIS(channel, q)
# recode metadata domains
d.phlabresults <- uncode(d.phlabresults, dsn = dsn)
# cache original column names
orig_names <- names(d.phlabresults)
# compute thickness
d.phlabresults$hzthk <- d.phlabresults$sampledepthbottom - d.phlabresults$sampledepthtop
# identify horizons with duplicate phiid
idx <- which(duplicated(d.phlabresults$phiid))
if (length(idx) > 0) {
message(paste("NOTICE: multiple records per pedon horizon exist in the `phlabresults` table, computing weighted averages and dominant values based on sample thickness"))
if (any(is.na(d.phlabresults[idx, "sampledepthbottom"]))) {
message("NOTICE: some `phlabresults` records are missing `sampledepthbottom`; affected weighted averages will return `NA` and dominant values will be from the first (shallowest top depth) record per horizon")
}
# aggregate dup phiid
dup <- d.phlabresults[idx, "phiid"]
dup_idx <- which(d.phlabresults$phiid %in% dup)
d.dups <- d.phlabresults[dup_idx, ]
id_vars <- c("peiid","phiid")
num_vars <- names(d.dups)[!grepl("ph1to1h2o|ph01mcacl2|peiid|phiid", names(d.dups)) &
sapply(d.dups, is.numeric)]
d.dups_num <- cbind(d.dups[, id_vars, drop = FALSE], d.dups[, num_vars, drop = FALSE])
var <- "phiid"
d.dups_num <- do.call("rbind",
by(d.dups_num, d.dups_num[[var]], function(x) {
data.frame(
peiid = unique(x[['peiid']]),
lapply(x[,colnames(x)[2:ncol(x)]], function(x2)
weighted.mean(x2, w = x$hzthk, na.rm = TRUE))
)})
)
char_vars <- names(d.dups)[names(d.dups) %in% c("hzthk") |
sapply(d.dups, function(x) is.character(x) | is.factor(x) | is.logical(x))]
d.dups_char <- cbind(d.dups[, id_vars, drop = FALSE], d.dups[, char_vars, drop = FALSE])
d.dups_char <- do.call(
"rbind", by(d.dups_char, d.dups_char[[var]], function(x) {
data.frame(
peiid = unique(x[['peiid']]),
lapply(x[2:ncol(x)], function(x2) x2[max(c(1, which.max(x$hzthk)), na.rm = TRUE)])
)})
)
num_ph <- names(d.dups)[names(d.dups) %in% c("hzthk") |
grepl("ph1to1h2o|ph01mcacl2", names(d.dups))]
d.dups_ph <- cbind(d.dups[, id_vars, drop = FALSE], d.dups[, num_ph, drop = FALSE])
d.dups_ph <- do.call(
"rbind",
by(d.dups_ph, d.dups_ph[[var]], function(x) { data.frame(
peiid = unique(x[['peiid']]),
phiid = unique(x[['phiid']]),
lapply(x[3:ncol(x)], function(x2) -log10(weighted.mean(1/10^x2, weights = x$hzthk, na.rm = TRUE)))
)})
)
# remove calculated horizon thickness
# TODO: should hzthk be returned as metadata? relevant to interpretation of combined lab results
d.dups_num$hzthk <- NULL
d.dups_char$hzthk <- NULL
d.dups_ph$hzthk <- NULL
d.phlabresults$hzthk <- NULL
d.nodups <- merge(d.dups_num, d.dups_char, by = c("peiid", "phiid"), all.x = TRUE)
d.nodups <- merge(d.nodups, d.dups_ph, by = c("peiid", "phiid"), all.x = TRUE)
# fill missing columns in "duplicates removed" data with NA
# these are choice lists e.g. "sandtotmethod", "sandvfmethod", "atterbergsampcond", "ecdeterminemeth",
# "pyrophoshue", "pyrophosvalue", "pyrophoschroma"
# when these columns area all NA the data type is logical and they fall through the cracks of data type splits
d.nodups[orig_names[!orig_names %in% colnames(d.nodups)]] <- NA
# recombine duplicated and non-duplicated data
d.phlabresults <- rbind(d.phlabresults[-dup_idx, orig_names], d.nodups[orig_names])
}
# relabel names
d.phlabresults[c("sampledepthtop", "sampledepthbottom", "hzthk")] <- NULL
names(d.phlabresults) <- gsub("measured|lab$", "", names(d.phlabresults))
vars <- c("peiid","phiid")
idx <- names(d.phlabresults) %in% vars
names(d.phlabresults)[idx] <- vars
names(d.phlabresults)[!idx] <- paste0(names(d.phlabresults)[!idx], "_lab")
# TODO: final cleaning of duplicate rows - dups exist in NASIS for some reason, so should this happen first
# to eliminate extra rows with no data? Not sure what is causing this on the NASIS side
d.phlabresults <- d.phlabresults[rowSums(is.na(d.phlabresults)) < (length(d.phlabresults) - 1),]
# done
return(d.phlabresults)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.