globalVariables(c(
".SD", "AgeHeaderKey", "ageMethod", "AgeSampleKey", "AgeSampleStatusCode",
"AgeSampleTypeCode", "AgeTreeKey", "baseAgeYear", "CoordTypeCode",
"CrownClassCode", "CrownClsr", "Datum", "FieldAge", "FieldAge_Base",
"FieldAge_DBH", "FieldAge_diff", "FieldSeasonYear", "firstAgeMsrYear",
"firstMsrYear", "fullGenusSpec", "GrowthPlotNum", "HtTot", "Latin_full",
"Length", "LocPlotKey", "MsrDate", "nCodominant", "nDominant",
"numAreas", "OfficeAge", "OfficeAge_Base", "OfficeAge_DBH", "OfficeAge_diff",
"PackageKey", "plotArea", "PlotKey", "PlotName", "PSP", "Radius",
"SpecCode", "SpecCommon", "SpecGenus", "SpecSpec", "standardizedAge",
"StartYear", "TreatTypeName", "TreatYear", "TreeGrowthPlotKey",
"TreeHeaderKey", "TreeKey", "TreeMsrKey", "TreeNum", "TreeOriginCode",
"TreeRenumber", "TreeStatusCode", "unifiedAge", "VisitTypeName", "Width"
))
#' standardize and treat the Ontario PSP data
#'
#' @param ONPSPlist list of relevant plots
#' @param sppEquiv table of species names - see `LandR::sppEquiv`-
#' must have column 'latin' and 'PSP'
#'
#' @return a list of plot and tree data.tables
#'
#' @export
#' @importFrom data.table copy data.table set setcolorder setkey dcast
#'
dataPurification_ONPSP <- function(ONPSPlist, sppEquiv) {
## TODO: review excludeAllObs - I dont' think we exclude anything at the moment
##### Location ####
Plot <- ONPSPlist[["tblPlot"]]
LocCoord <- ONPSPlist[["tblLocCoord"]]
LocPlot <- ONPSPlist[["tblLocPlot"]][, .(LocPlotKey, PlotKey, Zone)] # need UTM Zone here
Plot <- Plot[LocPlot, on = c("PlotKey")]
# 41 plots have no location ? Unclear why these are missing
# the same plot may have two location entries due to coordtypeCode
Plot <- LocCoord[Plot, on = c("PlotKey")]
# CoordTypeCode: 1 = road post, 3 = location post, from tlkCoordType
# note that not all plots are unique locations
investigateMissingLoc <- Plot[is.na(CoordTypeCode)]
Plot <- Plot[CoordTypeCode == 3]
Plot <- Plot[, .SD, .SDcols = c(
"PlotKey", "PlotName", "Easting",
"Northing", "Datum", "Zone", "DatasetCode"
)]
rm(LocCoord, LocPlot)
#### Identifying treated plots ####
standInfoTreatment <- ONPSPlist[["tblStandInfoTreat"]]
treatType <- ONPSPlist[["tlkpTreatType"]]
standInfoTreatment <- treatType[standInfoTreatment, on = c("TreatTypeCode")]
rm(treatType)
treated <- Plot[PlotKey %in% standInfoTreatment$PlotKey]
treated <- treated[standInfoTreatment, on = c("PlotKey")]
Visit <- ONPSPlist[["tblVisit"]]
VisitType <- ONPSPlist[["tlkpVisitType"]]
Visit <- VisitType[Visit, on = c("VisitTypeCode")]
Visit$VisitTypeName <- as.factor(Visit$VisitTypeName)
rm(VisitType)
treeGrowthPlot <- ONPSPlist[["tblTreeGrowthPlot"]]
treeMsr <- ONPSPlist[["tblTreeMsr"]]
treeMsr <- treeMsr[, .(TreeMsrKey, TreeGrowthPlotKey, TreeKey, TreeStatusCode, DBH, CrownClassCode)]
treeMsr <- treeGrowthPlot[treeMsr, on = ("TreeGrowthPlotKey")]
# 2- connect treeheader with Visit and Package
treeHeader <- ONPSPlist[["tblTreeHeader"]] # has headerkey, VisitKey, and MsrDate.
treeHeader <- Visit[treeHeader, on = c("VisitKey")] # getting package key from Package
Package <- ONPSPlist[["tblPackage"]]
treeHeader <- Package[, .(PackageKey, PlotKey, StartYear)][treeHeader, on = c("PackageKey")]
treeHeader[, c("ManualCode", "VisitTypeCode") := NULL]
# connect plot location and tree:
# treeGrowthPlotKey in treeMsr -> treeGrowthPlotKey in treeGrowthPlot;
# treeHeader in treeGrowthPlot -> treeHeader in treeHeader;
# PlotKey in treeHeader -> PlotKey in Plot
treeGrowthPlot[Radius == 11.26, Radius := 11.28]
treeGrowthPlot[Radius == 11.33, Radius := 11.28]
treeGrowthPlot[, plotArea := Width * Length]
treeGrowthPlot[is.na(plotArea), plotArea := round(Radius^2 * 3.142)]
# standardize to ha
treeGrowthPlot[, plotArea := plotArea / 10000]
# some plots change from radius 11.33 to 11.28 over time - leading to differing areas.
# IMO this area discrepancy is allowable (< 1%) but others are drastic and should be excluded
smallGrowthPlotKey <- treeGrowthPlot[, .(TreeGrowthPlotKey, TreeHeaderKey, GrowthPlotNum, TreeRenumber, plotArea)]
plotData <- treeHeader[Plot, on = c("PlotKey")]
plotData[, c("DatasetCode", "StartYear") := NULL]
plotData[, c("Easting", "Northing", "Zone", "Datum") := NULL]
plotData <- smallGrowthPlotKey[plotData, on = c("TreeHeaderKey")]
plotData[, VisitTypeName := NULL]
# TODO confirm that the reason this table is < rows than smallGrowthPlot is the loss of treated plots
# partially confirmed - all but two (PlotKey 7565 and 7566)
rm(smallGrowthPlotKey)
##### height#####
treeMsr <- treeMsr[, .SD, .SDcol = colnames(treeMsr)[!colnames(treeMsr) %in%
c("LocAzi", "Radius", "LocDist", "Width", "Length")]]
treeHeight <- ONPSPlist[["tblHt"]]
treeMsr <- treeHeight[, .(TreeMsrKey, HtTot)][treeMsr, on = c("TreeMsrKey")]
rm(treeHeight)
#### species####
tree <- ONPSPlist[["tblTree"]]
specLk <- ONPSPlist[["tlkpSpec"]]
specLk[, fullGenusSpec := paste(SpecGenus, tolower(SpecSpec))]
tree <- specLk[, .(SpecCommon, fullGenusSpec, SpecCode)][tree, on = c("SpecCode")]
tree <- tree[, c("PostNum", "Dist", "Azi", "MortCauseCode", "TagTypeCode") := NULL]
tree <- tree[treeMsr, on = c("TreeKey")]
rm(specLk, treeMsr)
# clean up
tree[, TreeStatusCode := gsub(pattern = " ", replacement = "", x = TreeStatusCode)]
tree[, CrownClsr := NULL]
##### age####
# connect ageHeader with plots for purpose of stand age
ageHeader <- ONPSPlist[["tblAgeHeader"]] # connects visit key (msr purpose) with other age data
ageHeader <- Visit[ageHeader, on = c("VisitKey")]
ageHeader <- Package[ageHeader, on = c("PackageKey")]
# now we can connect with plot
ageHeader[, c(
"ResidCmpnt", "ManualCode", "VisitTypeName", "VisitTypeCode",
"CoOpMethod", "ApproachCode", "StartYear"
) := NULL]
ageHeader <- Plot[, .(PlotKey, PlotName)][ageHeader, on = c("PlotKey")]
# MsrDate cannot be used for joins even though it is in both tables as it will vary by day, and not a key (unlike VisiKey)
# get measurements of individual trees
ageTree <- ONPSPlist[["tblAgeTree"]]
# crownclass codes C D and E are code for codominant, dominant, emergent
# ageTree[, c("SiteCondCode", "HtLiveBranch", "MortAzi", "MortDist", "HtLiveFoliage", "HtToDBH") := NULL]
ageSample <- ONPSPlist[["tblAgeSample"]]
# join so we get plot ID
temp <- ageHeader[, .(AgeHeaderKey, PlotName, PlotKey, FieldSeasonYear)]
ageTree <- temp[ageTree, on = c("AgeHeaderKey")]
rm(temp)
# there are two plots with missing IDs - ageHeaderKey 9466 and 9467.
# also note that AgeSample does not have AgeTreeKey 20639, and 165 others,
# though these exist the AgeTree table
treeAges <- ageSample[, .(
AgeSampleKey, AgeTreeKey, FieldAge, OfficeAge,
AgeSampleStatusCode, AgeSampleTypeCode
)]
treeAges <- treeAges[ageTree, on = c("AgeTreeKey")]
ageSampleType <- ONPSPlist[["tlkpAgeSampleType"]]
treeAges <- ageSampleType[treeAges, on = ("AgeSampleTypeCode")]
# 1 DBH Core and 4 DBH Cookie need to be adjusted to match Base
treeAges <- treeAges[, .(
AgeSampleKey, AgeTreeKey, FieldAge, OfficeAge, AgeSampleTypeCode,
AgeSampleStatusCode, GrowthPlotNum, FieldSeasonYear, TreeNum, CrownClassCode, DBH
)]
# sometimes the base will be wrong due to rot, etc. So Only use C - Complete
treeAges <- treeAges[AgeSampleStatusCode == "C"]
# remove the non dominant crown class
treeAges[, CrownClassCode := gsub(pattern = " ", replacement = "", x = CrownClassCode)]
treeAges <- treeAges[CrownClassCode %in% c("C", "D")]
# treat DBH and Base measurements as separate columns, calculate difference when both are measured
# use this to standardize measurements (as done by Yong Luo in Alberta) by adding difference to DBH
treeAges[AgeSampleTypeCode %in% c(1, 4), ageMethod := "DBH"] # DBH Core and DBH cookie
treeAges[AgeSampleTypeCode %in% c(2, 3), ageMethod := "Base"] # Base Core and Base cookie
treeAges <- treeAges[!is.na(AgeSampleKey)] # get rid of age trees with no samples
# cast the multiple age measurements - there may be a way to drop the NA columns
treeAges <- dcast(treeAges, AgeTreeKey + GrowthPlotNum + FieldSeasonYear +
TreeNum + CrownClassCode + DBH ~ ageMethod,
value.var = c("FieldAge", "OfficeAge"), fun.aggregate = mean, drop = c(TRUE)
)
# correct obvious mistakes
treeAges[AgeTreeKey == 80459, OfficeAge_Base := 73] # they are clearly missing the 7 - age is 3 otherwise
treeAges[AgeTreeKey == 43397, FieldAge_Base := 94] # they are clearly missing the 9
# standardize DBH and Base measurements
treeAges[!is.na(FieldAge_DBH) & !is.na(FieldAge_Base), FieldAge_diff := FieldAge_Base - FieldAge_DBH]
treeAges[!is.na(OfficeAge_DBH) & !is.na(OfficeAge_Base), OfficeAge_diff := OfficeAge_Base - OfficeAge_DBH]
meanFieldAgeDiff <- as.integer(mean(treeAges$FieldAge_diff, na.rm = TRUE))
meanOfficeAgeDiff <- as.integer(mean(treeAges$OfficeAge_diff, na.rm = TRUE))
treeAges[, c("FieldAge_diff", "OfficeAge_diff") := NULL]
# apply difference
treeAges[is.na(OfficeAge_Base) & !is.na(OfficeAge_DBH), OfficeAge_Base := OfficeAge_DBH + meanOfficeAgeDiff]
treeAges[is.na(FieldAge_Base) & !is.na(FieldAge_DBH), FieldAge_Base := FieldAge_DBH + meanFieldAgeDiff]
treeAges[is.nan(OfficeAge_Base), OfficeAge_Base := NA]
treeAges[is.nan(FieldAge_Base), FieldAge_Base := NA]
treeAges[, c("OfficeAge_DBH", "FieldAge_DBH") := NULL]
treeAges[, unifiedAge := OfficeAge_Base]
treeAges[is.na(unifiedAge), unifiedAge := FieldAge_Base]
treeAges <- treeAges[!is.na(unifiedAge)]
# view 1328596PIP to see why this is necessary - some trees have complicated data
temp <- ageTree[, .(AgeHeaderKey, AgeTreeKey)]
treeAges <- temp[treeAges, on = c("AgeTreeKey")]
treeAges <- ageHeader[treeAges, on = c("AgeHeaderKey", "FieldSeasonYear")]
rm(temp, meanOfficeAgeDiff, meanFieldAgeDiff)
treeAges[, firstAgeMsrYear := min(FieldSeasonYear), .(PlotName)]
# For jurisdictions with no estimated stand age,
# the PSPclean approach derives standAge as mean of N trees with crown class Dominant, where N > 1.
# if none are available, we take the mean of all dominant and co-dominant trees
treeAges[, nDominant := sum(CrownClassCode == "D "), .(PlotName)]
treeAges[, nCodominant := sum(CrownClassCode == "C "), .(PlotName)]
treeAges[, standardizedAge := unifiedAge - FieldSeasonYear + firstAgeMsrYear] # standardize
standAgesDominant <- treeAges[
nDominant > 1 & CrownClassCode == "D",
.(meanStandAge = as.integer(mean(standardizedAge))), .(PlotName, firstAgeMsrYear)
]
standAgesOther <- treeAges[
nDominant < 2,
.(meanStandAge = as.integer(mean(standardizedAge, na.rm = TRUE))),
.(PlotName, firstAgeMsrYear)
]
standAges <- rbind(standAgesDominant, standAgesOther)
rm(ageHeader, ageSample, ageSampleType, ageTree, standAgesOther, standAgesDominant, treeAges, Visit)
#### Canopy Origin ####
standInfoHeader <- ONPSPlist[["tblStandInfoHeader"]]
# some stands originated from old fields and from harvest
mainCanopyOrigin <- ONPSPlist[["tlkpMainCanopyOrigin"]]
standInfoHeader <- mainCanopyOrigin[standInfoHeader, on = c("MainCanopyOriginCode")]
standInfoHeader[, c("PlotUnifRationale", "MaturClassRationale") := NULL]
rm(mainCanopyOrigin)
###### prep for modules #####
plotData <- unique(plotData[, .(
PlotName, TreeGrowthPlotKey, plotArea,
FieldSeasonYear, MsrDate, GrowthPlotNum
)])
# add relevant columns to tree
tree <- plotData[, .(PlotName, FieldSeasonYear, TreeGrowthPlotKey)][tree, on = c("TreeGrowthPlotKey")]
# filter out dead trees - done first so that plots aren't excluded because of unmeasured dead trees
# Cut, Dead, Dead Veteran, Exclude, Gone (X), Live, Live Veteran - from tlkpTreeStatus
tree <- tree[TreeStatusCode %in% c("L", "V")]
tree[, c("TreeStatusCode", "SpecCode") := NULL]
# filter out missing DBH
missingDBH_TGPK <- unique(tree[is.na(DBH), ]$TreeGrowthPlotKey)
tree <- tree[!TreeGrowthPlotKey %in% missingDBH_TGPK]
# rm(missingDBH_TGPK)
# identify the plots with re-numbered trees, these can be kept but need new PlotNames
needNewPlotNames <- unique(tree[TreeRenumber == 1, ]$TreeGrowthPlotKey)
# remove planted stands (and natural stands with planted trees)
# 1584 plots have planted trees - remove individual growth plots.
#### drop dead trees and planted trees
treeOrigin <- ONPSPlist[["tlkpTreeOrigin"]]
tree <- treeOrigin[tree, on = c("TreeOriginCode")]
hasPlanted <- unique(tree[TreeOriginCode %in% c("P", "p")]$TreeGrowthPlotKey)
tree <- tree[!TreeGrowthPlotKey %in% hasPlanted]
rm(hasPlanted, treeOrigin)
tree[, TreeOriginCode := NULL]
# filter for only natural stands
# 328 plots have some kind of treatment (planted, seeded, thinning, pesticide, herbicide, etc)
# some plots can be kept because measuring happened first - 32 growth plots
treatedBad <- plotData[treated[, .(PlotName, TreatYear, TreatTypeName)], on = c("PlotName")]
treatedBad <- treatedBad[FieldSeasonYear >= TreatYear]
tree <- tree[!TreeGrowthPlotKey %in% treatedBad$TreeGrowthPlotKey, ]
rm(treated, treatedBad)
# use treeKey for treeNumber, as it is unique to each plot but not through time.
tree[, c("TreeNum", "TreeRenumber", "PlotMapGrowthPlotKey", "Section") := NULL]
setnames(tree, "TreeKey", "TreeNumber")
# correct growthPlot numbers using factor of growthPlot_treeNumber. Confirm rerenumber, oldNumber, treeNumber
# standardize species names - for biomass estimation
sppEquiv <- sppEquiv[, .(Latin_full, PSP)]
setnames(sppEquiv, new = c("fullGenusSpec", "newSpeciesName"))
sppEquiv <- unique(sppEquiv)
# fix a few codings to match PSP - there is no biomass equation for species-specific willow anyway
tree[
fullGenusSpec %in% c("Salix sp (tree)", "Salix alba", "Salix babylonica"),
fullGenusSpec := "Salix spp."
]
# this seems most likely, among populus spp with biomass equations
tree[fullGenusSpec %in% c("Populus x", "Populus sp"), fullGenusSpec := "Populus balsamifera"]
tree[fullGenusSpec == "Acer saccharum ssp. nigrum", fullGenusSpec := "Acer saccharum"]
tree <- sppEquiv[tree, on = ("fullGenusSpec")]
tree[SpecCommon == "Unknown Hardwood", newSpeciesName := "unknown hardwood"]
tree[is.na(newSpeciesName) | newSpeciesName == "", newSpeciesName := tolower(SpecCommon)]
tree[, c("OriginName", "fullGenusSpec", "OrigTreeNum") := NULL]
#### final clean up of Plot ####
rm(standInfoTreatment, standInfoHeader, Package)
# filter for those missing locations
plotData <- plotData[!PlotName %in% investigateMissingLoc$PlotName]
## filter for only those with age
setnames(standAges, c("firstAgeMsrYear", "meanStandAge"), new = c("baseAgeYear", "baseSA"))
plotData <- standAges[plotData, on = c("PlotName")]
plotData <- plotData[!is.na(baseSA)]
gc()
# as some some plot measurements occured prior to age measurements, adjust ages backwards
plotData[, baseYear := min(FieldSeasonYear), .(PlotName)]
plotData[baseYear < baseAgeYear, baseSA := baseSA + (baseAgeYear - baseYear)]
plotData[, baseAgeYear := NULL]
# filter for incorrect plot size - this plot size changed over time and was not recorded correctly-
# person comm.
plotData <- plotData[PlotName != "FCBOW56-2002-21PGP", ]
# standardize area -
# some plots have inconsistent plot sizes -
# some differences are rounding error (11.33m radius vs. 11.28m) but others change by > 100 m2
# page 79 of the manual explains why some plots have been expanded over time
# but they should not have been reduced (though FCTEM2001038PGP appears to have been reduced by 100m2)
# reject any where area differs by > 5m2
# this needs to be reviewed
plotData <- plotData[TreeGrowthPlotKey %in% tree$TreeGrowthPlotKey]
plotData[, plotArea := sum(plotArea), .(PlotName, FieldSeasonYear)]
badAreas <- plotData[, .(numAreas = length(unique(plotArea))), .(PlotName)]
badAreas <- badAreas[numAreas > 1]
plotData <- plotData[!PlotName %in% badAreas$PlotName]
plotData[, GrowthPlotNum := NULL]
# add location info
plotData <- Plot[, .(PlotName, Easting, Northing, Zone, Datum)][plotData, on = c("PlotName")]
plotData[, MeasureID := as.factor(paste0(PlotName, FieldSeasonYear))]
plotData[, MeasureID := as.factor(paste0("ONPSP_", as.numeric(MeasureID)))]
setnames(plotData, c("PlotName", "FieldSeasonYear", "plotArea"), c("OrigPlotID1", "MeasureYear", "PlotSize"))
plotData[, c("MsrDate", "TreeGrowthPlotKey") := NULL]
plotData <- unique(plotData) # treat different growth plots within a plot as one
#### Final clean up of Tree####
# use TreeNumber - MeasureID will come from paste of PlotName_FieldSeasonYear
tree[, c("CrownClassCode", "TreeHeaderKey") := NULL]
setnames(tree, c("HtTot", "FieldSeasonYear", "PlotName", "SpecCommon"), c("Height", "MeasureYear", "OrigPlotID1", "Species"))
tree <- tree[plotData[, .(MeasureYear, OrigPlotID1, MeasureID)], on = c("OrigPlotID1", "MeasureYear")]
# some tree measurements wil be dropped as the plots were filtered out
# TODO: review needNewPlotNames - these are renumbered tree
tree[, c("TreeGrowthPlotKey", "TreeMsrKey", "GrowthPlotNum") := NULL]
plotData[, OrigPlotID1 := as.factor(paste0("ONPSP_", OrigPlotID1))]
tree[, OrigPlotID1 := as.factor(paste0("ONPSP_", OrigPlotID1))]
plotData[, Datum := as.factor(Datum)]
setkey(tree, MeasureID, OrigPlotID1, MeasureYear, TreeNumber, Species, DBH, Height, newSpeciesName)
setcolorder(tree)
setkey(plotData, OrigPlotID1, MeasureID, MeasureYear)
setcolorder(plotData)
return(list(
plotHeaderData = plotData,
treeData = tree
))
}
#' retrieve preprocessed Ontario PSP and PGP data
#'
#' @param dPath the Access database for PSP and PGP plots
#' @param ... additional args passed to prepInputs
#'
#' @return a list of plot and tree data.tables
#'
#' @export
#' @importFrom data.table fread
prepInputsOntarioPSP <- function(dPath, ...) {
toget <- c(
"tblAgeSample.csv", "tblAgeTree.csv", "tblLocCoord.csv", "tblLocPlot.csv",
"tblPackage.csv", "tblPlot.csv", "tblStandInfoHeader.csv",
"tblStandInfoTreat.csv", "tblTree.csv", "tblTreeGrowthPlot.csv",
"tblTreeHeader.csv", "tblTreeMsr.csv", "tblVisit.csv", "tlkpAgeSampleType.csv",
"tlkpMainCanopyOrigin.csv", "tlkpSpec.csv", "tlkpTreatType.csv",
"tlkpTreeOrigin.csv", "tlkpVisitType.csv", "tblHt.csv"
)
out <- prepInputs(url = "https://drive.google.com/file/d/1ca6TC7952cU4M2dJkT68IOsFmObaDrQn",
targetFile = "tblAgeHeader.csv",
archive = "Ontario_GrowthPlot_Subset.zip",
destinationPath = dPath,
alsoExtract = toget,
fun = "fread",
...)
actualOut <- file.path(dPath, c(toget, "tblAgeHeader.csv"))
actualNames <- sub(basename(actualOut), pattern = ".csv", replacement = "")
actualOut <- lapply(actualOut, fread)
names(actualOut) <- actualNames
return(actualOut)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.