#' RecruitmentCorrection
#'
#' @param Data (data.frame or data.table)
#' The dataset should preferably contain the column of corrected diameters:
#' 'DBH_TreeDataCor', otherwise the function will take the column 'Diameter'
#'
#' @param KeepMeas In case of **multiple diameter measurements** in the same
#' census year, on which to apply the correction:
#' Possible values: "MaxHOM", "MaxDate" (character).
#' - "MaxHOM": apply the correction to the measurement taken at the
#' **highest POM**
#' - "MaxDate": apply the correction to the **most recent measurement** (same
#' year but more recent date)
#'
#' @param MinDBH Minimum diameter of trees inventoried according to your
#' protocol (in cm) (numeric, 1 value) (Default = 10 cm)
#'
#' @param PositiveGrowthThreshold A tree widening by more than x cm/year is
#' considered abnormal (numeric, 1 value) (Default = 5 cm)
#'
#' @param InvariantColumns Vector with the names of the columns that are
#' supposed to have always the same value for each measurement of the same
#' tree (character)
#'
#' @param DetectOnly TRUE: Only detect errors, FALSE: detect and correct errors
#' (logical)
#'
#' @return Add rows for forgotten recruits with them estimated DBH in the
#' 'Diameter_TreeDataCor' column, create a 'CorrectedRecruit' col (logical)
#' and fill the 'Comment' column : "This DBH is/was the 1st recorded for this
#' tree, according to its annual growth and the census done for this plot, it
#' should have been recruited earlier according to your protocol (MinDBH)."
#'
#' @details If the size of the tree has never changed, or if there is only one
#' value, no recruits are added. If the Diameter has not been corrected
#' ('Diameter_TreeDataCor' column does not exist), the function will create it
#' for the forgotten recruits. It is strongly recommended to correct the
#' Diameter before correcting the recruits
#'
#' @importFrom data.table data.table rbindlist
#' @importFrom stats na.omit lm
#'
#' @export
#'
#' @examples
#' library(data.table)
#' data(TestData)
#' setnames(TestData, "Diameter", "Diameter_TreeDataCor")
#'
#' Rslt <- RecruitmentCorrection(TestData,
#' InvariantColumns = c("Site",
#' "Genus",
#' "Species",
#' "Family",
#' "ScientificName"))
#' IdCorr <- Rslt[CorrectedRecruit == TRUE, IdTree]
#' TreesCorr <- Rslt[IdTree %in% IdCorr]
#'
#' library(ggplot2)
#' ggplot(TreesCorr) +
#' aes(x = Year, y = Diameter_TreeDataCor) +
#' geom_line(size = 0.5, colour = "#112446") +
#' geom_point(shape = "circle", size = 1.5, mapping = aes(color = CorrectedRecruit)) +
#' theme_minimal() +
#' facet_wrap(vars(IdTree), scales = "free")
#'
RecruitmentCorrection <- function(
Data,
KeepMeas = c("MaxHOM", "MaxDate"),
MinDBH = 10,
PositiveGrowthThreshold = 5,
InvariantColumns = c("Site",
"Genus_TreeDataCor",
"Species_TreeDataCor",
"Family_TreeDataCor",
"ScientificName_TreeDataCor"),
DetectOnly = FALSE
){
#### Arguments check ####
# Data
if (!inherits(Data, c("data.table", "data.frame")))
stop("Data must be a data.frame or data.table")
# IdStem or IdTree? ---------------------------------------------------------------------------------------
# If no IdStem take IdTree
if((!"IdStem" %in% names(Data) | all(is.na(Data$IdStem))) &
("IdTree" %in% names(Data) & any(!is.na(Data$IdTree))) ){ ID <- "IdTree"
}else{ ID <- "IdStem"}
if(!any(c("IdStem", "IdTree") %in% names(Data)) | (all(is.na(Data$IdStem)) & all(is.na(Data$IdTree))) )
stop("The 'IdStem' or 'IdTree' column is missing in your dataset")
# ---------------------------------------------------------------------------------------------------------
# MinDBH/PositiveGrowthThreshold (numeric, 1 value)
if(!all(unlist(lapply(list(MinDBH, PositiveGrowthThreshold), length)) %in% 1) |
!all(unlist(lapply(list(MinDBH, PositiveGrowthThreshold),
inherits, c("numeric", "integer")))))
stop("The 'MinDBH' and 'PositiveGrowthThreshold'' arguments
of the 'RecruitmentCorrection' function must be 1 numeric value each")
if(DetectOnly %in% FALSE){
# InvariantColumns
if (!inherits(InvariantColumns, "character"))
stop("'InvariantColumns' argument must be of character class")
}
# DetectOnly (logical)
if(!all(unlist(lapply(list(DetectOnly),
inherits, "logical"))))
stop("The 'DetectOnly' argument
of the 'RecruitmentCorrection' function must be logicals")
# Diameter_TreeDataCor column exists
if(!"Diameter" %in% names(Data) & !"Diameter_TreeDataCor" %in% names(Data))
stop("The 'Diameter' or the 'Diameter_TreeDataCor' (corrected Diameter)
column does't exist in the dataset.")
if(DetectOnly %in% FALSE){
# Check if the InvariantColumns name exists in Data
for(c in InvariantColumns){
if (!c %in% names(Data)){ cc <- gsub("_TreeDataCor", "", c) # remove - Cor
if (!cc %in% names(Data)){ # Col without - Cor exists?
stop(paste("InvariantColumns argument must contain one or several column names (see help)."
,cc,"is apparently not a dataset's column"))
}else{ InvariantColumns[InvariantColumns == c] <- cc # If yes replace by the col name without cor
warning("",c," column does't exist. ",cc," column is therefore considered as InvariantColumns instead of ",c,"")
}
} # if c doest exist
} # end c loop
}
#### Function ####
# data.frame to data.table
setDT(Data)
# Remove duplicated measurements per Year because different POM or Date -----------------------------------
CompleteData <- copy(Data)
Data <- UniqueMeasurement(Data, KeepMeas = KeepMeas, ID = ID)
DuplicatedRows <- CompleteData[!Data, on = .NATURAL] # rows removed
if(!"Comment" %in% names(Data)) Data[, Comment := ""]
if(DetectOnly %in% FALSE){
Data[, CorrectedRecruit := FALSE] # The initial rows are not corrected recruits
}
# Order IDs and times in ascending order ----------------------------------------------------------------------------
Data <- Data[, Year := as.numeric(Year)]
Data <- Data[order(get(ID), Year)]
# IDs vector --------------------------------------------------------------------------------------------------------
Ids <- as.vector(na.omit(unique(Data[!is.na(Year), get(ID)]))) # Tree Ids
# Dataset with the rows without IDS ----------------------------------------------------------------------------------
DataIDNa <- Data[is.na(get(ID))]
# Dataset with the rows without Year --------------------------------------------------------------------------------
DataYearNa <- Data[is.na(Year)]
# Apply for all the trees
# i = "100635"
Data <- do.call(rbind, lapply(Ids, function(i) RecruitmentCorrectionByTree(
Data[get(ID) %in% i & !is.na(Year)], # per ID, all censuses
MinDBH = MinDBH,
PositiveGrowthThreshold = PositiveGrowthThreshold,
InvariantColumns = InvariantColumns,
PlotCensuses = as.vector(na.omit( # rm NA
unique(Data[Plot %in% unique(Data[get(ID) %in% i, Plot]), Year]) # the censuses for the plot in which the tree is
)),
DetectOnly = DetectOnly
)
)) # do.call apply the 'rbind' to the lapply result
# Re-put the rows duplicated, or without ID or Year -----------------------------------------------------------------
Data <- rbindlist(list(Data, DuplicatedRows, DataIDNa, DataYearNa), use.names = TRUE, fill = TRUE)
# Order IDs and times in ascending order ----------------------------------------------------------------------------
Data <- Data[order(get(ID), Year)]
return(Data)
}
#' RecruitmentCorrectionByTree
#'
#' @param DataTree A dataset corresponding to a single tree's (1 IdTree/Idstem)
#' measurements (data.frame or data.table). The dataset should preferably
#' contain the column of corrected diameters: 'Diameter_TreeDataCor', otherwise the function
#' will take the column "Diameter"
#'
#' @param MinDBH Minimum diameter of trees inventoried according to your
#' protocol (in cm) (numeric, 1 value)
#'
#' @param PositiveGrowthThreshold A tree widening by more than x
#' cm/year is considered abnormal (numeric, 1 value)
#'
#' @param InvariantColumns Vector with the names of the columns that are
#' supposed to have always the same value for each measurement of the same
#' tree (character)
#'
#' @param PlotCensuses Census years for the plot in which the tree is (integer)
#'
#' @param DetectOnly TRUE: Only detect errors, FALSE: detect and correct errors
#' (logical)
#'
#' @details If the size of the tree has never changed, or if there is only one
#' value the same value is kept for the added forgotten recruits.
#' If the Diameter has not been corrected (Diameter_TreeDataCor column does
#' not exist), the function will create it for the forgotten recruits. It is
#' strongly recommended to correct the Diameter before correcting the
#' recruits.
#'
#' @return Add rows for forgotten recruits with them estimated DBH in the
#' 'Diameter_TreeDataCor' column, create a 'CorrectedRecruit' column (logical)
#' and fill the 'Comment' column : "This DBH is/was the 1st recorded for this
#' tree, according to its annual growth and the census done for this plot, it
#' should have been recruited earlier according to your protocol (MinDBH)."
#'
#' @importFrom data.table data.table rbindlist copy
#' @importFrom stats na.omit lm
#'
#' @export
#'
#' @examples
#' library(data.table)
#' DataTree <- data.table(IdTree = "a",
#' Year = seq(2000,2008, by = 2), # 2 years/census
#' Diameter_TreeDataCor = as.numeric(c(13:17)), # 1cm/census(0.5 cm/year)
#' Site = "Imaginary forest"
#' )
#' # 1st DBH = 13 > MinDBH. In 1998 the tree was 12cm, 11cm in 1996 and 10cm in
#' 1994.
#' Rslt <- RecruitmentCorrectionByTree(DataTree,
#' InvariantColumns = "Site",
#' PlotCensuses = seq(1996,2016, by = 2))
#'
RecruitmentCorrectionByTree <- function(
DataTree,
MinDBH = 10,
PositiveGrowthThreshold = 5, # je garde ?
InvariantColumns = c("Site",
"GenusCor",
"SpeciesCor",
"FamilyCor",
"ScientificNameCor"),
PlotCensuses,
DetectOnly = FALSE
){
#### Arguments check ####
# DataTree
if (!inherits(DataTree, c("data.table", "data.frame")))
stop("DataTree must be a data.frame or data.table")
# IdStem or IdTree? ---------------------------------------------------------------------------------------
# If no IdStem take IdTree
if((!"IdStem" %in% names(DataTree) | all(is.na(DataTree$IdStem))) &
("IdTree" %in% names(DataTree) & any(!is.na(DataTree$IdTree))) ){ ID <- "IdTree"
}else{ ID <- "IdStem"}
if(!any(c("IdStem", "IdTree") %in% names(DataTree)) | (all(is.na(DataTree$IdStem)) & all(is.na(DataTree$IdTree))) )
stop("The 'IdStem' or 'IdTree' column is missing in your dataset")
# ---------------------------------------------------------------------------------------------------------
# MinDBH/PositiveGrowthThreshold (numeric, 1 value)
if(!all(unlist(lapply(list(MinDBH, PositiveGrowthThreshold), length)) %in% 1) |
!all(unlist(lapply(list(MinDBH, PositiveGrowthThreshold),
inherits, c("numeric", "integer")))))
stop("The 'MinDBH' and 'PositiveGrowthThreshold'' arguments
of the 'RecruitmentCorrectionByTree' function must be 1 numeric value each")
if(DetectOnly %in% FALSE){
# InvariantColumns
if (!inherits(InvariantColumns, "character"))
stop("'InvariantColumns' argument must be of character class")
}
# PlotCensuses
if (!inherits(PlotCensuses, c("numeric", "integer")))
stop("'PlotCensuses' argument must be numeric or integer")
# DetectOnly (logical)
if(!all(unlist(lapply(list(DetectOnly),
inherits, "logical"))))
stop("The 'DetectOnly' argument
of the 'RecruitmentCorrectionByTree' function must be logicals")
if(!"Diameter" %in% names(DataTree) & !"Diameter_TreeDataCor" %in% names(DataTree))
stop("The 'Diameter' or the 'Diameter_TreeDataCor' (corrected Diameter)
column does't exist in the dataset.")
if(DetectOnly %in% FALSE){
# Diameter_TreeDataCor column exists
if(!"Diameter_TreeDataCor" %in% names(DataTree))
warning("The 'Diameter_TreeDataCor' (corrected Diameter) column does't exist in the dataset.
We advise to first correct the diameter measurements before correcting the recruitment")
}
# if there are several IdTrees
if(length(unique(DataTree[, get(ID)])) != 1){
stop("DataTree must correspond to only 1 same tree/stem so 1 same ",ID,"
(the ",ID,"s: " ,paste0(unique(DataTree[, get(ID)]), collapse = "/"),"")
}
# if there are several plots for the same IdTree
# if(length(as.vector(na.omit(unique(DataTree$Plot)))) != 1){
# stop(paste0("Tree ",unique(DataTree$IdTree)," has multiple plots: " ,paste0(unique(DataTree$Plot), collapse = "/")))
# }
if(DetectOnly %in% FALSE){
# Check if the InvariantColumns name exists in DataTree
for(c in InvariantColumns){
if(!c %in% names(DataTree)){
stop(paste("InvariantColumns argument must contain one or several column names (see help)."
,c,"is apparently not a dataset's column"))
}
}
}
#### Function ####
# print(unique(DataTree[, get(ID)])) # to debug
# data.frame to data.table
setDT(DataTree)
# if there are no 'Diameter_TreeDataCor' col, create it from the Diameter col
# (removes Diameter_TreeDataCor values that are not of order RecruitmentCorrection at the end)
InitialDT <- copy(DataTree)
if(!"Diameter_TreeDataCor" %in% names(DataTree))
DataTree[, Diameter_TreeDataCor := Diameter]
# Arrange year in ascending order
PlotCensuses <- sort(PlotCensuses, decreasing = FALSE) # order plot census years
DataTree <- DataTree[order(Year)] # order years in the data
#### Compute annual diameter incrementation ####
DBHCor <- DataTree[,Diameter_TreeDataCor]
Year <- DataTree[,Year]
if(!"Comment" %in% names(DataTree)) DataTree[, Comment := ""]
# Initialisation
cresc <- rep(0, length(DBHCor) - 1) # (cresc[1] corresponds to the 2nd DBH)
if (sum(!is.na(DBHCor)) > 1) { # if there is at least 1 measurement
cresc[which(!is.na(DBHCor))[-1] - 1] <- # 8 cresc for 9 dbh values ([-1]), shift all indices by 1 to the left (-1)
diff(DBHCor[!is.na(DBHCor)]) / diff(Year[!is.na(DBHCor)]) # DBH difference between pairwise censuses / time difference between pairwise censuses
}
#### Detect the overgrown recruits ####
RecruitYear <- min(DataTree[, Year], na.rm = TRUE) # recruitment year
if(RecruitYear > min(PlotCensuses, na.rm = TRUE) & sum(!is.na(DataTree$Diameter_TreeDataCor)) > 0){ # if the 1st census of the plot is smaller than the 1st census of the tree, and there are measured DBH
PrevCens <- PlotCensuses[which(PlotCensuses == RecruitYear)-1] # 1 census before the recruit year among the plot censuses
FirstDBH <- DataTree[!is.na(Diameter_TreeDataCor), Diameter_TreeDataCor][1] # 1st measured DBH
# if(DataTree$Diameter_TreeDataCor[1] > (MinDBH + (RecruitYear - PrevCens) * PositiveGrowthThreshold)){ # ah ben il detecte pas mes oublis pcq il considère l'erreur que si l'écart est superieur à la limite de croissance
# Growth criteria
if(length(cresc) > 0){ # if there are a growth
Growth <- cresc[1] # 1st growth value
}else{Growth <- 0} # if only 1 DBH value (no cresc)
# Maybe here we should round up the Growth, or propose a limit.
# Detection
#### If the 1st DBH is larger than it would have been if at the previous census
# it was at the minimum DBH
if((FirstDBH > (MinDBH + (RecruitYear - PrevCens) * Growth)) & (Growth > 0)){ # ma proposition
# and Growth > 0 je pense à ajouter
DataTree <- GenerateComment(DataTree,
condition = DataTree[, Year] %in% RecruitYear,
comment = "This DBH is/was the 1st recorded for this tree,
according to its annual growth and the census done for this plot,
it should have been recruited earlier according to your protocol (MinDBH).")
if(DetectOnly %in% FALSE){
MissingCens <- PlotCensuses[which(PlotCensuses < RecruitYear)] # the previous missing censuses
#### Create new rows for the forgotten recruits ####
if(length(MissingCens) > 0){
if("Plot" %in% names(DataTree)){ # if we have the plot info
NewRow <- data.table(ID = unique(DataTree[,get(ID)]), # the IdTree
Year = NA, # the censuses to add
Plot = unique(DataTree$Plot), # the unique plot of the tree
CorrectedRecruit = TRUE, # there are corrected recruits
stringsAsFactors = FALSE) # do not convert characters into factors
}
else{
NewRow <- data.table(ID = unique(DataTree[,get(ID)]), # the IdTree
Year = NA, # the censuses to add
CorrectedRecruit = TRUE, # there are corrected recruits
stringsAsFactors = FALSE) # do not convert characters into factors
}
setnames(NewRow, "ID", ID)
if(length(InvariantColumns) > 0){ # if there are invariant columns
NewRow[,(InvariantColumns) := NA] # empty the invariant columns for the added rows
# Fill in the invariant columns in the added rows
NewRow <- FillinInvariantColumns(NewRow = NewRow,
InvariantColumns = InvariantColumns,
DataTree = DataTree,
IdTree = unique(DataTree[,get(ID)]))
}
# Multiply this new row the number of times as well as the number of absents
NewRows <- do.call("rbind", replicate(n = length(MissingCens), NewRow, simplify = FALSE))
NewRows[, Year := MissingCens]
# Add these rows in the dataset
DataTree <- rbindlist(list(DataTree, NewRows), use.names=TRUE, fill=TRUE)
DataTree[, CorrectedRecruit := ifelse(is.na(CorrectedRecruit), FALSE, CorrectedRecruit)] # FALSE for the other rows
DataTree <- DataTree[order(Year)] # order by Year
#### Linear regression (DBH ~ Year) #### with all the DBH values
coef <- stats::lm(
DataTree[!is.na(Diameter_TreeDataCor), Diameter_TreeDataCor] ~ DataTree[!is.na(Diameter_TreeDataCor), Year])$coefficients
if(is.na(coef[2]) | coef[2] %in% 0) { # if no slope
### if only 1 DBH value: replace all non-recruited DBH by this value (pas sure que ce soit une bonne idée)
DataTree[Year < RecruitYear, ("Diameter_TreeDataCor") := unique(DataTree[!is.na(Diameter_TreeDataCor), Diameter_TreeDataCor])] # DBHCor := coef[1] (ça donne des pptés à la valeur)
}else{
# Estimate the recruits DBHCor with linear extrapolation
RecruitsDBH <- coef[1] + DataTree[Year < RecruitYear, Year]*coef[2] # y = b + ax. Min entre ces DBH inférés et le 1er DBH
# If estimated DBHCors are higher than the first measured DBHCors, (comment c possible ?)
# these are replaced by first measured DBHCors.
for(y in 1: length(RecruitsDBH)){
RecruitsDBH[y] <- min(RecruitsDBH[y], FirstDBH)
}
DataTree[Year < RecruitYear, ("Diameter_TreeDataCor") := RecruitsDBH]
}
# UselessRows: added trees under the MinDBH
UselessRows <- ((DataTree[, Diameter_TreeDataCor] < MinDBH) & (DataTree[, Year] %in% MissingCens))
if(any(UselessRows)){
DataTree <- DataTree[-which(UselessRows)] # remove them
}
} # end: missing censuses to add
} # end: DetectOnly == FALSE
} # end: overgrown recruit
} # end: if the plot have previous censuses
# If no 'Diameter_TreeDataCor' initially in the dataset
if(!"Diameter_TreeDataCor" %in% names(InitialDT)){
if(DetectOnly %in% TRUE) DataTree[, Diameter_TreeDataCor := NULL] # if detect only, remove Diameter_TreeDataCor if it didn't exist before
if(DetectOnly %in% FALSE)
DataTree[, Diameter_TreeDataCor := ifelse(CorrectedRecruit %in% FALSE, NA, Diameter_TreeDataCor)] # keep only recruitment correction
}
DataTree[is.na(Comment), ("Comment") := ""] # NAs come from NewRow
return(DataTree)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.