library(forestNETN)
library(tidyverse)
#----- Testing the import/export functions -----
#importData(instance = 'local', server = "localhost", new_env = T) # release 1.0.22 on 4/22
#importData(instance = 'server', server = "INP2300VTSQL16\\IRMADEV1", name = "NETN_Forest_Migration", new_env = T) #
path = "C:/Forest_Health/exports/NETN"
#exportCSV(path, zip = TRUE)
importCSV(path = path, zip_name = "NETN_Forest_20210507.zip") # migration check 5/7
#importCSV(path = path, zip_name = "NETN_Forest_20210503.zip") #release 1.0.22 on 4/26 ##with IsGerminant added back by hand
# Function arguments
park = 'all'
from = 2006
to = 2019
QAQC = TRUE
locType = "all"
eventType = 'all'
panels = 1:4
last_lab_year = 2019
arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
locType = locType, eventType = eventType)
compev_arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
locType = locType)
env = VIEWS_NETN
# Checking data function
check_data <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", col1, col2)]}
)) %>% bind_rows()
}
# For some reason R crashes if I do this after the import from old database
library(RODBC)
db <- RODBC::odbcConnect("NETNFVM") #
disturb<-sqlFetch(db,"tbl_Disturbances")
disttlu<-sqlFetch(db,"tlu_Disturbance_Codes")
disttlutc<-sqlFetch(db,"tlu_Disturbance_Threshhold_Codes")
odbcClose(db)
# import old database for comparisons
forestNETNarch::importData(type='file',
path='D:/NETN/Monitoring_Projects/Forest_Health/Database/2021_Forest_Database/Forest_Backend_NETN_20210503_Migration.mdb')
# Benchmarking example
names(VIEWS_NETN)
trees <- get("COMN_TreesByEvent", VIEWS_NETN)
head(trees)
library(microbenchmark)
microbenchmark(subset(trees, ParkUnit == "ACAD"),
filter(trees, ParkUnit == "ACAD"), times = 10)
#Unit: milliseconds
#expr min lq mean median uq max neval
#subset(trees, ParkUnit == "ACAD") 9.5459 9.8856 10.02015 10.05535 10.1789 10.3859 10
#filter(trees, ParkUnit == "ACAD") 2.0483 2.0950 2.99471 2.18675 2.2793 10.4267 10
#----- Testing joinLocEvent and migration -----
plotevs_old <- do.call(forestNETNarch::joinLocEvent, c(arglist, output = 'verbose'))
plotevs_new <- do.call(joinLocEvent, arglist)
debug(joinLocEvent)
joinLocEvent(park = "ACAD", from = 2006, to = 2019)
names(plotevs_new)
nrow(plotevs_old) #1280
nrow(plotevs_new) #1280
microbenchmark::microbenchmark(do.call(forestNETNarch::joinLocEvent, c(arglist, output = 'verbose')),
do.call(joinLocEvent, arglist), times = 10)
pe_merge <- full_join(plotevs_new, plotevs_old, by = c("EventLegacyID" = "Event_ID", "Plot_Name" = "Plot_Name"))
check_data(pe_merge, "ParkSubUnit", "Unit_ID") #0
check_data(pe_merge,"xCoordinate", "X_Coord")#0
check_data(pe_merge,"yCoordinate", "Y_Coord")#0
#check_data(pe_merge,"StartDate", "Start_Date") # they're diff. format, so all show as different
check_data(pe_merge,"PanelCode", "Panel")#0
check_data(pe_merge,"Event_QAQC", "IsQAQC")#0
#test <- check_data(pe_merge,"ZoneCode", "UTM_Zone") # diff b/c one has N.
table(pe_merge$ZoneCode, pe_merge$UTM_Zone)#ok
check_data(pe_merge,"Orientation.x", "Orientation.y")#0
check_data(pe_merge,"cycle.x", "cycle.y")#0
check_data(pe_merge,"PlotTypeCode", "Loc_Type")#0
check_data(pe_merge,"PlotLegacyID", "Location_ID")#0
check_data(pe_merge,"Aspect.x", "Aspect.y")#0
check_data(pe_merge,"PhysiographyCode", "Physiographic_Class")#0
plot_check <- unique(pe_merge[, c("ParkUnit", "Plot_Name")])
table(plot_check$ParkUnit) # That's the correct # of plots/park
#ACAD MABI MIMA MORR ROVA SAGA SARA WEFA
#176 24 20 29 40 21 32 10
dir_dif <- check_data(pe_merge, "Directions.x", "Directions.y")
# No issues remaining
#----- Stand Views -----
stand_new <- do.call(joinStandData, arglist)
stand_old <- do.call(forestNETNarch::joinStandData, arglist)
stand_old2 <- merge(stand_old, stand[,c("Event_ID", "Deer_Browse_Line_pre09_ID")],
by = "Event_ID", all.x = T, all.y = T)
stand_merge <- full_join(stand_new, stand_old2, by = c("Plot_Name" = "Plot_Name", "IsQAQC" = "Event_QAQC",
"StartYear" = "Year"))
check_data(stand_merge,"Panel", "PanelCode") # Only ACAD-029-2010
check_data(stand_merge, "Stand_Structure.x", "Stand_Structure.y") #0
check_data(stand_merge, "Stand_Structure_Code", "Stand_Structure_ID") #0
check_data(stand_merge, "Pct_Crown_Closure.x", "Pct_Crown_Closure.y") #0
check_data(stand_merge, "Pct_Understory_Low.x", "Pct_Understory_Low.y") #0
check_data(stand_merge, "Pct_Understory_Mid.x", "Pct_Understory_Mid.y") #0
check_data(stand_merge, "Pct_Understory_High.x", "Pct_Understory_High.y") #0
check_data(stand_merge, "Pct_Bare_Soil", "Pct_Bare_Soil_Cover") #0
check_data(stand_merge, "Pct_Rock", "Pct_Rock_Cover")#0
check_data(stand_merge, "Pct_Lichen", "Pct_Lichen_Cover")#0
check_data(stand_merge, "Pct_Bryophyte", "Pct_Bryophyte_Cover")#0
check_data(stand_merge, "Pct_Water", "Pct_Surface_Water_Cover")#0
check_data(stand_merge, "Pct_Bare_Soil", "Pct_Bare_Soil_Cover")#0
check_data(stand_merge, "Pct_Trampled", "Pct_Trampled_Cover")#0
check_data(stand_merge, "IsStuntedWoodland", "Stunted_Woodland") # Only ACAD-029-2010
check_data(stand_merge, "Earthworms.x", "Earthworms.y")#0
check_data(stand_merge, "Microtopography", "Microtopography_ID")#0
check_data(stand_merge, "Deer_Browse_Index", "Deer_Browse_Line_ID")
table(stand_merge$Deer_Browse_Label, stand_merge$StartYear, useNA = 'always') # All are 5s that are recorded pre 2010 as DBL pres
table(stand_merge$Deer_Browse_Label, stand_merge$Deer_Browse_Line_pre09_ID) # Either NC or Very High. OK
table(stand_merge$Deer_Browse_Label, stand_merge$Deer_Browse_Line_ID) # Labels match codes. OK
stand_merge %>% mutate(slp_dif = abs(PlotSlope - Plot_Slope_Deg)) %>%
select(Plot_Name, StartYear, IsQAQC, EventID, PlotSlope, Plot_Slope_Deg, slp_dif) %>%
filter(slp_dif > 0.1) # All QAQC events where slopes were entered. Don't care.
#check_data(stand_merge, "PlotSlope", "Plot_Slope_Deg") # Lots due to rounding, but fine
stand_merge %>% mutate(slp_dif = abs(PlotSlope - Plot_Slope_Deg)) %>%
select(Plot_Name, StartYear, IsQAQC, EventID, PlotSlope, Plot_Slope_Deg, slp_dif) %>%
filter(slp_dif > 0.5) # Only differences is a QAQC event, which we don't care about
#----- Stand Disturbance -----
stdist_new <- do.call(joinStandDisturbance, arglist)
#--- Tends to crash if run after old DB import, so moved code to beginning
# library(RODBC)
# db <- RODBC::odbcConnect("NETNFVM") #
# disturb<-sqlFetch(db,"tbl_Disturbances")
# disttlu<-sqlFetch(db,"tlu_Disturbance_Codes")
# disttlutc<-sqlFetch(db,"tlu_Disturbance_Threshhold_Codes")
# odbcClose(db)
st_dist_o <- merge(plotevs_old[, c("Plot_Name", "Event_ID", "Event_QAQC", "Year")],
disturb[, c(2:6)], by = "Event_ID", all.x = T, all.y = T)
dist_merge <- merge(stdist_new, st_dist_o,
by.x = c("Plot_Name", "StartYear", "IsQAQC"),#, "Disturbance.Code"),
by.y = c("Plot_Name", "Year", "Event_QAQC"),#, "Disturbance_Code"),
all.x = T, all.y = T)
dist_merge[which(!complete.cases(dist_merge$DisturbanceCode)),] # SARA.915. OK
check_data(dist_merge, "Disturbance_Threshold", "ThresholdCode") %>% arrange(Plot_Name) #18 All dups sorted wrong or SARA-915 (NA)
check_data(dist_merge, "Disturbance_Notes", "DisturbanceNote") #11 All dups sorted wrong, or SARA-915 (NA)
check_data(dist_merge, "DisturbanceCode", "Disturbance_Code") %>% arrange(Plot_Name) # All dups that sorted wrong or SARA-915 (NA)
#+++++ No remaining disturbance issues
#----- Tree Height -----
#ht_dif <-
stand_merge %>% mutate(cod_dif = abs(Avg_Height_Codom - Avg_Codom_HT),
int_dif = abs(Avg_Height_Inter - Avg_Inter_HT)) %>%
select(Plot_Name, StartYear, IsQAQC, Avg_Height_Codom, Avg_Codom_HT,
cod_dif, Avg_Height_Inter, Avg_Inter_HT, int_dif) %>%
filter(cod_dif > 0.5 | int_dif > 0.5)
#0 Tree height issues resolved
tree_ht_old <- stand %>% select(Event_ID, Stand_Structure_ID, Stunted_Woodland, Tree_1_Number_Codom:Height_Tree_3_Inter) %>%
filter(Stand_Structure_ID != 5) %>% filter(Stunted_Woodland == 0)
tree_ht2 <- merge(plotevs_old[,c("Event_ID", "Plot_Name", "Year", "Event_QAQC")],
tree_ht_old, by = "Event_ID", all.x = FALSE, all.y = TRUE) %>% filter(!is.na(Plot_Name))
tree_ht_w1 <- tree_ht2 %>% select(Event_ID:Tree_3_Number_Inter) %>%
pivot_longer(cols = c(Tree_1_Number_Codom:Tree_3_Number_Inter),
names_to = "Samp",
values_to = "Tree_Number") %>%
mutate(Samp_Num = case_when(str_detect(Samp, "_1_") ~ 1L,
str_detect(Samp, "_2_") ~ 2L,
str_detect(Samp, "_3_") ~ 3L),
Crown = ifelse(str_detect(Samp, "Codom"), "Codom", "Inter")
)
tree_ht_w2 <- tree_ht2 %>% select(Event_ID:Event_QAQC, Height_Tree_1_Codom:Height_Tree_3_Inter) %>%
pivot_longer(cols = c(Height_Tree_1_Codom:Height_Tree_3_Inter),
names_to = "Samp_ht",
values_to = "Height_m") %>%
mutate(Samp_Num = case_when(str_detect(Samp_ht, "_1_") ~ 1L,
str_detect(Samp_ht, "_2_") ~ 2L,
str_detect(Samp_ht, "_3_") ~ 3L),
Crown = ifelse(str_detect(Samp_ht, "Codom"), "Codom", "Inter")
)
tree_ht3 <- merge(tree_ht_w1, tree_ht_w2, by = c("Event_ID", "Plot_Name", "Year", "Event_QAQC", "Samp_Num", "Crown"),
all.x = T, all.y = T) %>% select(-Samp, -Samp_ht) %>% filter(!is.na(Height_m))
tree_ht_old_sum <- tree_ht3 %>% group_by(Plot_Name, Year, Event_QAQC, Crown) %>%
summarize(total_ht_old = sum(Height_m, na.rm =T),
num_trees_old = sum(!is.na(Height_m)),
.groups = 'drop')
tree_ht_vw <- get("COMN_StandTreeHeights", envir = VIEWS_NETN)#[, -c(18:24, 27)]
tree_ht_vw$Plot_Name <- paste(tree_ht_vw$ParkUnit, sprintf("%03d", tree_ht_vw$PlotCode), sep = "-")
tree_ht_vw$Event_QAQC <- ifelse(tree_ht_vw$IsQAQC == 0, FALSE, TRUE)
tree_ht_vw_sum <- tree_ht_vw %>% group_by(Plot_Name, StartYear, Event_QAQC, EventID, CrownClassLabel) %>%
summarize(total_ht_new = sum(Height),
num_trees_new = sum(!is.na(Height)),
.groups = 'drop') %>%
mutate(Crown = ifelse(CrownClassLabel == "Intermediate", "Inter", "Codom"))
tree_height_comps <- merge(tree_ht_vw_sum,
tree_ht_old_sum,
by.x = c("Plot_Name", "StartYear", "Event_QAQC", "Crown"),
by.y = c("Plot_Name", "Year", "Event_QAQC", "Crown"),
all.x = T, all.y = T) %>%
mutate(ht_diff = abs(total_ht_new - total_ht_old),
n_tree_diff = num_trees_new - num_trees_old)
tree_height_comps %>% filter(ht_diff > 0.1) # Only 4 plots with ht diff > 0.1. All appear to be rounding related. OK.
tree_height_comps %>% filter(n_tree_diff == -1) # No missing tree heights
#+++++ No remaining stand height issues +++++
#----- CWD -----
cwd_old <- do.call(forestNETNarch::joinCWDData, arglist) %>% mutate(ScientificName = ifelse(Latin_Name == "No species recorded",
"None present", Latin_Name))
cwd_new <- do.call(joinCWDData, arglist)
cwd_merge <- merge(cwd_new, cwd_old, by.x = c("Plot_Name", "StartYear", "IsQAQC", "ScientificName", "DecayClassCode"),
by.y = c("Plot_Name", "Year", "Event_QAQC", "ScientificName", "Decay_Class_ID"),
all = TRUE)
cwd_merge %>% mutate(cwd_dif = abs(CWD_Vol.x - CWD_Vol.y)) %>% filter(cwd_dif > 0.05 & IsQAQC == FALSE) %>%
select(Plot_Name:ScientificName, DecayClassCode, CWD_Vol.x, CWD_Vol.y, cwd_dif)
#9 All are small differences due to rounding
#+++++ No remaining CWD issues +++++
#----- Tree Data
tree_old <- do.call(forestNETNarch::joinTreeData, c(arglist, list(speciesType = 'all', status = 'all'))) %>%
mutate(TagCode = as.numeric(Tree_Number_NETN))
tree_new <- do.call(joinTreeData, c(arglist, list(speciesType = 'all', status = 'all')))
table(tree_new$HWACode, tree_new$ScientificName, useNA = 'always') # Only values for TSUCAN. Good!
non_tsucan <- tree_new %>% filter(ScientificName != "Tsuga canadensis")
table(non_tsucan$HWACode, non_tsucan$StartYear, useNA = 'always') # All NA. Good!
table(tree_new$BBDCode, tree_new$ScientificName, useNA = 'always') # Only values for FAGGRA. Good!
non_faggra <- tree_new %>% filter(ScientificName != "Fagus grandifolia")
table(non_faggra$BBDCode, non_faggra$StartYear, useNA = 'always')# All NA. Good!
tree_merge <- full_join(tree_new, tree_old, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"TagCode" = "TagCode"),
suffix = c("_new", "_old")
)
check_trees <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "TagCode", "IsQAQC", "ScientificName",
"Status_ID", "TreeStatusCode",col1, col2)]}
)) %>% bind_rows()
}
check_trees(tree_merge, "TSN_new", "TSN_old") #SARA-015-2012 trees that weren't located or EX after plot reset. OK.
#check_trees(tree_merge, "Distance_new", "Distance_old") # too many rounding differences. Need dif. check.
check_trees(tree_merge, "Azimuth_new", "Azimuth_old") #SARA-015-2012 trees that were NL or EX after plot reset. OK.
check_trees(tree_merge, "Fork_new", "Fork_old") #0
tree_merge %>% mutate(dist_diff = abs(Distance_new - Distance_old)) %>%
filter(dist_diff > 0.1) #0
tree_merge %>% mutate(dbh_diff = abs(DBH - DBHcm)) %>%
filter(dbh_diff > 0) %>%
select(Plot_Name, StartYear, IsQAQC, TagCode, ScientificName, DBHcm, DBH, dbh_diff)
# Only big difference is bad data in ACAD-019-2006 Tag 30 in old db not migrating to new.
# Remaining differences are cleaner rounding in new db. Good.
check_trees(tree_merge, "TreeStatusCode", "Status_ID")
# differences are ACAD-029-2010 getting PMs correctly
# bad ACAD-019-2006 Tag 30 not migrating.
# dropped SARA-015-2012 trees that were NL or EX after plot reset. All good.
crown_check <- check_trees(tree_merge, "Crown_Class_ID", "CrownClassCode")
table(crown_check$Status_ID, crown_check$Crown_Class_ID, useNA = 'always')
# Differences are because dead and EX trees were correctly scrubbed of their crown class.
tree_simp <- tree_merge %>% select(Plot_Name, StartYear, IsQAQC, TagCode, ScientificName,
TreeStatusCode, Status_ID, CrownClassCode, Crown_Class_ID)
table(tree_simp$CrownClassCode, tree_simp$Crown_Class_ID, useNA = 'always')
# Only rogue tree (4/3) in table is ACAD-019 Tag 30 not migrating correctly. OK.
check_trees(tree_merge, "Decay_Class_ID", "DecayClassCode")
# No issues. Decays that are diff are DF and AS, which correctly shouldn't have a decay class
table(tree_merge$IsDBHVerified, tree_merge$DBH_Verified, useNA = 'always') # Align with diagonal. OK
table(tree_merge$Pct_Tot_Foliage_Cond, tree_merge$Total_Foliage_Condition, tree_merge$StartYear, useNA = 'always')
# 4/26: 7 records in 2011 and 1 in 2012 that have a 0 % Foliage still
# 5/5: No issues
tree_simp <- tree_merge %>% select(Plot_Name, StartYear, IsQAQC, TagCode, Pct_Tot_Foliage_Cond, Total_Foliage_Condition)
table(tree_simp$Pct_Tot_Foliage_Cond, tree_simp$Total_Foliage_Condition, useNA = 'always')
fol_view <- get("COMN_TreesFoliageCond", envir = VIEWS_NETN) #%>% select(ParkUnit, PlotCode, IsQAQC, StartYear, TagCode,
# FoliageConditionLabel, TotalFoliageConditionLabel)
fol_simp <- fol_view %>% select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, PercentLeavesLabel, TotalFoliageConditionLabel)
table(fol_simp$PercentLeavesLabel, fol_simp$TotalFoliageConditionLabel, useNA = 'always')
tree_view <- VIEWS_NETN$COMN_TreesFoliageCond
names(tree_view)
table(tree_view$FoliageConditionLabel, tree_view$TotalFoliageConditionLabel, useNA = 'always')
# There's actually a dead tree with Leaf Loss: ACAD-039-2007 Tag 9. I deleted it in 20210503 migration
tree_merge %>% filter(Pct_Tot_Foliage_Cond == 0) %>% arrange(Plot_Name, StartYear) %>%
select(Plot_Name, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode, CrownClassCode,
Pct_Tot_Foliage_Cond, Total_Foliage_Condition)
# These trees appear to have had foliage conditions recorded, but the Total Foliage was recorded as 0%.
# 6 were fixed b/c only 1 condition was reported, so applied its percent. 2 were deleted and should migrate as PM.
# 5/3 0 records. Issues resolved
#write.csv(fol_check, "./testing_scripts/tot_fol_still_0.csv", row.names = F)
# +++ Tree data finished checking.
#----- Tree Foliage Conditions -----
fol_vw <- VIEWS_NETN$COMN_TreesFoliageCond
table(fol_vw$TotalFoliageConditionCode, fol_vw$StartYear, useNA = 'always')
# 8 records in 2011 and 1 in 2012 where Total Foliage was recorded as 0, but there were conditions recorded.
# Already fixed in previous section. Should migrate 2 PM and the rest with non-0 values in next migration.
# 5/3 Issue resolved
table(fol_vw$PercentLeafAreaLabel, fol_vw$StartYear, useNA = 'always') # LA NC for 2006-2015; 14 PM from 2016 & 2017 correct
nrow(fol_vw %>% filter(PercentLeafAreaLabel == "Not Collected" )) #18556 additional rows for NC
table(fol_vw$PercentLeafAreaLabel, fol_vw$FoliageConditionCode, useNA = 'always') # NA correctly applied to L, NO, S, W
table(fol_vw$TotalFoliageConditionLabel, fol_vw$FoliageConditionCode, useNA = 'always')
# 4/22 14 0 TotFol when foliage conditions exist. Not clear why this is happening. NO to NotApp is correct.
# 4/26 9 TotFol with 0% and foliage conditions pres. All but 3 were fixed b/c only 1 condition recorded.
# Rest should be PM in next migration.
# 5/3 No 0s remaining
pm_fol <- fol_vw %>% filter(TotalFoliageConditionCode == "PM")
pm_fol$Plot_Name <- paste(pm_fol$ParkUnit, sprintf("%03d", pm_fol$PlotCode), sep = "-")
write.csv(pm_fol, "./testing_scripts/PM_totfoliage.csv")
#no_folcond_with_totfol_0p <-
fol_vw %>% filter(FoliageConditionCode == "NO" & TotalFoliageConditionCode %in% c(1:4)) %>%
select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode,
FoliageConditionCode, TotalFoliageConditionCode)
# 287 missing foliage conditions were converted to NO instead of PM (see no_folcond_with_totfol_0p)
# Instead totfol should be kept as is and foliage conditions should come in as PM.
# Remains an issue. See line 192 in legacy migration data questions spreadsheet.
# No issues remain
table(fol_vw$PercentLeavesLabel, fol_vw$FoliageConditionCode, useNA = 'always')# 1 L with 0 and 4 N with 0. NO to Not Applicable correct.
# 5/3 no issues remain
fol_vw %>% filter(PercentLeavesLabel == "0%" & FoliageConditionCode %in% c("L", "N")) %>% arrange(ParkUnit, PlotCode, StartYear, TagCode) %>%
select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode,
PercentLeavesLabel, FoliageConditionCode, TotalFoliageConditionCode) #0
table(fol_vw$FoliageConditionCode, fol_vw$PercentLeavesLabel)
table(fol_vw$TotalFoliageConditionLabel, fol_vw$PercentLeavesLabel, fol_vw$StartYear)
# 2012 1 0%; 2011 7 0%. Should be fixed in next migration (see previous section)
# 5/3 no issues remaining
fol_vw %>% filter(TotalFoliageConditionLabel == "0%") %>%
select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode,
FoliageConditionCode, TotalFoliageConditionLabel) %>%
arrange(ParkUnit, PlotCode, StartYear, TagCode)
# 9 records with 0% TotFolConLabel that should be > 0 b/c conditions recorded. Fixed all but 3, which should migrate as PM.
# 5/3 0 issues remain
compev_arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
locType = locType)
fol_new <- do.call(joinTreeFoliageCond, compev_arglist)
fol_old1 <- xrfolcond %>% mutate(Cond = case_when(Foliage_Condition_ID == 1 ~ "C",
Foliage_Condition_ID == 2 ~ "N",
Foliage_Condition_ID == 3 ~ "H",
Foliage_Condition_ID == 4 ~ "S",
Foliage_Condition_ID == 5 ~ "W",
Foliage_Condition_ID == 6 ~ "L",
Foliage_Condition_ID == 7 ~ "O",
Foliage_Condition_ID == 8 ~ "NO",
TRUE ~ NA_character_),
Pct_Leaves_Aff = case_when(Foliage_Condition_Percent == 0 ~ 0,
Foliage_Condition_Percent == 1 ~ 5.5,
Foliage_Condition_Percent == 2 ~ 30,
Foliage_Condition_Percent == 3 ~ 70,
Foliage_Condition_Percent == 4 ~ 95,
TRUE ~ NA_real_),
Pct_Leaf_Area = case_when(Leaf_Area_Percent == 0 ~ 0,
Leaf_Area_Percent == 1 ~ 5.5,
Leaf_Area_Percent == 2 ~ 30,
Leaf_Area_Percent == 3 ~ 70,
Leaf_Area_Percent == 4 ~ 95,
TRUE ~ NA_real_)
)
tree_evs_old <- left_join(plotevs_old, trees %>% select(Tree_ID:Tree_Notes),
by = intersect(names(plotevs_old), names(trees %>% select(Tree_ID:Tree_Notes)))) %>%
left_join(., treedata %>% select(Tree_Data_ID:Notes),
by = intersect(names(.), names(treedata %>% select(Tree_Data_ID:Notes)))) %>%
select(Plot_Name, Year, Event_QAQC, Event_ID, Tree_ID, Tree_Number_NETN, Total_Foliage_Condition,
Tree_Data_ID, Status_ID) %>% filter(Status_ID %in% c("1", "AB", "AF", "AL", "AM", "AS",
"RB", "RF", "RL", "RS"))
names(tree_evs_old)
fol_old <- left_join(tree_evs_old, fol_old1, by = "Tree_Data_ID") %>%
select(Plot_Name, Year, Event_QAQC, Tree_Number_NETN, Status_ID, Cond, Pct_Leaves_Aff, Pct_Leaf_Area) %>%
arrange(Plot_Name, Year, Event_QAQC, Tree_Number_NETN) %>%
pivot_wider(names_from = Cond,
values_from = c(Pct_Leaves_Aff, Pct_Leaf_Area),
values_fill = NA_real_) %>%
mutate(TagCode = as.numeric(Tree_Number_NETN)) %>%
select(-Pct_Leaves_Aff_NA, -Pct_Leaf_Area_NA, -Pct_Leaf_Area_W, -Pct_Leaf_Area_O, -Pct_Leaf_Area_S)
nrow(fol_old) #23188
nrow(fol_new) #23189
fol_merge <- full_join(fol_new, fol_old,
by = c("Plot_Name" = "Plot_Name", "StartYear" = "Year", "IsQAQC" = "Event_QAQC", "TagCode" = "TagCode"),
suffix = c("_new", "_old"))
head(fol_merge)
check_conds <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "TagCode", "Status_ID", "IsQAQC", col1, col2)]}
)) %>% bind_rows()
}
names(fol_merge)
table(fol_merge$Pct_Tot_Foliage_Cond)
# 0s in _new because they were filled in function
lvs_C <- check_conds(fol_merge, "Pct_Leaves_Aff_C_new", "Pct_Leaves_Aff_C_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_C$Pct_Leaves_Aff_C_new, lvs_C$Pct_Leaves_Aff_C_old, useNA = 'always')
lvs_H <- check_conds(fol_merge, "Pct_Leaves_Aff_H_new", "Pct_Leaves_Aff_H_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_H$Pct_Leaves_Aff_H_new, lvs_H$Pct_Leaves_Aff_H_old, useNA = 'always')
lvs_L <- check_conds(fol_merge, "Pct_Leaves_Aff_L_new", "Pct_Leaves_Aff_L_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_L$Pct_Leaves_Aff_L_new, lvs_L$Pct_Leaves_Aff_L_old, useNA = 'always')
lvs_N <- check_conds(fol_merge, "Pct_Leaves_Aff_N_new", "Pct_Leaves_Aff_N_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_N$Pct_Leaves_Aff_N_new, lvs_N$Pct_Leaves_Aff_N_old, useNA = 'always')
lvs_S <- check_conds(fol_merge, "Pct_Leaves_Aff_S_new", "Pct_Leaves_Aff_S_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_S$Pct_Leaves_Aff_S_new, lvs_S$Pct_Leaves_Aff_S_old, useNA = 'always')
lvs_W <- check_conds(fol_merge, "Pct_Leaves_Aff_W_new", "Pct_Leaves_Aff_W_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_W$Pct_Leaves_Aff_W_new, lvs_W$Pct_Leaves_Aff_W_old, useNA = 'always')
lvs_O <- check_conds(fol_merge, "Pct_Leaves_Aff_O_new", "Pct_Leaves_Aff_O_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(lvs_O$Pct_Leaves_Aff_O_new, lvs_O$Pct_Leaves_Aff_O_old, useNA = 'always')
la_C <- check_conds(fol_merge, "Pct_Leaf_Area_C_new", "Pct_Leaf_Area_C_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(la_C$Pct_Leaf_Area_C_new, la_C$Pct_Leaf_Area_C_old, useNA = 'always')
la_H <- check_conds(fol_merge, "Pct_Leaf_Area_H_new", "Pct_Leaf_Area_H_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(la_H$Pct_Leaf_Area_H_new, la_H$Pct_Leaf_Area_H_old, useNA = 'always')
la_N <- check_conds(fol_merge, "Pct_Leaf_Area_N_new", "Pct_Leaf_Area_N_old") # all diffs are NAs converted to 0s. I think they should still be NA.
table(la_N$Pct_Leaf_Area_N_new, la_N$Pct_Leaf_Area_N_old, useNA = 'always')
# +++++ The remaining issue is when Total_Foliage_Condition > 0 but no foliage conditions.
# Foliage conditions are getting converted to NO instead of PM in this case.
# 5/3 no issues remain
#----- Tree Conditions
# NETN old db condition counts
#H AD BBD BWA CAVL CAVS CW DBT EAB EB EHS G GM HWA ID NO OTH RPS SB VIN VOB
#15682 927 569 162 362 349 3880 2412 2 932 163 193 1 206 30 1854 136 2 2 422 35
table(VIEWS_NETN$COMN_TreesConditions$TreeConditionCode, useNA = 'always')
# Not quite the same. New DB has fewer when they differ
# H AD BBD BWA CAVL CAVS CW DBT EAB EB EHS G GM HWA ID NO OTH RPS SB VINE <NA>
# 15561 925 569 162 362 349 3869 2402 2 929 163 193 1 206 30 1851 131 2 2 455 3149
con_vw <- VIEWS_NETN$COMN_TreesConditions
table(VIEWS_NETN$COMN_TreesConditions$TreeConditionCode,
VIEWS_NETN$COMN_TreesConditions$StartYear, useNA = 'always')
table(con_vw$TreeStatusCode)
live <- c("1", "AB", "AF", "AL", "AM", "AS", "RB", "RF", "RL", "RS")
dead <- c("2", "DB", "DL", "DM", "DS")
con_vw$status_simp <- ifelse(con_vw$TreeStatusCode %in% live, "live",
ifelse(con_vw$TreeStatusCode %in% dead, "dead",
"inactive"))
table(con_vw$status_simp, con_vw$TreeConditionCode, con_vw$StartYear, useNA = "always")
# Still missing NC and PMs. Otherwise RPS issue resolved
# 5/3 resolved
dead_trees_with_conds <-
con_vw %>%
filter(TreeStatusCode %in% c("2", "DB", "DL", "DM", "DS")) #%>%
#filter(!TreeConditionCode %in% c("NO", "CAVS", "CAVL")) #%>%
#filter(!is.na(TreeConditionCode)) # 0 records
table(dead_trees_with_conds$TreeConditionCode, dead_trees_with_conds$StartYear, useNA = 'always')
# Still need NC for NAs <=2011 and PM for >2011 (1 record in 2013)
cond_new <- do.call(joinTreeConditions, c(compev_arglist, list(status = 'active')))
active <- c("1", "AB" ,"AF", "AL", "AS", "AM", "DB", "DL", "DM", "DS", "RB", "RF", "RL", "RS")
tree_evs_old <- left_join(plotevs_old, trees %>% select(Tree_ID:Tree_Notes),
by = intersect(names(plotevs_old), names(trees %>% select(Tree_ID:Tree_Notes)))) %>%
left_join(., treedata %>% select(Tree_Data_ID:Notes),
by = intersect(names(.), names(treedata %>% select(Tree_Data_ID:Notes)))) %>%
select(Plot_Name, Year, Event_QAQC, Event_ID, Tree_ID, Tree_Number_NETN, Total_Foliage_Condition,
Tree_Data_ID, Status_ID) %>% filter(Status_ID %in% active)
table(tree_evs_old$Status_ID)
tlucond <- read.csv("./testing_scripts/tlu_Tree_Conditions.csv")
cond_old1 <- left_join(xrtreecond %>% select(Tree_Data_ID, Tree_Condition_ID),
tlucond %>% select(Tree_Condition_ID, Code), by = c("Tree_Condition_ID")) %>%
right_join(tree_evs_old, ., by = intersect(names(tree_evs_old), names(.))) %>%
select(Plot_Name, Year, Event_QAQC, Tree_Number_NETN, Status_ID, Code) %>%
filter(!is.na(Code)) %>% filter(!is.na(Plot_Name)) %>%
arrange(Plot_Name, Year, Tree_Number_NETN) %>% # drop trees without conditions
mutate(pres = 1, TagCode = as.numeric(Tree_Number_NETN)) %>% select(-Tree_Number_NETN)
cond_old <- cond_old1 %>% mutate(pres = 1) %>% unique() %>%
pivot_wider(names_from = Code, values_from = pres, values_fill = 0)
cond_merge <- full_join(cond_new, cond_old, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"TagCode" = "TagCode"),
suffix = c("_new", "_old"))
cond_merge[,18:67][is.na(cond_merge[,18:67])] <- 0 # just so rowwise checking works. There were 0s in new and NAs in old
names(cond_merge)
check_conds(cond_merge, "H_new", "H_old") # 0
check_conds(cond_merge, "NO_new", "NO_old") # 0
check_conds(cond_merge, "AD_new", "AD_old") #0
# no ALB, BC, SOD so no check for it
check_conds(cond_merge, "BBD_new", "BBD_old") #0
check_conds(cond_merge, "BWA_new", "BWA_old") #0
check_conds(cond_merge, "CAVL_new", "CAVL_old") #0
check_conds(cond_merge, "CAVS_new", "CAVS_old") #0
check_conds(cond_merge, "CW_new", "CW_old") # 0
check_conds(cond_merge, "DBT_new", "DBT_old") #0
check_conds(cond_merge, "DOG_new", "DOG_old") #0
check_conds(cond_merge, "EAB_new", "EAB_old") #0
check_conds(cond_merge, "EB_new", "EB_old") #0
check_conds(cond_merge, "EHS_new", "EHS_old") #0
check_conds(cond_merge, "G_new", "G_old") #0
check_conds(cond_merge, "GM_new", "GM_old") #0
check_conds(cond_merge, "HWA_new", "HWA_old") #0
check_conds(cond_merge, "ID_new", "ID_old") #0
check_conds(cond_merge, "OTH_new", "OTH_old") #0
check_conds(cond_merge, "RPS_new", "RPS_old") #0
check_conds(cond_merge, "SB_new", "SB_old") #0
#++++++ Remaining issue: NC and PM for dead trees are currently NA.
#++++++ Missing conditions for live trees are NA instead of PM.
# 5/3 both issues resolved
#------ Vines -----
vines_new <- do.call(joinTreeVineSpecies, c(compev_arglist, speciesType = 'all')) %>%
select(Plot_Name, StartYear, IsQAQC, TagCode, VinePositionCode, VinePositionLabel, ScientificName, TSN)
names(vines_new)
#View(VIEWS_NETN$COMN_TreesVine)
vines_old <- left_join(xrtreecond %>% select(Tree_Data_ID, Tree_Condition_ID, Species_ID),
tlucond %>% select(Tree_Condition_ID, Code), by = c("Tree_Condition_ID")) %>%
right_join(tree_evs_old, ., by = intersect(names(tree_evs_old), names(.))) %>%
left_join(., plants %>% select(TSN, Latin_Name), by = c("Species_ID" = "TSN")) %>%
select(Plot_Name, Year, Event_QAQC, Tree_Number_NETN, Status_ID, Code, Species_ID, Latin_Name) %>%
filter(!is.na(Code)) %>% filter(!is.na(Plot_Name)) %>% filter(Code %in% c("VIN", "VOB")) %>%
arrange(Plot_Name, Year, Tree_Number_NETN) %>% # drop trees without conditions
mutate(TagCode = as.numeric(Tree_Number_NETN)) %>% select(-Tree_Number_NETN)
nrow(vines_old) #455
nrow(vines_new) #455
vines_merge <- full_join(vines_new, vines_old, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"TagCode" = "TagCode",
"TSN" = "Species_ID"),
suffix = c("_new", "_old"))
head(vines_merge)
table(vines_merge$VinePositionCode, vines_merge$StartYear, useNA = 'always') # B only >2019. good.
table(vines_merge$Code, vines_merge$StartYear, useNA = 'always') # Numbers are the same between old and new and correct by year
table(vines_merge$ScientificName, vines_merge$Latin_Name, useNA = 'always') # Looks good
# check if trees with multiple vines are migrating
mult_vines <- vines_new %>% group_by(Plot_Name, StartYear, IsQAQC, TagCode) %>% summarize(num_vines = n()) %>% filter(num_vines > 1)
# Multiple vines are now migrating into the database, which is great. The view is actually duplicating them, so there are 4 instead of 2.
# The xref table is correct.
#+++++ No issues remaining
#----- Quadrat Character -----
qchar_new <- joinQuadData(park = 'all', from = 2006, to = 2019, locType = 'all', eventType = 'all',
valueType = 'all', QAQC = T)
locev <- merge(loc, event, by = "Location_ID", all.x = T, all.y = T)
drops <- anti_join(locev, plotevs_old, by = "Event_ID")
plotevs_old <- forestNETNarch::joinLocEvent(park = 'all', from = 2006, to = 2019, eventType = 'all',
locType = 'all', QAQC = T, rejected = FALSE)
qchar_old <- merge(plotevs_old, quadchr, by = intersect(names(plotevs_old), names(quadchr)), all.x = T, all.y = F) %>%
select(Plot_Name,Year, Event_QAQC, Quadrat_ID, UC:UR)
quad_names = c("UC", "UR", "MR", "BR", "BC", "BL", "ML", "UL")
head(qchar_old)
qchar_old[ , quad_names][qchar_old[ , quad_names] == 1] <- 0.1
qchar_old[ , quad_names][qchar_old[ , quad_names] == 2] <- 1.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 3] <- 3.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 4] <- 7.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 5] <- 17.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 6] <- 37.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 7] <- 62.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 8] <- 85
qchar_old[ , quad_names][qchar_old[ , quad_names] == 9] <- 97.5
qchar_old <- qchar_old %>% mutate(Cover_Type = case_when(Quadrat_ID == 2 ~ "Soil",
Quadrat_ID == 3 ~ "Rock",
Quadrat_ID == 4 ~ "Stem",
Quadrat_ID == 5 ~ "Wood",
Quadrat_ID == 6 ~ "Sphagnum",
Quadrat_ID == 7 ~ "NonSphagnum",
Quadrat_ID == 8 ~ "Lichens")) %>% # 9 = Herbs MIDN
select(Plot_Name, Year, Event_QAQC, Cover_Type, UC, UR, MR, BR, BC, BL, ML, UL)
head(qchar_old)
table(qchar_old$Cover_Type, useNA = 'always') # Lichens are the only thing different b/c added later
table(qchar_new$CharacterLabel, useNA = 'always') # Lichens are NC for early years. OK.
table(qchar_new$Txt_Cov_BC, qchar_new$CharacterLabel, qchar_new$StartYear, useNA = 'always') # Make sure Lichens are NC for early years. They're all NC.
# Herbs shouldn't be in here for NETN! Issue line 206.
names(qchar_old)
names(qchar_new)
check_qchr <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "IsQAQC", "CharacterLabel", col1, col2)]}
)) %>% bind_rows()
}
quadchr_merge <- merge(qchar_new, qchar_old,
by.x = c("Plot_Name", "StartYear", "IsQAQC", "CharacterLabel"),
by.y = c("Plot_Name", "Year", "Event_QAQC", "Cover_Type"),
all.x = T, all.y = T)
check_qchr(quadchr_merge, "Pct_Cov_UC", "UC") # 0
check_qchr(quadchr_merge, "Pct_Cov_UR", "UR") # 0
check_qchr(quadchr_merge, "Pct_Cov_MR", "MR") # 0
check_qchr(quadchr_merge, "Pct_Cov_BR", "BR") # 0
check_qchr(quadchr_merge, "Pct_Cov_BC", "BC") # 0
check_qchr(quadchr_merge, "Pct_Cov_BL", "BL") # 0
check_qchr(quadchr_merge, "Pct_Cov_UL", "UL") # 0
qchar_vw <- get("COMN_QuadCharacter", envir = VIEWS_NETN)
table(qchar_vw$CharacterLabel)
# Herbs were dropped correctly.
#+++++ No remaining Quadrat character issues
#----- Quadrat Species -----
quadspp_old <- forestNETNarch::joinQuadData(from = 2006, to = 2019, QAQC = T, eventType = "all", locType = "all")
quadspp_new <- joinQuadSpecies(from = 2006, to = 2019,
QAQC = T, eventType = 'all', locType = 'all', valueType = 'midpoint')
nrow(quadspp_new)#25296
nrow(quadspp_old) #24858
#438 new quadspp rows b/c germinants have their own row
names(quadspp_new)
names(quadspp_old)
germs <- quadspp_old %>% filter(germ.cover > 0) %>% select(-avg.cover, -avg.freq) %>%
rename(avg.cover = germ.cover, avg.freq = germ.freq) %>% mutate(IsGerminant = 1)
rest <- quadspp_old %>% filter(germ.cover == 0) %>% select(-germ.cover, -germ.freq) %>% mutate(IsGerminant = 0)
quadspp_old2 <- rbind(rest, germs)
quadspp_merge <- full_join(quadspp_new %>% filter(IsGerminant == FALSE),
quadspp_old2,
by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"TSN" = "TSN",
"IsGerminant" = "IsGerminant")) %>%
select(Plot_Name, PlotID, EventID, StartYear, cycle.x, cycle.y, IsQAQC,
IsGerminant, Confidence, TSN, ScientificName, Latin_Name,
num_quads, quad_avg_cov,
quad_pct_freq, avg.cover, avg.freq, Pct_Cov_UC: Pct_Cov_UL,
UC, UR, MR, BR, BC, BL, ML, UL)
check_qspp <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("PlotID", "EventID", "Plot_Name", "Plot_Name2", "StartYear", "IsQAQC", "ScientificName", "Latin_Name",
"IsGerminant", col1, col2)]}
)) %>% bind_rows()
}
# Returns 0 records now
quadspp_merge %>% mutate(cov_diff = quad_avg_cov - avg.cover) %>%
filter(cov_diff > 0.01) %>%
select(PlotID, EventID, Plot_Name, StartYear, IsQAQC, TSN, ScientificName, num_quads,
quad_avg_cov, avg.cover, quad_pct_freq, avg.freq, cov_diff) #0 records
# Returns 0 records
quadspp_merge %>% mutate(freq_diff = quad_pct_freq - 100*(avg.freq)) %>%
filter(freq_diff > 0.01) %>%
select(PlotID, EventID, Plot_Name, StartYear, IsQAQC, TSN, ScientificName, num_quads,
quad_avg_cov, avg.cover, quad_pct_freq, avg.freq, freq_diff)
# Unknown Spp ##s were swapped for 5 visits in 2007 that had multiple unknowns of a type (eg herb).
# No issues remain
quadsamp$numQuads <- apply(quadsamp[,c(15:22)], 1, sum)
quads1 <- merge(plotevs_old, quadsamp[, c("Event_ID", "numQuads")], all = TRUE)
quadspp <- merge(quads[,c("Event_ID","TSN","Germinant","qUC_Cover_Class_ID","qUL_Cover_Class_ID",
"qML_Cover_Class_ID", "qBL_Cover_Class_ID","qBC_Cover_Class_ID","qBR_Cover_Class_ID",
"qMR_Cover_Class_ID","qUR_Cover_Class_ID")],
plants[,c("TSN","Latin_Name")],
by = "TSN", all.x = T) %>% filter(Event_ID != "4AFBA34C-83F8-4F67-8B7C-8F6E096AB21D")
new_quads <- c("Pct_Cov_UC", "Pct_Cov_UR", "Pct_Cov_MR", "Pct_Cov_BR", "Pct_Cov_BC",
"Pct_Cov_BL", "Pct_Cov_ML", "Pct_Cov_UL")
quads2 <- merge(quads1, quadspp, by = "Event_ID", all.x = T)
quads2[,15:22][quads2[,15:22]==1]<-0.1
quads2[,15:22][quads2[,15:22]==2]<-1.5
quads2[,15:22][quads2[,15:22]==3]<-3.5
quads2[,15:22][quads2[,15:22]==4]<-7.5
quads2[,15:22][quads2[,15:22]==5]<-17.5
quads2[,15:22][quads2[,15:22]==6]<-37.5
quads2[,15:22][quads2[,15:22]==7]<-62.5
quads2[,15:22][quads2[,15:22]==8]<-85
quads2[,15:22][quads2[,15:22]==9]<-97.5
old.names<-names(quads2[,15:22])
new.names<-c('UC','UL','ML','BL','BC','BR','MR','UR')
quads2<-quads2 %>% rename_at(all_of(vars(old.names)),~new.names)
quads2[,c(15:22)][is.na(quads2[,c(15:22)])]<-0
quads2$Plot_Name2 <- quads2$Plot_Name
quadspp_merge <- full_join(quadspp_new,
quads2 %>% select(Plot_Name, Plot_Name2, Year, Event_QAQC, Germinant, numQuads,
TSN, Latin_Name, UC:UR),
by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"IsGerminant" = "Germinant",
"TSN" = "TSN")) %>% filter(!is.na(Plot_Name)) # drops 1 mostly NA record came in from old
head(quadspp_merge)
check_qspp(quadspp_merge, "Pct_Cov_UC", "UC") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_UR", "UR") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_MR", "MR") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_BR", "BR") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_BC", "BC") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_BL", "BL") # ACAD-029, and NS ROVA-008-2011. ok
check_qspp(quadspp_merge, "Pct_Cov_ML", "ML") # ACAD-029. ok
check_qspp(quadspp_merge, "Pct_Cov_UL", "UL") # ACAD-029. ok
check_qspp(quadspp_merge, "ScientificName", "Latin_Name") # 5 weird UTF issues. all ok.
#++++++ 5/3 Duplicate species introduced in ACAD-001-2018 are the only remaining problem, which is a big problem.
# Reported issue in line 213.
#----- Microplot Shrubs -----
shrubs_vw <- get("COMN_MicroplotShrubs", envir = env) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQShrubCode,
MicroplotCode, TSN, ScientificName, CoverClassCode, CoverClassLabel)
table(shrubs_vw$SQShrubCode, shrubs_vw$StartYear) # 6 NS in 2010 ACAD-029; SAGA-008; and UL/B in 2006
table(shrubs_vw$CoverClassCode, shrubs_vw$StartYear)
# All cover < 2010 are correctly NC.
shrub_old <- forestNETNarch::joinMicroShrubData(from = 2006, to = 2019, locType = 'all', eventType = 'all', QAQC = T)
shrub_old$Latin_Name[shrub_old$Latin_Name == "No species recorded"] <- "None present"
shrub_new <- joinMicroShrubData(from = 2006, to = 2019, locType = 'all', eventType = 'all', QAQC = T)
shrub_merge <- full_join(shrub_new, shrub_old, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"ScientificName" = "Latin_Name") )
table(complete.cases(shrub_merge$Event_ID)) # all T
check_shrbs <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("PlotID", "EventID", "Plot_Name", "StartYear", "IsQAQC",
"ScientificName", col1, col2)]}
)) %>% bind_rows()
}
names(shrub_merge)
table(shrub_merge$Pct_Cov_UR, shrub_merge$StartYear, useNA = 'always')
shrub_merge %>% select(Plot_Name, StartYear, IsQAQC, ScientificName, shrub_avg_cov, cover) %>%
mutate(cov_diff = abs(shrub_avg_cov - cover)) %>% filter(cov_diff > 0.1)
# The only record >2009 is SARA-023-2012 b/c duplicate Vitis in UR. Second doesn't migrate, which is fine.
#++++++ No remaining shrub issues
#----- Microplot Seedlings ------
seeds_vw <- get("NETN_MicroplotSeedlings", envir = VIEWS_NETN) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQSeedlingCode,
MicroplotCode, TSN, ScientificName, SizeClassCode, SizeClassLabel, Count)
table(seeds_vw$SQSeedlingCode, seeds_vw$StartYear) # NS 178 in 2006 (good).
# 2010: 6 of 7 NS are ACAD-029 & SAGA-008, which is correct.
seed_new <- joinMicroSeedlings(from = 2006, to = 2019, eventType = 'all', locType = 'all', QAQC = TRUE)
#seed_inv <- joinMicroSeedlings(speciesType = 'invasive')
table(seed_new$SQSeedlingCode, seed_new$StartYear, useNA = 'always')
# SAGA-007-2010 UL micro is now migrating correctly as NP.
# rest of NS are correctly SAGA-008-2010 and ACAD-029-2010 or seedlings
# initially entered as shrubs, so no counts available
#-------------------------------
# Microplot saplings
#-------------------------------
# Checking raw view first, so make sure SQs are correct
saps_vw <- get("NETN_MicroplotSaplings", envir = VIEWS_NETN) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, StartDate, IsQAQC, SQSaplingCode,
MicroplotCode, TSN, ScientificName, DBHcm)
nrow(saps_vw[saps_vw$ScientificName == "No species recorded",])
table(saps_vw$SQSaplingCode)
# NP NS SS
# 1819 184 5720
table(saps_vw$SQSaplingCode, saps_vw$StartYear) # Most of the NS are in 2006 for UL/B, and 6 are 2010. Correct.
table(saps_vw$SQSaplingCode, saps_vw$MicroplotCode) # Most NS are for UL/B in 2006. Correct.
saps_new <- joinMicroSaplings(locType = "all", QAQC = T, eventType = 'all', canopyForm = 'all', speciesType = 'all')
length(unique(saps_new$EventID)) #1280
saps_prep <- merge(plotevs_old, micro, by = 'Event_ID', all = TRUE)
saps_prep2 <- merge(saps_prep, saps[, 1:6], by = "Microplot_Characterization_Data_ID", all = TRUE)
saps_old <- merge(saps_prep2, plants[, c("Latin_Name", "TSN")], by = "TSN", all.x = TRUE) %>%
mutate(Latin_Name2 = ifelse(Latin_Name == "No species recorded", "None present", Latin_Name))
saps_merge <- full_join(saps_new, saps_old, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"IsQAQC" = "Event_QAQC",
"ScientificName" = "Latin_Name2",
"MicroplotCode" = "Microplot_Name"),
suffix = c("_new", "_old"))
table(complete.cases(saps_merge$ScientificName)) # 6 F ACAD-029-2010 and SAGA-008-2010 b/c NS and join didn't match. OK
# Hard to check much more than this, since NETN doesn't track individual saplings.
# Will check that joinRegen returns the same values.
#----- joinRegenData -----
reg_old <- forestNETNarch::joinRegenData(from = 2006, to = 2019, QAQC = T, locType = 'all', speciesType = 'all', canopyForm = 'all') %>%
mutate(Latin_Name2 = ifelse(Latin_Name %in% c("No species recorded", 'no species recorded'), "None present", Latin_Name),
Latin_Name2 = ifelse(Latin_Name == "MissingData", "Not Sampled", Latin_Name2))
reg_new <- joinRegenData(from = 2006, to = 2019, QAQC = T, locType = 'all', eventType = 'all', speciesType = 'all', canopyForm = 'all')
reg_merge <- full_join(reg_new, reg_old, by = c('Plot_Name' = 'Plot_Name',
'StartYear' = 'Year',
'IsQAQC' = "Event_QAQC",
"ScientificName" = "Latin_Name2"),
suffix = c("_new", "_old"))
reg_merge_ss <- reg_merge %>% filter(ScientificName != "None present")
check_reg <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("PlotID", "EventID", "Plot_Name", "StartYear", "IsQAQC",
"ScientificName", col1, col2)]}
)) %>% bind_rows()
}
table(reg_new$ScientificName)
table(reg_old$Latin_Name2)
stock_check <- check_reg(reg_merge_ss, "stock_new", "stock_old") # Different b/c change in stocking.
check_reg(reg_merge_ss, "seed_15_30cm", "seed15.30") # only diff is b/c SAGA-008-2010 is correctly NA in new db
check_reg(reg_merge_ss, "seed_30_100cm", "seed30.100") # SAGA-008-2010 data handled diff.
check_reg(reg_merge_ss, "seed_100_150cm", "seed100.150") # SAGA-008-2010 data handled diff.
check_reg(reg_merge_ss, "seed_p150cm", "seed150p") # SAGA-008-2010 data handled diff.
check_reg(reg_merge_ss, "seed_den", "seed.den") # SAGA-008-2010 data handled diff.
check_reg(reg_merge_ss, "sap_den", "sap.den") # SAGA-008-2010 data handled diff.
#++++++ Regen checks complete. no remaining issues
#----- Additional Species -----
addspp_vw <- get("COMN_AdditionalSpecies", envir = VIEWS_NETN)
addspp_vw$Plot_Name <- paste(addspp_vw$ParkUnit, sprintf("%03d", addspp_vw$PlotCode), sep = "-")
addspp_vw <- addspp_vw %>%
select(Plot_Name, PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQAddSppCode,
TSN, ScientificName, ConfidenceClassCode, IsCollected, Note, SQAddSppNotes)
table(addspp_vw$SQAddSppCode)
# NP NS SS
# 6 1 15257 # 5/3 15260
# NS is ACAD-029-2010. 6 NPs are ACADs, and are correct
nrow(filter(addspp_vw, ScientificName == "No species recorded")) #0 Resolved!
addspp_new <- do.call(joinAdditionalSpecies, arglist)
addspp_old <- forestNETNarch::joinLocEvent(locType = 'all', eventType = 'all', from = 2006, to = 2019, QAQC = T) %>%
left_join(., addspp) %>% left_join(., plants[, c("TSN", "Latin_Name")]) %>%
select(Plot_Name, Year, Event_QAQC, TSN, Latin_Name, Confidence_ID, Collected, Notes) %>%
mutate(Latin_Name2 = ifelse(Latin_Name == "No species recorded", "None present", Latin_Name))
addspp_merge <- merge(addspp_new, addspp_old,
by.x = c("Plot_Name", "StartYear", "IsQAQC", "TSN"),
by.y = c("Plot_Name", "Year", "Event_QAQC", "TSN"),
all.x = T, all.y = T)
check_spp <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "IsQAQC", col1, col2)]}
)) %>% bind_rows()
}
check_spp(addspp_merge, "ScientificName", "Latin_Name2")
# Some weirdness here with unknown TSNs getting switched around
# These are the same plots where Unknowns got switched around in the quad. species. Not sure it matters...
# Plot_Name StartYear IsQAQC ScientificName Latin_Name2
# 1 ACAD-029 2010 0 Not Sampled <NA> # OK
# 2 ACAD-043 2007 0 <NA> Unknown Herb - 04 # Mig changed to Unknown Herb - 02, which is causing dups. Fix after final migration
# 3 ACAD-043 2007 0 Unknown Herb - 02 <NA> # Changed from Unknown Herb - 04; Fix after final migration
# 4 ACAD-059 2007 0 <NA> Unknown Grass - 02 # This was dropped b/c duplicate in quadspp. ~OK
# 5 ROVA-002 2007 0 <NA> Unknown Herb - 01 # This is entirely missing from new database.
# 6 ROVA-020 2007 0 <NA> Unknown Herb - 01 # Missing from database
# 7 ROVA-020 2007 0 <NA> Unknown Grass - 01 # Missing from database
# 8 SARA-013 2006 1 Unknown Carex - 02 <NA> # The row below was replaced with this. OK, I guess. ugh
# 9 SARA-013 2006 1 <NA> Carex # This was changed to Unknown Carex - 02. OK, I guess.
#++++++ Issues resolved! Only returns are None present handled differently.
#------ Taxa/tlu_Plants
head(plants)
taxa_wide <- prepTaxa()
head(taxa_wide)
taxa_merge <- merge(taxa_wide, plants[, c(1,4:9,11:21,24)], by = "TSN",
all.x = T, all.y = T)
check_taxa <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("TSN", "ScientificName", "Latin_Name", col1, col2)]}
)) %>% bind_rows()
}
check_taxa(taxa_merge, "ScientificName", "Latin_Name") # 2 weird UTF things. NO issues
check_taxa(taxa_merge, "Order.x", "Order.y") # 3 weird UTF things. NO issues
check_taxa(taxa_merge, "Family.x", "Family.y") # 3 weird UTF things. NO issues
check_taxa(taxa_merge, "Genus.x", "Genus.y") #0
check_taxa(taxa_merge, "Tree.x", "Tree.y") # Differences are b/c of new Tree/Shrub column
check_taxa(taxa_merge, "Shrub.x", "Shrub.y") # Differences are b/c of new Tree/Shrub and Vine exclusive columns
check_taxa(taxa_merge, "Vine.x", "Vine.y") # Persicaria was removed from the list b/c herbaceous
check_taxa(taxa_merge, "Herbaceous.x", "Herbaceous.y")
check_taxa(taxa_merge, "Graminoid.x", "Graminoid.y")
check_taxa(taxa_merge, "CommonName", "Common") # Differences are b/c old common has lists
taxa_wide %>% filter(TSN == -9999999951)
#+++++ Only remaining issue is remove "No species recorded" from list +++++
#----- Soil data
# Soil sample data
soilsamp_old <- read.csv("./testing_scripts/soilsamp_data_old.csv")
soillab_old <- read.csv("./testing_scripts/soillab_data_old.csv")
tbl_ssd <- read.csv("C:/Forest_Health/exports/tbl_Soil_Sample_Data.csv")
length(unique(soilsamp_old$Event_ID)) # 819
length(unique(soillab_old$Event_ID)) # 749
# Check views
soilsamp_vw <- get("COMN_SoilSample", envir = env) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC,
SQSoilCode, SampleSequenceCode, SoilLayerLabel,
Depth_cm, Note) %>%
filter(StartYear > 2006 & !is.na(SoilLayerLabel) #& StartYear < 2020
)
table(soilsamp_vw$SQSoilCode) # All SS. Good.
length(unique(soilsamp_vw$EventID)) # 820
soillab_vw <- get("COMN_SoilLab", envir = env) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC,
LabLayer, LabDateSoilCollected, UMOSample:ECEC, LabNotes, EventID, PlotID) %>%
filter(StartYear > 2006 #& StartYear < 2020
)
length(unique(soillab_vw$EventID)) # 823. Looks similar enough to move on.
# Now to compare old and new
# need to remove records that weren't sampled but had earthworms recorded
soildata2 <- soildata[!grepl("[++]", soildata$Notes), -c(10:13)] # drop updated/created cols
soil_old <- merge(soildata2, soilsamp[,-c(13:16)],
by = intersect(names(soildata2), names(soilsamp[,-c(13:16)])), all = TRUE)
plotevs_old <- forestNETNarch::joinLocEvent(from = 2007, to = 2019, QAQC = T, locType = 'all', eventType = 'all')
soil_old2 <- merge(plotevs_old, soil_old, by = intersect(names(plotevs_old), names(soil_old)), all.x = FALSE, all.y = TRUE) %>%
filter(!is.na(Location_ID)) %>% select(Plot_Name, Year, Event_QAQC,
Sampling_Position, Sample_Type, Horizon_Type, Archived,
Sample_Number, Litter_Depth, FF_Depth,
A_Horizon_Depth, Total_Excavation_Depth, Notes, Comments, Sample_Missed) %>%
filter(Year > 2006)
# convert NA horizons to 0, but not total
soil_old2[,c(9:11)][is.na(soil_old2[,c(9:11)])]<-0
names(soil_old2)
soilold_sum <- soil_old2 %>% group_by(Plot_Name, Year, Event_QAQC, Sample_Type, Archived) %>%
summarize(litter = mean(Litter_Depth, na.rm = T),
O_hor = mean(FF_Depth, na.rm = T),
A_hor = mean(A_Horizon_Depth, na.rm = T),
Tot_dep = mean(Total_Excavation_Depth, na.rm =T),
numsamps = length(unique(!is.na(Sample_Number))))
#soilsamp_wide comes from line 130 in joinSoilSampleData.R
soilsamp_wide$Plot_Name <- paste(soilsamp_wide$ParkUnit, sprintf("%03d", soilsamp_wide$PlotCode), sep = "-")
soilsamp_merge <- merge(soilsamp_wide, soilold_sum,
by.x = c("Plot_Name", "StartYear", "IsQAQC"),
by.y = c("Plot_Name", "Year", "Event_QAQC"), all = T)
soilsamp_merge %>% filter(is.na(EventID))
# Only ACAD-029-2010 and SARA-015-2018. OK
check_soils <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "IsQAQC", "Sampling_Position", "SampleSequenceCode", col1, col2)]}
)) %>% bind_rows()
}
names(soilsamp_merge)
soilsamp_merge %>% mutate(lit_diff = abs(Litter_cm - litter),
O_diff = abs(O_Horizon_cm - O_hor),
A_diff = abs(A_Horizon_cm - A_hor),
tot_diff = abs(Total_Depth_cm - Tot_dep)) %>%
filter(lit_diff > 0.5 | O_diff > 0.5 | A_diff > 0.5 | tot_diff > 0.5)
# Only returns 2 records
# SAGA-009-2010 is missing a total depth for Sample 3. Fixed missing value. Should migrate corrected in next version.
# MORR-008-2011 did not have a 3rd sample collected. Removed note, so hopefully migrates as NS instead of SS.
#+++++5/3 0 records returned.
check_soils(soilsamp_merge, "Note", "Comments") # 0
# Soil lab data
names(soildata2)
names(soillab)
soillab_old <- merge(soildata2, soillab[,-c(1, 34, 35)],
by = intersect(names(soildata2), names(soillab[,-c(1, 34, 35)])), all = TRUE)
names(soillab_old)
soillab_old2 <- merge(plotevs_old, soillab_old, by = intersect(names(plotevs_old), names(soillab_old)),
all.x = FALSE, all.y = TRUE) %>%
filter(!is.na(Location_ID) & Year > 2006) %>% select(Plot_Name, Year, Event_QAQC, Layer, UMO_Sample:ECEC, Notes,
Sampling_Position, Sample_Type, Archived)
head(soillab_old2)
soillab_new <- get("COMN_SoilLab", envir = VIEWS_NETN) %>%
select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC,
LabLayer, LabDateSoilCollected, UMOSample:ECEC, LabNotes, EventID, PlotID) %>%
filter(!is.na(UMOSample))# %>%
# filter(LabLayer %in% c("10 cm", "10cm - NonVS", "A", "A - NonVS", "O", "O/A")) #%>% filter(StartYear > 2006)
soillab_new$Plot_Name <- paste(soillab_new$ParkUnit, sprintf("%03d", soillab_new$PlotCode), sep = "-")
soillab_merge <- full_join(soillab_new, soillab_old2, by = c("Plot_Name" = "Plot_Name",
"StartYear" = "Year",
"UMOSample" = "UMO_Sample"),
suffix = c("_new", "_old"))
names(soillab_merge)
check_lab <- function(df, col1, col2){
lapply(1:nrow(df), function(x) (
if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
df[x, c("Plot_Name", "StartYear", "IsQAQC", "UMOSample", "LabLayer", col1, col2)]}
)) %>% bind_rows()
}
soillab_old_act <- soillab_old %>% filter(Layer %in% c("10cm", "A", "O", "O/A"))
nrow(soillab_old_act) #1193
table(soillab_old_act$Layer)
#10cm A O O/A
#220 405 544 24
soillab_new_act <- soillab_new %>% filter(LabLayer %in% c("10 cm", "A", "O", "O/A"))
nrow(soillab_new_act) #1193
table(soillab_new_act$LabLayer)
# 10 cm A O O/A
# 220 405 544 24
# Same numbers. Good
#tn <- check_lab(soillab_merge, "pctTN", "X..TN") # Something weird is going on with join that I can't seem to figure out.
# Going to spot check instead.
#++++++++ No lab-related issues to report based on spot checks
# Done with 4/26 migration check
#-------------------------------------
# Testing summary functions with latest updates
plotTreeMap(plotName = "WEFA-001", from = 2016, to = 2019 ) # worked
quadtest <- sumQuadGuilds(speciesType = 'invasive')
saptest <- sumSapDBHDist()
spplist <- sumSpeciesList()
spplist <- sumSpeciesList(speciesType = 'invasive')
strstg <- sumStrStage()
trdist <- sumTreeDBHDist()
notes <- joinVisitNotes()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.