testing_scripts/MIDN_testing_code_20210419.R

library(forestMIDN)
library(tidyverse)

#----- Testing the import/export functions
#importData(instance = 'local', server = "localhost", new_env = T) # release 1.0.22 4/22
#importData(instance = 'server', server = "INP2300VTSQL16\\IRMADEV1", new_env = T) #
path = "C:/Forest_Health/exports/MIDN"

#exportCSV(path, zip = TRUE)
importCSV(path = path, zip_name = "MIDN_Forest_20210423.zip")# release 1.0.22 4/22

# Function arguments
park = 'all'
from = 2007
to = 2019
QAQC = TRUE
locType = "all"
eventType = 'all'
panels = 1:4

arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
                locType = locType, eventType = eventType)

compev_arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
                       locType = locType)

# Checking data function
check_data <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", col1, col2)]}
  )) %>% bind_rows()
}

# import old database for comparisons
forestMIDNarch::importData(type = 'file',
  path='D:/NETN/Monitoring_Projects/Forest_Health/Database/MIDN/2021_Forest_Database/MIDN_FVM_BE_MASTER_20210422_Migration.accdb')

#forestMIDNarch::importData()

names(VIEWS_MIDN)
names(VIEWS_MIDN$MIDN_QuadSpecies)

#----- Testing joinLocEvent and migration -----
plotevs_old <- do.call(forestMIDNarch::joinLocEvent, c(arglist, output = 'verbose')) %>% mutate(Year = as.numeric(Year))
plotevs_new <- do.call(joinLocEvent, arglist)
names(plotevs_old)
names(plotevs_new)
nrow(plotevs_old) #1182
nrow(plotevs_new) #1182

pe_merge <- full_join(plotevs_new, plotevs_old, by = c("EventLegacyID" = "Event_ID", "Plot_Name" = "Plot_Name"))

check_data(pe_merge, "ParkSubUnit", "Unit_ID")
check_data(pe_merge,"xCoordinate", "X_Coord")
check_data(pe_merge,"yCoordinate", "Y_Coord")
#check_data(pe_merge,"StartDate", "Start_Date") # they're diff. format, so all show as different
check_data(pe_merge,"PanelCode", "Panel")
check_data(pe_merge,"Event_QAQC", "IsQAQC")
#test <- check_data(pe_merge,"ZoneCode", "UTM_Zone") # diff b/c one has N.
table(pe_merge$ZoneCode, pe_merge$UTM_Zone)
check_data(pe_merge,"Orientation.x", "Orientation.y")
#check_data(pe_merge,"cycle.x", "cycle.y")
check_data(pe_merge,"PlotTypeCode", "Loc_Type")
check_data(pe_merge,"PlotLegacyID", "Location_ID")
check_data(pe_merge,"Aspect.x", "Aspect.y")
check_data(pe_merge,"PhysiographyCode", "Physiographic_Class")
plot_check <- unique(pe_merge[, c("ParkUnit", "Plot_Name")])

table(plot_check$ParkUnit) # That's the correct # of plots/park
dir_dif <- check_data(pe_merge,"Directions.x", "Directions.y")
# No issues remaining

#----- Stand Views -----
stand_new <- do.call(joinStandData, arglist)
stand_old <- do.call(forestMIDNarch::joinStandData, arglist)
stand_old2 <- merge(stand_old, stand[,c("Event_ID", "Deer_Browse_Line_pre09_ID")],
                    by = "Event_ID", all.x = T, all.y = T)
stand_old2$Year <- as.numeric(stand_old2$Year)

names(stand_new)
names(stand_old)
stand_merge <- full_join(stand_new, stand_old2, by = c("Plot_Name" = "Plot_Name", "IsQAQC" = "Event_QAQC",
                                                      "StartYear" = "Year"))
names(stand_merge)

check_data(stand_merge, "Stand_Structure.x", "Stand_Structure.y") # Lots of NC
check_data(stand_merge, "Stand_Structure_Code", "Stand_Structure_ID") # Lots of NC
check_data(stand_merge, "Pct_Crown_Closure.x", "Pct_Crown_Closure.y") # Lots of NC
check_data(stand_merge, "Pct_Understory_Low.x", "Pct_Understory_Low.y")
check_data(stand_merge, "Pct_Understory_Mid.x", "Pct_Understory_Mid.y")
check_data(stand_merge, "Pct_Understory_High.x", "Pct_Understory_High.y")
check_data(stand_merge, "Pct_Bare_Soil", "Pct_Bare_Soil_Cover")
check_data(stand_merge, "Pct_Rock", "Pct_Rock_Cover")
check_data(stand_merge, "Pct_Lichen", "Pct_Lichen_Cover")
check_data(stand_merge, "Pct_Bryophyte", "Pct_Bryophyte_Cover")
check_data(stand_merge, "Pct_Water", "Pct_Surface_Water_Cover")
check_data(stand_merge, "Pct_Bare_Soil", "Pct_Bare_Soil_Cover")
check_data(stand_merge, "Pct_Trampled", "Pct_Trampled_Cover")
check_data(stand_merge, "Microtopography", "Microtopography_ID") # Lots of NC
check_data(stand_merge, "Deer_Browse_Index", "Deer_Browse_Line_ID") # All are 5s that are recorded pre 2010 as DBL pres
table(stand_merge$Deer_Browse_Label, stand_merge$StartYear, useNA = 'always') # PMs are accurate
table(stand_merge$Deer_Browse_Label, stand_merge$Deer_Browse_Line_pre09_ID)

ht_dif <- stand_merge %>% mutate(cod_dif = abs(Avg_Height_Codom - Avg_Codom_HT),
                                 int_dif = abs(Avg_Height_Inter - Avg_Inter_HT)) %>%
  select(Plot_Name, StartYear, IsQAQC, Avg_Height_Codom, Avg_Codom_HT,
         cod_dif, Avg_Height_Inter, Avg_Inter_HT, int_dif )

slope_dif <- stand_merge %>% mutate(slp_dif = abs(PlotSlope - Plot_Slope_Deg)) %>%
  select(Plot_Name, StartYear, IsQAQC, EventID, PlotSlope, Plot_Slope_Deg, slp_dif)
  # Only differences are QAQC events, which we don't care about

#----- Stand Disturbance -----
stdist_new <- do.call(joinStandDisturbance, arglist)

library(RODBC)
db <- odbcConnect("MIDNFVM") #
disturb<-sqlFetch(db,"tbl_Disturbances")
disttlu<-sqlFetch(db,"tlu_Disturbance_Codes")
disttlutc<-sqlFetch(db,"tlu_Disturbance_Threshhold_Codes")
odbcClose(db)

st_dist_o <- merge(plotevs_old[, c("Plot_Name", "Event_ID", "Event_QAQC", "Year")],
                   disturb[, c(2:6)], by = "Event_ID", all.x = T, all.y = T)

dist_merge <- merge(stdist_new, st_dist_o,
                    by.x = c("Plot_Name", "StartYear", "IsQAQC"),#, "Disturbance.Code"),
                    by.y = c("Plot_Name", "Year", "Event_QAQC"),#, "Disturbance_Code"),
                    all.x = T, all.y = T)
dist_merge[which(!complete.cases(dist_merge$DisturbanceCode)),] # None

check_data(dist_merge, "Disturbance_Threshold", "ThresholdCode") %>% arrange(Plot_Name) # PMs from early or dups
check_data(dist_merge, "Disturbance_Notes", "DisturbanceNote") # dups
check_data(dist_merge, "DisturbanceCode", "Disturbance_Code") %>% arrange(Plot_Name, StartYear) # All dups that sorted wrong or NAs to 0s. OK

#----- Tree Height -----
ht_dif <- stand_merge %>% mutate(cod_dif = abs(Avg_Height_Codom - Avg_Codom_HT),
                                 int_dif = abs(Avg_Height_Inter - Avg_Inter_HT)) %>%
  select(Plot_Name, StartYear, IsQAQC, Avg_Height_Codom, Avg_Codom_HT,
         cod_dif, Avg_Height_Inter, Avg_Inter_HT, int_dif)

# Tree height issues remain. +++I can't seem to find where I reported this now.+++
# Have to start from scratch to find the issues

tree_ht_old <- stand %>% select(Event_ID, Stand_Structure_ID, Tree_1_Number_Codom:Height_Tree_3_Inter) %>%
  filter(Stand_Structure_ID != 5)

tree_ht2 <- merge(plotevs_old[,c("Event_ID", "Plot_Name", "Year", "Event_QAQC")],
                  tree_ht_old, by = "Event_ID", all.x = FALSE, all.y = TRUE) %>% filter(!is.na(Plot_Name))

tree_ht_w1 <- tree_ht2 %>% select(Event_ID:Tree_3_Number_Inter) %>%
  pivot_longer(cols = c(Tree_1_Number_Codom:Tree_3_Number_Inter),
               names_to = "Samp",
               values_to = "Tree_Number") %>%
  mutate(Samp_Num = case_when(str_detect(Samp, "_1_") ~ 1L,
                              str_detect(Samp, "_2_") ~ 2L,
                              str_detect(Samp, "_3_") ~ 3L),
         Crown = ifelse(str_detect(Samp, "Codom"), "Codom", "Inter")
  )

tree_ht_w2 <- tree_ht2 %>% select(Event_ID:Event_QAQC, Height_Tree_1_Codom:Height_Tree_3_Inter) %>%
  pivot_longer(cols = c(Height_Tree_1_Codom:Height_Tree_3_Inter),
               names_to = "Samp_ht",
               values_to = "Height_m") %>%
  mutate(Samp_Num = case_when(str_detect(Samp_ht, "_1_") ~ 1L,
                              str_detect(Samp_ht, "_2_") ~ 2L,
                              str_detect(Samp_ht, "_3_") ~ 3L),
         Crown = ifelse(str_detect(Samp_ht, "Codom"), "Codom", "Inter")
  )

tree_ht3 <- merge(tree_ht_w1, tree_ht_w2, by = c("Event_ID", "Plot_Name", "Year", "Event_QAQC", "Samp_Num", "Crown"),
                  all.x = T, all.y = T) %>% select(-Samp, -Samp_ht) %>% filter(!is.na(Height_m))

head(tree_ht3)

tree_ht_old_sum <- tree_ht3 %>% group_by(Plot_Name, Year, Event_QAQC, Crown) %>%
  summarize(total_ht_old = sum(Height_m, na.rm =T),
            num_trees_old = sum(!is.na(Height_m)),
            .groups = 'drop')

tree_ht_vw <- get("COMN_StandTreeHeights", envir = VIEWS_MIDN)#[, -c(18:24, 27)]
tree_ht_vw$Plot_Name <- paste(tree_ht_vw$ParkUnit, sprintf("%03d", tree_ht_vw$PlotCode), sep = "-")
tree_ht_vw$Event_QAQC <- ifelse(tree_ht_vw$IsQAQC == 0, FALSE, TRUE)

tree_ht_vw_sum <- tree_ht_vw %>% group_by(Plot_Name, StartYear, Event_QAQC, EventID, CrownClassLabel) %>%
  summarize(total_ht_new = sum(Height),
            num_trees_new = sum(!is.na(Height)),
            .groups = 'drop') %>%
  mutate(Crown = ifelse(CrownClassLabel == "Intermediate", "Inter", "Codom"))

tree_height_comps <- merge(tree_ht_vw_sum,
                           tree_ht_old_sum,
                           by.x = c("Plot_Name", "StartYear", "Event_QAQC", "Crown"),
                           by.y = c("Plot_Name", "Year", "Event_QAQC", "Crown"),
                           all.x = T, all.y = T) %>%
  mutate(ht_diff = abs(total_ht_new - total_ht_old),
         n_tree_diff = num_trees_new - num_trees_old)

missing_tree_hts <- tree_height_comps %>% filter(n_tree_diff == -1)
missing_tree_hts # Problem resolved
#+++++ No remaining issues +++++

#----- CWD -----
cwd_old <- do.call(forestMIDNarch::joinCWDData, arglist) %>% mutate(ScientificName = ifelse(Latin_Name == "No species recorded",
                                                                                            "None present", Latin_Name))
cwd_new <- do.call(joinCWDData, arglist)

cwd_merge <- merge(cwd_new, cwd_old, by.x = c("Plot_Name", "StartYear", "IsQAQC", "ScientificName", "DecayClassCode"),
                   by.y = c("Plot_Name", "Year", "Event_QAQC", "ScientificName", "Decay_Class_ID"),
                   all = TRUE)

cwd_dif <- cwd_merge %>% mutate(cwd_dif = abs(CWD_Vol.x - CWD_Vol.y)) %>% filter(cwd_dif > 0.01 & IsQAQC == FALSE) %>%
  select(Plot_Name:ScientificName, DecayClassCode, CWD_Vol.x, CWD_Vol.y, cwd_dif)
# small differences due to rounding

#----- Tree Data
tree_old <- do.call(forestMIDNarch::joinTreeData, c(arglist, list(speciesType = 'all', status = 'all'))) %>%
  mutate(TagCode = as.numeric(Tree_Number_MIDN))
tree_new <- do.call(joinTreeData, c(arglist, list(speciesType = 'all', status = 'all')))

table(tree_new$HWACode, tree_new$ScientificName, useNA = 'always') # Only values for TSUCAN. Good!
non_tsucan <- tree_new %>% filter(ScientificName != "Tsuga canadensis")
table(non_tsucan$HWACode, non_tsucan$StartYear, useNA = 'always') # All NA. Good!

table(tree_new$BBDCode, tree_new$ScientificName, useNA = 'always') # Only values for FAGGRA. Good!
non_faggra <- tree_new %>% filter(ScientificName != "Fagus grandifolia")
table(non_faggra$BBDCode, non_faggra$StartYear, useNA = 'always')# All NA. Good!

tree_merge <- merge(tree_new, tree_old, by.x = c("Plot_Name", "StartYear", "IsQAQC", "TagCode"),
                    by.y = c("Plot_Name", "Year", "Event_QAQC", "TagCode"))

check_trees <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "TagCode", "IsQAQC", "ScientificName",
              "Status_ID", "TreeStatusCode",col1, col2)]}
  )) %>% bind_rows()
}

check_trees(tree_merge, "TSN.x", "TSN.y") #none
check_trees(tree_merge, "Azimuth.x", "Azimuth.y") #none
check_trees(tree_merge, "Fork.x", "Fork.y") #none

tree_merge %>% mutate(dist_diff = abs(Distance.x - Distance.y)) %>%
  filter(dist_diff > 0.1)  #None

tree_dbh <- tree_merge %>% mutate(dbh_diff = abs(DBH - DBHcm)) %>%
  filter(dbh_diff > 0) %>%
  select(Plot_Name, StartYear, IsQAQC, TagCode, ScientificName, DBHcm, DBH, dbh_diff)
# differences are all OK.

check_trees(tree_merge, "TreeStatusCode", "Status_ID")# none
crown_check <- check_trees(tree_merge, "Crown_Class_ID", "CrownClassCode")
table(crown_check$Status_ID, crown_check$Crown_Class_ID, useNA = 'always')
table(tree_merge$CrownClassCode, tree_merge$Crown_Class_ID, useNA = 'always')
# Differences are because dead and EX trees were correctly scrubbed of their crown class.

check_trees(tree_merge, "Decay_Class_ID", "DecayClassCode")
# No issues. Decays that are diff are DF and AS, which shouldn't have a decay class

check_trees(tree_merge, "IsDBHVerified", "DBH_Verified")
table(tree_merge$IsDBHVerified, tree_merge$DBH_Verified, useNA = 'always')

#check_trees(tree_merge, "Pct_Tot_Foliage_Cond", "Total_Foliage_Condition")
table(tree_merge$Pct_Tot_Foliage_Cond, tree_merge$Total_Foliage_Condition, tree_merge$StartYear, useNA = 'always')
# 2 records still with 0

# Foliage still 0
tree_merge %>% filter(Pct_Tot_Foliage_Cond == 0) %>% arrange(Plot_Name, StartYear) %>%
  select(Plot_Name, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode, CrownClassCode, Pct_Tot_Foliage_Cond)
  # need to figure out what's going on.
# +++ Tree data finished checking.

#----- Tree Foliage Conditions -----
fol_vw <- VIEWS_MIDN$COMN_TreesFoliageCond
table(fol_vw$TotalFoliageConditionCode, fol_vw$StartYear, useNA = 'always')

table(fol_vw$PercentLeafAreaLabel, fol_vw$StartYear) # 17 records with Not Applicable before 2016, which is wrong
table(fol_vw$PercentLeafAreaLabel, fol_vw$FoliageConditionCode) # NA correctly applied to L, NO, S, W, but 1 H and N w/ 0%

table(fol_vw$TotalFoliageConditionLabel, fol_vw$FoliageConditionCode, useNA = 'always') # NO/NotApp correct
table(fol_vw$PercentLeavesLabel, fol_vw$FoliageConditionCode, useNA = 'always')
# 2 H and 2 N that are incorrectly NA- should be PM
# pre 2011 0s are converting to NA correctly. 2011+ are incorrectly 0 for both percent columns.

no_folcond_with_totfol_0p <- fol_vw %>% filter(FoliageConditionCode == "NO" & TotalFoliageConditionCode %in% c(1:4)) %>%
  select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode,
         FoliageConditionCode, TotalFoliageConditionCode)

table(fol_vw$PercentLeavesLabel, fol_vw$FoliageConditionCode, useNA = 'always')# No 0s!
fol_vw %>% filter(PercentLeavesLabel == "0%" & FoliageConditionCode %in% c("L", "N")) %>% arrange(ParkUnit, PlotCode, StartYear, TagCode) %>%
  select(ParkUnit, PlotCode, StartYear, IsQAQC, TagCode, ScientificName, TreeStatusCode,
         PercentLeavesLabel, FoliageConditionCode, TotalFoliageConditionCode)


compev_arglist <- list(park = park, from = from, to = to, QAQC = QAQC, panels = panels,
                       locType = locType)

fol_new <- do.call(joinTreeFoliageCond, compev_arglist)
fol_old1 <- xrfolcond %>% mutate(Cond = case_when(Foliage_Condition_ID == 1 ~ "C",
                                                  Foliage_Condition_ID == 2 ~ "N",
                                                  Foliage_Condition_ID == 3 ~ "H",
                                                  Foliage_Condition_ID == 4 ~ "S",
                                                  Foliage_Condition_ID == 5 ~ "W",
                                                  Foliage_Condition_ID == 6 ~ "L",
                                                  Foliage_Condition_ID == 7 ~ "O",
                                                  Foliage_Condition_ID == 8 ~ "NO",
                                                  TRUE ~ NA_character_),
                                 Pct_Leaves_Aff = case_when(Foliage_Condition_Percent == 0 ~ 0,
                                                            Foliage_Condition_Percent == 1 ~ 5.5,
                                                            Foliage_Condition_Percent == 2 ~ 30,
                                                            Foliage_Condition_Percent == 3 ~ 70,
                                                            Foliage_Condition_Percent == 4 ~ 95,
                                                            TRUE ~ NA_real_),
                                 Pct_Leaf_Area = case_when(Leaf_Area_Percent == 0 ~ 0,
                                                           Leaf_Area_Percent == 1 ~ 5.5,
                                                           Leaf_Area_Percent == 2 ~ 30,
                                                           Leaf_Area_Percent == 3 ~ 70,
                                                           Leaf_Area_Percent == 4 ~ 95,
                                                           TRUE ~ NA_real_)
)

tree_evs_old <- left_join(plotevs_old, trees %>% select(Tree_ID:Tree_Notes),
                          by = intersect(names(plotevs_old), names(trees %>% select(Tree_ID:Tree_Notes)))) %>%
  left_join(., treedata %>% select(Tree_Data_ID:Notes),
            by = intersect(names(.), names(treedata %>% select(Tree_Data_ID:Notes)))) %>%
  select(Plot_Name, Year, Event_QAQC, Event_ID, Tree_ID, Status_ID, Tree_Number_MIDN, Total_Foliage_Condition,
         Tree_Data_ID, Status_ID) %>% filter(Status_ID %in% c("1", "AB", "AF", "AL", "AM", "AS",
                                                              "RB", "RF", "RL", "RS"))

fol_old <- left_join(tree_evs_old, fol_old1, by = "Tree_Data_ID") %>%
  select(Plot_Name, Year, Event_QAQC, Tree_Number_MIDN, Cond, Status_ID, Pct_Leaves_Aff, Pct_Leaf_Area) %>%
  arrange(Plot_Name, Year, Event_QAQC, Tree_Number_MIDN) %>%
  pivot_wider(names_from = Cond,
              values_from = c(Pct_Leaves_Aff, Pct_Leaf_Area),
              values_fill = NA_real_) %>%
  mutate(TagCode = as.numeric(Tree_Number_MIDN)) %>%
  select(-Pct_Leaves_Aff_NA, -Pct_Leaf_Area_NA, -Pct_Leaf_Area_W, -Pct_Leaf_Area_O, -Pct_Leaf_Area_S)

nrow(fol_old) #23401
nrow(fol_new) #23401

fol_merge <- full_join(fol_new, fol_old,
                       by = c("Plot_Name" = "Plot_Name", "StartYear" = "Year", "IsQAQC" = "Event_QAQC", "TagCode" = "TagCode"),
                       suffix = c("_new", "_old"))


check_conds <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "TagCode", "Status_ID", "IsQAQC", col1, col2)]}
  )) %>% bind_rows()
}

names(fol_merge)
# 0s in _new because they were filled in function
lvs_C <- check_conds(fol_merge, "Pct_Leaves_Aff_C_new", "Pct_Leaves_Aff_C_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_H <- check_conds(fol_merge, "Pct_Leaves_Aff_H_new", "Pct_Leaves_Aff_H_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_L <- check_conds(fol_merge, "Pct_Leaves_Aff_L_new", "Pct_Leaves_Aff_L_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_N <- check_conds(fol_merge, "Pct_Leaves_Aff_N_new", "Pct_Leaves_Aff_N_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_S <- check_conds(fol_merge, "Pct_Leaves_Aff_S_new", "Pct_Leaves_Aff_S_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_W <- check_conds(fol_merge, "Pct_Leaves_Aff_W_new", "Pct_Leaves_Aff_W_old") # all diffs are NAs converted to 0s. I think they should still be NA.
lvs_O <- check_conds(fol_merge, "Pct_Leaves_Aff_O_new", "Pct_Leaves_Aff_O_old") # all diffs are NAs converted to 0s. I think they should still be NA.

la_C <- check_conds(fol_merge, "Pct_Leaf_Area_C_new", "Pct_Leaf_Area_C_old") # all diffs are NAs converted to 0s. I think they should still be NA.
la_H <- check_conds(fol_merge, "Pct_Leaf_Area_H_new", "Pct_Leaf_Area_H_old") # all diffs are NAs converted to 0s. I think they should still be NA.
la_N <- check_conds(fol_merge, "Pct_Leaf_Area_N_new", "Pct_Leaf_Area_N_old") # all diffs are NAs converted to 0s. I think they should still be NA.

# +++++ Still a lot of issues with foliage condition for Os getting converted to NA.

#----- Tree Conditions
# MIDN correct condition counts
#    H	AD	BBD	CAVL CAVS	 CW	   DBT	EAB	EB	   G	HWA	ID	NO	  OTH	SPB	VIN	  VOB
#13828	968	29	297	 279	2979	2271	9	  3123	172	3	  58	1257	21	8	  1453	189
table(VIEWS_MIDN$COMN_TreesConditions$TreeConditionCode)

#H AD   BBD  CAVL  CAVS    CW   DBT   EAB    EB     G     H   ID    NO   OTH   SPB  VINE
#13801 970    29   297   279  2971  2257     9  3119   171    3    58  1257    21     8  1636
table(VIEWS_NETN$COMN_TreesConditions$TreeConditionCode, VIEWS_NETN$COMN_TreesConditions$StartYear, useNA = 'always')

con_vw <- VIEWS_MIDN$COMN_TreesConditions
dead_trees_with_conds <- con_vw %>%
  filter(TreeStatusCode %in% c("DB", "DL", "DM", "DS")) #%>%
  #filter(!TreeConditionCode %in% c("NO", "CAVS", "CAVL")) #%>%
  #filter(!is.na(TreeConditionCode)) #0

table(dead_trees_with_conds$TreeConditionCode, dead_trees_with_conds$StartYear, useNA = 'always')
# Still need NC for NAs <=2011 and PM for >2011 (1 record in 2012)

cond_new <- do.call(joinTreeConditions, c(compev_arglist, list(status = 'active')))

active <- c("1", "AB" ,"AF", "AL", "AS", "AM", "DB", "DL", "DM", "DS", "RB", "RF", "RL", "RS")

tree_evs_old <- left_join(plotevs_old, trees %>% select(Tree_ID:Tree_Notes),
                          by = intersect(names(plotevs_old), names(trees %>% select(Tree_ID:Tree_Notes)))) %>%
  left_join(., treedata %>% select(Tree_Data_ID:Notes),
            by = intersect(names(.), names(treedata %>% select(Tree_Data_ID:Notes)))) %>%
  select(Plot_Name, Year, Event_QAQC, Event_ID, Tree_ID, Tree_Number_MIDN, Total_Foliage_Condition,
         Tree_Data_ID, Status_ID) %>% filter(Status_ID %in% active)

table(tree_evs_old$Status_ID)

tlucond <- read.csv("D:/NETN/R_Dev/forestNETN/testing_scripts/tlu_Tree_Conditions.csv")
spb <- data.frame(Tree_Condition_ID = 28, Tree_Condition_ORDER = 28, Code = "SPB", Description = "SPB", Type = "Alive")
tlucond <- rbind(tlucond, spb)

cond_old1 <- left_join(xrtreecond %>% select(Tree_Data_ID, Tree_Condition_ID),
                       tlucond %>% select(Tree_Condition_ID, Code), by = c("Tree_Condition_ID")) %>%
  right_join(tree_evs_old, ., by = intersect(names(tree_evs_old), names(.))) %>%
  select(Plot_Name, Year, Event_QAQC, Tree_Number_MIDN, Status_ID, Code) %>%
  filter(!is.na(Code)) %>% filter(!is.na(Plot_Name)) %>%
  arrange(Plot_Name, Year, Tree_Number_MIDN) %>% # drop trees without conditions
  mutate(pres = 1, TagCode = as.numeric(Tree_Number_MIDN)) %>% select(-Tree_Number_MIDN)

cond_old <- cond_old1 %>% mutate(pres = 1) %>% unique() %>%
  pivot_wider(names_from = Code, values_from = pres, values_fill = 0)

names(cond_old)
names(cond_new)
cond_merge <- full_join(cond_new, cond_old, by = c("Plot_Name" = "Plot_Name",
                                                   "StartYear" = "Year",
                                                   "IsQAQC" = "Event_QAQC",
                                                   "TagCode" = "TagCode"),
                        suffix = c("_new", "_old"))


check_conds <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "TagCode", "TreeStatusCode", "IsQAQC", col1, col2)]}
  )) %>% bind_rows()
}

cond_merge[,18:63][is.na(cond_merge[,18:63])] <- 0 # just so rowwise checking works. There were 0s in new and NAs in old
names(cond_merge)
check_conds(cond_merge, "H_new", "H_old") # 0
check_conds(cond_merge, "NO_new", "NO_old") # 0
check_conds(cond_merge, "AD_new", "AD_old") #0
# no ALB, BC, BWA, DOG, GM, RPS, SB, SOD SPB, SW so no check for it
check_conds(cond_merge, "BBD_new", "BBD_old") #0
check_conds(cond_merge, "CAVL_new", "CAVL_old") #0
check_conds(cond_merge, "CAVS_new", "CAVS_old") #0
check_conds(cond_merge, "CW_new", "CW_old") # 0
check_conds(cond_merge, "DBT_new", "DBT_old") #0
check_conds(cond_merge, "EAB_new", "EAB_old") #0
check_conds(cond_merge, "EB_new", "EB_old") #0
check_conds(cond_merge, "G_new", "G_old") #0
check_conds(cond_merge, "HWA_new", "HWA_old") #0
check_conds(cond_merge, "ID_new", "ID_old") #0
check_conds(cond_merge, "OTH_new", "OTH_old") #0
check_conds(cond_merge, "SPB_new", "SPB_old") #0

#++++++++ Remaining issue: NC and PM for dead trees are currently NA

#------ Vines -----
vines_new <- do.call(joinTreeVineSpecies, c(compev_arglist, speciesType = 'all')) %>%
  select(Plot_Name, StartYear, IsQAQC, TagCode, VinePositionCode, VinePositionLabel, ScientificName, TSN)

names(vines_new)
View(VIEWS_MIDN$COMN_TreesVine)
vines_old <- left_join(xrtreecond %>% select(Tree_Data_ID, Tree_Condition_ID, Species_ID),
                       tlucond %>% select(Tree_Condition_ID, Code), by = c("Tree_Condition_ID")) %>%
  right_join(tree_evs_old, ., by = intersect(names(tree_evs_old), names(.))) %>%
  left_join(., plants %>% select(TSN, Latin_Name), by = c("Species_ID" = "TSN")) %>%
  select(Plot_Name, Year, Event_QAQC, Tree_Number_MIDN, Status_ID, Code, Species_ID, Latin_Name) %>%
  filter(!is.na(Code)) %>% filter(!is.na(Plot_Name)) %>% filter(Code %in% c("VIN", "VOB")) %>%
  arrange(Plot_Name, Year, Tree_Number_MIDN) %>% # drop trees without conditions
  mutate(TagCode = as.numeric(Tree_Number_MIDN)) %>% select(-Tree_Number_MIDN)

nrow(vines_old) #1636
nrow(vines_new) #1634 # Persicaria perfoliata is getting dropped b/c not a woody species

vines_merge <- full_join(vines_new, vines_old, by = c("Plot_Name" = "Plot_Name",
                                                      "StartYear" = "Year",
                                                      "IsQAQC" = "Event_QAQC",
                                                      "TagCode" = "TagCode",
                                                      "TSN" = "Species_ID"),
                         suffix = c("_new", "_old"))
head(vines_merge)
table(vines_merge$VinePositionCode, vines_merge$StartYear, useNA = 'always') # B only >2019
table(vines_merge$Code, vines_merge$StartYear, useNA = 'always') # 2007 and 2019 are off by 1, with NAs
  # Persicaria perfoliata, an herbaceous vine was recorded in old and not migrating to new database. Decided this is OK,
  # b/c unlikely to make it into the crown.
table(vines_merge$ScientificName, vines_merge$Latin_Name, useNA = 'always') # Looks good

# check if trees with multiple vines are migrating
mult_vines <- vines_new %>% group_by(Plot_Name, StartYear, IsQAQC, TagCode) %>% summarize(num_vines = n()) %>% filter(num_vines > 1)
# Multiple vines are now migrating into the database, which is great. The view is actually duplicating them, but my function
# uses unique() to fix it for now.
#++++++ No issues remaining

#----- Quadrat Character -----
qchar_new <- joinQuadData(park = 'all', from = 2007, to = 2019, locType = 'all', eventType = 'all',
                          valueType = 'all', QAQC = T)

qchar_new %>% filter(is.na(CharacterLabel) | num_quads < 12) %>%
  select(Plot_Name, StartYear, IsQAQC, CharacterLabel, num_quads) %>% arrange(Plot_Name, StartYear)
# COLO-380;RICH-63/RICH-73 all showing up correctly

plotevs_old <- forestMIDNarch::joinLocEvent(park = 'all', from = 2007, to = 2019, eventType = 'all',
                                            locType = 'all', QAQC = T, rejected = FALSE)

qchar_old <- merge(plotevs_old, quadchr, by = intersect(names(plotevs_old), names(quadchr)), all.x = T, all.y = F) %>%
  select(Plot_Name,Year, Event_QAQC, Quadrat_ID, A2:CC)

names(quadchr)

quad_names = c('A2', 'A5', 'A8', 'AA', 'B2', 'B5', 'B8', 'BB', 'C2', 'C5', 'C8', 'CC')
head(qchar_old)

qchar_old[ , quad_names][qchar_old[ , quad_names] == 1] <- 0.1
qchar_old[ , quad_names][qchar_old[ , quad_names] == 2] <- 1.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 3] <- 3.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 4] <- 7.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 5] <- 17.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 6] <- 37.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 7] <- 62.5
qchar_old[ , quad_names][qchar_old[ , quad_names] == 8] <- 85
qchar_old[ , quad_names][qchar_old[ , quad_names] == 9] <- 97.5

table(qchar_old$Quadrat_ID)
table(qchar_new$CharacterLabel, useNA = 'always')

qchar_old <- qchar_old %>% mutate(Cover_Type = case_when(Quadrat_ID == 2 ~ "Soil",
                                                         Quadrat_ID == 3 ~ "Rock",
                                                         Quadrat_ID == 4 ~ "Stem",
                                                         Quadrat_ID == 5 ~ "Wood",
                                                         Quadrat_ID == 6 ~ "Sphagnum",
                                                         Quadrat_ID == 7 ~ "NonSphagnum",
                                                         Quadrat_ID == 8 ~ "Lichens",
                                                         Quadrat_ID == 9 ~ "Herbs")) %>% # 9 = Herbs MIDN
  select(Plot_Name, Year, Event_QAQC, Cover_Type, A2, A5, A8, AA, B2, B5, B8, BB, C2, C5, C8, CC)

incomplete_old <- qchar_old[which(!complete.cases(qchar_old)),]
# Plots with at least one quad missing quadrat data:
# SQs should be NS for: COLO-380-2018 All- needs fixing;
# RICH-063-2011 AA, B2, B5, B8 (NS correct for data/quads, missing SQ for seedlings);
# RICH-073-2015 B5 & B8 (NS correct for data/quads, missing SQ for seedlings);
# PMs should replace blanks for: APCO-184-2009 A8 Herbs; FRSP-106-2008 A2 Herbs.

table(qchar_old$Cover_Type, useNA = 'always') # Lichens are the only thing different b/c added later
table(qchar_new$CharacterLabel, useNA = 'always') # Make sure Lichens are NC for early years. They're all NC.
table(qchar_new$Txt_Cov_A2, qchar_new$CharacterLabel, qchar_new$StartYear, useNA = 'always') # No issues

head(qchar_old)

check_qchr <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "IsQAQC", "CharacterLabel", col1, col2)]}
  )) %>% bind_rows()
}

quadchr_merge <- merge(qchar_new, qchar_old,
                       by.x = c("Plot_Name", "StartYear", "IsQAQC", "CharacterLabel"),
                       by.y = c("Plot_Name", "Year", "Event_QAQC", "Cover_Type"),
                       all.x = T, all.y = T)

check_qchr(quadchr_merge, "Pct_Cov_A2", "A2") #0
check_qchr(quadchr_merge, "Pct_Cov_A5", "A5") #0
check_qchr(quadchr_merge, "Pct_Cov_A8", "A8") #0
check_qchr(quadchr_merge, "Pct_Cov_AA", "AA") #0

check_qchr(quadchr_merge, "Pct_Cov_B2", "B2") #0
check_qchr(quadchr_merge, "Pct_Cov_B5", "B5") #0
check_qchr(quadchr_merge, "Pct_Cov_B8", "B8") #0
check_qchr(quadchr_merge, "Pct_Cov_BB", "BB") #0

check_qchr(quadchr_merge, "Pct_Cov_C2", "C2") #0
check_qchr(quadchr_merge, "Pct_Cov_C5", "C5") #0
check_qchr(quadchr_merge, "Pct_Cov_C8", "C8") #0
check_qchr(quadchr_merge, "Pct_Cov_CC", "CC") #0
#+++++ No issues

#----- Quadrat Species -----
quadspp_old <- forestMIDNarch::joinQuadData(from = 2007, to = 2019, QAQC = T, eventType = "all", locType = "all") %>%
               mutate(Year = as.numeric(Year))

quadspp_new <- joinQuadSpecies(from = 2007, to = 2019,
                               QAQC = T, eventType = 'all', locType = 'all', valueType = 'midpoint')
nrow(quadspp_new)#10172
nrow(quadspp_old) #10169
#3 new quadspp rows.

names(quadspp_new)
names(quadspp_old)

quadspp_merge <- full_join(quadspp_new,
                           quadspp_old,
                           by = c("Plot_Name" = "Plot_Name",
                                  "StartYear" = "Year",
                                  "IsQAQC" = "Event_QAQC",
                                  "TSN" = "TSN")) %>%
  select(Plot_Name, PlotID, EventID, StartYear, cycle.x, cycle.y, IsQAQC,
         Confidence, TSN, ScientificName, Latin_Name,
         num_quads, quad_avg_cov,
         quad_pct_freq, avg.cover, avg.freq, Pct_Cov_A2: Pct_Cov_CC,
         A2:CC)

check_qspp <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("PlotID", "EventID", "Plot_Name", "Plot_Name2", "StartYear", "IsQAQC", "ScientificName", "Latin_Name",
               col1, col2)]}
  )) %>% bind_rows()
}

# Check diff Pct cover
quadspp_merge %>% mutate(cov_diff = quad_avg_cov - avg.cover) %>%
  filter(cov_diff > 0.01) %>%
  select(PlotID, EventID, Plot_Name, StartYear, IsQAQC, TSN, ScientificName, num_quads,
         quad_avg_cov, avg.cover, quad_pct_freq, avg.freq, cov_diff) #0

# Check % freq
quadspp_merge %>% mutate(freq_diff = quad_pct_freq - 100*(avg.freq)) %>%
  filter(freq_diff > 0.01) %>%
  select(PlotID, EventID, Plot_Name, StartYear, IsQAQC, TSN, ScientificName, num_quads,
         quad_avg_cov, avg.cover, quad_pct_freq, avg.freq, freq_diff) #0

quadsamp$numQuads <- apply(quadsamp[,c(3:14)], 1, sum)
quads1 <- merge(plotevs_old, quadsamp[, c("Event_ID", "numQuads")], all = TRUE)

quadspp <- merge(quads[,c("Event_ID","TSN",
                          "qA2_Cover_Class_ID", "qA5_Cover_Class_ID","qA8_Cover_Class_ID","qAA_Cover_Class_ID",
                          "qB2_Cover_Class_ID", "qB5_Cover_Class_ID","qB8_Cover_Class_ID","qBB_Cover_Class_ID",
                          "qC2_Cover_Class_ID", "qC5_Cover_Class_ID","qC8_Cover_Class_ID","qCC_Cover_Class_ID")],
                 plants[,c("TSN","Latin_Name")],
                 by = "TSN", all.x = T) %>% filter(Event_ID != "4AFBA34C-83F8-4F67-8B7C-8F6E096AB21D")

new_quads <- c("Pct_Cov_A2", "Pct_Cov_A5", "Pct_Cov_A8", "Pct_Cov_AA",
               "Pct_Cov_B2", "Pct_Cov_B5", "Pct_Cov_B8", "Pct_Cov_BB",
               "Pct_Cov_C2", "Pct_Cov_C5", "Pct_Cov_C8", "Pct_Cov_CC"
               )
quads2 <- merge(quads1, quadspp, by = "Event_ID", all.x = T)
names(quads2)
quads2[,14:25][quads2[,14:25]==1]<-0.1
quads2[,14:25][quads2[,14:25]==2]<-1.5
quads2[,14:25][quads2[,14:25]==3]<-3.5
quads2[,14:25][quads2[,14:25]==4]<-7.5
quads2[,14:25][quads2[,14:25]==5]<-17.5
quads2[,14:25][quads2[,14:25]==6]<-37.5
quads2[,14:25][quads2[,14:25]==7]<-62.5
quads2[,14:25][quads2[,14:25]==8]<-85
quads2[,14:25][quads2[,14:25]==9]<-97.5
old.names<-names(quads2[,14:25])
old.names
new.names<-c('A2','A5','A8','AA','B2','B5','B8','BB','C2','C5','C8','CC')
quads2<-quads2 %>% rename_at(all_of(vars(old.names)),~new.names)
quads2[,c(14:25)][is.na(quads2[,c(14:25)])]<-0
quads2$Plot_Name2 <- quads2$Plot_Name
quads2$Year <- as.numeric(quads2$Year)

quadspp_merge <- full_join(quadspp_new,
                           quads2 %>% select(Plot_Name, Plot_Name2, Year, Event_QAQC, numQuads,
                                             TSN, Latin_Name, A2:CC),
                           by = c("Plot_Name" = "Plot_Name",
                                  "StartYear" = "Year",
                                  "IsQAQC" = "Event_QAQC",
                                  "TSN" = "TSN")) %>% filter(!is.na(Plot_Name)) # drops 1 mostly NA record came in from old
head(quadspp_merge)

check_qspp(quadspp_merge, "Pct_Cov_A2", "A2") # 5 records. 4 are NS and NP in new. VAFO-161 No spp rec is showing up.
# It's not matching new record, because that's correctly None present (row 3 in output). Should be fixed in next migration
check_qspp(quadspp_merge, "Pct_Cov_A5", "A5") # 5 records. same as above
check_qspp(quadspp_merge, "Pct_Cov_A8", "A8") # 5 records. same as above
check_qspp(quadspp_merge, "Pct_Cov_AA", "AA") # 11 records. same as above plus NS coming in correctly in new

check_qspp(quadspp_merge, "Pct_Cov_B2", "B2") # 11 records. same as above plus NS coming in correctly in new
check_qspp(quadspp_merge, "Pct_Cov_B5", "B5") # 16 records. same as above plus NS coming in correctly in new
check_qspp(quadspp_merge, "Pct_Cov_B8", "B8") # 16 records. same as above plus NS coming in correctly in new
check_qspp(quadspp_merge, "Pct_Cov_BB", "BB") # 55 records. same as above

check_qspp(quadspp_merge, "Pct_Cov_C2", "C2") # 5 records. same as above
check_qspp(quadspp_merge, "Pct_Cov_C5", "C5") # 5 records. same as above
check_qspp(quadspp_merge, "Pct_Cov_C8", "C8") # 5 records. same as above
check_qspp(quadspp_merge, "Pct_Cov_CC", "CC") # 5 records. same as above

head(quadspp_merge)
check_qspp(quadspp_merge, "ScientificName", "Latin_Name") # 5 weird UTF issues. all ok.
#++++++ No species recorded are still in the xref and VAFO-161-2009 SQ should be NP not NS.

#----- Quad seedlings
# Catching no species recorded and incorrect NP SQ
qseeds <- VIEWS_MIDN$MIDN_QuadSeedlings
table(qseeds$SQSeedlingCode, qseeds$ScientificName)
# There are 2987 "No species recorded" with NP
table(qseeds$SQSeedlingCode)

table(qseeds$SQSeedlingCode) # the numbers below are close to what it should be
# with a few more NS b/c of COLO-380-2018
#NS     NP       SS
#65   2987  107948
table(qseeds$ScientificName, qseeds$SQSeedlingCode, useNA = 'always')
# Still have 2987 "No species recorded" coming in. These shouldn't be migrating.
# It at least matches the number of NP in the higher level table.

seeds_new <- joinQuadSeedlings(locType = 'all', eventType = 'all', QAQC = T, valueType = 'all', canopyForm = 'all')
table(seeds_new$ScientificName, seeds_new$SQSeedlingCode, useNA = 'always')

reg_new <- joinRegenData(eventType = 'all', locType = 'all', QAQC = T, canopyForm = 'all', speciesType = 'all')
reg_old <- forestMIDNarch::joinRegenData(eventType = 'all', locType = 'all', QAQC = T,
                                         from = 2007, to = 2019, canopyForm = 'all')

reg_old$Latin_Name[reg_old$Latin_Name == "No species recorded"] <- "None present"
reg_old2 <- reg_old %>% filter(!Latin_Name %in% c("None present", "no species recorded"))

reg_merge <- merge(reg_new, reg_old2, by.x = c("Plot_Name", "StartYear", "IsQAQC", "ScientificName"),
                   by.y = c("Plot_Name", "Year", "Event_QAQC", "Latin_Name"),
                   all.x=T, all.y=T) %>% filter(ScientificName != "None present")
merge_nas <- reg_merge[is.na(reg_merge$Network),] # no As

check_reg <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "IsQAQC", "ScientificName", col1, col2)]}
  )) %>% bind_rows()
}
#+++++++++ ENDED HERE +++++++++++++
# There are a couple of issues with my code and legacy data that need fixing
# The not sampled events for seeds and saps which should only turn 1 or 2 things off are turning all size classes off
# need to look into how I'm doing that in the joinRegenData() function. the seedling and saplings are okay I think.
#++++++++++++++++++++++++++++++++++

head(reg_merge)
# Issues below (n=36) are all due to diff/better error handling in new function
check_reg(reg_merge, "seed_15_30cm", "seed15.30") # 14 records that should mostly be fixed by migration
check_reg(reg_merge, "seed_30_100cm", "seed30.100") # 13 records that should mostly be fixed by migration
check_reg(reg_merge, "seed_100_150cm", "seed100.150") # 13 records that should mostly be fixed by migration
check_reg(reg_merge, "seed_p150cm", "seed150p")# 13 records that should mostly be fixed by migration
check_reg(reg_merge, "sap_den", "sap.den") #47 records that differ. Most should be fixed by migration

#----- Microplot Shrubs -----
shrubs_vw <- get("COMN_MicroplotShrubs", envir = env) %>%
  select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQShrubCode,
         MicroplotCode, TSN, ScientificName, CoverClassCode, CoverClassLabel)
table(shrubs_vw$SQShrubCode, shrubs_vw$StartYear) # 9 NS PETE-185;VAFO-9999;COLO-380 OK
table(shrubs_vw$CoverClassCode, shrubs_vw$StartYear)
# 2007-8 are all NC, 2009-2019 are comb of NC and other stuff. All looks good.

shrub_old <- forestMIDNarch::joinMicroShrubData(from = 2007, to = 2019, locType = 'all', eventType = 'all', QAQC = T) %>%
               mutate(Year = as.numeric(Year))

shrub_old$Latin_Name[shrub_old$Latin_Name == "No species recorded"] <- "None present"
shrub_new <- joinMicroShrubData(from = 2007, to = 2019, locType = 'all', eventType = 'all', QAQC = T)


shrub_merge <- full_join(shrub_new, shrub_old, by = c("Plot_Name" = "Plot_Name",
                                                      "StartYear" = "Year",
                                                      "IsQAQC" = "Event_QAQC",
                                                      "ScientificName" = "Latin_Name") )

table(complete.cases(shrub_merge$Event_ID)) # 266 FALSE
na_evs <- shrub_merge %>% filter(is.na(Event_ID)) # These are all "None present" OK

check_shrbs <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("PlotID", "EventID", "Plot_Name", "StartYear", "IsQAQC",
              "ScientificName", col1, col2)]}
  )) %>% bind_rows()
}

names(shrub_merge)
table(shrub_merge$Pct_Cov_UR, shrub_merge$StartYear, useNA = 'always')

shrub_merge %>% select(Plot_Name, StartYear, IsQAQC, ScientificName, shrub_avg_cov, cover) %>%
  mutate(cov_diff = abs(shrub_avg_cov - cover)) %>% filter(cov_diff > 0.1)
# GETT-252-2010 is different b/c combination of PM and pct_cover. Nothing to change

#------ Quadrat Seedlings ------
seeds_vw <- get("MIDN_QuadSeedlings", envir = VIEWS_MIDN) %>%
  select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQSeedlingCode,
         QuadratCode, TSN, ScientificName, SizeClassCode, SizeClassLabel, Count, CoverClassCode, CoverClassLabel)
sort(unique(seeds_vw$ScientificName)) # "No species recorded" included. Wrong.

table(seeds_vw$SQSeedlingCode)
table(seeds_vw$SQSeedlingCode, seeds_vw$StartYear)
# 65 NS total
# 12 COLO-380; 12 PETE-185; 4 RICH-063; 2 RICH-073; 32 VAFO 9999 = 62; Correct
# 3 ISSUES REMAINING:
   # VAFO-036-2011 B8 is NP not NS;
   # GETT-258-2010 AA is NP not NS;
   # FRSP-276-2010 CC is NP not NS;

seed_new <- joinQuadSeedlings(from = 2007, to = 2019, eventType = 'all', locType = 'all', QAQC = TRUE)
seed_inv <- joinMicroSeedlings(speciesType = 'invasive')
table(seed_new$SQSeedlingCode, seed_new$StartYear)
table(seeds_vw$ScientificName) # 2987 No species recorded still
table(seeds_vw$SQSeedlingCode)# 2987 matches number of visits that have NP SQ.

#+++++ Issues remaining: No species recorded needs to be dropped. 3 plot visits that need NS coverted to NP.

#----- Microplot Saplings ------
saps_vw <- get("MIDN_MicroplotSaplings", envir = VIEWS_MIDN) %>%
  select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, StartDate, IsQAQC, SQSaplingCode,
         MicroplotCode, TSN, ScientificName, DBHcm)
table(saps_vw$ScientificName) # "No species recorded" not included. Good.
sort(unique(saps_vw$ScientificName))  # "No species recorded" not included. Good.

table(saps_vw$SQSaplingCode)
#  NP   NS   SS
# 684  12  14600

table(saps_vw$SQSaplingCode, saps_vw$StartYear)
    # 12 NS: 2 NS are COLO-380-2018, but UR SQ is SS instead of NS. Change after final migration.
    # GETT-029-2015 QAQC UL and VAFO-040-2015-QAQC UR should be NP, not NS. Change after final migration.

table(saps_vw$SQSaplingCode, saps_vw$MicroplotCode) # B 5; UL 4; UR 3. Eventually should have the same # for all micros

saps_new <- joinMicroSaplings(locType = 'all', QAQC = T, eventType = 'all', canopyForm = 'all', speciesType = 'all')
length(unique(saps_new$EventID)) #1182
table(complete.cases(saps_new$ScientificName)) # all T
table(saps_new$ScientificName)
table(saps_new$SQSaplingCode) # only 4 NS, 684 NP and 14234 SS. No ND. Good.
#++++++ Remaining sapling issues #GETT-029-2015; VAFO-040-2015 have a quad that should be NP not NS.

#----- joinRegenData -----
reg_old <- forestMIDNarch::joinRegenData(from = 2007, to = 2019, QAQC = T, locType = 'all', speciesType = 'all', canopyForm = 'all') %>%
  mutate(Latin_Name2 = ifelse(Latin_Name %in% c("No species recorded", 'no species recorded'), "None present", Latin_Name),
         Latin_Name2 = ifelse(Latin_Name == "MissingData", "Not Sampled", Latin_Name2),
         Year = as.numeric(Year)) %>% filter(sap.den + seed.den > 0)

reg_new <- joinRegenData(from = 2007, to = 2019, QAQC = T, locType = 'all', eventType = 'all', speciesType = 'all', canopyForm = 'all')
length(unique(reg_new$EventID)) #1182

reg_merge <- full_join(reg_new, reg_old, by = c('Plot_Name' = 'Plot_Name',
                                                'StartYear' = 'Year',
                                                'IsQAQC' = "Event_QAQC",
                                                "ScientificName" = "Latin_Name2"),
                       suffix = c("_new", "_old"))

reg_merge_ss <- reg_merge %>% filter(ScientificName != "None present")

check_reg <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("PlotID", "EventID", "Plot_Name", "StartYear", "IsQAQC",
              "ScientificName", "num_quads", "num_micros", col1, col2)]}
  )) %>% bind_rows()
}

names(reg_merge)

table(reg_new$ScientificName)
table(reg_old$Latin_Name2)

stock_check <- check_reg(reg_merge_ss, "stock_new", "stock_old") # Different b/c change in stocking. Not sure what's going on with NP.
check_reg(reg_merge_ss, "seed_15_30cm", "seed15.30") # differences are when there are < 12 quads and better SQ handling
check_reg(reg_merge_ss, "seed_30_100cm", "seed30.100") # differences are when there are < 12 quads and better SQ handling
check_reg(reg_merge_ss, "seed_100_150cm", "seed100.150") # differences are when there are < 12 quads and better SQ handling
s150p <- check_reg(reg_merge_ss, "seed_p150cm", "seed150p") # differences are when there are < 12 quads and better SQ handling
check_reg(reg_merge_ss, "seed_den", "seed.den") # differences are when there are <12 quads and better SQ handling
check_reg(reg_merge_ss, "sap_den", "sap.den") # differences b/c SQs not correct for several plots and better SQ handling

#++++++ No new issues not already reported for seedlings and saplings.

#----- Additional Species -----
addspp_vw <- get("COMN_AdditionalSpecies", envir = VIEWS_MIDN)
addspp_vw$Plot_Name <- paste(addspp_vw$ParkUnit, sprintf("%03d", addspp_vw$PlotCode), sep = "-")
addspp_vw <- addspp_vw %>%
  select(Plot_Name, PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC, SQAddSppCode,
         TSN, ScientificName, ConfidenceClassCode, IsCollected, Note, SQAddSppNotes)
table(addspp_vw$SQAddSppCode)
# NP    NS    SS
# 69    147 4214
table(addspp_vw$StartYear, addspp_vw$SQAddSppCode) # Cleaned up pre-migration database so all 2008 SQs should migrate as NS.

# addspp08 <- addspp_vw %>% filter(StartYear == 2008 & SQAddSppCode != "NS")
# write.csv(addspp08, "./testing_scripts/addspp_2008_delete.csv")
nrow(filter(addspp_vw, ScientificName == "No species recorded"))
# 69 records with No species recorded still
# matches number of NP SQs. Just need to drop these from tblCOMN_AdditionalSpecies

addspp_new <- do.call(joinAdditionalSpecies, arglist) %>% filter(ScientificName != "Not Sampled")

addspp_old <- forestMIDNarch::joinLocEvent(locType = 'all', eventType = 'all', from = 2006, to = 2019, QAQC = T) %>%
  left_join(., addspp) %>% left_join(., plants[, c("TSN", "Latin_Name")]) %>%
  select(Plot_Name, Year, Event_QAQC, TSN, Latin_Name, Confidence_ID, Collected, Notes) %>%
  mutate(Latin_Name2 = ifelse(Latin_Name == "No species recorded", "None present", Latin_Name))

addspp_merge <- merge(addspp_new, addspp_old,
                      by.x = c("Plot_Name", "StartYear", "IsQAQC", "TSN"),
                      by.y = c("Plot_Name", "Year", "Event_QAQC", "TSN"),
                      all.x = T, all.y = T)

check_spp <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "IsQAQC", col1, col2)]}
  )) %>% bind_rows()
}

check_spp(addspp_merge, "ScientificName", "Latin_Name2")
#    Plot_Name StartYear IsQAQC ScientificName                Latin_Name2
# 1  GEWA-320      2015      0   None present                       <NA> # NULL in old DB. Migrating as SS. Change to NP post migration.
# 2  RICH-226      2009      0           <NA> Polystichum acrostichoides

#+++++ We can fix the issues after the final migration, rather than have Stephen fix with code.
#+++++ Need to make sure 2008 migrates in as NS for all visits in next migration.

#----- Soil data

# need to remove records that weren't sampled but had earthworms recorded
soildata2 <- soildata[!grepl("[++]", soildata$Notes), -c(10:13)] # drop updated/created cols
soildata2 <- soildata2[!grepl("[No soils]", soildata2$Notes),]
names(soildata2)

soil_old <- merge(soildata2, soilsamp[,-c(13:16)],
                  by = intersect(names(soildata2), names(soilsamp[,-c(13:16)])), all = TRUE)
plotevs_old <- forestMIDNarch::joinLocEvent(from = 2007, to = 2019, QAQC = T, locType = 'all', eventType = 'all')
soil_old2 <- merge(plotevs_old, soil_old, by = intersect(names(plotevs_old), names(soil_old)), all.x = FALSE, all.y = TRUE) %>%
  filter(!is.na(Location_ID)) %>% select(Plot_Name, Year, Event_QAQC,
                                         Sampling_Position, Sample_Type, Horizon_Type, Archived,
                                         Sample_Number, Litter_Depth, FF_Depth,
                                         A_Horizon_Depth, Total_Excavation_Depth, Notes, Comments, Sample_Missed)
names(soil_old2)
# convert NA horizons to 0, but not total
soil_old2[,c(9:11)][is.na(soil_old2[,c(9:11)])]<-0

# Check views
soilsamp_vw <- get("COMN_SoilSample", envir = env) %>%
  select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC,
         SQSoilCode, SampleSequenceCode, SoilLayerLabel,
         Depth_cm, Note) %>%
  filter(StartYear > 2006 & !is.na(SoilLayerLabel) #& StartYear < 2020
  )

table(soilsamp_vw$SQSoilCode) # All SS. Good.
length(unique(soilsamp_vw$EventID)) # 328

soillab_vw <- get("COMN_SoilLab", envir = env) %>%
  select(PlotID, EventID, ParkUnit, ParkSubUnit, PlotCode, StartYear, IsQAQC,
         LabLayer, LabDateSoilCollected, UMOSample:ECEC, LabNotes, EventID, PlotID) %>%
  filter(StartYear > 2006 #& StartYear < 2020
  )

length(unique(soillab_vw$EventID)) # 328. Looks similar enough to move on.

# Now to compare old and new

soilold_sum <- soil_old2 %>% group_by(Plot_Name, Year, Event_QAQC, Sample_Type, Archived) %>%
  summarize(litter = mean(Litter_Depth, na.rm = T),
            O_hor = mean(FF_Depth, na.rm = T),
            A_hor = mean(A_Horizon_Depth, na.rm = T),
            Tot_dep = mean(Total_Excavation_Depth, na.rm =T),
            numsamps = length(unique(!is.na(Sample_Number))))

#soilsamp_wide comes from line 127 in joinSoilSampleData.R
soilsamp_wide$Plot_Name <-  paste(soilsamp_wide$ParkUnit, sprintf("%03d", soilsamp_wide$PlotCode), sep = "-")

soilsamp_merge <- merge(soilsamp_wide, soilold_sum,
                        by.x = c("Plot_Name", "StartYear", "IsQAQC"),
                        by.y = c("Plot_Name", "Year", "Event_QAQC"), all = T)

soilsamp_merge %>% filter(is.na(EventID)) #0

check_soils <- function(df, col1, col2){
  lapply(1:nrow(df), function(x) (
    if(length(setdiff(union(df[x, col1], df[x, col2]), intersect(df[x, col1], df[x, col2]))) > 0){
      df[x, c("Plot_Name", "StartYear", "IsQAQC", "Sampling_Position", "SampleSequenceCode", col1, col2)]}
  )) %>% bind_rows()
}
names(soilsamp_merge)
soilsamp_check <- soilsamp_merge %>% mutate(lit_diff = abs(Litter_cm - litter),
                                           O_diff = abs(O_Horizon_cm - O_hor),
                                           A_diff = abs(A_Horizon_cm - A_hor),
                                           tot_diff = abs(Total_Depth_cm - Tot_dep)) %>%
  filter(lit_diff > 0.5 | O_diff > 0.5 | A_diff > 0.5 | tot_diff > 0.5) #0

check_soils(soilsamp_merge, "Note", "Comments") # 0
# Hard to check soils b/c there's no tab for it in the field app, but based on my comparisions, it all looks good.

# Soil lab data
soillab_old <- merge(soildata2, soillab[,-c(1, 34, 35)],
                     by = intersect(names(soildata2), names(soillab[,-c(1, 34, 35)])), all = TRUE)
soillab_old2 <- merge(plotevs_old, soillab_old, by = intersect(names(plotevs_old), names(soillab_old)),
                      all.x = FALSE, all.y = TRUE) %>%
  filter(!is.na(Location_ID) & Year > 2006) %>% select(Plot_Name, Year, Event_QAQC, Layer, UMO_Sample:ECEC, Notes,
                                                       Sampling_Position, Sample_Type, Archived)
head(soillab_old2)

#++++++++ No lab-related issues to report (though didn't check as thoroughly)

# Done with 4/22 migration check. Rerun with 4/26 check
KateMMiller/forestMIDN documentation built on Nov. 10, 2024, 11:54 p.m.