R/defunct/00_data-processing-fiadb_TODELETE_26March2020.R

######################################################################
#
# GRM -- FIA-DB data processing
#
#  Rob Smith, phytomosaic@gmail.com, 26 Mar 2020
#
##  CC-BY-SA 4.0 License (Creative Commons Attribution-ShareAlike 4.0)


### preamble
rm(list=ls())
require(ecole)
pth <- 'C:/Users/Rob/Documents/_prj/1_pnw_traits/'
setwd(pth)


### load data
d   <- read.csv('./data_raw/WA_TREE_GRM_COMPONENT.csv', stringsAsFactors=F)
f2  <- read.csv('./data_raw/WA_TREE.csv', stringsAsFactors=F)
pth <- 'C:/Users/Rob/Documents/_prj/5_div/div/data_raw/'
f0  <- read.csv(paste0(pth, 'CONFIDENTIAL_PLOT_TABLE.csv'), stringsAsFactors=F)
f1  <- read.csv(paste0(pth, 'PLOT.csv'), stringsAsFactors=F) # for MEASYR and REMPER
f6  <- read.csv('./data_raw/REF_SPECIES.csv', stringsAsFactors=F)
names( d )  <- tolower(names( d ))      # names to lowercase
names( f0 ) <- tolower(names( f0 ))     # names to lowercase
names( f1 ) <- tolower(names( f1 ))     # names to lowercase
names( f2 ) <- tolower(names( f2 ))     # names to lowercase
names( f6 ) <- tolower(names( f6 ))     # names to lowercase
names(f1)[names(f1)=='cn'] <- 'plt_cn'  # rename
names(f2)[names(f2)=='cn'] <- 'tre_cn'  # rename
d      <- d[,1:which(names(d)=='stem_component')] # trim unneeded cols
f6$spp <- paste0(f6$genus,'_',f6$species,'_',f6$variety,'_',f6$subspecies)
f6$spp <- ecole::clean_text(f6$spp, lower=TRUE)

### force plot_cn to character (R can't handle large integers)
d$plt_cn       <- as.character(d$plt_cn)
d$tre_cn       <- as.character(d$tre_cn)
# d$prev_tre_cn  <- as.character(d$prev_tre_cn)
f1$plt_cn      <- as.character(f1$plt_cn)
f2$plt_cn      <- as.character(f2$plt_cn)
f2$tre_cn      <- as.character(f2$tre_cn)
f0$plt_cn      <- as.character(f0$plt_cn)

### match CONFIDENTIAL attributes
i      <- match(d$plt_cn, f0$plt_cn)
d$latt <- f0[i, 'lat_actual_nad83']
d$lonn <- f0[i, 'lon_actual_nad83']
d$el   <- f0[i, 'elevation_dem_pnwrs'] * 0.3047851 # from ft to m
rm(i,f0,pth)

### match tre_cn to get 'spcd' and other attributes at *y2*
i          <- match(d$tre_cn, f2$tre_cn)
d          <- cbind(d,f2[i,c('spcd','dia','ht','volcfnet','volcfgrs',
                             'invyr')])
pltcn_y2   <- f2[i, c('plt_cn')]
i          <- match(pltcn_y2, f1$plt_cn) # link y2 plt_cn for MEASYEAR
d$remper   <- f1[i, 'remper']   # use to infer original measyear
d$measyr_2 <- f1[i, 'measyear']
d$measyr_1 <- round(d$measyr_2 - d$remper,0) # original measyear?
rm(i, pltcn_y2, f1, f2)

### match spcd to get 'spp'
i          <- match(d$spcd, f6$spcd)
d          <- data.frame(d, spp=f6[i, c('spp')], stringsAsFactors=F)
rm(i,f6)

### change one name
d$spp[d$spp == 'chrysolepis_chrysophylla_chrysophylla'] <-
     'chrysolepis_chrysophylla'
d$spp[d$spp == 'populus_balsamifera_trichocarpa'] <-
     'populus_trichocarpa'

# ### convert abs to rel growth rate
# d$rgr_dia  <- d$ann_dia_growth / d$dia_begin * 100
# d$rgr_ht   <- d$ann_ht_growth  / d$ht_begin  * 100

### temporarily fill NA=0 for new recruits (for next step)
d$dia_begin[is.na(d$dia_begin) & d$stem_component == 'INGROWTH'] <- 0

### *annualize* Y1/Y2 diameters using 'ann_dia_growth'
d$dia_y1   <- d$dia_begin
d$dia_y2   <- d$dia_begin + d$ann_dia_growth

### remove invalid/removed/logged trees
table(d$stem_component)
is_invalid <- d$stem_component %in% c('REMEASURED DEAD','NOT USED',
                                      'UNKNOWN')
was_harvested  <- d$stem_component %in% c('CUT DEAD', 'CUT1', 'CUT2')
d <- d[!c(is_invalid | was_harvested),] # CAUTION!

### assign tree fate, recode by recruit/mortality
table(d$stem_component)
is_newrecruit  <- d$stem_component == 'INGROWTH'
is_naturaldead <- d$stem_component %in% c('MORTALITY1','MORTALITY2')
is_survivor    <- d$stem_component == 'SURVIVOR'
d$dia_y1[is_newrecruit]  <- NA # if new recruit then expect no Y1
d$dia_y2[is_naturaldead] <- NA # if dead then expect no Y2 - CAUTION! dead trees still have 'size'
rm(is_invalid,is_naturaldead,is_newrecruit,is_survivor,was_harvested)

### CAUTION: remove if both Y1/Y2 == NA (and therefore untrackable)
d <- d[!(is.na(d$dia_y2) & is.na(d$dia_y1)),]

### tally recruits/mortality/survivors
### identify survivors (if has measurement at both events)
d$surv <- ifelse(c(!is.na(d$dia_y2) & !is.na(d$dia_y1)),1,0)
sum(d$surv)                             # tally survival
sum(is.na(d$dia_y2))                    # tally mortality
sum(is.na(d$dia_y1) & !is.na(d$dia_y2)) # tally new recruits

# remove *negative* diameter growth (only a few hundred cases)
isneg <- c(d$dia_y1 > d$dia_y2)
isneg <- isneg & !is.na(isneg)
sum(isneg)
d <- d[!isneg,]
rm(isneg)

# TODO: get climate values for *exact* remeasurement period
a <- d[!duplicated(d$plt_cn),c('plt_cn','latt','lonn','el','measyr_1')]
# ### retrieve ClimateNA values (manually)
# prep_climatena(ID1=a$plt_cn,lat=a$latt,long=a$lonn,el=a$el,
#                file = 'C:/Users/Rob/Desktop/to_climatena.csv')
# file.copy(from = 'C:/Users/Rob/Desktop/from_climatena.csv',
#           to = './data_raw/from_climatena.csv',
#           overwrite = TRUE)   # copy for posterity
aa <- read.csv('./data_raw/from_climatena.csv')
aa[aa==(-9999)] <- NA   # replace NA value with NA
a  <- cbind(a, aa[match(a$plt_cn, as.character(aa$ID1)),])
names(a) <- tolower(names(a))
a  <- a[ ,!names(a) %in% c('dd_0','dd5','dd_18','dd18','bffp',
                           'effp','mar','id1','id2',
                           'elevation','latitude','longitude')]
d <- cbind(d,a[match(d$plt_cn, a$plt_cn),-c(1:5)])# match climate data
rm(aa,a)

### tranformations
d$dia_y1 <- log10(1 + d$dia_y1)
d$dia_y2 <- log10(1 + d$dia_y2)

### remove if coordinates (and therefore climate) not available
isna <- is.na(d$latt)
sum(isna) / length(d$latt) # 29.4% !
d <- d[!isna,]
rm(isna)

####    END    ####
phytomosaic/pnw documentation built on April 16, 2020, 7:29 p.m.