# exec/data_scripts.R

#####
library(RSFIA)
# Machine-specific absolute paths used by this script.
NC_DIR <- '/media/bem/hot_storage/RSFIA_data/FIA_dfs'
# WTH_DIR is not referenced in the visible part of this script.
WTH_DIR <- '/media/bem/scratch/wth'

# FIA
# Configure an 'FIA' object (output directory/file, raw FIA data directory,
# remeasurement setting), then run the import steps in sequence. Each step
# takes the object and returns the updated object.
FIA <- new('FIA')
FIA@nc_dir <- NC_DIR
FIA@file_name <- 'FIA_plots.nc'
FIA@FIA_dir <- '/media/bem/hot_storage/FIA/data'
FIA@remeasure <- 4L
# Use TRUE rather than T: `T` is an ordinary variable that can be reassigned,
# which would silently change the meaning of `overwrite`.
FIA <- ImportPlots(FIA, overwrite = TRUE)
FIA <- ImportConditions(FIA, overwrite = TRUE)
FIA <- ImportTrees(FIA)
FIA <- Smallify(FIA)
# NOTE(review): use_data() is not defined in this file; presumably provided by
# devtools/usethis or re-exported by RSFIA -- confirm it is attached.
use_data(FIA)
# Plot coordinates, indexed below by column ([, 1] and [, 2]) when building
# the 'remote' object.
c0 <- CoordVecsToList(FIA@coords)

# 'remote' data object
# Build a 'remote' object for the FIA plot coordinates and fill its data file
# from Earth Engine outputs.
# This could be added to a PullDataFrame method for 'analysis'.
remote <- new('remote')
remote@bands <- c(101L, 102L, 103L, 7L)
# Columns 1 and 2 of c0 are passed as the two coordinate vectors.
remote <- AddCoords(remote, c0[, 1], c0[, 2])
remote@sample <- FIA@sample
# NOTE(review): SCRATCH and DATA_DIRECTORY are not defined anywhere in the
# visible script (only NC_DIR and WTH_DIR are) -- confirm where they come from.
remote@nc_dir <- SCRATCH
remote@file_name <- 'RGI_NDVI_NDMI_MIR_2000_2015_nplots12k.nc'
remote@ft_dir <- paste0(DATA_DIRECTORY, '/GEE_outputs')
# Variable names are the band codes converted to character.
remote@variables <- as.character(remote@bands)
remote <- SetupDataFile(remote)
# Time the Earth Engine read step.
t1 <- Sys.time()
# NOTE(review): the return value is discarded here, unlike the other calls
# that reassign `remote`; presumably this works by side effect on the data
# file -- confirm.
ReadEarthEngineOutputs(remote, 'Landsat 7')
remote@n_fill <- length(remote@sample)
print(difftime(Sys.time(), t1))
use_data(remote)

# Pull the remote-sensing values and collapse them to one row per sample.
#data('remote')
long_df <- PullDataFrame(remote)
# Average columns 5 to 8 within each sample; aggregate() names the grouping
# column 'Group.1'.
band_means <- aggregate(
  long_df[, 5:8],
  by = list(long_df$sample),
  FUN = mean
)
band_means <- band_means[order(band_means$Group.1), ]
# Attach coordinates in front of the averaged columns (grouping column
# dropped).
# assumes remote@coords is in the same order as the sorted samples -- TODO confirm
rs_data <- data.frame(
  CoordVecsToList(remote@coords),
  band_means[, 2:ncol(band_means)]
)
# assumes the four value columns follow this band order -- TODO confirm
colnames(rs_data)[3:ncol(rs_data)] <- c('NDVI', 'NDMI', 'RGI', 'MIR')
rm(long_df, band_means)

# 'soilgrids' data object
# Build a 'soilgrids' object on the same coordinates and samples as `remote`,
# set up its data file, then download the soil data into it.
sgrid <- new('soilgrids')
sgrid@coords <- remote@coords
sgrid@sample <- remote@sample
# NOTE(review): DATA_DIRECTORY is not defined anywhere in the visible script --
# confirm where it comes from.
sgrid@nc_dir <- DATA_DIRECTORY
sgrid@file_name <- 'soil_variables_2000_2015_nplots12k.nc'
sgrid <- SetupDataFile(sgrid)
sgrid <- DownloadSoilGrids(sgrid)
# Saving is currently disabled; note that a later branch of this script calls
# data('sgrid').
#use_data(sgrid)

# ---- Assemble the joined table `full_data` ----
# NOTE(review): rm(list = ls()) wipes every object in the calling environment;
# run this script in a fresh session rather than an interactive workspace.
rm(list = ls())

# Remote-sensing data: aggregate over the 'time' dimension with mean, then pull
# into a data frame.
data('remote')
remote0 <- AggregateByDimension(remote, dim = 'time', FUN = mean)
remote0 <- PullDataFrame(remote0)
# assumes columns 4:7 hold the four bands in this order -- TODO confirm
colnames(remote0)[4:7] <- c('NDVI', 'NDMI', 'RGI', 'MIR')

# Yearly Daymet data, aggregated over 'time' the same way.
data('yearly_daymet')
yearly_daymet0 <- AggregateByDimension(yearly_daymet, dim = 'time', FUN = mean)
yearly_daymet0 <- PullDataFrame(yearly_daymet0)

# Mortality response variables.
data('mort_response_variables')
mort_response_variables <- PullDataFrame(mort_response_variables)

# Loaded unconditionally because both the soil columns (when enabled) and the
# PLT_CN lookup below read from it; previously it was only assigned inside the
# first branch, so flipping the toggle broke the PLT_CN step.
# NOTE(review): FIA_mortality_with_explanatory is not defined in this file;
# presumably a dataset made available by RSFIA -- confirm.
mort_df <- FIA_mortality_with_explanatory

# Manual switch: TRUE takes the soil variables from mort_df, FALSE pulls them
# from the saved 'sgrid' object.
use_mort_soils <- TRUE
if (use_mort_soils) {
  meta <- SoilGridMeta()
  meta <- meta[meta$major_var, ]
  # as.character() guards against `vars` being a factor, which would otherwise
  # index and label columns by integer level codes.
  soil_vars <- as.character(meta$vars)
  # One vectorized construction instead of pre-allocating an NA matrix and
  # copying columns in a loop.
  soils <- data.frame(
    mort_df$LON,
    mort_df$LAT,
    mort_df[, soil_vars, drop = FALSE],
    row.names = NULL,
    stringsAsFactors = FALSE
  )
  colnames(soils) <- c('lon', 'lat', soil_vars)
} else {
  data('sgrid')
  soils <- PullDataFrame(sgrid)
}

# Lookup from coordinates to plot identifier.
PLT_CN <- mort_df[, c('LON', 'LAT', 'PLT_CN')]
colnames(PLT_CN)[1:2] <- c('lon', 'lat')

# Left-join everything onto the remote-sensing rows.
# NOTE(review): `soils` and `PLT_CN` are matched on lon/lat only; this relies
# on coordinates matching exactly and being unique per plot -- confirm.
full_data <- dplyr::left_join(remote0, yearly_daymet0, by = c('lon', 'lat', 'sample'))
full_data <- dplyr::left_join(full_data, soils, by = c('lon', 'lat'))
full_data <- dplyr::left_join(full_data, mort_response_variables, by = c('lon', 'lat', 'sample'))
full_data <- dplyr::left_join(full_data, PLT_CN, by = c('lon', 'lat'))

# Keep only `full_data`. setdiff() avoids the x[-which(cond)] pitfall, where
# an empty match would select nothing instead of everything.
rm(list = setdiff(ls(), 'full_data'))


# Configure an 'analysis' object from the assembled table.
rfm <- new('analysis')
# Response and explanatory variables are chosen by hard-coded column position;
# these ranges must be revisited whenever the column layout of full_data
# changes (e.g. a different set of soil variables).
rfm@response <- colnames(full_data)[27:55]
rfm@explanatory <- colnames(full_data)[4:25]
rfm@train_data <- full_data
rfm@sample <- full_data$sample
# Copy the configured object so the linear-model run starts from the same
# settings as `rfm`.
lmm <- rfm
# Random-forest fit is currently disabled.
#rfm <- randomForest(rfm)
# NOTE(review): lm() is called on an 'analysis' object with formula = NULL;
# presumably RSFIA supplies a method for this class -- confirm.
lmm <- lm(formula = NULL, data = lmm)
lmm <- DropModels(lmm)
# bmcnellis/RSFIA documentation built on June 1, 2019, 7:40 a.m.