# Chunk defaults: do not stop rendering on chunk errors, do not echo the code.
knitr::opts_chunk$set(echo = FALSE, error = TRUE)
# Root directory for chunk evaluation comes from the `wd` report parameter.
knitr::opts_knit$set(root.dir = params$wd)

# Dot-separated logger name for this report; it embeds the site code.
logger_name <- paste(
  'QC', params$code, 'qc_start_process', 'rep_sfn_render',
  sep = '.'
)

# Logging setup for that logger: log file path, logger name and level.
log_sapfluxnet_setup(
  'Logs/sapfluxnet.log',
  level = 'DEBUG',
  logger = logger_name
)

SITE: `r params$code`


Metadata QC

Quick summary

# Libraries
library(sapfluxnetQC1)
library(dplyr)
library(DT)

# Load the metadata sheets and the raw data tables.
# Every loader is given logger_name as its parent logger.

## site metadata (read first: the other metadata loaders take it as
## si_code_loc)
site_md <- dl_metadata(
  params$md_file, 'site_md',
  parent_logger = logger_name
)

## stand metadata
stand_md <- dl_metadata(
  params$md_file, 'stand_md',
  si_code_loc = site_md,
  parent_logger = logger_name
)

## species metadata
species_md <- dl_metadata(
  params$md_file, 'species_md',
  si_code_loc = site_md,
  parent_logger = logger_name
)

## plant metadata
plant_md <- dl_metadata(
  params$md_file, 'plant_md',
  si_code_loc = site_md,
  parent_logger = logger_name
)

## environmental metadata
env_md <- dl_metadata(
  params$md_file, 'environmental_md',
  si_code_loc = site_md,
  parent_logger = logger_name
)

## sap flow data
sapf_data <- dl_data(
  params$sapf_data_file, 'sapflow_hd',
  n = 2000, na = '',
  parent_logger = logger_name
)

## environmental data
env_data <- dl_data(
  params$env_data_file, 'environmental_hd',
  n = 2000, na = '',
  parent_logger = logger_name
)

################################################################################

# Metadata quality checks

## column checks for each metadata sheet, stacked into one table
md_cols <- bind_rows(
  qc_md_cols(
    site_md, 'site_md',
    parent_logger = logger_name
  ),
  qc_md_cols(
    stand_md, 'stand_md',
    parent_logger = logger_name
  ),
  qc_md_cols(
    species_md, 'species_md',
    parent_logger = logger_name
  ),
  qc_md_cols(
    plant_md, 'plant_md',
    parent_logger = logger_name
  ),
  qc_md_cols(
    env_md, 'environmental_md',
    parent_logger = logger_name
  )
)

## values of the factor variables across all five metadata sheets
factor_values <- qc_factor_values(
  site_md, stand_md, species_md, plant_md, env_md,
  parent_logger = logger_name
)

## contributor email check
email_check <- qc_email_check(
  site_md,
  parent_logger = logger_name
)

## coordinates check, then biome lookup on the coordinate-checked site metadata
site_md_coordfix <- qc_coordinates(
  site_md,
  parent_logger = logger_name
)

site_md_coordfix <- qc_get_biome(
  site_md_coordfix,
  merge_deserts = FALSE,
  parent_logger = logger_name
)

## soil texture (stand_md is overwritten with the returned object)
stand_md <- qc_soil_texture(
  stand_md,
  parent_logger = logger_name
)

## species names
# The info tables are computed from the names as loaded, i.e. before the
# name columns are overwritten further down; `Md` tags the source sheet.
species_md_spnames <- mutate(
  qc_species_names_info(species_md$sp_name, parent_logger = logger_name),
  Md = 'sp'
)

plant_md_spnames <- mutate(
  qc_species_names_info(plant_md$pl_species, parent_logger = logger_name),
  Md = 'pl'
)

# Overwrite the name columns with the output of qc_species_names().
species_md$sp_name <- qc_species_names(
  species_md$sp_name,
  parent_logger = logger_name
)
plant_md$pl_species <- qc_species_names(
  plant_md$pl_species,
  parent_logger = logger_name
)

# Verification uses both sheets after their name columns were replaced.
sp_verification <- qc_species_verification(
  species_md, plant_md,
  parent_logger = logger_name
)

## declared plant treatments
pl_treatments_check <- qc_pl_treatments(
  plant_md,
  parent_logger = logger_name
)

## environmental variables: checked against both the data and the metadata
env_var_presence <- qc_env_vars_presence(
  env_data,
  env_md,
  parent_logger = logger_name
)

################################################################################

# Data quality checks

## TIMESTAMP fix
# Both tables are fixed using env_md; the logger name is passed positionally
# as the last argument.
### sap flow
sapf_data_fixed <- qc_fix_timestamp(sapf_data, env_md, logger_name)
### environmental
env_data_fixed <- qc_fix_timestamp(env_data, env_md, logger_name)

## TIMESTAMP NAs, checked on the fixed tables
### sap flow
sapf_timestamp_nas <- qc_timestamp_nas(sapf_data_fixed, logger_name)
### environmental
env_timestamp_nas <- qc_timestamp_nas(env_data_fixed, logger_name)

## TIMESTAMP errors
# The timestep for each table is read from its own metadata: plant_md for the
# sap flow data, env_md for the environmental data.
### sap flow
timestamp_errors_sapf <- qc_timestamp_errors(
  sapf_data_fixed,
  timestep = qc_get_timestep(
    plant_md,
    parent_logger = logger_name
  ),
  parent_logger = logger_name
)
### environmental
timestamp_errors_env <- qc_timestamp_errors(
  env_data_fixed,
  timestep = qc_get_timestep(
    env_md,
    parent_logger = logger_name
  ),
  parent_logger = logger_name
)

## TIMESTAMP concordance between both tables (called with plot = FALSE)
timestamp_concordance <- qc_timestamp_concordance(
  sapf_data_fixed,
  env_data_fixed,
  plot = FALSE,
  parent_logger = logger_name
)
# Disabled: the same call with plot = TRUE.
# timestamp_concordance_plot <- qc_timestamp_concordance(
#   sapf_data_fixed, env_data_fixed,
#   plot = TRUE, parent_logger = logger_name
# )

## Gaps in both fixed tables
sapf_gaps_info <- qc_mind_the_gap(
  sapf_data_fixed,
  parent_logger = logger_name
)
env_gaps_info <- qc_mind_the_gap(
  env_data_fixed,
  parent_logger = logger_name
)

### gap lines plot: gaps info plus the time interval of each table
gap_lines_plot <- vis_gap_lines(
  sapf_gaps_info,
  env_gaps_info,
  qc_time_interval(sapf_data_fixed),
  qc_time_interval(env_data_fixed),
  parent_logger = logger_name
)

### swc check on the shallow and deep columns, combined as c(shallow, deep)
swc_check_shallow <- qc_swc_check(env_data_fixed[['swc_shallow']])
swc_check_deep <- qc_swc_check(env_data_fixed[['swc_deep']])
swc_check <- c(swc_check_shallow, swc_check_deep)

### swc fix applied to the environmental table
env_data_fixed_swc <- qc_swc_fix(
  env_data_fixed,
  parent_logger = logger_name
)

### Gap plots
#### calendars (the environmental one uses the swc-fixed table)
sapf_gaps_cal <- vis_gaps_calendar(
  sapf_data_fixed,
  parent_logger = logger_name
)
env_gaps_cal <- vis_gaps_calendar(
  env_data_fixed_swc,
  parent_logger = logger_name
)

#### gap coverage plots
sapf_gaps_plot <- vis_plot_the_gap(
  sapf_gaps_info,
  type = "gap_coverage",
  parent_logger = logger_name
)
env_gaps_plot <- vis_plot_the_gap(
  env_gaps_info,
  type = "gap_coverage",
  parent_logger = logger_name
)

#### gap interval plots
sapf_gaps_plot_int <- vis_plot_the_gap(
  sapf_gaps_info,
  type = "gap_interval",
  parent_logger = logger_name
)
env_gaps_plot_int <- vis_plot_the_gap(
  env_gaps_info,
  type = "gap_interval",
  parent_logger = logger_name
)

################################################################################
# Build the SfnData object from the fixed data and the checked metadata, then
# store it as an RData file for later use.
## sfndata_object
sfn_data_object <- sfn_data_constructor(
  sapf_data = sapf_data_fixed,
  env_data = env_data_fixed_swc,
  site_md = site_md_coordfix,
  stand_md = stand_md,
  plant_md = plant_md,
  species_md = species_md,
  env_md = env_md,
  parent_logger = logger_name
)

# The object is bound to a variable named after the site code and saved under
# that name in Data/<code>/Lvl_1/<code>.RData.
assign(params$code, sfn_data_object)
save(
  list = params$code,
  file = file.path(
    'Data', params$code, 'Lvl_1',
    paste0(params$code, '.RData')
  ),
  envir = environment()
)

################################################################################
# transformations availability, computed from the SfnData object
transformations_table <- qc_transformation_vars(
  sfn_data_object,
  parent_logger = logger_name
)
################################################################################
# metadata QC results table, built from the check results computed above
qc_md_results_table(
  md_cols, factor_values, email_check, site_md_coordfix,
  species_md, plant_md, species_md_spnames, plant_md_spnames,
  sp_verification, env_var_presence,
  parent_logger = logger_name
)
################################################################################

NOTE: Metadata Quality Checks are under continuous development. Checks made and results presentation can change in the future to adapt to new insights in data digesting processes.


Metadata columns

Information about metadata variables (presence, class and NA)

# Metadata variables table. The three flag columns are converted to character,
# matching the 'TRUE'/'FALSE' strings used by the styleEqual() calls below.
md_cols %>%
  mutate(
    PresenceOK = as.character(PresenceOK),
    ClassOK = as.character(ClassOK),
    IsNA = as.character(IsNA)
  ) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Metadata variables info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '100px', targets = 1:4),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # NA variables are shaded blue
  formatStyle(
    'IsNA',
    backgroundColor = styleEqual(
      c('TRUE', 'FALSE'),
      c('#89c4f4', 'transparent')
    )
  ) %>%
  # failed presence checks are shaded red
  formatStyle(
    'PresenceOK',
    backgroundColor = styleEqual(
      c('TRUE', 'FALSE'),
      c('transparent', '#d91e18')
    )
  ) %>%
  # failed class checks are shaded blue
  formatStyle(
    'ClassOK',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#89c4f4', 'transparent')
    )
  )

Metadata factor variables value

Information about factor variables values

# Factor variables table. Flags are converted to character for styleEqual();
# foo_var concatenates both flags and its column (index 4) is hidden through
# columnDefs.
factor_values %>%
  mutate(
    Check_result = as.character(Check_result),
    NA_presence = as.character(NA_presence),
    foo_var = paste(Check_result, NA_presence, sep = '')
  ) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Metadata factor variables info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(targets = 4, visible = FALSE),
        list(className = 'dt-center', width = '100px', targets = 1:3),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # variables with NAs are shaded blue
  formatStyle(
    'NA_presence',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('transparent', '#89c4f4')
    )
  ) %>%
  # failed checks are shaded red
  formatStyle(
    'Check_result',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )

Contributors email

Information about the email addresses provided

# Email check table. Addresses are masked before display: every run of ASCII
# letters or digits is replaced by a single asterisk, so only the separators
# (such as '@' and '.') remain readable.
# The pattern uses `+` rather than `*` so it never matches the empty string
# (which would add spurious asterisks), and it covers uppercase letters so they
# are not left unmasked in the report.
email_check %>%
  mutate(
    Is_correct = as.character(Is_correct),
    email = stringr::str_replace_all(email, '[A-Za-z0-9]+', '\\*')
  ) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = c('Scroller'),
    caption = 'Email info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '100px', targets = 1),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # addresses flagged as incorrect are shaded red
  formatStyle(
    'Is_correct',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )

Coordinates check and fix

Information about site coordinates provided

# Coordinates table: site name, latitude, longitude and the is_inside_country
# flag (converted to character for styleEqual()).
site_md_coordfix %>%
  select(si_name, si_lat, si_long, is_inside_country) %>%
  mutate(is_inside_country = as.character(is_inside_country)) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Coordinates info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', targets = 1:3, width = '100px'),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # rows flagged 'FALSE' are shaded red
  formatStyle(
    'is_inside_country',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )

Species names

Information about species names

# Species names spelling table: species_md rows followed by plant_md rows,
# with the flag columns converted to character for styleEqual().
species_md_spnames %>%
  bind_rows(plant_md_spnames) %>%
  mutate(
    Concordance = as.character(Concordance),
    IsNA = as.character(IsNA)
  ) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Species names spelling',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', targets = 1:4),
        list(className = 'dt-right', targets = 0),
        list(width = '150px', targets = 1),
        list(width = '100px', targets = 2:3),
        list(width = '50px', targets = 4)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # rows without concordance are shaded red
  formatStyle(
    'Concordance',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )
# Species verification table; Concordance is converted to character for
# styleEqual().
sp_verification %>%
  mutate(Concordance = as.character(Concordance)) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Species names info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', targets = 1:3),
        list(className = 'dt-right', targets = 0),
        list(width = '50px', targets = 1:2),
        list(width = '100px', targets = 3)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # rows without concordance are shaded red
  formatStyle(
    'Concordance',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )

Plant treatments check

Information about declared plant treatments

# Plant treatments table, shown as returned by the check (no styling applied).
pl_treatments_check %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Plant treatments info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '100px', targets = 1),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  )

Environmental variables

Information about environmental variables and their presence in metadata and data

# Environmental variables table; the three flag columns are converted to
# character, and only Concordance is styled.
env_var_presence %>%
  mutate(
    Concordance = as.character(Concordance),
    Md_presence = as.character(Md_presence),
    Data_presence = as.character(Data_presence)
  ) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Environmental variables info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '100px', targets = 1:3),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # rows without concordance are shaded red
  formatStyle(
    'Concordance',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )

Metadata Remarks

List of remarks provided by the contributor/s


Site level remarks

`r if (is.null(site_md_coordfix$si_remarks)) {'*No remarks*'} else {site_md_coordfix$si_remarks}`


Stand level remarks

`r if (is.null(stand_md$st_remarks)) {'*No remarks*'} else {stand_md$st_remarks}`


Species level remarks

`r if (is.null(species_md$sp_remarks)) {'*No remarks*'} else {species_md$sp_remarks}`


Plant level remarks

`r if (is.null(plant_md$pl_remarks)) {'*No remarks*'} else {plant_md$pl_remarks}`


Environmental remarks

`r if (is.null(env_md$env_remarks)) {'*No remarks*'} else {env_md$env_remarks}`


Data QC

Quick Summary

################################################################################
# Data QC summary table, built from the fixed data tables and the check
# results computed earlier in this document.
# NOTE(review): `sapw_md` is not assigned anywhere in the visible part of this
# document. R evaluates function arguments lazily, so this call presumably only
# succeeds if qc_data_results_table() never evaluates that argument -- confirm
# against the function, or define `sapw_md` before this point.
qc_data_results_table(sapf_data_fixed, env_data_fixed_swc, timestamp_errors_sapf,
                      timestamp_errors_env, sapw_md,
                      timestamp_concordance, sapf_gaps_info,
                      env_gaps_info, sapf_timestamp_nas, env_timestamp_nas,
                      swc_check, transformations_table,
                      parent_logger = logger_name)
################################################################################

NOTE: Data Quality Checks are under continuous development. Checks made and results presentation can change in the future to adapt to new insights in data digesting processes.


TIMESTAMP Checks

Information about TIMESTAMPS

# One summary row per data table: a randomly sampled TIMESTAMP (as text) and
# the TIMESTAMP range collapsed into a single string.
# The sap flow summary is computed first, then the environmental one.
timestamp_sapf <- sapf_data_fixed %>%
  select(TIMESTAMP) %>%
  summarise(
    Sample = as.character(sample(TIMESTAMP, 1)),
    Range = paste(range(TIMESTAMP), collapse = ' - ')
  ) %>%
  transmute(Data = 'Sapflow', Sample, Range)

timestamp_env <- env_data_fixed_swc %>%
  select(TIMESTAMP) %>%
  summarise(
    Sample = as.character(sample(TIMESTAMP, 1)),
    Range = paste(range(TIMESTAMP), collapse = ' - ')
  ) %>%
  transmute(Data = 'Environmental', Sample, Range)

# Both rows are shown in a single table.
timestamp_sapf %>%
  bind_rows(timestamp_env) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'TIMESTAMP format info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '40%', targets = 2),
        list(className = 'dt-center', width = '20%', targets = 1),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  )
# TIMESTAMP NAs tables. A table is rendered only when the corresponding check
# result is not a logical value; otherwise nothing is shown.
if (!is.logical(sapf_timestamp_nas)) {
  sapf_timestamp_nas %>%
    datatable(
      class = 'display compact',
      rownames = FALSE,
      filter = 'none',
      extensions = 'Scroller',
      caption = 'Sapflow TIMESTAMP NAs info',
      options = list(
        dom = 'ti',
        columnDefs = list(
          list(className = 'dt-center', width = '25%', targets = 1),
          list(className = 'dt-right', targets = 0)
        ),
        scroller = TRUE,
        scrollY = 450,
        scrollCollapse = TRUE
      )
    )
}
if (!is.logical(env_timestamp_nas)) {
  env_timestamp_nas %>%
    datatable(
      class = 'display compact',
      rownames = FALSE,
      filter = 'none',
      extensions = 'Scroller',
      caption = 'Environmental TIMESTAMP NAs info',
      options = list(
        dom = 'ti',
        columnDefs = list(
          list(className = 'dt-center', width = '25%', targets = 1),
          list(className = 'dt-right', targets = 0)
        ),
        scroller = TRUE,
        scrollY = 450,
        scrollCollapse = TRUE
      )
    )
}
# TIMESTAMP continuity tables: each interval with Int_length divided by 60,
# shown as Int_length_minutes.
timestamp_errors_sapf %>%
  transmute(Interval, Int_length_minutes = Int_length / 60) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Sapflow TIMESTAMP continuity info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '25%', targets = 1),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  )
timestamp_errors_env %>%
  transmute(Interval, Int_length_minutes = Int_length / 60) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Environmental TIMESTAMP continuity info',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '25%', targets = 1),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  )

TIMESTAMP concordance between sapflow and environmental data

# Disabled: table view of the timestamp_concordance result.
# timestamp_concordance %>%
#   datatable(class = 'display compact', rownames = FALSE, filter = 'none',
#             caption = 'Table 4. TIMESTAMP concordance info',
#             options = list(dom = 'ti',
#                            # columnDefs = list(list(className = 'dt-center',
#                            #                        width = '40%',
#                            #                        targets = 1),
#                            #                   list(className = 'dt-right',
#                            #                        targets = 0)),
#                            scroller = TRUE,
#                            scrollY = 450, scrollCollapse = TRUE))

# Disabled: concordance plot (its assignment is commented out as well).
# timestamp_concordance_plot
# Shown instead: the gap lines plot built with vis_gap_lines() from the gaps
# info and time intervals of both data tables.
gap_lines_plot

Time series gaps

Sap flow gaps info

# Sap flow gaps table, without the gap start/end timestamps. gap_coverage is
# shown as a percentage with a background colour that changes at the .10 and
# .25 breaks; gap_interval is rounded to 3 decimals.
sapf_gaps_info %>%
  select(-timestamp_start, -timestamp_end) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Sap flow gaps info. gap_interval is expressed in minutes',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '20%', targets = 1:2),
        list(className = 'dt-center', width = '15%', targets = 3:4),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  formatPercentage('gap_coverage', 2) %>%
  formatStyle(
    'gap_coverage',
    backgroundColor = styleInterval(
      c(.10, .25),
      c('#89c4f4', '#f39c12', '#d91e18')
    )
  ) %>%
  formatRound('gap_interval', 3)
# Composite figure with three labelled panels: the calendar (A) on the upper
# half, the coverage (B) and interval (C) plots sharing the lower half.
# Warnings raised while composing it are suppressed.
suppressWarnings(
  cowplot::ggdraw() +
    cowplot::draw_plot(sapf_gaps_cal, 0, .5, 1, .5) +
    cowplot::draw_plot(sapf_gaps_plot, 0, 0, .5, .5) +
    cowplot::draw_plot(sapf_gaps_plot_int, .5, 0, .5, .5) +
    cowplot::draw_plot_label(
      c("A", "B", "C"),
      c(0, 0, 0.5),
      c(1, 0.5, 0.5),
      size = 12
    )
)

Environmental gaps info

# Environmental gaps table, without the gap start/end timestamps. gap_coverage
# is shown as a percentage with a background colour that changes at the .10 and
# .25 breaks; gap_interval is rounded to 3 decimals.
env_gaps_info %>%
  select(-timestamp_start, -timestamp_end) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Environmental gaps info. gap_interval is expressed in minutes',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '20%', targets = 1:2),
        list(className = 'dt-center', width = '15%', targets = 3:4),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  formatPercentage('gap_coverage', 2) %>%
  formatStyle(
    'gap_coverage',
    backgroundColor = styleInterval(
      c(.10, .25),
      c('#89c4f4', '#f39c12', '#d91e18')
    )
  ) %>%
  formatRound('gap_interval', 3)
# Composite figure with three labelled panels: the calendar (A) on the upper
# half, the coverage (B) and interval (C) plots sharing the lower half.
# Warnings raised while composing it are suppressed.
suppressWarnings(
  cowplot::ggdraw() +
    cowplot::draw_plot(env_gaps_cal, 0, .5, 1, .5) +
    cowplot::draw_plot(env_gaps_plot, 0, 0, .5, .5) +
    cowplot::draw_plot(env_gaps_plot_int, .5, 0, .5, .5) +
    cowplot::draw_plot_label(
      c("A", "B", "C"),
      c(0, 0, 0.5),
      c(1, 0.5, 0.5),
      size = 12
    )
)

Unit Conversion, standardization and transformation

The following table shows the available transformations

# Available transformations table; Presence is converted to character for
# styleEqual().
transformations_table %>%
  mutate(Presence = as.character(Presence)) %>%
  datatable(
    class = 'display compact',
    rownames = FALSE,
    filter = 'none',
    extensions = 'Scroller',
    caption = 'Unit conversion, standarization and transformation',
    options = list(
      dom = 'ti',
      columnDefs = list(
        list(className = 'dt-center', width = '10%', targets = 3),
        list(className = 'dt-center', width = '15%', targets = 1:2),
        list(className = 'dt-right', targets = 0)
      ),
      scroller = TRUE,
      scrollY = 450,
      scrollCollapse = TRUE
    )
  ) %>%
  # rows with Presence 'FALSE' are shaded red
  formatStyle(
    'Presence',
    backgroundColor = styleEqual(
      c('FALSE', 'TRUE'),
      c('#d91e18', 'transparent')
    )
  )
# 2.2.5 logging setup for the 'DataFlow' logger, at WARNING level
log_sapfluxnet_setup(
  'Logs/sapfluxnet.log',
  level = 'WARNING',
  logger = 'DataFlow'
)
# 2.2.6 hand the fixed data tables and the checked metadata over to
# df_accepted_to_lvl1(), logging under 'DataFlow'
df_accepted_to_lvl1(
  params$code,
  sapf_data_fixed,
  env_data_fixed_swc,
  site_md_coordfix,
  stand_md,
  plant_md,
  species_md,
  env_md,
  parent_logger = 'DataFlow'
)

# Safety net: dump every object of the current environment (hidden names
# included) into Data/<code>/Lvl_1/<code>_objects.RData.
save(
  list = ls(all.names = TRUE),
  file = file.path(
    'Data', params$code, 'Lvl_1',
    paste(params$code, 'objects.RData', sep = '_')
  ),
  envir = environment()
)


sapfluxnet/sapfluxnetQC1 documentation built on May 29, 2019, 1:50 p.m.