# root folder
knitr::opts_knit$set(root.dir = params$wd)
knitr::opts_chunk$set(error = TRUE, echo = FALSE)
# log setup
logger_name <- paste('QC', params$code, 'qc_start_process',
                     'rep_psi_render', sep = '.')
log_psi_setup('Logs/psi.log',
                     logger = logger_name,
                     level = 'DEBUG')

SITE: r params$code


# Metadata QC

## Quick summary

# Libraries
library(psiQC)
library(dplyr)
library(DT)


# Data load
## site_md
site_md <- dl_metadata(params$md_file, 'Data', data_type = "site_md",
                       parent_logger = logger_name)

## plant_md
plant_md <- dl_metadata(params$md_file, 'Data', data_type = "plant_md",
                        si_code_loc = site_md, parent_logger = logger_name)

## psi_data
psi_data <- dl_metadata(params$psi_data_file, 'Data', data_type = "psi_data",
                        si_code_loc = site_md, parent_logger = logger_name)

## questionnaire
questionnaire_md <- dl_metadata(params$md_file, 'Questionnaire',
                                si_code_loc = site_md,
                                parent_logger = logger_name)

################################################################################

#Simplify questions of the questionnaire
questionnaire_simplified <- qc_simplify_questions(questionnaire_md)

################################################################################

# md qc

## metadata columns
md_cols <- bind_rows(
  qc_md_cols(site_md, 'site_md', parent_logger = logger_name),
  qc_md_cols(plant_md, 'plant_md', parent_logger = logger_name),
  qc_md_cols(psi_data, 'psi_data', parent_logger = logger_name),
  qc_md_cols(questionnaire_simplified, 'Questionnaire', parent_logger = logger_name)
)

## factor variables values
factor_values <- qc_factor_values(site_md, plant_md, psi_data,
                                  parent_logger = logger_name)

## email
email_check <- qc_email_check(site_md, parent_logger = logger_name) %>% unique()

## coordinates
site_md_coordfix <- qc_coordinates(site_md, parent_logger = logger_name) %>% unique()

## species
plant_md_spnames <- qc_species_names_info(
  plant_md$pl_species,
  parent_logger = logger_name
) %>%
  mutate(Md = 'pl')

plant_md$pl_species <- qc_species_names(plant_md$pl_species,
                                        parent_logger = logger_name)

plant_md <- qc_measured_sfn(plant_md, parent_logger = logger_name)


## plant treatment check
pl_treatments_check <- qc_pl_treatments(plant_md, parent_logger = logger_name)


################################################################################

# data qc
## timestamp
psi_data_fixed <- qc_as_timestamp(psi_data, site_md, logger_name)


## timestamp NAs
psi_timestamp_nas <- qc_timestamp_nas(psi_data_fixed, logger_name)

## psi NAs
psi_nas <- qc_psi_nas(psi_data_fixed, logger_name)

## psi SE NAs
psi_SE_nas <- qc_psi_SE_nas(psi_data_fixed, logger_name)

## psi N NAs
psi_N_nas <- qc_psi_N_nas(psi_data_fixed, logger_name)

## extraterrestrial ratiation and timestamp
psi_data_fixed <- qc_ext_radiation(psi_data_fixed, site_md,TRUE)




################################################################################
# create the psiData object and save it as a RData file for later use
## psidata_object
psi_data_object <- psi_data_constructor(
  psi_data = psi_data_fixed,
  site_md = site_md_coordfix,
  plant_md = plant_md,
  question_md = questionnaire_simplified,
  parent_logger = logger_name
)

# save it!
assign(params$code, psi_data_object)
save(list = c(params$code),
     file = file.path('Data', params$code, 'Lvl_1',
                      paste(params$code, '.RData', sep = '')),
     envir = environment())
################################################################################
# results md_qc table
qc_md_results_table(md_cols, factor_values, email_check, site_md_coordfix,
                    plant_md_spnames, parent_logger = logger_name)
################################################################################

NOTE: Metadata Quality Checks are under continous development. Checks made and results presentation can change in the future to adapt to new insights in data curation processes


## Metadata columns

Information about metadata variables (presence, class and NA)

md_cols %>%
  mutate(PresenceOK = as.character(PresenceOK),
         ClassOK = as.character(ClassOK),
         allNA = as.character(allNA),
         anyNA = as.character(anyNA),
         UniqueValue = as.character(UniqueValue)) %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'Metadata variables info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  width = '100px',
                                                  targets = c(1,2,3,4)),
                                             list(className = 'dt-right',
                                                  targets = 0)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('allNA',
              backgroundColor = styleEqual(c('TRUE', 'FALSE'),
                                           c('#89c4f4', 'transparent'))) %>%
  formatStyle('anyNA',
              backgroundColor = styleEqual(c('TRUE', 'FALSE'),
                                           c('#89c4f4', 'transparent'))) %>%
  formatStyle('PresenceOK',
              backgroundColor = styleEqual(c('TRUE', 'FALSE'),
                                           c('transparent', '#d91e18'))) %>%
  formatStyle('ClassOK',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#89c4f4', 'transparent')))%>%
  formatStyle('UniqueValue',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#89c4f4', 'transparent')))

## Metadata factor variables value

Information about factor variables values

factor_values %>%
  mutate_all(as.character) %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'Metadata factor variables info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  width = '100px',
                                                  targets = 0:7)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('site_country',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('pl_status',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('measured_sfn',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('time_psi',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('canopy_position',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('method',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('organ',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent'))) %>%
  formatStyle('aggregation_level',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent')))

## Contributors email

Information about email directions provided

email_check %>%
  mutate(Is_correct = as.character(Is_correct),
         email = stringr::str_replace_all(email, '([a-z]|[0-9])*', '\\*')) %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'Email info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  width = '100px',
                                                  targets = 1),
                                             list(className = 'dt-right',
                                                  targets = 0)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('Is_correct',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent')))

## Coordinates check and fix

Information about site coordinates provided

site_md_coordfix %>%
  select(site_name, lat, lon, is_inside_country) %>%
  mutate(is_inside_country = as.character(is_inside_country)) %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'Coordinates info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  targets = 1:3,
                                                  width = '100px'),
                                             list(className = 'dt-right',
                                                  targets = 0)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('is_inside_country',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent')))

## Species names

Information about species names

plant_md_spnames %>%
  mutate(Concordance = as.character(Concordance),
         IsNA = as.character(IsNA)) %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'Species names spelling',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  targets = 1:4),
                                             list(className = 'dt-right',
                                                  targets = 0),
                                             list(width = '150px',
                                                  targets = 1),
                                             list(width = '100px',
                                                  targets = 2:3),
                                             list(width = '50px',
                                                  targets = 4)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('Concordance',
              backgroundColor = styleEqual(c('FALSE', 'TRUE'),
                                           c('#d91e18', 'transparent')))

## Plant treatments check

Information about declared plant treatments

datatable(pl_treatments_check, class = 'display compact', rownames = FALSE, filter = 'none',
          extensions = c('Scroller'),
          caption = 'Plant treatments info',
          options = list(dom = 'ti',
                         columnDefs = list(list(className = 'dt-center',
                                                width = '100px',
                                                targets = 1),
                                           list(className = 'dt-right',
                                                targets = 0)),
                         scroller = TRUE,
                         scrollY = 450, scrollCollapse = TRUE))

## Metadata Remarks

List of remarks provided by the contributor/s

psi measure remarks

r if (is.null(psi_data_fixed$remarks)) {'*No remarks*'} else {psi_data_fixed$remarks}


# Data QC

## Quick Summary

################################################################################
# table
qc_data_results_table(psi_data_fixed, psi_timestamp_nas,
                      psi_nas, psi_SE_nas, psi_N_nas,
                      parent_logger = logger_name)
################################################################################

NOTE: Data Quality Checks are under continous development. Checks made and results presentation can change in the future to adapt to new insights in data digesting processes


## TIMESTAMP Checks

Information about TIMESTAMPS

timestamp_psi <- psi_data_fixed %>%
  select(timestamp,pl_code,pl_name) %>%
  group_by(pl_name,pl_code) %>% 
  summarise(Sample = as.character(sample(timestamp, 1)),
            Range = paste(range(timestamp), collapse = ' - ')) %>%
  mutate(Data = 'psi') %>%
  select(Data, pl_name, pl_code, Sample, Range)

timestamp_psi %>%
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'TIMESTAMP format info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  width = '40%',
                                                  targets = 2),
                                             list(className = 'dt-center',
                                                  width = '20%',
                                                  targets = 1),
                                             list(className = 'dt-right',
                                                  targets = 0)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE))
if (is.logical(psi_timestamp_nas)) {
  invisible(TRUE)
} else {
  psi_timestamp_nas %>%
    datatable(class = 'display compact', rownames = FALSE, filter = 'none',
              extensions = c('Scroller'),
              caption = 'Psi TIMESTAMP NAs info',
              options = list(dom = 'ti',
                             columnDefs = list(list(className = 'dt-center',
                                                    width = '25%',
                                                    targets = 1),
                                               list(className = 'dt-right',
                                                    targets = 0)),
                             scroller = TRUE,
                             scrollY = 450, scrollCollapse = TRUE))
}
# 2.2.5 log setup
log_psi_setup('Logs/psi.log',
                     logger = 'DataFlow',
                     level = 'WARNING')
# 2.2.6 saving the fixed datasets and the objects created in the level1 folder
df_accepted_to_lvl1_psi(
  params$code, psi_data_fixed,
  site_md_coordfix, plant_md, 
  questionnaire_simplified,
  parent_logger = 'DataFlow'
)

# saving Rdata file with all the objects (just in case)
save(list = ls(all.names = TRUE),
     file = file.path('Data', params$code, 'Lvl_1',
                      paste(params$code, 'objects.RData', sep = '_')),
     envir = environment())

## PSI checks

psi_data_fixed %>%
  select(timestamp, pl_name, pl_code, psi) %>%
  mutate(timestamp = as.character(timestamp)) %>% 
  datatable(class = 'display compact', rownames = FALSE, filter = 'none',
            extensions = c('Scroller'),
            caption = 'out of range info',
            options = list(dom = 'ti',
                           columnDefs = list(list(className = 'dt-center',
                                                  width = '40%',
                                                  targets = 2),
                                             list(className = 'dt-center',
                                                  width = '20%',
                                                  targets = 1),
                                             list(className = 'dt-right',
                                                  targets = 0)),
                           scroller = TRUE,
                           scrollY = 450, scrollCollapse = TRUE)) %>%
  formatStyle('psi',
              backgroundColor = styleInterval(c(-10, 0),
                                           c('#f39c12', 'transparent','#d91e18')))


vflo/PSIsapfluxnetQC1 documentation built on Feb. 15, 2024, 3:19 a.m.