##### chunks settings for codes and figures #####
# Global knitr chunk defaults, grouped by what they control.
knitr::opts_chunk$set(
  # code display
  echo = FALSE,
  tidy = TRUE,
  # suppress messages and warnings in the rendered output
  message = FALSE,
  warning = FALSE,
  # figure size, placement and resolution
  fig.width = 8,
  fig.height = 6,
  fig.align = "center",
  fig.pos = "H",
  out.extra = "",
  dpi = 300
)
options(
  # show NA cells as empty strings in kable tables
  knitr.kable.NA = "",
  # keep kableExtra from changing the default table format when it is loaded,
  # which would otherwise restyle every table in the document
  kableExtra.auto_format = FALSE
)

##### set CRAN mirror #####
# Fall back to the mirror given in params$cran_mirror when no repository is
# configured or CRAN is still the "@CRAN@" placeholder.
local({
  repos <- getOption("repos")
  cran_unset <- length(repos) == 0 ||
    identical(unname(repos["CRAN"]), "@CRAN@")
  if (cran_unset) {
    repos["CRAN"] <- params$cran_mirror
  }
  options(repos = repos)
})

##### install from CRAN mirrors #####
# Install each required package only when system.file() cannot find it.
lapply(
  c(
    "antgreens", "knitr", "tidyverse", "data.table", "readxl",
    "Rcpp", "rlang", "webshot", "DT", "echarts4r"
  ),
  function(pkg_name) {
    is_missing <- !nzchar(system.file(package = pkg_name))
    if (is_missing) {
      install.packages(pkg_name)
    }
  }
)

##### install from github #####
# Maps the name of each installed package to the GitHub repository it is
# installed from. The names must be unique: they are what system.file() checks
# and what indexes this vector. A duplicated name makes the later entry
# unreachable, so its repository would never be installed.
githubs <- c(
  "citr" = "crsh/citr",
  "echarts4r.assets" = "JohnCoene/echarts4r.assets",
  "echarts4r.maps" = "JohnCoene/echarts4r.maps"
)
# Install only the packages that are not already available.
lapply(names(githubs), function(pkg) {
  if (system.file(package = pkg) == "") {
    remotes::install_github(githubs[[pkg]], upgrade = FALSE)
  }
})
library(antgreens)
library(tidyverse)
library(echarts4r)
library(readxl)
library(DT)

# Install PhantomJS through webshot.
webshot::install_phantomjs()
# Dimensions to be analyzed: the configured ones plus "product" and "drug".
ana_dims <- append(names(params$ana_dims_part), c("product", "drug"))
# English -> Chinese name translator for the dimensions to be analyzed.
ana_dims_en2cn <- modifyList(
  x = params$ana_dims_part,
  val = list(
    product = params$ana_dims_product,
    drug = params$ana_dims_drug
  )
)
# Province name translators, built as named lists.
# seq_along() is used instead of 1:length() so that an empty ProvinceShort
# yields zero iterations rather than looping over c(1, 0).

# ProvinceStandard[i] -> ProvinceShort[i]
provinces_trans_short <- list()
for (i in seq_along(ProvinceShort)) {
  provinces_trans_short[[ProvinceStandard[i]]] <- ProvinceShort[i]
}
# ProvinceShort[i] -> ProvincePinyin[i]
provinces_short_trans_pinyin <- list()
for (i in seq_along(ProvinceShort)) {
  provinces_short_trans_pinyin[[ProvinceShort[i]]] <- ProvincePinyin[i]
}

##### DT and report settings #####
# `dable` is the customized version of DT::datatable
options(htmltools.dir.version = FALSE, formatR.indent = 2)
# Currently disabled options:
#   width = 55, digits = 4,
#   warnPartialMatchAttr = FALSE, warnPartialMatchDollar = FALSE
options(
  DT.options = DTOptions,
  report.options = list(
    # default Chinese column names, overridden by params$colname_map
    colnameMap = modifyList(ColnamesI18n$chinese, params$colname_map),
    provinceTransShort = provinces_trans_short,
    provinceTransPinyin = provinces_short_trans_pinyin
  )
)
# Raw data: read with fread as UTF-8, then converted to a plain data.frame.
raw_data <- data.table::fread(params$raw_data, encoding = "UTF-8") |>
  as.data.frame()
# Excel inputs; the file paths come from the document params.
mrl_data <- readxl::read_excel(path = params$mrl_data)
provinces <- readxl::read_excel(path = params$province)
products <- readxl::read_excel(path = params$products)
pesticides <- readxl::read_excel(path = params$pesticides)
# ...
# Thresholds loaded from params$ana_threshold; threshold_key is the keyword
# identifying the threshold column.
ana_threshold <- load_threshold(
  params$ana_threshold,
  dims_en2cn = ana_dims_en2cn,
  threshold_key = params$threshold_kw
)
# Treated data derived from the raw and MRL data.
# NOTE(review): st_drug / ed_drug presumably delimit the drug columns - confirm.
treated_data <- calc_treated_data(
  raw_data,
  mrl_data,
  params$st_drug,
  params$ed_drug,
  raw_col_en2cn = params$ana_dims_part,
  product_name = params$ana_dims_product
)
# Statistics over the combinations of the analyzed dimensions.
dims_comb_data <- gen_dims_comb_stats(
  treated_data,
  ana_dims,
  fast = TRUE
)
# Source every R script found (recursively) under params$codes_dir.
# `pattern` is a regular expression, not a glob: a glob-style "*.R" would also
# match names such as "notes.Rmd" or "cache.RData". "\\.R$" keeps only files
# whose name ends in ".R".
code_files <- list.files(
  params$codes_dir,
  pattern = "\\.R$", full.names = TRUE, recursive = TRUE
)
for (cf in code_files) {
  source(cf, encoding = "UTF-8")
}

前言 {-}

使用流程

创建项目

  载入 antgreens,使用 use_template(dir, folder) 复制报告模板到指定目录并将工作目录移至该目录下。

确定维度及名称

  确定除了“具体产品”和“药品”之外需要分析的所有维度,并为其中文名称指定英文变量名,放入列表中,例如:

dims_en2cn <- list(year = "年份", ...)

  接着,确定原始数据与 MRL 数据中共同代表具体产品的列名

product_name <- "产品名称"

  最后,选取一个中文名称代表检测的所有农药这一维度,默认为“药物”

drug_name <- "药物"

antgreens 默认使用 `r paste0(sapply(names(RawColnameTrans), \(x) {paste0("\x60", x, " = ", RawColnameTrans[[x]], "\x60")}), collapse = '、')`、`r paste0("\x60product = ", ProductLabelName, "\x60、\x60drug = ", DrugNames, "\x60")` 作为分析的维度及其名称,其中中文名为原始数据中的列名,英文名为运行时使用的变量名。

计算各维度组合的样本量数据

  载入原始数据 data,确定需要分析的所有维度名(即列名),使用 check_sample_size(data, dimensions, drug_name, path) 获取各维度组合下的样本量(最大、最小、中位数),并保存在文件中;然后手动根据实际情况确定各维度组合的样本量阈值。确定阈值时可以使用

get_sample_size_stats(data, draw_figure = TRUE, 年份, 药物)

来输出该维度组合下各取值组合的样本量分布图,辅助确定样本量阈值。

  将确定好的阈值写入之前输出的文件,列名建议包含“阈值”一词(例如“样本量阈值”),也可以取其他名称,但是需要记录阈值所在列的列名关键词,编译时作为参数传入:

threshold_key <- "阈值"

(待续)



YuanchenZhu2020/antgreens documentation built on Dec. 18, 2021, 8:20 p.m.