完成两套数据的拼接
library(Ipaper) library(dplyr) # library(data.table) library(hydroTools) library(tidyfst) library(data.table) f_raw = "data-raw/INPUT/INPUT_met2481_Tmax&RHmax_for_HImax_1951-2019.fst"
# 去除不完整的年份 cal_heatindex <- function(d) { Pa <- atm # select(site, date, RH_avg, Tair_avg, Tair_max) mutate(d, q_mean = RH2q(RH_avg, Tair_avg, Pa), RH_min2 = q2RH(q_mean, Tair_max, Pa), # Tw = cal_Tw(ea = q2ea(q_mean, Pa), Tair_max, Pa), HI_max = heat_index(Tair_max, RH_min2), HI_max_e = heat_index(Tair_max, RH_avg) ) } rm_TailNaN <- function(d) { ind <- d[, which.notna(RH_avg + Tair_avg + Tair_max)] i_begin <- first(ind) i_end <- last(ind) d[i_begin:i_end, ] } get_siteInfo <- function(d) { ind <- d[, which.notna(RH_avg + Tair_avg + Tair_max)] n_all <- nrow(d) n_miss <- n_all - length(ind) data.table( date_begin = min(d$date), date_end = max(d$date), n_all, n_miss, perc_miss = n_miss / n_all * 100 ) }
f = "Z:/DATA/China/2400climate data/ChinaMeteDaily_SURF_CLI_CHN_MUL_DAY_[195101,202003]_processed.csv" df = fread(f)
# RH_min缺失30%左右,因此不直接使用 data_raw = df %>% select(site, date, # starts_with("Pa"), starts_with("RH"), starts_with("Tair")) %>% mutate(across(starts_with("Tair"), ~ divide_by(., 1e1)), across(starts_with("Pa"), ~ divide_by(., 1e2))) setkeyv(data, c("site"))
# 这一步仅有0.16%的数据移除 .dat = data_raw[date <= "2019-12-31", rm_TailNaN(.SD), .(site)] data <- .dat %>% cal_heatindex() export_fst(data, f_raw, 50)
data = import_fst(f_raw) varnames = c("site", "date", "RH_avg", "RH_min", "Tair_avg", "Tair_max", "Tair_min", "Pa_avg", "Pa_max", "Pa_min") f_2020 = path.mnt("Z:/DATA/China/ChinaMet_hourly_mete2000/data-raw/China_Mete2000_daily_HImete_2020-2022.csv") data_2020 = fread(f_2020) %>% rename_with(~ gsub("RHU", "RH", .) %>% gsub("mean", "avg", .)) %>% select(all_of(varnames)) %>% mutate(across(starts_with("Pa_"), ~ divide_by(., 10))) %>% # Pa to kPa cal_heatindex() # 挑选年份完成的数据 df = rbind(data, data_2020) setkeyv(df, c("site", "date")) export_fst(df, "data-raw/INPUT/INPUT_met2481_Tmax&RHmax_for_HImax_raw_1951-2022.fst") # info = data[, get_siteInfo(.SD), site] # sites_bad <- info[perc_miss >= 2, site] ## 存在一些异常值,需要调用`fix_badValues`进行处理
可以看到一些缺失比较多的站点
info_miss = data2[site %in% sites_bad, missInfo(RH_avg + Tair_avg + Tair_max, date, site[1])$info, site] info_miss
library(tidymet) # st_met2481 f_org = "/mnt/z/GitHub/rpkgs/RHtestsHelper/data-raw/INPUT/INPUT_met2474_Tmax&RHmax_for_HImax_1951-2022.fst" %>% path.mnt() ## 均一化数据 # RH_avg, Tair_avg, Tair_max f_homo = "/mnt/z/GitHub/rpkgs/RHtestsHelper/OUTPUT/ChinaHI/OUTPUT_mete2481_1961-2022_RHtests_v20230228_RH_avg.csv" %>% path.mnt() df = fread(f_homo) df_org = import_fst(f_org)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.