scripts/Table1_HW_characteristics_variation.R

library(ncdf4)
library(glue)
library(data.table)
library(lubridate)
library(magrittr)
library(foreach)
library(iterators)
library(dplyr)
library(raster)



data_trend <- function(dat, number, type, ifHI) {
    fit <- lm(value ~ year, data = dat)
    info <- summary(fit)
    data.frame(number = number, type = type, ifHI = ifHI,
               slope = info$coefficients[2, 1] * 10,
               pvalue = info$coefficients[2, 4],
               stringsAsFactors = FALSE)
}

table <- data.table()

## HI
file = "OUTPUT/clusterId/HI/HW1.nc"
nc = nc_open(file)
date <- ncvar_get(nc, 'time') %>% as_date(origin = '1970-01-01')
a <- raster(file) %>% area
load('OUTPUT/HI/tmin_arr_HI.rda')
load('OUTPUT/HI/tmax_arr_HI.rda')
load('OUTPUT/HI/tmean_arr_HI.rda')

foreach(i = c(1:13), icount()) %do% {
    if(i %in% c(1, 2, 5, 6)) {
        raw <- tmean_arr_HI
    } else if (i %in% c(3, 7, 8, 10, 11)) {
        raw <- tmax_arr_HI
    } else if (i %in% c(4, 9, 12, 13)) {
        raw <- tmin_arr_HI
    }
    dim(raw) <- 283 * 163 * 20574
    load(paste0('OUTPUT/diff/HI/HW', i, '.rda'))
    cmd = glue("diff <- HW{i}")
    eval(parse(text = cmd))
    dim(diff) <- 283 * 163 * 20574
    rm(list = paste0('HW', i))
    gc()
    clusterId <- nc_open(paste0('OUTPUT/clusterId/HI/HW', i, '.nc')) %>%
        ncvar_get('clusterId')
    dim(clusterId) <- 283 * 163 * 20574
    dt <- data.table(raw, clusterId, diff,
                     date = rep(date, each = 283 * 163), area = values(a))[!is.na(clusterId)]

    #plan1 yearly average
    draw <- data.table(year = unique(year(dt$date)))
    draw$D <- dt[, .(D = uniqueN(date)), .(clusterId, year(date))
    ][, .(D = mean(D)), year]$D

    draw$I <- dt[, .(I = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(I = mean(I)), .(clusterId, year(date))
    ][, .(I = mean(I)), year]$I

    draw$S <- dt[, .(S = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(S = sum(S)), .(clusterId, year(date))
    ][, .(S = mean(S)), year]$S

    draw$area_mean <- dt[, .(area_mean = sum(area) / 1e4), .(clusterId, date)
    ][, .(area_mean = mean(area_mean)), .(clusterId, year(date))
    ][, .(area_mean = mean(area_mean)), year]$area_mean

    draw$area_max <- dt[, .(area_max = sum(area) / 1e4), .(clusterId, date)
    ][, .(area_max = max(area_max)), .(clusterId, year(date))
    ][, .(area_max = mean(area_max)), year]$area_max

    draw$area_sum <- dt[, .(area_sum = sum(area) / 1e4), .(clusterId, year(date))
    ][, .(area_sum = mean(area_sum)), year]$area_sum

    draw <- draw[year <= 2016] %>% melt(id.vars = 'year')
    labels <- draw %>%
        group_by(variable) %>%
        do(data_trend(., paste0('HW', i), 'mean', 'HI'))
    table <- rbind(table, labels)


    #plan2 yearly max
    draw <- data.table(year = unique(year(dt$date)))
    draw$D <- dt[, .(D = uniqueN(date)), .(clusterId, year(date))
    ][, .(D = max(D)), year]$D

    draw$I <- dt[, .(I = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(I = mean(I)), .(clusterId, year(date))
    ][, .(I = max(I)), year]$I

    draw$S <- dt[, .(S = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(S = sum(S)), .(clusterId, year(date))
    ][, .(S = max(S)), year]$S

    draw$area_mean <- dt[, .(area_mean = sum(area) / 1e6), .(clusterId, date)
    ][, .(area_mean = mean(area_mean)), .(clusterId, year(date))
    ][, .(area_mean = max(area_mean)), year]$area_mean

    draw$area_max <- dt[, .(area_max = sum(area) / 1e6), .(clusterId, date)
    ][, .(area_max = max(area_max)), .(clusterId, year(date))
    ][, .(area_max = max(area_max)), year]$area_max

    draw$area_sum <- dt[, .(area_sum = sum(area) / 1e6), .(clusterId, year(date))
    ][, .(area_sum = max(area_sum)), year]$area_sum

    draw <- draw[year <= 2016] %>% melt(id.vars = 'year')
    labels <- draw %>%
        group_by(variable) %>%
        do(data_trend(., paste0('HW', i), 'max', 'HI'))
    table <- rbind(table, labels)


}

rm(tmax_arr_HI, tmin_arr_HI, tmean_arr_HI)
gc()


## nonHI
file = "OUTPUT/clusterId/nonHI/HW1.nc"
date <- nc_open(file) %>% ncvar_get('time') %>% as_date(origin = '1970-01-01')

file = "data-raw/CN05.1_Tm_1961_2018_daily_025x025.nc"
raw_mean <- nc_open(file) %>%
    ncvar_get('tm')
dim(raw_mean) <- 283 * 163 * 21184

file = "data-raw/CN05.1_Tmax_1961_2018_daily_025x025.nc"
raw_max <- nc_open(file) %>%
    ncvar_get('tmax')
dim(raw_max) <- 283 * 163 * 21184

file = "data-raw/CN05.1_Tmin_1961_2018_daily_025x025.nc"
raw_min <- nc_open(file) %>%
    ncvar_get('tmin')
dim(raw_min) <- 283 * 163 * 21184

foreach(i = c(11:13), icount()) %do% {
    if(i %in% c(1, 2, 5, 6)) {
        raw <- raw_mean
    } else if (i %in% c(3, 7, 8, 10, 11)) {
        raw <- raw_max
    } else if (i %in% c(4, 9, 12, 13)) {
        raw <- raw_min
    }
    dim(raw) <- 283 * 163 * 21184
    load(paste0('OUTPUT/diff/nonHI/HW', i, '.nc'))
    cmd = glue("diff <- HW{i}")
    eval(parse(text = cmd))
    dim(diff) <- 283 * 163 * 21184
    rm(list = paste0('HW', i))
    gc()
    clusterId <- nc_open(paste0('OUTPUT/clusterId/nonHI/HW', i, '.nc')) %>%
        ncvar_get('clusterId')
    dim(clusterId) <- 283 * 163 * 21184
    dt <- data.table(raw, clusterId, diff,
                     date = rep(date, each = 283 * 163), area = values(a))[!is.na(clusterId)]

    #plan1 yearly average
    draw <- data.table(year = unique(year(dt$date)))
    draw$D <- dt[, .(D = uniqueN(date)), .(clusterId, year(date))
    ][, .(D = mean(D)), year]$D

    draw$I <- dt[, .(I = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(I = mean(I)), .(clusterId, year(date))
    ][, .(I = mean(I)), year]$I

    draw$S <- dt[, .(S = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(S = sum(S)), .(clusterId, year(date))
    ][, .(S = mean(S)), year]$S

    draw$area_mean <- dt[, .(area_mean = sum(area) / 1e4), .(clusterId, date)
    ][, .(area_mean = mean(area_mean)), .(clusterId, year(date))
    ][, .(area_mean = mean(area_mean)), year]$area_mean

    draw$area_max <- dt[, .(area_max = sum(area) / 1e4), .(clusterId, date)
    ][, .(area_max = max(area_max)), .(clusterId, year(date))
    ][, .(area_max = mean(area_max)), year]$area_max

    draw$area_sum <- dt[, .(area_sum = sum(area) / 1e4), .(clusterId, year(date))
    ][, .(area_sum = mean(area_sum)), year]$area_sum

    draw <- melt(draw, id.vars = 'year')
    labels <- draw %>%
        group_by(variable) %>%
        do(data_trend(., paste0('HW', i), 'mean', 'nonHI'))
    table <- rbind(table, labels)


    #plan2 yearly max
    draw <- data.table(year = unique(year(dt$date)))
    draw$D <- dt[, .(D = uniqueN(date)), .(clusterId, year(date))
    ][, .(D = max(D)), year]$D

    draw$I <- dt[, .(I = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(I = mean(I)), .(clusterId, year(date))
    ][, .(I = max(I)), year]$I

    draw$S <- dt[, .(S = weighted.mean(diff, area)), .(clusterId, date)
    ][, .(S = sum(S)), .(clusterId, year(date))
    ][, .(S = max(S)), year]$S

    draw$area_mean <- dt[, .(area_mean = sum(area) / 1e6), .(clusterId, date)
    ][, .(area_mean = mean(area_mean)), .(clusterId, year(date))
    ][, .(area_mean = max(area_mean)), year]$area_mean

    draw$area_max <- dt[, .(area_max = sum(area) / 1e6), .(clusterId, date)
    ][, .(area_max = max(area_max)), .(clusterId, year(date))
    ][, .(area_max = max(area_max)), year]$area_max

    draw$area_sum <- dt[, .(area_sum = sum(area) / 1e6), .(clusterId, year(date))
    ][, .(area_sum = max(area_sum)), year]$area_sum

    draw <- melt(draw, id.vars = 'year')
    labels <- draw %>%
        group_by(variable) %>%
        do(data_trend(., paste0('HW', i), 'max', 'nonHI'))
    table <- rbind(table, labels)

}


save(table, file = 'OUTPUT/table1.rda')
CUG-hydro/heatwave documentation built on Dec. 17, 2021, 1:53 p.m.