Build Status

codecov

knitr::opts_chunk$set(echo = FALSE, message = FALSE)
library(tidyr)
library(ggplot2)
theme_set(theme_bw())
library(lubridate)
library(tibble)
library(cosore)

desc <- csr_table("description")
diag <- csr_table("diagnostics")

compute_interval <- function(dsd) {
  results <- cosore:::compute_interval(dsd)
  median(round(weighted.mean(results$Interval, results$N, na.rm = TRUE), 0))
}

compute_years <- function(x) {
  tibble(Dataset = x$description$CSR_DATASET,
         Records = x$diagnostics$CSR_RECORDS,
         Start = x$diag$CSR_TIMESTAMP_BEGIN,
         End = x$diag$CSR_TIMESTAMP_END,
         `Interval (min)` = as.integer(compute_interval(x$data)),
         Size  = format(object.size(x), units = "Mb"),
         Gas = x$diag$CSR_GASES)
}

compute_coverage <- function(x) {
  tibble(Dataset = x$description$CSR_DATASET,
         IGBP = x$description$CSR_IGBP,
         Year = year(x$data$CSR_TIMESTAMP_BEGIN), 
         Month = month(x$data$CSR_TIMESTAMP_BEGIN),
         Day = day(x$data$CSR_TIMESTAMP_BEGIN))
}

compute_flux_dist <- function(x) {
  if(!"CSR_FLUX_CO2" %in% names(x$data)) x$data$CSR_FLUX_CO2 <- NA_real_
  if(!"CSR_FLUX_CH4" %in% names(x$data)) x$data$CSR_FLUX_CH4 <- NA_real_

  tibble(Dataset = x$description$CSR_DATASET,
         IGBP = x$description$CSR_IGBP,
         CSR_FLUX_CO2 = x$data$CSR_FLUX_CO2,
         CSR_FLUX_CH4 = x$data$CSR_FLUX_CH4)
}

Summary

# Extract min and max date information for each dataset
datasets <- list_datasets()

years <- list()
coverage <- list()
flux_dist <- list()
db_size <- 0
records <- 0
for(dsn in datasets) {
  x <- csr_dataset(dsn, quiet = TRUE)

  if(is.data.frame(x$data)) {
    years[[dsn]] <- compute_years(x)
    coverage[[dsn]] <- compute_coverage(x)
    flux_dist[[dsn]] <- compute_flux_dist(x)
    db_size <- db_size + object.size(x)
    records <- records + nrow(x$data)
  }
}

years <- cosore:::rbind_list(years)
years$Start <- format(years$Start, format = "%Y-%m")
years$End <- format(years$End, format = "%Y-%m")

ports <- aggregate(CSR_MSMT_VAR ~ CSR_DATASET, data = csr_table("ports"), FUN = function(x) paste(unique(x), collapse = ", "))
names(ports) <- c("Dataset", "Var")
years <- merge(years, ports)

Datasets: r length(list_datasets())

Datasets with data: r nrow(years)

Total records: r format(records, big.mark = ",")

Total size: r format(db_size, "Mb")

x <- knitr::kable(years, format = "html", format.args = list(big.mark = ',')) 
kableExtra::kable_styling(x, full_width = FALSE)

Spatial coverage

library(sp)
library(leaflet)
df <- data.frame(label = paste(desc$CSR_DATASET, desc$CSR_SITE_NAME),
                 lon = desc$CSR_LONGITUDE, lat = desc$CSR_LATITUDE)
df <- subset(df, !is.na(lon) & !is.na(lat))
coordinates(df) <- ~lon + lat
lflt <- addMarkers(leaflet(df), label = df$label)
addTiles(lflt)

Temporal coverage

coverage <- cosore:::rbind_list(coverage)
cover_day <- unique(coverage) # this is super slow compared to dplyr::distinct!
dpm <- aggregate(Day ~ Dataset + IGBP + Year + Month, 
                 data = cover_day, length)
dpm$Time <- lubridate::ymd(paste(dpm$Year, dpm$Month, "1"))
ggplot(dpm, aes(Time, Dataset, fill = Day)) + 
  geom_tile(na.rm = TRUE) +
  scale_fill_continuous("Days per month",
                        high = "#132B43", low = "#56B1F7")

IGBP coverage

dpm_igbp <- aggregate(Day ~ IGBP + Year + Month, data = cover_day, length)
dpm_igbp$Time <- lubridate::ymd(paste(dpm_igbp$Year, dpm_igbp$Month, "1"))
ggplot(dpm_igbp, aes(Time, IGBP, fill = Day)) + geom_tile(na.rm = TRUE) +
  scale_fill_continuous("Days per month",
                        high = "#132B43", low = "#56B1F7")

Flux distribution

x <- cosore:::rbind_list(flux_dist)
x_co2 <- x[x$CSR_FLUX_CO2 > -1 & x$CSR_FLUX_CO2 < 15 & is.finite(x$CSR_FLUX_CO2),]
ggplot(x_co2, aes(x = CSR_FLUX_CO2)) + 
  geom_histogram(bins = 30) +
  facet_wrap(~IGBP, scales = "free_y") 
x_ch4 <- x[x$CSR_FLUX_CH4 > -5 & x$CSR_FLUX_CH4 < 25 & is.finite(x$CSR_FLUX_CH4),]
ggplot(x_ch4, aes(x = CSR_FLUX_CH4)) + 
  geom_histogram(bins = 30) +
  facet_wrap(~IGBP, scales = "free_y")

Database growth

x <- system.file(file.path("extdata", "database_growth.csv"), 
                 package = "cosore", mustWork = TRUE)
growth <- read.csv(x, colClasses = c("Date", rep("numeric", 3), "character"))
growth <- rbind(growth,
                data.frame(Date = Sys.Date(),
                           Records = records,
                           Size_Mb = as.numeric(db_size) / 1024 / 1024,
                           Datasets = length(list_datasets()),
                           Version = "*", stringsAsFactors = FALSE))
growth <- cosore:::minigather(growth, c("Records", "Size_Mb", "Datasets"), 
                              "Variable", "Value")
ggplot(growth, aes(Date, Value, label = Version)) + 
  geom_line() + geom_label() + 
  facet_wrap(~Variable, scales = "free")


bpbond/cosore documentation built on July 20, 2021, 3:17 p.m.