inst/doc/data-prep.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(tidyverse.quiet = TRUE)

## ----single-file, message=FALSE-----------------------------------------------
library(tidyverse)
# readr, dplyr and lubridate are part of tidyverse

raw_conc <- read_delim(
  "ex_data/26124054001.#00",
  delim = ",", # our file is comma separated
  skip = 25 # the first 25 rows are logger infos that we do not want to keep
)

## ----rawconc-str1, echo=FALSE-------------------------------------------------


str(raw_conc, width = 70, strict.width = "cut", give.attr = FALSE)

## ----cols-correction----------------------------------------------------------
raw_conc <- raw_conc |>
  rename(
    co2_conc = "CO2_calc (ppm)"
  ) |>
  mutate(
    datetime = paste0(Date, Time), # we paste date and time together
    datetime = dmy_hms(datetime) # datetime instead of character
  ) |>
  select(datetime, co2_conc)

## ----rawconc-str2, echo=FALSE-------------------------------------------------


str(raw_conc, width = 70, strict.width = "cut", give.attr = FALSE)

## ----multiple-files, message=FALSE--------------------------------------------
library(tidyverse)

raw_conc <- list.files( # list the files
  "ex_data", # at location "ex_data"
  full.names = TRUE,
  pattern = "*CO2*" # that contains "CO2" in their name
) |>
  map_dfr(
    read_csv, # we map read_csv on all the files
    na = c("#N/A", "Over") # "#N/A" and Over should be treated as NA
  ) |>
  rename(
    conc = "CO2 (ppm)"
  ) |>
  mutate(
    datetime = dmy_hms(`Date/Time`)
  ) |>
  select(datetime, conc)

## ----one-file-one-flux, message=FALSE-----------------------------------------

library(tidyverse)

raw_conc <- list.files( #listing all the files
  "ex_data/field_campaign", # at location "ex_data/field_campaign"
  full.names = TRUE
) |>
  map_dfr( # we map read_tsv on all the files
    # read_tsv is the version of read_delim for tab separated value files
    read_tsv,
    skip = 3,
    # creates a column with the filename, that we can use as flux ID
    id = "filename"
  ) |>
  rename( # a bit of renaming to make the columns more practical
    co2_conc = "CO2 (umol/mol)",
    h2o_conc = "H2O (mmol/mol)",
    air_temp = "Temperature (C)",
    pressure = "Pressure (kPa)"
  ) |>
  mutate(
    datetime = paste(Date, Time),
    # we get rid of the milliseconds
    datetime = as.POSIXct(datetime, format="%Y-%m-%d %H:%M:%OS"),
    pressure = pressure / 101.325, # conversion from kPa to atm
    filename = basename(filename) # removing folder names
  ) |>
  select(datetime, co2_conc, h2o_conc, air_temp, pressure, filename)

## ----rawconc-str3, echo=FALSE-------------------------------------------------


str(raw_conc, width = 70, strict.width = "cut", give.attr = FALSE)

## ----tricky, message=FALSE, warning=FALSE-------------------------------------
library(tidyverse)

raw_conc <- read_csv( # read_csv is the same as read_delim(delim = ",")
  "ex_data/011023001.#01",
  col_types = "Tcdddddd",
  na = "#N/A" # we tell read_csv what NA look like in that file
)

## ----rawconc-str4, echo=FALSE-------------------------------------------------


str(raw_conc, width = 70, strict.width = "cut", give.attr = FALSE)

## ----tricky2, warning=FALSE, message=FALSE------------------------------------
raw_conc <- read_csv(
  "ex_data/011023001.#01",
  skip = 1, # this time we skip the row with the column names
  col_names = FALSE, # we tell read_csv that column names are not provided
  na = "#N/A" # we tell read_csv what NA looks like in that file
)

## ----rawconc-str5, echo=FALSE-------------------------------------------------


str(raw_conc, width = 70, strict.width = "cut", give.attr = FALSE)

## ----tricky3------------------------------------------------------------------

# we read each row of our file as an element of a list
lines <- readLines("ex_data/011023001.#01")
lines <- lines[-1] # removing the first element with the column names

# we first deal with the elements where we have those environmental data
# that were measured every 10 seconds
linesenv <- lines[seq(1, length(lines), 10)]
env_df <- read.csv(
  textConnection(linesenv), # we read the list into a csv
  header = FALSE, # there is no header
  colClasses = rep("character", 14)
  # specifying that those columns are character is important
  # if read as integer, 06 becomes 6, and when putting columns together,
  # 400.06 will be read as 400.6, which is wrong
)

env_df <- env_df |>
  mutate(
    datetime = dmy_hms(V1),
    temp_air = paste(
      V7, # V7 contains the left side of the decimal point
      V8, # V8 the right side
      sep = "." # this time we put it in american format
    ),
    temp_air = as.double(temp_air), # now we can make it a double
    temp_soil = as.double(paste(V9, V10, sep = ".")),
    co2_conc = as.double(paste(V11, V12, sep = ".")),
    PAR = as.double(paste(V13, V14, sep = "."))
  ) |>
  select(datetime, temp_air, temp_soil, co2_conc, PAR)

# now we do the same with the other elements of the list
lines_other <- lines[-seq(1, length(lines), 10)]
other_df <- read.csv(
  textConnection(lines_other),
  header = FALSE,
  colClasses = rep("character", 10)
)

other_df <- other_df  |>
  mutate(
    datetime = dmy_hms(V1),
    co2_conc = as.double(paste(V8, V9, sep = "."))
  ) |>
  select(datetime, co2_conc)

# and finally we do a full join with both
conc_df <- bind_rows(env_df, other_df) |>
  arrange(datetime) # I like my dataframes in chronological order

## ----rawconc-str6, echo=FALSE-------------------------------------------------


str(conc_df, width = 70, strict.width = "cut", give.attr = FALSE)

Try the fluxible package in your browser

Any scripts or data that you put into this service are public.

fluxible documentation built on June 25, 2025, 1:08 a.m.