# cleaning_code/5_trait_data/seedclim_traitdata_cleaning.R

#### SeedClim trait data cleaning ####

#### Load libraries ####
library(tidyverse)
library(lubridate)
library("dataDownloader")

### Download data from OSF
# Fetch the two raw trait archives from OSF and unpack each one into the
# directory it was downloaded to.
# NOTE(review): `node` is not defined anywhere in this file - presumably it is
# set by a calling script or in the session; confirm before running standalone.
# NOTE(review): the archives are extracted under "plant_traits/data/", while the
# read calls further down use paths under "cleaning_code/5_trait_data/" -
# confirm the files end up where they are read.
outDir <- "plant_traits/data/"
osf_raw_dir <- "4_Trait_data/Raw_data"

# 2011-2012 archive
archive_2011_2012 <- "Trait_data_2011-2012.zip"
get_file(
  node = node,
  file = archive_2011_2012,
  path = outDir,
  remote_path = osf_raw_dir
)
zipFile <- paste0(outDir, archive_2011_2012)
unzip(zipFile, exdir = outDir)

# 2016-2017 archive
archive_2016_2017 <- "Trait_data_2016-2017.zip"
get_file(
  node = node,
  file = archive_2016_2017,
  path = outDir,
  remote_path = osf_raw_dir
)
zipFile <- paste0(outDir, archive_2016_2017)
unzip(zipFile, exdir = outDir)

#### Trait data from 2012 ####

## Read in data ##

# Raw 2012 inputs: the SLA measurements are read with the base reader (strings
# kept as character), the leaf chemistry with the readr reader.
# NOTE(review): the two paths differ only in the case of the folder name
# ("data" vs "Data"); this matters on case-sensitive file systems - confirm.
my_sla <- read.csv(
  file = "cleaning_code/5_trait_data/data/RawTraitData_SLA.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Previously read with read.csv() from 'plant_traits/Data/raw_data_CN_2014Sept15.csv'.
my_leaf_chem <- read_csv(
  file = "cleaning_code/5_trait_data/Data/raw_data_CN_2014Sept15.csv"
)

## Site name dictionary ##

# Lookup table: three-letter site codes used in the 2012 chemistry data
# (written with Norwegian letters) -> full site names in plain ASCII.
site_codes_2012_txt <- "old new
  Arh Arhelleren
  Øvs Ovstedalen
  Ves Veskre
  Skj Skjelingahaugen
  Låv Lavisdalen
  Gud Gudmedalen
  Ulv Ulvehaugen
  Vik Vikesland
  Høg Hogsete
  Ålr Alrust
  Fau Fauske
  Ram Rambera"

dict_Site_2012 <- read.table(
  text = site_codes_2012_txt,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Lookup table: misspelled full site names -> the spelling used everywhere else.
site_mistakes_txt <- "old new
  Ovstedal Ovstedalen
  Skjellingahaugen Skjelingahaugen
  Ulvhaugen Ulvehaugen"

dict_Site_mistakes <- read.table(
  text = site_mistakes_txt,
  header = TRUE,
  stringsAsFactors = FALSE
)


## Clean and combine data sets ##

# Derive leaf area, dry mass and SLA for the 2012 leaves, then bring site
# names, species codes, dates and column names in line with the later tables.
my_sla <- my_sla %>%
  mutate(
    # total area = blade area + petiole area
    leaf_area_cm2 = true.leaf.area..cm2. + true.petiole.area,
    # NOTE(review): the factor 0.001 presumably converts mg to g - confirm
    dry_weight_g = Weight * 0.001,
    SLA_cm2_g = leaf_area_cm2 / dry_weight_g,
    # correct the known misspellings of full site names
    siteID = plyr::mapvalues(
      Site,
      from = dict_Site_mistakes$old,
      to = dict_Site_mistakes$new
    )
  ) %>%
  separate(Species, into = c("Genus", "species"), sep = " ") %>%
  mutate(
    # species code = first three letters of genus + "." + first three letters
    # of the epithet; a missing epithet becomes "sp"
    Genus = substr(Genus, 1, 3),
    species = substr(species, 1, 3),
    species = ifelse(is.na(species), "sp", species),
    species = paste0(Genus, ".", species),
    Date = mdy(Date),
    year = "2012",
    Individual = as.character(Individual)
  ) %>%
  rename(date = Date, dry_mass_g = dry_weight_g, individual = Individual) %>%
  # drop the raw inputs and helper columns that are no longer needed
  select(
    -c(
      Weight, true.leaf.area..cm2., true.petiole.area, SLA..m2.kg.1.,
      Genus, Site, photo
    )
  )


# Standardise the 2012 leaf chemistry table: rename columns, drop the "Apple"
# rows, expand site codes to full site names and tag the year.
my_leaf_chem <- my_leaf_chem %>%
  rename(
    CN_ratio = CN,
    d13C = d13C.UCD,
    d15N = d15N.UCD,
    siteID = Site,
    species = Species
  ) %>%
  filter(siteID != "Apple") %>%
  mutate(
    siteID = plyr::mapvalues(
      siteID,
      from = dict_Site_2012$old,
      to = dict_Site_2012$new
    ),
    year = "2012",
    # Second pass for site codes whose special letter is written as a single
    # byte escape and was therefore not caught by the dictionary above.
    siteID = str_replace(siteID, "\xc5lr", "Alrust"),
    siteID = str_replace(siteID, "H\xf8g", "Hogsete"),
    siteID = str_replace(siteID, "L\xe5v", "Lavisdalen"),
    siteID = str_replace(siteID, "\xd8vs", "Ovstedalen")
  )



#### Trait data from 2016 & 2017 ####
 
#### Load trait data ####
 
# Raw 2016/2017 inputs, all semicolon-separated: leaf traits, leaf areas and
# the CN measurements (the latter read with "." as decimal mark).
traits <- read.csv(
  file = "cleaning_code/5_trait_data/Data/LeafTraits_SeedClim.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
LA <- read.csv2(
  file = "cleaning_code/5_trait_data/Data/Leaf_area_total.csv",
  stringsAsFactors = FALSE
)
CN <- read.csv2(
  file = "cleaning_code/5_trait_data/Data/CNratio.csv",
  dec = ".",
  sep = ";"
)
 
#### Dictionaries ####
 
# Species lookup for the CN samples; its CN_ab and Species columns are used
# further down. header = TRUE and sep = ";" are the read.csv2() defaults.
dict_CN <- read.csv2(
  file = "cleaning_code/5_trait_data/Data/Dict_CN.csv",
  stringsAsFactors = FALSE
)

# Lookup table: two-letter site prefixes of the CN sample names -> full names.
site_codes_cn_txt <- "old new
  AR Arhelleren
  OV Ovstedalen
  VE Veskre
  SK Skjelingahaugen
  LA Lavisdalen
  GU Gudmedalen
  UL Ulvehaugen
  VI Vikesland
  HO Hogsete
  AL Alrust
  FA Fauske
  RA Rambera"

dict_Site_CN_2016 <- read.table(
  text = site_codes_cn_txt,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Lookup table: three-letter site codes of the 2016/2017 trait data -> full names.
site_codes_2016_txt <- "old new
Arh Arhelleren
Ovs Ovstedalen
Ves Veskre
Skj Skjelingahaugen
Lav Lavisdalen
Gud Gudmedalen
Ulv Ulvehaugen
Vik Vikesland
Hog Hogsete
Alr Alrust
Fau Fauske
Ram Rambera"

dict_Site_2016 <- read.table(
  text = site_codes_2016_txt,
  header = TRUE,
  stringsAsFactors = FALSE
)
 
 
#### Cleaning the trait data before merging ####
 
 # Tidy the raw 2016/2017 leaf measurements: readable column names, parsed
 # dates, join keys for the leaf-area and CN tables, full site names, species
 # codes and lower thresholds on the mass and thickness readings.
 traits <- traits %>%
   rename(
     Height = Height..mm.,
     Lth_1 = Lth.1..mm.,
     Lth_2 = Lth.2..mm.,
     Lth_3 = Lth.3..mm.,
     Wet_mass = Wet.mass..g.,
     Dry_mass = Dry.mass..g.,
     Site = Location
   ) %>%
   select(-Lth.average..mm.) %>% # dropped here because it is recomputed later
   mutate(
     Date = mdy(Date),
     year = as.character(year(Date)),
     # sites ordered from cold to warm and from dry to wet
     Site = factor(
       Site,
       levels = c(
         "Ulv", "Lav", "Gud", "Skj", "Alr", "Hog",
         "Ram", "Ves", "Fau", "Vik", "Arh", "Ovs"
       )
     ),
     # ID_LA and Site_sp are built while Site still holds the three-letter code
     ID_LA = paste0(Site, "_", Species, "_", Individual, ".jpg"),
     Site_sp = paste0(Site, "_", Species),
     Site = plyr::mapvalues(
       Site,
       from = dict_Site_2016$old,
       to = dict_Site_2016$new
     ),
     # ID_CN is built after the mapping, so it carries the full site name
     ID_CN = paste0(Site, "_", Species, "_", Individual, ".jpg")
   ) %>%
   separate(Species, into = c("Genus", "species"), sep = "_") %>%
   mutate(
     species = paste0(Genus, ".", species),
     # Masses under 0.0005 g are set to NA: such values are so low that they are
     # not trustworthy (outside of the margin of error of the balance).
     Dry_mass = replace(Dry_mass, Dry_mass < 0.0005, NA),
     Wet_mass = replace(Wet_mass, Wet_mass < 0.0005, NA),
     # One outlier that is very far off measurements 1 and 2 is set to NA.
     Lth_3 = replace(Lth_3, Lth_3 < 0.9, NA)
   )
 
 
 #### Cleaning the leaf area data before merging ####
 
 # Coerce leaf area to numeric, then keep only areas above 0.1 (lower threshold
 # for data that is within the error of the scanner and the area calculations).
 # Rows whose area is NA after the coercion are removed by the filter as well.
 LA <- LA %>%
   mutate(Leaf_area = as.numeric(Leaf_area)) %>%
   filter(Leaf_area > 0.1)
 
 
 #### Merge the trait data and the leaf area data ####
 
 # Attach the leaf-area columns to every trait row; the image file name is the
 # key. Trait rows without a matching image keep NA in the leaf-area columns.
 traitdata <- left_join(
   traits,
   LA,
   by = c(ID_LA = "Image_file")
 )
 
 
 #### Changes to the CN data before merging ####
 
 # Decode each CN sample name into site, species and individual, translate the
 # codes with the dictionaries, build the join key and trim the table down to
 # the measurement columns.
 CN <- CN %>%
   mutate(
     # fixed-width sample name: chars 1-2 site, 3-6 species, 7-8 individual
     Site = substr(Name, 1, 2),
     Species = substr(Name, 3, 6),
     Individual = substr(Name, 7, 8),
     Species = plyr::mapvalues(
       Species,
       from = dict_CN$CN_ab,
       to = dict_CN$Species
     ),
     Site = plyr::mapvalues(
       Site,
       from = dict_Site_CN_2016$old,
       to = dict_Site_CN_2016$new
     ),
     # the key is built before the underscore in Species is replaced by a dot
     ID = paste0(Site, "_", Species, "_", Individual, ".jpg"),
     Species = gsub("_", "\\.", Species)
   ) %>%
   filter(Name != "VECAR101") %>% # sample was too small to give good data
   select(
     -c(
       Humidity.., Name, Weight, Method, N.Factor, C.Factor, N.Blank,
       C.Blank, Memo, Info, Date..Time, N.Area, C.Area
     )
   ) %>%
   rename(
     C_percent = C..,
     N_percent = N..,
     CN_ratio = CN.ratio
   )
 
 #### Merge the trait data and the CN data ####
 
 # Combine trait and CN rows, matching on the CN key plus site, species and
 # individual; a full join keeps unmatched rows from both tables.
 traitdata <- full_join(
   traitdata,
   CN,
   by = c(
     ID_CN = "ID",
     Site = "Site",
     species = "Species",
     Individual = "Individual"
   )
 )
 
 #### Clean data set by removing outliers and setting thresholds ###
 
 # Remove two damaged leaves and re-apply the lower thresholds on mass and
 # thickness. Rows with a missing ID_LA are removed here too, because the
 # comparison evaluates to NA for them.
 traitdata <- traitdata %>%
   filter(
     ID_LA != "Alr_Agr_cap_9.jpg", # looks damaged in the picture
     ID_LA != "Ves_Leo_aut_6.jpg" # looks damaged in the leaf-area scan
   ) %>%
   mutate(
     # Masses under 0.0005 g become NA: values this low are not trustworthy
     # (outside of the margin of error of the balance).
     across(
       c(Dry_mass, Wet_mass),
       function(mass) replace(mass, mass < 0.0005, NA)
     ),
     # One outlier that is very far off measurements 1 and 2 becomes NA.
     Lth_3 = replace(Lth_3, Lth_3 < 0.9, NA)
   )

 
 ### Calculate traits and transform traits ###
 
 # Derive SLA (leaf area / dry mass), LDMC (dry mass / wet mass) and the mean
 # of the leaf thickness measurements, then drop the single measurements.
 traitdata <- traitdata %>%
   mutate(
     SLA = Leaf_area / Dry_mass,
     LDMC = Dry_mass / Wet_mass
   ) %>%
   # Row-wise mean of all "Lth" columns, ignoring NAs. The columns are taken
   # from the table flowing through the pipe (`.`) rather than from the global
   # `traitdata`, so the step no longer depends on the two having equal rows.
   mutate(
     leaf_thickness = rowMeans(select(., starts_with("Lth")), na.rm = TRUE)
   ) %>%
   # Keeps only rows with LDMC below 1; rows whose LDMC is NA (missing wet or
   # dry mass) are removed as well.
   # NOTE(review): confirm that dropping the NA rows is intended.
   filter(LDMC < 1) %>%
   select(-Lth_1, -Lth_2, -Lth_3)
 
 #### Creating comments for flagged values ####
 
 # Flag every site x species combination that has fewer than five rows.
 # The flag text is kept exactly as it was, since it ends up in the output file.
 traitdata <- traitdata %>%
   group_by(Site, species) %>%
   mutate(
     n_individuals = n(),
     flag = ifelse(n_individuals < 5, "Less then 5 individuals", NA)
   ) %>%
   # drop the grouping again so it does not leak into the steps that follow
   ungroup() %>%
   select(-n_individuals)
 
 #### Changing names of columns to match SeedClim standards ####
 
 # Switch to the SeedClim column names and drop the helper columns that were
 # only needed for joining and bookkeeping.
 traitdata <- traitdata %>%
   rename(
     siteID = Site,
     date = Date,
     individual = Individual,
     height_mm = Height,
     fresh_mass_g = Wet_mass,
     dry_mass_g = Dry_mass,
     leaf_area_cm2 = Leaf_area,
     SLA_cm2_g = SLA,
     LDMC_g_g = LDMC
   ) %>%
   select(-c(Image, Comment, ID_LA, ID_CN, Site_sp))
 
 #### Merging the 2012, 2016&2017 data together ####
 
# Recode tables, written as new = old: column name -> trait name, and
# trait name -> unit label.
trait_labels <- c(
  "height" = "height_mm",
  "fresh_mass" = "fresh_mass_g",
  "dry_mass" = "dry_mass_g",
  "leaf_area" = "leaf_area_cm2",
  "SLA" = "SLA_cm2_g",
  "LDMC" = "LDMC_g_g",
  "leaf_thickness" = "leaf_thickness",
  "N" = "N_percent",
  "C" = "C_percent",
  "CN_ratio" = "CN_ratio",
  "d13C" = "d13C",
  "d15N" = "d15N"
)
unit_labels <- c(
  "mm" = "height",
  "g" = "fresh_mass",
  "g" = "dry_mass",
  "cm2" = "leaf_area",
  "cm2g-1" = "SLA",
  "gg-1" = "LDMC",
  "mm" = "leaf_thickness",
  "percent" = "N",
  "percent" = "C",
  "unitless" = "CN_ratio",
  "permil" = "d13C",
  "permil" = "d15N"
)

# Stack the 2016/2017 traits with the 2012 SLA and chemistry tables, reshape to
# one row per measured value, label trait and unit, and keep the final columns.
traitdata_full <- traitdata %>%
  bind_rows(my_sla) %>%
  bind_rows(my_leaf_chem) %>%
  # make a long table
  pivot_longer(
    cols = c(height_mm:dry_mass_g, leaf_area_cm2:leaf_thickness, d13C, d15N),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(!is.na(value)) %>%
  ungroup() %>%
  mutate(
    trait = fct_recode(trait, !!!trait_labels),
    # unit is derived from the already recoded trait names
    unit = fct_recode(trait, !!!unit_labels)
  ) %>%
  select(
    year, date, siteID, species,
    individual_nr = individual,
    trait, value, unit, flag
  )

write_csv(
  traitdata_full,
  file = "cleaning_code/5_trait_data/data/VCG_clean_trait_data_2012-2016.csv"
)
 
# richardjtelford/seedclimComm documentation built on March 5, 2023, 11:38 p.m.