Need to check:
# Attach the data-manipulation packages used throughout this document.
library(dplyr)
library(tidyselect)
library(tidyr)

# Run every chunk from the project root, i.e. the directory that holds the
# DESCRIPTION file.
knitr::opts_knit$set(
  root.dir = rprojroot::find_root(rprojroot::has_file("DESCRIPTION"))
)

# Load the package under development from that same root.
devtools::load_all(
  rprojroot::find_root(rprojroot::has_file("DESCRIPTION"))
)
# Trait table referred to in this document as Garnett's data.
traits <- read.csv("./private/data/raw/Australian_Bird_Data_Version_1.csv")

# Ikin's trait sheet, read from Excel and converted to a plain data frame.
traits_ikin <- as.data.frame(
  readxl::read_excel(
    path = "./private/data/raw/Ikin_SWS_Bird_Traits_updatedApril2017.xlsx",
    sheet = "Ikin_SWS_Bird_Traits"
  )
)

# Our bird observations.
birds_raw <- readRDS("./private/data/raw/birds_long.rds")

# One bird name has a weirdly encoded apostrophe (byte 0x92); normalise it.
birds_raw$CommonName <- gsub(
  pattern = "Horsfield\x92s Bushlark",
  replacement = "Horsfield's Bushlark",
  x = birds_raw$CommonName
)

# Remove the single observation of "Corella sp.", as this could be at least
# two different common names.
birds_raw <- dplyr::filter(birds_raw, CommonName != "Corella sp.")
Columns in Garnett's data that make for good names are those that are unique to each taxon (each row) and similar to the common names or scientific names in our data.
# For each candidate naming column (or pair of columns), report the index of
# the first duplicated entry; 0 means every row is unique.
anyDuplicated(traits[["X3_Taxon_common_name_2"]])
anyDuplicated(traits[["X5_Species_name_2"]])
anyDuplicated(traits[c("X5_Species_name_2", "X6_Subspecies_name_2")])
anyDuplicated(traits[["X10_Family_scientific_name_2"]])
anyDuplicated(traits[["X17_Species_2"]])
anyDuplicated(traits[c("X17_Species_2", "X18_Subspecies_2")])
In fact only the index and X3_Taxon_common_name_2 are unique to each row:
# Columns of `traits` that contain no duplicated values.
# vapply() visits each column with its own type. apply() would first coerce
# the whole data frame to a character matrix (formatting numeric columns),
# which can make distinct values look identical or identical values look
# different.
which(vapply(traits, anyDuplicated, integer(1)) == 0)
X3_Taxon_common_name_2
is also quite similar to our common names (see below).
Generalised Levenshtein distances between names
# Edit-distance matrix: one row per distinct common name in our data, one
# column per taxon common name in the traits table.
namedists <- adist(
  unique(birds_raw$CommonName),
  traits$X3_Taxon_common_name_2
)
# Label both dimensions with the names that were compared.
dimnames(namedists) <- list(
  unique(birds_raw$CommonName),
  traits$X3_Taxon_common_name_2
)
Summarising the row minimums shows that nearly every common name in our data is within (approximately) one substitution of a name in the Australian Bird Data set.
# Count how many of our common names sit at each minimum distance from the
# traits table.
summary(
  factor(
    apply(namedists, MARGIN = 1, FUN = min, na.rm = TRUE)
  )
)
Create a mapping by getting the closest taxon name for each of our common names
# For every row (one of our common names), pick the column name (a traits
# common name) with the smallest distance.
NameMap <- apply(
  namedists,
  MARGIN = 1,
  FUN = function(dists) names(dists)[[which.min(dists)]]
)
The names of NameMap are the common names that we use in our data; the values of NameMap are the common names in the Garnett et al. data set.
These species have a difference of 1: `r NameMap[apply(namedists, 1, min) == 1]`
.
These names have a higher difference: `r NameMap[apply(namedists, 1, min) > 1]`
.
Transform our data:
# Replace each of our common names with its closest match from NameMap.
# NameMap is indexed by name here, which assumes CommonName is a character
# column rather than a factor (TODO confirm).
birds_renamed <- mutate(birds_raw, CommonName = NameMap[CommonName])
Corella sp has been removed because it could be multiple species
# Corella names present in our renamed data ...
grep(
  pattern = "Corella",
  x = unique(birds_renamed$CommonName),
  ignore.case = TRUE,
  value = TRUE
)
# ... and Corella names available in the traits table.
grep(
  pattern = "Corella",
  x = traits$X3_Taxon_common_name_2,
  ignore.case = TRUE,
  value = TRUE
)
Exclude birds taxonomically: raptors and obligate water birds. We will also exclude the Australian Reed-Warbler, which is a passerine, because it requires wetlands to persist.
# Orders excluded on taxonomic grounds (raptors and obligate water birds).
excluded_orders <- c(
  "Caprimulgiformes", # Frogmouths
  "Accipitriformes",  # Eagles, Kites, Goshawks and Osprey
  "Strigiformes",     # Masked Owls
  "Falconiformes",    # Falcons
  "Podicepiformes",   # Grebes (spelling originally used here; kept in case
                      # the data uses it)
  "Podicipediformes", # Grebes (standard spelling of the order)
  "Gruiformes",       # Crakes, Rails and Swamphens
  "Pelecaniformes",   # Herons, Egrets, Pelicans etc
  "Anseriformes"      # Ducks, Geese
)

# Taxa to exclude: every taxon in an excluded order, plus the Australian
# Reed-Warbler.
birds_to_exclude <- as_tibble(traits) %>%
  dplyr::filter(
    (X11_Order_2 %in% excluded_orders) |
      (X3_Taxon_common_name_2 == "Australian Reed-Warbler")
  ) %>%
  dplyr::distinct()

# Birds to include if observed: every taxon whose sort index is not among the
# excluded taxa.
birdskeep <- traits[
  !(traits$X1_Taxon_sort_1 %in% birds_to_exclude$X1_Taxon_sort_1),
]
To compare with Ikin's traits, we need to create a map between species (taxon) names
# Edit distances between the distinct species names in Ikin's traits and the
# taxon common names in the traits table. unique() is applied to the input as
# well as to the row names, so the matrix extent and its labels always agree
# even if a species is listed more than once.
namedists_ikin <- adist(
  unique(traits_ikin$species),
  traits$X3_Taxon_common_name_2
)
rownames(namedists_ikin) <- unique(traits_ikin$species)
colnames(namedists_ikin) <- traits$X3_Taxon_common_name_2

# For each of Ikin's species names, take the closest traits common name.
NameMap_ikin <- apply(namedists_ikin, 1, function(v) {
  ind <- which.min(v)
  names(v)[[ind]]
})
# names of NameMap_ikin are the common names that Ikin's traits use
# values of NameMap_ikin are the common names in the Garnett et al data set

# The following do not match exactly (minimum distance greater than 0)
NameMap_ikin[apply(namedists_ikin, 1, min) > 0]
Transform Ikin:
# Swap each of Ikin's species names for its mapped name from NameMap_ikin
# (looked up by name).
traits_ikin[["species"]] <- NameMap_ikin[traits_ikin[["species"]]]
Compare these to Ikin
# Species that Ikin's traits flag for removal: vertebrate diet, water
# substrate, or the Australian Reed-Warbler. Returned as a plain, unnamed
# vector ordered by species name.
species_to_remove_ikin <- traits_ikin %>%
  filter(
    (diet == "Vertebrates") |
      (substrate == "Water") |
      (species == "Australian Reed-Warbler")
  ) %>%
  arrange(species) %>%
  dplyr::pull(species) %>%
  as.vector()

# Every other species in Ikin's traits is kept.
species_to_keep_ikin <- setdiff(traits_ikin$species, species_to_remove_ikin)
Nearly all birds we want to remove based on Ikin's traits are in the list of birds to exclude based on Garnett's data.
# How many of the Ikin-based removals also appear in birds_to_exclude?
summary(
  is.element(species_to_remove_ikin, birds_to_exclude$X3_Taxon_common_name_2)
)
# The Ikin-based removals that do not appear in birds_to_exclude.
species_to_remove_ikin[
  !is.element(species_to_remove_ikin, birds_to_exclude$X3_Taxon_common_name_2)
]
How about species suggested to keep using Ikin's traits, but excluded by the new method above?
# How many species kept under Ikin's traits appear in birds_to_exclude?
summary(
  is.element(species_to_keep_ikin, birds_to_exclude$X3_Taxon_common_name_2)
)
# Which ones are they?
species_to_keep_ikin[
  is.element(species_to_keep_ikin, birds_to_exclude$X3_Taxon_common_name_2)
]
# Number of rows in birds_renamed for each of those species, most frequent
# first.
birds_renamed %>%
  dplyr::filter(
    CommonName %in% species_to_keep_ikin[
      is.element(species_to_keep_ikin, birds_to_exclude$X3_Taxon_common_name_2)
    ]
  ) %>%
  group_by(CommonName) %>%
  summarise(Nobsatvisits = n()) %>%
  arrange(desc(Nobsatvisits))
11 species kept using Ikin's traits are removed using the new method. They look like ducks and water birds. Only 6 of these are observed enough to plausibly be placed in our model.
The following compare these 11 species on some functional traits: they all feed on inland waters. Some eat vertebrates.
library(ggplot2)

# For the excluded taxa that Ikin's traits would have kept, plot which of the
# selected trait columns have the value 1: one point per (taxon, trait) pair.
birds_to_exclude %>%
  dplyr::filter(is.element(X3_Taxon_common_name_2, species_to_keep_ikin)) %>%
  dplyr::select(
    X3_Taxon_common_name_2,
    X169_Food_Terrestrial_vertebrates_10,
    X157_Breeding_habitat_inland_wetland_9,
    matches("_Feeding_habitat_Inland_waters_")
  ) %>%
  pivot_longer(cols = -X3_Taxon_common_name_2) %>%
  dplyr::filter(value == 1) %>%
  ggplot() +
  geom_point(
    aes(y = X3_Taxon_common_name_2, x = name, colour = name)
  )
# Model input data. The path is relative to the project root, matching the
# other data paths in this document, rather than tied to one user's home
# directory.
modeldata <- readRDS("./private/data/clean/7_2_10_input_data.rds")
modelledspecies <- modeldata$species

# Number of modelled species that have no entry in Ikin's traits.
length(setdiff(modelledspecies, traits_ikin$species))

# Ikin's trait rows for the modelled species.
traits_ikin[traits_ikin$species %in% modelledspecies, ]
All species modelled have traits in the traits_ikin data! That is very convenient for functional diversity!
# The sourced script is expected to create the database connection `con`
# (NOTE(review): it is defined outside this file - confirm).
source("../../../ComputerRelated/AccessLindenmayerSQL.R", echo = FALSE)

# Fetch the bird-species trait rows together with their common names.
# `finally` closes the connection even when the query fails, so a failed
# query no longer leaves the connection open.
dbo_traits <- tryCatch(
  DBI::dbGetQuery(
    con,
    "SELECT sp.CommonName, bs.* FROM dbo.BirdSpecies AS bs INNER JOIN dbo.Species AS sp ON bs.SpeciesId=sp.SpeciesId"
  ),
  finally = DBI::dbDisconnect(con)
)
# Is every modelled species present in the database trait table?
all(is.element(modelledspecies, dbo_traits$CommonName))

# Rows for modelled species in which CommonName or any "...Score" column is
# missing.
dbo_traits %>%
  dplyr::filter(is.element(CommonName, modelledspecies)) %>%
  dplyr::select(CommonName, ends_with("Score")) %>%
  filter_all(any_vars(is.na(.)))
There are five birds that don't have any trait scores information:
White-throated Gerygone
Yellow-faced Honeyeater
White-plumed Honeyeater
White-plumed Honeyeater
Diamond Firetail
# Score columns for the listed common names, restricted to rows marked as
# present in the region.
# NOTE(review): "White-plumed Honeyeater" is listed twice; a fifth, different
# name may have been intended - confirm.
dbo_traits %>%
  dplyr::filter(
    is.element(
      CommonName,
      c(
        "White-throated Gerygone",
        "Yellow-faced Honeyeater",
        "White-plumed Honeyeater",
        "White-plumed Honeyeater",
        "Diamond Firetail"
      )
    )
  ) %>%
  dplyr::filter(PresentInRegion == "1") %>%
  dplyr::select(CommonName, ends_with("Score"))
Some CommonNames occur two or three times! Many of the versions that are marked as present in the region have scores. Of the 5 common names, only Yellow-faced Honeyeater has no scores.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.