R/catminat_download.R

Defines functions catminat_download_to_local_directory catminat_replace

catminat_replace<-function(origin,change){
    
    origin<-as.character(origin)
    
    for(i in names(change)){
        origin<-gsub(i,change[i],origin)
    }
    return(origin)
}




catminat_download_to_local_directory<-function(directory){
    

    url<-"http://philippe.julve.pagesperso-orange.fr/baseflor.xlsx"
    ##baseflor<-read.xls (url, sheet = 1, header=T,method="tab")
    ##baseflor<-read.xls (url, sheet = 1, header=T,method="tab",fileEncoding="utf-8")
    temp_dest<-tempfile(fileext=".xlsx")
    download.file(url,temp_dest,mode="wb")

    catminat_df<-read_excel(temp_dest,sheet=1,col_names=T,col_types="text")
    
    ## catminat_df<-catminat_df[grep("[0-9]+",catminat_df$rang_taxinomique,invert=T),]
    catminat_df <- as.data.frame(catminat_df)

    ##### TODO correggere nomi delle specie\


    ## remove entries for which CHOROLOGIE=="?"
    catminat_df<-catminat_df[catminat_df$CHOROLOGIE!="?",]
    
    ## change columns' name in order to
    ## avoid possible conflicts with non-
    ## ascii
    ## french chars
    recode_catminat_values<-c(
        "N.*menclatural_BDNFF"="name_bdnff",
        "NOM_SCIENTIFIQUE"="species_name",
        "Lumi.{1}re"="ell_L_fr",
        "Temp.{1}rature"="ell_T_fr",
        "Continentalit.{1}"="ell_C_fr",
        "Humidit.{1}_.{1}daphique"="ell_U_fr",
        "R.{1}action_du_sol_.pH."="ell_R_fr",
        "Niveau_trophique"="ell_N_fr",
        "Salinit.*"="ell_S_fr",
        "Texture"="Soil_texture_fr",
        "Mati.*re_organique"="organic_matter_fr",
        "Humidit.*_atmosph.*rique"="ell_U_atm_fr",
        "pollinisation"="poll_vect_fr",
        "diss.*mination"="dissemination_fr",
        "couleur_fleur"="flower_colour_fr",
        "fruit"="fruit_type_fr",
        "sexualit.*"="sex_reprod_fr",
        "ordre_maturation"="order_of_maturation",
        "inflorescence"="inflorescence_fr",
        "Nom.Phytobase"="species_name",
        "TYPE_BIOLOGIQUE"="li_form_fr"
    )
    
    for(i in names(recode_catminat_values)){
        names(catminat_df)<-gsub(i,recode_catminat_values[i],names(catminat_df))
    }

    catminat_df$species_name<-as.character(catminat_df$species_name)
    catminat_df<-catminat_df[!is.na(catminat_df$species_name),]

    ## Work on species names

    namesH <- catminat_df$nomH
    namesH <- gsub(" \\*$","",namesH)
    namesH <- gsub(" H$","",namesH)
    
    namesB <- catminat_df$nomB
    namesB <- gsub(" \\*$","",namesB)
    namesB <- gsub(" B$","",namesB)

    namesA <- catminat_df$nomA
    namesA <- gsub(" \\*$","",namesA)
    namesA <- gsub(" A$","",namesA)

    namesH[is.na(namesH)] <- namesB[is.na(namesH)]
    namesH[is.na(namesH)] <- namesA[is.na(namesH)]
    
    catminat_df$species_name <- namesH
    catminat_df$species_name<-gsub("&amp","&",catminat_df$species_name,perl=TRUE)
    catminat_df$species_name<-gsub(";","",catminat_df$species_name,perl=TRUE)
    catminat_df$species_name<-gsub("\\s+\\*$","",catminat_df$species_name,perl=TRUE)
    catminat_df$species_name<-gsub("\\s+A$","",catminat_df$species_name,perl=TRUE)
    catminat_df$species_name<-gsub("\\s+B$","",catminat_df$species_name,perl=TRUE)


    catminat_df <- catminat_df[!is.na(catminat_df$species_name),]
    

    ## I split flowering dates into 2 columns,
    ## flower begin and end
    flowering<-as.character(catminat_df$floraison)
    beg_flow_fr<-as.numeric(gsub("^([0-9]+)+\\-([0-9])+$","\\1",flowering))
    end_flow_fr<-as.numeric(gsub("^([0-9]+)+\\-([0-9])+$","\\2",flowering))
    catminat_df<-data.frame(catminat_df,beg_flow_fr,end_flow_fr)

    
    
    ## I choose only a subset of the available columns in baseflor dataset
    selected_columns_catminat<-c(
        "species_name",
        "name_bdnff",
        "CHOROLOGIE",
        "inflorescence_fr",
        "sex_reprod_fr",
        "order_of_maturation",
        "poll_vect_fr",
        "fruit_type_fr",
        "dissemination_fr",
        "flower_colour_fr",
        "macule",
        "type_ligneux",
        "li_form_fr",
        "ell_L_fr",
        "ell_T_fr",
        "ell_C_fr",
        "ell_U_atm_fr",
        "ell_U_fr",
        "ell_R_fr",
        "ell_N_fr",
        "ell_S_fr",
        "Soil_texture_fr",
        "organic_matter_fr",
        "beg_flow_fr",
        "end_flow_fr",
        "numSpH"
        )

    catminat_df<-catminat_df[,selected_columns_catminat]

    ## recode inflorescenses types
    inflorescences<-c(
        "capitule de capitules$"="Compound capitulum",
        "capitule simple$"="Capitulum",
        "c.*ne$"="Cone",                                
        "corymbe$"="Corymb",
        "corymbe de capitules$"="Corymb of capitula",
        "cyathe$"="Cyathium",
        "cyme bipare$"="Dichasia cyme",                         
        "cyme biscorpio.*de$"="Scorpioid cyme",
        "cyme capituliforme$"="Capituliform cyme ",
        "cyme d'.+pis$"="Cyme of spikes",
        "cyme d'ombelles$"="Cyme of umbels",                     
        "cyme de capitules$"="Cyme of capitula",
        "cyme de glom.+rules$"="Cyme of glomerula",
        "cyme multipare$"="Pleiochasium",
        "cyme unipare h.*lico.*de$"="Helicoid cyme",              
        "cyme unipare scorpio.*de$"="Scorpioid cyme",
        ".*pi d'.+pillets$"="Spike of spikelets",
        ".*pi de capitules$"="Spike of capitula",
        ".+pi de cymes triflores$"="Spike of three-flowers cymes",              
        ".+pi simple$"="Simple spike",
        "fleur solitaire lat.+rale$"="Solitary lateral flower",
        "fleur solitaire terminale$"="Solitary terminal flower",
        "glom.+rules$"="Glomerula",                          
        "glom.*rules spiciformes$"="Spike-like glomerula",
        "ombelle d'ombellules$"="Umbel of umbels",
        "ombelle simple$"="Simple umbel",
        "ombelle simple d'.+pis$"="Simple umbel of spikes",               
        "ombelle simple de capitules$"="Simple umbel of capitula",
        "panicule d'.+pillets$"="Panicle of spikelets",
        "panicule spiciforme$"="Spike-like panicle",
        "rac.{1}me capituliforme$"="Capitulum-like raceme",                 
        "rac.{1}me d'.+pis$"="Raceme of spikes",
        "rac.{1}me d'ombelles$"="Raceme of umbels",
        "rac.{1}me de capitules$"="Raceme of capitula",
        "ra.{1}me de cymes bipares$"="Raceme of dichasia cymes",             
        "rac.{1}me de cymes unipares h.{1}lico.{1}des$"="Raceme of helicoid cymes",
        "rac.{1}me de cymes unipares scorpio.{1}des$"="Raceme of scorpioid cymes",
        "rac.{1}me de rac.{1}mes$"="Raceme of racemes",
        "rac.{1}me simple$"="Simple raceme",                       
        "rac.{1}me de cymes bipares$"="Raceme of dichasia cymes", 
        "spadice$"="Spadix",
        "verticille d'ombelles$"="Verticil of umbels"               
        )
    catminat_df$inflorescence_fr<-catminat_replace(catminat_df$inflorescence_fr,inflorescences)

    ## recode fruit types
    fruit_types=c("ak.{1}ne$"="achene",
        "baie$"="berry",
        "capsule$"="capsule",
        "caryopse$"="caryopsis",
        "c.{1}ne$"="cone",
        "drupe$"="drupe",
        "follicule$"="follicle",
        "catminat_dfusse$"="legume",
        "pyxide$"="pyxid",
        "samare$"="samara",
        "silique$"="silique"  
        )
    catminat_df$fruit_type_fr<-catminat_replace(catminat_df$fruit_type_fr,fruit_types)

    ## recode flower colours
    ## flower_colours<-c("blanc$"="white",
    ##                   "jaune$"="yellow",
    ##                   "vert$"="green",
    ##                   "marron$"="brown",
    ##                   "bleu$"="blue",
    ##                   "jaune$"="yellow",
    ##                   "jauna$"="yellow",
    ##                   "noir$"="black"
    ##                   )

    flower_colours<-c("blanc"="white",
                      "jaune"="yellow",
                      "vert"="green",
                      "marron"="brown",
                      "bleu"="blue",
                      "jaune"="yellow",
                      "jauna"="yellow",
                      "noir"="black"
                      )

    catminat_df$flower_colour_fr<-catminat_replace(catminat_df$flower_colour_fr,flower_colours)

    ## recode dissemination types
    dissemination<-c(
        "an.*mochore"="anemochores",
        "myrm.*cochore"="myrmecochores",
        #"myrm.*cochore"="myrmecochores",
        "autochore"="autochores",
        "barochore"="barochores",
        "endozoochore"="endozoochores",
        "endozoochorie"="endozoochores",
        ".+pizoochore"="epizoochores",              
        "dyszoochore"="dyszoochores", 
        "hydrochore"="hydrochores"                             
        )
    catminat_df$dissemination_fr<-catminat_replace(catminat_df$dissemination_fr,dissemination)

    ## recode sexual reproduction types
    sex_reprod<-c(
        "androdio.{1}que"="Androdioecy",
        "gynomono.{1}que" ="Gynomonoecious",
        "gynodio.{1}que"="Gynodioecious",
        "polygame"="Polygamous",
        "mono.{1}que"="Monoecious",
        "dio.{1}que"="Dioecious",
        "hermaphrodite"="Hermaphroditic",
        "polygame"="Polygamous"
        )
    catminat_df$sex_reprod_fr<-catminat_replace(catminat_df$sex_reprod_fr,sex_reprod)

    ## remove species names where the phrase
    ## "sans nom" is found
    catminat_df<-catminat_df[grep("sans nom",catminat_df$species_name,invert=TRUE),]

    ## recode pollen vector
    poll_vec<-c(
        "an.{1}mogame"="wind",
        "autogame"="self",
        "apogame"="apogamy",
        "entomogame"="insect",
        "hydrogame"="water"
        )
    catminat_df$poll_vect_fr<-catminat_replace(as.character(catminat_df$poll_vect_fr),poll_vec)

    ## recode life_form_fr

    ## -- for the moment leave the original values
    
    
    ## remove entries without species names
    catminat_df<-catminat_df[catminat_df$species_name!="",]


    ## Remove double entries

    ## beware: catminat is now read with readxl package and empty cells are coded as <NA>. not as empty strings
    ##    catminat_df<-catminat_df[!(catminat_df$species_name=="Juncus articulatus" & catminat_df$sex_reprod_fr==""),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Juncus articulatus" & is.na(catminat_df$sex_reprod_fr)),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Capparis spinosa" & catminat_df$numSpH=="13450"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Centaurium ery. erythraea" & catminat_df$numSpH=="2361"),]
##    catminat_df<-catminat_df[!(catminat_df$species_name=="Centaurium ery. erythraea" & catminat_df$numSpH=="2361"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Chenopodium ambrosioides" & is.na(catminat_df$flower_colour_fr)),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Chenopodium opulifolium" & catminat_df$CHOROLOGIE=="cosmopolite"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Cornus sanguinea" & catminat_df$numSpH=="2258"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Crataegus mon. monogyna" & catminat_df$numSpH=="1657"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Daphne laureola" & catminat_df$numSpH=="2084"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Eryngium bourgatii" & catminat_df$numSpH=="11913"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Festuca ovi. ovina" & catminat_df$numSpH=="352"),]
    ## Gaudina has 2 different ecotypes... better to remove both to avoid misinterpretation
    catminat_df<-catminat_df[!(catminat_df$species_name=="Gaudinia fragilis"),]
    ## catminat_df<-catminat_df[!(catminat_df$species_name=="Genista salzmannii" & catminat_df$numSpH=="12481"),]
    catminat_df<-catminat_df[!(catminat_df$species_name=="Medicago turbinata" & catminat_df$name_bdnff=="41226"),]
    ## Remove Ophrys bertolonii subsp bertolonii
    catminat_df<-catminat_df[!(catminat_df$species_name=="Ophrys bertolonii" & catminat_df$name_bdnff=="81834"),]
    ## Leave only Populus nigra removing subspecies
    catminat_df<-catminat_df[!(catminat_df$species_name=="Populus nigra" & catminat_df$name_bdnff=="52035"),]
    catminat_df<-catminat_df[!(catminat_df$species_name=="Populus nigra" & catminat_df$name_bdnff=="52031"),]

    catminat_df<-catminat_df[!(catminat_df$species_name=="Rhamnus saxatilis" & catminat_df$name_bdnff=="118976"),]
    catminat_df<-catminat_df[!(catminat_df$species_name=="Rosmarinus officinalis" & catminat_df$name_bdnff=="57822"),]
    catminat_df<-catminat_df[!(catminat_df$species_name=="Salvia officinalis" & catminat_df$name_bdnff=="60181"),]
    catminat_df<-catminat_df[!(catminat_df$species_name=="Linum bienne"),]

    ## remove double entries
    catminat_df<-catminat_df[!(duplicated(catminat_df)),]

    ## and remove double entry
    ##catminat_df<-catminat_df[catminat_df$numSpH!=11083,]

    ## fix CHOROLOGIE for Rubus cinerascens and  Rubus conspicuus and then remove double entries
    ## catminat_df<-catminat_df[(1:nrow(catminat_df))!=which(catminat_df$species_name=="Rubus cinerascens")[1],]
    ## catminat_df<-catminat_df[(1:nrow(catminat_df))!=which(catminat_df$species_name=="Rubus conspicuus")[1],]

    ##catminat_df<-catminat_df[row.names(catminat_df)!=(which(catminat_df$species_name=="Erodium glandulosum")[1]),]
    ##catminat_df<-catminat_df[row.names(catminat_df)!=(which(catminat_df$species_name=="Erodium rupicola")[1]),]
    ##catminat_df<-catminat_df[row.names(catminat_df)!=(which(catminat_df$species_name=="Onosma echioides")[1]),]

    ## remove CHOROLOGIE column
    catminat_df<-catminat_df[,names(catminat_df)!="CHOROLOGIE"]
    
    save(file=file.path(directory,"catminat.Rda"),catminat_df)
}
GioBo/TR8 documentation built on June 16, 2022, 9:10 p.m.