# R/worms_ecology.R

# Defines functions: worms_ecology

## Requirement: 'worrms' + 'tibble'

# Info: Assessment of the missing environmental information for all species with an Aphia ID number (retrieved from OBIS)

# Note: For tables with more than 1500 species names, this function must be called through division_ecology(), because too many requests at once are not supported.

# Complete a species table with WoRMS information.
#
# Arguments:
#   data              Data frame (or tibble) of species records.
#   name_aphia_col    Name of the column holding the Aphia IDs.
#   name_status_col   Name of the column holding the taxonomic status.
#   name_species_col  Name of the column holding the species names.
#   name_envir_col    Name of the column holding the environment.
#   add_aphia_id      If TRUE, look up (with wm_name2id()) the Aphia ID of every
#                     row where BOTH the Aphia ID and the environment are
#                     missing. If FALSE (default), fetch the records of all
#                     known Aphia IDs (with wm_record()) and update the table.
#
# Value:
#   add_aphia_id = TRUE : `data` with the looked-up rows placed first and their
#                         Aphia ID filled in (NA when the lookup failed).
#   add_aphia_id = FALSE: a tibble. When at least one Aphia ID is known, the
#                         status column is replaced by the upper-cased WoRMS
#                         status and the environment column by "marine",
#                         "freshwater", "terrestrial" or "both" (marine and
#                         freshwater) whenever WoRMS provides them, and three
#                         columns are appended: WORMS_VALID_SPECIES (WoRMS
#                         scientific name when it differs from the name in
#                         `data`), INITIAL_ENVIRONMENT and INITIAL_STATUS
#                         (values before the update). When no Aphia ID is
#                         known, `data` is returned unchanged as a tibble.
#
# wm_name2id(), wm_record() and tibble() must be attached by the caller (the
# file header lists 'worrms' and 'tibble' as requirements).
worms_ecology = function(data, name_aphia_col = "APHIA_ID", name_status_col = "STATUS", name_species_col = "SPECIES", 
                         name_envir_col = "ENVIRONMENT", add_aphia_id = FALSE){
  
  separator = "-----------------------------------------------------\n"
  
  # Seconds elapsed since `start`; the unit is forced so that the printed
  # "seconds" label stays true for long-running requests
  elapsed_secs = function(start) round(as.numeric(difftime(Sys.time(), start, units = "secs")), 2)
  
  # Aphia IDs as numbers, whether the column is numeric, character or factor
  as_aphia = function(x) as.numeric(as.character(x))
  
  # A WoRMS habitat flag counts only when it is explicitly 1 (NA means "not set")
  flagged = function(flag) !is.na(flag) & flag == 1
  
  if(add_aphia_id){
    
    absent = setdiff(c(name_aphia_col, name_envir_col), names(data))
    
    if(length(absent) > 0) stop("Column(s) not found in data: ", paste(absent, collapse = ", "), call. = FALSE)
    
    # Only rows lacking both the Aphia ID and the environment are looked up
    lookup_needed = is.na(data[[name_aphia_col]]) & is.na(data[[name_envir_col]])
    
    missing_aphia_data = data[lookup_needed, ]
    
    not_missing_aphia_data = data[!lookup_needed, ]
    
    n_lookup = nrow(missing_aphia_data)
    
    # Guard on the number of rows to look up: an Aphia ID may be missing on
    # rows whose environment is known, in which case nothing is requested
    if(n_lookup > 0){
      
      cat("Retrieving missing Aphia IDs\n")
      cat(separator)
      
      start = Sys.time()
      
      species_names = missing_aphia_data[[name_species_col]]
      
      found_ids = vector("list", n_lookup)
      
      for(j in seq_len(n_lookup)){
        
        cat(paste0(species_names[j], " (", j, "/", n_lookup, ")\n"))
        
        found = tryCatch(wm_name2id(species_names[j]), error = function(e) { NA })
        
        # Keep exactly one value per row so the column can be rebuilt safely
        found_ids[[j]] = if(length(found) == 1) found else NA
        
      }
      
      duration = difftime(Sys.time(), start)
      
      cat(separator)
      cat(paste("Aphia ID retrieved in:", round(duration[[1]], 2), units(duration), "\n"))
      
      missing_aphia_data[[name_aphia_col]] = unlist(found_ids)
      
    }
    
    return(rbind(missing_aphia_data, not_missing_aphia_data))
    
  }
  
  completed_data = data
  
  start_time_retrieving = Sys.time()
  
  # Each distinct, known Aphia ID is requested once
  id = as_aphia(completed_data[[name_aphia_col]])
  id = unique(id[!is.na(id)])
  
  if(length(id) == 0) return(tibble(completed_data))
  
  infos = wm_record(id)
  
  cat(paste0("Data retrieved in ", elapsed_secs(start_time_retrieving), " seconds\n"))
  
  start_time_processing = Sys.time()
  
  # Environment of each record. Later assignments take precedence:
  # terrestrial < freshwater < marine < both (marine and freshwater)
  is_marine = flagged(infos$isMarine)
  is_freshwater = flagged(infos$isFreshwater)
  
  zone = rep(NA_character_, nrow(infos))
  zone[flagged(infos$isTerrestrial)] = "terrestrial"
  zone[is_freshwater] = "freshwater"
  zone[is_marine] = "marine"
  zone[is_marine & is_freshwater] = "both"
  
  # Row of `infos` describing each row of the table (NA when there is none);
  # NA identifiers are never matched with each other
  info_row = match(as_aphia(completed_data[[name_aphia_col]]), infos$AphiaID, incomparables = NA)
  
  rows = which(!is.na(info_row))
  from = info_row[rows]
  
  n_rows = nrow(completed_data)
  
  completed_data = cbind(completed_data, WORMS_VALID_SPECIES = rep(NA, n_rows), INITIAL_ENVIRONMENT = rep(NA, n_rows), 
                         INITIAL_STATUS = rep(NA, n_rows))
  
  # Save the original values before anything is overwritten
  completed_data$INITIAL_STATUS[rows] = completed_data[[name_status_col]][rows]
  
  completed_data$INITIAL_ENVIRONMENT[rows] = completed_data[[name_envir_col]][rows]
  
  # Overwrite status and environment only where WoRMS provides a value
  new_status = infos$status[from]
  has_status = !is.na(new_status)
  
  completed_data[[name_status_col]][rows[has_status]] = toupper(new_status[has_status])
  
  new_zone = zone[from]
  has_zone = !is.na(new_zone)
  
  completed_data[[name_envir_col]][rows[has_zone]] = new_zone[has_zone]
  
  # A WoRMS name is reported for every row sharing an Aphia ID as soon as one
  # of these rows carries a different name; NA names are not counted as different
  worms_name = infos$scientificname[from]
  own_name = as.character(data[[name_species_col]])[rows]
  
  renamed = from %in% unique(from[which(worms_name != own_name)])
  
  completed_data$WORMS_VALID_SPECIES[rows[renamed]] = worms_name[renamed]
  
  cat(paste0("Data processed in ", elapsed_secs(start_time_processing), " seconds\n"))
  cat(separator)
  cat("\n")
  
  tibble(completed_data)
  
}
# Eliot-RUIZ/eDNAevaluation documentation built on Dec. 17, 2021, 6:25 p.m.