# R/importShark.R

#' Download current monitoring data from SHARKdata
#'
#' This function downloads all current data from sharkdata.se for a given data type.
#'
#' @param datatype (Required) A string giving the type of data to download. (Only "Zooplankton" and "Phytoplankton" have been tested.)
#' @param possibly (Optional, default = TRUE) Logical. If TRUE, datasets that do not fit the format (cannot be read as TSV) are skipped. If FALSE, the function stops when an erroneous dataset is encountered.
#' @return A data frame combining all downloaded datasets, with all columns as character.
#' @examples
#' data <- getSHARK("Phytoplankton")
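#'
#' # A minimal sketch of the strict mode (an illustration, not the package's
#' # recommended workflow); "Zooplankton" is used here only as an example:
#' \dontrun{
#' strict <- getSHARK("Zooplankton", possibly = FALSE)
#' }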
#'
#' @export

getSHARK <- function(datatype, possibly=TRUE) {

  require('httr')
  require('tidyverse')

  # Function for downloading each separate dataset from SHARKdata
  download <- function(name) {
    tsv <- paste('http://sharkdata.se/datasets/',name,'/data.txt',sep='') %>%  # Build the dataset URL
      read_tsv(locale = locale(encoding = "windows-1252")) %>% # Download the dataset as a tab-separated tibble
      mutate_all(as.character) # Coerce all columns to character so datasets can be combined
    return(tsv)
  } # End of "download" function

  if (possibly==TRUE) {  #### Get data, ignore malfunctioning datasets

    # purrr::possibly() wraps download() so that it returns the placeholder
    # string "Malfunctioning_dataset" instead of throwing an error
    possibly_download <- possibly(download, "Malfunctioning_dataset")

    data <- read_delim("http://sharkdata.se/datasets/table.txt", delim = "\t") %>% # Download the dataset overview from SHARKdata
      filter(Datatype == datatype) %>% pull(1) %>%  # Filter out the datasets of interest
      map(possibly_download) # Download each dataset from the list (apply the function above)

    data_combined <- data[sapply(data, function(d) length(d) != 1)] %>%  # Remove erroneous data (length-1 placeholder strings)
      bind_rows() # Combine all the datasets


  } else { #### possibly == FALSE: Stop if any dataset errors

    data_combined <- read_delim("http://sharkdata.se/datasets/table.txt", delim = "\t") %>% # Download the dataset overview from SHARKdata
      filter(Datatype %in% datatype) %>% pull(1) %>% # Filter out the datasets of interest
      map(download) %>%  # Download each dataset from the list (apply the function above)
      bind_rows() # Combine all the datasets
  }
  return(data_combined)
} # End of getSHARK function



#' Add taxonomic ranks to datasets from dyntaxa.se
#'
#' @param data (Required) A data frame as returned by getSHARK(), containing a dyntaxa_id column.
#' @return A data frame with taxonomic ranks from Dyntaxa added.
#' @examples
#'
#' data <- getSHARK("Phytoplankton") %>%
#'    addDyntaxa()
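#'
#' # Illustrative follow-up (a sketch, assuming the join above succeeded):
#' # inspect which taxonomic rank columns were joined in.
#' dplyr::glimpse(data)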
#'
#' @export

addDyntaxa <- function(data) {
  data(dyntaxa)

  require("dplyr")
  merged_data <- left_join(data, dyntaxa, by="dyntaxa_id")
  return(merged_data)
}





#' Annotate the downloaded dataset
#'
#' @param data (Required) The dataset, as returned by getSHARK() and addDyntaxa().
#' @return The annotated data frame (a tibble), with standardized date, station, parameter, value, and depth columns added.
#' @examples
#' data <- getSHARK("Phytoplankton") %>%
#'    addDyntaxa() %>%
#'    annotateSHARK()
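#'
#' # Illustrative follow-up (a sketch; Year, Station, Parameter, and Value are
#' # columns created by annotateSHARK() above; the grouping is only an example):
#' annual <- data %>%
#'    dplyr::group_by(Year, Station, Parameter) %>%
#'    dplyr::summarise(mean_value = mean(Value, na.rm = TRUE), .groups = "drop")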
#'
#' @export

annotateSHARK <- function(data) {

  require("tidyverse")

  modified_data <- data %>%
    # Parse the sample date and derive standardized date, station, parameter,
    # value, and depth columns used downstream
    mutate(SDATE = as.Date(sample_date, format='%Y-%m-%d'),
           Yr_mon = format(SDATE, '%Y-%m'),
           Month = as.numeric(format(SDATE, "%m")),
           Year = format(SDATE, "%Y"),
           Day = format(SDATE, "%d"),
           Station = station_name,
           Parameter = parameter,
           Value = as.double(value),
           Depth = sample_max_depth_m) %>%
    as_tibble()

  return(modified_data)
}