R/sits_getdata.R

#' @title Obtain timeSeries from different sources
#' @name sits_getdata
#' @author Gilberto Camara
#'
#' @description reads descriptive information about a data source to retrive a set of
#' time series. The following options are available:
#' (a) The source is a SITS table - retrieves the metadata from the sits table and the data
#' from the WTSS service
#' (b) The source is a CSV file - retrieves the metadata from the CSV file and the data
#' from the WTSS service
#' (c) The source is a JSON file - retrieves the metadata and data from the JSON file.
#' (d) The source is a gz file (compressed JSON file) - retrieves the metadata and data from the compressed JSON file.
#' (e) No source is given - it retrives the data based on <long, lat, wtss>
#' A sits table has the metadata and data for each time series
#' <longitude, latitude, start_date, end_date, label, coverage, time_series>
#'
#' A Web Time Series Service (WTSS) is a light-weight service that
#' retrieves one or more time series in JSON format from a data base.
#' @references
#' Lubia Vinhas, Gilberto Queiroz, Karine Ferreira, Gilberto Camara,
#' Web Services for Big Earth Observation Data.
#' In: XVII Brazilian Symposium on Geoinformatics, 2016, Campos do Jordao.
#' Proceedings of GeoInfo 2016. Sao Jose dos Campos: INPE/SBC, 2016. v.1. p.166-177.
#'
#' @param file            the name of a file with information on the data to be retrieved (options - CSV, JSON, SHP)
#' @param table           an R object ("sits_table")
#' @param longitude       double - the longitude of the chosen location
#' @param latitude        double - the latitude of the chosen location)
#' @param start_date      date - the start of the period
#' @param end_date        date - the end of the period
#' @param label           string - the label to attach to the time series
#' @param URL             string - the URL of WTSS (Web Time Series Service)
#' @param coverage        string - the name of the coverage to be retrieved
#' @param bands           string vector - the names of the bands to be retrieved
#' @param n_max           integer - the maximum number of samples to be read (optional)
#' @param ignore_dates    use the start and end dates from the coverage instead of the time series
#' @return data.tb        tibble  - a SITS table
#' @export
sits_getdata <- function (file        = NULL,
                          table       = NULL,
                          longitude   = NULL,
                          latitude    = NULL,
                          start_date  = NULL,
                          end_date    = NULL,
                          label       = NULL,
                          URL         = "http://www.dpi.inpe.br/tws/wtss",
                          coverage    = NULL,
                          bands       = NULL,
                          n_max       = Inf,
                          ignore_dates = FALSE) {

     # a JSON file has all the data and metadata - no need to access the WTSS server
     if  (!purrr::is_null (file) && tolower(tools::file_ext(file)) == "json"){
          data.tb <- sits_fromJSON (file)
          return (data.tb)
     }
    # get data based on gz (compressed JSON) file
    if (!purrr::is_null (file) && tolower(tools::file_ext(file)) == "gz") {
        data.tb <- sits_fromGZ(file)
        return (data.tb)
    }
     # Ensure that required inputs exist
     ensurer::ensure_that(coverage, !purrr::is_null (.), err_desc = "sits_getdata: Missing coverage name")
     ensurer::ensure_that(bands, !purrr::is_null (.), err_desc = "sits_getdata: Missing bands vector")
     ensurer::ensure_that(URL, !purrr::is_null (.), err_desc = "sits_getdata: Missing WTSS URL")

     # obtains an R object that represents the WTSS service
     wtss.obj <- wtss::WTSS(URL)

     #retrieve coverage information
     cov <- sits_getcovWTSS(URL, coverage)

     # get data based on latitude and longitude
     if (purrr::is_null (file) && purrr::is_null (table) && !purrr::is_null(latitude) && !purrr::is_null(longitude)) {
          data.tb <- sits_fromlatlong (longitude, latitude, start_date, end_date, wtss.obj, cov, bands)
          return (data.tb)
     }
     # get data based on table
     if (!purrr::is_null (table)){
          data.tb <- sits_fromtable (table, wtss.obj, cov, bands)
          return (data.tb)
     }

     # get data based on CSV file
     if (tolower(tools::file_ext(file)) == "csv") {
          data.tb <- sits_fromCSV (file, wtss.obj, cov, bands, n_max, ignore_dates)
          return (data.tb)
     }
     # get data based on SHP file
     if (tolower(tools::file_ext(file)) == "shp") {
          data.tb <- sits_fromSHP (file, wtss.obj, cov, bands, start_date, end_date, label)
          return (data.tb)
     }
     message (paste ("No valid input to retrieve time series data!!","\n",sep=""))
     stop()
}

#' @title Obtain timeSeries from a JSON file.
#'
#' @name sits_fromJSON
#'
#' @description reads a set of data and metadata for satellite image time series from a JSON file
#'
#' @param json_file  string  - name of a JSON file with sits data and metadata
#' @return data.tb    tibble  - a SITS table
#' @export
sits_fromJSON <- function (json_file) {
     # add the contents of the JSON file to a SITS table
     table <- tibble::as_tibble (jsonlite::fromJSON (json_file))
     # convert Indexes in time series to dates
     table1 <- sits_table()
     table %>%
          purrrlyr::by_row(function (r) {
               tb <- tibble::as_tibble(r$time_series[[1]])
               tb$Index <- lubridate::as_date(tb$Index)
               r$time_series[[1]] <- tb
               r$start_date <- lubridate::as_date(r$start_date)
               r$end_date   <- lubridate::as_date(r$end_date)
               table1 <<- dplyr::bind_rows(table1, r)
          })
     return (table1)
}

#' @title Obtain timeSeries from a compressed JSON file.
#'
#' @name sits_fromGZ
#'
#' @description reads a set of data and metadata for satellite image time series from a compressed JSON file
#'
#' @param gz_file  string  - name of a compressed JSON file with sits data and metadata
#' @return data.tb    tibble  - a SITS table
#' @export
sits_fromGZ <- function (gz_file) {

    # uncompress the file
    json_file <- R.utils::gunzip (gz_file, remove = FALSE)
    # retrieve the data
    data.tb <- sits_fromJSON (json_file)
    # remove the uncompressed file
    file.remove (json_file)

    # return the JSON file
    return (data.tb)
}

#' @title Obtain timeSeries from WTSS server based on a lat/long information.
#' @name sits_fromlatlong
#'
#' @description This function uses the lat/long location to retrive a time seriees
#' for a WTSS service. A Web Time Series Service is a light-weight service that
#' retrieves one or more time series in JSON format from a data base.
#' @references
#' Lubia Vinhas, Gilberto Queiroz, Karine Ferreira, Gilberto Camara,
#' Web Services for Big Earth Observation Data.
#' In: XVII Brazilian Symposium on Geoinformatics, 2016, Campos do Jordao.
#' Proceedings of GeoInfo 2016. Sao Jose dos Campos: INPE/SBC, 2016. v.1. p.166-177.

#' @param longitude       double - the longitude of the chosen location
#' @param latitude        double - the latitude of the chosen location)
#' @param start_date      date - the start of the period
#' @param end_date        date - the end of the period
#' @param wtss.obj       an R object that represents the WTSS server
#' @param cov            a list with the coverage parameters (retrived from the WTSS server)
#' @param bands          string vector - the names of the bands to be retrieved
#' @return data.tb       tibble  - a SITS table
#' @export
sits_fromlatlong <-  function (longitude, latitude, start_date = NULL, end_date = NULL, wtss.obj, cov, bands) {

     # set the class of the time series
     label <-  "NoClass"
     # use the WTSS service to retrieve the time series
     data.tb <- sits_fromWTSS (longitude, latitude, start_date, end_date, label, wtss.obj, cov, bands)
     return (data.tb)
}

#' @title Obtain timeSeries from WTSS server, based on a SITS table.
#' @name sits_fromtable
#'
#' @description reads descriptive information about a set of
#' spatio-temporal locations from a SITS table. Then it uses the WTSS service to
#' obtain the required data. This function is useful when you have a sits table
#' but you want to get the time series from a different set of bands.
#'
#' @param table          a  sits_table
#' @param wtss.obj       an R object that represents the WTSS server
#' @param cov            a list with the coverage parameters (retrived from the WTSS server)
#' @param bands          string vector - the names of the bands to be retrieved
#' @return data.tb       tibble  - a SITS table
#' @export
sits_fromtable <-  function (table, wtss.obj, cov, bands) {
     # create the table to store
     data.tb <- sits_table()

     table %>%
          purrrlyr::by_row( function (r){
               # does the lat/long information exist
               ensurer::ensure_that(r$longitude, !purrr::is_null(.) && !is.na(.), err_desc = "sits_getdata - no longitude information")
               ensurer::ensure_that(r$latitude,  !purrr::is_null(.) && !is.na(.), err_desc = "sits_getdata - no longitude information")

               # ajust the start and end dates and the label
               if (is.na(r$start_date)) { r$start_date <- lubridate::as_date(cov$timeline[1])}
               if (is.na(r$end_date))   { r$end_date   <- lubridate::as_date(cov$timeline[length(cov$timeline)])}
               if (is.na(r$label))      { r$label <- "NoClass"}

               # retrieve the data row
               t <- sits_fromWTSS (r$longitude, r$latitude, r$start_date, r$end_date,
                                    r$label, wtss.obj, cov, bands)

               # add the row to the output
               data.tb <<- dplyr::bind_rows (data.tb, t)
          })
     return (data.tb)
}

#' @title Obtain timeSeries from WTSS server, based on a CSV file.
#' @name sits_fromCSV
#'
#' @description reads descriptive information about a set of
#' spatio-temporal locations from a CSV file. Then, it uses the WTSS time series service
#' to retrieve the time series, and stores the time series on a SITS table for later use.
#' The CSV file should have the following column names:
#' "longitude", "latitude", "start_date", "end_date", "label"
#'
#' @param csv_file        string  - name of a CSV file with information <id, latitude, longitude, from, end, label>
#' @param wtss.obj        WTSS object - the WTSS object that describes the WTSS server
#' @param cov             list - a list with coverage information (retrieved from the WTSS)
#' @param bands           string vector - the names of the bands to be retrieved
#' @param n_max           integer - the maximum number of samples to be read
#' @param ignore_dates    whether to use the dates of the coverage and not those specified in the file
#' @return data.tb        tibble  - a SITS table
#' @export

sits_fromCSV <-  function (csv_file, wtss.obj, cov, bands, n_max = Inf, ignore_dates = FALSE){
     # configure the format of the CSV file to be read
     cols_csv <- readr::cols(id          = readr::col_integer(),
                             longitude   = readr::col_double(),
                             latitude    = readr::col_double(),
                             start_date  = readr::col_date(),
                             end_date    = readr::col_date(),
                             label       = readr::col_character())
     # read sample information from CSV file and put it in a tibble
     csv.tb <- readr::read_csv (csv_file, n_max = n_max, col_types = cols_csv)
     # create the table
     data.tb <- sits_table()
     # for each row of the input, retrieve the time series
     csv.tb %>%
          purrrlyr::by_row( function (r){
               row <- sits_fromWTSS (r$longitude, r$latitude, r$start_date, r$end_date, r$label, wtss.obj, cov, bands, ignore_dates)

               # ajust the start and end dates
               row$start_date <- lubridate::as_date(utils::head(row$time_series[[1]]$Index, 1))
               row$end_date   <- lubridate::as_date(utils::tail(row$time_series[[1]]$Index, 1))

               data.tb <<- dplyr::bind_rows (data.tb, row)
          })
     return (data.tb)
}
#' @title Obtain timeSeries from WTSS server, based on a SHP file.
#' @name sits_fromSHP
#'
#' @description reads a shapefile and retrieves a SITS table
#' containing time series from a coverage that are inside the SHP file.
#' The script uses the WTSS service, taking information about coverage, spatial and
#' temporal resolution from the WTSS configuration.
#'
#'
#' @param shp_file   string  - name of a SHP file which provides the boundaries of a region of interest
#' @param wtss.obj        WTSS object - the WTSS object that describes the WTSS server
#' @param cov             list - a list with coverage information (retrieved from the WTSS)
#' @param bands           string vector - the names of the bands to be retrieved
#' @param start_date      date - the start of the period
#' @param end_date        date - the end of the period
#' @param label           string - the label to attach to the time series
#' @return table     tibble  - a SITS table
#' @export
sits_fromSHP <- function (shp_file, wtss.obj, cov, bands, start_date, end_date, label) {

     # build grid points in Sinusoidal
     buildGridPoints <- function(points_Sinu.sp) {

          # pixel size Sinusoidal according to the gdalinfo
          # this should be changed to use information from "describeCoverage" (ATTENTION)
          pixel_size_Sinu <- 231.656358263958

          # bounding box of sinusoidal points
          bb_Sinu.num <- sp::bbox(points_Sinu.sp)

          # define coordinates by resolution according to the bounding box + one pixel size
          long.num <- seq(from=bb_Sinu.num[1],
                          to=bb_Sinu.num[3],
                          by=pixel_size_Sinu)

          lat.num <- seq(from=bb_Sinu.num[2],
                         to=bb_Sinu.num[4],
                         by=pixel_size_Sinu)

          # build the latitude and longitude
          coordinates.sp <- sp::SpatialPoints(data.frame(longitude = rep(long.num,
                                                                         each=length(lat.num)),
                                                         latitude = rep(lat.num,
                                                                        length(long.num))),
                                              proj4string=sp::CRS(sp::proj4string(points_Sinu.sp)))

          coordinates.sp

     }

     # spatial points to sits table
     sp2SitsTable <- function(coordinates.sp){

          # using the WTSS
          sits_points.tb <- sits_table()

          # fill rows with coordinates
          sits_points.tb <- tibble::add_row(sits_points.tb,
                                            longitude = coordinates.sp@coords[,1],
                                            latitude = coordinates.sp@coords[,2])

          sits_points.tb

     }

     # sitsTable longitude and latitude to crs shapefile
     sitsTable2sp <- function(points.tb){

          points.sp <-  sp::SpatialPoints(data.frame(points.tb$longitude,
                                                     points.tb$latitude),
                                          proj4string=sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))

          points.sp

     }

     # extract latitude and longitude values of sits points from polygon
     extractFromPolygon <- function(points.sp, roi.shp) {

          # subset of points.sp
          points.sp <- points.sp[!is.na(sp::over(points.sp, methods::as(roi.shp, "SpatialPolygons")) == 1)]

          points.sp

     }

     # read shapefile and get first point time series
     roi.shp <- raster::shapefile(shp_file)
     roi.shp <- sp::spTransform(roi.shp, sp::CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"))
     pt.df <- data.frame(longitude = sp::bbox(roi.shp)[1,], latitude = sp::bbox(roi.shp)[2,])
     pt.sp <- sp::SpatialPoints(pt.df, sp::CRS(sp::proj4string(roi.shp)))
     pt.sp <- sp::spTransform(pt.sp, sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
     pt.tb <- sp2SitsTable(pt.sp)
     pt_wtss.tb <- pt.tb %>%
          dplyr::rowwise() %>%
          dplyr::do (sits_fromWTSS (.$longitude, .$latitude, start_date, end_date, label, wtss.obj, cov, bands))

     # transform grid points in Sinusoidal into WGS
     pt_wtss.sp <- sitsTable2sp(pt_wtss.tb)
     pt_wtss.sp <- sp::spTransform(pt_wtss.sp, sp::CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"))
     grid_pts.sp <- buildGridPoints(pt_wtss.sp)
     # transform WGS sp into sits table to get data from server
     plg_pts.sp <- extractFromPolygon(grid_pts.sp, roi.shp)
     if (nrow(plg_pts.sp@coords)) {
         plg_pts.sp <- sp::spTransform(plg_pts.sp, sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
         plg_pts.tb <- sp2SitsTable(plg_pts.sp)
         wtss_points.tb <- plg_pts.tb %>%
               dplyr::rowwise() %>%
               dplyr::do (sits_fromWTSS (.$longitude, .$latitude, start_date, end_date, label, wtss.obj, cov, bands))
     } else
          wtss_points.tb <- sits_table()

     return (wtss_points.tb)
}

#' @title Obtain one timeSeries from WTSS server and load it on a sits table
#' @name sits_fromWTSS
#'
#' @description Returns one set of time series provided by a WTSS server
#' Given a location (lat/long), and start/end period, and the WTSS server information
#' retrieve a time series and include it on a stis table.
#' A Web Time Series Service (WTSS) is a light-weight service that
#' retrieves one or more time series in JSON format from a data base.
#' @references
#' Lubia Vinhas, Gilberto Queiroz, Karine Ferreira, Gilberto Camara,
#' Web Services for Big Earth Observation Data.
#' In: XVII Brazilian Symposium on Geoinformatics, 2016, Campos do Jordao.
#' Proceedings of GeoInfo 2016. Sao Jose dos Campos: INPE/SBC, 2016. v.1. p.166-177
#'
#' @param longitude       double - the longitude of the chosen location
#' @param latitude        double - the latitude of the chosen location
#' @param start_date      date - the start of the period
#' @param end_date        date - the end of the period
#' @param label           string - the label to attach to the time series (optional)
#' @param wtss.obj        an R object that represents the WTSS server
#' @param cov             a list containing information about the coverage from which data is to be recovered
#' @param bands           list of string - a list of the names of the bands of the coverage
#' @param ignore_dates    whether to use the dates of the coverage and not those specified in the file
#' @return data.tb        tibble  - a SITS table
#' @export
sits_fromWTSS <- function (longitude, latitude, start_date, end_date, label, wtss.obj, cov, bands, ignore_dates = FALSE) {

     # set the start and end dates from the coverage
     if (purrr::is_null (start_date) | ignore_dates ) start_date <- lubridate::as_date(cov$timeline[1])
     if (purrr::is_null (end_date)   | ignore_dates ) end_date <- lubridate::as_date(cov$timeline[length(cov$timeline)])

     # get a time series from the WTSS server
     ts <- wtss::timeSeries (wtss.obj, coverages = cov$name, attributes = bands,
                               longitude = longitude, latitude = latitude,
                               start = start_date, end = end_date)

     # retrieve the time series information
     time_series <- ts[[cov$name]]$attributes

     # retrieve information about the bands
     band_info <- cov$attributes

     # determine the missing value for each band
     miss_value <- function (band) {
          return (band_info[which(band == band_info[,"name"]),"missing_value"])
     }
     # update missing values to NA
     for (b in bands){
          time_series[,b][time_series[,b] == miss_value(b)] <- NA
     }

     # interpolate missing values
     time_series[,bands] <- zoo::na.spline(time_series[,bands])

     # calculate the scale factor for each band
     scale_factor <- function (band){
          return (band_info[which(band == band_info[,"name"]),"scale_factor"])
     }
     # scale the time series
     bands %>%
          purrr::map (function (b) {
               time_series[,b] <<- time_series[,b]*scale_factor(b)
          })

     # convert the series to a tibble
     row.tb <- tibble::as_tibble (zoo::fortify.zoo (time_series))


     # clean the time series
     # create a list to store the zoo time series coming from the WTSS service
     ts.lst <- list()
     # transform the zoo list into a tibble to store in memory
     ts.lst[[1]] <- row.tb

     # create a table to store the WTSS data
     data.tb <- sits_table()
     # add one row to the table
     data.tb <- tibble::add_row (data.tb,
                         longitude    = longitude,
                         latitude     = latitude,
                         start_date   = as.Date(start_date),
                         end_date     = as.Date(end_date),
                         label        = label,
                         coverage     = cov$name,
                         time_series  = ts.lst
     )

     # return the table with the time series
     return (data.tb)
}

#' @title Obtain a confusion matrix from a compressed JSON file.
#'
#' @name sits_conf_fromGZ
#'
#' @description reads a set of data and metadata for satellite image time series from a compressed JSON file
#'
#' @param file        string  - name of a compressed JSON file with sits data and metadata
#' @return data.tb    tibble  with a confusion matrix
#' @export
sits_conf_fromGZ <- function (file) {

    # uncompress the file
    json_file <- R.utils::gunzip (file, remove = FALSE)
    # retrieve the data
    conf.tb <- sits_conf_fromJSON (json_file)
    # remove the uncompressed file
    file.remove (json_file)

    # return the JSON file
    return (conf.tb)
}

#' @title Obtain a confusion matrix from a JSON file.
#'
#' @name sits_conf_fromJSON
#'
#' @description reads a set of data and metadata for satellite image time series from a JSON file
#'
#' @param  file       string  - name of a JSON file with sits data and metadata
#' @return data.tb    tibble  with a confusion matrix
#' @export
sits_conf_fromJSON <- function (file) {
    # add the contents of the JSON file to a SITS table
    table <- tibble::as_tibble (jsonlite::fromJSON (file))

    return (table)
}
luizassis/sits documentation built on May 30, 2019, 7:15 p.m.