# R/download_function.r

#' Download ANA historical series
#'
#' Requests the `HidroSerieHistorica` series (water level, precipitation or
#' flow) for each station code and writes one `ANA_<stationCode>.csv` file per
#' station into `dlpath`. Each file has a `date` and a `value` column, sorted
#' by date. Stations for which the service returns no records are skipped.
#'
#' @param stationCodes Vector of station codes, one request is made per code.
#'   (The original note read "7 digit code for flow stations, 8 digit for
#'   flow"; presumably one of the two refers to another station type --
#'   TODO confirm.)
#' @param timei starting date, defaults to January 1st, 1900 and will pull
#'   first available record
#' @param timef ending date, defaults to most recent available
#' @param tipoDados data type, 1 is for water level, 2 for precipitation and
#'   3 for discharge
#' @param nivelConsistencia 1 is for bruto, 2 for consistido and blank for both
#' @param dlpath directory the final .csv files should be saved to
#' @return `NULL`, invisibly. Called for its side effect of writing files.
#' @keywords ANA download waterlevel precipitation flow
#' @export
#' @examples
#' \dontrun{
#' # replace the code below with real station codes
#' downloadANA(stationCodes = "12345678", tipoDados = 3, dlpath = tempdir())
#' }
downloadANA <- function(stationCodes, timei = "01/01/1900", timef = "",
                        tipoDados, nivelConsistencia = "", dlpath) {
  serviceUrl <-
    "http://telemetriaws1.ana.gov.br/ServiceANA.asmx/HidroSerieHistorica"
  nDays <- 31L
  # Positions of the 31 daily values inside one monthly record.
  # NOTE(review): taken from the original code; confirm that the daily fields
  # sit at 16..46 for every tipoDados.
  dayFields <- 16:46

  # Text of one XML leaf as a number; NA when the element is empty or absent,
  # so that a missing day keeps its slot instead of shifting later days up.
  firstNumeric <- function(node) {
    txt <- unlist(node, use.names = FALSE)
    if (length(txt) == 0) {
      return(NA_real_)
    }
    as.numeric(as.character(txt[[1]]))
  }

  for (i in seq_along(stationCodes)) {
    codEstacao <- stationCodes[i]
    bodyPOST <- paste0(
      "codEstacao=", codEstacao,
      "&dataInicio=", timei,
      "&dataFim=", timef,
      "&tipoDados=", tipoDados,
      "&nivelConsistencia=", nivelConsistencia
    )
    response <- POST(
      serviceUrl,
      body = bodyPOST,
      content_type("application/x-www-form-urlencoded")
    )
    xml.doc <- content(response)

    ## Extract the monthly records from the parsed document
    xml.list <- xml2::as_list(xml.doc)
    xml.data <- xml.list$DataTable$diffgram$DocumentElement
    nRecords <- length(xml.data)

    # No records for this station: nothing to write, go on to the next one.
    if (nRecords == 0) {
      message("No data returned for station ", codEstacao, "; skipping")
      next
    }

    ## One column per monthly record, one row per day of the month
    station.data <- matrix(NA_real_, nrow = nDays, ncol = nRecords)
    recordDates <- rep(NA_character_, nRecords)
    for (j in seq_len(nRecords)) {
      record <- xml.data[[j]]
      station.data[, j] <- vapply(
        record[dayFields], firstNumeric, numeric(1),
        USE.NAMES = FALSE
      )
      stamp <- if (is.list(record)) unlist(record[["DataHora"]]) else NULL
      if (length(stamp) > 0) {
        recordDates[j] <- as.character(stamp[[1]])
      }
    }

    ## Long format: as.vector() walks the matrix column by column, so each
    ## record's year/month is repeated for its own 31 day slots.
    yearStr <- rep(substr(recordDates, 1, 4), each = nDays)
    monthStr <- rep(substr(recordDates, 6, 7), each = nDays)
    dayNum <- rep(seq_len(nDays), times = nRecords)
    stationData <- data.frame(
      date = as.Date(
        paste(monthStr, dayNum, yearStr, sep = "/"),
        format = "%m/%d/%Y"
      ),
      value = as.vector(station.data)
    )

    ## Unparseable dates are days that do not exist (e.g. 2/31) or records
    ## without a timestamp; drop them. Logical indexing keeps all rows when
    ## there is nothing to drop (a negative which() index would remove all).
    stationData <- stationData[!is.na(stationData$date), , drop = FALSE]
    stationData <- stationData[order(stationData$date), , drop = FALSE]

    ## Write to .csv inside dlpath; file.path() is used instead of setwd() so
    ## the caller's working directory is left untouched.
    outName <- paste0("ANA_", codEstacao, ".csv")
    write.csv(stationData, file.path(dlpath, outName))
  }

  invisible(NULL)
}
# sharsiddiqui/readANAData documentation built on May 17, 2019, 12:12 a.m.