library(ooir)
library(tidync)
library(tidyverse)
library(ncdf4)
# Set up parameters -------------------------------------------------------
# which field do you want to download?
field <- "ctd" # options (see the field-dependent blocks below): "ctd", "do", "chla", "nit"
csv_write <- TRUE # write the csv?
csv_file <- "~/Desktop/text.csv" # where to write the csv file?
stephr <- 1 # time step in hours for extracting data
# insert your api login details (use yours not mine)
api_user <- 'OOIAPI-GBZLL30PK7OU67'
api_token <- 'TEMP-TOKEN-NVSQ4OQM8VH7H7'
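# Optional sketch: a safer pattern is to keep credentials out of the script
# entirely, e.g. by storing them in ~/.Renviron and reading them with
# Sys.getenv(). The variable names OOI_API_USER/OOI_API_TOKEN are just a
# suggestion, not anything the ooir package requires:
# api_user <- Sys.getenv("OOI_API_USER")
# api_token <- Sys.getenv("OOI_API_TOKEN")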
# list sites and subset to just the Global Irminger (GI) profiler mooring (GI02HYPM)
sites <- unique(stringr::str_subset(sensors_all$site,"GI02HYPM"))
df_sites <- filter(sensors_all, site %in% sites)
sensor_type <- "CTD"
depth <- c(200,4000)
param_names <- c("time",
"ctdpf_ckl_seawater_temperature",
"practical_salinity",
"ctdpf_ckl_seawater_pressure")
df <- filter(df_sites,str_detect(name,sensor_type) & min_depth > depth[1] & max_depth < depth[2])
params <- get_sensor_meta(df$site[1], df$node[1], df$sensor[1], api_user, api_token)
inst_id <- unique(as.numeric(stringr::str_extract(filter(params, particleKey %in% param_names)$pdId, "[0-9].*")))
# CHECK IF: all inst_ids exist in all files. Do we need to make inst_ids for each line?
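# A minimal sanity check related to the note above (it doesn't answer the
# per-file question, but it does confirm that every requested parameter name
# was matched in this sensor's metadata; assumes params has the particleKey
# column used above):
missing_params <- setdiff(param_names, params$particleKey)
if (length(missing_params) > 0) {
warning("no pdId found for: ", paste(missing_params, collapse = ", "))
}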
# # NOTE!!! field dependent change --------------------------------------------------
#
# # things change here depending on which field you want to extract
# if (field == "chla") {
# # filter df_sites for just the fluorometer sensors - name found in df_sites$name
# df <- filter(df_sites,str_detect(name,"^3-Wavelength Fluorometer$"))
# # get all the parameters exported in a sensor stream by just looking at the first one
# params <- get_sensor_meta(df$site[1], df$node[1], df$sensor[1], api_user, api_token)
# # parameters you want to download - see params$pdId column in params (7 = time, 22 = chla)
# inst_id = c(7,22)
# }
#
# if (field == "do") {
# df <- filter(df_sites,str_detect(name,"^Dissolved Oxygen$"))e
# params <- get_sensor_meta(df$site[1], df$node[1], df$sensor[1], api_user, api_token)
# inst_id = c(7,14)
# }
#
# if (field == "nit") {
# df <- filter(df_sites,str_detect(name,"^Nitrate$"))
# params <- get_sensor_meta(df$site[1], df$node[1], df$sensor[1], api_user, api_token)
# inst_id = c(7,315)
# }
#
# if (field == "ctd") {
# df <- filter(df_sites,str_detect(name,"^CTD$|^CTD \\(30 meters\\)$"))
# params <- get_sensor_meta(df$site[2], df$node[2], df$sensor[2], api_user, api_token)
# inst_id = c(7,13,193)
# inst_id = c(7,13,2927) # NOTE: this overrides the line above
# }
# Continuing on -----------------------------------------------------------
# set the method (recovered_host? telemetered?)
method <- "recovered_wfp"
df <- mutate(df, method = method)
# Get the streams at each site, node, and sensor to make the request to the API
# Loop through each row of df to find the streams for each and put these into a new data frame
dfout <- get_streams_all(df,api_user, api_token)
dfout <- dfout[1,] # NOTE: keeps only the first stream; remove this line to process them all
# Set up a parameter query (this restricts the request to certain parameters)
query <- paste0("parameters=",paste0(inst_id,collapse = ","))
query <- paste(query,
'beginDT=2018-06-01T00:00:00.000Z',
'endDT=2019-09-01T00:00:00.000Z',
sep = "&")
# loop through the rows of dfout and make a request for each site, node, etc combo
# save the request details to list (data_request)
data_request <- vector(mode = "list", length = nrow(dfout))
for (i in seq_len(nrow(dfout))) {
df2 <- slice(dfout, i) # take row i directly rather than filtering on row numbers
data_request[[i]] <- request_data(df2$site, df2$node, df2$sensor, method, df2$stream, api_user, api_token, query = query)
}
#####
# PAUSE - the server needs time to process your request and create your data
# Give it 5 mins.
# Have a cup of tea.
#####
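# If you'd rather have the script do the waiting, a blunt alternative to the
# manual pause is to sleep before retrieving (uncomment to use):
# Sys.sleep(5 * 60) # pause for five minutes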
# Retrieve the data -------------------------------------------------------
#
# Loop through the list of requests and extract the data for each request
df <- NULL # collector for the combined data; bind_rows(NULL, f) simply returns f on the first pass
for (i in seq_along(data_request)) {
# If there's an error with a request, skip it
if("message" %in% names(data_request[[i]])) {
message("skipping: ", i)
next
}
# get the filenames of all the files generated during the request
filenames <- get_filenames(data_request[[i]]) # get all filenames
# remove some of the filenames that have been causing some trouble (not sure why right now)
if (field != "ctd") {
filenames <- filenames[!str_detect(filenames, "METBK|CTD")]
}
# loop through filenames, read the data and combine
for (file in filenames) {
# get the number of data points in the file
# This is a real hack - involves reading all the html from a page and then extracting 1 instance of the number of obs
file_html <- RCurl::getURL(paste0(file,".html"))
n <- as.numeric(stringr::str_extract(file_html,"(?<=time\\[obs \\= )[0-9]*"))-1
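# A possibly lighter-weight alternative (untested sketch; assumes the server
# also exposes the OPeNDAP .dds descriptor, which lists dimension sizes like
# "time[obs = 12345]") would be to parse that much smaller document instead:
# file_dds <- RCurl::getURL(paste0(file, ".dds"))
# n <- as.numeric(stringr::str_extract(file_dds, "(?<=obs = )[0-9]+")) - 1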
# find the length of the timestep and hence the number of data points to skip to get ~1hr resolution
# this involves downloading a very small data chunk and searching the global attributes for the time step
param_add <- "obs[1]"
file2 <- paste(file, param_add, sep = "?")
f <- nc_open(file2)
timecov <- ncatt_get(f, varid = 0)$time_coverage_resolution
nc_close(f) # close the netCDF connection once the attribute has been read
dt <- as.numeric(stringr::str_extract(timecov,"(?<=[A-Z]).*(?=[A-Z])"))
# step <- ceiling(stephr * 3600 / dt)
step <- 100 # NOTE: hard-coded here, overriding the stephr-based calculation above
# create file request that only selects param_names from 0->n in steps of "step"
param_add <- paste(paste0(param_names,"[0:", step, ":", n, "]"), collapse = ",")
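# e.g. with step = 100 and (say) n = 34560, param_add reads:
# "time[0:100:34560],ctdpf_ckl_seawater_temperature[0:100:34560],..."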
file2 <- paste(file,param_add,sep="?")
# read data and combine
f <- tidync(file2) %>%
hyper_tibble()
f <- mutate(f,
site = dfout$site[i],
node = dfout$node[i],
sensor = dfout$sensor[i],
method = method,
stream = dfout$stream[i],
file = file)
# append to the running collector (df starts as NULL, so the first pass keeps f)
df <- bind_rows(df, f)
}
}
# convert time column to date time format (from seconds since 1900-01-01)
df <- mutate(df, time = as.POSIXct(time, origin = "1900-01-01", tz = "UTC"))
if(csv_write) {
write_csv(df, csv_file)
}
# quick look: pressure vs time, coloured by temperature (every 100th point)
slice(df, seq(1, nrow(df), by = 100)) %>%
ggplot(aes(time, ctdpf_ckl_seawater_pressure, color = ctdpf_ckl_seawater_temperature)) +
geom_point()
slice(df, seq(1, nrow(df), by = 100)) %>%
ggplot(aes(time, ctdpf_ckl_seawater_pressure)) +
geom_point()
# split into profiles: treat large time gaps as profile breaks
# (note: diff() on POSIXct auto-picks its units, so the 100 threshold is
# unit-dependent; an explicit as.numeric(diff(df$time), units = "secs")
# comparison would be unambiguous)
ii <- c(1, which(diff(df$time) > 100))
ctd <- vector("list", length(ii) - 1)
for (i in seq_len(length(ii) - 1)) {
vec <- ii[i]:ii[i+1]
ctd_int <- oce::as.ctd(df$practical_salinity[vec],
df$ctdpf_ckl_seawater_temperature[vec],
df$ctdpf_ckl_seawater_pressure[vec],
time = df$time[vec],
station = i,
longitude = i, # placeholder coordinates: the true positions
latitude = 0,  # aren't used by the calculations below
startTime = df$time[ii[i]])
ctd[[i]] <- oce::ctdDecimate(ctd_int, p = 5)
ctd_add <- tibble::tibble(dep = oce::swDepth(ctd[[i]]@data$pressure),
pres = ctd[[i]]@data$pressure,
temp = ctd[[i]]@data$temperature,
theta = oce::swTheta(ctd[[i]]@data$salinity,
ctd[[i]]@data$temperature,
ctd[[i]]@data$pressure),
sigtheta = oce::swSigma0(ctd[[i]]@data$salinity,
ctd[[i]]@data$temperature,
ctd[[i]]@data$pressure),
sal = ctd[[i]]@data$salinity,
profile = i,
time = df$time[ii[i]])
if (i == 1){
ctdtib <- ctd_add
} else {
ctdtib <- dplyr::bind_rows(ctdtib,ctd_add)
}
}
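# oce also ships a base-graphics summary plot for a single ctd object, handy
# for spot-checking one decimated profile (uncomment to use):
# plot(ctd[[1]])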
unprof <- unique(ctdtib$profile)
filter(ctdtib, profile %in% seq(1,max(unprof),3)) %>%
ggplot(aes(time, dep, color = sigtheta)) + geom_point()
write_csv(ctdtib,"~/Desktop/test.csv")