hydro.db = function( ip=NULL, p=NULL, DS=NULL, yr=NULL, additional.data=c("groundfish", "snowcrab", "USSurvey_NEFSC", "lobster"), ...) {

  # Manipulate temperature databases from OSD, groundfish and snow crab and grid them.
  #
  # Arguments:
  #   ip              : indices into p$runs when run in parallel mode (NULL = derive from yr / p$tyears)
  #   p               : project parameter list (expects internal.projection, nw, tyears, corners, runs, libs, ...)
  #   DS              : data-selection switch; chooses which branch below is executed
  #   yr              : year(s) to load or (re)build
  #   additional.data : extra data sources merged into the annual depth profiles
  #
  # OSD data source is
  # http://www.meds-sdmm.dfo-mpo.gc.ca/zmp/climate/climate_e.htm
  # http://www.mar.dfo-mpo.gc.ca/science/ocean/database/data_query.html
  ## must download manually to this directory and run gzip
  ## use choijae/Jc#00390
  ## depths: 500,500, "complete profile" .. raw data for the SS
  # (USER Defined -- region: jc.ss")
  # no time records, just day/mon/year .. assume utc

  basedir = file.path( project.datadirectory("bio.temperature"), "data" )
  loc.archive = file.path( basedir, "archive", "profiles")
  loc.basedata = file.path( basedir, "basedata", "rawdata" )
  dir.create( loc.basedata, recursive=TRUE, showWarnings=FALSE )

  # ----------------------
  # OSD data series variables of interest
  if ( DS == "osd.rawdata" ) {
    # simple loading of annual data files; returns NULL when no requested year is on disk
    out = NULL
    for ( y in yr ) {
      print (y)
      fn = file.path( loc.basedata, paste( "osd.rawdata", y, "rdata", sep=".") )
      if (file.exists ( fn ) ) {
        load(fn)  # loads "X"
        out = rbind( out, X )
      }
    }
    return ( out )
  }

  # ----------------------
  if ( DS=="osd.rawdata.allfiles.redo" ) {
    # re-parse every archived OSD gz dump and split it into one .rdata file per year
    fn.all = list.files( path=loc.archive, pattern="osd.clim.*.gz", full.names=TRUE)
    X = NULL
    varlist = c("DEPTH","PRESSURE","CRUISE_DATE","LATITUDE" ,"LONGITUDE" ,"TEMPERATURE","SALINITY" ,"SIGMAT" )
    varstosave = c( "depth", "pressure", "latitude" ,"longitude" ,"temperature" ,"salinity" ,"sigmat", "date" )
    for (fn in fn.all) {
      f = read.csv( gzfile(fn), header=TRUE, as.is=TRUE, sep=",", na.strings="9999")
      f = f[,varlist]
      # CRUISE_DATE is "dd/mm/yyyy": third token of the "/" split is the year
      fyears = as.numeric( matrix( unlist( strsplit( f$CRUISE_DATE, "/" ) ), ncol=3, byrow=TRUE) [,3] )
      years = sort( unique( fyears ))
      for (yrs in years) {
        fn.out = file.path( loc.basedata, paste( "osd.rawdata", yrs, "rdata", sep=".") )
        print( paste(yrs, ":", fn.out) )
        X = f[ which( fyears == yrs) ,]
        names(X) = tolower( names(X) )
        X$date = lubridate::dmy( X$cruise_date ) # default is UTC ... need to confirm-- no time records .. assume utc
        X = X[ , varstosave ]
        save( X, file=fn.out, compress=TRUE)
      }
    }
  }

  # ----------------------
  if (DS=="osd.rawdata.singleyear.redo" ) {
    # same as above but restricted to the archive file(s) matching each requested year
    varlist = c("DEPTH","PRESSURE","CRUISE_DATE","LATITUDE" ,"LONGITUDE" ,"TEMPERATURE","SALINITY" ,"SIGMAT" )
    varstosave = c( "depth", "pressure", "latitude" ,"longitude" ,"temperature" ,"salinity" ,"sigmat", "date" )
    for ( y in yr) {
      X = NULL
      fn.all = list.files( path=loc.archive, pattern="osd.clim.*.gz", full.names=TRUE)
      fn = fn.all[ grep (as.character(y), fn.all) ]
      f = read.csv( gzfile(fn), header=TRUE, as.is=TRUE, sep=",", na.strings="9999")
      X = f[,varlist]
      fn.out = file.path( loc.basedata, paste( "osd.rawdata", y, "rdata", sep=".") )
      names(X) = tolower( names(X) )
      # cruise_date is "dd/mm/yyyy" (see allfiles.redo branch), so parse with dmy;
      # the original called ymd() with a dangling "tz=" argument, which errors at runtime
      X$date = lubridate::dmy( X$cruise_date ) # default is UTC ... need to confirm
      X= X[, varstosave ]
      save( X, file=fn.out, compress=TRUE)
    }
  }

  # ----------------------
  if (DS=="osd.initial" ) {
    ## this is a data dump directly from Roger Pettipas for 2008 to 2015
    varstosave = c( "depth", "pressure", "latitude" ,"longitude" ,"temperature" ,"salinity" ,"sigmat", "date" )
    fndata = file.path( loc.archive, "Data_2008-2015.csv.xz" )
    XX = read.csv( file=xzfile(fndata), header=FALSE, skip=2 , stringsAsFactors=FALSE, na.strings="9999" )
    # the dump has no usable header row; impose the known column layout
    header = c("MissionID", "Latitude", "Longitude", "Year", "Month", "Day", "Hour", "Minute", "Pressure", "Temperature", "Salinity", "SigmaT" ,"StationID" )
    names(XX) = tolower( header )
    XX$depth = decibar2depth ( P=XX$pressure, lat=XX$latitude )
    if (!exists( "sigmat", XX)) XX$sigmat = XX$sigma.t # naming is variable
    XX$date_string = paste( XX$year, XX$month, XX$day, sep="-" )
    XX$date = lubridate::ymd( XX$date_string ) # default is UTC ... need to confirm
    yrs = sort( unique( XX$year) )
    for ( y in yrs ) {
      print (y)
      fn.out = file.path( loc.basedata, paste( "osd.rawdata", y, "rdata", sep=".") )
      ii = which ( XX$year == y )
      if (length(ii) > 1) {
        X= XX[ ii, varstosave ]
        save( X, file=fn.out, compress=TRUE)
      }
    }
  }

  # ----------------------
  if (DS=="osd.current" ) {
    ## this is a data dump directly from Roger Pettipas for 2015 and on
    varstosave = c( "depth", "pressure", "latitude" ,"longitude" ,"temperature" ,"salinity" ,"sigmat", "date" )
    for ( y in yr ) {
      print (y)
      fndata = file.path( loc.archive, paste( "Data_", y, ".csv.xz", sep="" ) )
      fn.out = file.path( loc.basedata, paste( "osd.rawdata", y, "rdata", sep=".") )
      X = read.csv( file=xzfile(fndata), skip=2, stringsAsFactors=FALSE, na.strings="9999" )
      # insert Header :
      header = c("MissionID", "Latitude", "Longitude", "Year", "Month", "Day", "Hour", "Minute", "Pressure", "Temperature", "Salinity", "SigmaT" ,"StationID" )
      names(X) = tolower( header )
      X$depth = decibar2depth ( P=X$pressure, lat=X$latitude )
      if (!exists( "sigmat", X)) X$sigmat = X$sigma.t # naming is variable
      X$date_string = paste( X$year, X$month, X$day, sep="-" )
      X$date = lubridate::ymd( X$date_string ) # default is UTC ... need to confirm
      X= X[, varstosave ]
      save( X, file=fn.out, compress=TRUE)
    }
  }

  # ----------------------
  if (DS=="USSurvey_NEFSC") {
    # data dump supplied by Adam Cook .. assumed to be bottom temperatures from their surveys in Gulf of Maine area?
    fn = file.path( project.datadirectory("bio.temperature"), "archive", "NEFSCTemps_formatted.rdata" )
    # retrieval mode: yr given -> return the matching records from the formatted file
    if (!is.null(yr)) {
      if (file.exists(fn)) {
        load(fn)  # loads "ne"
        i = which( lubridate::year( ne$timestamp) %in% yr )
        out = NULL
        if (length(i) > 0) out = ne[i,]
        return(out)
      }
    }
    # else assume a re-assimilation of data
    ne = NULL
    fn_input = file.path( project.datadirectory("bio.temperature"), "archive", "NEFSCTemps.rdata" )
    if (file.exists(fn_input)) load(fn_input)  # loads raw "ne"
    ne$id = paste(ne$plon, ne$plat, lubridate::date( ne$timestamp), sep="~" )
    ne$salinity = NA
    ne$oxyml = NA
    ne$sigmat = NA
    ne$date = ne$timestamp
    ne$yr = lubridate::year( ne$timestamp )
    ne$dyear = lubridate::decimal_date( ne$timestamp ) - ne$yr
    ne = planar2lonlat( ne, proj.type=p$internal.projection ) # planar coords (plon/plat) -> lon/lat
    save( ne, file=fn, compress=TRUE )
    return (fn)
  }

  # ----------------------
  if (DS %in% c("lobster","lobster.redo")) {
    if (DS == "lobster.redo"){
      # pull the FSRS lobster trap temperature view from Oracle and cache it
      require(RODBC)
      con = odbcConnect(oracle.server , uid=oracle.lobster.username, pwd=oracle.lobster.password, believeNRows=F) # believeNRows=F required for oracle db's
      fsrs = sqlQuery(con, "select * from fsrs_lobster.FSRS_LOBSTER_VW")
      odbcClose(con)
      fsrs$SYEAR = fsrs$HAUL_YEAR # add season-year identifier
      fsrs$HAUL_DATE = as.Date(fsrs$HAUL_DATE)
      fsrs$SYEAR[fsrs$LFA%in%c("33","34")] = as.numeric(substr(fsrs$S_LABEL[fsrs$LFA%in%c("33","34")],6,9)) # add season-year identifier
      fsrsT = subset(fsrs,TEMP>-90) #remove no temp data
      fsrsT$Dloc = paste(fsrsT$HAUL_DATE,fsrsT$LATITUDE,fsrsT$LONGITUDE)
      fsrsT = subset(fsrsT,!duplicated(Dloc)) #remove duplicate date-locations
      save( fsrsT, file=file.path( project.datadirectory("bio.temperature"), "archive", "FSRStempdata_formatted.rdata" ), compress=TRUE)
    }
    # data dump supplied by Brad Hubley (2017) of nearshore lobster trap temperatures (sourced originally from FSRS) and converted into daily means
    fn = file.path( project.datadirectory("bio.temperature"), "archive", "FSRStempdata_formatted.rdata" )
    # retrieval mode: yr given -> return the matching records from the formatted file
    if (!is.null(yr)) {
      if (file.exists(fn)) {
        load(fn)  # loads "lob"
        i = which( lubridate::year( lob$timestamp) %in% yr )
        out = NULL
        if (length(i) > 0) out = lob[i,]
        return(out)
      }
    }
    # else re-assimilate: read the raw dump, normalize and save the formatted version
    lob = NULL
    fn_input = file.path( project.datadirectory("bio.temperature"), "archive", "FSRStempdata.rdata" )
    if (file.exists(fn_input)) load(fn_input)  # loads raw "fsrsT"
    lob = fsrsT
    names(lob) = tolower( names(lob))
    lob$longitude = lob$lon_dd
    lob$latitude = lob$lat_dd
    lob$timestamp = lob$haul_date
    # NOTE(review): plon/plat are assumed present in the raw dump before planar2lonlat is
    # called below -- verify against the raw file's schema
    lob$id = paste(lob$plon, lob$plat, lubridate::date( lob$timestamp), sep="~" )
    lob$salinity = NA
    lob$oxyml = NA
    lob$sigmat = NA
    lob$date = lob$timestamp
    lob$yr = lubridate::year( lob$timestamp )
    lob$dyear = lubridate::decimal_date( lob$timestamp ) - lob$yr
    lob = planar2lonlat( lob, proj.type=p$internal.projection ) # planar coords (plon/plat) -> lon/lat
    # save the processed object to the *formatted* file (the original saved the base
    # function "log" -- a typo -- and targeted the raw input file, clobbering it)
    save( lob, file=fn, compress=TRUE )
    return (fn)
  }

  # ----------------------
  if ( DS %in% c("ODF_ARCHIVE", "ODF_ARCHIVE.redo") ) {
    # PTRAN/CHOIJ
    loc = file.path( project.datadirectory("bio.temperature"), "data" )
    DataDumpFromWindows = FALSE
    if ( DataDumpFromWindows ) {
      loc = file.path("C:", "datadump")
    }
    dir.create( path=loc, recursive=TRUE, showWarnings=FALSE )
    fn.root = file.path( loc, "ODF_ARCHIVE" )
    dir.create( fn.root, recursive = TRUE, showWarnings = FALSE )
    out = NULL
    if ( DS=="ODF_ARCHIVE" ) {
      # retrieval mode: concatenate all cached annual extractions
      fl = list.files( path=fn.root, pattern="*.rdata", full.names=TRUE )
      for ( fny in fl ) {
        load (fny)  # loads "odfdat"
        out = rbind( out, odfdat )
      }
      return (out)
    }
    con = ROracle::dbConnect( DBI::dbDriver("Oracle"), username=oracle.personal.user, password=oracle.personal.password, dbname="PTRAN" )
    cruises <- ROracle::dbGetQuery(con, "select * from ODF_ARCHIVE.ODF_CRUISE_EVENT" )
    for ( y in yr ) {
      fny = file.path( fn.root, paste( y, "rdata", sep="."))
      # no trailing ";" -- Oracle rejects embedded semicolons in statements sent via the driver
      odfdat = ROracle::dbGetQuery( con, paste(
        " select * " ,
        " from ODF_ARCHIVE.ODF_CRUISE_EVENT i, ODF_ARCHIVE.ODF_DATA j " ,
        " where i.CRUISE_EVENT_ID(+)=j.DATA_VAL_ID ",
        " and EXTRACT(YEAR from start_date_time) =", y
      ) )
      names(odfdat) = tolower( names(odfdat) )
      print(fny)
      save(odfdat, file=fny, compress=TRUE)
      gc() # garbage collection
      print(y)
    }
    ROracle::dbDisconnect(con)  # was dbDisconnect(connect): undefined variable
    return (fn.root)
  }

  # ----------------
  if (DS %in% c( "profiles.annual.redo", "profiles.annual" ) ) {
    # read in annual depth profiles then extract bottom temperatures
    basedir = project.datadirectory("bio.temperature", "data" )
    loc.profile = file.path( basedir, "basedata", "profiles" )
    dir.create( loc.profile, recursive=TRUE, showWarnings=FALSE )

    if (DS=="profiles.annual") {
      # retrieval mode: one year's cached profiles (NULL if absent)
      fn = file.path( loc.profile, paste("depthprofiles", yr, "rdata", sep="."))
      Y = NULL
      if (file.exists( fn) ) load (fn )  # loads "Y"
      return(Y)
    }

    ####### "ip" is the first parameter expected when run in parallel mode .. do not move this one
    if ( is.null(ip)) {
      if( exists( "nruns", p ) ) {
        ip = 1:p$nruns
      } else {
        if ( !is.null(yr)) {
          # if only selected years being re-run
          ip = 1:length(yr)
          p$runs = data.frame(yrs = yr)
        } else {
          ip = 1:length(p$tyears)
          p$runs = data.frame(yrs = p$tyears)
        }
      }
    }
    if (exists( "libs", p)) RLibrary( p$libs )

    # bring in snow crab, groundfish and OSD data ...
    set = bio.snowcrab::snowcrab.db( DS="setInitial" )
    mlu = bio.snowcrab::minilog.db( DS="set.minilog.lookuptable" )
    slu = bio.snowcrab::seabird.db( DS="set.seabird.lookuptable" )
    set = merge( set, mlu, by= c("trip", "set"), all.x=TRUE, all.y=FALSE )
    set = merge( set, slu, by= c("trip", "set"), all.x=TRUE, all.y=FALSE )
    set$longitude =set$lon
    set$latitude = set$lat
    set$oxyml = NA
    set$salinity = NA
    set$sigmat = NA
    set = set[ ,c("minilog_uid", "seabird_uid", "longitude", "latitude", "oxyml", "salinity", "sigmat" ) ]

    grdfish = bio.groundfish::groundfish.db( "gshyd.georef" )

    # dummy row gives rbind a template when a year has no OSD data; removed before saving
    Ydummy = bio.temperature::hydro.db( DS="osd.rawdata", yr=2000, p=p ) [1,] # dummy entry using year=2000
    Ydummy$yr = NA
    Ydummy$dyear = 0.5
    Ydummy$id = "dummy"
    Ydummy$depth = -1
    Ydummy$oxyml = NA

    dyears = (c(1:(p$nw+1))-1) / p$nw # intervals of decimal years... fractional year breaks

    for (iy in ip) {
      yt = p$runs[iy, "yrs"]
      Y = bio.temperature::hydro.db( DS="osd.rawdata", yr=yt, p=p )
      if ( is.null(Y) ) {
        Y = Ydummy
        Y$yr = yt
      } else {
        Y$yr = yt
        Y$dyear = lubridate::decimal_date( Y$date ) - Y$yr
        Yid = cut( Y$dyear, breaks=dyears, include.lowest=TRUE, ordered_result=TRUE )
        Y$id = paste( round(Y$longitude,2), round(Y$latitude,2), Yid , sep="~" )
        Y$depth = bio.utilities::decibar2depth ( P=Y$pressure, lat=Y$latitude )
        Y$oxyml = NA
        # next should not be necessary .. but just in case the osd data types get altered
        Y$temperature = as.numeric(Y$temperature )
        Y$salinity= as.numeric(Y$salinity)
        Y$sigmat = as.numeric(Y$sigmat)
      }
      Y$pressure = NULL

      if ("groundfish" %in% additional.data ) {
        gfkeep = c( "id", "sdepth", "temp", "sal", "oxyml", "lon", "lat", "yr", "timestamp")
        gf = grdfish[ which( grdfish$yr == yt ) , gfkeep ]
        if (nrow(gf) > 0) {
          gf$sigmat = NA
          gf$date = gf$timestamp
          # gf$date = as.POSIXct(gf$date, origin=lubridate::origin)
          gf$dyear = lubridate::decimal_date( gf$date ) - gf$yr
          names(gf) = c( "id", "depth", "temperature", "salinity", "oxyml", "longitude", "latitude", "yr", "date", "dyear", "sigmat" )
          Y = rbind( Y, gf[, names(Y)] )
        }
      }

      if ("snowcrab" %in% additional.data ) {
        minilog = bio.snowcrab::minilog.db( DS="basedata", Y=yt )
        if (! is.null( nrow( minilog ) ) ) {
          minilog = merge( minilog, set, by="minilog_uid", all.x=TRUE, all.y=FALSE )
          minilog$id = minilog$minilog_uid
          minilog$date = minilog$timestamp
          # minilog$date = as.POSIXct(minilog$chron, origin=lubridate::origin)
          minilog$yr = yt
          minilog$dyear = lubridate::decimal_date( minilog$date ) - minilog$yr
          Y = rbind( Y, minilog[, names(Y) ] )
        }
        seabird = bio.snowcrab::seabird.db( DS="basedata", Y=yt )
        if ( !is.null( nrow( seabird ) ) ) {
          seabird = merge( seabird, set, by="seabird_uid", all.x=TRUE, all.y=FALSE )
          seabird$id = seabird$seabird_uid
          seabird$yr = yt
          seabird$date = seabird$timestamp
          # seabird$date = as.POSIXct(seabird$chron, origin=lubridate::origin)
          seabird$dyear = lubridate::decimal_date( seabird$date ) - seabird$yr
          seabird$oxyml = NA
          Y = rbind( Y, seabird[, names(Y) ] )
        }
      }

      # drop the dummy template row if it is still present
      oo = which( Y$id == "dummy" )
      if (length(oo) > 0 ) Y = Y[ -oo, ]

      # NULL checks must precede nrow(Y) < 5: if Y is NULL, "NULL < 5" yields logical(0)
      # and if() errors -- the original evaluated them in the unsafe order
      if ( is.null(Y) ) next()
      if ( is.null( nrow(Y) ) ) next()
      if ( nrow(Y) < 5 ) next()

      iiY = which(duplicated(Y))
      if (length(iiY)>0) Y = Y [ -iiY, ]

      # basic range filter on temperature
      bad = which( Y$temperature < -5 | Y$temperature > 30 )
      if (length(bad)>0) Y=Y[-bad,]

      fn = file.path( loc.profile, paste("depthprofiles", yt, "rdata", sep="."))
      print( fn )
      save( Y, file=fn, compress=TRUE )
    }
    return ("Completed")
  }

  # ----------------
  if (DS %in% c( "bottom.annual", "bottom.annual.redo", "bottom.all" ) ) {
    # extract bottom temperatures
    basedir = project.datadirectory("bio.temperature", "data" )
    loc.bottom = file.path( basedir, "basedata", "bottom" )
    dir.create( loc.bottom, recursive=TRUE, showWarnings=FALSE )
    fbAll = file.path( loc.bottom, "bottom.all.rdata" )

    if (DS=="bottom.all") {
      O = NULL
      if (file.exists(fbAll) ) load (fbAll)  # loads "O"
      return(O)
    }
    if (DS=="bottom.annual") {
      fn = file.path( loc.bottom, paste("bottom", yr, "rdata", sep="."))
      Z = NULL
      if (file.exists(fn) ) load (fn )  # loads "Z"
      return(Z)
    }

    # parallel-run bookkeeping, identical to the profiles.annual branch
    if ( is.null(ip)) {
      if( exists( "nruns", p ) ) {
        ip = 1:p$nruns
      } else {
        if ( !is.null(yr)) {
          # if only selected years being re-run
          ip = 1:length(yr)
          p$runs = data.frame(yrs = yr)
        } else {
          ip = 1:length(p$tyears)
          p$runs = data.frame(yrs = p$tyears)
        }
      }
    }
    if (exists( "libs", p)) RLibrary( p$libs )

    dyears = (c(1:(p$nw+1))-1) / p$nw # intervals of decimal years... fractional year breaks

    for (iy in ip) {
      yt = p$runs[iy, "yrs"]
      Y = bio.temperature::hydro.db( DS="profiles.annual", yr=yt, p=p )
      if (is.null(Y)) next()
      igood = which( Y$temperature >= -3 & Y$temperature <= 25 ) ## 25 is a bit high but in case some shallow data
      Y = Y[igood, ]

      # Bottom temps: for each spatial/seasonal cast id, take the deepest record and
      # median-summarize everything within 10 m of the maximum depth
      Yid = cut( Y$dyear, breaks=dyears, include.lowest=TRUE, ordered_result=TRUE )
      Y$id = paste( round(Y$longitude,2), round(Y$latitude,2), Yid, sep="~" )
      ids = sort( unique( Y$id ) )
      Z = copy.data.structure( Y)
      for (i in ids ) {
        W = Y[ which( Y$id == i ), ]
        jj = which( is.finite( W$depth ) )
        if ( length(jj) < 3 ) next()
        Wmax = max( W$depth, na.rm=TRUE ) - 10 # accept any depth within 10 m of the maximum depth
        kk = which( W$depth >= Wmax )
        R = W[ which.max( W$depth ) , ]
        R$temperature = median( W$temperature[kk] , na.rm=TRUE )
        R$salinity = median( W$salinity[kk] , na.rm=TRUE )
        R$sigmat = median( W$sigmat[kk] , na.rm=TRUE )
        R$oxyml = median( W$oxyml[kk] , na.rm=TRUE )
        Z = rbind( Z, R )
      }
      Z = rename.df( Z, "longitude", "lon")
      Z = rename.df( Z, "latitude", "lat")
      Z = rename.df( Z, "temperature", "t")
      Z = rename.df( Z, "depth", "z")

      # append external bottom-temperature sources for the same year
      tne = hydro.db( p=p, DS="USSurvey_NEFSC", yr=yt )
      if ( !is.null(tne) ) Z = rbind( Z, tne[,names(Z)] )
      lob = hydro.db( p=p, DS="lobster", yr=yt )
      if ( !is.null(lob) ) Z = rbind( Z, lob[,names(Z)] )

      Z$date = as.Date( Z$date ) # strip out time of day information
      Z$ddate = lubridate::decimal_date( Z$date )
      Z$dyear = Z$ddate - Z$yr
      Z = lonlat2planar( Z, proj.type=p$internal.projection )

      # range and domain filters
      igood = which( Z$t >= -3 & Z$t <= 25 ) ## 25 is a bit high but in case some shallow data
      Z = Z[igood, ]
      igood = which( Z$lon >= p$corners$lon[1] & Z$lon <= p$corners$lon[2]
          &  Z$lat >= p$corners$lat[1] & Z$lat <= p$corners$lat[2] )
      Z = Z[igood, ]
      Z = Z[ which( is.finite( Z$lon + Z$lat + Z$plon + Z$plat ) ) , ]

      ## ensure that inside each grid/time point
      ## that there is only one point estimate .. taking medians
      vars = c("z", "t", "salinity", "sigmat", "oxyml")
      Z$st = paste( Z$ddate, Z$plon, Z$plat )
      o = which( ( duplicated( Z$st )) )
      if (length(o)>0) {
        dupids = unique( Z$st[o] )
        for ( dd in dupids ) {
          e = which( Z$st == dd )
          keep = e[1]
          drop = e[-1]
          for (v in vars) Z[keep, v] = median( Z[e,v], na.rm=TRUE )
          Z$st[drop] = NA # flag for deletion
        }
        Z = Z[ -which( is.na( Z$st)) ,]
      }
      Z$st = NULL

      fn = file.path( loc.bottom, paste("bottom", yt, "rdata", sep="."))
      print (fn)
      save( Z, file=fn, compress=TRUE)
    }

    # assemble the all-years file from the per-year caches just written
    O = NULL
    for ( yr in p$tyears ) {
      o = hydro.db( p=p, DS="bottom.annual", yr=yr )
      if (!is.null(o)) O = rbind(O, o)
    }
    save(O, file=fbAll, compress=TRUE)
    return ("Completed")
  }
}
# NOTE(review): removed stray website-embed boilerplate ("Add the following code to
# your website...") accidentally pasted from a web scrape -- it was not R code and
# would have made this file unparseable.