#################################
#
# Auto-generate occupancy reports
#
# Written by: M. Fidino
#
#################################
library(uwinutils)
library(lubridate)
library(dplyr)
connect2db()
# helper function to classify the data source of a season folder:
data_source <- function(x){
# We first want to determine if it's cloud data:
# the folder should have 1) exactly one file and 2) that file
# should start with
# "Seasons Legend:,Start Date,End Date"
# first rule, determine number of files in folder
files <- list.files(
x,
full.names = TRUE
)
nfile <- length(
files
)
if(nfile == 1){
# then check to see if the first line is
firstline <-
readLines(
files,
n = 1
)
if(firstline == "Seasons Legend:,Start Date,End Date"){
return("CLOUD")
}
}
# check to see if this was summarized from the uwin access db
# we can determine if the folder contains "site_locations.csv" and
# "observation_matrix.csv"
has_site_locations <- grep(
"site_locations.csv",
files
)
has_observation_mat <- grep(
"observation_matrix.csv",
files
)
if(
all(
c(
length(has_site_locations) == 1,
length(has_observation_mat) == 1
)
)
){
return("ACCESS")
}
# MAKS has some seasons generated differently
if(
length(grep("*\\.xls", files)) > 0 &
!all(
c(
length(has_site_locations) == 1,
length(has_observation_mat) == 1
)
) &
nfile > 1
){
return("CPW_WAREHOUSE")
}
if(
length(grep("scut|sewa|lbca", x)) == 1 &
all(
c(
length(has_site_locations) == 0,
length(has_observation_mat) == 0
)
) &
nfile > 1
){
return("PRECLEANED")
}
# if none of the above checks match, the data are from some other source
return("UNKNOWN")
}
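# A quick sanity check of the classifier; the path below is a
# hypothetical placeholder, so the call is guarded and only runs
# if such a folder actually exists on disk.
example_folder <- "../uwin-dataset/data/chil/001_JA11"
if(dir.exists(example_folder)){
# returns one of "CLOUD", "ACCESS", "CPW_WAREHOUSE",
# "PRECLEANED", or "UNKNOWN"
print(data_source(example_folder))
}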
# the folder to save all the data in.
data_dest <- paste0(
"../uwin-dataset/data_", Sys.Date()
)
if(!file.exists(data_dest)){
dir.create(data_dest)
}
# the years of data you want to query
years <- 2011:2023
# The unique season codes I'll need for naming
# the folders. We sample four distinct months
# per year.
unq_se <- paste0(
c("JA", "AP", "JU", "OC"),
rep(years - 2000, each = 4)
)
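# e.g., head(unq_se) is "JA11" "AP11" "JU11" "OC11" "JA12" "AP12"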
# Step 1. Get the names of each city that has detection data
qry <- "
SELECT DISTINCT sa.areaAbbr, sa.areaID FROM StudyAreas sa
INNER JOIN Locations cl on cl.areaID = sa.areaID
INNER JOIN Visits vi ON vi.locationID = cl.locationID
INNER JOIN Photos ph ON ph.visitID = vi.visitID
INNER JOIN Detections de ON de.photoName = ph.photoName
WHERE NOT sa.areaAbbr IN('CHMF', 'FAKE', 'CHIL2', 'TEST')"
cities <- SELECT(qry)
cities <- cities[order(cities$areaAbbr),]
# Step 2. Get the species we want to query
# this is a curated list to drop out all the birds and
# the like.
my_species <- read.csv(
"../uwin-dataset/species_to_pull.csv",
header = FALSE
)
species <- SELECT(
paste0(
"SELECT * FROM Species sp\n",
"WHERE sp.commonName IN ", sql_IN(my_species[,1]),";"
)
)
species <- species[order(species$commonName),]
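# Note: judging from its use here and in the detection query below,
# uwinutils::sql_IN() appears to expand a vector into a parenthesized,
# quoted SQL list (quoting toggled by its second argument), so the
# query should read something like:
# WHERE sp.commonName IN ('Coyote','Raccoon',...);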
# This starts setting up the sampling windows
seasons <- expand.grid(
years,
c(1,4,7,10), # the months, (JA, AP, JU, OC)
1
) %>% apply(
.,
1,
paste,
collapse = "-"
) %>% ymd %>% sort
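# a quick check: head(seasons) should print
# "2011-01-01" "2011-04-01" "2011-07-01" "2011-10-01" "2012-01-01" "2012-04-01"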
# I add a little buffer window around those months
# to accommodate slight variation in sampling
# across UWIN partners. These are the potential
# dates we would query for a given primary sampling
# period.
buffers <- data.frame(
lower = seasons - 14,
upper = ceiling_date(seasons, "month") + 13
)
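# e.g., for the JA11 window (month starting 2011-01-01) this should give
# lower = 2010-12-18 and upper = 2011-02-14, i.e. the sampling month
# plus roughly two weeks on either side (ceiling_date() rounds a
# Date up to the first day of the next month).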
for(city in 1:nrow(cities)){
print(cities$areaAbbr[city])
#if(cities$areaAbbr[city] == 'POCA') next
# Pull all the locations, visits, etc.
vis_loc <- SELECT(
paste0(
"SELECT v.visitID, v.activeStart, v.activeEnd, v.firstPhotoDate, v.lastPhotoDate, cl.longitude, cl.latitude, cl.locationAbbr, cl.defaultTimeZone FROM Visits v\n",
"INNER JOIN Locations cl ON cl.locationID = v.locationID\n",
"AND cl.areaID = ", cities$areaID[city]
)
)
# drop any without photo data (we record when
# we deploy cameras as well, and those visits have
# no images associated with them; camera check and
# camera pickup events do have photos).
if(any(is.na(vis_loc$firstPhotoDate))){
vis_loc <- vis_loc[!is.na(vis_loc$firstPhotoDate),]
}
# And now convert them to the appropriate timezone.
# we store all our image date/times
# in UTC.
vis_loc$activeStart <- with_tz(
vis_loc$activeStart,
unique(vis_loc$defaultTimeZone)[1]
)
vis_loc$activeEnd <- with_tz(
vis_loc$activeEnd,
unique(vis_loc$defaultTimeZone)[1]
)
vis_loc$firstPhotoDate <- with_tz(
vis_loc$firstPhotoDate,
unique(vis_loc$defaultTimeZone)[1]
)
vis_loc$lastPhotoDate <- with_tz(
vis_loc$lastPhotoDate,
unique(vis_loc$defaultTimeZone)[1]
)
# Pull in all of the detection data
response <- try(
SELECT(
paste0(
"SELECT DISTINCT s.commonName, v.locationID, v.visitID, c.locationAbbr, p.photoDatetime, d.valStatID FROM Photos p \n",
"INNER JOIN Visits v ON p.visitID=v.visitID AND p.photoDateTime >= v.activeStart AND p.photoDateTime <= v.activeEnd\n",
"INNER JOIN Locations c ON (v.locationID=c.locationID)\n",
"INNER JOIN Detections d ON (p.photoName=d.photoName)",
"INNER JOIN DetectionSpecies ds ON (d.detectionID=ds.detectionID)\n",
"INNER JOIN Species s ON (ds.speciesID=s.speciesID)\n",
"WHERE c.areaID = ", cities$areaID[city],"\n",
#"AND s.speciesID IN ", sql_IN(species$speciesID, FALSE),
"\n AND d.valStatID IN (1,2)"
)
),
silent = TRUE
)
# if the query failed, skip this city instead of
# operating on a try-error object below.
if(inherits(response, "try-error")){
next
}
# Correct the datetime data
response$photoDatetime <- lubridate::with_tz(
response$photoDatetime,
unique(vis_loc$defaultTimeZone)[1]
)
response$photoDatetime <- lubridate::ymd_hms(
response$photoDatetime,
tz = unique(vis_loc$defaultTimeZone)[1]
)
# get the species from this city
city_species <- unique(
response$commonName[
response$commonName %in% species$commonName
]
) %>%
sort
# (site coordinates get pulled inside the season loop below)
# This list will store the detection matrix for
# each primary sampling period
det_list <- vector("list", length = nrow(buffers))
for(season in 1:nrow(buffers)){
# Pull data from a given season, queried
# from the visits data.frame
tmp <- response[
response$photoDatetime >= lubridate::ymd_hms(
paste(
as.character(buffers$lower[season]), "00:00:00",
tz = unique(vis_loc$defaultTimeZone)[1]
)
),]
tmp <- tmp[
tmp$photoDatetime <= ymd_hms(
paste(
as.character(buffers$upper[season]), "23:59:59",
tz = unique(vis_loc$defaultTimeZone)[1]
)
),
]
# If there is no data, then there is no data!
if(nrow(tmp) == 0){
det_list[[season]] <- NA
next
}
# Get min and max date for each visitID
samps <- tmp %>% group_by(visitID) %>%
summarise(
min = as.Date(min(photoDatetime)),
max = as.Date(max(photoDatetime))
)
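# samps now has one row per visit, e.g. (hypothetical values):
# visitID   min          max
# 1234      2010-12-20   2011-01-05
# these date ranges are treated as the days the camera was active.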
# reduce tmp down to the species we are interested in
tmp <- tmp[tmp$commonName %in% species$commonName,]
# make the detection array. To do so we need to determine
# the number of sites and days sampled.
sites <- sort(
unique(
vis_loc$locationAbbr
)
)
# While we are here we may as well grab the coordinates
site_utms <- suppressWarnings(dplyr::left_join(
data.frame(locationAbbr = sites),
vis_loc[,c("locationAbbr", "longitude", "latitude")],
by = "locationAbbr"
)) %>%
distinct
nsite <- length(
unique(
sites
)
)
days <- seq(
buffers$lower[season],
buffers$upper[season],
by = "1 day"
)
nday <- length(
days
)
# The detection matrix
det_mat <- matrix(
NA,
ncol = nday,
nrow = nsite
)
# Filling it in with zeroes
for(i in 1:nrow(samps)){
# map visitID to site
which_row <- which(
sites %in%
vis_loc$locationAbbr[vis_loc$visitID == samps$visitID[i]]
)
# get the appropriate days (columns)
which_col <- which(
days %in% seq(samps$min[i], samps$max[i], by = "1 day")
)
# add some zeroes in there
det_mat[which_row, which_col] <- 0
}
colnames(det_mat) <- paste0("Day_", 1:nday)
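# e.g., a (hypothetical) visit whose photos span 2010-12-20 through
# 2011-01-05 during the JA11 window would fill columns Day_3 through
# Day_19 of that site's row with zeroes: sampled, nothing detected yet.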
# Make the data.frame to store all the species detection data
det_df <- data.frame(
Species = rep(city_species, each = nrow(det_mat)),
Season = 1,
Site = rep(site_utms$locationAbbr, length(city_species)),
Longitude = rep(site_utms$longitude, length(city_species)),
Latitude = rep(site_utms$latitude, length(city_species))
)
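# det_df is in long format: one row per species x site combination,
# so it has nsite * length(city_species) rows. Season is a constant 1
# because each file holds a single primary sampling period.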
to_add <- ncol(det_df)
# Combine the detection matrix
det_df <- cbind(
det_df,
det_mat[rep(1:nrow(det_mat), length(city_species)),]
)
# And now we go through and add 1's for when a species was
# detected.
for(i in 1:nrow(tmp)){
# Which site and species is the record?
which_row <- which(
det_df$Species == tmp$commonName[i] &
det_df$Site == tmp$locationAbbr[i]
)
if(length(which_row) > 1){
stop("multiple det_df rows matched a single species-site record")
}
# And what day was it seen?
which_col <- which(
days %in% as.Date(tmp$photoDatetime[i])
) + to_add
if(length(which_col) > 1){
stop("a photo record mapped to multiple sampling days")
}
# With that info we can put a 1 in this matrix
det_df[which_row, which_col] <- 1
}
det_list[[season]] <- det_df
}
# check to see where we have seasons of data that we just pulled
# from the database.
# null dimensions mean there is no data; the opposite means there is.
has_data <- unq_se[sapply(det_list, function(x) !is.null(dim(x)))]
if(length(has_data) == 0){
next
}
# order it like it should be
has_data <- factor(
unq_se[unq_se %in% has_data],
levels = unq_se[unq_se %in% has_data]
) %>%
as.character
# The tabulated season folders all start with a leading zero
season_paths <- list.files(
paste0(
"../uwin-dataset/data_2023-04-26/",
tolower(cities$areaAbbr[city])
),
"0[0-9]?[1-9]_",
full.names = TRUE
)
# and get just the names in case we need to rename them
season_folders <- list.files(
paste0(
"../uwin-dataset/data/",
tolower(cities$areaAbbr[city])
),
"0[0-9]?[1-9]_"
)
# Drop the leading numbers
if(length(season_folders)>0){
just_seasons <- sapply(
strsplit(
season_folders,
"_"
),
"[",
2
)
# determine data source for each season
came_from <- sapply(
season_paths,
data_source
)
names(came_from) <- just_seasons
# Now check to see where the overlap is.
has_overlap <- has_data[which(has_data %in% just_seasons)]
# check if any of them are not cloud data, and make
# the det_list NA if that is the case.
if(length(has_overlap)>0){
to_drop <- came_from[
names(came_from) %in% has_overlap &
came_from != "CLOUD"]
if(length(to_drop)>0){
det_list[which(unq_se %in% names(to_drop))] <- NA
has_data <- has_data[-which(has_data %in% names(to_drop))]
}
}
} else {
# make just_seasons NA in the event that there
# is no data
just_seasons <- NA
came_from <- rep(NA, length(has_data))
names(came_from) <- has_data
}
# this is the full set of seasons of data we have
# for a given city
folders_to_make <- unique(c(just_seasons, has_data))
# order them appropriately
folders_to_make <- folders_to_make %>%
factor(., levels = unq_se) %>%
sort %>%
as.character
# make this a data.frame now.
folders_to_make <- data.frame(
season = folders_to_make,
folder = paste0(
stringr::str_pad(
1:length(folders_to_make),
3,
pad = "0"
),
"_",
folders_to_make
),
path = paste0(
"../uwin-dataset/data/",
tolower(cities$areaAbbr[city]),"/"
),
stringsAsFactors = FALSE
)
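# e.g., for a (hypothetical) city "chil" the first row would read:
# season = "JA11", folder = "001_JA11", path = "../uwin-dataset/data/chil/"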
# tack on the source of the data.
folders_to_make <- suppressWarnings(
dplyr::left_join(
folders_to_make,
data.frame(
season = names(came_from),
source = unname(came_from),
stringsAsFactors = FALSE
),
by = "season"
)
)
# make any NA be CLOUD
folders_to_make$source[is.na(folders_to_make$source)] <- "CLOUD"
# and check to see if the number on the folder needs to change
# (a new earliest season would shift the numbering)
if(all(!is.na(just_seasons))){
folders_to_make <- suppressWarnings(
dplyr::left_join(
folders_to_make,
data.frame(
season = just_seasons,
og_folder = season_folders,
stringsAsFactors = FALSE
),
by = "season"
)
)
}
# start by moving everything but the cloud data over.
if(
!file.exists(
paste0(data_dest,"/", tolower(cities$areaAbbr[city]))
)
){
dir.create(
paste0(data_dest,"/", tolower(cities$areaAbbr[city]))
)
}
# start making the folders now!
for(i in 1:nrow(folders_to_make)){
# need to move things differently if the og folders
# differ from the new folders
flag <- folders_to_make$folder[i] != folders_to_make$og_folder[i] &
!is.na(folders_to_make$og_folder[i])
if(
file.exists(
paste0(
folders_to_make$path[i],
ifelse(
flag,
folders_to_make$og_folder[i],
folders_to_make$folder[i]
)
)
) &
folders_to_make$source[i] != "CLOUD"
){
dir.create(
paste0(
data_dest,
"/",tolower(cities$areaAbbr[city]),"/",
folders_to_make$folder[i]
)
)
if(flag){
# We need to list all the files in that folder
to_ship <-
list.files(
paste0(
folders_to_make$path[i],
folders_to_make$og_folder[i]
),
full.names = TRUE
)
file.copy(
to_ship,
paste0(
data_dest,
"/",tolower(cities$areaAbbr[city]),
"/",folders_to_make$folder[i]
)
)
} else {
file.copy(
paste0(
folders_to_make$path[i],
folders_to_make$folder[i]
),
paste0(
data_dest,
"/",tolower(cities$areaAbbr[city])
),
recursive = TRUE
)
}
}
}
# Deal with cloud data. If we have the data in det_list
# it needs to be formatted and saved in the correct way. If we
# don't (for some reason) we just copy over the old data.
for(i in 1:nrow(folders_to_make)){
if(folders_to_make$source[i] != "CLOUD"){
next
}
# locate where in det_list the season of data lives.
my_season <- which(unq_se == folders_to_make$season[i])
# modify the dates for the sampling window.
tmp <- format(
c(
buffers$lower[my_season],
buffers$upper[my_season]
),
format = "%m/%d/%y"
)
# drop leading zeros from months and days
tmp <- gsub("^0", "", tmp)
tmp <- gsub("/0", "/", tmp)
# make the first two rows of the file
first_two <- matrix(
c("Seasons Legend:", "Start Date", "End Date",
"Season 1", tmp),
ncol = 3, nrow = 2, byrow = TRUE
)
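# for the JA11 example above, the first two lines of the csv would be:
# Seasons Legend:,Start Date,End Date
# Season 1,12/18/10,2/14/11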
fpath <- paste0(
data_dest,
"/",tolower(cities$areaAbbr[city]),"/",
folders_to_make$folder[i]
)
fname <- paste0(
"/",
tolower(cities$areaAbbr[city]),
"_",
tolower(folders_to_make$season[i]),
".csv"
)
if(!file.exists(fpath)){
dir.create(fpath)
}
# write the first two rows of data.
write.table(
first_two,
paste0(fpath,fname),
quote = FALSE,
row.names = FALSE,
col.names = FALSE,
sep = ","
)
# add a blank line
write.table(
"\n",
paste0(fpath, fname),
quote = FALSE,
row.names = FALSE,
col.names = FALSE,
append = TRUE,
eol = ""
)
# and then the detection data. Suppress warnings
# because write.table() just tells us it's appending column names
# to the file, which is what we want.
suppressWarnings(
write.table(
det_list[[my_season]],
paste0(fpath, fname),
quote = FALSE,
row.names = FALSE,
col.names = TRUE,
append = TRUE,
sep = ","
)
)
}
}
# check to see if there are any cities with data that were not
# brought over.
# get folder names in uwin-dataset
fnames_uwds <- list.files(
"../uwin-dataset/data_2023-02-13/",
"^\\w\\w\\w\\w$"
)
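# ("^\\w\\w\\w\\w$" matches folders whose name is exactly four
# word characters, i.e. the four-letter city abbreviations)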
# and the folder names in new dataset
fnames_new <- list.files(
data_dest,
"^\\w\\w\\w\\w$"
)
to_add <- fnames_uwds[which(!fnames_uwds %in% fnames_new)]
if(length(to_add) > 0){
for(i in 1:length(to_add)){
dir.create(
paste0(data_dest,"/", to_add[i])
)
# check how many folders we need to recreate
to_make <- list.files(
paste0("../uwin-dataset/data/",to_add[i]),
"^0"
)
for(j in 1:length(to_make)){
dir.create(
paste0(data_dest,"/",to_add[i],"/",to_make[j])
)
file.copy(
paste0("../uwin-dataset/data/",to_add[i],"/",to_make[j]),
paste0(data_dest,"/",to_add[i]),
recursive = TRUE
)
}
}
}