################ Utils ################
seqvec <- Vectorize(seq.default, vectorize.args = c("from", "to"))
smooth_vts <- function (data, interval = 127) {
out = tibble::tibble(time = data$time, volts = stats::runmed(data$volts, interval))
return(out)
}
pd_helper <- function (data) {
waveform = wave_group = NULL
rm(list = c("waveform", "wave_group"))
out <- data %>%
dplyr::mutate(waveform = dplyr::if_else(waveform %in% c("pd", "pd1", "pd2"),
"pd", "non")) %>%
dplyr::mutate(wave_group = rep(1:length(rle(waveform)[[1]]),
rle(waveform)[[1]])) %>%
dplyr::group_by(wave_group)
# returns grouped tibble with pd and non, grouped into wave_groups
return(out)
}
################### A ##############
# run on 24 hr data as a whole
a_label <- function (data, a_o = c(0.75, 0.5, 1, 1.25), a_drop = 0.75) {
out <- a_times(data, a_o = a_o, a_drop = a_drop)
return(out)
}
# takes raw data
a_times <- function (data, a_o = c(0.75, 0.5, 1, 1.25), a_drop = 0.75) {
udat <- a_ao(data, a_o = a_o, a_drop = a_drop)
times = seq(from = udat$time[1], to = udat$time[2], by = 0.01)
out = data %>%
dplyr::mutate(a = dplyr::if_else(round(time, 2) %in% round(times, 2), "A", NA_character_))
return(out)
}
# takes raw data
a_ao <- function (data, a_o = c(0.75, 0.5, 1, 1.25), a_drop = 0.75) {
# check for feeding activity at very beginning
# choose first quantile (lowest value) for beginning, or 0 if feeding immediate
begin = dplyr::if_else(data$volts[1] > -0.1, quantile(data$volts[1:1000])[[1]], 0)
# begin = ifelse(data$volts[1] > -0.1, quantile(data$volts[1:1000])[[1]], 0)
alpha <- data %>%
# because before feeding, dc is only voltage
dplyr::filter(volts < begin - a_drop*sd(volts, na.rm = TRUE)) %>%
dplyr::slice_head() %>%
dplyr::select(time, volts)
omega <- data %>%
dplyr::mutate(sdvolts = sd(volts, na.rm = TRUE)) %>%
# select time after a start
dplyr::filter(time >= alpha$time) %>%
# cut windows to get smoothed state of voltage
mutate(win = cut(time, breaks = floor(length(time)/25000), labels = FALSE)) %>%
group_by(win) %>%
dplyr::summarise(upspike = max(volts) - median(volts), delta = max(volts) - min(volts),
time = max(time), volts = utils::tail(volts), sdvolts = sdvolts[1]) %>%
dplyr::filter(upspike < a_o[1]*sdvolts[1] & upspike > a_o[2]*sdvolts[1] &
delta > a_o[3]*sdvolts[1], delta < a_o[4]*sdvolts[1]) %>%
dplyr::ungroup() %>%
dplyr::slice_head() %>%
dplyr::select(time, volts)
out <- rbind(alpha, omega)
return(out)
}
########### G ################
# works for one G in 24 hr period
g_label <- function (data, g_drop = 0.75) {
out <- g_times(data, g_drop)
return(out)
}
g_times <- function (data, g_drop = 0.75) {
udat <- g_ao(data, g_drop)
if (is.null(udat)) { # if no g, ao will return null
out = data %>%
dplyr::mutate(g = NA_character_)
} else {
times = seq(from = udat$time[1], to = udat$time[2], by = 0.01)
out = data %>%
dplyr::mutate(g = dplyr::if_else(round(time, 2) %in% round(times, 2), "G", NA_character_))
# mutate(g = case_when(
# round(time, 2) %in% round(times, 2) ~ "g"
# ))
}
return(out)
}
g_ao <- function (data, g_drop = 0.75) {
aend <- data %>%
dplyr::filter(waveform == "A") %>%
dplyr::slice_tail()
# check for feeding activity at very beginning
begin = if_else(data$volts[1] > -0.1, mean(data$volts[1:1000]), 0)
# begin = ifelse(data$volts[1] > -0.1, mean(data$volts[1:1000]), 0)
rough <- data %>%
# cut half hour windows to get smoothed state of voltage
dplyr::mutate(win = cut(time, breaks = floor(length(time)/180000),
labels = FALSE)) %>%
dplyr::group_by(win) %>%
# for each half hour period, find average volts
dplyr::mutate(winavg = mean(volts, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
# filter times before A end
dplyr::filter(time > aend$time) %>%
# average volts must be greater than _*sd below beginning
dplyr::filter(winavg > begin - g_drop*sd(volts, na.rm = TRUE))
if (is.na(rough$time[1])) { # if no G, return NULL
out = NULL
} else {
rough <- rough %>%
# because of averaging, will miss beginning and end frames
dplyr::summarise(time = seq(from = min(time) - 0.5*60*60,
to = max(time) + 0.5*60*60, by = 0.01))
alpha = data %>%
# calculate alpha target before filtering down data
dplyr::mutate(atar = begin - g_drop*sd(volts, na.rm = T)) %>%
# keep first hour
dplyr::filter(time >= min(rough$time) & time < (min(rough$time) +
1*60*60)) %>%
# filter down to only passing points
dplyr::filter(volts > atar) %>%
# calculate time difference to check for outliers
dplyr::mutate(timediff = c(0, diff(time)))
if (any(alpha$timediff > 0.5)) {
alpha <- alpha %>%
# filter down to time jump points only
dplyr::filter(timediff > 0.5) %>%
# select last time jump
dplyr::slice_tail() %>%
# leave commented for now, not sure if theoretically sound
# add time jump from 2 lines above
# mutate(time = time + timediff) %>%
select(time, volts)
} else {
alpha <- alpha %>%
dplyr::slice_head() %>%
dplyr::select(time, volts)
}
omega = data %>%
# calculate omega target before filtering down data
mutate(otar = begin - g_drop*sd(volts, na.rm = T)) %>%
# keep last hour
dplyr::filter(time <= max(rough$time) & time > (max(rough$time) -
1*60*60)) %>%
# filter down to only passing points
dplyr::filter(volts > otar) %>%
# check if more than 1 sec from previous measurement
mutate(timediff = c(0, diff(time, 1)),
# if large gap, then false
timecheck = dplyr::if_else(timediff < 0.5, TRUE, FALSE)) %>%
# select first up to first fail (length of first run)
dplyr::slice(1:rle(timecheck)[[1]][1]) %>%
# select last time that passes
tail(1) %>%
dplyr::select(time, volts)
out = rbind(alpha, omega)
}
return(out)
}
######################## E ################
# run on 24 hr data as a whole
e_label <- function (data, e_var = 0.1, e_o = c(2, 1.25), e_low = 1.25) {
udat = data %>%
e_times(., e_var = e_var, e_o = e_o, e_low = e_low) %>%
select(time, volts, we)
# find the subforms
out <- udat %>%
left_join(e_sub(udat), by = c("time", "volts", "we"))
########## replaced, keep for now
# # bandaid filter
# # if no deep basin of e then don't bother evaluating
# if (quantile(data$volts)[1] > -1.5) {
# out = data %>%
# mutate(subform = NA_character_)
# # otherwise, continue
# } else {
# udat = data %>%
# # add waveform e label
# e_times(., e_var = e_var, e_o = e_o, e_low = e_low) %>%
# select(time, volts, we)
# }
#
# if (subform) {
# # find the subforms
# out <- udat %>%
# left_join(e_sub(udat), by = c("time", "volts", "we"))
# # if not subform, arbitrary E1
# } else {
# out = udat %>%
# mutate(subform = ifelse(we == "e", "E1", NA_character_))
# }
########## End old code
return(out)
}
# takes raw data
e_ao <- function (data, e_o = c(2, 1.25)) {
out <- data %>%
dplyr::mutate(we = dplyr::case_when(
# will be positive bc voltage drops - bigger minus smaller
# very large lag because gradual voltage drop
volts - lead(volts, 75000L) > e_o[1]*sd(volts, na.rm = TRUE) ~
"start",
# smaller lag because steeper voltage rise
# og 5000
volts - lag(volts, 50000L) > e_o[2]*sd(volts, na.rm = TRUE) ~
"end"
)) %>%
dplyr::filter(!is.na(we)) %>%
# add grouping index:
dplyr::mutate(idx = rep(1:length(rle(we)[[1]]), rle(we)[[1]])) %>%
dplyr::group_by(idx) %>%
dplyr::mutate(index = dplyr::case_when(
# og max
we == "start" ~ which.max(time),
we == "end" ~ which.min(time)
)) %>%
dplyr::slice(index[1]) %>%
dplyr::ungroup()
return(out)
}
e_check <- function (data) {
# if not enough rows, arbitrary false
if (nrow(data) < 5) {
out = 10
} else {
# receive each "waveform" data
middle <- data %>%
# cut each into 5 windows
mutate(grp = cut(time, breaks = 5)) %>%
group_by(grp) %>%
summarise(sdvolts = sd(volts)) %>%
# select only one section, avoid beginning irregularities
slice(4)
out <- middle$sdvolts
}
# returns sd in the middle cut
return(out)
}
elow_check <- function (data) {
# if not enough rows, arbitrary false
if (nrow(data) < 3) {
test = 0
} else {
# receive each waveform data
middle <- data %>%
# cut each into 3 windows (beginning, bottom, end)
mutate(grp = cut(time, breaks = 3)) %>%
group_by(grp) %>%
summarise(low = mean(volts)) %>%
# select only middle, should be u bottom
slice(2)
test <- middle$low
}
return(test)
}
# takes raw data
e_times <- function (data, e_var = 0.1, e_o = c(2, 1.25), e_low = 1.25) {
ao = e_ao(data, e_o = e_o)
# if incorrectly picks starting end
if(slice_head(ao)$we == "end") {
# drop first
ao = ao[2:(length(ao$we)), ]
}
# if found start with no end, add end bc e is very long
if(slice_tail(ao)$we == "start") {
ao = ao %>%
# time and volts from last row of data
summarise(time = c(time, max(data$time)), # +25 if later -lag
volts = c(volts, slice_tail(data)$volts),
we = c(we, "end"))
}
# remove start/end that ids an e with incorrect duration
ao <- ao %>%
mutate(time = case_when(
# duration less than 25 secs (impossible with lag)
# filter to longer than 3 minutes (for now)
we == "start" & lead(time) - time < 3*60 ~ NA_real_,
we == "end" & time - lag(time) < 3*60 ~ NA_real_,
TRUE ~ time
)) %>%
filter(!is.na(time))
times <- unlist(seqvec(from = ao$time[ao$we == "start"],
# could subtract 25 to "remove" the effects of lag (see e_ao)
to = ao$time[ao$we == "end"], by = 0.01))
out <- data %>%
mutate(we = if_else(round(time, 2) %in% round(times, 2), "e", "non-e"),
# case_when(round(time, 2) %in% round(times, 2) ~ "e", TRUE ~ "non-e"),
sdvolts = sd(volts),
low_tar = mean(volts, na.rm = TRUE) - e_low*sdvolts[1]
) %>%
# group by each individual waveform
mutate(idx = rep(1:length(rle(we)[[1]]), rle(we)[[1]])) %>%
group_by(idx) %>%
# set targets
mutate(e_tar = e_check(data.frame(time, volts)),
low_test = elow_check(data.frame(time, volts)),
low_pf = low_test < low_tar,
# only pass if low sd
e_pf = e_tar < e_var*sdvolts[1],
we = case_when(
low_pf == FALSE ~ "non-e",
e_pf == FALSE ~ "non-e",
TRUE ~ we )) %>%
ungroup()
return(out)
}
# receives full e labeled data
e_mid <- function (data) {
if (!any(data$we == "e")) { # if no e, midpoint is NA
out = data %>% mutate(subform = NA)
} else {
out <- data %>%
# add grouping index:
mutate(idx = rep(1:length(rle(we)[[1]]), rle(we)[[1]])) %>%
# select only each e instance
filter(we == "e") %>%
group_by(idx) %>%
# cut _ second windows to capture spikes
mutate(win = cut(time, breaks = floor(length(time)/500),
labels = FALSE)) %>%
group_by(win) %>%
# bc E2 shows higher amplitude compared to E1
# those large negative voltage spikes
mutate(test = length(volts[volts > mean(volts)])/length(volts)) %>%
# set target for percent above quantile
mutate(target = quantile(test)[4],
subform = if_else(test > target, "E2", "E1")) %>%
# case_when(
# # E2 has larger percent above because only spikes below
# test > target ~ "E2",
# # anything else labeled E1
# TRUE ~ "E1"
# )) %>%
ungroup()
}
if (!is.na(out$subform[1])) { # only if e actually exists check for only E1
# if quantile is only sampling on E1 waveform, the mean test will be low
if (mean(out$test, na.rm = TRUE) <= 0.5) {
# since no E2, return no midpoint
out = out %>% mutate(subform = "none")
} else if (!any(out$subform == "E2")) { # if no identified E2
out = out %>% mutate(subform = "none")
} else { # has E, has E2
out <- out %>%
group_by(idx) %>%
filter(subform == "E2") %>%
slice(which.min(time)) %>%
ungroup()
}
}
# returns time, volts, of midpoint
return(out)
}
# receive full e labeled
e_sub <- function (data) {
mid = e_mid(data)
if (is.na(mid$subform[1])) { # no E, midpoint is NA, subforms NA as well
out <- data %>%
mutate(subform = NA_character_)
} else if (mid$subform[1] == "none") { # only E1, midpoint is 'none'
out <- data %>%
# select only waveform e times
filter(we == "e") %>%
mutate(subform = "E1")
} else { # has e and has midpoint
out <- data %>%
# add grouping index:
mutate(idx = rep(1:length(rle(we)[[1]]), rle(we)[[1]])) %>%
# select only each e instance, group
filter(we == "e") %>%
group_by(idx) %>%
mutate(midpoint = if_else(round(time, 2) %in% round(mid$time, 2), "mid", "filler"),
# case_when(
# round(time, 2) %in% round(mid$time, 2) ~ "mid",
# TRUE ~ "filler"), # put in mid times
subform = case_when(
# could adjust times here if warranted
time <= time[midpoint == "mid"] ~ "E1",
time > time[midpoint == "mid"] ~ "E2"
)) %>% # since already grouped, time ranges per e
ungroup()
}
return(out)
}
################# pd ##################
pd_label <- function (data, ...) {
# if no pd exists in the data, return NA for pdsubform/pd_group
if (is.null(pd_times(data, ...))) {
out = data %>%
mutate(pdsubform = NA_character_)
# otherwise, proceed
} else {
# find the whole pd before separation into subforms
whole <- pd_times(data, ...) %>%
select(time, volts, pd)
# find the pd subforms within each pd
out <- whole %>%
left_join(pd_sub(whole), by = c("time", "volts", "pd"))
}
return(out)
}
# receive data without a, e, and g
pd_ao <- function (data, pd_win = 1) {
out <- data %>%
mutate(win = ggplot2::cut_number(rows, n = pd_win, labels = FALSE)) %>%
group_by(win) %>%
mutate(relsd = sd(volts)) %>%
# return to original ungrouped format, now with relative sd
ungroup() %>%
mutate(drop = volts - lead(volts, 1000L),
rise = volts - dplyr::lag(volts, 700L),
pd = case_when(
drop > 2.75*relsd ~ "start",
# rise must start from trough
rise > 2*relsd & lag(volts, 700) <
(mean(volts) -1.5*sd(volts)) ~ "end"
)) %>%
dplyr::filter(!is.na(pd)) %>%
# add grouping index to group starts and ends
mutate(idx = rep(1:length(rle(pd)[[1]]), rle(pd)[[1]])) %>%
group_by(idx) %>%
mutate(index = dplyr::if_else(pd[1] == "start", which.max(drop), which.max(rise))
) %>%
# select best fit starts and ends
slice(index[1]) %>%
ungroup() %>%
select(time, pd)
return(out)
}
# helper to define u shape
u_check <- function (data) {
# receive each "pd" data
middle <- data %>%
# cut each "pd" into 3 windows (beginning, bottom, end)
mutate(grp = cut(time, breaks = 3)) %>%
group_by(grp) %>%
summarise(middle = min(volts)) %>%
# select only middle, should be u bottom
slice(2)
# test should be larger for real pd
test = mean(data$volts[1], data$volts[length(data$volts)], na.rm = TRUE) -
middle$middle
return(test)
}
# helper to check for voltage drop
low_check <- function (data) {
# if not pd, could have non-consec times; arbitrary false
if (data$pd[1] == "non-pd") {
out = 0
} else {
# receive each "pd" data
middle <- data %>%
# cut each "pd" into 3 windows (beginning, bottom, end)
mutate(grp = cut(time, breaks = 3)) %>%
group_by(grp) %>%
summarise(low = mean(volts)) %>%
# select only middle, should be u bottom
slice(3)
# slice(low_sl)
out <- middle$low
}
# returns mean in the middle cut
return(out)
}
# default one window, otherwise indicate # desired
# e.g. if half hour windows, give 2x the data length (hours)
# ... allows to select low_check: low_sl value
pd_times <- function (data, pd_low = 1.25, pd_win = 1) {
# get starts and ends
udat <- pd_ao(data, pd_win = pd_win) %>%
ungroup()
# if incorrectly picks up ending start
if(slice_tail(udat)$pd == "start") {
# drop last by indexing to penultimate
udat = udat[1:(length(udat$pd) - 1), ]
}
# if incorrectly picks starting end
if(slice_head(udat)$pd == "end") {
# drop first
udat = udat[2:(length(udat$pd)), ]
}
# remove start/end that ids a pd with incorrect duration
udat <- udat %>%
mutate(time = case_when(
# duration less than 20secs
pd == "start" & lead(time) - time <= 20 ~ NA_real_,
pd == "end" & time - lag(time) <= 20 ~ NA_real_,
# duration greater than 1.5 minutes (avg duration around 45 secs)
pd == "start" & lead(time) - time >= 5*60 ~ NA_real_,
pd == "end" & time - lag(time) >= 5*60 ~ NA_real_,
TRUE ~ time
)) %>%
filter(!is.na(time))
times <- unlist(seqvec(from = udat$time[udat$pd == "start"],
to = udat$time[udat$pd == "end"],
by = 0.01))
out <- data %>%
mutate(pd = if_else(round(time, 2) %in% round(times, 2), "pd", "non-pd")) %>%
# case_when(
# # times between id'ed starts and ends labeled pd
# round(time, 2) %in% round(times, 2) ~ "pd",
# TRUE ~ "non-pd"
# )) %>%
# below is to remove non u shaped:
#
# group by each individual pd/non-pd
mutate(idx = rep(1:length(rle(pd)[[1]]), rle(pd)[[1]])) %>%
group_by(idx) %>%
# currently test returns test value, can do tf later
# to eliminate redudancy
mutate(test = u_check(data.frame(time, volts))) %>%
ungroup() %>%
# need get relative sd again to determine
# cut windows to get relative sd
mutate(win = cut_number(rows, n = pd_win, labels = FALSE)) %>%
# mutate(win = cut(rows, breaks = ceiling(length(rows)/180000),
# labels = FALSE)) %>%
# mutate(win = 1) %>%
group_by(win) %>%
mutate(relsd = sd(volts),
lowtar = mean(volts, na.rm = T) - pd_low*relsd) %>%
ungroup() %>%
# group by each individual pd/non-pd, for relsd breakpoint
# in middle of pd
mutate(idx = rep(1:length(rle(pd)[[1]]), rle(pd)[[1]])) %>%
group_by(idx) %>%
mutate(u_thresh = 3*mean(relsd, na.rm = T),
low_thresh = low_check(data.frame(time, volts, pd)),
u_pf = test > u_thresh,
# min needs to be lower than low target
low_pf = low_thresh < lowtar,
pd = case_when(
# if u_pf fails, non-pd
u_pf == FALSE ~ "non-pd",
# if low_pf fails, non-pd
low_pf == FALSE ~ "non-pd",
TRUE ~ pd )
) %>%
ungroup()
# return data frame with pd label
return(out)
}
# will receive zebras output
pd_mid <- function (data) {
out <- data %>%
# cut _ second windows to get relative sd
mutate(win = cut(time, breaks = ceiling(length(time)/180000),
labels = FALSE)) %>%
group_by(win) %>%
# find sd for each _ sec window
mutate(relsd = sd(volts)) %>%
# ungroup to return to original
ungroup() %>%
# index is each pd or non pd period
mutate(pd_group = rep(1:length(rle(pd)[[1]]), rle(pd)[[1]])) %>%
filter(pd == "pd") %>%
# group to each individual pd
group_by(pd_group) %>%
# rough pick of drops that could be the correct one
mutate(drop = lag(volts, 300L) - volts,
midpoint = case_when(
# want sig drop that's not from large beginnning
drop > 0.25*relsd &
drop < 2*relsd ~ "mid"
)) %>%
# filter out points b4 halfway point (pd1 longer)
# and filter after 75% way through
filter(time >= quantile(time)[3] &
time <= quantile(time)[4]) %>%
# filter down to possible midpoints
filter(!is.na(midpoint)) %>%
# choose largest drop
slice(which.max(drop)) %>%
# to drop pd_group
ungroup() %>%
select(time, volts, midpoint)
return(out)
}
# must receive "whole" data
pd_sub <- function (data) {
out <- data %>%
# whales is output of pd_mid
left_join(pd_mid(data), by = c("time", "volts")) %>%
# group each pd or non pd period
mutate(pd_group = rep(1:length(rle(pd)[[1]]), rle(pd)[[1]])) %>%
filter(pd == "pd") %>%
group_by(pd_group) %>%
# filter out "pds" without midpoint
filter(any(midpoint == "mid")) %>%
mutate(midpoint = dplyr::if_else(is.na(midpoint), "filler", midpoint)) %>%
# case_when(
# is.na(midpoint) ~ "filler",
# TRUE ~ midpoint
# )) %>%
mutate(pdsubform = case_when(
# when midpoint does not exist, list as NA, not pd
# probably unnecesary see above
# is.na(midpoint == "mid") ~ NA_character_,
time <= time[midpoint == "mid"] ~ "pd1",
# may need to re-add one second lag
time > time[midpoint == "mid"] ~ "pd2"
))
return(out)
}
##################### Wave ##################
# shiny specific function
wave_label_eg <- function (data, e_var = 0.1, e_o = c(2, 1.25), e_low = 1.25,
g_drop = 0.75) {
edat <- data %>%
# a use smoothed data to label, but use raw data for actual kept volts
mutate(e = e_label(data, e_var = e_var, e_o = e_o, e_low = e_low)$subform,
waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(e) ~ e,
TRUE ~ NA_character_))
out <- edat %>%
mutate(g = g_label(., g_drop)$g,
waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(g) ~ g,
TRUE ~ NA_character_))
return(out)
}
# potentially same as stand-alone, unsure
wave_label_pdc <- function (data, ...) {
addg <- data
# find end for removing pdformat below
aend <- addg %>%
filter(waveform == "A") %>%
slice_tail()
udat <- addg %>%
# filter out e, g, and a
filter(is.na(waveform)) %>%
# filter early times before feeding, a, and 60s after a
filter(time >= aend$time + 1*60) %>%
# add rows column to cut windows on
mutate(rows = 1:n()) %>%
select(-waveform)
# find pd times
pds <- pd_label(udat, ...) %>%
filter(pd == "pd") %>%
mutate(time = round(time, 2))
if (any(!is.na(addg$g))) {
gend <- addg %>%
filter(waveform == "G") %>%
slice_tail()
gtimes <- seq(gend$time + 0.01, gend$time + 500, by = 0.01)
} else {gtimes = NA}
# add in pds
withpd <- addg %>%
# wopd has all times, pds only has pd times
left_join(pds, by = c("time", "volts")) %>%
mutate(waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(pdsubform) ~ pdsubform,
TRUE ~ NA_character_
)) %>%
mutate(waveform = ifelse(
# if close to G, wipe out
round(time, 2) %in% round(gtimes, 2), NA, waveform
)) %>%
select(time, volts, waveform)
# add waveform C, fix A
out <- withpd %>%
mutate(waveform = case_when(
# first set no activity + A as pre-labeled
time <= aend$time ~ waveform,
# any remaining intervening time becomes C
is.na(waveform) ~ "C",
# keep all other labels
TRUE ~ waveform
))
return(out)
}
################# Probe ###################
# works for n48, s236, s196
probe_a <- function (data) {
# check for feeding activity at very beginning
begin = ifelse(data$volts[1] > -0.1, mean(data$volts[1:1000]), 0)
data <- epgminer:::smooth_vts(data, 127)
bandpass <- data %>%
# cut windows to get smoothed state of voltage
mutate(win = cut(time, breaks = floor(length(time)/5000),
labels = FALSE)) %>%
group_by(win) %>%
# for each period, find average volts
mutate(winpeak = max(volts, na.rm = TRUE),
winval = min(volts, na.rm = TRUE)) %>%
ungroup() %>%
# mutate(high = winpeak > begin - probe_drop*sd(volts, na.rm = T)) %>%
# average volts must be greater than _*sd below beginning
# og is 0.75sd
mutate(hightar = (max(volts, na.rm = T) + min(volts, na.rm = T))/3,
highpass = winpeak > (max(volts, na.rm = T) + min(volts, na.rm = T))/3,
lowtar = (max(volts, na.rm = T) + min(volts, na.rm = T))/2,
lowpass = winval < lowtar)
possible <- bandpass %>%
mutate(bandpass = ifelse(highpass & lowpass, TRUE, FALSE)) %>%
# i think need to rle from high/low passes together
# start here
mutate(idx = rep(1:length(rle(bandpass)[[1]]), rle(bandpass)[[1]])) %>%
group_by(idx) %>%
mutate(exit = ifelse(highpass == TRUE & lowpass == TRUE &
# seems the length filter isn't working properly?
length(idx) > 20000, TRUE, FALSE),
tmp = length(idx))
exit <- possible[possible$exit, ]
starts <- exit %>%
select(time, volts, idx) %>%
group_by(idx) %>%
slice(c(1, n())) %>%
mutate(ao = c("start", "end")) %>%
ungroup() %>%
mutate(bridge = case_when(
ao == "start" & (time - lag(time) < 100) ~ TRUE,
TRUE ~ FALSE
)) %>%
filter(!bridge & ao == "start")
return(starts)
}
# works
probe_o <- function (data) {
starts <- probe_a(data)
data <- epgminer:::smooth_vts(data, 127)
ends <- list()
for (i in 1:nrow(starts)) {
search <- data %>%
filter(time >= starts$time[i]) %>%
mutate(hightar = (max(volts, na.rm = T) + min(volts, na.rm = T))/4,
highpass = volts > hightar)
gaps <- search %>%
mutate(idx = rep(1:length(rle(highpass)[[1]]), rle(highpass)[[1]])) %>%
group_by(idx) %>%
mutate(size = length(idx)) %>%
filter(size > 10000) %>%
ungroup() %>%
slice(which(highpass)[1]:n())
ends[[i]] <- gaps[which(!gaps$highpass), ] %>%
slice(1)
}
out <- ends %>%
Reduce(function(dtf1,dtf2) rbind(dtf1, dtf2), .)
return(out)
}
# if there is non-probing activity in the middle
# returns each section as list entry
probe_split <- function (data) {
a <- c(probe_a(data)$time, max(data$time))
o <- probe_o(data)$time
splits <- list()
splits[[1]] <- data %>%
filter(time <= a[1])
for (i in 1:length(o)) {
splits[[i + 1]] <- data %>%
filter(time >= o[i] & time < a[i+1])
}
# remove empty list entries
idx <- c()
for (i in 1:length(splits)) {
idx[i] <- dplyr::if_else(dim(splits[[i]])[1] == 0, FALSE, TRUE)
}
return(splits[idx])
}
# probe_split_old <- function (data) {
# a <- c(probe_a(data)$time, max(data$time))
# o <- probe_o(data)$time
#
# splits <- list()
# splits[[1]] <- data %>%
# filter(time <= a[1])
# for (i in 1:length(o)) {
# splits[[i + 1]] <- data %>%
# filter(time >= o[i] & time < a[i+1])
# }
#
# return(splits)
# }
# new dev, untested
probe_apply <- function (data) {
# take in probe_split data
split <- probe_split(data)
as <- list()
for (i in 1:length(split)) {
as[[i]] <- a_ao(split[[i]], a_o = c(0.75, 0.5, 1, 1.25), a_drop = 0.75)
}
out <- rbindlist(as)
return(out)
}
# want to take data, remove probe and a sections
# receive a labelled data - a_data_probe, shiny only
probe_comb <- function (data, e_var = 0.1, g_drop = 0.75) {
feed <- rbindlist(probe_split(data))
# using a_labeled data (has all time points)
adat <- data %>%
# keep only splits (feeding times)
filter(round(time, 2) %in% round(feed$time, 2))
# will need different wave_label functions
out <- wave_label_probe(adat, e_var = e_var, g_drop = g_drop)
return(out)
}
# take in a_labeled data
wave_label_probe <- function (data, e_var = 0.1, e_o = c(2, 1.25), e_low = 1.25,
g_drop = 0.75, ...) {
edat <- data %>%
mutate(e = e_label(data, e_var = e_var, e_o = e_o, e_low = e_low)$subform,
waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(e) ~ e,
TRUE ~ NA_character_))
aeg <- edat %>%
# technically will only find g is after last bit of a
# right now think no g if nonprobe middle, to be fixed later perhaps
mutate(g = g_label(., g_drop)$g,
waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(g) ~ g,
TRUE ~ NA_character_))
# find feeding beginning
a_a <- aeg %>%
filter(waveform == "A") %>%
slice_head()
pdform <- aeg %>%
# filter out aeg and non-feed before first a
filter(is.na(waveform) & time >= a_a$time) %>%
filter(time >= a_a$time) %>%
# add rows column to cut windows on
mutate(rows = 1:n()) %>%
select(time, volts, rows)
# find pd times
pds <- pd_label(pdform, ...) %>%
filter(pd == "pd") %>%
mutate(time = round(time, 2))
if (any(!is.na(aeg$g))) {
gend <- aeg %>%
filter(waveform == "G") %>%
slice_tail()
gtimes <- seq(gend$time + 0.01, gend$time + 500, by = 0.01)
} else {gtimes = NA}
# add in pds
withpd <- aeg %>%
left_join(pds, by = c("time", "volts")) %>%
mutate(waveform = case_when(
!is.na(waveform) ~ waveform,
!is.na(pdsubform) ~ pdsubform,
TRUE ~ NA_character_
)) %>%
mutate(waveform = ifelse(
# if close to G, wipe out
round(time, 2) %in% round(gtimes, 2), NA, waveform
)) %>%
select(time, volts, waveform)
# add waveform C, fix A
out <- withpd %>%
mutate(waveform = case_when(
# first set no activity as pre-labeled
time <= a_a$time ~ waveform,
# any remaining intervening time becomes C
is.na(waveform) ~ "C",
# keep all other labels
TRUE ~ waveform
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.