## FUNCTION: Return complete summary with one row per participant
#' @export
full_summary <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2017-04-11T23:59:59Z') {
basic <- basic_summary(pt, filterStart = filterStart, filterEnd = filterEnd)
arrivals <- arrival_table(pt, filterStart = filterStart, filterEnd = filterEnd)
arrivals <- subset(arrivals, select = -c(pt))
# Difference from manual arrival and trigger
trigger_diff <- arrival_diff_avg(pt, filterStart = filterStart, filterEnd = filterEnd)
condensed <- arrival_condense(pt, diff = trigger_diff)
condensed <- subset(condensed, select = -c(pt))
# cbind all dataframes
ret <- cbind(basic, arrivals, condensed)
ret
}
## FUNCTION: Quantify arrivals
## For each at_venue arrival, return if it came from cold, warm, or hot category
#' @export
arrival_summary <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2017-04-11T23:59:59Z') {
for (k in 1:length(pt)) {
log <- read_pilr(data_set = "pilrhealth:mobile:app_log", schema = "1",
query_params = list(participant = pt[k]))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
ret <- data.frame(pt = character(), venue = character(),
arrival_time = as.Date(character()),
last_cat = character())
if (nrow(log)) {
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log = log[log$local_time > filterStart & log$local_time < filterEnd, ]
# Only include pollings with a category
log <- log[!is.na(log$args.category),]
log <- log[log$args.category != "",]
# Loop through all, find each time you enter 'at_venue'
i <- 1
polled_at_venue = FALSE
while (i <= nrow(log)) {
# Find initial instance of at_venue
if (log$args.category[i] == "at_venue" && !is.na(log$args.category[i])) {
# Find time since last reported as hot,warm,cold
j <- i - 1
while ((is.na(log$args.category[j]) || log$args.category[j] == "") && j > 1)
j <- j - 1
prev_cat <- log$args.category[j]
if (j <= 1) prev_cat <- "first"
temp <- data.frame(pt = pt[k], venue = log$args.nearest_venue[i],
arrival_time = log$local_time[i],
last_cat = prev_cat)
ret <- rbind(ret, temp)
# Loop through current set of 'at_venue' polls
while (log$args.category[i] == "at_venue" && i <= nrow(log)
&& !is.na(log$args.category[i])) i <- i + 1
}
i <- i + 1
}
}
else {}
if (k == 1) ret2 <- ret
else ret2 <- rbind(ret2, ret)
}
ret2
}
## FUNCTION: Return percentages of arrival_summary
#' @export
arrival_table <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2017-04-11T23:59:59Z') {
# For each participant entered summarize arrivals
for (i in 1:length(pt)) {
arrivals <- arrival_summary(pt[i], filterStart = filterStart, filterEnd = filterEnd)
totals <- as.data.frame(table(arrivals$last_cat))
from_hot <- as.numeric(totals[totals$Var1 == "hot",][2])
from_warm <- as.numeric(totals[totals$Var1 == "warm",][2])
from_cold <- as.numeric(totals[totals$Var1 == "cold",][2])
if (is.na(from_hot)) from_hot <- 0
if (is.na(from_warm)) from_warm <- 0
if (is.na(from_cold)) from_cold <- 0
temp <- data.frame(pt = pt[i], Hot_Arrivals = from_hot,
Warm_Arrivals = from_warm,
Cold_Arrivals = from_cold)
if (i == 1) ret <- temp
else ret <- rbind(ret, temp)
}
ret
}
## FUNCTION: Basic summary stats
#' @export
basic_summary <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2016-04-11T23:59:59Z') {
for (i in 1:length(pt)) {
# Read in data
log <- read_pilr(data_set = "pilrhealth:mobile:app_log", schema = "1",
query_params = list(participant = pt[i]))
venues <- read_pilr(data_set = "pilrhealth:liitah:personal_venue", schema = "1",
query_params = list(participant = pt[i]))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
if (nrow(log) == 0) {
temp <- data.frame(pt = paste0(pt[i], " (NO LOGIN)"),
Total_Venues = nrow(venues),
Total_Polls = 0,
Total_Triggers = 0, Polls_at_Venue = 0,
Hot_Polls = 0,
Warm_Polls = 0,
Cold_Polls = 0,
Last_Venue_Added = NA,
Last_Manual_Arrival_Log = NA,
Total_Manual_Arrival_Logs = 0,
Last_Poll = NA,
First_Poll = NA)
}
else {
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log = log[log$local_time > filterStart & log$local_time < filterEnd, ]
venues = venues[venues$local_time > filterStart & venues$local_time < filterEnd, ]
#training_recs <- read_pilr(data_set = "pilrhealth:liitah:personal_venue_training_record", schema = "1",
# query_params = list(participant = pt[i]))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log = log[log$local_time > filterStart & log$local_time < filterEnd, ]
if (nrow(venues) != 0) venues$local_time = venues$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
#training_recs$local_time = training_recs$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
#training_recs = training_recs[training_recs$local_time > filterStart & training_recs$local_time < filterEnd, ]
# ==== Summarize data ====
# Query data for some summary measures
polls = log[log$tag == "POLLING_SERVICE_ANDROID", ]
polls_at_location = polls[polls$args.category == "at_venue", ]
triggers = log[log$tag == 'ARRIVAL_TRIGGER',]
# This line for MANUAL_ARRIVAL
training_recs = log[log$tag == 'MANUAL_ARRIVAL',]
# Table of the summary measures
temp <- data.frame(pt = paste0(pt[i]),
Total_Venues = nrow(venues),
Total_Polls = nrow(polls),
Total_Triggers = nrow(triggers), Polls_at_Venue = nrow(polls_at_location),
Hot_Polls = polls[polls$args.category == "hot", ] %>% nrow(),
Warm_Polls = polls[polls$args.category == "warm", ] %>% nrow(),
Cold_Polls = polls[polls$args.category == "cold", ] %>% nrow(),
Last_Venue_Added = max(venues$local_time) %>% as.character(),
Last_Manual_Arrival_Log = max(training_recs$local_time) %>% as.character(),
Total_Manual_Arrival_Logs = nrow(training_recs),
Last_Poll = max(log$local_time) %>% as.character(),
First_Poll = min(log$local_time) %>% as.character())
}
if (i == 1) ret <- temp
else ret <- rbind(ret, temp)
}
if (!exists("ret")) ret <- temp
ret[ret == -Inf] <- NA
ret
}
## FUNCTION: Comparing locations in venues to training recs
#' @export
venue_diff <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2017-04-11T23:59:59Z') {
# Read in data from PiLR API
venues <- read_pilr(data_set = "pilrhealth:liitah:personal_venue", schema = "1",
query_params = list(participant = pt))
training <- read_pilr(data_set = "pilrhealth:liitah:personal_venue_training_record", schema = "1",
query_params = list(participant = pt))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
venues = venues[venues$local_time > filterStart & venues$local_time < filterEnd, ]
training = training[training$local_time > filterStart & training$local_time < filterEnd, ]
# Format and merge data frame. Keep track of first location entry.
venues <- rename(venues, c("id" = "venue_id", "trig_info.lon" = "lon", "trig_info.lat" = "lat"))
training <- rename(training, c("info.lon" = "lon", "info.lat" = "lat"))
# Merge venues with training records
venues <- subset(venues, select=c(timestamp, pt, venue_id, lon, lat))
training <- subset(training, select=c(timestamp, pt, venue_id, lon, lat))
# For each venue, find max/min/mean distance from matching training id's
ret <- data.frame(venue_id = character(), count = numeric(),
mean = numeric(), min = numeric(), max = numeric())
message("Distance in meters from original venue")
for (venue in venues$venue_id) {
training_sub <- training[training$venue_id == venue,]
venues_sub <- venues[venues$venue_id == venue,]
message(paste0("Venue ID: ", venue))
distlist <- numeric()
if (nrow(training_sub)) {
for (i in 1:nrow(training_sub)) {
dist <- distCosine(c(as.numeric(venues_sub$lon[1]), as.numeric(venues_sub$lat[1])),
c(as.numeric(training_sub$lon[i]), as.numeric(training_sub$lat[i])))
distlist <- append(distlist, dist)
message(dist)
}
temp <- data.frame(venue_id = venue, count = length(distlist),
mean = mean(distlist), min = min(distlist), max = max(distlist))
ret <- rbind(ret, temp)
}
ret
}
}
## FUNCTION: Time difference between arrival_trigger and manual_arrival
## Average per venue. Difference in minutes.
## Positive diff means arrival_trigger came first
## NA diff means no arrival_trigger detected near manual_arrival
#' @export
arrival_diff_avg <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2017-04-11T23:59:59Z') {
for (j in 1:length(pt)) {
# Read in data
log <- read_pilr(data_set = "pilrhealth:mobile:app_log", schema = "1",
query_params = list(participant = pt[j]))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
temp <- data.frame(pt = character(), venue = character(),
avg_diff = numeric(), no_triggers = numeric())
if (nrow(log)) {
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log = log[log$local_time > filterStart & log$local_time < filterEnd, ]
occurences <- data.frame(venue = character(), trigger_diff = numeric())
# Find each manual_arrival
i <- 1
while (i < nrow(log)) {
if (log$tag[i] == "MANUAL_ARRIVAL") {
venue <- log$args.response_value[i]
# Find time difference of arrival_trigger if present
if (i <= 20) sub <- log[1:(i+20),]
else if (i >= nrow(log)-20) sub <- log[(i-20):nrow(log),]
else sub <- log[(i-20):(i+20),]
if (!any(sub$tag == "ARRIVAL_TRIGGER")) diff <- NA
else {
index <- match("ARRIVAL_TRIGGER", sub$tag)
diff <- as.numeric(as.POSIXct(log$local_time[i], format = "%Y-%m-%dT%H:%M:%SZ") -
as.POSIXct(sub$local_time[index], format = "%Y-%m-%dT%H:%M:%SZ"))
}
occurences <- rbind(occurences, data.frame(venue = venue, trigger_diff = diff))
}
i <- i + 1
}
# Remove any NA venues
occurences <- subset(occurences, !is.na(venue))
# Summarize by venue
i <- 1
while (i <= length(unique(occurences$venue))) {
sub <- subset(occurences, venue == unique(occurences$venue)[i])
num_na <- count(subset(sub, is.na(trigger_diff)))
sub <- subset(sub, !is.na(trigger_diff))
avg <- mean(sub$trigger_diff, na.rm=TRUE)
if (is.nan(avg)) avg = NA
temp <- rbind(temp, data.frame(pt = pt[j], venue = unique(occurences$venue)[i],
avg_diff = avg, no_triggers = as.numeric(num_na)))
i <- i + 1
}
if (nrow(temp) == 0) temp <- data.frame(pt = pt[j], venue = "NO TRIGGERS",
avg_diff = 0, no_triggers = 0)
}
if (j == 1) ret <- temp
else ret <- rbind(ret, temp)
}
ret
}
## FUNCTION: Condense arrival_diff results into 1 row per participant for full summary
#' @export
arrival_condense <- function(pts, diff) {
if (nrow(diff)) {
for (i in 1:length(pts)) {
sub <- subset(diff, pt == pts[i])
tot <- sum(sub$no_triggers)
sub <- subset(sub, !is.na(avg_diff))
if (nrow(sub) > 0) {
avg <- mean(sub$avg_diff, na.rm=TRUE)
tot <- sum(sub$no_triggers)
temp <- data.frame(pt = pts[i], Average_Trigger_Diff = avg, No_Triggers = tot)
if (i == 1) ret <- temp
else ret <- rbind(ret, temp)
}
else {
temp <- data.frame(pt = pts[i], Average_Trigger_Diff = NA, No_Triggers = tot)
if (i == 1) ret <- temp
else ret <- rbind(ret, temp)
}
}
}
else {
ret <- data.frame(pt = NA, Average_Trigger_Diff = NA, No_Triggers = NA)
}
ret
}
## FUNCTION: Time difference between arrival_trigger and manual_arrival
## For every arrival instance. Difference in minutes.
## Positive diff means arrival_trigger came first
## NA diff means no arrival_trigger detected near manual_arrival
#' @export
arrival_diff_inst <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2016-04-11T23:59:59Z') {
for (j in 1:length(pt)) {
# Read in data
log <- read_pilr(data_set = "pilrhealth:mobile:app_log", schema = "1",
query_params = list(participant = pt[j]))
filterStart = filterStart %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
filterEnd = filterEnd %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log$local_time = log$local_time %>% as.POSIXlt(format = "%Y-%m-%dT%H:%M:%SZ")
log = log[log$local_time > filterStart & log$local_time < filterEnd, ]
occurences <- data.frame(pt = character(), local_time = character(),
venue = character(), trigger_diff = numeric())
# Find each manual_arrival
i <- 1
while (i < nrow(log)) {
if (log$tag[i] == "MANUAL_ARRIVAL") {
venue <- log$args.response_value[i]
time <- as.POSIXct(log$local_time[i], format = "%Y-%m-%dT%H:%M:%SZ")
# Find time difference of arrival_trigger if present
if (i <= 10) sub <- log[1:(i+10),]
else if (i >= nrow(log)-10) sub <- log[(i-10):nrow(log),]
else sub <- log[(i-10):(i+10),]
if (!any(sub$tag == "ARRIVAL_TRIGGER")) diff <- NA
else {
index <- match("ARRIVAL_TRIGGER", sub$tag)
diff <- as.numeric(as.POSIXct(log$local_time[i], format = "%Y-%m-%dT%H:%M:%SZ") -
as.POSIXct(sub$local_time[index], format = "%Y-%m-%dT%H:%M:%SZ"))
}
occurences <- rbind(occurences, data.frame(pt = pt[j], local_time = time,
venue = venue, trigger_diff = diff))
}
i <- i + 1
}
if (j == 1) ret <- occurences
else ret <- rbind(ret, occurences)
}
ret
}
compliance_summary <- function(pt, filterStart = '2014-04-10T14:00:01Z',
filterEnd = '2016-04-11T23:59:59Z') {
for (i in 1:length(pt)) {
no_triggers <- 0
compliance <- data.frame(pt = character(), Total_Reports = numeric(), Total_Triggers = numeric(),
Reports_without_Triggers = numeric(), Suggestion_0 = numeric(), Suggestion_1 = numeric(),
Suggestion_2 = numeric(), Suggestion_3 = numeric(), Pictures_Submitted = numeric(),
Venues_Added = numeric())
log <- read_pilr(data_set = "pilrhealth:mobile:app_log", schema = "1",
query_params = list(participant = pt[i]))
survey <- read_pilr(data_set = "pilrhealth:mobile:survey_data", schema = "1",
query_params = list(participant = pt[i]))
venues <- read_pilr(data_set = "pilrhealth:liitah:personal_venue", schema = "1",
query_params = list(participant = pt[i]))
if (nrow(log) == 0) {
compliance <- data.frame(pt = paste0(pt[i], " (NO DATA)"), Total_Reports = 0, Total_Triggers = 0,
Reports_without_Triggers = 0, Suggestion_0 = 0, Suggestion_1 = 0,
Suggestion_2 = 0, Suggestion_3 = 0, Pictures_Submitted = 0, Venues_Added = 0)
}
else {
# Reports
reports <- log[log$tag == 'MANUAL_VERIFY',]
# Triggers
triggers <- log[log$tag == 'ARRIVAL_TRIGGER',]
# Find No Triggers
for (j in 1:nrow(log)) {
if (log$tag[j] == "MANUAL_VERIFY") {
if (j <= 15) sub <- log[1:(j+15),]
else if (j >= nrow(log)-15) sub <- log[(j-15):nrow(log),]
else sub <- log[(j-15):(j+15),]
if (!any(sub$tag == "ARRIVAL_TRIGGER")) no_triggers <- no_triggers + 1
}
}
# Food suggestion responses
suggestions <- survey[survey$question_code == "50295",]
suggestions <- suggestions[!is.na(suggestions$question_code),]
# Picture submissions
pictures <- log[log$tag == "FILE_UPLOAD",]
compliance <- rbind(compliance, data.frame(pt = paste0(pt[i]), Total_Reports = nrow(reports), Total_Triggers = nrow(triggers),
Reports_without_Triggers = no_triggers,
Suggestion_0 = nrow(suggestions[suggestions$response_value == 0,]),
Suggestion_1 = nrow(suggestions[suggestions$response_value == 1,]),
Suggestion_2 = nrow(suggestions[suggestions$response_value == 2,]),
Suggestion_3 = nrow(suggestions[suggestions$response_value == 3,]),
Pictures_Submitted = nrow(pictures), Venues_Added = nrow(venues)))
}
if (i == 1) ret <- compliance
else ret <- rbind(ret, compliance)
}
ret
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.