g.report.part5 = function(metadatadir = c(), f0 = c(), f1 = c(), loglocation = c(),
params_cleaning = NULL,
LUX_day_segments = c(), params_output,
verbose = TRUE) {
# description: function to load milestone data generated by g.part5 and to merge these into a spreadsheet
# here no additional information of analyses are added. This function therefore is
# primary to wrap up the output from parallel processed accelerometer files.
if (params_cleaning[["includedaycrit.part5"]] > 1) {
# I put this warning here, and not in check_params because that would generate
# the warning every time the check_params is used across GGIR.
warning(paste0("\nNote that the behaviour of parameter includedaycrit.part5 ",
"has changed for values above 1. These are now treated as the mimimum ",
"absolute number of valid hours during the waking hours of a day.",
"If you prefer to keep the old functionality then divide ",
"your current value (which is above 1) by 24."), call. = FALSE)
}
getValidDayIndices = function(x, window, params_cleaning) {
if (window != "Segments") {
if (params_cleaning[["includedaycrit.part5"]] >= 0 &
params_cleaning[["includedaycrit.part5"]] <= 1) { # if includedaycrit.part5 is used as a ratio
includeday_wearPercentage = params_cleaning[["includedaycrit.part5"]] * 100
includeday_absolute = 0
} else if (params_cleaning[["includedaycrit.part5"]] > 1 &
params_cleaning[["includedaycrit.part5"]] <= 25) { # if includedaycrit.part5 is used like params_cleaning[["includedaycrit"]] as a number of hours
includeday_wearPercentage = 0
includeday_absolute = params_cleaning[["includedaycrit.part5"]] * 60
}
if (params_cleaning[["includenightcrit.part5"]] >= 0 &
params_cleaning[["includenightcrit.part5"]] <= 1) { # if includenightcrit.part5 is used as a ratio
includenight_wearPercentage = params_cleaning[["includenightcrit.part5"]] * 100
includenight_absolute = 0
} else if (params_cleaning[["includenightcrit.part5"]] > 1 &
params_cleaning[["includenightcrit.part5"]] <= 25) { # if includenightcrit.part5 is used like params_cleaning[["includenightcrit"]] as a number of hours
includenight_wearPercentage = 0
includenight_absolute = params_cleaning[["includenightcrit.part5"]] * 60
}
include_window = rep(TRUE, nrow(x))
if (length(params_cleaning[["data_cleaning_file"]]) > 0) { # allow for forced relying on guider based on external params_cleaning[["data_cleaning_file"]]
DaCleanFile = data.table::fread(params_cleaning[["data_cleaning_file"]], data.table = FALSE)
days2exclude = which(paste(x$ID, x$window_number) %in% paste(DaCleanFile$ID, DaCleanFile$day_part5))
if (length(days2exclude) > 0) {
include_window[days2exclude] = FALSE
}
} else {
include_window = rep(TRUE,nrow(x))
}
# Below we intentionally only set a criteria on daytime, because for
# the night time we only need start and end of the SPT window.
# Add extra columns to ease the check
x$nonwear_perc_day_spt = as.numeric(x$nonwear_perc_day_spt)
x$nonwear_perc_day = as.numeric(x$nonwear_perc_day)
x$nonwear_perc_spt = as.numeric(x$nonwear_perc_spt)
x$wear_min_day_spt = (1 - (x$nonwear_perc_day_spt / 100)) * x$dur_day_spt_min #valid minute during waking hours
x$wear_min_day = (1 - (x$nonwear_perc_day / 100)) * x$dur_day_min #valid minute during waking hours
x$wear_perc_day = 100 - x$nonwear_perc_day #wear percentage during waking hours
x$wear_min_spt = (1 - (x$nonwear_perc_spt / 100)) * x$dur_spt_min #valid minute during waking hours
x$wear_perc_spt = 100 - x$nonwear_perc_spt #wear percentage during waking hours
x$lastHour = as.numeric(x$lastHour)
x$calendar_date = as.Date(x$calendar_date)
minimumValidMinutesMM = 0 # default
if (length(params_cleaning[["includedaycrit"]]) == 2) {
minimumValidMinutesMM = params_cleaning[["includedaycrit"]][2] * 60
}
if (params_output[["require_complete_lastnight_part5"]] == FALSE) {
x$lastWindow = FALSE
x$lastDate = x$calendar_date
} else {
x$lastWindow = x$window_number == max(x$window_number)
x$lastDate = as.Date(x$lastDate)
}
if (window == "WW" | window == "OO") {
indices = which(x$wear_perc_day >= includeday_wearPercentage &
x$wear_min_day >= includeday_absolute &
x$wear_perc_spt >= includenight_wearPercentage &
x$wear_min_spt >= includenight_absolute &
x$dur_spt_min > 0 & x$dur_day_min > 0 &
((x$lastWindow == TRUE & x$lastHour >= 15 & (x$lastDate - x$calendar_date) >= -1 & window == "WW") |
(x$lastWindow == TRUE & x$lastHour >= 9 & (x$lastDate - x$calendar_date) >= -1 & window == "OO") |
x$lastWindow == FALSE) &
include_window == TRUE &
x$wear_min_day_spt >= minimumValidMinutesMM)
} else if (window == "MM") {
indices = which(x$wear_perc_day >= includeday_wearPercentage &
x$wear_min_day >= includeday_absolute &
x$wear_perc_spt >= includenight_wearPercentage &
x$wear_min_spt >= includenight_absolute &
x$dur_spt_min > 0 & x$dur_day_min > 0 &
((x$lastWindow == TRUE & x$lastHour > 9 & (x$lastDate - x$calendar_date) >= -1) |
x$lastWindow == FALSE) &
x$dur_day_spt_min >= (params_cleaning[["minimum_MM_length.part5"]] * 60) &
include_window == TRUE &
x$wear_min_day_spt >= minimumValidMinutesMM)
# Note: By default for MM analysis only full days are interesting (23 hours for one day in the year)
}
} else if (window == "Segments") {
# clean based on segments (even if a day is not valid, a certain segment of that
# day could be valid if participant wore device enough in that part of the day)
maxpernwday = 100 - (params_cleaning[["segmentWEARcrit.part5"]] * 100)
include_window = rep(TRUE, nrow(x))
indices = which(as.numeric(x$nonwear_perc_day_spt) <= maxpernwday &
as.numeric(x$dur_day_min) / as.numeric(x$dur_day_spt_min) >= params_cleaning[["segmentDAYSPTcrit.part5"]][1] &
as.numeric(x$dur_spt_min) / as.numeric(x$dur_day_spt_min) >= params_cleaning[["segmentDAYSPTcrit.part5"]][2] &
include_window == TRUE)
}
# exclude first and last window?
if (params_cleaning[["excludefirstlast.part5"]] == TRUE) {
x$window_number = as.numeric(x$window_number)
# identify first and last day per file
first_days = aggregate(window_number ~ filename, data = x, FUN = min, na.rm = TRUE)
last_days = aggregate(window_number ~ filename, data = x, FUN = max, na.rm = TRUE)
# match first and last days with the output dataframe
exclude_firsts = which(paste(x$filename, x$window_number) %in% paste(first_days$filename, first_days$window_number))
exclude_lasts = which(paste(x$filename, x$window_number) %in% paste(last_days$filename, last_days$window_number))
# keep only indices that do not match with first and last days
indices2exclude = which(indices %in% c(exclude_firsts, exclude_lasts))
if (length(indices2exclude) > 0) indices = indices[-indices2exclude]
}
return(indices)
}
ms5.out = "/meta/ms5.out"
if (file.exists(paste(metadatadir, ms5.out, sep = ""))) {
if (length(dir(paste(metadatadir, ms5.out, sep = ""))) == 0) {
try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
} else {
try.generate.report = TRUE
}
} else {
try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
warning(paste0('Cannot generate part5 report because no part 5 milestone data",
" is available. First run part 5 with argument mode=5.'))
}
if (try.generate.report == TRUE) {
#======================================================================
# loop through meta-files
fnames.ms5 = list.files(paste0(metadatadir, ms5.out), full.names = TRUE)
if (f1 > length(fnames.ms5)) {
f1 = length(fnames.ms5)
}
if (verbose == TRUE) {
cat(paste0(" loading all the milestone data from part 5 this can",
" take a few minutes\n"))
}
myfun = function(x, expectedCols = c()) {
tail_expansion_log = last_timestamp = output = NULL
load(file = x)
cut = which(output[, 1] == "")
if (length(cut) > 0 & length(cut) < nrow(output)) {
output = output[-cut, which(colnames(output) != "")]
}
if (exists("last_timestamp") == TRUE) {
output$lastHour = as.numeric(format(last_timestamp, "%H"))
output$lastDate = as.Date(last_timestamp)
} else {
output$lastHour = Inf # use dummy value
output$lastDate = Inf # use dummy value
}
out = as.matrix(output)
if (length(expectedCols) > 0) {
tmp = as.data.frame(matrix(0, 0, length(expectedCols)))
colnames(tmp) = expectedCols
out = base::merge(tmp, out, all = TRUE)
}
if (!is.null(expectedCols) && ncol(out) > length(expectedCols)) {
warning(paste0("Columns dropped in output part5 for ",
basename(x),
" because these could not be matched to columns for earlier",
" recordings in this dataset. Please check whether these recordings ",
" were processed with a",
" different GGIR version or configuration. If yes, reprocess",
" consistently. If not, ",
" consider renaming this file such that it is",
" alphabetically first and by that processed first, which",
" should address the issue."), call. = FALSE)
}
if (length(tail_expansion_log) != 0) {
col2na = grep(pattern = paste0("sleep_efficiency|N_atleast5minwakenight|daysleeper|",
"daysleeper|sleeplog_used|_spt_sleep|_spt_wake"),
x = names(out), value = FALSE)
window_number = as.numeric(out[,"window_number"])
lastwindow_indices = which(window_number == max(window_number, na.rm = TRUE))
if (length(col2na) > 0 & length(lastwindow_indices) > 0) {
out[lastwindow_indices, col2na] = "" # set last row to NA for all sleep related variables
}
}
return(out)
}
# It can be that first part 5 milestone file has less columns when it
# was processed with a different GGIR version or configuration.
# This would then complicate
# appending later milestone files to it. So, check first a random sample for highest number
# of columns. If this fails to identify a file with the maximum number of
# column then the above function will provide a warning to
# inform the user and with instructions on how to address it.
expectedCols = NULL
set.seed(1234)
# Create list of file indices to try
testfiles = unique(sample(x = f0:f1, size = pmin(5, f1 - f0 + 1)))
for (testf in testfiles) {
out_try = myfun(fnames.ms5[testf])
if (ncol(out_try) > length(expectedCols)) {
# expectedCols should equal the column names of the milestone
# file with the largest number of columns
expectedCols = colnames(out_try)
}
}
# Now load and append all files
outputfinal = as.data.frame(do.call(rbind,
lapply(fnames.ms5[f0:f1], myfun, expectedCols)),
stringsAsFactors = FALSE)
# Find columns filled with missing values
cut = which(sapply(outputfinal, function(x) all(x == "")) == TRUE)
if (length(cut) > 0) {
outputfinal = outputfinal[,-cut]
}
# revise filename
outputfinal$filename = gsub(".RData$", "", outputfinal$filename)
# order data.frame
outputfinal$window_number = as.numeric(gsub(" ", "", outputfinal$window_number))
outputfinal = outputfinal[order(outputfinal$filename, outputfinal$window_number, outputfinal$window), ]
# split results to different spreadsheets in order to minimize individual
# filesize and to ease organising dataset
uwi = as.character(unique(outputfinal$window))
if (!all(uwi %in% c("MM", "WW", "OO"))) {
uwi = c(uwi[uwi %in% c("MM", "WW", "OO")], "Segments")
}
uTRLi = as.character(unique(outputfinal$TRLi))
uTRMi = as.character(unique(outputfinal$TRMi))
uTRVi = as.character(unique(outputfinal$TRVi))
usleepparam = as.character(unique(outputfinal$sleepparam))
# replace NaN by empty cell value
for (kra in 1:ncol(outputfinal)) {
krad = which(is.nan(outputfinal[,kra]) == TRUE)
if (length(krad) > 0) {
outputfinal[krad,kra] = ""
}
}
outputfinal$daytype = 0
outputfinal$daytype[which(outputfinal$weekday == "Sunday" |
outputfinal$weekday == "Saturday")] = "WE"
outputfinal$daytype[which(outputfinal$weekday == "Monday" |
outputfinal$weekday == "Tuesday" |
outputfinal$weekday == "Wednesday" |
outputfinal$weekday == "Thursday" |
outputfinal$weekday == "Friday")] = "WD"
outputfinal$nonwear_perc_day = as.numeric(outputfinal$nonwear_perc_day)
outputfinal$nonwear_perc_spt = as.numeric(outputfinal$nonwear_perc_spt)
outputfinal$dur_spt_min = as.numeric(outputfinal$dur_spt_min)
outputfinal$dur_day_min = as.numeric(outputfinal$dur_day_min)
outputfinal$guider = as.character(outputfinal$guider)
outputfinal$sleeplog_used = as.numeric(outputfinal$sleeplog_used)
outputfinal$dur_spt_min = as.numeric(outputfinal$dur_spt_min)
outputfinal$dur_day_min = as.numeric(outputfinal$dur_day_min)
outputfinal$dur_day_spt_min = as.numeric(outputfinal$dur_day_spt_min)
# loop to store various variants of the analysis separately
outputfinal_bu = outputfinal
if (verbose == TRUE) cat(" generating csv report for every parameter configurations...\n")
for (j in 1:length(uwi)) {
for (h1 in 1:length(uTRLi)) {
for (h2 in 1:length(uTRMi)) {
for (h3 in 1:length(uTRVi)) {
for (h4 in 1:length(usleepparam)) {
if (verbose == TRUE) {
cat(paste0(" ", uwi[j], "-", uTRLi[h1], "-", uTRMi[h2],
"-", uTRVi[h3], "-", usleepparam[h4]))
}
select_window = as.character(outputfinal$window) == uwi[j]
if (!(uwi[j] %in% c("MM", "WW", "OO"))) select_window = !(as.character(outputfinal$window) %in% c("MM", "WW", "OO"))
seluwi = which(select_window &
as.character(outputfinal$TRLi) == uTRLi[h1] &
as.character(outputfinal$TRMi) == uTRMi[h2] &
as.character(outputfinal$TRVi) == uTRVi[h3] &
as.character(outputfinal$sleepparam) == usleepparam[h4])
# store spreadsheet
if (nrow(outputfinal[seluwi,]) == 0) {
if (verbose == TRUE) cat("report not stored, because no results available")
} else {
CN = colnames(outputfinal)
outputfinal2 = outputfinal
colnames(outputfinal2) = CN
delcol = grep(pattern = "TRLi|TRMi|TRVi|sleepparam",
x = colnames(outputfinal2))
if (uwi[j] != "Segments") {
delcol = c(delcol, which(colnames(outputfinal2) == "window"))
}
outputfinal2 = outputfinal2[,-delcol]
OF3 = outputfinal2[seluwi,]
OF3 = as.data.frame(OF3, stringsAsFactors = TRUE)
#-------------------------------------------------------------
# store all summaries in csv files without cleaning criteria
OF3_clean = tidyup_df(OF3)
data.table::fwrite(
OF3_clean,
paste(
metadatadir,
"/results/QC/part5_daysummary_full_",
uwi[j], "_L", uTRLi[h1], "M", uTRMi[h2], "V", uTRVi[h3],
"_", usleepparam[h4], ".csv", sep = ""), row.names = FALSE, na = "",
sep = params_output[["sep_reports"]],
dec = params_output[["dec_reports"]])
# store all summaries in csv files with cleaning criteria
validdaysi = getValidDayIndices(x = OF3_clean, window = uwi[j],
params_cleaning = params_cleaning)
if ("lastHour" %in% colnames(OF3_clean)) {
OF3_clean = OF3_clean[, -which(colnames(OF3_clean) %in% c("lastHour", "lasteDate"))]
}
if (length(validdaysi) > 0) {
data.table::fwrite(
OF3_clean[validdaysi, ],
paste(metadatadir, "/results/part5_daysummary_",
uwi[j], "_L", uTRLi[h1], "M", uTRMi[h2], "V", uTRVi[h3],
"_", usleepparam[h4], ".csv", sep = ""), row.names = FALSE, na = "",
sep = params_output[["sep_reports"]],
dec = params_output[["dec_reports"]])
}
#------------------------------------------------------------------------------------
#also compute summary per person
agg_plainNweighted = function(df,
filename = "filename",
daytype = "daytype",
window = "MM") {
# function to take both the weighted (by weekday/weekendday) and plain average of all numeric variables
# df: input data.frame (OF3 outside this function)
ignorevar = c("daysleeper", "cleaningcode", "night_number", "sleeplog_used",
"ID", "acc_available", "window_number",
"boutcriter.mvpa", "boutcriter.lig", "boutcriter.in", "bout.metric") # skip cosinor variables
for (ee in 1:ncol(df)) { # make sure that numeric columns have class numeric
nr = nrow(df)
if (nr > 30) nr = 30
options(warn = -1)
trynum = as.numeric(as.character(df[1:nr, ee]))
options(warn = 0)
if (length(which(is.na(trynum) == TRUE)) != nr &
length(which(ignorevar == names(df)[ee])) == 0) {
options(warn = -1)
class(df[, ee]) = "numeric"
options(warn = 0)
}
}
plain_mean = function(x) {
options(warn = -1)
plain_mean = mean(x, na.rm = TRUE)
options(warn = 0)
if (is.na(plain_mean) == TRUE) {
plain_mean = x[1]
}
return(plain_mean)
}
# aggregate across all days
if (window == "Segments") {
by = list(df$filename, df$window)
} else {
by = list(df$filename)
}
PlainAggregate = aggregate.data.frame(df, by = by, FUN = plain_mean)
PlainAggregate = PlainAggregate[, -grep("^Group", colnames(PlainAggregate))]
# aggregate per day type (weekday or weekenddays)
if (window == "Segments") {
by = list(df$filename, df$window, df$daytype)
} else {
by = list(df$filename, df$daytype)
}
AggregateWDWE = aggregate.data.frame(df, by = by, plain_mean)
AggregateWDWE = AggregateWDWE[, -grep("^Group", colnames(AggregateWDWE))]
# Add counted number of days for Gini, Cov, alpha Fragmentation variables, because
# days are dropped if there are not enough fragments:
vars_with_mininum_Nfrag = c("FRAG_Gini_dur_PA_day", "FRAG_CoV_dur_PA_day",
"FRAG_alpha_dur_PA_day", "FRAG_Gini_dur_IN_day",
"FRAG_CoV_dur_IN_day")
vars_with_mininum_Nfrag_i = which(vars_with_mininum_Nfrag %in% colnames(df) == TRUE)
if (length(vars_with_mininum_Nfrag_i) > 0) {
varname_minfrag = vars_with_mininum_Nfrag[vars_with_mininum_Nfrag_i[1]]
DAYCOUNT_Frag_Multiclass = aggregate.data.frame(
df[, varname_minfrag],
by = by,
FUN = function(x) length(which(is.na(x) == FALSE))
)
if (window != "Segments") {
colnames(DAYCOUNT_Frag_Multiclass)[1:2] = c("filename","daytype")
# AL10F, abbreviation for: at least 10 fragments
colnames(DAYCOUNT_Frag_Multiclass)[3] = "Nvaliddays_AL10F"
by.x = c("filename", "daytype")
} else if (window == "Segments") {
colnames(DAYCOUNT_Frag_Multiclass)[1:3] = c("filename","window", "daytype")
# AL10F, abbreviation for: at least 10 fragments
colnames(DAYCOUNT_Frag_Multiclass)[4] = "Nvalidsegments_AL10F"
by.x = c("filename","window", "daytype")
}
AggregateWDWE = merge(AggregateWDWE, DAYCOUNT_Frag_Multiclass, by.x = by.x)
}
len = NULL
AggregateWDWE$len <- 0
AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WD")] = 5 #weighting of weekdays
AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WE")] = 2 #weighting of weekend days
if (window == "Segments") {
dt <- data.table::as.data.table(AggregateWDWE[,which(lapply(AggregateWDWE, class) == "numeric" |
names(AggregateWDWE) == filename |
names(AggregateWDWE) == "window")])
} else {
dt <- data.table::as.data.table(AggregateWDWE[,which(lapply(AggregateWDWE, class) == "numeric" |
names(AggregateWDWE) == filename)])
}
options(warn = -1)
.SD <- .N <- count <- a <- NULL
if (window == "Segments") {
WeightedAggregate <- dt[, lapply(.SD, weighted.mean, w = len, na.rm = TRUE), by = list(filename, window)]
} else {
WeightedAggregate <- dt[, lapply(.SD, weighted.mean, w = len, na.rm = TRUE), by = list(filename)]
}
options(warn = 0)
LUXmetrics = c("above1000", "timeawake", "mean", "imputed", "ignored")
add_missing_LUX = function(x, LUX_day_segments, weeksegment = c(), LUXmetrics) {
# missing columns, add these:
NLUXseg = length(LUX_day_segments)
if (length(weeksegment) > 0) {
LUX_segment_vars_expected = paste0("LUX_", LUXmetrics, "_",
LUX_day_segments[1:(NLUXseg - 1)],
"-", LUX_day_segments[2:(NLUXseg)],
"hr_day_", weeksegment)
} else {
luxvars = paste0("LUX_", LUXmetrics, "_")
segments = paste0(LUX_day_segments[1:(NLUXseg - 1)],
"_", LUX_day_segments[2:(NLUXseg)], "hr_day")
LUX_segment_vars_expected = c()
for (luxi in 1:length(luxvars)) {
LUX_segment_vars_expected = c(LUX_segment_vars_expected,
paste0(luxvars[luxi], segments))
}
}
dummy_df = as.data.frame(matrix(NaN, 1, length(LUX_segment_vars_expected)))
colnames(dummy_df) = LUX_segment_vars_expected
if (length(which(LUX_segment_vars_expected %in% colnames(x))) > 0) {
x = as.data.frame(merge(x, dummy_df, all.x = T))
# re-order
current_location = which(colnames(x) %in% LUX_segment_vars_expected == TRUE)
neworder = sort(colnames(x)[current_location])
x = cbind(x[,-current_location], x[,LUX_segment_vars_expected])
}
return(x)
}
LUX_segment_vars = c()
for (li in 1:length(LUXmetrics)) {
LUX_segment_vars = c(LUX_segment_vars, grep(
pattern = paste0("LUX_", LUXmetrics[li]),
x = colnames(WeightedAggregate),
value = TRUE
))
}
if (length(LUX_segment_vars) > 0 &
length(LUX_segment_vars) < 24 &
length(LUX_day_segments) > 0) {
WeightedAggregate = add_missing_LUX(
WeightedAggregate,
LUX_day_segments,
weeksegment = c(),
LUXmetrics = LUXmetrics
)
}
# merge them into one output data.frame (G)
LUX_segment_vars = c()
for (li in 1:length(LUXmetrics)) {
LUX_segment_vars = colnames(PlainAggregate) %in% grep(
x = colnames(PlainAggregate),
pattern = paste0("LUX_", LUXmetrics[li]),
value = TRUE
)
}
charcol = which(
lapply(PlainAggregate, class) != "numeric" &
names(PlainAggregate) != filename & !(LUX_segment_vars)
)
numcol = which(lapply(PlainAggregate, class) == "numeric" | LUX_segment_vars)
WeightedAggregate = as.data.frame(WeightedAggregate, stringsAsFactors = TRUE)
if (window == "Segments") {
by = c("filename", "window")
} else {
by = "filename"
}
G = base::merge(PlainAggregate,
WeightedAggregate,
by = by,
all.x = TRUE)
p0b = paste0(names(PlainAggregate[, charcol]), ".x")
p1 = paste0(names(PlainAggregate[, numcol]), ".x")
p2 = paste0(names(PlainAggregate[, numcol]), ".y")
for (i in 1:length(p0b)) {
names(G)[which(names(G) == p0b[i])] = paste0(names(PlainAggregate[, charcol])[i])
}
for (i in 1:length(p1)) {
names(G)[which(names(G) == p1[i])] = paste0(names(PlainAggregate[, numcol])[i], "_pla")
}
for (i in 1:length(p2)) {
names(G)[which(names(G) == p2[i])] = paste0(names(PlainAggregate[, numcol])[i], "_wei")
}
# expand output with weekday (WD) and weekend (WE) day aggregates
if (params_output[["week_weekend_aggregate.part5"]] == TRUE) {
for (weeksegment in c("WD", "WE")) {
temp_aggregate = AggregateWDWE[which(AggregateWDWE$daytype == weeksegment), ]
charcol = which(lapply(temp_aggregate, class) != "numeric" &
names(temp_aggregate) != filename)
numcol = which(lapply(temp_aggregate, class) %in% c("numeric", "integer") == TRUE)
names(temp_aggregate)[numcol] = paste0(names(temp_aggregate)[numcol], "_", weeksegment)
if (window == "Segments") {
temp_aggregate = temp_aggregate[, c(which(colnames(temp_aggregate) == "filename" |
colnames(temp_aggregate) == "window"), numcol)]
} else {
temp_aggregate = temp_aggregate[, c(which(colnames(temp_aggregate) == "filename"), numcol)]
}
LUX_segment_vars = c()
for (li in 1:length(LUXmetrics)) {
LUX_segment_vars = grep(
pattern = paste0("LUX_", LUXmetrics[li]),
x = colnames(temp_aggregate),
value = TRUE
)
}
if (length(LUX_segment_vars) > 0 &
length(LUX_segment_vars) < 24 & length(LUX_day_segments) > 0) {
temp_aggregate = add_missing_LUX(temp_aggregate, LUX_day_segments, weeksegment, LUXmetrics)
}
G = base::merge(G, temp_aggregate,
by = by, all.x = TRUE)
}
}
G = G[,-which(names(G) %in% c("len", "daytype", "len_WE", "len_WD"))]
return(G)
}
#---------------------------------------------
# Calculate, weighted and plain mean of all variables
# add column to define what are weekenddays and weekdays as needed for function agg_plainNweighted
# before processing OF3, first identify which days have enough monitor wear time
validdaysi = getValidDayIndices(x = OF3, window = uwi[j],
params_cleaning = params_cleaning)
if (length(validdaysi) > 0) { # do not attempt to aggregate if there are no valid days
# aggregate OF3 (days) to person summaries in OF4
OF4 = agg_plainNweighted(df = OF3[validdaysi,], filename = "filename",
daytype = "daytype", window = uwi[j])
# calculate additional variables
columns2keep = c("filename", "night_number", "daysleeper",
"cleaningcode","sleeplog_used","guider",
"acc_available", "nonwear_perc_day", "nonwear_perc_spt",
"daytype", "dur_day_min",
"dur_spt_min")
if (uwi[j] == "Segments") {
columns2keep = c(columns2keep, "window")
}
OF3tmp = OF3[, columns2keep]
foo34 = function(df,aggPerIndividual,nameold,namenew,cval,window) {
# function to help with calculating additional variables
# related to counting how many days of measurement there are
# that meet a certain criteria
# cval is a vector with 0 and 1, indicating whether the criteria is met
# aggPerIndividual is the aggregate data (per individual)
# df is the non-aggregated data (days across individuals
# we want to extra the number of days per individuals that meet the
# criteria in df, and make it allign with aggPerIndividual.
df2 = function(x) df2 = length(which(x == cval)) # check which values meets criterion
if (window == "Segments") by = list(df$filename, df$window)
if (window != "Segments") by = list(df$filename)
mmm = as.data.frame(aggregate.data.frame(df, by = by, FUN = df2),
stringsAsFactors = TRUE)
mmm2 = data.frame(
filename = mmm$Group.1,
cc = mmm[, nameold],
stringsAsFactors = TRUE
)
if (window == "Segments") {
mmm2$window = mmm$Group.2
by = c("filename", "window")
} else if (window != "Segments") {
by = "filename"
}
aggPerIndividual = merge(aggPerIndividual, mmm2, by = by)
names(aggPerIndividual)[which(names(aggPerIndividual) == "cc")] = namenew
foo34 = aggPerIndividual
}
# # calculate number of valid days (both night and day criteria met)
OF3tmp$validdays = 0
# criteria is that nonwear percentage needs to be below threshold for both day and night:
OF3tmp$validdays[validdaysi] = 1
# now we have a label for the valid days, we can create a new variable
# in OF4 that is a count of the number of valid days:
namenew = "Nvaliddays"
if (uwi[j] == "Segments") namenew = "Nvalidsegments"
OF4 = foo34(
df = OF3tmp,
aggPerIndividual = OF4,
nameold = "validdays",
namenew = namenew,
cval = 1,
window = uwi[j]
)
# do the same for WE (weekend days):
OF3tmp$validdays = 0
OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WE")]] = 1
namenew = "Nvaliddays_WE"
if (uwi[j] == "Segments") namenew = "Nvalidsegments_WE"
OF4 = foo34(
df = OF3tmp,
aggPerIndividual = OF4,
nameold = "validdays",
namenew = namenew,
cval = 1,
window = uwi[j]
)
# do the same for WD (weekdays):
OF3tmp$validdays = 0
OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WD")]] = 1
namenew = "Nvaliddays_WD"
if (uwi[j] == "Segments") namenew = "Nvalidsegments_WD"
OF4 = foo34(df = OF3tmp,
aggPerIndividual = OF4,
nameold = "validdays",
namenew = namenew,
cval = 1,
window = uwi[j]) # create variable from it
# do the same for daysleeper,cleaningcode, sleeplog_used, acc_available:
OF3tmp$validdays = 1
# redefine by considering only valid days
OF4 = foo34(
df = OF3tmp[validdaysi,],
aggPerIndividual = OF4,
nameold = "daysleeper",
namenew = "Ndaysleeper",
cval = 1,
window = uwi[j]
)
OF4 = foo34(
df = OF3tmp[validdaysi,],
aggPerIndividual = OF4,
nameold = "cleaningcode",
namenew = "Ncleaningcodezero",
cval = 0,
window = uwi[j]
)
for (ccode in 1:6) {
OF4 = foo34(
df = OF3tmp[validdaysi, ],
aggPerIndividual = OF4,
nameold = "cleaningcode",
namenew = paste0("Ncleaningcode", ccode),
cval = ccode,
window = uwi[j]
)
}
OF4 = foo34(
df = OF3tmp[validdaysi, ],
aggPerIndividual = OF4,
nameold = "sleeplog_used",
namenew = "Nsleeplog_used",
cval = TRUE,
window = uwi[j]
)
OF4 = foo34(
df = OF3tmp[validdaysi, ],
aggPerIndividual = OF4,
nameold = "acc_available",
namenew = "Nacc_available",
cval = 1,
window = uwi[j]
)
# Move valid day count variables to beginning of dataframe
OF4 = cbind(OF4[, 1:5],
OF4[, (ncol(OF4) - 10):ncol(OF4)],
OF4[, 6:(ncol(OF4) - 11)])
nom = names(OF4)
cut = which(nom == "sleeponset_ts" | nom == "wakeup_ts"
| nom == "night_number" | nom == "window_number"
| nom == "daysleeper" | nom == "cleaningcode"
| nom == "acc_available"
| nom == "guider" | nom == "L5TIME" | nom == "M5TIME"
| nom == "L10TIME" | nom == "M10TIME"
| nom == "acc_available" | nom == "daytype"
| nom %in% paste0("sleeplog_used_", c("pla", "wei", "WD", "WE"))
| nom %in% paste0("window_number_", c("pla", "wei", "WD", "WE")))
names(OF4)[which(names(OF4) == "weekday")] = "startday"
OF4 = OF4[,-cut]
for (col4 in 1:ncol(OF4)) {
navalues = which(is.na(OF4[,col4]) == TRUE)
if (length(navalues) > 0) {
OF4[navalues, col4] = ""
}
}
#Move Nvaliddays variables to the front of the spreadsheet
Nvaliddays_variables = grep(x = colnames(OF4), pattern = "Nvaliddays", value = FALSE)
Nvaliddays_variables = unique(c(
which(colnames(OF4) == "Nvaliddays"),
which(colnames(OF4) == "Nvaliddays_WD"),
which(colnames(OF4) == "Nvaliddays_WE"),
Nvaliddays_variables
))
OF4 = OF4[, unique(c(1:4, Nvaliddays_variables, 5:ncol(OF4)))]
#-------------------------------------------------------------
# store all summaries in csv files
OF4_clean = tidyup_df(OF4)
data.table::fwrite(OF4_clean,paste(metadatadir,"/results/part5_personsummary_",
uwi[j],"_L",uTRLi[h1],"M",
uTRMi[h2], "V", uTRVi[h3], "_",
usleepparam[h4], ".csv", sep = ""),
row.names = FALSE, na = "",
sep = params_output[["sep_reports"]],
dec = params_output[["dec_reports"]])
}
}
}
}
}
}
}
rm(outputfinal, outputfinal2)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.