g.impute = function(M, I, params_cleaning = c(), desiredtz = "",
dayborder = 0, TimeSegments2Zero = c(), acc.metric = "ENMO",
ID, qwindowImp = c(), ...) {
#get input variables
input = list(...)
if (length(input) > 0 || length(params_cleaning) == 0) {
# Extract and check parameters if the user provides more arguments than just
# the parameter objects, or if params_[...] is not specified (in which case
# the defaults need to be filled in). So, inside the GGIR pipeline this is not
# used, but it is used when g.impute is called on its own,
# as if it were the old g.impute function
params = extract_params(params_cleaning = params_cleaning,
input = input,
params2check = c("cleaning")) # load default parameters
params_cleaning = params$params_cleaning
rm(params)
}
windowsizes = M$windowsizes #default: c(5,900,3600)
metashort = M$metashort
metalong = M$metalong
ws3 = windowsizes[1]
ws2 = windowsizes[2]
ws = windowsizes[3]
# Minimum number of accelerometer axes that need to meet the non-wear criteria
# for the data to be classified as non-wear
wearthreshold = 2 #needs to be 0, 1 or 2
# windows per day
n_ws_perday = (1440*60) / ws
n_ws2_perday = (1440*60) / ws2
n_ws3_perday = (1440*60) / ws3
# windows per minute
n_ws3_permin = 60/ws3
# windows per hour
n_ws3_perhour = 3600/ws3
n_ws_perhour = 3600/ws
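# For example, with the default windowsizes c(5,900,3600):
# n_ws_perday = 24, n_ws2_perday = 96, n_ws3_perday = 17280,
# n_ws3_permin = 12, n_ws3_perhour = 720, n_ws_perhour = 1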
#check that matrices match
if (((nrow(metalong)/(n_ws2_perday)*10) - (nrow(metashort)/n_ws3_perday) * 10) > 1) {
warning("Matrices 'metalong' and 'metashort' are not compatible", call. = FALSE)
}
tmi = which(colnames(metalong) == "timestamp")
time = as.character(as.matrix(metalong[,tmi]))
startt = as.matrix(metalong[1, tmi])
#====================================
# Deriving file characteristics from the long-epoch (default 15 min) summary data
LD = nrow(metalong) * (ws2/60) #length data in minutes
ND = nrow(metalong)/n_ws2_perday #number of days
#==============================================
# Generating time variable
timeline = seq(0, ceiling(nrow(metalong)/n_ws2_perday), by = 1/n_ws2_perday)
timeline = timeline[1:nrow(metalong)]
#========================================
# Extract non-wear and clipping information, and decide which additional time needs to be considered non-wear
out = g.weardec(M, wearthreshold, ws2,
params_cleaning = params_cleaning,
desiredtz = desiredtz,
qwindowImp = qwindowImp)
r1 = out$r1 #non-wear
r2 = out$r2 #clipping
r3 = out$r3 #additional non-wear
r4 = matrix(0,length(r3),1) #protocol based decisions on data removal
LC = out$LC
LC2 = out$LC2
nonwearHoursFiltered = out$nonwearHoursFiltered
nonwearEventsFiltered = out$nonwearEventsFiltered
#========================================================
# Check whether TimeSegments2Zero exists, because this means that the
# user wants to ignore specific time windows. This feature is used,
# for example, if the accelerometer was not worn during the night and the user
# wants to include the nighttime acceleration in the analyses without imputation,
# but wants to use imputation for the rest of the day.
# So, those time windows should not be imputed,
# and acceleration metrics should have value zero during these windows.
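# A hypothetical illustration of the structure expected below: a data.frame
# with columns windowstart and windowend holding POSIX times that match
# short-epoch timestamps in the recording (the dates shown are made up):
#   TimeSegments2Zero = data.frame(
#     windowstart = as.POSIXct("2022-06-02 23:00:00", tz = desiredtz),
#     windowend = as.POSIXct("2022-06-03 06:00:00", tz = desiredtz))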
if (length(TimeSegments2Zero) > 0) {
r1long = matrix(0,length(r1),(ws2/ws3)) #r1long is the same as r1, but with more values per period of time
r1long = replace(r1long,1:length(r1long),r1)
r1long = t(r1long)
dim(r1long) = c((length(r1)*(ws2/ws3)),1)
timelinePOSIX = iso8601chartime2POSIX(M$metashort$timestamp,tz = desiredtz)
# Combine r1Long with TimeSegments2Zero
for (kli in 1:nrow(TimeSegments2Zero)) {
startTurnZero = which(timelinePOSIX == TimeSegments2Zero$windowstart[kli])
endTurnZero = which(timelinePOSIX == TimeSegments2Zero$windowend[kli])
r1long[startTurnZero:endTurnZero] = 0
# Force ENMO and other acceleration metrics to be zero for these intervals
M$metashort[startTurnZero:endTurnZero,which(colnames(M$metashort) %in% c("timestamp","anglex","angley","anglez") == FALSE)] = 0
}
# collapse r1long (short epochs) back to r1 (long epochs)
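# (taking the difference of the cumulative sum over steps of ws2/ws3 short
# epochs, 900/5 = 180 with the default windowsizes, equals the mean per long
# epoch; rounding it flags a long epoch only when the majority of its short
# epochs were flagged)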
r1longc = cumsum(c(0,r1long))
select = seq(1,length(r1longc), by = (ws2/ws3))
r1 = diff(r1longc[round(select)]) / abs(diff(round(select)))
r1 = round(r1)
}
#======================================
# detect first and last midnight and all midnights
tooshort = 0
dmidn = g.detecmidnight(time,desiredtz,dayborder) #,ND
firstmidnight = dmidn$firstmidnight; firstmidnighti = dmidn$firstmidnighti
lastmidnight = dmidn$lastmidnight; lastmidnighti = dmidn$lastmidnighti
midnights = dmidn$midnights; midnightsi = dmidn$midnightsi
#===================================================================
# Trim data based on study_dates_file
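# A hypothetical example of the study dates file content (ID in column 1,
# first and last study date in columns 2:3, formatted as specified by
# study_dates_dateformat; the header names are illustrative):
#   ID, first_date, last_date
#   sub001, 20/06/2022, 27/06/2022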
study_dates_log_used = FALSE
if (!is.null(params_cleaning[["study_dates_file"]])) {
# Read content of study dates file
studyDates = data.table::fread(file = params_cleaning[["study_dates_file"]], data.table = FALSE)
# Check ID and date formats
studyDates = check_log(log = studyDates,
dateformat = params_cleaning[["study_dates_dateformat"]],
colid = 1, datecols = 2:3,
logPath = params_cleaning[["study_dates_file"]],
logtype = "study dates log")
# identify first and last study dates
rowID = which(studyDates[, 1] == ID)
if (length(rowID) > 0) {
if (length(rowID) > 1) {
# this is in the unlikely event that an ID appears twice in the log
rowID = rowID[1]
warning(paste0("The ID ", ID, " appears twice in the study dates log"), call. = FALSE)
}
# expected first and last midnight
firstmidnight = as.Date(studyDates[rowID, 2], format = params_cleaning[["study_dates_dateformat"]])
lastmidnight = as.Date(studyDates[rowID, 3], format = params_cleaning[["study_dates_dateformat"]])
# find the dates in metalong
firstmidnighti = midnightsi[grep(firstmidnight, midnights)]
lastmidnighti = midnightsi[grep(lastmidnight, midnights) + 1] # plus 1 to include the reported date in log
# trim data at the beginning
if (length(firstmidnighti) > 0) {
r4[1:(firstmidnighti - 1)] = 1
study_dates_log_used = TRUE
} else {
# if midnight timestamp for the date is not available,
# do not trim the data and recover the firstmidnight value
firstmidnighti = dmidn$firstmidnighti
firstmidnight = dmidn$firstmidnight
# warning(paste0("The start date provided in the study dates file for ID = ",
# ID, "is not within the dates available in the recording. ",
# "The data was not trimmed at the beginning of the recording."), call. = FALSE)
}
# trim data at the end
if (length(lastmidnighti) > 0 & !is.na(lastmidnighti)) {
r4[lastmidnighti:nrow(r4)] = 1
study_dates_log_used = TRUE
} else {
# if midnight timestamp for the date is not available,
# do not trim the data and recover the lastmidnight value
lastmidnighti = dmidn$lastmidnighti
lastmidnight = dmidn$lastmidnight
# warning(paste0("The end date provided in the study dates file for ID = ",
# ID, "is not within the dates available in the recording. ",
# "The data was not trimmed at the end of the recording."), call. = FALSE)
}
# cut r4 down so the strategies are applied only to the trimmed portion of the data;
# after the strategies have been applied, r4 is restored to its original length
if (study_dates_log_used == TRUE) {
r4_bu = r4 # backup copy of r4 before cutting it to be imputed later on
r4 = as.matrix(r4[which(r4[,1] == 0),])
}
} else if (length(rowID) == 0) {
warning(paste0("The ID ", ID, " does not appear in the study dates log ",
"the full recording has been considered within the study ",
"protocol selection"), call. = FALSE)
}
}
#===================================================================
# Select data based on data_masking_strategy
if (params_cleaning[["data_masking_strategy"]] == 1) { #protocol based data selection
if (params_cleaning[["hrs.del.start"]] > 0) {
r4[1:(params_cleaning[["hrs.del.start"]]*(3600/ws2))] = 1
}
if (params_cleaning[["hrs.del.end"]] > 0) {
if (length(r4) > params_cleaning[["hrs.del.end"]]*(3600/ws2)) {
r4[((length(r4) + 1) - (params_cleaning[["hrs.del.end"]]*(3600/ws2))):length(r4)] = 1
} else {
r4[1:length(r4)] = 1
}
}
if (LD < 1440) {
r4 = r4[1:floor(LD/(ws2/60))]
}
starttimei = 1
endtimei = length(r4)
} else if (params_cleaning[["data_masking_strategy"]] == 2) { #midnight to midnight data_masking_strategy
starttime = firstmidnight
endtime = lastmidnight
# only apply data_masking_strategy 2 if the study dates log is not used for trimming the data,
# otherwise the data already starts and finishes at midnight
if (study_dates_log_used == FALSE) {
starttimei = firstmidnighti
endtimei = lastmidnighti
if (firstmidnighti != 1) { #ignore everything before the first midnight
r4[1:(firstmidnighti - 1)] = 1 #-1 because first midnight 00:00 itself contributes to the first full day
}
r4[(lastmidnighti):length(r4)] = 1 #ignore everything after the last midnight
} else {
starttimei = 1
endtimei = length(r4)
}
} else if (params_cleaning[["data_masking_strategy"]] %in% c(3, 5)) { #select X most active days
#==========================================
# Look for the X most active days and use this to define the window of interest
if (acc.metric %in% colnames(M$metashort)) {
atest = as.numeric(as.matrix(M$metashort[,acc.metric]))
} else {
acc.metric = grep("timestamp|angle", colnames(M$metashort),
value = TRUE, invert = TRUE)[1]
atest = as.numeric(as.matrix(M$metashort[,acc.metric]))
}
r2tempe = rep(r2, each = (ws2/ws3))
r1tempe = rep(r1, each = (ws2/ws3))
atest[which(r2tempe == 1 | r1tempe == 1)] = 0
if (params_cleaning[["data_masking_strategy"]] == 3) {
# Select the most active 24-h blocks by a rolling window of windowsizes[3]
NDAYS = length(atest) / n_ws3_perday
# rolling window in ws3
rolling_window = n_ws3_perhour * n_ws_perhour
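# with the default windowsizes: rolling_window = 720 * 1 = 720 short epochs,
# i.e. the candidate window start shifts in steps of one hour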
pend = round((NDAYS - params_cleaning[["ndayswindow"]]) * n_ws_perday)
if (pend < 1) pend = 1
atestlist = rep(0, pend)
for (ati in 1:pend) {
p0 = (((ati - 1)*rolling_window) + 1)
p1 = (ati + (params_cleaning[["ndayswindow"]]*n_ws_perday)) * rolling_window # end of the ndayswindow-day window, expressed in short-epoch (ws3) indices
if (p0 > length(atest)) p0 = length(atest)
if (p1 > length(atest)) p1 = length(atest)
if ((p1 - p0) > 1000) {
atestlist[ati] = mean(atest[p0:p1], na.rm = TRUE)
} else {
atestlist[ati] = 0
}
}
# atik is the ws2 index where the most active ndayswindow starts
atik = which(atestlist == max(atestlist))[1]
ignore_until_hours = (atik - 1) / (3600/ws2)
params_cleaning[["hrs.del.start"]] = ignore_until_hours + params_cleaning[["hrs.del.start"]]
ignore_from_hours = (atik - 1) / (3600/ws2)
params_cleaning[["maxdur"]] = ((ignore_from_hours/24) + params_cleaning[["ndayswindow"]]) - (params_cleaning[["hrs.del.end"]]/24)
if (params_cleaning[["maxdur"]] > NDAYS) params_cleaning[["maxdur"]] = NDAYS
# now calculate r4
if (params_cleaning[["hrs.del.start"]] > 0) {
r4[1:(params_cleaning[["hrs.del.start"]]*(3600/ws2))] = 1
}
# if (params_cleaning[["hrs.del.end"]] > 0) {
# if (length(r4) > params_cleaning[["hrs.del.end"]]*(3600/ws2)) {
# r4[((length(r4) + 1) - (params_cleaning[["hrs.del.end"]]*(3600/ws2))):length(r4)] = 1
# } else {
# r4[1:length(r4)] = 1
# }
# }
if (params_cleaning[["maxdur"]] > 0 & (length(r4) > ((params_cleaning[["maxdur"]]*n_ws2_perday) + 1))) {
ignore_from = (params_cleaning[["maxdur"]]*n_ws2_perday) + 1
r4[ignore_from:length(r4)] = 1
}
if (LD < 1440) {
r4 = r4[1:floor(LD/(ws2/60))]
}
} else if (params_cleaning[["data_masking_strategy"]] == 5) {
# Select the most active calendar days
atestlist = c()
# readjust midnightsi if study dates log used for trimming the data
if (study_dates_log_used == TRUE) {
midnights2keep = which(midnightsi >= firstmidnighti & midnightsi <= lastmidnighti)
midnightsi = midnightsi[midnights2keep]
}
for (ati in 1:length(midnightsi)) {
p0 = ((midnightsi[ati] * ws2/ws3) - ws2/ws3) + 1
p1 = ((midnightsi[ati + params_cleaning[["ndayswindow"]]] * ws2/ws3) - ws2/ws3)
if (is.na(p1)) break
if (p1 > length(atest)) break
atestlist[ati] = mean(atest[p0:p1], na.rm = TRUE)
}
# atik is the index where the most active ndayswindow starts
atik = 1 # initialise atik as 1, which will be used in case ndayswindow is longer than the number of recording days
if (!is.null(atestlist)) atik = which(atestlist == max(atestlist))[1]
#ignore everything before the first ndayswindow midnight plus hrs.del.start
ignore_until = (midnightsi[atik]) + (params_cleaning[["hrs.del.start"]]*(3600/ws2)) - 1 # minus 1 for not ignoring the first epoch in ndayswindow
if (ignore_until > 0) {
if (study_dates_log_used == TRUE) {
ignore_until = ignore_until - firstmidnighti + 1
}
r4[1:ignore_until] = 1
}
#ignore everything after the last midnight plus hrs.del.end
ignore_from = midnightsi[atik + params_cleaning[["ndayswindow"]]] - (params_cleaning[["hrs.del.end"]]*(3600/ws2))
if (study_dates_log_used == TRUE) {
ignore_from = ignore_from - firstmidnighti + 1
if (ignore_from >= length(r4)) ignore_from = length(r4)
}
# if ndayswindow is higher than number of midnights, use last midnight
if (is.na(ignore_from)) ignore_from = midnightsi[length(midnightsi)] - (params_cleaning[["hrs.del.end"]]*(3600/ws2))
# if ignore_from == length(r4) it means that recording ends at midnight
# and the code should not ignore anything at the end of the recording
if (ignore_from < length(r4)) r4[ignore_from:length(r4)] = 1
}
starttimei = 1
endtimei = length(r4)
} else if (params_cleaning[["data_masking_strategy"]] == 4) { #from first midnight to end of recording
starttime = firstmidnight
endtime = lastmidnight
# only apply data_masking_strategy 4 if the study dates log is not used for trimming the data,
# otherwise the data already starts and finishes at midnight
if (study_dates_log_used == FALSE) {
starttimei = firstmidnighti
endtimei = lastmidnighti
if (firstmidnighti != 1) { #ignore everything before the first midnight
r4[1:(firstmidnighti - 1)] = 1 # -1 because first midnight 00:00 itself contributes to the first full day
}
} else {
starttimei = 1
endtimei = length(r4)
}
}
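# In summary: data_masking_strategy 1 = protocol-based selection (hrs.del.start /
# hrs.del.end), 2 = first to last midnight, 3 = most active ndayswindow-day
# block found with a rolling window, 4 = first midnight to end of recording,
# 5 = most active ndayswindow calendar days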
# Mask data based on maxdur
if (params_cleaning[["maxdur"]] > 0 & (length(r4) > ((params_cleaning[["maxdur"]]*n_ws2_perday) + 1))) {
r4[((params_cleaning[["maxdur"]]*n_ws2_perday) + 1):length(r4)] = 1
}
# Mask data based on max_calendar_days
if (params_cleaning[["max_calendar_days"]] > 0) {
# get dates that are part of the study protocol
if (study_dates_log_used == TRUE) {
dates = as.Date(iso8601chartime2POSIX(M$metalong$timestamp[firstmidnighti:(lastmidnighti - 1)], tz = desiredtz))
} else {
dates = as.Date(iso8601chartime2POSIX(M$metalong$timestamp, tz = desiredtz))
}
if (params_cleaning[["max_calendar_days"]] < length(unique(dates))) {
lastDateToInclude = sort(unique(dates))[params_cleaning[["max_calendar_days"]]]
r4[which(dates > lastDateToInclude)] = 1
}
}
#===================================================================
# if data selected based on study dates log, recover the original length of r4
if (study_dates_log_used == TRUE) {
r4_bu[which(r4_bu == 0),] = r4
r4 = r4_bu
}
#========================================================================================
# Impute short-epoch (ws3 seconds) data based on long-epoch (ws2 seconds) estimates of non-wear time
r5 = r1 + r2 + r3 + r4
r5[which(r5 > 1) ] = 1
r5[which(metalong$nonwearscore == -1) ] = -1 # expanded data with expand_tail_max_hours
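# r5 codes per long epoch: 0 = valid, 1 = to be imputed (non-wear, clipping,
# additional non-wear, or protocol-based removal), -1 = expanded time, which
# is excluded from imputation below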
r5long = matrix(0,length(r5),(ws2/ws3)) #r5long is the same as r5, but with more values per period of time
r5long = replace(r5long,1:length(r5long),r5)
r5long = t(r5long)
dim(r5long) = c((length(r5)*(ws2/ws3)),1)
#------------------------------
# detect which metrics were calculated in part 1 and in which column they ended up
ENi = which(colnames(metashort) == "en")
if (length(ENi) == 0) ENi = -1
#==============================
if (nrow(metashort) > length(r5long)) {
metashort = as.matrix(metashort[1:length(r5long),])
}
wpd = 1440*n_ws3_permin #windows per day
averageday = matrix(0,wpd,(ncol(metashort) - 1))
for (mi in 2:ncol(metashort)) {# generate 'average' day for each variable
# The average day is used for imputation and defined relative to the starttime of the measurement
# irrespective of dayborder as used in other parts of GGIR
metr = as.numeric(as.matrix(metashort[, mi]))
is.na(metr[which(r5long != 0)]) = T # set metr to NA wherever r5long is non-zero (this also leaves the time expanded with expand_tail_max_hours out of the averageday calculation)
imp = matrix(NA,wpd,ceiling(length(metr)/wpd)) # matrix used for imputation: rows = short epochs within a day, columns = days
ndays = ncol(imp) #number of days (rounded upwards)
nvalidsec = matrix(0,wpd,1)
dcomplscore = length(which(r5 == 0)) / length(r5)
if (ndays > 1 ) { # only do imputation if there is more than 1 day of data #& length(which(r5 == 1)) > 1
# all days except last one
for (j in 1:(ndays - 1)) {
imp[,j] = as.numeric(metr[(((j - 1)*wpd) + 1):(j*wpd)])
}
# last day
lastday = metr[(((ndays - 1)*wpd) + 1):length(metr)]
imp[1:length(lastday),ndays] = as.numeric(lastday)
if (colnames(metashort)[mi] == "step_count") {
# Median per row, which equals the median for one time point in the 'average' day
# (median day would be a better term in this context).
# Chances are high that this will often be zero, because a person
# would have to walk at a certain time point in the day on more than half of
# the days to get a median above zero
imp3 = apply(imp, 1, median, na.rm = TRUE)
} else {
# Mean per row, which equals the mean for one time point in the 'average' day
imp3 = rowMeans(imp, na.rm = TRUE)
}
dcomplscore = length(which(is.nan(imp3) == F | is.na(imp3) == F)) / length(imp3)
if (length(imp3) < wpd) {
dcomplscore = dcomplscore * (length(imp3)/wpd)
}
if (ENi == mi) { #replace missing values for EN by 1
imp3[which(is.nan(imp3) == T | is.na(imp3) == T)] = 1
} else { #replace missing values for other metrics by 0
imp3[which(is.nan(imp3) == T | is.na(imp3) == T)] = 0 # for those parts of the data where there is not a single data point for a certain part of the day (this is CRITICAL)
}
averageday[, (mi - 1)] = imp3
for (j in 1:ndays) {
missing = which(is.na(imp[,j]) == T)
if (length(missing) > 0) {
imp[missing,j] = imp3[missing]
}
}
# imp is now the imputed time series
dim(imp) = c(length(imp),1)
# but do not use imp for expanded time
toimpute = which(r5long != -1) # do not impute the expanded time with expand_tail_max_hours
metashort[toimpute, mi] = as.numeric(imp[toimpute]) # this also cuts off the latter part of the last day, which was only used as dummy data
} else {
dcomplscore = length(which(r5long == 0))/wpd
}
}
n_decimal_places = 4
metashort[,2:ncol(metashort)] = round(metashort[,2:ncol(metashort)], digits = n_decimal_places)
rout = data.frame(r1 = r1, r2 = r2, r3 = r3, r4 = r4, r5 = r5, stringsAsFactors = TRUE)
invisible(list(metashort = metashort, rout = rout, r5long = r5long, dcomplscore = dcomplscore,
averageday = averageday, windowsizes = windowsizes, data_masking_strategy = params_cleaning[["data_masking_strategy"]],
LC = LC, LC2 = LC2, hrs.del.start = params_cleaning[["hrs.del.start"]], hrs.del.end = params_cleaning[["hrs.del.end"]],
maxdur = params_cleaning[["maxdur"]], nonwearHoursFiltered = nonwearHoursFiltered,
nonwearEventsFiltered = nonwearEventsFiltered))
}
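# Example of standalone use (a minimal sketch, not part of the function above;
# it assumes 'datafile' points to a raw accelerometer file and that M and I
# come from GGIR's g.getmeta() and g.inspectfile(), whose argument names may
# differ between GGIR versions):
#   library(GGIR)
#   I = g.inspectfile(datafile)
#   M = g.getmeta(datafile, desiredtz = "Europe/London")
#   IMP = g.impute(M, I, desiredtz = "Europe/London", ID = "sub001")
#   str(IMP$rout)       # per long-epoch masking decisions r1..r5
#   head(IMP$metashort) # imputed short-epoch time series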