R/g.report.part5.R

g.report.part5 = function(metadatadir = c(), f0 = c(), f1 = c(), loglocation = c(),
                          params_cleaning = NULL,
                          LUX_day_segments = c(), params_output,
                          verbose = TRUE) {
  # description: function to load the milestone data generated by g.part5 and to merge it into spreadsheets.
  # No additional analyses are performed here; this function primarily serves to wrap up
  # the output from the accelerometer files that were processed in parallel.
  
  if (params_cleaning[["includedaycrit.part5"]] > 1) {
    # This warning is placed here rather than in check_params, because there it would be
    # generated every time check_params is used across GGIR.
    warning(paste0("\nNote that the behaviour of parameter includedaycrit.part5 ",
                   "has changed for values above 1. These are now treated as the minimum ",
                   "absolute number of valid hours during the waking hours of a day. ",
                   "If you prefer to keep the old functionality then divide ",
                   "your current value (which is above 1) by 24."), call. = FALSE)
  }
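  # getValidDayIndices: helper returning the row indices of the day/segment summary
  # data.frame x that meet the wear-time and cleaning criteria in params_cleaning,
  # given the window type ("MM", "WW", "OO" or "Segments").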
  getValidDayIndices = function(x, window, params_cleaning) {
    if (window != "Segments") {
      if (params_cleaning[["includedaycrit.part5"]] >= 0 &
          params_cleaning[["includedaycrit.part5"]] <= 1) { # if includedaycrit.part5 is used as a ratio
        includeday_wearPercentage = params_cleaning[["includedaycrit.part5"]] * 100
        includeday_absolute = 0
      } else if (params_cleaning[["includedaycrit.part5"]] > 1 &
                 params_cleaning[["includedaycrit.part5"]] <= 25) { # if includedaycrit.part5 is used like params_cleaning[["includedaycrit"]] as a number of hours
        includeday_wearPercentage = 0
        includeday_absolute = params_cleaning[["includedaycrit.part5"]] * 60
      }
      if (params_cleaning[["includenightcrit.part5"]] >= 0 &
          params_cleaning[["includenightcrit.part5"]] <= 1) { # if includenightcrit.part5 is used as a ratio
        includenight_wearPercentage = params_cleaning[["includenightcrit.part5"]] * 100
        includenight_absolute = 0
      } else if (params_cleaning[["includenightcrit.part5"]] > 1 &
                 params_cleaning[["includenightcrit.part5"]] <= 25) { # if includenightcrit.part5 is used like params_cleaning[["includenightcrit"]] as a number of hours
        includenight_wearPercentage = 0
        includenight_absolute = params_cleaning[["includenightcrit.part5"]] * 60
      }
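      # At this point includeday_/includenight_wearPercentage (in % of the window) and
      # includeday_/includenight_absolute (in minutes) hold the wear-time thresholds
      # that the waking part of the day and the SPT window must meet.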
      include_window = rep(TRUE, nrow(x))
      if (length(params_cleaning[["data_cleaning_file"]]) > 0) { # allow for forced relying on guider based on external params_cleaning[["data_cleaning_file"]]
        DaCleanFile = data.table::fread(params_cleaning[["data_cleaning_file"]], data.table = FALSE)
        days2exclude = which(paste(x$ID, x$window_number) %in% paste(DaCleanFile$ID, DaCleanFile$day_part5))
        if (length(days2exclude) > 0) {
          include_window[days2exclude] = FALSE
        }
      } else {
        include_window = rep(TRUE,nrow(x))
      }
      # Below we intentionally only set a criterion on daytime wear, because for
      # the night time we only need the start and end of the SPT window.
      
      # Add extra columns to ease the check
      x$nonwear_perc_day_spt = as.numeric(x$nonwear_perc_day_spt)
      x$nonwear_perc_day = as.numeric(x$nonwear_perc_day)
      x$nonwear_perc_spt = as.numeric(x$nonwear_perc_spt)
      x$wear_min_day_spt = (1 - (x$nonwear_perc_day_spt / 100)) * x$dur_day_spt_min # valid minutes during waking and SPT hours
      
      x$wear_min_day = (1 - (x$nonwear_perc_day / 100)) * x$dur_day_min # valid minutes during waking hours
      x$wear_perc_day = 100 - x$nonwear_perc_day # wear percentage during waking hours
      x$wear_min_spt = (1 - (x$nonwear_perc_spt / 100)) * x$dur_spt_min # valid minutes during the SPT window
      x$wear_perc_spt = 100 - x$nonwear_perc_spt # wear percentage during the SPT window

      x$lastHour = as.numeric(x$lastHour)
      x$calendar_date = as.Date(x$calendar_date)

      minimumValidMinutesMM = 0 # default
      if (length(params_cleaning[["includedaycrit"]]) == 2) {
        minimumValidMinutesMM = params_cleaning[["includedaycrit"]][2] * 60
      }
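      # When require_complete_lastnight_part5 is FALSE, lastWindow is forced to FALSE so that
      # the checks on the completeness of the final night (further down) never exclude a window.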
      if (params_output[["require_complete_lastnight_part5"]] == FALSE) {
        x$lastWindow = FALSE
        x$lastDate = x$calendar_date
      } else {
        x$lastWindow = x$window_number == max(x$window_number)
        x$lastDate = as.Date(x$lastDate)
      }
      if (window == "WW" | window == "OO") {
        indices = which(x$wear_perc_day >= includeday_wearPercentage &
                          x$wear_min_day >= includeday_absolute &
                          x$wear_perc_spt >= includenight_wearPercentage &
                          x$wear_min_spt >= includenight_absolute &
                          x$dur_spt_min > 0 & x$dur_day_min > 0 &
                          ((x$lastWindow == TRUE & x$lastHour >= 15 & (x$lastDate - x$calendar_date) >= -1 & window == "WW") |
                             (x$lastWindow == TRUE & x$lastHour >= 9 & (x$lastDate - x$calendar_date) >= -1 & window == "OO") |
                             x$lastWindow == FALSE) &
                          include_window == TRUE &
                          x$wear_min_day_spt >= minimumValidMinutesMM)
      } else if (window == "MM") {
        indices = which(x$wear_perc_day >= includeday_wearPercentage &
                          x$wear_min_day >= includeday_absolute &
                          x$wear_perc_spt >= includenight_wearPercentage &
                          x$wear_min_spt >= includenight_absolute &
                          x$dur_spt_min > 0 & x$dur_day_min > 0 &
                          ((x$lastWindow == TRUE & x$lastHour > 9 & (x$lastDate - x$calendar_date) >= -1) |
                             x$lastWindow == FALSE) &
                          x$dur_day_spt_min >= (params_cleaning[["minimum_MM_length.part5"]] * 60) &
                          include_window == TRUE &
                          x$wear_min_day_spt >= minimumValidMinutesMM)
        # Note: by default for MM analysis only full days are of interest (the threshold allows
        # for the one 23-hour day in the year caused by daylight saving time)
      }
    } else if (window == "Segments") {
      # clean based on segments (even if a day is not valid, a certain segment of that
      # day could be valid if participant wore device enough in that part of the day)
      maxpernwday = 100 - (params_cleaning[["segmentWEARcrit.part5"]] * 100)
      include_window = rep(TRUE, nrow(x))
      indices = which(as.numeric(x$nonwear_perc_day_spt) <= maxpernwday &
                        as.numeric(x$dur_day_min) / as.numeric(x$dur_day_spt_min) >= params_cleaning[["segmentDAYSPTcrit.part5"]][1] &
                        as.numeric(x$dur_spt_min) / as.numeric(x$dur_day_spt_min) >= params_cleaning[["segmentDAYSPTcrit.part5"]][2] &
                        include_window == TRUE)
    }
    # exclude first and last window?
    if (params_cleaning[["excludefirstlast.part5"]] == TRUE) {
      x$window_number = as.numeric(x$window_number)
      # identify first and last day per file
      first_days = aggregate(window_number ~ filename, data = x, FUN = min, na.rm = TRUE)
      last_days = aggregate(window_number ~ filename, data = x, FUN = max, na.rm = TRUE)
      
      # match first and last days with the output dataframe
      exclude_firsts = which(paste(x$filename, x$window_number) %in% paste(first_days$filename, first_days$window_number))
      exclude_lasts = which(paste(x$filename, x$window_number) %in% paste(last_days$filename, last_days$window_number))
      
      # keep only indices that do not match with first and last days
      indices2exclude = which(indices %in% c(exclude_firsts, exclude_lasts))
      if (length(indices2exclude) > 0) indices = indices[-indices2exclude]
    }
    return(indices)
  }
  ms5.out = "/meta/ms5.out"
  if (file.exists(paste(metadatadir, ms5.out, sep = ""))) {
    if (length(dir(paste(metadatadir, ms5.out, sep = ""))) == 0) {
      try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
    } else {
      try.generate.report = TRUE
    }
  } else {
    try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
    warning(paste0("Cannot generate part 5 report because no part 5 milestone data",
                   " is available. First run part 5 with argument mode = 5."))
  }
  if (try.generate.report == TRUE) {
    #======================================================================
    # loop through meta-files
    fnames.ms5 = list.files(paste0(metadatadir, ms5.out), full.names = TRUE)
    if (f1 > length(fnames.ms5)) {
      f1 = length(fnames.ms5)
    }
    if (verbose == TRUE) {
      cat(paste0(" loading all the milestone data from part 5 this can",
                 " take a few minutes\n"))
    }
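    # myfun: load one part 5 milestone file, drop empty rows, record the hour and date of
    # the last available timestamp, and (when expectedCols is provided) align its columns
    # with the column set found in previously inspected milestone files.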
    myfun = function(x, expectedCols = c()) {
      tail_expansion_log = last_timestamp = output = NULL
      load(file = x)
      cut = which(output[, 1] == "")
      if (length(cut) > 0 & length(cut) < nrow(output)) {
        output = output[-cut, which(colnames(output) != "")]
      }
      if (exists("last_timestamp") == TRUE) {
        output$lastHour = as.numeric(format(last_timestamp, "%H"))
        output$lastDate = as.Date(last_timestamp)
      } else {
        output$lastHour = Inf # use dummy value
        output$lastDate = Inf # use dummy value
      }
      out = as.matrix(output)
      if (length(expectedCols) > 0) {
        tmp = as.data.frame(matrix(0, 0, length(expectedCols)))
        colnames(tmp) = expectedCols
        out = base::merge(tmp, out, all = TRUE)
      }
      if (!is.null(expectedCols) && ncol(out) > length(expectedCols)) {
        warning(paste0("Columns dropped in output part5 for ",
                       basename(x),
                       " because these could not be matched to columns for earlier",
                       " recordings in this dataset. Please check whether these recordings ",
                       " were processed with a",
                       " different GGIR version or configuration. If yes, reprocess",
                       " consistently. If not, ",
                       " consider renaming this file such that it is",
                       " alphabetically first and by that processed first, which",
                       " should address the issue."), call. = FALSE)
      }
      if (length(tail_expansion_log) != 0) {
        col2na = grep(pattern = paste0("sleep_efficiency|N_atleast5minwakenight|daysleeper|",
                                       "daysleeper|sleeplog_used|_spt_sleep|_spt_wake"),
                      x = names(out), value = FALSE)
        window_number = as.numeric(out[,"window_number"])
        lastwindow_indices = which(window_number == max(window_number, na.rm = TRUE))
        if (length(col2na) > 0 & length(lastwindow_indices) > 0) {
          out[lastwindow_indices, col2na] = "" # set last row to NA for all sleep related variables
        }
      }
      return(out)
    }
    # It can be that the first part 5 milestone file has fewer columns, e.g. when it
    # was processed with a different GGIR version or configuration. This would then
    # complicate appending later milestone files to it. So, first check a random sample
    # of files for the highest number of columns. If this fails to identify a file with
    # the maximum number of columns, then the function above will provide a warning to
    # inform the user with instructions on how to address it.
    expectedCols = NULL
    set.seed(1234)
    # Create list of file indices to try
    testfiles = unique(sample(x = f0:f1, size = pmin(5, f1 - f0 + 1)))
    for (testf in testfiles) {
      out_try = myfun(fnames.ms5[testf])
      if (ncol(out_try) > length(expectedCols)) {
        # expectedCols should equal the column names of the milestone
        # file with the largest number of columns
        expectedCols = colnames(out_try) 
      }
    }
    # Now load and append all files
    outputfinal = as.data.frame(do.call(rbind,
                                        lapply(fnames.ms5[f0:f1], myfun, expectedCols)),
                                stringsAsFactors = FALSE)
    
    # Find columns filled with missing values
    cut = which(sapply(outputfinal, function(x) all(x == "")) == TRUE)
    if (length(cut) > 0) {
      outputfinal = outputfinal[,-cut]
    }
    
    # revise filename
    outputfinal$filename = gsub(".RData$", "", outputfinal$filename)

    # order data.frame
    outputfinal$window_number = as.numeric(gsub(" ", "", outputfinal$window_number))
    outputfinal = outputfinal[order(outputfinal$filename, outputfinal$window_number, outputfinal$window), ]

    # split results to different spreadsheets in order to minimize individual
    # filesize and to ease organising dataset
    uwi = as.character(unique(outputfinal$window))
    if (!all(uwi %in% c("MM", "WW", "OO"))) {
      uwi = c(uwi[uwi %in% c("MM", "WW", "OO")], "Segments")
    }
    uTRLi = as.character(unique(outputfinal$TRLi))
    uTRMi = as.character(unique(outputfinal$TRMi))
    uTRVi = as.character(unique(outputfinal$TRVi))
    usleepparam = as.character(unique(outputfinal$sleepparam))
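    # One set of reports (day level and, further down, person level) is written per unique
    # combination of window type (uwi) and the part 5 threshold and sleep parameter settings
    # (TRLi, TRMi, TRVi, sleepparam), as reflected in the report file names.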
    # replace NaN by empty cell value
    for (kra in 1:ncol(outputfinal)) {
      krad = which(is.nan(outputfinal[,kra]) == TRUE)
      if (length(krad) > 0) {
        outputfinal[krad,kra] = ""
      }
    }
    outputfinal$daytype = 0
    outputfinal$daytype[which(outputfinal$weekday == "Sunday" |
                                outputfinal$weekday == "Saturday")] = "WE"
    outputfinal$daytype[which(outputfinal$weekday == "Monday" |
                                outputfinal$weekday == "Tuesday" |
                                outputfinal$weekday == "Wednesday" |
                                outputfinal$weekday == "Thursday" |
                                outputfinal$weekday == "Friday")] = "WD"
    outputfinal$nonwear_perc_day = as.numeric(outputfinal$nonwear_perc_day)
    outputfinal$nonwear_perc_spt = as.numeric(outputfinal$nonwear_perc_spt)
    outputfinal$dur_spt_min = as.numeric(outputfinal$dur_spt_min)
    outputfinal$dur_day_min = as.numeric(outputfinal$dur_day_min)
    outputfinal$guider = as.character(outputfinal$guider)
    outputfinal$sleeplog_used = as.numeric(outputfinal$sleeplog_used)
    outputfinal$dur_day_spt_min = as.numeric(outputfinal$dur_day_spt_min)
    # loop to store various variants of the analysis separately
    outputfinal_bu = outputfinal
    if (verbose == TRUE) cat(" generating csv report for every parameter configurations...\n")
    for (j in 1:length(uwi)) {
      for (h1 in 1:length(uTRLi)) {
        for (h2 in 1:length(uTRMi)) {
          for (h3 in 1:length(uTRVi)) {
            for (h4 in 1:length(usleepparam)) {
              if (verbose == TRUE) {
                cat(paste0(" ", uwi[j], "-", uTRLi[h1], "-", uTRMi[h2],
                           "-", uTRVi[h3], "-", usleepparam[h4]))
              }
              select_window = as.character(outputfinal$window) == uwi[j]
              if (!(uwi[j] %in% c("MM", "WW", "OO"))) select_window = !(as.character(outputfinal$window) %in% c("MM", "WW", "OO"))
              seluwi = which(select_window &
                               as.character(outputfinal$TRLi) == uTRLi[h1] &
                               as.character(outputfinal$TRMi) == uTRMi[h2] &
                               as.character(outputfinal$TRVi) == uTRVi[h3] &
                               as.character(outputfinal$sleepparam) == usleepparam[h4])
              # store spreadsheet
              if (nrow(outputfinal[seluwi,]) == 0) {
                if (verbose == TRUE) cat("report not stored, because no results available")
              } else {
                CN = colnames(outputfinal)
                outputfinal2 = outputfinal
                colnames(outputfinal2) = CN
                delcol = grep(pattern = "TRLi|TRMi|TRVi|sleepparam",
                              x = colnames(outputfinal2))
                if (uwi[j] != "Segments") {
                  delcol = c(delcol, which(colnames(outputfinal2) == "window"))
                }
                outputfinal2 = outputfinal2[,-delcol]
                OF3 = outputfinal2[seluwi,]
                OF3 = as.data.frame(OF3, stringsAsFactors = TRUE)
                #-------------------------------------------------------------
                # store all summaries in csv files without cleaning criteria
                OF3_clean = tidyup_df(OF3)
                data.table::fwrite(
                  OF3_clean,
                  paste(
                    metadatadir,
                    "/results/QC/part5_daysummary_full_",
                    uwi[j], "_L", uTRLi[h1], "M", uTRMi[h2], "V", uTRVi[h3],
                    "_", usleepparam[h4], ".csv", sep = ""), row.names = FALSE, na = "",
                  sep = params_output[["sep_reports"]],
                  dec = params_output[["dec_reports"]])
                # store all summaries in csv files with cleaning criteria
                validdaysi = getValidDayIndices(x = OF3_clean, window = uwi[j],
                                                params_cleaning = params_cleaning)
                if ("lastHour" %in% colnames(OF3_clean)) {
                  OF3_clean = OF3_clean[, -which(colnames(OF3_clean) %in% c("lastHour", "lastDate"))]
                }
                if (length(validdaysi) > 0) {
                  data.table::fwrite(
                    OF3_clean[validdaysi, ],
                    paste(metadatadir, "/results/part5_daysummary_",
                          uwi[j], "_L", uTRLi[h1], "M", uTRMi[h2], "V", uTRVi[h3],
                          "_", usleepparam[h4], ".csv", sep = ""), row.names = FALSE, na = "",
                    sep = params_output[["sep_reports"]],
                    dec = params_output[["dec_reports"]])
                }
                #------------------------------------------------------------------------------------
                #also compute summary per person
                agg_plainNweighted = function(df,
                                              filename = "filename",
                                              daytype = "daytype",
                                              window = "MM") {
                  # function to take both the weighted (by weekday/weekendday) and plain average of all numeric variables
                  # df: input data.frame (OF3 outside this function)
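                  # Returns one row per recording (or per recording and segment when window == "Segments")
                  # with plain means (suffix _pla), weekday/weekend-weighted means (suffix _wei) and,
                  # when requested, separate weekday (_WD) and weekend (_WE) aggregates.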
                  
                  ignorevar = c("daysleeper", "cleaningcode", "night_number", "sleeplog_used",
                                "ID", "acc_available", "window_number",
                                "boutcriter.mvpa", "boutcriter.lig", "boutcriter.in", "bout.metric") # skip cosinor variables
                  for (ee in 1:ncol(df)) { # make sure that numeric columns have class numeric
                    nr = nrow(df)
                    if (nr > 30) nr = 30
                    options(warn = -1)
                    trynum = as.numeric(as.character(df[1:nr, ee]))
                    options(warn = 0)
                    if (length(which(is.na(trynum) == TRUE)) != nr &
                        length(which(ignorevar == names(df)[ee])) == 0) {
                      options(warn = -1)
                      class(df[, ee]) = "numeric"
                      options(warn = 0)
                    }
                  }
                  plain_mean = function(x) {
                    options(warn = -1)
                    plain_mean = mean(x, na.rm = TRUE)
                    options(warn = 0)
                    if (is.na(plain_mean) == TRUE) {
                      plain_mean = x[1]
                    }
                    return(plain_mean)
                  }
                  # aggregate across all days
                  if (window == "Segments") {
                    by = list(df$filename, df$window)
                  } else {
                    by = list(df$filename)
                  }
                  PlainAggregate = aggregate.data.frame(df, by = by, FUN = plain_mean)
                  PlainAggregate = PlainAggregate[, -grep("^Group", colnames(PlainAggregate))]
                  # aggregate per day type (weekday or weekenddays)
                  if (window == "Segments") {
                    by = list(df$filename, df$window, df$daytype)
                  } else {
                    by = list(df$filename, df$daytype)
                  }
                  AggregateWDWE = aggregate.data.frame(df, by = by, plain_mean)
                  AggregateWDWE = AggregateWDWE[, -grep("^Group", colnames(AggregateWDWE))]
                  # Add counted number of days for Gini, Cov, alpha Fragmentation variables, because 
                  # days are dropped if there are not enough fragments:
                  vars_with_minimum_Nfrag = c("FRAG_Gini_dur_PA_day", "FRAG_CoV_dur_PA_day",
                                              "FRAG_alpha_dur_PA_day", "FRAG_Gini_dur_IN_day",
                                              "FRAG_CoV_dur_IN_day")
                  vars_with_minimum_Nfrag_i = which(vars_with_minimum_Nfrag %in% colnames(df) == TRUE)
                  if (length(vars_with_minimum_Nfrag_i) > 0) {
                    varname_minfrag = vars_with_minimum_Nfrag[vars_with_minimum_Nfrag_i[1]]
                    DAYCOUNT_Frag_Multiclass = aggregate.data.frame(
                      df[, varname_minfrag],
                      by = by,
                      FUN = function(x) length(which(is.na(x) == FALSE))
                    )
                    if (window != "Segments") {
                      colnames(DAYCOUNT_Frag_Multiclass)[1:2] = c("filename","daytype")
                      # AL10F, abbreviation for: at least 10 fragments
                      colnames(DAYCOUNT_Frag_Multiclass)[3] = "Nvaliddays_AL10F" 
                      by.x = c("filename", "daytype")
                    } else if (window == "Segments") {
                      colnames(DAYCOUNT_Frag_Multiclass)[1:3] = c("filename","window", "daytype")
                      # AL10F, abbreviation for: at least 10 fragments
                      colnames(DAYCOUNT_Frag_Multiclass)[4] = "Nvalidsegments_AL10F"
                      by.x = c("filename","window", "daytype")
                    }
                    AggregateWDWE = merge(AggregateWDWE, DAYCOUNT_Frag_Multiclass, by.x = by.x)
                  }
                  len = NULL
                  AggregateWDWE$len <- 0
                  AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WD")] = 5 #weighting of weekdays
                  AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WE")] = 2 #weighting of weekend days
                  if (window == "Segments") {
                    dt <- data.table::as.data.table(AggregateWDWE[,which(lapply(AggregateWDWE, class) == "numeric" | 
                                                                           names(AggregateWDWE) == filename |
                                                                           names(AggregateWDWE) == "window")])
                  } else {
                    dt <- data.table::as.data.table(AggregateWDWE[,which(lapply(AggregateWDWE, class) == "numeric" | 
                                                                           names(AggregateWDWE) == filename)])
                  }
                  
                  options(warn = -1)
                  .SD <- .N <- count <- a <- NULL
                  if (window == "Segments") {
                    WeightedAggregate <- dt[, lapply(.SD, weighted.mean, w = len, na.rm = TRUE), by = list(filename, window)]
                  } else {
                    WeightedAggregate <- dt[, lapply(.SD, weighted.mean, w = len, na.rm = TRUE), by = list(filename)]
                  }
                  options(warn = 0)
                  LUXmetrics = c("above1000", "timeawake", "mean", "imputed", "ignored")
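                  # add_missing_LUX: ensure that the full set of expected LUX segment columns is
                  # present (absent ones are added as missing values) so that the aggregated output
                  # keeps a consistent column layout across recordings and week segments.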
                  add_missing_LUX = function(x, LUX_day_segments, weeksegment = c(), LUXmetrics) {
                    # missing columns, add these:
                    NLUXseg = length(LUX_day_segments)
                    if (length(weeksegment) > 0) {
                      LUX_segment_vars_expected = paste0("LUX_", LUXmetrics, "_",
                                                         LUX_day_segments[1:(NLUXseg - 1)],
                                                         "-", LUX_day_segments[2:(NLUXseg)],
                                                         "hr_day_", weeksegment)
                    } else {
                      luxvars = paste0("LUX_", LUXmetrics, "_")
                      segments = paste0(LUX_day_segments[1:(NLUXseg - 1)], 
                                        "_", LUX_day_segments[2:(NLUXseg)], "hr_day")
                      LUX_segment_vars_expected = c()
                      for (luxi in 1:length(luxvars)) {
                        LUX_segment_vars_expected = c(LUX_segment_vars_expected,
                                                      paste0(luxvars[luxi], segments))
                      }
                    }
                    dummy_df = as.data.frame(matrix(NaN, 1, length(LUX_segment_vars_expected)))
                    colnames(dummy_df) = LUX_segment_vars_expected
                    if (length(which(LUX_segment_vars_expected %in% colnames(x))) > 0) {
                      x = as.data.frame(merge(x, dummy_df, all.x = T))
                      # re-order
                      current_location = which(colnames(x) %in% LUX_segment_vars_expected == TRUE)
                      neworder = sort(colnames(x)[current_location])
                      x = cbind(x[,-current_location], x[,LUX_segment_vars_expected])
                    }
                    return(x)
                  }
                  LUX_segment_vars = c()
                  for (li in 1:length(LUXmetrics)) {
                    LUX_segment_vars = c(LUX_segment_vars, grep(
                      pattern = paste0("LUX_", LUXmetrics[li]),
                      x = colnames(WeightedAggregate),
                      value = TRUE
                    ))
                  }
                  if (length(LUX_segment_vars) > 0 &
                      length(LUX_segment_vars) < 24 &
                      length(LUX_day_segments) > 0) {
                    WeightedAggregate = add_missing_LUX(
                      WeightedAggregate,
                      LUX_day_segments,
                      weeksegment = c(),
                      LUXmetrics = LUXmetrics
                    )
                  }
                  # merge them into one output data.frame (G)
                  LUX_segment_vars = c()
                  for (li in 1:length(LUXmetrics)) {
                    LUX_segment_vars = colnames(PlainAggregate) %in% grep(
                      x = colnames(PlainAggregate),
                      pattern = paste0("LUX_", LUXmetrics[li]),
                      value = TRUE
                    )
                  }
                  charcol = which(
                    lapply(PlainAggregate, class) != "numeric" &
                      names(PlainAggregate) != filename & !(LUX_segment_vars)
                  )
                  numcol = which(lapply(PlainAggregate, class) == "numeric" | LUX_segment_vars)
                  WeightedAggregate = as.data.frame(WeightedAggregate, stringsAsFactors = TRUE)
                  
                  if (window == "Segments") {
                    by = c("filename", "window")
                  } else {
                    by = "filename"
                  }
                  G = base::merge(PlainAggregate,
                                  WeightedAggregate,
                                  by = by,
                                  all.x = TRUE)
                  p0b = paste0(names(PlainAggregate[, charcol]), ".x")
                  p1 = paste0(names(PlainAggregate[, numcol]), ".x")
                  p2 = paste0(names(PlainAggregate[, numcol]), ".y")
                  for (i in 1:length(p0b)) {
                    names(G)[which(names(G) == p0b[i])] = paste0(names(PlainAggregate[, charcol])[i])
                  }
                  for (i in 1:length(p1)) {
                    names(G)[which(names(G) == p1[i])] = paste0(names(PlainAggregate[, numcol])[i], "_pla")
                  }
                  for (i in 1:length(p2)) {
                    names(G)[which(names(G) == p2[i])] = paste0(names(PlainAggregate[, numcol])[i], "_wei")
                  }
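                  # From here on, plain means carry the suffix _pla and weekday/weekend-weighted
                  # means carry the suffix _wei, while character columns keep their original names.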
                  # expand output with weekday (WD) and weekend (WE) day aggregates
                  if (params_output[["week_weekend_aggregate.part5"]] == TRUE) {
                    for (weeksegment in c("WD", "WE")) {
                      temp_aggregate = AggregateWDWE[which(AggregateWDWE$daytype == weeksegment), ]
                      charcol = which(lapply(temp_aggregate, class) != "numeric" &
                                        names(temp_aggregate) != filename)
                      numcol = which(lapply(temp_aggregate, class) %in% c("numeric", "integer") == TRUE)
                      names(temp_aggregate)[numcol] = paste0(names(temp_aggregate)[numcol], "_", weeksegment)
                      if (window == "Segments") {
                        temp_aggregate = temp_aggregate[, c(which(colnames(temp_aggregate) == "filename" |
                                                                    colnames(temp_aggregate) == "window"), numcol)]
                      } else {
                        temp_aggregate = temp_aggregate[, c(which(colnames(temp_aggregate) == "filename"), numcol)]
                      }
                      LUX_segment_vars = c()
                      for (li in 1:length(LUXmetrics)) {
                        LUX_segment_vars = grep(
                          pattern = paste0("LUX_", LUXmetrics[li]),
                          x = colnames(temp_aggregate),
                          value = TRUE
                        )
                      }
                      if (length(LUX_segment_vars) > 0 &
                          length(LUX_segment_vars) < 24 & length(LUX_day_segments) > 0) {
                        temp_aggregate = add_missing_LUX(temp_aggregate, LUX_day_segments, weeksegment, LUXmetrics)
                      }
                      G = base::merge(G, temp_aggregate,
                                      by = by, all.x = TRUE)
                    }
                  }
                  G = G[,-which(names(G) %in% c("len", "daytype", "len_WE", "len_WD"))]
                  return(G)
                }
                #---------------------------------------------
                # Calculate weighted and plain means of all variables.
                # The daytype column defining weekdays and weekend days is needed by agg_plainNweighted.
                # Before processing OF3, first identify which days have enough monitor wear time.
                
                validdaysi = getValidDayIndices(x = OF3, window = uwi[j],
                                                params_cleaning = params_cleaning)
                if (length(validdaysi) > 0) { # do not attempt to aggregate if there are no valid days
                  # aggregate OF3 (days) to person summaries in OF4
                  OF4 = agg_plainNweighted(df = OF3[validdaysi,], filename = "filename", 
                                           daytype = "daytype", window = uwi[j])
                  # calculate additional variables
                  columns2keep = c("filename", "night_number", "daysleeper",
                                   "cleaningcode","sleeplog_used","guider",
                                   "acc_available", "nonwear_perc_day", "nonwear_perc_spt",
                                   "daytype", "dur_day_min",
                                   "dur_spt_min")
                  if (uwi[j] == "Segments") {
                    columns2keep = c(columns2keep, "window")
                  }
                  OF3tmp = OF3[, columns2keep]
                  foo34 = function(df, aggPerIndividual, nameold, namenew, cval, window) {
                    # helper for calculating additional variables related to counting how many
                    # days of measurement meet a certain criterion
                    # cval is the value that counts as meeting the criterion
                    # aggPerIndividual is the aggregated data (per individual)
                    # df is the non-aggregated data (days across individuals);
                    # we want to extract the number of days per individual that meet the
                    # criterion in df, and make it align with aggPerIndividual
                    df2 = function(x) length(which(x == cval)) # count values that meet the criterion
                    if (window == "Segments") by = list(df$filename, df$window)
                    if (window != "Segments") by = list(df$filename)
                    mmm = as.data.frame(aggregate.data.frame(df, by = by, FUN = df2),
                                        stringsAsFactors = TRUE)
                    mmm2 = data.frame(
                      filename = mmm$Group.1,
                      cc = mmm[, nameold],
                      stringsAsFactors = TRUE
                    )
                    if (window == "Segments") {
                      mmm2$window = mmm$Group.2
                      by = c("filename", "window")
                    } else if (window != "Segments") {
                      by = "filename"
                    }
                    aggPerIndividual = merge(aggPerIndividual, mmm2, by = by)
                    names(aggPerIndividual)[which(names(aggPerIndividual) == "cc")] = namenew
                    foo34 = aggPerIndividual
                  }
                  # calculate number of valid days (both night and day criteria met)
                  OF3tmp$validdays = 0
                  # criterion: nonwear percentage needs to be below the threshold for both day and night:
                  OF3tmp$validdays[validdaysi] = 1
                  # now we have a label for the valid days, we can create a new variable
                  # in OF4 that is a count of the number of valid days:
                  namenew = "Nvaliddays"
                  if (uwi[j] == "Segments") namenew = "Nvalidsegments"
                  OF4 = foo34(
                    df = OF3tmp,
                    aggPerIndividual = OF4,
                    nameold = "validdays",
                    namenew = namenew,
                    cval = 1,
                    window = uwi[j]
                  )
                  # do the same for WE (weekend days):
                  OF3tmp$validdays = 0
                  OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WE")]] = 1
                  namenew = "Nvaliddays_WE"
                  if (uwi[j] == "Segments") namenew = "Nvalidsegments_WE"
                  OF4 = foo34(
                    df = OF3tmp,
                    aggPerIndividual = OF4,
                    nameold = "validdays",
                    namenew = namenew,
                    cval = 1,
                    window = uwi[j]
                  )
                  # do the same for WD (weekdays):
                  OF3tmp$validdays = 0
                  OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WD")]] = 1
                  namenew = "Nvaliddays_WD"
                  if (uwi[j] == "Segments") namenew = "Nvalidsegments_WD"
                  OF4 = foo34(df = OF3tmp, 
                              aggPerIndividual = OF4, 
                              nameold = "validdays", 
                              namenew = namenew, 
                              cval = 1, 
                              window = uwi[j]) # create variable from it
                  # do the same for daysleeper, cleaningcode, sleeplog_used, acc_available:
                  OF3tmp$validdays = 1
                  # redefine by considering only valid days
                  OF4 = foo34(
                    df = OF3tmp[validdaysi,],
                    aggPerIndividual = OF4,
                    nameold = "daysleeper",
                    namenew = "Ndaysleeper",
                    cval = 1,
                    window = uwi[j]
                  )
                  OF4 = foo34(
                    df = OF3tmp[validdaysi,],
                    aggPerIndividual = OF4,
                    nameold = "cleaningcode",
                    namenew = "Ncleaningcodezero",
                    cval = 0,
                    window = uwi[j]
                  )
                  for (ccode in 1:6) {
                    OF4 = foo34(
                      df = OF3tmp[validdaysi, ],
                      aggPerIndividual = OF4,
                      nameold = "cleaningcode",
                      namenew = paste0("Ncleaningcode", ccode),
                      cval = ccode,
                      window = uwi[j]
                    )
                  }
                  OF4 = foo34(
                    df = OF3tmp[validdaysi, ],
                    aggPerIndividual = OF4,
                    nameold = "sleeplog_used",
                    namenew = "Nsleeplog_used",
                    cval = TRUE,
                    window = uwi[j]
                  )
                  OF4 = foo34(
                    df = OF3tmp[validdaysi, ],
                    aggPerIndividual = OF4,
                    nameold = "acc_available",
                    namenew = "Nacc_available",
                    cval = 1,
                    window = uwi[j]
                  )
                  # Move valid day count variables to beginning of dataframe
                  OF4 = cbind(OF4[, 1:5],
                              OF4[, (ncol(OF4) - 10):ncol(OF4)],
                              OF4[, 6:(ncol(OF4) - 11)])
                  nom = names(OF4)
                  cut = which(nom == "sleeponset_ts" | nom == "wakeup_ts"
                              | nom == "night_number"  | nom == "window_number"
                              | nom == "daysleeper" | nom == "cleaningcode"
                              | nom == "acc_available"
                              | nom == "guider" | nom == "L5TIME" | nom == "M5TIME"
                              | nom == "L10TIME" | nom == "M10TIME"
                              | nom == "acc_available" | nom == "daytype"
                              | nom %in% paste0("sleeplog_used_", c("pla", "wei", "WD", "WE"))
                              | nom %in% paste0("window_number_", c("pla", "wei", "WD", "WE")))
                  names(OF4)[which(names(OF4) == "weekday")] = "startday"
                  OF4 = OF4[,-cut]
                  for (col4 in 1:ncol(OF4)) {
                    navalues = which(is.na(OF4[,col4]) == TRUE)
                    if (length(navalues) > 0) {
                      OF4[navalues, col4] = ""
                    }
                  }
                  # Move Nvaliddays variables to the front of the spreadsheet
                  Nvaliddays_variables = grep(x = colnames(OF4), pattern = "Nvaliddays", value = FALSE)
                  Nvaliddays_variables = unique(c(
                    which(colnames(OF4) == "Nvaliddays"),
                    which(colnames(OF4) == "Nvaliddays_WD"),
                    which(colnames(OF4) == "Nvaliddays_WE"),
                    Nvaliddays_variables
                  ))
                  OF4 = OF4[, unique(c(1:4, Nvaliddays_variables, 5:ncol(OF4)))]
                  #-------------------------------------------------------------
                  # store all summaries in csv files
                  OF4_clean = tidyup_df(OF4)
                  data.table::fwrite(OF4_clean,paste(metadatadir,"/results/part5_personsummary_",
                                                     uwi[j],"_L",uTRLi[h1],"M",
                                                     uTRMi[h2], "V", uTRVi[h3], "_",
                                                     usleepparam[h4], ".csv", sep = ""), 
                                     row.names = FALSE, na = "",
                                     sep = params_output[["sep_reports"]],
                                     dec = params_output[["dec_reports"]])
                }
              }
            }
          }
        }
      }
    }
    rm(outputfinal, outputfinal2)
  }
}
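
# Minimal usage sketch (not part of the package source; argument values are hypothetical).
# In practice g.report.part5 is called internally by the GGIR pipeline:
# g.report.part5(metadatadir = "~/mystudy/output_mydata", f0 = 1, f1 = 10,
#                params_cleaning = params_cleaning,  # list with e.g. includedaycrit.part5
#                params_output = params_output,      # list with e.g. sep_reports, dec_reports
#                verbose = TRUE)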