mcs-acc: Raw Accelerometer Data Analysis

g.part5.report = function(metadatadir=c(),f0=c(),f1=c(),loglocation=c(),
                          includenightcrit=c(),includedaycrit=c()) {
  # description: function to load milestone data generated by g.part5 and to merge these into a spreadsheet
  # here no additional information of analyses are added. This function therefore is primary to wrap up the up the output
  # from parallel processed accelerometer files
  ms5.out = "/meta/ms5.out"
  if (file.exists(paste(metadatadir,ms5.out,sep=""))) {
    if (length(dir(paste(metadatadir,ms5.out,sep=""))) == 0) {
      try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
    } else {
      try.generate.report = TRUE
    }
  } else {
    try.generate.report = FALSE #do not run this function if there is no milestone data from g.part5
    cat("Error: No milestone data available from part 5")
  }
  if (try.generate.report == TRUE) {
    #======================================================================
    # loop through meta-files
    fnames.ms5 = list.files(paste0(metadatadir,ms5.out),full.names=TRUE)
    if(f1 > length(fnames.ms5)) f1 = length(fnames.ms5)
    cat(" loading all the milestone data from part 5 this can take a few minutes\n")
    myfun = function(x) { 
      load(file=x)
      cut = which(output[,1] == "")
      if (length(cut) > 0 & length(cut) < nrow(output)) {
        output = output[-cut,]
      }
      
      out = as.matrix(output)
    }
    outputfinal = as.data.frame(do.call(rbind,lapply(fnames.ms5[f0:f1],myfun)),stringsAsFactors=FALSE)
    cut = which(outputfinal[1,] == "")
    if (length(cut) > 0) {
      outputfinal = outputfinal[,-cut]
    }
    cut2 = which(outputfinal[,1] == "" & outputfinal[,2] == "")
    if (length(cut2) > 0) {
      outputfinal = outputfinal[-cut2,]
    }
    #-------------------------------------------
    # clean spreadsheet:
    # if there is a sleeplog location then only use measurements with sleeplog
    if (length(loglocation) > 0) { #do you want to fall back on generic assumptions about sleep if sleep log is not available?
      only.use.sleeplog = TRUE
    } else {
      only.use.sleeplog = FALSE
    }
    # delete nights without diary, with limited or no acceleration:
    if (only.use.sleeplog == TRUE) {
      del = which(as.numeric(as.character(outputfinal$cleaningcode)) > 0 |
                    as.character(outputfinal$sleeplog_used) == "FALSE" | 
                    as.character(outputfinal$acc_available) == "FALSE")
    } else {
      #only delete nights with no or no valid accelerometer data, but consider nigths with missing sleep log data
      del = which(as.numeric(as.character(outputfinal$cleaningcode)) > 1 | as.character(outputfinal$acc_available) == "FALSE") 
    }
    if (length(del) > 0) {
      outputfinal = outputfinal[-del,]
    }
    #-----------------------------------------------------------
    # split results to different spreadsheets in order to minimize individual filesize and to ease organising dataset
    uwi = as.character(unique(outputfinal$window))
    uTRLi = as.character(unique(outputfinal$TRLi))
    uTRMi = as.character(unique(outputfinal$TRMi))
    uTRVi = as.character(unique(outputfinal$TRVi))
    uacc_def = as.character(unique(outputfinal$acc_def))
    # replace NaN by empty cell value
    for (kra in 1:ncol(outputfinal)) {
      krad = which(as.character(outputfinal[,kra]) == "NaN")
      if (length(krad) > 0) {
        outputfinal[krad,kra] = ""
      }
    }
    # loop to store varous variants of the analysis seperately
    cat(" generating output for every parameter configurations...\n")
    for (j in 1:length(uwi)) {
      for (h1 in 1:length(uTRLi)) {
        for (h2 in 1:length(uTRMi)) {
          for (h3 in 1:length(uTRVi)) {
            for (h4 in 1:length(uacc_def)) {
              cat(paste0(" ",uwi[j],"-",uTRLi[h1],"-",uTRMi[h2],"-",uTRVi[h3],"-",uacc_def[h4]))
              seluwi = which(as.character(outputfinal$window) == uwi[j] & 
                               as.character(outputfinal$TRLi) == uTRLi[h1] &
                               as.character(outputfinal$TRMi) == uTRMi[h2] &
                               as.character(outputfinal$TRVi) == uTRVi[h3] &
                               as.character(outputfinal$acc_def) == uacc_def[h4])
              # store spreadsheet
              if (nrow(outputfinal[seluwi,]) == 0) {
                cat("report not stored, because no results available")
              } else {
                
                # first update variable names
                LT = as.character(max(uTRLi))#as.character(outputfinal[seluwi[1],which(colnames(outputfinal) == "TRLi")])
                MT = as.character(max(uTRMi)) # as.character(outputfinal[seluwi[1],which(colnames(outputfinal) == "TRMi")])
                VT = as.character(max(uTRVi))#as.character(outputfinal[seluwi[1],which(colnames(outputfinal) == "TRVi")])
                CN = colnames(outputfinal)
                for (z4 in 1:length(CN)) {
                  # IN and OIN
                  tr1 = unlist(strsplit(CN[z4],paste("IN",LT,sep="")))
                  if (length(tr1) > 1) CN[z4] =  paste(tr1[1],"IN",uTRLi[h1],tr1[2],sep="")
                  # LIG
                  tr2 = unlist(strsplit(CN[z4],paste("LIG",LT,"_",MT,sep="")))
                  if (length(tr2) > 1) CN[z4] =  paste(tr2[1],"LIG",uTRLi[h1],"_",uTRMi[h2],tr2[2],sep="")
                  # MOD
                  tr3 = unlist(strsplit(CN[z4],paste("MOD",MT,"_",VT,sep="")))
                  if (length(tr3) > 1) CN[z4] =  paste(tr3[1],"MOD",uTRMi[h2],"_",uTRVi[h3],tr3[2],sep="")                  
                  # VIG
                  tr4 = unlist(strsplit(CN[z4],paste("VIG",VT,sep="")))
                  if (length(tr4) > 1) CN[z4] =  paste(tr4[1],"VIG",uTRVi[h3],tr4[2],sep="")
                  # MVPA with T_ mod _ m #for mg and min
                  tr5 = unlist(strsplit(CN[z4],paste("T",MT,"_m",sep="")))
                  if (length(unlist(strsplit(CN[z4],"MVPA"))) > 1 &
                      length(tr5) > 1) CN[z4] =  paste(tr5[1],"T",uTRMi[h2],"_m",tr5[2],sep="")                  
                  # INB with T_ mod _ m
                  tr6 = unlist(strsplit(CN[z4],paste("T",LT,"_m",sep="")))
                  if (length(unlist(strsplit(CN[z4],"INB"))) > 1 &
                      length(tr6) > 1) CN[z4] =  paste(tr6[1],"T",uTRLi[h1],"_m",tr6[2],sep="")                  
                  # LIGB with T_ mod _ m
                  tr7 = unlist(strsplit(CN[z4],paste("T",LT,"_",MT,"_m",sep="")))
                  if (length(unlist(strsplit(CN[z4],"LIGB"))) > 1 &
                      length(tr7) > 1) CN[z4] =  paste(tr7[1],"T",uTRLi[h1],"_",uTRMi[h2],"_m",tr7[2],sep="")
                  #Nbouts
                  tr15 = unlist(strsplit(CN[z4],paste("outs_INB",sep="")))
                  tr16 = unlist(strsplit(CN[z4],paste("outs_MVP",sep="")))
                  tr17 = unlist(strsplit(CN[z4],paste("outs_LIGB",sep="")))
                  if (length(tr15) > 1) {
                    CN[z4] = paste0(unlist(strsplit(CN[z4],paste("T",sep="")))[1],"T",uTRLi[h1])
                  }
                  if (length(tr16) > 1) {
                    CN[z4] = paste0(unlist(strsplit(CN[z4],paste("T",sep="")))[1],"T",uTRMi[h2])
                  }
                  if (length(tr17) > 1) {
                    CN[z4] = paste0(unlist(strsplit(CN[z4],paste("T",sep="")))[1],"T",uTRLi[h1],"_",uTRMi[h2])
                  }
                  #Nblocks
                  tr8 = unlist(strsplit(CN[z4],"ocks")) #Nblocks
                  if (length(tr8) > 1) {
                    CNtemp = paste0(CN[z4],"dummy")
                    # IN and OIN
                    tr9 = unlist(strsplit(CNtemp,paste("IN",LT,sep="")))
                    if (length(tr9) > 1) {
                      CNtemp =  paste(tr9[1],"IN",uTRLi[h1],sep="")
                    }
                    # LIG
                    tr10 = unlist(strsplit(CNtemp,paste("LIG",LT,"_",MT,sep="")))
                    if (length(tr10) > 1) CNtemp =  paste(tr10[1],"LIG",uTRLi[h1],"_",uTRMi[h2],sep="")
                    # MOD
                    tr11 = unlist(strsplit(CNtemp,paste("MOD",MT,"_",VT,sep="")))
                    if (length(tr11) > 1) CNtemp =  paste(tr11[1],"MOD",uTRMi[h2],"_",uTRVi[h3],sep="")                  
                    # VIG
                    tr12 = unlist(strsplit(CNtemp,paste("VIG",VT,sep="")))
                    if (length(tr12) > 1) CNtemp =  paste(tr12[1],"VIG",uTRVi[h3],sep="")
                    # MVPA with T_ mod _ m #for mg and min
                    tr13 = unlist(strsplit(CNtemp,paste("T",MT,sep="")))
                    if (length(unlist(strsplit(CNtemp,"MVPA"))) > 1 &
                        length(tr13) > 1) CNtemp =  paste(tr13[1],"T",uTRMi[h2],sep="")                  
                    # INB with T_ mod _ m
                    tr14 = unlist(strsplit(CNtemp,paste("T",LT,sep="")))
                    if (length(unlist(strsplit(CNtemp,"INB"))) > 1 &
                        length(tr14) > 1) CNtemp =  paste(tr14[1],"T",uTRLi[h1],sep="")                  
                    # LIGB with T_ mod _ m
                    tr14 = unlist(strsplit(CNtemp,paste("T",LT,"_",MT,sep="")))
                    if (length(unlist(strsplit(CNtemp,"LIGB"))) > 1 &
                        length(tr14) > 1) CNtemp =  paste(tr14[1],"T",uTRLi[h1],"_",uTRMi[h2],sep="")                 
                    if (length(unlist(strsplit(CNtemp,"dum"))) > 1) {
                      CN[z4] = unlist(strsplit(CNtemp,"dum"))[1]
                    } else {
                      CN[z4] = CNtemp
                    }
                  }
                  #                   # Nblocks and MVPA with T_ mod 
                  #                   tr5 = unlist(strsplit(CN[z4],paste("T",MT,sep="")))
                  #                   if (length(unlist(strsplit(CN[z4],"MVPA"))) > 1 &
                  #                         length(unlist(strsplit(CN[z4],"Nblocks"))) > 1) CN[z4] =  paste(tr5[1],"T",uTRMi[h2],sep="")                  
                  #                   # Nblocks and INB with T_ mod _ m
                  #                   tr6 = unlist(strsplit(CN[z4],paste("T",LT,sep="")))
                  #                   if (length(unlist(strsplit(CN[z4],"INB"))) > 1 &
                  #                         length(unlist(strsplit(CN[z4],"Nbolcks"))) > 1) CN[z4] =  paste(tr6[1],"T",uTRLi[h1],sep="")                  
                  
                  #                   CN[z4] = paste(CN[z4],"qdd",sep="") # add dummy character
                  #                   CNtemp = as.character(unlist(strsplit(CN[z4],"qdd"))) # remove dummy character
                  CNtemp = CN[z4]
                  if (length(which(CNtemp =="")) >= 1) CNtemp = CNtemp[-which(CNtemp =="")] 
                  CN[z4] = CNtemp
                }
                outputfinal2 = outputfinal
                colnames(outputfinal2) = CN
                delcol = which(colnames(outputfinal2) == "window" | colnames(outputfinal2) == "TRLi" |
                                 colnames(outputfinal2) == "TRMi" | colnames(outputfinal2) == "TRVi" |
                                 colnames(outputfinal2) == "acc_def")
                outputfinal2 = outputfinal2[,-delcol]
                #-------------------------------------------------------------
                # store all summaries in csv files
                write.csv(outputfinal2[seluwi,],paste(metadatadir,"/results/part5_daysummary_",
                                                      uwi[j],"_L",uTRLi[h1],"M",uTRMi[h2],"V",uTRVi[h3],"_",uacc_def[h4],".csv",sep=""),row.names=FALSE)
                #------------------------------------------------------------------------------------
                #also compute summary per person (we could modify this to also provide the weighted average)
                OF3 = outputfinal2[seluwi,]
                # OF4 = aggregate.data.frame(OF3,by=list(OF3$filename),FUN = df) # plain average
                takeweightedmean = function(A,filename="filename",daytype="daytype") {
                  # function to take both the weighted (by weekday/weekendday) and plain average of all numeric variables
                  # ignorevar = c("boutcriter.in","boutcriter.lig","id","boutcriter.mvpa",
                  #               "daysleeper","cleaningcode","night number")
                  ignorevar = c("daysleeper","cleaningcode","night number")
                  for (ee in 1:ncol(A)) { # make sure that numeric columns have class numeric
                    nr = nrow(A)
                    if (nr > 30) nr = 30
                    options(warn=-1)
                    trynum = as.numeric(A[1:nr,ee])
                    options(warn=0)
                    if (length(which(is.na(trynum) == TRUE)) != nr & length(which(ignorevar == names(A)[ee])) == 0) {
                      class(A[,ee]) = "numeric"
                    }
                  }
                  df = function(x) {
                    options(warn=-1)
                    df = mean(x,na.rm=TRUE)
                    options(warn=0)
                    if (is.na(df) == TRUE) {
                      df = x[1]
                    }
                    df
                  }
                  E = aggregate.data.frame(A,by=list(A$filename),FUN=df)
                  E = E[,-1]
                  B = aggregate.data.frame(A,by=list(A$filename,A$daytype),df)
                  B = B[,-c(1:2)]
                  len = NULL
                  B$len <- 0
                  B$len[which(as.character(B$daytype) == "WD")] = 5
                  B$len[which(as.character(B$daytype) == "WE")] = 2
                  dt <- data.table::as.data.table(B[,which(lapply(B, class)=="numeric" | names(B) == filename)])
                  options(warn=-1)
                  .SD <- .N <- count <- a <- NULL
                  dt2 <- dt[,lapply(.SD,weighted.mean,w=len,na.rm=TRUE),by=list(filename)]
                  options(warn=0)
                  charcol = which(lapply(E, class) != "numeric" & names(E) != filename)
                  numcol = which(lapply(E, class) == "numeric")
                  dt2 = as.data.frame(dt2)
                  G = base::merge(E,dt2,by="filename",all.x=TRUE)
                  p0b = paste0(names(E[,charcol]),".x")
                  p1 = paste0(names(E[,numcol]),".x")
                  p2 = paste0(names(E[,numcol]),".y")
                  for (i in 1:length(p0b)) {
                    names(G)[which(names(G)==p0b[i])] = paste0(names(E[,charcol])[i])
                  }
                  for (i in 1:length(p1)) {
                    names(G)[which(names(G)==p1[i])] = paste0(names(E[,numcol])[i],"_pla")
                  }
                  for (i in 1:length(p2)) {
                    names(G)[which(names(G)==p2[i])] = paste0(names(E[,numcol])[i],"_wei")
                  }
                  G = G[,-which(names(G)=="len" | names(G)=="daytype")]
                  return(G)
                }
                # add column to define what are weekenddays and weekdays
                OF3$daytype = 0
                OF3$daytype[which(OF3$weekday == "Sunday" | OF3$weekday == "Saturday")] = "WE"
                OF3$daytype[which(OF3$weekday == "Monday" | OF3$weekday == "Tuesday" |
                                    OF3$weekday == "Wednesday" | OF3$weekday == "Thursday" |
                                    OF3$weekday == "Friday")] = "WD"
                OF3 = as.data.frame(OF3)
                # before processing OF3, first identify which days have enough monitor wear time
                maxpernwnight = (1 - (includenightcrit / 24)) * 100
                maxpernwday = (1 - (includedaycrit / 24)) * 100
                validdaysi = which(OF3$nonwear_perc_day < maxpernwday & OF3$nonwear_perc_night < maxpernwnight)
                # aggregate OF3 (days) to person summaries in OF4
                OF4 = takeweightedmean(OF3[validdaysi,],filename="filename",day="daytype")
                #--------------------
                # calculate additional variables
                OF3tmp = OF3[,c("filename","night number","daysleeper","cleaningcode","sleeplog_used",
                                "acc_available","nonwear_perc_day","nonwear_perc_night","daytype","dur_day_min",
                                "dur_night_min")]
                foo34 = function(A,B,nameold,namenew,cval) {
                  # function to help with calculating additinal variables
                  df2 = function(x) df2 = length(which(x==cval))
                  mmm = as.data.frame(aggregate.data.frame(A,by=list(A$filename),FUN = df2))
                  mmm2 = data.frame(filename=mmm$Group.1,cc=mmm[,nameold])
                  B = merge(B,mmm2,by="filename")
                  names(B)[which(names(B)=="cc")] = namenew
                  foo34 = B
                }
                # calculate number of valid days (both night and day criteria met)
                OF3tmp$validdays = 0
                OF3tmp$nonwear_perc_day = as.numeric(OF3tmp$nonwear_perc_day)
                OF3tmp$nonwear_perc_night = as.numeric(OF3tmp$nonwear_perc_night)
                OF3tmp$dur_night_min = as.numeric(OF3tmp$dur_night_min)
                OF3tmp$dur_day_min = as.numeric(OF3tmp$dur_day_min)
                # criteria is that nonwear percentage needs to be below threshold unless the actual time in minutes
                # of daypart (night or day) is less than 120 minutes
                OF3tmp$validdays[which((OF3tmp$nonwear_perc_day < maxpernwday | OF3tmp$dur_day_min < 120) &
                                         (OF3tmp$nonwear_perc_night < maxpernwnight | OF3tmp$dur_night_min < 120))] = 1
                OF4 = foo34(A=OF3tmp,B=OF4,nameold="validdays",namenew="Nvaliddays",cval=1)
                OF3tmp$validdays = 0
                OF3tmp$validdays[which(OF3tmp$nonwear_perc_day < maxpernwday &
                                         OF3tmp$nonwear_perc_night < maxpernwnight & OF3tmp$daytype == "WE")] = 1
                OF4 = foo34(A=OF3tmp,B=OF4,nameold="validdays",namenew="Nvaliddays_WE",cval=1)
                OF3tmp$validdays = 0
                OF3tmp$validdays[which(OF3tmp$nonwear_perc_day < maxpernwday &
                                         OF3tmp$nonwear_perc_night < maxpernwnight & OF3tmp$daytype == "WD")] = 1
                OF4 = foo34(A=OF3tmp,B=OF4,nameold="validdays",namenew="Nvaliddays_WD",cval=1)

                # OF4 = foo34(A=OF3tmp[validdaysi,],B=OF4,nameold="night number",namenew="Nnights",cval=99)
                OF4 = foo34(A=OF3tmp[validdaysi,],B=OF4,nameold="daysleeper",namenew="Ndaysleeper",cval=1)
                OF4 = foo34(A=OF3tmp[validdaysi,],B=OF4,nameold="cleaningcode",namenew="Ncleaningcodezero",cval=0)
                OF4 = foo34(A=OF3tmp[validdaysi,],B=OF4,nameold="sleeplog_used",namenew="Nsleeplog_used",cval=TRUE)
                OF4 = foo34(A=OF3tmp[validdaysi,],B=OF4,nameold="acc_available",namenew="Nacc_available",cval=TRUE)
                
                OF4 = cbind(OF4[,1:4],OF4[,(ncol(OF4)-6):ncol(OF4)],OF4[,5:(ncol(OF4)-7)])
                nom = names(OF4)
                cut = which(nom == "acc_onset_ts" | nom == "acc_wake_ts" | 
                              nom == "sleeplog_onset_ts" | nom == "sleeplog_wake_ts" | nom == "night number"
                            | nom == "daysleeper" | nom == "cleaningcode" | nom == "acc_available"
                            | nom == "sleeplog_used" | nom == "L5TIME" | nom == "M5TIME"
                            | nom == "L10TIME" | nom == "M10TIME" | nom == "night number")
                names(OF4)[which(names(OF4)=="weekday")] = "startday"
                OF4 = OF4[,-cut]
                OF4 = as.matrix(OF4)
                OF4[which(is.na(OF4) == TRUE)] = ""
                #-------------------------------------------------------------
                # store all summaries in csv files
                write.csv(OF4,paste(metadatadir,"/results/part5_personsummary_",
                                    uwi[j],"_L",uTRLi[h1],"M",uTRMi[h2],"V",uTRVi[h3],"_",uacc_def[h4],".csv",sep=""),row.names=FALSE)
              }
            }
            
          }
        }
      }
    }
    rm(outputfinal, outputfinal2)
  }
}