mMARCH.AC module 7d: Extracts Weekends/Weekday Features of Domains of Physical Activity, Sleep and Circadian Rhythmicity

knitr::opts_chunk$set(echo = TRUE)
# {r, results="hide"} - The chunks is run but all results are hidden. The code shows in the doc,  
# {r, include=FALSE} - the code is run but neither the code or the results are shown
# {r, echo=FLASE} - The code is not shown, but results are
# bug= colaus has some NA in newID column on 9660 with csv data but no part2summary
# bug2=fragmentation can not take character id, will output NA in id column.
# after fix bug2, output change to a list.

This R pargram extracts multiple commonly used features for weekday and weekends from minute level actigraphy data. Weekday and weekends could be merged with the holiday days when holidayFN was specified. In GGIR, sleep period was defined from today noon to tomorrow noon in general, so weekends should be Friday and Saturday (wake up in Saturday and Sunday morning). To be noted, sleeponset in Sunday is counted as normal weekday in default since people need to work on Monday. For physical activity, weekends should be Saturday and Sunday for the purpose to calculate daytime activity.

currentdir_xxx 
studyname_xxx 
log.multiplier_xxx  
RemoveDaySleeper_xxx 
QCnights.feature.alpha_xxx  
epochOut_xxx 
holidayFN_xxx 
epochOut<-epochOut


foutFN<-function(x,studyname) paste("module7d_",studyname,"_new_features_page",x,".csv",sep="")  
setwd(currentdir) 

Only modify subject-level features based on weekday and weekends

Parameters

r paste("Calculate subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1], " for each feature.",sep="")

r paste("Calculate subject-level SD when Nvalid.night>=",QCnights.feature.alpha[2], " for each feature.",sep="")

Definition of weekday and weekend

Domain | Weekday (WD) | Weekend (WE) -----------|------------------------------|---------------------------- PA, CL| Monday ~ Friday if not holiday | Saturday, Sunday, holiday SL | Mon, Tue, Wed, Thurs, Sun if the next day is not a holiday | Fri (Friday night sleeponset +Sat moring wake up in GGIR sleep window), Sat, day before holiday

Please note that Sunday night sleeponset is counted as weekday but Friday night sleeponset is counted as weekends under the current definition.

# Issue: Sunday night sleeponset is counted as weekday; Friday night sleeponset is counted as weekends under current definition.
# find "wd" and "we" days/nights with/without extra holidayFN as input (twin study)
holidayplusweekends<-function(feature,holidayFN=NULL){ 

Sho.today<-NULL
Sho.tomorrow<-NULL 
if (!is.null(holidayFN)) {  
     try(file.copy(holidayFN, currentdir)) 
     holidaytable<-read.csv(holidayFN,head=1,stringsAsFactors=F)
     holiday<-holidaytable[which(holidaytable[,"holiday"]==1),] 

     if ( "filename" %in% colnames(holiday) ) {
       todayID<-paste(feature[,"filename"],feature[,"Date"],sep=" ") 
       tomorrowID<-paste(feature[,"filename"],as.Date(feature[,"Date"])+1,sep=" ")
       holiday.today<-paste(holiday[,"filename"],holiday[,"Date"],sep=" ")  } else {
       todayID<- feature[,"Date"] 
       tomorrowID<- as.Date(feature[,"Date"])+1 
       holiday.today<- holiday[,"Date"]  }  

     Sho.today<-which(todayID %in% holiday.today)  
     Sho.tomorrow<-which(tomorrowID %in% holiday.today) 
}
Swd<-which(feature[,"weekday"] %in% c(1,2,3,4,5) |feature[,"weekday"] %in% c("Monday","Tuesday","Wednesday","Thursday","Friday")) 
Swe<-which(feature[,"weekday"] %in% c(6,7) | feature[,"weekday"] %in% c("Saturday","Sunday"))  
PAfeatureSwd<-setdiff(Swd,Sho.today)
PAfeatureSwe<-union(Swe,Sho.today) 
SLfeatureSwd<-setdiff(which(feature[,"weekday"] %in% c(1,2,3,4,7) |feature[,"weekday"] %in% c("Monday","Tuesday","Wednesday","Thursday","Sunday")),Sho.tomorrow)
SLfeatureSwe<-union(which(feature[,"weekday"] %in% c(5,6) | feature[,"weekday"] %in% c("Friday","Saturday")),Sho.tomorrow) 
table(feature[,"weekday"])

length(Swd)
length(Swe)
length(PAfeatureSwd) 
length(PAfeatureSwe) 
length(SLfeatureSwd) 
length(SLfeatureSwe)
nholiday<-length(setdiff(Sho.today,Swe)) 
holiday.story<-paste("Days: Among ",nrow(feature)," days, there are ",length(PAfeatureSwd), " weekdays and ", length(PAfeatureSwe), " weekends including ",nholiday," holidays.",sep="") 

ans<-list(PAfeatureSwd=PAfeatureSwd,PAfeatureSwe=PAfeatureSwe,SLfeatureSwd=SLfeatureSwd,SLfeatureSwe=SLfeatureSwe,holiday.story=holiday.story)
return(ans)
}
library(xlsx)   
library(knitr) 
library(dplyr)  # %>%
library(cosinor)   
library(mMARCH.AC) 
library(zoo) #for RA 
library(ActCR)
library(ActFrag) 
library(survival)
library(ineq) 
library(kableExtra)


# Prepare count/wear data for JIVE
summaryFN1<-paste(currentdir,"/summary/",studyname,"_ggir_output_summary.xlsx",sep="")
summaryP2<-read.xlsx( summaryFN1,head=0,sheetName = "2_fileSummary")  
ggirV<-as.character(gsub("X","",summaryP2[1,1]))  

enmoFN<-paste("./data/impu.flag_All_",studyname,"_ENMO.data.", epochOut,"s.GGIR",ggirV,".csv",sep="")   


# Prepare count/wear data for JIVE  (Note: Date=11/30/2017 format in D1)
D1<-read.csv(enmoFN,head=1, stringsAsFactors=F)
Ctop<-which(colnames(D1)=="X00.00.00")-1
table(D1[,c("KEEP","remove16h7day")])

D2<-D1[which(D1[,"KEEP"]=="keep" &!is.na(D1[,"newID"])),]
S1.keepdaysleep<- which(is.na(D2[,"daysleeper"]) | D2[,"daysleeper"]==0) 
S1.removedaysleep<- which( D2[,"daysleeper"]==1) 
if (RemoveDaySleeper & length(S1.removedaysleep)>=1) D3<-D2[-S1.removedaysleep,] else D3<-D2
daysleeperM<-D2[S1.removedaysleep,1:Ctop]


# Specify WD/WE data-------------------------------------- 
# Holiday days could be found filename+Date or Date only when filename is not available.
# -------------------------------------------------------- 
idM<-unique(D3[,c("filename","newID")])  

D3.days<-holidayplusweekends(feature=D3,holidayFN= holidayFN )  
Swd<-D3.days$PAfeatureSwd
Swe<-D3.days$PAfeatureSwe

idM.wd<-D3[Swd,c("filename","newID","Date")]
idM.we<-D3[Swe,c("filename","newID","Date")]

count <-cbind(D3[,c("filename","Date")],D3[,(Ctop+1):ncol(D3)])   
colnames(count)<-c("ID","Day",paste("MIN",1:(ncol(count)-2),sep="")) 
log.count = cbind(count[,1:2], log(log.multiplier*count[,-c(1,2)] + 1)) 

count.wd<-count[Swd,] 
count.we<-count[Swe,]
log.count.wd =  log.count[Swd,]
log.count.we =  log.count[Swe,]

dim(D3)
dim(count)
dim(count.wd)
dim(count.we)  

1. Data

The activity data at the minute level (r enmoFN) was stored in a format of data.frame of dimension $(\sum_i d_i) \times r ncol(D1)$, where $d_i$ is the number of available days for subject $i$. And the order of the r ncol(D1) columns (and corresponding column names) was as follows, r colnames(D1)[1:Ctop], "MIN1", ..., "MIN1440".

 *  Raw data = (`r dim(D1)`), `r length(unique(D1[,"filename"]))` files, `r length(unique(D1[,"newID"]))` individuals.
 *  Clean data = (`r dim(D2)`), `r length(unique(D2[,"filename"]))` files, `r length(unique(D2[,"newID"]))` individuals.     
 *  Remove daysleepers = (`r dim(D3)`), `r length(unique(D3[,"filename"]))` files, `r length(unique(D3[,"newID"]))` individuals.  
 *  Count data = (`r dim(count)`), `r length(unique(count[,"ID"]))` individuals. 
 *  `r D3.days$holiday.story`     
 *  WD Count data = (`r dim(count.wd)`), `r length(unique(count.wd[,"ID"]))` individuals.  
 *  WE Count data = (`r dim(count.we)`), `r length(unique(count.we[,"ID"]))` individuals.
MustHaveTAC<-TRUE
A=4  #  number of head columns including 1: filename 2: Date 3:newID 4:cleaningcode 
B=A+1 #first column need to average 

# module7b_1 subject data
#IS mesor amp acro mesor_L amp_L acro_L PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
#CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR
#subject subject subject .... 
# For subject level feature, 7b will do sum for days with same value, so mean=NA when number of valid days < 7.

2. Feature Extractions for those day features

2.4 cosinor model

The classic cosinor model is defined as $r(t)= mes + amp \times c(t)$ where $c(t) = \cos([t-\phi]2\pi/24)$ where mes is mesor, amp is amplitude, $\phi$ is the acrophase.

# Extended cosinor model allows for two additional parameters to control the shape. Here we implement the anti logistic transformation, and have $r(t) = min +amp\times F(c(t))$, where $F()$ is the anti logistic function.


# `ExtCos` and `ExtCos_long` calcualte the five parameters, (for a single vector amd whole dataset respectively). It is suggested to log transformed the curve by choose `logtransform = TRUE`. 
minuteW=(ncol(count.wd)-2)/1440
Cos = ActCosinor_long2( count.data = count.wd, window=minuteW,daylevel=FALSE)  
Cos.log = ActCosinor_long2( count.data = log.count.wd, window=minuteW,daylevel=FALSE)   
extcos = ActExtendCosinor_long2(count.data = count.wd, window=minuteW,daylevel=FALSE) 
extcos.log = ActExtendCosinor_long2(count.data = log.count.wd, window=minuteW,daylevel=FALSE)   
colnames(Cos)<-c("ID","ndays","mesor","amp","acro","acrotime")
colnames(Cos.log)<-c("ID","ndays","mesor_L","amp_L","acro_L","acrotime_L")
colnames(extcos)<-gsub(paste("_",minuteW,sep=""),"_ext",colnames(extcos))
colnames(extcos.log)<-gsub(paste("_",minuteW,sep=""),"_ext_L",colnames(extcos.log))

cosinormodel.1<-merge(Cos,Cos.log,by=c("ID","ndays"),all=TRUE) 
cosinormodel.2<-merge(extcos,extcos.log,by=c("ID","ndays"),all=TRUE) 
cosinormodel.wd<-merge(cosinormodel.1[,-2],cosinormodel.2[,-2],by="ID",all=TRUE)  
colnames(cosinormodel.wd)[2:ncol(cosinormodel.wd)]<-paste(colnames(cosinormodel.wd)[2:ncol(cosinormodel.wd)],"_S_wd",sep="") 
minuteW=(ncol(count.we)-2)/1440
Cos = ActCosinor_long2( count.data = count.we, window=minuteW,daylevel=FALSE)  
Cos.log = ActCosinor_long2( count.data = log.count.we, window=minuteW,daylevel=FALSE)   
extcos = ActExtendCosinor_long2(count.data = count.we, window=minuteW,daylevel=FALSE) 
extcos.log = ActExtendCosinor_long2(count.data = log.count.we, window=minuteW,daylevel=FALSE)   
colnames(Cos)<-c("ID","ndays","mesor","amp","acro","acrotime")
colnames(Cos.log)<-c("ID","ndays","mesor_L","amp_L","acro_L","acrotime_L")
colnames(extcos)<-gsub(paste("_",minuteW,sep=""),"_ext",colnames(extcos))
colnames(extcos.log)<-gsub(paste("_",minuteW,sep=""),"_ext_L",colnames(extcos.log))

cosinormodel.1<-merge(Cos,Cos.log,by=c("ID","ndays"),all=TRUE) 
cosinormodel.2<-merge(extcos,extcos.log,by=c("ID","ndays"),all=TRUE) 
cosinormodel.we<-merge(cosinormodel.1[,-2],cosinormodel.2[,-2],by="ID",all=TRUE)  
colnames(cosinormodel.we)[2:ncol(cosinormodel.we)]<-paste(colnames(cosinormodel.we)[2:ncol(cosinormodel.we)],"_S_we",sep="")  

2.5 interdaily statbility

is_all.wd = IS_long2(count.data = count.wd, window=(ncol(count)-2)/144, method = "average"  )  #10 minute  
 colnames(is_all.wd)[2]<-"IS_wd"  
is_all.we = IS_long2(count.data = count.we, window=(ncol(count)-2)/144, method = "average"  )  #10 minute  
 colnames(is_all.we)[2]<-"IS_we"   
# subject level features: cosinor model, IS

output<-is_all.wd 
output<-merge(output,is_all.we,by=c("ID"), all=TRUE)  
output<-merge(output,cosinormodel.wd,by=c("ID"),all=TRUE)  
output<-merge(output,cosinormodel.we,by=c("ID"), all=TRUE)  


output<-merge(idM,output,by.x=c("filename" ),by.y=c("ID" ),all=TRUE)  
dim(output)
head(output)
write.csv(output, file=foutFN("1",studyname), col.names = TRUE, row.names = FALSE)  

plot.data<-array(NA,dim=c(nrow(output),ncol(output)-2))
for (j in 1:ncol(plot.data)) plot.data[,j]<-as.numeric(as.character(unlist(output[,j+2])))
colnames(plot.data)<-colnames(output)[-c(1,2)]
plot.data<-plot.data[,order(colnames(plot.data))]
par(mar=c(8, 4, 2, 2) + 0.1)  # bottom,left,up,right   
boxplot(plot.data,col=c("blue","grey" ),xlab="",xaxt="n") 
axis(1, at=1:ncol(plot.data), labels=colnames(plot.data), las = 2, cex.axis = 0.8) 

2.6 functional principal component analysis

As opposed to Cosinor/extCosinor model, functional principal component analysis (FPCA) is a data driven technique that makes no parametric assumptions about the functional form of diurnal patterns. FPCA represents any sample of diurnal patterns via L2-orthogonal functional "principal components". The resulted principal component scores can be used. Here, FPCA is done by using the fpca.face function in the refund package. It is suggsted to take the log transformation to reduce the skewness of count data by setting logtransform = TRUE.

Notice that, here, for simplicity and better interpretability, for each subject, we take the average across all valid days, therefore, we don't account for the within person correlation. To incorporate the within person correlation, one can choose to use multilevel FPCA by using the denseFLMM function in the denseFLMM package.

######################################################### 
# 2.6 functional principal component analysis: f=1 WD; f=2 WE

ftitle<-c("weekday", "weekend")
for (f in 1:2){ 

if (f==1) log.count.f=log.count.wd 
if (f==2) log.count.f=log.count.we  
if (f==1) idM.f<- idM.wd else idM.f<-idM.we 

X=log.count.f[,-c(1,2)]
X <- t(t(X) - colMeans(X, na.rm = TRUE))
dim(X)  
mean(X) 

id.groups<-unique(log.count.f[,"ID"])  
print(length(id.groups)) 
groups<-NULL
for (i in 1:nrow(log.count.f)) groups[i]<-which(id.groups==log.count.f[i,"ID"])  
groups<-as.matrix(groups)
G=1 
Zvars<-lapply(seq(len=G),function(g) matrix(1,nrow(log.count.f),1))
data<-as.matrix(X) #X=demean-log(enmo)

idM.avg<-unique(idM.f[,-3])
S2<-NULL
for (i in 1:length(id.groups)) S2[i]<-which(idM.avg[,"filename"]==id.groups[i])
idM.avg2<-idM.avg[S2,] 


print(c(f,dim(data),nrow(idM.avg2)))

#------------------------------------------------------------ 
# Method 2: multilevel(2 levels) FPCA seperating subject-level effect and day-level effect
#------------------------------------------------------------ 
library(denseFLMM)




#Z1 <- denseFLMM(data,groups=groups,Zvars=Zvars,smooth = FALSE,NPC=rep(20,G+1))  

Z2 <- denseFLMM(data, gridpoints = 1:ncol(data), 
                       Zlist = NA, 
                       G = G, 
                       Lvec = NA, 
                       groups = groups, 
                       Zvars = Zvars, 
                       L = NA, 
                       NPC=rep(20, G+1), 
                       smooth = FALSE, 
                       bf = 20, 
                       smoothalg = "gamm")

dim(Z2$xi[[1]]) ; dim(Z2$xi[[2]]) 
dim(Z2$phi[[1]]); dim(Z2$phi[[2]])


#Calculating the percentage of variance explained by each componet for the subject-level PCs and Day-level PCs.
pervar_sub<-100*Z2$nu[[1]]/sum(Z2$nu[[1]])
pervar_day<-100*Z2$nu[[2]]/sum(Z2$nu[[2]])
pervar_sub
pervar_day
#Calculating the relative variance explained by subject heterogeneity out of total variability
sum(Z2$nu[[1]])/Z2$totvar 
#Calculating the relative variance explained by day-to-day variance out of total variability
sum(Z2$nu[[2]])/Z2$totvar 
#Calculating the residual variance/noises out of total variability
1440*Z2$sigma2/Z2$totvar 

Z2$totvar
subject_PCs<-cbind(idM.avg2,Z2$xi[[1]]  ) 
day_PCs<-cbind(idM.f,Z2$xi[[2]]  ) 
colnames(subject_PCs)[(ncol(idM.avg2)+1): ncol(subject_PCs)]<-paste("PC",1:ncol(Z2$xi[[1]]),sep="")
colnames(day_PCs)[(ncol(idM.f)+1): ncol(day_PCs)]<-paste("PC",1:ncol(Z2$xi[[2]]),sep="")




if (f==1) Z2.save.1<-Z2 else Z2.save.2<-Z2

write.csv(day_PCs, file=foutFN("4_denseFLMM_day_PCs", paste(ftitle[f],studyname,sep="_")),  col.names = TRUE, row.names = FALSE )
write.csv(subject_PCs, file=foutFN("5_denseFLMM_subject_PCs", paste(ftitle[f],studyname,sep="_")), col.names = TRUE, row.names = FALSE )
}
for (f in 1:2){ 
if (f==1) Z2<-Z2.save.1  else  Z2<-Z2.save.2
pervar_sub<-100*Z2$nu[[1]]/sum(Z2$nu[[1]]) 

m<-4
my.ylim<-range(Z2$phi[[1]][,m:1])*1.4
op=par(mai=c(.6, .8, .3, 0.1),omi=c(0, 0, 0, 0))
matplot(Z2$phi[[1]][,m:1],type='l',ylab='PC',xlab='',xaxt='n',yaxt='n',main='',lwd=c(.7,.7,2,2),ylim=my.ylim,col=m:1)
title(xlab = "Hour", line=1.8)
title(main = paste(ftitle[f],": Subject-specific Effect (", 
                   round(sum(Z2$nu[[1]])/Z2$totvar,digits=3)*100,'% of Total)',sep=''),line=0.5)
axis(1,at=(0:12)*2*60,labels=(0:12)*2,cex.axis=1)
axis(2,las=1,cex.axis=1)
#axis(2,at=(1:3)*0.01,labels=(1:3)*0.01, las=1,cex.axis=1)
legend('topleft',co=1:4,
       legend=paste(paste('PC',1:m,' (',sep=''),sprintf("%.1f",round(pervar_sub[1:m],digits=1)),'%)',sep=''),
       lty=1,
       horiz=FALSE,
       lwd=2 ,
       bg='transparent',cex=0.725,text.font=1.5,box.col='transparent',
       ncol=2) 

} #f
# end FPCA

3. Average variable at the subject level and merge 3 parts

Please pay attention: Use IS and PC10 to locate subject level features; IS has to be the first R subject-level feature and IV has to be the last day level feature.

# set mean=NA for those features with Nvalid night< 3 or 7.
NcountSum<-function(x) sum(as.numeric(!is.na(x))) #calcuate number of good value of each feature

## To wear mask on mean feature table, align the first matrix according to IDs in the second matrix.
## return a Fmer81.N.all (Ngood values) matrix with same order (row and column) to Fmer81 
alignMatrix<-function(Fmer81.N.all,Fmer81){

if (length(setdiff(Fmer81[,1], Fmer81.N.all[,1]) )>=1)  stop("Found missing filename in Fmer81.N.all data")
if (length(setdiff(colnames(Fmer81),colnames(Fmer81.N.all)) )>=1) stop("Found missing columns in Fmer81.N.all data")

Fmer81x<-cbind(f=1:nrow(Fmer81),filename=Fmer81[,1]) 
data<-merge(Fmer81x,Fmer81.N.all,by="filename",all.x=TRUE,all.y=FALSE)
data<-data[order(as.numeric(data[,"f"])),-2] 
if (nrow(data)<nrow(Fmer81)) print("found missing lines in the mask")
Fmer81.mask<-data[, which(colnames(data) %in% colnames(Fmer81))]

if (sum(colnames(Fmer81.mask)!=colnames(Fmer81))>0) stop ("Wrong columns names in the mask")
if (sum( Fmer81.mask[,1]!= Fmer81[,1])>0) stop ("Wrong filename orders in the mask")
# cbind(Fmer81[1:10,1],Fmer81.mask[1:10,1])

return(Fmer81.mask) 
}





# For sleep data, wear two mask (allday mask and WD only mask)
MeanWithMask<-function(Fmer5.N.all,Fmer5,QCnights.allday,QCnights=0,Cmean4time=TRUE){ 


###a) Take averages for both subject and day level features although subject feature has same values for all days................

Ncol8<-which(colnames(Fmer5)=="weekday") # Ncol8 and after are weekday,zeroSleep,Nwd,Nwe,..  
Fmer81<-aggregate(.~filename,data=Fmer5[,c(1:(Ncol8-1))], mean,na.rm=T,na.action=na.pass)
Fmer82<-aggregate(.~filename,data=Fmer5[,c(1,Ncol8:ncol(Fmer5))], sum,na.rm=T,na.action=na.pass) 

timingC<-grep("cmean",colnames(Fmer5)) 
if (length(timingC)>=1) {
  Fmer83<-aggregate(.~filename,data=Fmer5[, c(1,timingC)], get_mean_sd_hour,unit2minute=60,out="mean" ,na.action=na.pass) #L5TIME
  if (length(which(Fmer81[,"filename"]!=Fmer83[,"filename"]))>=1) stop("Mismatch IDs in Fmer83")
  timingC<-grep("cmean",colnames(Fmer81)) 
  Fmer81[,timingC]<-Fmer83[,-1]
  # colnames(Fmer81)[timingC]==colnames(Fmer83)[-1]
}  
Fmer81.N.all<-alignMatrix(Fmer5.N.all,Fmer81)
Fmer81.N.self.raw<-aggregate(.~filename,data=Fmer5[, 1:(Ncol8-1)], NcountSum,na.action=na.pass)  
Fmer81.N.self<-alignMatrix(Fmer81.N.self.raw,Fmer81) 


# Wear 2 Mask for the mean features
Fmer81.mask<-Fmer81
for (j in 2:ncol(Fmer81.mask)) {
   S81<- which(Fmer81.N.all[,j] < QCnights.allday | Fmer81.N.self[,j] < QCnights  )
   if (length(S81)>=1) Fmer81.mask[S81,j]<-NA}


# merge two parts 
if (sum(Fmer81.mask[,1]!=Fmer82[,1])>0)  stop("Check IDs of two Fmer8 matrix")
Fmer8<-cbind(Fmer81.mask,Fmer82[,-c(1,2)]) #change Fmer81.mask, rm filename and weekday for the second file

return(Fmer8)
}
#  Fmer81.N.all=number of good days for each feature 
#  Fmer4[which(Fmer4[,"wakeup"]-Fmer4[,"sleeponset"] ==0) ,"zeroSleep"]<-1 
#  Fmer4[,"Nwd_0s"] <-Fmer4[,"Nwd"]*Fmer4[,"zeroSleep"]  # Number of weekdays of zero sleep
#  Fmer4[,"Nwe_0s"] <-Fmer4[,"Nwe"]*Fmer4[,"zeroSleep"]  # Number of weekends of zero sleep

dictionary<-paste("module7_",studyname,"_all_features_dictionary.xlsx",sep="")  
dict<-read.xlsx(dictionary,head=1,sheetName="dictionary",stringsAsFactors=F) 
dict<-dict[which(!is.na(dict[,"Domain"])),] 
SLdomain<-dict[which(dict[,"Domain"]=="SL"),"Variable"] 
PAdomain<-dict[which(dict[,"Domain"]=="PA"),"Variable"] 
CRdomain<-dict[which(dict[,"Domain"]=="CR"),"Variable"] 
subjectlevel.variable<-dict[which(dict[,"level"]=="subject"),"Variable"] 



ftitle<-c("weekday", "weekend")
inFN<-paste("module7b_",studyname,"_all_features_2_dayclean.csv",sep="") #clean file (must have TAC)
outFN<-paste("module7d_",ftitle,"_",studyname,"_all_features_3_subject.csv",sep="")
DayFeature<-read.csv(inFN,head=1,stringsAsFactors=F)                                                         #part1=day features
wdwefea<-read.csv(paste("module7d_",studyname,"_new_features_page1.csv",sep=""),head=1,stringsAsFactors=F)   #part2=new wdwe subject features
pcFN<-paste("module7d_",ftitle,"_",studyname,"_new_features_page5_denseFLMM_subject_PCs.csv",sep="")         #part3=fpca subject level wdwe




#X01) add L5TIME_cmean columns   
Csubject<-which(colnames(DayFeature) %in% subjectlevel.variable)  
S4<-setdiff(1:ncol(DayFeature),Csubject)    # day level SL/PA/CR features  
Fmer5<-DayFeature[,S4]  #remove old subject level features using all days, will replace new one
timingC<-grep("TIME",colnames(Fmer5))
timingMatrix<-Fmer5[,timingC]
colnames(timingMatrix)<-paste(colnames(timingMatrix),"_cmean",sep="") #circle mean columns 
CmeanDomain<-colnames(timingMatrix) 
lastC<-max(timingC)
Fmer6<-cbind(Fmer5[,c(1:lastC)],timingMatrix,Fmer5[(lastC+1):ncol(Fmer5)])


#X02) subset for Fmer6 (old daylevel features)
C5<-which(colnames(Fmer6)=="weekday")  
Csleep<-which(colnames(Fmer6) %in% SLdomain) 
Cother<-which(colnames(Fmer6) %in% c(PAdomain,CRdomain,CmeanDomain)) 
sleepColumns<-c(1,Csleep,C5:ncol(Fmer6))        # sleep features
nonsleepColumns<-c(1,Cother,C5:ncol(Fmer6))        # nonsleep daylevel features


Ncol8<-which(colnames(Fmer6)=="weekday")  # Ncol8 after are weekday,zeroSleep,Nwd,Nwe,.. 
Fmer6.N.all<-aggregate(.~filename,data=Fmer6[, c(1,B:(Ncol8-1))], NcountSum,na.action=na.pass) 
tailname<-colnames(Fmer6)[Ncol8:ncol(Fmer6)] 

H<-holidayplusweekends(Fmer6,holidayFN=holidayFN)      
#f=1/2 for WD/WE................................................
for (f in 1:2){

#x1) Old day level features summaryized for WD or WE only: Note sleep has special "weekend" definition.
if (f==1) {sleepSet<-H$SLfeatureSwd; paSet<-H$PAfeatureSwd; QCnights=QCnights.feature.alpha[3]}
if (f==2) {sleepSet<-H$SLfeatureSwe; paSet<-H$PAfeatureSwe; QCnights=QCnights.feature.alpha[4]}
table(Fmer6[sleepSet,"weekday"]) 
table(Fmer6[paSet,"weekday"])

sleepWD<-Fmer6[sleepSet,sleepColumns]   
paWD<-Fmer6[paSet,nonsleepColumns]   
SleepData<-MeanWithMask(Fmer6.N.all,Fmer5=sleepWD,QCnights.allday=QCnights.feature.alpha[1],QCnights=QCnights)   
PAData<-MeanWithMask(Fmer6.N.all,Fmer5=paWD,QCnights.allday=QCnights.feature.alpha[1],QCnights=QCnights)  

Fmer8<-merge(SleepData,PAData,by="filename",all=TRUE,suffix=c("_sleep","")) #Nwd.. 6 duplicate columns 
dim(PAData)
dim(SleepData)
dim(Fmer8)



#x2) PCs
wdwePC<-read.csv(pcFN[f],head=1,stringsAsFactors=F)
colnames(wdwePC)[3:ncol(wdwePC)]<-paste("PC",1:20,"_S",sep="")
wdwePC2<-wdwePC[,c(1,3:12)]

#x3) newfeatures
if (f==1) S9<-c("filename","newID",colnames(wdwefea)[grep("wd",colnames(wdwefea))])
if (f==2) S9<-c("filename","newID",colnames(wdwefea)[grep("we",colnames(wdwefea))]) 
wdweSubjectFea<-merge(wdwefea[,S9],wdwePC2,by="filename",all=TRUE)

#x4) merage old daylevel feature on wdwe + new wdwe subject leve + new wdwePCs
Fmer9<-merge(Fmer8,wdweSubjectFea,by="filename",all=TRUE) 

C8<-which(colnames(Fmer9) %in% c(tailname,paste(tailname,"_sleep",sep="")))
C9<-which(colnames(Fmer9)=="newID") 
C10<-c(1,C9,setdiff(2:ncol(Fmer9),c(C9,C8)),C8)
Fmer9<-Fmer9[,C10]  
Fmer9[,"cleancode2"]<-rowSums( (is.na(Fmer9))) # of missing feature, 0 means no missing  



rmwdwe<-function(x){y<-unlist(strsplit(x,"\\_")) 
                    z<-y[length(y)]
                    if (z=="wd" | z=="we") t<-paste(y[-length(y)],collapse="_") else t<-x
                    return(t)}
newname.rmwdwe<-as.character(unlist(lapply(colnames(Fmer9),rmwdwe)))
colnames(Fmer9)<-newname.rmwdwe
write.csv(Fmer9, file=outFN[f],  row.names = FALSE )  

# R4.2.2 Error in utils::write.table(Fmer9, file = outFN[f], row.names = FALSE,  :  unimplemented type 'list' in 'EncodeElement'


Fmer9.withmissing<-Fmer9[which(Fmer9[,"cleancode2"]>=1),]
rownames(Fmer9.withmissing)<-NULL 
ans<-c(ftitle[f],outFN[f],nrow(Fmer9),length(unique(Fmer9[,"filename"])),length(unique(Fmer9[,"newID"])) ) 
if (f==1) ans1<-ans else ans2<-ans    
if (f==1) Fmer9.withmissing.weekday<-Fmer9.withmissing else Fmer9.withmissing.weekend<-Fmer9.withmissing   
} 

r H$holiday.story

3.1) Weekday (WD) average

WD: r paste("Calculate weekday subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1]," and Nvalid.weekday.night>=",QCnights.feature.alpha[3], " for each feature. Set them as NAs for ", nrow(Fmer9.withmissing.weekday), " subjects (",length(which(DayFeature[,1] %in% Fmer9.withmissing.weekday[,1]))," days) as follows,",sep="")

cleancode2: number of missingness of features of each subject

if (nrow(Fmer9.withmissing.weekday)>=1) kable(Fmer9.withmissing.weekday,caption="N valid night of removed subjects for WD mean features") %>%
    kable_styling(bootstrap_options = c("striped", "hover")) 

3.2) Weekend (WE) average

WE: r paste("Calculate weekend subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1]," and Nvalid.weekend.night>=",QCnights.feature.alpha[4], " for each feature. Set them as NAs for ", nrow(Fmer9.withmissing.weekend), " subjects (",length(which(DayFeature[,1] %in% Fmer9.withmissing.weekend[,1]))," days) as follows,",sep="")

cleancode2: number of missingness of features of each subject

if (nrow(Fmer9.withmissing.weekend)>=1) kable(Fmer9.withmissing.weekend,caption="N valid night of removed subjects for WE mean features") %>%
    kable_styling(bootstrap_options = c("striped", "hover")) 
ansM<-rbind(ans1,ans2) 
colnames(ansM)<-c("Day","Filename", "Nrow", "Nfiles","Nsubjects")   
rownames(ansM)<-NULL
kable(ansM,caption="Description of output files")   %>%
    kable_styling(bootstrap_options = c("striped", "hover"))

4 Plot features

t1<-pheno.plot(inputFN=outFN[1],csv=TRUE, start=4)   
t2<-pheno.plot(inputFN=outFN[2],csv=TRUE, start=4)    

Some annotation variables in the output

Variable | Meaning --------------|---------------------------------- zeroSleep | sleeponset= wakup Nwd | number of weekdays Nwe | number of weekends Nwd_0s | number of zero sleep in weekdays
Nwe_0s | number of zero sleep in weekends _sleep | number of days for sleep feature calculation (WE=Fri and Sat), differ from PA and CR (WE=Sat and Sun) cleancode2 | number of NAs in the feature table


How to run this?



Try the mMARCH.AC package in your browser

Any scripts or data that you put into this service are public.

mMARCH.AC documentation built on June 8, 2025, 1:24 p.m.