knitr::opts_chunk$set(echo = TRUE) # {r, results="hide"} - The chunks is run but all results are hidden. The code shows in the doc, # {r, include=FALSE} - the code is run but neither the code or the results are shown # {r, echo=FLASE} - The code is not shown, but results are # bug= colaus has some NA in newID column on 9660 with csv data but no part2summary # bug2=fragmentation can not take character id, will output NA in id column. # after fix bug2, output change to a list.
This R pargram extracts multiple commonly used features for weekday and weekends from minute level actigraphy data. Weekday and weekends could be merged with the holiday days when holidayFN was specified. In GGIR, sleep period was defined from today noon to tomorrow noon in general, so weekends should be Friday and Saturday (wake up in Saturday and Sunday morning). To be noted, sleeponset in Sunday is counted as normal weekday in default since people need to work on Monday. For physical activity, weekends should be Saturday and Sunday for the purpose to calculate daytime activity.
currentdir_xxx studyname_xxx log.multiplier_xxx RemoveDaySleeper_xxx QCnights.feature.alpha_xxx epochOut_xxx holidayFN_xxx epochOut<-epochOut foutFN<-function(x,studyname) paste("module7d_",studyname,"_new_features_page",x,".csv",sep="") setwd(currentdir)
r paste("Calculate subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1], " for each feature.",sep="")
r paste("Calculate subject-level SD when Nvalid.night>=",QCnights.feature.alpha[2], " for each feature.",sep="")
Definition of weekday and weekend
Domain | Weekday (WD) | Weekend (WE) -----------|------------------------------|---------------------------- PA, CL| Monday ~ Friday if not holiday | Saturday, Sunday, holiday SL | Mon, Tue, Wed, Thurs, Sun if the next day is not a holiday | Fri (Friday night sleeponset +Sat moring wake up in GGIR sleep window), Sat, day before holiday
Please note that Sunday night sleeponset is counted as weekday but Friday night sleeponset is counted as weekends under the current definition.
# Issue: Sunday night sleeponset is counted as weekday; Friday night sleeponset is counted as weekends under current definition. # find "wd" and "we" days/nights with/without extra holidayFN as input (twin study) holidayplusweekends<-function(feature,holidayFN=NULL){ Sho.today<-NULL Sho.tomorrow<-NULL if (!is.null(holidayFN)) { try(file.copy(holidayFN, currentdir)) holidaytable<-read.csv(holidayFN,head=1,stringsAsFactors=F) holiday<-holidaytable[which(holidaytable[,"holiday"]==1),] if ( "filename" %in% colnames(holiday) ) { todayID<-paste(feature[,"filename"],feature[,"Date"],sep=" ") tomorrowID<-paste(feature[,"filename"],as.Date(feature[,"Date"])+1,sep=" ") holiday.today<-paste(holiday[,"filename"],holiday[,"Date"],sep=" ") } else { todayID<- feature[,"Date"] tomorrowID<- as.Date(feature[,"Date"])+1 holiday.today<- holiday[,"Date"] } Sho.today<-which(todayID %in% holiday.today) Sho.tomorrow<-which(tomorrowID %in% holiday.today) } Swd<-which(feature[,"weekday"] %in% c(1,2,3,4,5) |feature[,"weekday"] %in% c("Monday","Tuesday","Wednesday","Thursday","Friday")) Swe<-which(feature[,"weekday"] %in% c(6,7) | feature[,"weekday"] %in% c("Saturday","Sunday")) PAfeatureSwd<-setdiff(Swd,Sho.today) PAfeatureSwe<-union(Swe,Sho.today) SLfeatureSwd<-setdiff(which(feature[,"weekday"] %in% c(1,2,3,4,7) |feature[,"weekday"] %in% c("Monday","Tuesday","Wednesday","Thursday","Sunday")),Sho.tomorrow) SLfeatureSwe<-union(which(feature[,"weekday"] %in% c(5,6) | feature[,"weekday"] %in% c("Friday","Saturday")),Sho.tomorrow) table(feature[,"weekday"]) length(Swd) length(Swe) length(PAfeatureSwd) length(PAfeatureSwe) length(SLfeatureSwd) length(SLfeatureSwe) nholiday<-length(setdiff(Sho.today,Swe)) holiday.story<-paste("Days: Among ",nrow(feature)," days, there are ",length(PAfeatureSwd), " weekdays and ", length(PAfeatureSwe), " weekends including ",nholiday," holidays.",sep="") ans<-list(PAfeatureSwd=PAfeatureSwd,PAfeatureSwe=PAfeatureSwe,SLfeatureSwd=SLfeatureSwd,SLfeatureSwe=SLfeatureSwe,holiday.story=holiday.story) return(ans) }
library(xlsx) library(knitr) library(dplyr) # %>% library(cosinor) library(mMARCH.AC) library(zoo) #for RA library(ActCR) library(ActFrag) library(survival) library(ineq) library(kableExtra) # Prepare count/wear data for JIVE summaryFN1<-paste(currentdir,"/summary/",studyname,"_ggir_output_summary.xlsx",sep="") summaryP2<-read.xlsx( summaryFN1,head=0,sheetName = "2_fileSummary") ggirV<-as.character(gsub("X","",summaryP2[1,1])) enmoFN<-paste("./data/impu.flag_All_",studyname,"_ENMO.data.", epochOut,"s.GGIR",ggirV,".csv",sep="") # Prepare count/wear data for JIVE (Note: Date=11/30/2017 format in D1) D1<-read.csv(enmoFN,head=1, stringsAsFactors=F) Ctop<-which(colnames(D1)=="X00.00.00")-1 table(D1[,c("KEEP","remove16h7day")]) D2<-D1[which(D1[,"KEEP"]=="keep" &!is.na(D1[,"newID"])),] S1.keepdaysleep<- which(is.na(D2[,"daysleeper"]) | D2[,"daysleeper"]==0) S1.removedaysleep<- which( D2[,"daysleeper"]==1) if (RemoveDaySleeper & length(S1.removedaysleep)>=1) D3<-D2[-S1.removedaysleep,] else D3<-D2 daysleeperM<-D2[S1.removedaysleep,1:Ctop] # Specify WD/WE data-------------------------------------- # Holiday days could be found filename+Date or Date only when filename is not available. # -------------------------------------------------------- idM<-unique(D3[,c("filename","newID")]) D3.days<-holidayplusweekends(feature=D3,holidayFN= holidayFN ) Swd<-D3.days$PAfeatureSwd Swe<-D3.days$PAfeatureSwe idM.wd<-D3[Swd,c("filename","newID","Date")] idM.we<-D3[Swe,c("filename","newID","Date")] count <-cbind(D3[,c("filename","Date")],D3[,(Ctop+1):ncol(D3)]) colnames(count)<-c("ID","Day",paste("MIN",1:(ncol(count)-2),sep="")) log.count = cbind(count[,1:2], log(log.multiplier*count[,-c(1,2)] + 1)) count.wd<-count[Swd,] count.we<-count[Swe,] log.count.wd = log.count[Swd,] log.count.we = log.count[Swe,] dim(D3) dim(count) dim(count.wd) dim(count.we)
The activity data at the minute level (r enmoFN
) was stored in a format of data.frame
of dimension $(\sum_i d_i) \times r ncol(D1)
$, where $d_i$ is the number of available days for subject $i$. And the order of the r ncol(D1)
columns (and corresponding column names) was as follows, r colnames(D1)[1:Ctop]
, "MIN1", ..., "MIN1440".
* Raw data = (`r dim(D1)`), `r length(unique(D1[,"filename"]))` files, `r length(unique(D1[,"newID"]))` individuals. * Clean data = (`r dim(D2)`), `r length(unique(D2[,"filename"]))` files, `r length(unique(D2[,"newID"]))` individuals. * Remove daysleepers = (`r dim(D3)`), `r length(unique(D3[,"filename"]))` files, `r length(unique(D3[,"newID"]))` individuals. * Count data = (`r dim(count)`), `r length(unique(count[,"ID"]))` individuals. * `r D3.days$holiday.story` * WD Count data = (`r dim(count.wd)`), `r length(unique(count.wd[,"ID"]))` individuals. * WE Count data = (`r dim(count.we)`), `r length(unique(count.we[,"ID"]))` individuals.
MustHaveTAC<-TRUE A=4 # number of head columns including 1: filename 2: Date 3:newID 4:cleaningcode B=A+1 #first column need to average # module7b_1 subject data #IS mesor amp acro mesor_L amp_L acro_L PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 #CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR CR #subject subject subject .... # For subject level feature, 7b will do sum for days with same value, so mean=NA when number of valid days < 7.
fpca.face (refund)
and denseFLMM
ExtCos
, ExtCos_long
The classic cosinor model is defined as $r(t)= mes + amp \times c(t)$ where $c(t) = \cos([t-\phi]2\pi/24)$ where mes is mesor, amp is amplitude, $\phi$ is the acrophase.
# Extended cosinor model allows for two additional parameters to control the shape. Here we implement the anti logistic transformation, and have $r(t) = min +amp\times F(c(t))$, where $F()$ is the anti logistic function. # `ExtCos` and `ExtCos_long` calcualte the five parameters, (for a single vector amd whole dataset respectively). It is suggested to log transformed the curve by choose `logtransform = TRUE`.
minuteW=(ncol(count.wd)-2)/1440 Cos = ActCosinor_long2( count.data = count.wd, window=minuteW,daylevel=FALSE) Cos.log = ActCosinor_long2( count.data = log.count.wd, window=minuteW,daylevel=FALSE) extcos = ActExtendCosinor_long2(count.data = count.wd, window=minuteW,daylevel=FALSE) extcos.log = ActExtendCosinor_long2(count.data = log.count.wd, window=minuteW,daylevel=FALSE) colnames(Cos)<-c("ID","ndays","mesor","amp","acro","acrotime") colnames(Cos.log)<-c("ID","ndays","mesor_L","amp_L","acro_L","acrotime_L") colnames(extcos)<-gsub(paste("_",minuteW,sep=""),"_ext",colnames(extcos)) colnames(extcos.log)<-gsub(paste("_",minuteW,sep=""),"_ext_L",colnames(extcos.log)) cosinormodel.1<-merge(Cos,Cos.log,by=c("ID","ndays"),all=TRUE) cosinormodel.2<-merge(extcos,extcos.log,by=c("ID","ndays"),all=TRUE) cosinormodel.wd<-merge(cosinormodel.1[,-2],cosinormodel.2[,-2],by="ID",all=TRUE) colnames(cosinormodel.wd)[2:ncol(cosinormodel.wd)]<-paste(colnames(cosinormodel.wd)[2:ncol(cosinormodel.wd)],"_S_wd",sep="")
minuteW=(ncol(count.we)-2)/1440 Cos = ActCosinor_long2( count.data = count.we, window=minuteW,daylevel=FALSE) Cos.log = ActCosinor_long2( count.data = log.count.we, window=minuteW,daylevel=FALSE) extcos = ActExtendCosinor_long2(count.data = count.we, window=minuteW,daylevel=FALSE) extcos.log = ActExtendCosinor_long2(count.data = log.count.we, window=minuteW,daylevel=FALSE) colnames(Cos)<-c("ID","ndays","mesor","amp","acro","acrotime") colnames(Cos.log)<-c("ID","ndays","mesor_L","amp_L","acro_L","acrotime_L") colnames(extcos)<-gsub(paste("_",minuteW,sep=""),"_ext",colnames(extcos)) colnames(extcos.log)<-gsub(paste("_",minuteW,sep=""),"_ext_L",colnames(extcos.log)) cosinormodel.1<-merge(Cos,Cos.log,by=c("ID","ndays"),all=TRUE) cosinormodel.2<-merge(extcos,extcos.log,by=c("ID","ndays"),all=TRUE) cosinormodel.we<-merge(cosinormodel.1[,-2],cosinormodel.2[,-2],by="ID",all=TRUE) colnames(cosinormodel.we)[2:ncol(cosinormodel.we)]<-paste(colnames(cosinormodel.we)[2:ncol(cosinormodel.we)],"_S_we",sep="")
is_all.wd = IS_long2(count.data = count.wd, window=(ncol(count)-2)/144, method = "average" ) #10 minute colnames(is_all.wd)[2]<-"IS_wd" is_all.we = IS_long2(count.data = count.we, window=(ncol(count)-2)/144, method = "average" ) #10 minute colnames(is_all.we)[2]<-"IS_we"
# subject level features: cosinor model, IS output<-is_all.wd output<-merge(output,is_all.we,by=c("ID"), all=TRUE) output<-merge(output,cosinormodel.wd,by=c("ID"),all=TRUE) output<-merge(output,cosinormodel.we,by=c("ID"), all=TRUE) output<-merge(idM,output,by.x=c("filename" ),by.y=c("ID" ),all=TRUE) dim(output) head(output) write.csv(output, file=foutFN("1",studyname), col.names = TRUE, row.names = FALSE) plot.data<-array(NA,dim=c(nrow(output),ncol(output)-2)) for (j in 1:ncol(plot.data)) plot.data[,j]<-as.numeric(as.character(unlist(output[,j+2]))) colnames(plot.data)<-colnames(output)[-c(1,2)] plot.data<-plot.data[,order(colnames(plot.data))]
par(mar=c(8, 4, 2, 2) + 0.1) # bottom,left,up,right boxplot(plot.data,col=c("blue","grey" ),xlab="",xaxt="n") axis(1, at=1:ncol(plot.data), labels=colnames(plot.data), las = 2, cex.axis = 0.8)
As opposed to Cosinor/extCosinor model, functional principal component analysis (FPCA) is a data driven technique that makes no parametric assumptions about the functional form of diurnal patterns. FPCA represents any sample of diurnal patterns via L2-orthogonal functional "principal components". The resulted principal component scores can be used. Here, FPCA is done by using the fpca.face
function in the refund package. It is suggsted to take the log transformation to reduce the skewness of count data by setting logtransform = TRUE
.
Notice that, here, for simplicity and better interpretability, for each subject, we take the average across all valid days, therefore, we don't account for the within person correlation. To incorporate the within person correlation, one can choose to use multilevel FPCA by using the denseFLMM
function in the denseFLMM package.
######################################################### # 2.6 functional principal component analysis: f=1 WD; f=2 WE ftitle<-c("weekday", "weekend") for (f in 1:2){ if (f==1) log.count.f=log.count.wd if (f==2) log.count.f=log.count.we if (f==1) idM.f<- idM.wd else idM.f<-idM.we X=log.count.f[,-c(1,2)] X <- t(t(X) - colMeans(X, na.rm = TRUE)) dim(X) mean(X) id.groups<-unique(log.count.f[,"ID"]) print(length(id.groups)) groups<-NULL for (i in 1:nrow(log.count.f)) groups[i]<-which(id.groups==log.count.f[i,"ID"]) groups<-as.matrix(groups) G=1 Zvars<-lapply(seq(len=G),function(g) matrix(1,nrow(log.count.f),1)) data<-as.matrix(X) #X=demean-log(enmo) idM.avg<-unique(idM.f[,-3]) S2<-NULL for (i in 1:length(id.groups)) S2[i]<-which(idM.avg[,"filename"]==id.groups[i]) idM.avg2<-idM.avg[S2,] print(c(f,dim(data),nrow(idM.avg2))) #------------------------------------------------------------ # Method 2: multilevel(2 levels) FPCA seperating subject-level effect and day-level effect #------------------------------------------------------------ library(denseFLMM) #Z1 <- denseFLMM(data,groups=groups,Zvars=Zvars,smooth = FALSE,NPC=rep(20,G+1)) Z2 <- denseFLMM(data, gridpoints = 1:ncol(data), Zlist = NA, G = G, Lvec = NA, groups = groups, Zvars = Zvars, L = NA, NPC=rep(20, G+1), smooth = FALSE, bf = 20, smoothalg = "gamm") dim(Z2$xi[[1]]) ; dim(Z2$xi[[2]]) dim(Z2$phi[[1]]); dim(Z2$phi[[2]]) #Calculating the percentage of variance explained by each componet for the subject-level PCs and Day-level PCs. pervar_sub<-100*Z2$nu[[1]]/sum(Z2$nu[[1]]) pervar_day<-100*Z2$nu[[2]]/sum(Z2$nu[[2]]) pervar_sub pervar_day #Calculating the relative variance explained by subject heterogeneity out of total variability sum(Z2$nu[[1]])/Z2$totvar #Calculating the relative variance explained by day-to-day variance out of total variability sum(Z2$nu[[2]])/Z2$totvar #Calculating the residual variance/noises out of total variability 1440*Z2$sigma2/Z2$totvar Z2$totvar subject_PCs<-cbind(idM.avg2,Z2$xi[[1]] ) day_PCs<-cbind(idM.f,Z2$xi[[2]] ) colnames(subject_PCs)[(ncol(idM.avg2)+1): ncol(subject_PCs)]<-paste("PC",1:ncol(Z2$xi[[1]]),sep="") colnames(day_PCs)[(ncol(idM.f)+1): ncol(day_PCs)]<-paste("PC",1:ncol(Z2$xi[[2]]),sep="") if (f==1) Z2.save.1<-Z2 else Z2.save.2<-Z2 write.csv(day_PCs, file=foutFN("4_denseFLMM_day_PCs", paste(ftitle[f],studyname,sep="_")), col.names = TRUE, row.names = FALSE ) write.csv(subject_PCs, file=foutFN("5_denseFLMM_subject_PCs", paste(ftitle[f],studyname,sep="_")), col.names = TRUE, row.names = FALSE ) }
for (f in 1:2){ if (f==1) Z2<-Z2.save.1 else Z2<-Z2.save.2 pervar_sub<-100*Z2$nu[[1]]/sum(Z2$nu[[1]]) m<-4 my.ylim<-range(Z2$phi[[1]][,m:1])*1.4 op=par(mai=c(.6, .8, .3, 0.1),omi=c(0, 0, 0, 0)) matplot(Z2$phi[[1]][,m:1],type='l',ylab='PC',xlab='',xaxt='n',yaxt='n',main='',lwd=c(.7,.7,2,2),ylim=my.ylim,col=m:1) title(xlab = "Hour", line=1.8) title(main = paste(ftitle[f],": Subject-specific Effect (", round(sum(Z2$nu[[1]])/Z2$totvar,digits=3)*100,'% of Total)',sep=''),line=0.5) axis(1,at=(0:12)*2*60,labels=(0:12)*2,cex.axis=1) axis(2,las=1,cex.axis=1) #axis(2,at=(1:3)*0.01,labels=(1:3)*0.01, las=1,cex.axis=1) legend('topleft',co=1:4, legend=paste(paste('PC',1:m,' (',sep=''),sprintf("%.1f",round(pervar_sub[1:m],digits=1)),'%)',sep=''), lty=1, horiz=FALSE, lwd=2 , bg='transparent',cex=0.725,text.font=1.5,box.col='transparent', ncol=2) } #f # end FPCA
r QCnights.feature.alpha
, when Nvalid days on mean,sd,WD,WE < r QCnights.feature.alpha
, the mean featuer will be marked as NA.# set mean=NA for those features with Nvalid night< 3 or 7. NcountSum<-function(x) sum(as.numeric(!is.na(x))) #calcuate number of good value of each feature ## To wear mask on mean feature table, align the first matrix according to IDs in the second matrix. ## return a Fmer81.N.all (Ngood values) matrix with same order (row and column) to Fmer81 alignMatrix<-function(Fmer81.N.all,Fmer81){ if (length(setdiff(Fmer81[,1], Fmer81.N.all[,1]) )>=1) stop("Found missing filename in Fmer81.N.all data") if (length(setdiff(colnames(Fmer81),colnames(Fmer81.N.all)) )>=1) stop("Found missing columns in Fmer81.N.all data") Fmer81x<-cbind(f=1:nrow(Fmer81),filename=Fmer81[,1]) data<-merge(Fmer81x,Fmer81.N.all,by="filename",all.x=TRUE,all.y=FALSE) data<-data[order(as.numeric(data[,"f"])),-2] if (nrow(data)<nrow(Fmer81)) print("found missing lines in the mask") Fmer81.mask<-data[, which(colnames(data) %in% colnames(Fmer81))] if (sum(colnames(Fmer81.mask)!=colnames(Fmer81))>0) stop ("Wrong columns names in the mask") if (sum( Fmer81.mask[,1]!= Fmer81[,1])>0) stop ("Wrong filename orders in the mask") # cbind(Fmer81[1:10,1],Fmer81.mask[1:10,1]) return(Fmer81.mask) } # For sleep data, wear two mask (allday mask and WD only mask) MeanWithMask<-function(Fmer5.N.all,Fmer5,QCnights.allday,QCnights=0,Cmean4time=TRUE){ ###a) Take averages for both subject and day level features although subject feature has same values for all days................ Ncol8<-which(colnames(Fmer5)=="weekday") # Ncol8 and after are weekday,zeroSleep,Nwd,Nwe,.. Fmer81<-aggregate(.~filename,data=Fmer5[,c(1:(Ncol8-1))], mean,na.rm=T,na.action=na.pass) Fmer82<-aggregate(.~filename,data=Fmer5[,c(1,Ncol8:ncol(Fmer5))], sum,na.rm=T,na.action=na.pass) timingC<-grep("cmean",colnames(Fmer5)) if (length(timingC)>=1) { Fmer83<-aggregate(.~filename,data=Fmer5[, c(1,timingC)], get_mean_sd_hour,unit2minute=60,out="mean" ,na.action=na.pass) #L5TIME if (length(which(Fmer81[,"filename"]!=Fmer83[,"filename"]))>=1) stop("Mismatch IDs in Fmer83") timingC<-grep("cmean",colnames(Fmer81)) Fmer81[,timingC]<-Fmer83[,-1] # colnames(Fmer81)[timingC]==colnames(Fmer83)[-1] } Fmer81.N.all<-alignMatrix(Fmer5.N.all,Fmer81) Fmer81.N.self.raw<-aggregate(.~filename,data=Fmer5[, 1:(Ncol8-1)], NcountSum,na.action=na.pass) Fmer81.N.self<-alignMatrix(Fmer81.N.self.raw,Fmer81) # Wear 2 Mask for the mean features Fmer81.mask<-Fmer81 for (j in 2:ncol(Fmer81.mask)) { S81<- which(Fmer81.N.all[,j] < QCnights.allday | Fmer81.N.self[,j] < QCnights ) if (length(S81)>=1) Fmer81.mask[S81,j]<-NA} # merge two parts if (sum(Fmer81.mask[,1]!=Fmer82[,1])>0) stop("Check IDs of two Fmer8 matrix") Fmer8<-cbind(Fmer81.mask,Fmer82[,-c(1,2)]) #change Fmer81.mask, rm filename and weekday for the second file return(Fmer8) }
# Fmer81.N.all=number of good days for each feature # Fmer4[which(Fmer4[,"wakeup"]-Fmer4[,"sleeponset"] ==0) ,"zeroSleep"]<-1 # Fmer4[,"Nwd_0s"] <-Fmer4[,"Nwd"]*Fmer4[,"zeroSleep"] # Number of weekdays of zero sleep # Fmer4[,"Nwe_0s"] <-Fmer4[,"Nwe"]*Fmer4[,"zeroSleep"] # Number of weekends of zero sleep dictionary<-paste("module7_",studyname,"_all_features_dictionary.xlsx",sep="") dict<-read.xlsx(dictionary,head=1,sheetName="dictionary",stringsAsFactors=F) dict<-dict[which(!is.na(dict[,"Domain"])),] SLdomain<-dict[which(dict[,"Domain"]=="SL"),"Variable"] PAdomain<-dict[which(dict[,"Domain"]=="PA"),"Variable"] CRdomain<-dict[which(dict[,"Domain"]=="CR"),"Variable"] subjectlevel.variable<-dict[which(dict[,"level"]=="subject"),"Variable"] ftitle<-c("weekday", "weekend") inFN<-paste("module7b_",studyname,"_all_features_2_dayclean.csv",sep="") #clean file (must have TAC) outFN<-paste("module7d_",ftitle,"_",studyname,"_all_features_3_subject.csv",sep="") DayFeature<-read.csv(inFN,head=1,stringsAsFactors=F) #part1=day features wdwefea<-read.csv(paste("module7d_",studyname,"_new_features_page1.csv",sep=""),head=1,stringsAsFactors=F) #part2=new wdwe subject features pcFN<-paste("module7d_",ftitle,"_",studyname,"_new_features_page5_denseFLMM_subject_PCs.csv",sep="") #part3=fpca subject level wdwe #X01) add L5TIME_cmean columns Csubject<-which(colnames(DayFeature) %in% subjectlevel.variable) S4<-setdiff(1:ncol(DayFeature),Csubject) # day level SL/PA/CR features Fmer5<-DayFeature[,S4] #remove old subject level features using all days, will replace new one timingC<-grep("TIME",colnames(Fmer5)) timingMatrix<-Fmer5[,timingC] colnames(timingMatrix)<-paste(colnames(timingMatrix),"_cmean",sep="") #circle mean columns CmeanDomain<-colnames(timingMatrix) lastC<-max(timingC) Fmer6<-cbind(Fmer5[,c(1:lastC)],timingMatrix,Fmer5[(lastC+1):ncol(Fmer5)]) #X02) subset for Fmer6 (old daylevel features) C5<-which(colnames(Fmer6)=="weekday") Csleep<-which(colnames(Fmer6) %in% SLdomain) Cother<-which(colnames(Fmer6) %in% c(PAdomain,CRdomain,CmeanDomain)) sleepColumns<-c(1,Csleep,C5:ncol(Fmer6)) # sleep features nonsleepColumns<-c(1,Cother,C5:ncol(Fmer6)) # nonsleep daylevel features Ncol8<-which(colnames(Fmer6)=="weekday") # Ncol8 after are weekday,zeroSleep,Nwd,Nwe,.. Fmer6.N.all<-aggregate(.~filename,data=Fmer6[, c(1,B:(Ncol8-1))], NcountSum,na.action=na.pass) tailname<-colnames(Fmer6)[Ncol8:ncol(Fmer6)] H<-holidayplusweekends(Fmer6,holidayFN=holidayFN) #f=1/2 for WD/WE................................................ for (f in 1:2){ #x1) Old day level features summaryized for WD or WE only: Note sleep has special "weekend" definition. if (f==1) {sleepSet<-H$SLfeatureSwd; paSet<-H$PAfeatureSwd; QCnights=QCnights.feature.alpha[3]} if (f==2) {sleepSet<-H$SLfeatureSwe; paSet<-H$PAfeatureSwe; QCnights=QCnights.feature.alpha[4]} table(Fmer6[sleepSet,"weekday"]) table(Fmer6[paSet,"weekday"]) sleepWD<-Fmer6[sleepSet,sleepColumns] paWD<-Fmer6[paSet,nonsleepColumns] SleepData<-MeanWithMask(Fmer6.N.all,Fmer5=sleepWD,QCnights.allday=QCnights.feature.alpha[1],QCnights=QCnights) PAData<-MeanWithMask(Fmer6.N.all,Fmer5=paWD,QCnights.allday=QCnights.feature.alpha[1],QCnights=QCnights) Fmer8<-merge(SleepData,PAData,by="filename",all=TRUE,suffix=c("_sleep","")) #Nwd.. 6 duplicate columns dim(PAData) dim(SleepData) dim(Fmer8) #x2) PCs wdwePC<-read.csv(pcFN[f],head=1,stringsAsFactors=F) colnames(wdwePC)[3:ncol(wdwePC)]<-paste("PC",1:20,"_S",sep="") wdwePC2<-wdwePC[,c(1,3:12)] #x3) newfeatures if (f==1) S9<-c("filename","newID",colnames(wdwefea)[grep("wd",colnames(wdwefea))]) if (f==2) S9<-c("filename","newID",colnames(wdwefea)[grep("we",colnames(wdwefea))]) wdweSubjectFea<-merge(wdwefea[,S9],wdwePC2,by="filename",all=TRUE) #x4) merage old daylevel feature on wdwe + new wdwe subject leve + new wdwePCs Fmer9<-merge(Fmer8,wdweSubjectFea,by="filename",all=TRUE) C8<-which(colnames(Fmer9) %in% c(tailname,paste(tailname,"_sleep",sep=""))) C9<-which(colnames(Fmer9)=="newID") C10<-c(1,C9,setdiff(2:ncol(Fmer9),c(C9,C8)),C8) Fmer9<-Fmer9[,C10] Fmer9[,"cleancode2"]<-rowSums( (is.na(Fmer9))) # of missing feature, 0 means no missing rmwdwe<-function(x){y<-unlist(strsplit(x,"\\_")) z<-y[length(y)] if (z=="wd" | z=="we") t<-paste(y[-length(y)],collapse="_") else t<-x return(t)} newname.rmwdwe<-as.character(unlist(lapply(colnames(Fmer9),rmwdwe))) colnames(Fmer9)<-newname.rmwdwe write.csv(Fmer9, file=outFN[f], row.names = FALSE ) # R4.2.2 Error in utils::write.table(Fmer9, file = outFN[f], row.names = FALSE, : unimplemented type 'list' in 'EncodeElement' Fmer9.withmissing<-Fmer9[which(Fmer9[,"cleancode2"]>=1),] rownames(Fmer9.withmissing)<-NULL ans<-c(ftitle[f],outFN[f],nrow(Fmer9),length(unique(Fmer9[,"filename"])),length(unique(Fmer9[,"newID"])) ) if (f==1) ans1<-ans else ans2<-ans if (f==1) Fmer9.withmissing.weekday<-Fmer9.withmissing else Fmer9.withmissing.weekend<-Fmer9.withmissing }
r H$holiday.story
WD: r paste("Calculate weekday subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1]," and Nvalid.weekday.night>=",QCnights.feature.alpha[3], " for each feature. Set them as NAs for ", nrow(Fmer9.withmissing.weekday), " subjects (",length(which(DayFeature[,1] %in% Fmer9.withmissing.weekday[,1]))," days) as follows,",sep="")
cleancode2: number of missingness of features of each subject
if (nrow(Fmer9.withmissing.weekday)>=1) kable(Fmer9.withmissing.weekday,caption="N valid night of removed subjects for WD mean features") %>% kable_styling(bootstrap_options = c("striped", "hover"))
WE: r paste("Calculate weekend subject-level mean when Nvalid.night>=",QCnights.feature.alpha[1]," and Nvalid.weekend.night>=",QCnights.feature.alpha[4], " for each feature. Set them as NAs for ", nrow(Fmer9.withmissing.weekend), " subjects (",length(which(DayFeature[,1] %in% Fmer9.withmissing.weekend[,1]))," days) as follows,",sep="")
cleancode2: number of missingness of features of each subject
if (nrow(Fmer9.withmissing.weekend)>=1) kable(Fmer9.withmissing.weekend,caption="N valid night of removed subjects for WE mean features") %>% kable_styling(bootstrap_options = c("striped", "hover"))
ansM<-rbind(ans1,ans2) colnames(ansM)<-c("Day","Filename", "Nrow", "Nfiles","Nsubjects") rownames(ansM)<-NULL kable(ansM,caption="Description of output files") %>% kable_styling(bootstrap_options = c("striped", "hover"))
t1<-pheno.plot(inputFN=outFN[1],csv=TRUE, start=4) t2<-pheno.plot(inputFN=outFN[2],csv=TRUE, start=4)
Some annotation variables in the output
Variable | Meaning
--------------|----------------------------------
zeroSleep | sleeponset= wakup
Nwd | number of weekdays
Nwe | number of weekends
Nwd_0s | number of zero sleep in weekdays
Nwe_0s | number of zero sleep in weekends
_sleep | number of days for sleep feature calculation (WE=Fri and Sat), differ from PA and CR (WE=Sat and Sun)
cleancode2 | number of NAs in the feature table
How to run this?
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.