R/ACCELMISSING.R

Defines functions accel.impute accel.plot.7days create.flag missing.rate valid.days valid.subjects wear.time.plot

Documented in accel.impute accel.plot.7days create.flag missing.rate valid.days valid.subjects wear.time.plot

# update:
# 05/14/2016: missing.rate - fix the table allowing NA for 7 days
# 05/21/2016: valid.subjects - keep.7days=TRUE/FALSE arguement is included 
# 05/23/2016: missing.rate - translate the missing rate to wearing hours, and create more outputs (table.wh, label)
# 07/05/2016: accel.impute - add demo.include=TRUE/FALSE argument
# 03/25/2018: no change but author email address, require() library() should be not included in the code because the package already depends on pscl and mice. Including it will give some errors in package building process (v1.2)
# 03/28/2018: missing.rate - as.matrix() for flag, PA, (v1.3)
# 03/31/2018: accel.impute - as.data.frame() for label and demo; 2l.zip.pmm, 2l.zipln, 2l.zipln.pmm,  =>  wy=NULL is included (v1.4)
########################################################
# accel.impute() performs multiple imputations for accelerometer data
# input: PA, label, flag, demo
# output: multiple datasets with imputations (m=5 as a default)
########################################################
accel.impute <- function(PA, label, flag, demo=NA, method="zipln", time.range=c("09:00","20:59"), K=3, D=5, mark.missing=0, thresh=10000, graph.diagnostic=TRUE,  seed=1234, m=5, maxit=6){

# input required
	PA = as.matrix(PA)
	label = as.data.frame(label) # data.frame is fine
	flag = as.matrix(flag)  # data frame is not fine
	#cat("Dimension of data: ","PA", dim(PA), "...label", dim(label), "...flag", dim(flag))

# optional data	: demo=NA is default
	if ( is.null(nrow(demo))) { demo.include=FALSE; print("no demo...") }
	if (!is.null(nrow(demo))) { demo.include=TRUE ; demo = as.data.frame(demo)}

# wearing/nonwearing mark
	nw = mark.missing
	w = abs(1-nw)

# time sequence
		start.time =as.POSIXct("2015-01-01")
		seq.time = seq.POSIXt(start.time, length.out=1440, by="min")
		min.seq = format(seq.time,"%H:%M")
		
# convert time.range to minute index
		min.range = which(min.seq %in% time.range)
		daytime = min.range[1]:min.range[2]  # minute index of daytime
		# cat("Imputation range (daytime in minute):", min.range[1], min.range[2])

# careful with threshold
 	PA[PA>=thresh] = thresh  # it might be done in previous steps, just in case
	# print(paste("Is thresh=",thresh, "reasonable for your device? Otherwise change the option.") )
	
# Demographic data  
# note: demo is the demographic data by subject
	if (demo.include){#----------------------------------------------

	# update label with demographic information
	key1 = colnames(label)[1]
	key2 = colnames(demo)[1]	
	labelDemo = merge(label, demo, by.x= key1, by.y= key2, all.x= TRUE)
	demo.daily = labelDemo[, colnames(demo) ]
	# # Alternative code
	# nsubject = nrow(demo) # 184 subject
	# nday = length(unique(label[, 2])) # num of days per subject - 7 or 14 days
	# demo.daily = as.data.frame(matrix(0, nday*nsubject, ncol(demo)))
	# for (i in 1:ncol(demo)) demo.daily[, i] = rep(demo[, i], each=nday) # subject i
	# colnames(demo.daily) = colnames(demo)

		# error messages: 
		if (nrow(PA)!=nrow(demo.daily)) {stop("Dimension does not match! Please check the demographic data.")}
		if (sum(is.na(demo)) != 0) {stop("There are missing values (NA) in demo. Imputation is needed, otherwise set 'demo.include=FALSE'.")}
		
	} #--------------------------------------------------------------------
	
	# create the variable of weekday=1 , weekend=0
	daylabel=label[, 2]  # l=Sunday,...,7=Saturday
	wk = ifelse((daylabel%%7)>=2, 1, 0) ; wk = as.factor(wk)    # weekday=1, weekend=0
	# time invariant X matrix

# create x matrix 	
	if (demo.include){ 
			xmat = data.frame(demo.daily[, -1], wk) 
			}else{
			xmat = data.frame(wk)	
			}
 # packages 
	#require(mice);  if (!require(mice)) {stop("mice package must be installed.")}	
	#require(pscl);   if (!require(pscl)) {stop("pscl package must be installed.")}	
###############################################
 # initial imputation with zip pmm   <--- first iteration 
 ###############################################
 print("First iteration starts with zip + pmm... ")
 intimp = PA  
 for (s in daytime) { #===========================(s)
 	cat(paste(min.seq[s],"...") ) # print the process
	t1 = s; t0 = s-1; 
	if (s==daytime[1]){# set initial value
		y0=PA[, t0] ;	 y0[flag[, t0]==nw] = NA
		icd.y0 = data.frame(y0, xmat)
		imp.y0 = mice(icd.y0, seed=seed, method = "pmm", maxit=maxit, m=1, printFlag=FALSE) 
		cd.y0 = complete(imp.y0, 1)$y0 
		}else{ cd.y0 = cd.y1 } # update 	
	y1 =PA[, t1]
	r1 =(flag[, t1]==w)   # wearing=T, missing=F
	y1[!r1] = NA			# missing to NA
	icd.y1 = data.frame(y1, ln.y0 = log(cd.y0+1), xmat) # (L1 L1)	
	imp.y1 = mice(icd.y1, seed=seed, method = "2l.zip.pmm", maxit=maxit, m=1, printFlag=FALSE)
	cd.y1 = complete(imp.y1)$y1 # one complete data of y1
	if (graph.diagnostic==TRUE)  {
		plot(log(cd.y1+1), log(cd.y0+1), col=ifelse(r1,3,2), pch=ifelse(r1,1,8) , xlab=min.seq[t1], ylab=min.seq[t0], main="log(count+1)" );	legend("topleft", legend=c("observed","imputed"), col=c(3,2), pch=c(1,8) )
		}
	#---------------------------------------------------------------	
	intimp[, t1] = complete(imp.y1)$y1  # save data	
	# for (j in 1:m) intimp[[j]][, t1] = complete(imp.y1, j)$y1  # save data 
 }#=========================================(s)
 
 ###############################################
 # actual imputation with zipln  <--- second iteration 
 ###############################################
print("done.") 
print( paste("Preparing the zipln imputation with K=", K,"lag and lead"))
# pre-step with K
		zmat = matrix(NA, nrow(intimp), ncol(intimp) )
		for ( s in (daytime[1]-K):(tail(daytime,1)+K) ){# ----- (zmat)
			yt= round(intimp[, s])
			data.t = data.frame(yt , xmat)
			zip.t = zeroinfl(yt ~ . |. , data=data.t)
			lam.t = predict(zip.t , type="count") 
	    	zmat[, s]  = log(yt+1) -  log(lam.t+1)
	    	cat(".", s)  
		}# ----------------------- (zmat)	
	# create empty datalist	
	listimp = vector("list", m) 
	names(listimp) = paste("imp", 1:m, sep="")
	for (k in 1:m) listimp[[k]] = intimp   

print("done") 
print(paste("Second iteration starts with", method," ... "))
for (s in daytime){ #==============================(s)
	cat(paste(min.seq[s],"...") ) # print the process
	if (s==daytime[1]){# set initial value
		cd.yt_1 = intimp[ , s-1]
		}else{ cd.yt_1 = cd.yt } # update
		tvec = c(s + (-K:K) )  
		names(tvec) = c( paste("t_", K:1, sep=""), "t0", paste("t.", 1:K, sep="") )	
	ytmat = intimp[, tvec]; ytmat1 = ytmat;
	ytmat1[flag[,tvec]==nw] = NA  # update with missing NA
	colnames(ytmat) = c( paste("yt_", K:1, sep=""), "yt", paste("yt.", 1:K, sep=""))
	colnames(ytmat1) = colnames(ytmat)
	ry     = (flag[, s]==w)       # wearing=T, missing=F		
	#------------------------------------------
	zs = zmat[ , tvec] 
	colnames(zs) = c( paste("zt_", K:1, sep=""), "zt" ,paste("zt.", 1:K, sep="") )
	#------------------------------------------
	icd.yt = data.frame(yt=ytmat1[, (K+1)], xmat, ln.yt_1 = log(cd.yt_1+1)) # (L1 CK)
		ini = mice(icd.yt, seed=seed, maxit=0) #initiate
		predmat = ini$predictorMatrix # predictor matirix
		predmat[1, "ln.yt_1"] = 3  # lag-term is only included in logit (type=3): type=1 (both), type=2(count only), type=3 (zero only)
	# method = 2l.zipln or 2l.zipln.pmm
	if (method=="zipln") { imp.yt = mice(icd.yt, seed=seed, method="2l.zipln", 
		predictorMatrix=predmat, maxit=maxit, m=m, K=K, zs=zs, printFlag=FALSE)}
	if (method=="zipln.pmm"){ imp.yt = mice(icd.yt, seed=seed, method="2l.zipln.pmm",
		predictorMatrix=predmat, maxit=maxit, m=m, K=K, zs=zs, D=D, printFlag=FALSE)}
    #---------------------------------------------------------------
    cd.yt = complete(imp.yt, m)$yt # one complete data of yt
    if (graph.diagnostic == TRUE){ plot(log(cd.yt+1), log(cd.yt_1+1), col=ifelse(r1,3,2), pch=ifelse(r1,1,8), xlab=min.seq[s], ylab=min.seq[s-1], main="log(count+1)" ); legend("topleft", legend=c("observed","imputed"), col=c(3,2), pch=c(1,8) ) }
	#---------------------------------------------------------------
	for (j in 1:m) listimp[[j]][, s] = complete(imp.yt, j)$yt  # save data
}#==============================================(s)	

print(paste("Imputation complete!", m, "datasets are created."))
return(listimp)

 }# end of the function 
#############################################################
##########################################
# accel.plot.7days() provides daily activity plots 
# input: PA, label, flag
# output: 7 days plots of a person
##########################################
accel.plot.7days <- function(PA, label, flag, time.range=c("00:00","23:59"),  mark.missing=0, axis.time=TRUE, save.plot=FALSE, directory.plot=getwd() ){

	print("*** Plot ***")
	current.dir = getwd()
			
	# wearing/nonwearing mark
	nw = mark.missing
	w = abs(1-nw)
	
	# time sequence
		start.time =as.POSIXct("2015-01-01")
		seq.time = seq.POSIXt(start.time, length.out=1440, by="min")
		min.seq = format(seq.time,"%H:%M")
		hr.seq = format(seq.time,"%H")
				
	daytime = which(min.seq==time.range[1]):which(min.seq==time.range[2])
	dayvec=c("Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday")
	
	# data
	Y = PA[, daytime]  
	flag = flag
	label = label
	nsubject = length(unique(label[,1]))  
	
	
	# plot for 7 days for each person
	par(mfcol = c(4, 2), mar=c(1, 2,1,1), oma=c(2,1,1,1))
	
	for (i in 1:nsubject) { #--------------(plot personi)
		personid = unique(label[,1])[i]
 		plot.new(); legend("center", paste("person id=", personid) , bty="n", cex=2)
 		
		for (j in 1:7){#---------(j)
			y = Y[(label[,1]==personid)&(label[,2]==j), ]
			plot(y, xlab="",  ylab="", ylim=c(0, 3000), col="green", axes=F)
			lines(smooth.spline(y), col="blue")
			text(100, 2500, dayvec[j], col="purple")
			
			if (axis.time==TRUE){ ############ x-axis
		    axis(1, at=seq(1,length(daytime)+1,60), 
			labels=hr.seq[seq(daytime[1],tail(daytime,1)+1,60)]  , 
			las=2, cex.axis=0.8 )
			}else{
 			axis(1, at=seq(1,length(daytime)+1,60), 
			labels=seq(daytime[1],tail(daytime,1)+1,60)-1, cex.axis=0.7 )
			}#################################### x-axis

		axis(2, at=c(1000, 2000, 3000), labels=c("1K","2K","3K"))
		box()

		# draw the missing interval	
		id = (i-1)*7 + j
		flag.which = which(flag[id,daytime] == nw)
		flag.top = rep(3000, length(flag.which))
		points(flag.which, flag.top, col="red", cex=0.3)
		
		} #---------(j)
		
		# save plot
		if (save.plot==TRUE) {
		setwd(directory.plot)		
		dev.copy(pdf, paste("subject", personid,".pdf",sep="") )
		dev.off()   }
		
} #----------------------------(plot personi)

	if (save.plot==TRUE){ 	print(paste("pdf plots are saved in ", getwd(), sep="") )	}

	setwd(current.dir)

}#end of function	
##########################################
# creat.flag() produces a missing flag matrix 
# input: PA
# output: flagmat, with the same dimension of PA
##########################################

	create.flag <- function(PA, window=20, mark.missing=0 ) {
			print("*** Create missing flag matrix ***")
			PAmat = as.matrix(PA)
			flagmat = matrix(abs(1-mark.missing), dim(PAmat)[1], dim(PAmat)[2] )
		###################### exact
			for (i in 1:dim(PAmat)[1]){ # --------(&)
				r = rle(PAmat[i, ])
				zero.id  = which( (as.numeric(r$lengths) > window) & (as.numeric(r$values) == 0) )
				if (length(zero.id)==0)  next     # if zero.id is empty, skip this loop
				zero.tbl = rbind(r$lengths[zero.id], r$values[zero.id], cumsum(r$lengths)[zero.id])
    			zero.end   = zero.tbl[3,] 
				zero.start  = zero.tbl[3,] - zero.tbl[1,] + 1
				for (j in 1:length(zero.end)) {	flagmat[i, zero.start[j]:zero.end[j]] = mark.missing }
			} # --------(&)
		###################### exact
		
		# if (method=='nci') {################### nci
			# require(accelerometry)
			# if (!require(accelerometry)){stop("accelerometry package must be installed!")}
		# for (i in 1:dim(flagmat)[1]){
		# flagmat[i,] = accel.weartime(PAmat[i,], window=window, nci=TRUE) 		}
		# }################### nci
		
		# if (method=='van') {################### van
			# require(accelerometry)
			# if (!require(accelerometry)){stop("accelerometry package must be installed!")}
		# for (i in 1:dim(flagmat)[1]){
		# flagmat[i,] = accel.weartime(PAmat[i,], window=window, nci=FALSE) 
		# }
		# }################### van

	print("A missing flag matrix is created")	
 	return(flagmat)
 	} 
 	
 #############################################
mice.impute.2l.zip.pmm <- function (y, ry, x, wy=NULL, type, K, D ){
# require(pscl) ; if (!require(pscl)){stop("pscl package must be installed!")}
Y <- y[ry]
X  <- x[ry,]
X <- data.frame(X) 
nam <- colnames(X)
b <- which(type==1)
c <- which(type==2)
z <- which(type==3)
zero <- c(b,z); zero <- unique(zero); zero <-sort(zero)  
count <- c(b,c); count <- unique(count); count <- sort(count) 
form <-
as.formula(paste("Y","~", paste(nam[count],collapse="+"),
"|", paste(nam[zero], collapse="+"))) 
dat <- data.frame(Y,X)
fit <- zeroinfl(form,data=dat,dist="poisson",link="logit") 
fit.sum <- summary(fit)
yhatobs= predict(fit, na.action=na.pass) # predicted value of zip = (1-pi)lambda
# Bayesian regression
beta <- coef(fit)
rv <- t(chol(fit.sum$vcov))
b.star <- beta + rv%*%rnorm(ncol(rv)) 
fit$coefficients$count <-
b.star[1:length(fit$coefficients$count)] 
fit$coefficients$zero <-
b.star[(length(fit$coefficients$count)+1):length(b.star)]
newdata <- data.frame(X=x[!ry,])
colnames(newdata)<- nam 
#---- zip+pmm -----
yhatmis = predict(fit, newdata=newdata, na.action=na.pass)
impvec = 1:length(yhatmis)
for (j in 1:length(yhatmis)){ 
	diff.j = abs(yhatmis[j] - yhatobs)
	donor.idx = order(diff.j)[1:5] # donors = 5
	donor.pool = Y[donor.idx]
	a.draw = sample(donor.pool, size=1, replace=TRUE) #----they don't share this function
    impvec[j] = a.draw
}
return(impvec)
#############################################
# References: (1) van Buuren S, Groothuis-Oudshoorn K. mice: Multivariate imputations by chained equations in r. Journal of Statistical Software  2011, (2) Kleinke K, Reinecke J. Multiple imputation of incomplete zero-infated count data. Statistica Neerlandica  2013
#############################################
}
######################################################
mice.impute.2l.zipln.pmm <- function (y, ry, x, wy=NULL, type, K, zs=zs, D ){
# require(pscl); if (!require(pscl)){stop("pscl package must be installed!")}
Y <- y[ry]
X  <- x[ry,  ]
X <- data.frame(X) 
nam <- colnames(X)
b <- which(type==1)
c <- which(type==2)
z <- which(type==3)
zero <- c(b,z); zero <- unique(zero); zero <-sort(zero)  
count <- c(b,c); count <- unique(count); count <- sort(count) 
form <-
as.formula(paste("Y","~", paste(nam[count],collapse="+"),
"|", paste(nam[zero], collapse="+"))) 
dat <- data.frame(Y,X)
fit <- zeroinfl(form,data=dat,dist="poisson",link="logit") 
fit.sum <- summary(fit)
#---- covariance matrix from K lag and K lead variables -----#
	    Sigma = cov(zs)
		Sigma.zz = Sigma[-(K+1), -(K+1)]
		Sigma.yz = Sigma[(K+1), -(K+1)]
		Z = zs[,-(K+1)]  # n by 2K
		et = matrix(Sigma.yz, 1, 2*K)%*%solve(Sigma.zz)%*%t(Z)
yhatobs = predict(fit, na.action=na.pass)*(exp(et)[ry])# predicted value of zip = (1-pi)lambda
#-----Bayesian Regression -------#
beta <- coef(fit)
rv <- t(chol(fit.sum$vcov))
b.star <- beta + rv%*%rnorm(ncol(rv)) 
fit$coefficients$count <-
b.star[1:length(fit$coefficients$count)] 
fit$coefficients$zero <-
b.star[(length(fit$coefficients$count)+1):length(b.star)]
newdata <- data.frame(X=x[!ry,])
colnames(newdata)<- nam 
#---- zipln+pmm -----
yhatmis = predict(fit, newdata=newdata, na.action=na.pass)*(exp(et)[!ry])
impvec = 1:length(yhatmis)
for (j in 1:length(yhatmis)){ 
	diff.j = abs(yhatmis[j] - yhatobs)
	donor.idx = order(diff.j)[1:D]  # donors = 5
	donor.pool = Y[donor.idx]
	a.draw = sample(donor.pool, size=1, replace=TRUE) #----they don't share this function
    impvec[j] = a.draw
}
return(impvec)
#############################################
# References: (1) van Buuren S, Groothuis-Oudshoorn K. mice: Multivariate imputations by chained equations in r. Journal of Statistical Software  2011, (2) Kleinke K, Reinecke J. Multiple imputation of incomplete zero-infated count data. Statistica Neerlandica  2013
#############################################
}
###############################################
mice.impute.2l.zipln <- function (y, ry, x, wy=NULL, type, K, zs=zs ){
# require(pscl) ; if (!require(pscl)){stop("pscl package must be installed!")}
Y <- y[ry]
X  <- x[ry,  ]
X <- data.frame(X) 
nam <- colnames(X)
b <- which(type==1)
c <- which(type==2)
z <- which(type==3)
zero <- c(b,z); zero <- unique(zero); zero <-sort(zero)  
count <- c(b,c); count <- unique(count); count <- sort(count) 
form <-
as.formula(paste("Y","~", paste(nam[count],collapse="+"),
"|", paste(nam[zero], collapse="+"))) 
dat <- data.frame(Y,X)
fit <- zeroinfl(form,data=dat,dist="poisson",link="logit") 
fit.sum <- summary(fit)
#-----parameter update -------#
beta <- coef(fit)
rv <- t(chol(fit.sum$vcov))
b.star <- beta + rv%*%rnorm(ncol(rv)) 
fit$coefficients$count <-
b.star[1:length(fit$coefficients$count)] 
fit$coefficients$zero <-
b.star[(length(fit$coefficients$count)+1):length(b.star)]
newdata <- data.frame(X=x[!ry,])
colnames(newdata)<- nam 
#---- draw zeros ------#
pi.star = predict(fit, newdata=newdata, type="zero", na.action=na.pass)
pi.star[is.nan(pi.star)] = 0  # or 1 ????
n0 = length(pi.star); u0 = runif(n0, 0, 1); pivec=pi.star
pivec = ifelse( pi.star >= u0,  0, 1)
#----- draw counts ------#
impvec = pivec
#res.sd = sd(fit$residuals)   # residuals from wearing time
lam.star = predict(fit, newdata=newdata, type="count", na.action=na.pass)
#---- covariance matrix from K lag and K lead variables -----#
	    Sigma = cov(zs)
		Sigma.zz = Sigma[-(K+1), -(K+1)]
		Sigma.yz = Sigma[(K+1), -(K+1)]
		Z = zs[,-(K+1)]  # n by 2K
		et = matrix(Sigma.yz, 1, 2*K)%*%solve(Sigma.zz)%*%t(Z)
# impuation
impvec[pivec==1] = lam.star[pivec==1]*(exp(et)[!ry][pivec==1])
return(impvec)
#############################################
# References: (1) van Buuren S, Groothuis-Oudshoorn K. mice: Multivariate imputations by chained equations in r. Journal of Statistical Software  2011, (2) Kleinke K, Reinecke J. Multiple imputation of incomplete zero-infated count data. Statistica Neerlandica  2013
#############################################
}
#################################################### 
# missing.rate() computes missing rate based on the missing flag
# input: label, flag
# output: list with total (total missing rate) and table (missing rate table by subject)
####################################################
 	missing.rate <- function(label, flag, mark.missing=0, time.range=c("09:00","20:59")){
 		
 	# wearing/nonwearing mark
		nw = mark.missing
		w = abs(1-nw)
	# input data
		label=as.matrix(label)
		flag =as.matrix(flag)
   	# time sequence
		start.time =as.POSIXct("2015-01-01")
		seq.time = seq.POSIXt(start.time, length.out=1440, by="min")
		min.seq = format(seq.time,"%H:%M")
	# time range
		duration = which(min.seq==time.range[1]):which(min.seq==time.range[2])
	# total missing rate 
		flag.duration = flag[ , duration]
		total = round(mean(flag.duration==nw), 3)
		totalper = total*100
		print(paste("Total missing rate during ",time.range[1],"-" ,time.range[2]," is " , total, "(", totalper ,"%)", sep="" ) )
	# missing rate table by subject
		nsubject = length(unique(label[,1]))
		mrate.7d = matrix(NA, nsubject, 7)
		for (i in 1:nsubject){ # ------(*)
			personi = unique(label[,1])[i]
			dayjs = as.numeric(label[label[,1] == personi, 2])
			flagi = flag.duration[label[,1] == personi, ]
			if (length(dayjs)==1) flagi = t(as.matrix(flagi))
			mrate.7d[i, dayjs] = apply(flagi==nw, 1, mean)
		}#----------(*)
		colnames(mrate.7d) =1:7
		row.names(mrate.7d) = unique(label[,1])
	# compute the missing rate to the wearing hours
		wh.7d = (1-mrate.7d)*(length(duration)/60)
	# update the label with wh	
		mrate.vec = apply(flag.duration==nw, 1, mean)
		wh.vec = (1-mrate.vec)*(length(duration)/60)
		mylabel=as.data.frame(label)
		colnames(mylabel)[1:2] = c("id","day")
		mylabel$wh = round(matrix(t(wh.vec), nrow(mylabel),1), 3)
	return(list(total=total, table=round(mrate.7d,3), table.wh=round(wh.7d,3), label=mylabel))
 	}
#######################################################
# valid.days() selects the valid days that has sufficient wearing times
# input: PA, label, flag
# output: list with the updated PA, label and flag  
#######################################################
valid.days <- function(PA, label, flag, wear.hr=10, time.range=c("09:00","20:59"), mark.missing=0){
	print("*** Complete Days Filtering ***")
	# wearing/nonwearing mark
		nw = mark.missing
		w = abs(1-nw)
	# time sequence
		start.time =as.POSIXct("2015-01-01")
		seq.time = seq.POSIXt(start.time, length.out=1440, by="min")
		min.seq = format(seq.time,"%H:%M")
	# time range
		duration = which(min.seq==time.range[1]):which(min.seq==time.range[2])
	# input data
		PA = as.matrix(PA)
		label = as.matrix(label)
		flag = as.matrix(flag)	
	# missing rate before filtering data
		flag.duration = flag[ , duration]
		mrate = round(mean(flag.duration==nw), 2)
		print(paste("Total missing rate during ", time.range[1],"-" , time.range[2]," is " ,mrate,sep=""))
		print(paste("Select only valid days based on wearing time >",wear.hr,"hours "))
	# select the valid days
		wearmin = wear.hr*60
		flag.sum = apply(flag.duration==w, 1, sum)
		flag.id  = which(flag.sum > wearmin)
	# update data - matrix
		PA2 = PA[flag.id, ]
		label2 = label[flag.id,]
		flag2  = flag[flag.id, ]
		valid.days.out = list(PA=PA2, label=label2, flag=flag2)
	# summary
	print(paste("Total days are reduced to", dim(PA2)[1], "from", dim(PA)[1] ))
	print(paste("Missing rate is now", round(mean(flag2[ ,duration]==nw),2)  ))
	
	# return
		return(valid.days.out)	
	}

####################################################################
# valid.subjects() select the valid subjects that have sufficient valid days in a week
# input: data1- list with PA, label, flag from the original data with many missing values
#			 data2- list with PA, label, flag from the output of valid.days
# output: list with the updated PA, label, and flag			
##############################################################
	valid.subjects <- function(data1, data2, valid.days=3, valid.week.days=NA, valid.weekend.days=NA,  mark.missing=0, keep.7days=TRUE){
	print("*** Select Data by Subject ***")
	# wearing/nonwearing mark
		nw = mark.missing
		w = abs(1-nw)
	# label by person
		person.label = data2$label[,1]  # all persons for valid days
		unique.person.id = unique(person.label)
	# valid days per person	 
		day.per.person = as.numeric(table(person.label)) 
		summary(day.per.person) # min=1 ~ max=7
	# separate label for weekend vs. weekand
		day.label    = data2$label[,2]
		weekday.label = ( day.label%%7 > 1 )  
		weekend.label = ( day.label%%7 <= 1 ) 
	# valid weekday per person	 
		weekday.person = as.numeric(tapply(weekday.label, person.label, FUN=sum))
	# valid weekend per person
		weekend.person = as.numeric(tapply(weekend.label, person.label, FUN=sum))
	
	# filtering 
	#----------------------------------------------
	if ( (length(valid.days)==0) || 
		 (is.na(valid.days)&is.na(valid.week.days)&is.na(valid.weekend.days)) ){
		stop("Input argument valid.days= is required.")
	}
	if ((!is.na(valid.days)) & is.na(valid.week.days) & is.na(valid.weekend.days)){
		keep.person.id = unique.person.id[day.per.person >= valid.days] 
		
		print(paste("Select only the subjects that include at least",
		 	valid.days, "complete (valid) days"))
		
		}
	if ((!is.na(valid.days)) && (!is.na(valid.week.days)) && (!is.na(valid.weekend.days)) ){	
		keep.person.id = unique.person.id[(day.per.person >= valid.days) &
				  (weekday.person >= valid.week.days) &
				  (weekend.person >= valid.weekend.days)]
		print(paste("Select only the subjects that include at least", 
			valid.days, "complete (valid) days,", 
			valid.week.days, "complete (valid)  weekday, ", 
			valid.weekend.days, "complete (valid)  weekend"))										  
		}		
	if (is.na(valid.days) && (!is.na(valid.week.days)) && (!is.na(valid.weekend.days))){	
		keep.person.id = unique.person.id[(weekday.person >= valid.week.days) &
										  (weekend.person >= valid.weekend.days)]
		
		print(paste("Select only the subjects that include at least", 
			valid.week.days, "complete (valid) weekday, ", 
			valid.weekend.days, "complete (valid) weekend"))										  
		}		
	if (is.na(valid.days) && (!is.na(valid.week.days)) && (is.na(valid.weekend.days))){	
		keep.person.id = unique.person.id[weekday.person >= valid.week.days]
		
		print(paste("Select only the subjects that include at least", 
			valid.week.days, "complete (valid) weekdays."))													  
		}
	if (is.na(valid.days) && (is.na(valid.week.days)) && (!is.na(valid.weekend.days))){	
		keep.person.id = unique.person.id[weekday.person >= valid.weekend.days]
		
		print(paste("Select only the subjects that include at least", 
			valid.weekend.days, "complete (valid) weekends."))											  
		}	
	#-----------------------------------------		
	N.person = length(keep.person.id)  
	#-----------------------------------------
	if (keep.7days){ #------------------(1)
		# keep 7 days from original data	
		keeplabel=(data1$label[,1]%in%keep.person.id)
		# update data
		valid.subjects.out = list(
			PA   = as.matrix(data1$PA[keeplabel,]),
			label= data1$label[keeplabel,],
			flag = as.matrix(data1$flag[keeplabel,])
			)
	} #----------------------------------------(1)		

	if (!keep.7days){ #------------------(2)	
		# keep only valid days (< 7days) 
		keeplabel = (data2$label[,1]%in%keep.person.id)
	  # update data		
		valid.subjects.out = list(
			PA 	 = as.matrix(data2$PA[keeplabel, ]), 
			label= data2$label[keeplabel, ], 
			flag = as.matrix(data2$flag[keeplabel, ])
				)
	} #-------------------------------------------(2)	
		
	names(valid.subjects.out$label)[1:2] = c("id","day")
	# Summary
	print(paste("Selected persons are", length(keep.person.id)) )
	# return
	return(valid.subjects.out)
	}
#####################################################################	 	
# wear.time.plot() displays the propotion of wearing over time a day among the daily profiles
# Also, based on that, it computes the standard measurement day
# input: PA, label, flag
# output: plot with the proportion of wearing over time
#####################################################################	 
	wear.time.plot <- function(PA, label, flag, mark.missing=0){
	print("*** Wearing proportion over time ***")
	
		start.time =as.POSIXct("2015-01-01")
		seq.time = seq.POSIXt(start.time, length.out=1441, by="min")
		min.seq = format(seq.time,"%H:%M")
	
	# Look at the wearing vs nonwearing time (Catellier et al, 2005)
	nw = mark.missing
	w = abs(1-nw)
	
	# data
	flag = as.matrix(flag)
	daylabel = label[,2]
	PAmat = as.matrix(PA)
	
	flag.wkday = flag[(daylabel>=2 & daylabel<=6),]
	flag.wkend = flag[(daylabel==1 | daylabel==7),]
	w.prop.weekday = apply(flag.wkday==w, 2, mean) 
	w.prop.weekend = apply(flag.wkend==w, 2, mean) 
	w.prop         = apply(flag==w, 2, mean)
	smrange60 = c(which(w.prop > 0.6)[1],  tail(which(w.prop > 0.6),1))
	smrange70 = c(which(w.prop > 0.7)[1],  tail(which(w.prop > 0.7),1)) 

	# plot of the proportion of wearing
	par(mfrow=c(1,1), mar=c(5,4,5,1))
		plot(w.prop.weekday, col="blue", lty=1, type="l",
			ylab="Proportion of wearing",
			xlab="Time of a Day", cex.axis=1, xaxt="n")
			
		axis(1, at=seq(1,1441,120), labels=min.seq[seq(1,1441,120)] ,las=2 ,cex.axis=0.7 )	
		lines(w.prop.weekend, col="red", lty=1)
		lines(w.prop, col="green",lwd=3, lty=1 )
		legend("topleft",c("Weekday","Weekend","Overall"), 	
			col=c("blue","red","green"),lwd=c(1,1,3)  )
		abline(h=0.6, lty=3, col=3)
		abline(h=0.7, lty=3, col=3)
		mtext("Standard measurement day can be chosen around ", side = 3, line = 3)
		mtext(paste("(", min.seq[smrange60[1]],"," ,min.seq[smrange60[2]],") during which 60% wear",sep=""), side = 3, line = 2)
		mtext(paste("(", min.seq[smrange70[1]],"," ,min.seq[smrange70[2]],") during which 70% wear",sep=""), side = 3, line = 1)	

	
	# summary
	print(paste("Data includes", dim(PAmat)[1],
		"daily profiles and the dimension is", dim(PAmat)[2] ))
	print(paste("Standard measurement day can be chosen around : "))
	print(paste("(", min.seq[smrange60[1]],"," ,min.seq[smrange60[2]],") during which 60% wear",sep=""))
	print(paste("(", min.seq[smrange70[1]],"," ,min.seq[smrange70[2]],") during which 70% wear",sep=""))	
	
	#w.prop.sum = data.frame(w.prop=w.prop, w.prop.weekday=w.prop.weekday,  w.prop.weekend=w.prop.weekend)
	#return(w.prop.sum)
	
	}
	
		

Try the accelmissing package in your browser

Any scripts or data that you put into this service are public.

accelmissing documentation built on May 2, 2019, 1:44 p.m.