predict_hrf: Prediction
In htree: Historical Tree Ensembles for Longitudinal Data

Description Usage Arguments Value See Also Examples

View source: R/hrf.R

Prediction functions for hrf and htb.

predict_htb(object,x=NULL,time=NULL,id=NULL,
	all.trees=FALSE,type="response",ntrees=NULL,se=FALSE)

predict_hrf(object,x=NULL,time=NULL,id=NULL,
	all.trees=FALSE,se=FALSE)

`object`	An object of class `htree`.
`x`	A data frame or matrix containing new data. If `NULL` then training data in `object` is used.
`time`	A vector of observation times.
`id`	A vector of length `nrow(x)` identifying the subjects.
`type`	If `response` then predictions on same scale as response are given.
`ntrees`	The number of trees to use in prediction.
`all.trees`	If `TRUE` then predictions associated with each tree are returned.
`se`	If `TRUE` then standard errors of predictions are returned.

Returns predictions.

hrf,htb

## Not run: 

# ------------------------------- #
# Simulated data example          #
# ------------------------------- #
# library(htree)
p=5;sigma_e=.5;sigma_a=.5;n=500;pnoise=2
random_intercept=as.numeric(mapply(rep,rnorm(n,sd=sigma_a),times=p))
dat=data.frame(time=rep(1:p,n),
	x=(random_intercept+rnorm(n*p,sd=sigma_e)),
	znoise=matrix(rnorm(n*p*pnoise),ncol=pnoise))
id=sort(rep(1:n,p))

# fit historical random forest
hb=hrf(x=dat,time=dat$time,id=id,yindx=2,se=TRUE)

# get predictions with standard errors
pred=predict_hrf(hb,se=TRUE)


# ------------------------------------------------------------------ #
# Comparison of SE-estimates with actual standard errors for 'hrf'
# ------------------------------------------------------------------ #  


## -- evaluation points
n=200
datp=data.frame(y=rep(0,n),w=seq(-2,2,length=n),z=rep(0,n))

## -- estimate model on 50 simulated data sets
pred=NULL
pred_se=NULL
nsim=20

## -- B=100 bootstrap samples, ensemble size of R=10 on each
control=list(ntrees=500,B=100,R=10,se=TRUE,nodesize=5)
for(k in 1:nsim){
	
	if(is.element(k,seq(1,nsim,by=10)))
		cat(paste("simulation: ",k," of ",nsim," \n",sep=""))
	# -- simulation model -- #
	dat=data.frame(y=(4*datp$w+rnorm(n)),x=datp$w,z=rnorm(n))
	# ---------------------- #
	h=hrf(x=dat,yindx="y",control=control)
	mm=predict_hrf(object=h,x=datp,se=TRUE)
	pred=cbind(pred,mm[,1])
	pred_se=cbind(pred_se,mm[,2])
}


# --- Actual Standard errors at datp 
pred_se_true=apply(pred,1,sd)

# --- Mean of estimated standard errors
pred_se_est=apply(pred_se,1,mean)
pred_se_lower=apply(pred_se,1,quantile,prob=.1)
pred_se_upper=apply(pred_se,1,quantile,prob=.9) 


# -- Plot estimated SE and true SE (+smooth)
z=c(pred_se_true,pred_se_est,pred_se_lower,pred_se_upper)
ylim=c(min(z),max(z))
plot(datp$w,pred_se_est,ylim=ylim,col="blue",xlab="w",
	ylab="Standard error",type="l",main=" SE-true (red) SE-est (blue)")
points(datp$w,pred_se_lower,col="blue",type="l",lty=2)
points(datp$w,pred_se_upper,col="blue",type="l",lty=2)

points(datp$w,pred_se_true,col="red",type="l")

 



# ------------------------------------------------------------------ #
# Comparison of SE-estimates with actual standard errors for 'htb'
# ------------------------------------------------------------------ #  


## -- evaluation points
n=200
datp=data.frame(y=rep(0,n),w=seq(-2,2,length=n),z=rep(0,n))

## -- estimate model on 50 simulated data sets
pred=NULL
pred_se=NULL
nsim=20
for(k in 1:nsim){
	
	if(is.element(k,seq(1,nsim,by=10)))
		cat(paste("simulation: ",k," of ",nsim," \n",sep=""))
	# -- simulation model -- #
	dat=data.frame(y=(4*datp$w+rnorm(n)),x=datp$w,z=rnorm(n))
	# ---------------------- #
	h=htb(x=dat,yindx="y",ntrees=200,cv.fold=10)
	mm=predict_htb(object=h,x=datp,se=TRUE)
	pred=cbind(pred,mm[,1])
	pred_se=cbind(pred_se,mm[,2])
}


# --- Actual Standard errors at datp 
pred_se_true=apply(pred,1,sd)

# --- Mean of estimated standard errors
pred_se_est=apply(pred_se,1,mean)
pred_se_lower=apply(pred_se,1,quantile,prob=.1)
pred_se_upper=apply(pred_se,1,quantile,prob=.9) 


# -- Plot estimated SE and true SE (+smooth)
z=c(pred_se_true,pred_se_est,pred_se_lower,pred_se_upper)
ylim=c(min(z),max(z))
plot(datp$w,pred_se_est,ylim=ylim,col="blue",xlab="w",
	ylab="Standard error",type="l",main=" SE-true (red) SE-est (blue)")
points(datp$w,pred_se_lower,col="blue",type="l",lty=2)
points(datp$w,pred_se_upper,col="blue",type="l",lty=2)

points(datp$w,pred_se_true,col="red",type="l")





## End(Not run)