fmt <- function(dsn,X,k){
Cut.off.points <- quantile(dsn[,X],seq(0,1,length.out = (k+1)),type = 1)
print(noquote(paste("Cut points for",X,"with",k,"categories:")))
print(Cut.off.points)
dsn.new <- dsn[order(dsn[,X]),]
Cut.off.loc <- rep(NA,k)
for(i in 1:k){
Cut.off.loc[i] <- which(dsn.new[,X]==Cut.off.points[i+1])[1]
}
Cut.off.loc <- c(0,Cut.off.loc)
Cut.off.label <- c("low",as.character(round(Cut.off.points,5))[2:k],"high")
New.X <- rep(NA,nrow(dsn.new))
for(i in 1:k){
New.X[(Cut.off.loc[i]+1):Cut.off.loc[i+1]] <- paste(Cut.off.label[i],"-",Cut.off.label[i+1])
}
dsn.new <- cbind(dsn.new,New.X)
Cut.off.points <- as.numeric(Cut.off.points)
fmt.output <- list(Cut.off.points = Cut.off.points, dsn.new = dsn.new)
return(fmt.output)
}
lk.func <- function(link.type,Group.Means){
link.name <- NULL
Group.Means[which(Group.Means==0)]<- 0.000001
Group.Means[which(Group.Means==1)]<- 0.999999
if(link.type=="identity"){
link.name <- "identity link"
Group.Means.Y <- Group.Means
}
else if(link.type=="logit"){
link.name <- "logit link"
Group.Means.Y <- log(Group.Means/(1-Group.Means))
}
else if(link.type=="probit"){
link.name <- "probit link"
Group.Means.Y <- pnorm(Group.Means)
}
else if(link.type=="log"){
link.name= "log link"
Group.Means.Y <- log(Group.Means)
}
else if(link.type=="clog-log"){
link.name= "complementary log-log link"
Group.Means.Y <- log(-log(1-Group.Means))
}
else if(link.type=="log-log"){
link.name= "log-log link"
Group.Means.Y <- -log(-log(Group.Means))
}
else if(link.type=="reciprocal"){
link.name="reciprocal link"
Group.Means.Y <- 1/Group.Means
}
link.list <- list(link.name = link.name, Y.Means = Group.Means.Y)
return(link.list)
}
#' @title Check linearity assumption
#' @description Check linearity assumption in a model using a given link function.
#' @usage LFL(dsn,X,Y,nkat,link.type,outlier)
#' @param dsn data set name containing X and Y;
#' @param X continuous covariate
#' @param Y outcome variable
#' @param nkat number of categories into which X should be divided (an even number)
#' @param link.type link function to be used:
#' link.type is coded as follows:
#' "identity" = identity link;
#' "logit" = logit link;
#' "probit" = probit link;
#' "log" = log link;
#' "clog-log" = complementary log-log link;
#' "log-log" = log-log link;
#' "reciprocal" = reciprocal link;
#' @param outlier exclude outliers from the plot.
#' @return Return a matrix with 6 columns:
#' \item{X}{intervals of continuous covariate.}
#' \item{Frequency}{number of observations in each group.}
#' \item{mean}{group means of outcome variable.}
#' \item{link}{name of link function.}
#' \item{gmu}{group means of outcome variable with link function.}
#' \item{midpt}{median of continuous covariate in each group.}
#' @examples
#' Data.X <- runif(1000)
#' Data.Y <- exp(Data.X+1)+rnorm(1000,0,1)
#' Data <- data.frame(cbind(Data.X,Data.Y))
#' LFL(Data,"Data.Y","Data.X",20,4)
#' ##Use NH data:
#' NH$quality.rating <- (scale(NH$quality.rating)/2+0.5)
#' LFL(dsn = NH,Y = "quality.rating", X = "RNHRD", nkat = 40, link.type = "logit", outlier = F)
#' @export
LFL <- function(dsn,X,Y,nkat,link.type,outlier=FALSE){
fmt.out <- fmt(dsn,X,nkat)
dsn.new <- fmt.out$dsn.new
Cut.off.points <- fmt.out$Cut.off.points
Group.means <- aggregate(dsn.new[,Y],list(dsn.new[,"New.X"]),mean)
midpt <- aggregate(dsn.new[,X],list(dsn.new[,"New.X"]),median)
lk.output <- lk.func(link.type,Group.means[,2])
link.name <- rep(lk.output$link.name,nkat)
LFL.output <- cbind(as.character(Group.means$Group.1),
as.numeric(table(dsn.new$New.X)),
Group.means[,2],
link.name,
lk.output$Y.Means,
midpt$x)
LFL.output <- rbind(LFL.output[nkat,],LFL.output[1:(nkat-1),])
colnames(LFL.output) <- c("X","Frequency","mean","link","gmu","midpt")
LFL.output[,3] <- round(as.numeric(LFL.output[,3]),5)
LFL.output[,5] <- round(as.numeric(LFL.output[,5]),5)
LFL.output[,6] <- round(as.numeric(LFL.output[,6]),5)
LFL.output <- LFL.plot <- data.frame(LFL.output)
if(outlier){
Model.outliers <- 1
while(Model.outliers==1){
LFL.model <- lm(as.numeric(as.character(LFL.plot$gmu))~as.numeric(as.character(LFL.plot$midpt)))
Outliers <- as.numeric(which(cooks.distance(LFL.model)>4*mean(cooks.distance(LFL.model))))
if(length(Outliers)>0){
LFL.plot <- LFL.plot[-Outliers,]
}
else if(length(Outliers) == 0){
Model.outliers <- 0
}
}
plot(as.numeric(as.character(LFL.plot$midpt)),as.numeric(as.character(LFL.plot$gmu)),xlab = "midpt",ylab = "gmu",main = paste("Check linearity of",lk.output$link.name,Y, "by",X,"with",nkat,"categories"))
abline(LFL.model)
}
else{
LFL.model <- lm(as.numeric(as.character(LFL.plot$gmu))~as.numeric(as.character(LFL.plot$midpt)))
plot(as.numeric(as.character(LFL.plot$midpt)),as.numeric(as.character(LFL.plot$gmu)),xlab = "midpt",ylab = "gmu",main = paste("Check linearity of",lk.output$link.name,Y, "by",X,"with",nkat,"categories"))
abline(LFL.model)
}
return(LFL.output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.