R/ggPredict.R
In predict3d: Draw Three Dimensional Predict Plot Using Package 'rgl'

Documented in expand.grid2 fit2newdata getMeans ggPredict is.mynumeric number2group restoreData restoreNames seekNamesDf seq_range string2pattern

#'calculate mean values of two consecutive number
#'@param x A numeric vector
#'@export
#'@examples
#'x=c(50,60,70)
#'getMeans(x)
getMeans=function(x){
     count=length(x)
     result=c()
     for(i in 2:count){
          result=c(result,mean(c(x[i-1],x[i])))
     }
     result
}

#'Create a sequence over the range of a vector
#'@param x A numeric vector
#'@param n An integer specifying the length of sequence (i.e., number of points across the range of x)
#'@export
#'@examples
#'seq_range(1:5,n=3)
seq_range=function(x,n=2){
     seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = n)
}



#'Convert a numeric vector into groups
#'@param x A numeric vector
#'@param mode A numeric. If 1, mean(x) +c(-1,0,1)*sd(x) are used. If 2, quantile(x,probs=c(0.14,0.5,0.86),type=6) are used. If 3, values are used
#'@param values A numeric vector
#'@param silent A logical. Whether table of result will be shown
#'@param label A character string
#'@param digits integer indicating the number of decimal places
#'@param colorn The number of regression lines when the modifier variable(s) are numeric
#'@export
#'@examples
#'number2group(iris$Sepal.Length,label="Sepal.Length")
#'x=number2group(mtcars$wt,label="wt")
#'x
number2group=function(x,mode=1,values=NULL,silent=FALSE,label="label",digits=2,colorn=3){
     if(is.null(values)){
     if(mode==1) values=mean(x,na.rm=TRUE)+c(-1,0,1)*sd(x,na.rm=TRUE)
     else if(mode==2)  values=quantile(x,probs=c(0.16,0.5,0.84),type=6)
     else values=seq_range(x,colorn)
     }
     if(!silent) cat("breaks=",values,"\n")
     count=length(values)
     meanValues=getMeans(values)
     labels=paste(label,"=",round(values,digits=digits))
     breaks=c(min(x,na.rm=TRUE)-0.01,meanValues,max(x,na.rm=TRUE)+0.01)
     result=as.numeric(cut(x,breaks))
     if(!silent) print(table(result))
     result=factor(result,levels=1:count,labels=labels)
     attr(result,"breaks")=values
     invisible(result)
}

#' Decide whether a vector can be treated as a numeric variable
#' @param x A vector
#' @param maxylev An integer indicating the maximum number of levels of numeric variable be treated as a categorical variable
#' @export
is.mynumeric=function(x,maxylev=6){
    ifelse((is.numeric(x) & (length(unique(x))>maxylev)),TRUE,FALSE)
}


#'Restore factors in data.frame as numeric
#'@param data A data.frame
#'@export
#'@examples
#'fit=lm(mpg~factor(cyl)*factor(am),data=mtcars)
#'fit=lm(mpg~wt*factor(am),data=mtcars)
#'fit=lm(mpg~wt*hp,data=mtcars)
#'restoreData(fit$model)
restoreData=function(data){
     select=which(str_detect(names(data),"\\(.*\\)"))
     select
     for(i in seq_along(select)){
          str_detect(names(data)[select[i]],"factor")
          if(str_detect(names(data)[select[i]],"factor")){
               temp=as.numeric(as.character(data[[select[i]]]))
               tempname=str_replace(names(data)[select[i]],".*\\(","")
               tempname=str_replace(tempname,"\\)","")
               data[[tempname]]=temp
          }
     }
     if(length(select)>0) data[-select]
     data
}


#'Restore factors in variable name as numeric
#'@param x character vector
#'@export
#'@examples
#'restoreNames(c("factor(cyl)","am"))
#'restoreNames(c("I(age^2)","am","100/mpg","cyl^1/2","mpg2","sex + 0.5"))
restoreNames=function(x){
     temp=str_replace_all(x,".*\\(|\\)","")
     temp=str_replace_all(temp," ","")
     temp=str_replace(temp,"(\\^|\\*)([:digit:]|\\.|\\/).*","")
     temp=str_replace(temp,"[:digit:].*(/|\\*|\\+|-|\\^)","")
     temp=str_replace(temp,"(/|\\*|\\+|-|\\^)[:digit:].*","")
     temp
}

#'change string to pattern
#'@param string A character vector
#'@export
#'@examples
#'string=c("I(age^2)","factor(cyl)","log(mpg)")
#'string2pattern(string)
string2pattern=function(string){
     string=str_replace_all(string,"\\(","\\\\(")
     string=str_replace_all(string,"\\)","\\\\)")
     string=str_replace_all(string,"\\^","\\\\^")
     string
}

#'Find variable names in data.frame
#'@param vars variable names to find
#'@param df A data.frame
#'@return A character vector
#'@export
seekNamesDf=function(vars,df){
    namesDf=names(df)
    result=c()
    vars
    namesDf

    for( i in seq_along(vars)){
         if(vars[i] %in% namesDf){
              result=c(result,vars[i])
         } else{
              select=which(str_detect(namesDf,paste0(".*\\(",vars[i],"\\^|.*\\(",vars[i],"\\)")))
              if(length(select)>0){
                 result=c(result,namesDf[select])
              } else{  ## find factor
                    factorVars=names(which(unlist(lapply(df,is.factor))))
                    for(j in seq_along(factorVars)){
                         temp=string2pattern(factorVars[j])
                         if(str_detect(vars[i],temp)) result=c(result,factorVars[j])
                    }

              }
         }
    }
    result=unique(result)
    result
}

#' Make a new data set for prediction
#'@param fit An object of class "lm", "glm" or "loess"
#'@param predictors Names of predictor variables in string
#'@param mode A numeric. Useful when the variables are numeric. If 1, c(-1,0,1)*sd + mean is used. If 2, the 16th, 50th, 84th percentile values used. If 3 sequence over a the range of a vector used
#'@param pred.values For which values of the predictors should be used? Default is NULL. If NULL, 20 seq_range is used.
#'@param modx.values For which values of the moderator should lines be plotted? Default is NULL. If NULL, then the customary +/- 1 standard deviation from the mean as well as the mean itself are used for continuous moderators. If the moderator is a factor variable and modx.values is NULL, each level of the factor is included.
#'@param mod2.values For which values of the second moderator should lines be plotted? Default is NULL. If NULL, then the customary +/- 1 standard deviation from the mean as well as the mean itself are used for continuous moderators. If the moderator is a factor variable and modx.values is NULL, each level of the factor is included.
#'@param colorn The number of regression lines when the modifier variable(s) are numeric.
#'@param maxylev An integer indicating the maximum number of levels of numeric variable be treated as a categorical variable
#'@param summarymode An integer indicating method of extracting typical value of variables. If 1, typical() is used.If 2, mean() is used.
#'@importFrom magrittr "%>%"
#'@importFrom purrr reduce
#'@importFrom modelr typical
#'@importFrom stats sd na.omit
#'@importFrom dplyr ".data"
#'@importFrom stats quantile
#'@export
#'@examples
#'fit=lm(mpg~hp*wt*cyl+carb+am,data=mtcars)
#'fit2newdata(fit,predictors=c("hp","wt","am"))
#'fit2newdata(fit,predictors=c("hp","wt","cyl"))
#'fit2newdata(fit,predictors=c("hp"))
#'fit2newdata(fit,predictors=c("hp","wt"))
#'\dontrun{
#'fit=loess(mpg~hp*wt*am,data=mtcars)
#'fit2newdata(fit,predictors=c("hp"))
#'mtcars$engine=ifelse(mtcars$vs==0,"V-shaped","straight")
#'fit=lm(mpg~wt*engine,data=mtcars)
#'fit2newdata(fit,predictors=c("wt","engine"))
#'fit=lm(mpg~wt*factor(vs),data=mtcars)
#'fit2newdata(fit,predictors=c("wt","vs"))
#'fit2newdata(lm(mpg~hp*wt,data=mtcars),predictors=c("hp","wt"),mode=3,colorn=30)
#'fit=lm(mpg~hp*log(wt),data=mtcars)
#'fit2newdata(fit,predictors=c("hp","log(wt)"))
#'fit=lm(mpg~hp*wt*factor(vs),data=mtcars)
#'fit2newdata(fit,predictors=c("hp"))
#'}
#'require(moonBook)
#'fit=lm(log(NTAV)~I(age^2)*sex,data=radial)
#'fit2newdata(fit,predictors=c("I(age^2)","sex"))
fit2newdata=function(fit,predictors,mode=1,pred.values=NULL,modx.values=NULL,mod2.values=NULL,colorn=3,maxylev=6,summarymode=1){

         # fit=lm(100/mpg~wt*hp,data=mtcars)
       # predictors=c("wt","hp")
       # fit=lm(mpg~hp*wt*cyl+carb+am,data=mtcars)
       # predictors=c("hp")
        # mode=1;pred.values=NULL;modx.values=NULL;mod2.values=NULL;colorn=3;maxylev=6;summarymode=1

     predictors=restoreNames(predictors)
     predictors

     if("loess" %in% class(fit)){
          vars=rownames(attr(fit$terms,"factors"))
          yvar=vars[1]
          # xname=vars[2]
          # if(length(vars)>2) colorname=vars[3]
          # if(length(vars)>3) facetname=vars[4]

          # df=cbind(fit$y,data.frame(fit$x))
          # data.frame(fit$x)
          # colnames(df)[1]=yvar
          df=data.frame(fit$x)
          df

     } else{
    df=fit$model[-1]
    yvar=names(fit$model)[1]
     }

     df=restoreData(df)
     df=restoreData2(df)
     df


    df1<-df[predictors]
    select=setdiff(names(df),c(predictors,"(weights)"))

    if(length(which(str_detect(select,"I\\(|factor\\(")))>0){
           select=select[-which(str_detect(select,"I\\(|factor\\("))]
    }
    if(length(which(str_detect(select,"^log|^sqrt|^exp")))>0){
        select=select[-which(str_detect(select,"^log|^sqrt|^exp"))]
    }
    select
    df2<-df[select]

    if(is.mynumeric(df1[[1]],maxylev=maxylev)) {
        newdf=seq_range(df1[[1]],30)
    } else{
        newdf=unique(df1[[1]])
    }

    if(!is.null(pred.values)) newdf=pred.values
    newdf=data.frame(newdf)
    newdf

        if(length(df1)>1){
        newdf2<-lapply(df1[2:length(df1)],function(x) {
            if(is.mynumeric(x,maxylev=maxylev)) {
                if(mode==1) mean(x,na.rm=TRUE)+c(-1,0,1)*sd(x,na.rm=TRUE)
                else if(mode==2) quantile(x,probs=c(0.16,0.50,0.84),type=6)
                else if(mode==3) seq_range(x,colorn)
            } else{
                unique(x)
            }
        })
        if(!is.null(modx.values)) newdf2[[1]]=modx.values
        if(!is.null(mod2.values)) newdf2[[2]]=mod2.values
        newdf2
        if(length(newdf2)>1) {
           newdf2<-newdf2 %>% reduce(expand.grid)
        }
        newdf=expand.grid2(newdf,newdf2)
    }
    colnames(newdf)=colnames(df1)

    caption<-NULL
    if(length(df2)>0){
        # lapply(df2,modelr::typical)
        if(summarymode==1){
           newdf3<-lapply(df2,modelr::typical)
        } else{
          newdf3<-lapply(df2,mean,na.rm=TRUE)
        }
        newdf3
        newdf3=data.frame(newdf3,stringsAsFactors = FALSE)
        if(nrow(newdf3)>1) newdf3<-newdf3[1,]
        colnames(newdf3)=names(df2)
        caption<-paste(names(newdf3),newdf3[1,],sep="=",collapse=",")
        newdf3
        newdf
        newdf<-expand.grid2(newdf,newdf3)
    }

    newdf
    result <- predict(fit, newdata = newdf, type = "response",se=TRUE)
    # result <- predict(fit, newdata = newdf, type = "response",se.fit=TRUE)
     result
     newdf
     if(any(str_detect(names(df),"I\\("))){
          select=which(str_detect(names(df),"I\\("))
          select
          for(i in seq_along(select)){
               temp=names(df)[select[i]]

               eq=str_replace_all(temp,"I\\(|\\)$","")
               for(j in seq_along(names(newdf))){
                    temp2=names(newdf)[j]
                    if(str_detect(eq,temp2)){
                         temp3=str_replace(eq,temp2,paste0("newdf$",temp2))
                         newdf[[temp]]=eval(parse(text=temp3))
                    }
               }
          }
     }
     if(any(str_detect(names(df),"^log|^sqrt|^exp|^factor\\("))){
         select=which(str_detect(names(df),"^log|^sqrt|^exp|^factor\\("))
         select

         for(i in seq_along(select)){
             temp=names(df)[select[i]]
             temp
             for(j in seq_along(names(newdf))){
                 temp2=names(newdf)[j]
                 if(str_detect(temp,temp2)){
                     temp3=str_replace(temp,temp2,paste0("newdf$",temp2))
                     temp3
                     newdf[[temp]]=eval(parse(text=temp3))
                 }
             }

         }
         newdf
     }
    newdf[[yvar]]<-result$fit
    newdf$se.fit<-result$se.fit
    newdf$ymax<-newdf[[yvar]]+1.96*result$se.fit
    newdf$ymin<-newdf[[yvar]]-1.96*result$se.fit
    newdf=restoreData2(newdf)
    newdf=restoreData3(newdf)
    if(!is.null(caption)) attr(newdf,"caption")=caption
    newdf

}


#' expand.grid with two data.frames
#' @param df1 A data.frame
#' @param df2 A data.frame
expand.grid2=function(df1,df2){
    result=list()
    for(i in 1:length(df1)){
        result[[i]]=sort(unique(df1[[i]]))
    }
    count=ncol(df1)
    for(j in 1:length(df2)){
       result[[count+j]]=sort(unique(df2[[j]]))
    }
    result
    df=data.frame(expand.grid(result,stringsAsFactors = FALSE),stringsAsFactors = FALSE)
    colnames(df)=c(colnames(df1),colnames(df2))
    df
}

#' Visualize predictions from the multiple regression models.
#'@param fit An object of class "lm" or "glm"
#'@param pred The name of predictor variable
#'@param modx Optional. The name of moderator variable
#'@param mod2 Optional. The name of second moderator variable
#'@param modx.values For which values of the moderator should lines be plotted? Default is NULL. If NULL, then the customary +/- 1 standard deviation from the mean as well as the mean itself are used for continuous moderators. If the moderator is a factor variable and modx.values is NULL, each level of the factor is included.
#'@param mod2.values For which values of the second moderator should lines be plotted? Default is NULL. If NULL, then the customary +/- 1 standard deviation from the mean as well as the mean itself are used for continuous moderators. If the moderator is a factor variable and modx.values is NULL, each level of the factor is included.
#'@param dep Optional. The name of dependent variable
#'@param mode A  numeric. Useful when the variables are numeric. If 1, c(-1,0,1)*sd + mean is used. If 2, the 14th, 50th, 86th percentile values used. If 3 sequence over a the range of a vector used
#'@param colorn The number of regression lines when the modifier variable(s) are numeric.
#'@param maxylev An integer indicating the maximum number of levels of numeric variable be treated as a categorical variable
#'@param show.point Logical. Whether or not add points
#'@param show.error Logical. Whether or not show error
#'@param error.color color of error. dafault value is "red"
#'@param jitter logical Whether or not use geom_jitter
#'@param se Logical. Whether or not add confidence interval
#'@param alpha A numeric. Transparency
#'@param show.text  Logical. Whether or not add regression equation as label
#'@param add.modx.values Logical. Whether or not add moderator values to regression equation
#'@param add.loess Logical. Whether or not add loess line
#'@param labels labels on regression lines
#'@param angle angle of text
#'@param xpos x axis position of label
#'@param vjust vertical alignment of labels
#'@param digits integer indicating the number of decimal places
#'@param facet.modx Create separate panels for each level of the moderator? Default is FALSE
#'@param facetbycol Logical.
#'@param plot Logical. Should a plot of the results be printed? Default is TRUE.
#'@param summarymode  An integer indicating method of extracting typical value of variables. If 1, typical() is used.If 2, mean() is used.
#'@param ... additional arguments to be passed to geom_text
#'@importFrom rlang enquo "!!" quo_name enexpr .data
#'@importFrom dplyr group_by do
#'@importFrom stats as.formula glm lm predict
#'@importFrom ggplot2 geom_line geom_ribbon geom_point labs facet_grid geom_jitter geom_segment
#'@importFrom ggplot2 ggplot aes stat_smooth geom_text coord_fixed theme_bw
#'@export
#'@examples
#'fit=loess(mpg~hp*wt*am,data=mtcars)
#'ggPredict(fit)
#'ggPredict(fit,hp)
#'\dontrun{
#'ggPredict(fit,hp,wt)
#'fit=lm(mpg~wt*hp-1,data=mtcars)
#'ggPredict(fit,xpos=0.7)
#'fit=lm(mpg~hp*wt,data=mtcars)
#'ggPredict(fit)
#'ggPredict(fit,labels=paste0("label",1:3),xpos=c(0.3,0.6,0.4))
#'ggPredict(fit,se=TRUE)
#'ggPredict(fit,mode=3,colorn=40,show.text=FALSE)
#'fit=lm(log(mpg)~hp*wt,data=mtcars)
#'ggPredict(fit,dep=mpg)
#'fit=lm(mpg~hp*wt*cyl,data=mtcars)
#'ggPredict(fit,modx=wt,modx.values=c(2,3,4,5),mod2=cyl,show.text=FALSE)
#'ggPredict(fit,hp,wt,show.point=FALSE,se=TRUE,xpos=0.5)
#'ggPredict(fit,modx=wt,xpos=0.3)
#'ggPredict(fit)
#'mtcars$engine=ifelse(mtcars$vs==0,"V-shaped","straight")
#'fit=lm(mpg~wt*engine,data=mtcars)
#'ggPredict(fit)
#'require(TH.data)
#'fit=glm(cens~pnodes*horTh,data=GBSG2,family=binomial)
#'ggPredict(fit,pnodes,horTh,se=TRUE,xpos=c(0.6,0.3),angle=c(40,60),vjust=c(2,-0.5))
#'fit1=glm(cens~pnodes,data=GBSG2,family=binomial)
#'ggPredict(fit1,vjust=1.5,angle=45)
#'fit3=glm(cens~pnodes*age,data=GBSG2,family=binomial)
#'ggPredict(fit3,pred=pnodes,modx=age,mode=3,colorn=10,show.text=FALSE)
#'fit2=glm(cens~pnodes*age*horTh,data=GBSG2,family=binomial)
#'ggPredict(fit2,pred=pnodes,modx=age,mod2=horTh,mode=3,colorn=10,show.text=FALSE)
#'fit=lm(mpg~log(hp)*wt,data=mtcars)
#'ggPredict(fit,hp,wt)
#'fit=lm(mpg~hp*wt+disp+gear+carb+am,data=mtcars)
#'ggPredict(fit,disp,gear,am)
#'library(moonBook)
#'fit=lm(weight~I(height^3)+I(height^2)+height+sex,data=radial)
#'ggPredict(fit)
#'predict3d(fit)
#'}
ggPredict=function(fit,pred=NULL,modx=NULL,mod2=NULL,modx.values=NULL,mod2.values=NULL,
                   dep=NULL,mode=1,colorn=3,maxylev=6,show.point=getOption("ggPredict.show.point",TRUE),
                   show.error=FALSE,error.color="red",
                   jitter=NULL,se=FALSE,alpha=0.1,
                    show.text=TRUE, add.modx.values=TRUE,add.loess=FALSE,
                    labels=NULL,angle=NULL,xpos=NULL,vjust=NULL,digits=2,
                    facet.modx=FALSE,facetbycol=TRUE,plot=TRUE,summarymode=1,...) {



    # require(tidyverse);require(rlang)
    # fit=lm(mpg~wt-1,data=mtcars)
     # mtcars$engine=ifelse(mtcars$vs==0,"V-shaped","straight")
     # fit=lm(mpg~engine*wt,data=mtcars)
    # pred=NULL;modx=NULL;mod2=NULL;modx.values=NULL;mod2.values=NULL;dep=NULL
    # mode=1;colorn=3;maxylev=6;show.point=TRUE;show.error=FALSE;error.color="red"
    # jitter=NULL;se=FALSE;alpha=0.1
    # show.text=TRUE; add.modx.values=TRUE;add.loess=FALSE
    # labels=NULL;angle=NULL;xpos=NULL;vjust=NULL;digits=2
    # facet.modx=FALSE;facetbycol=TRUE;plot=TRUE;summarymode=1

    #  fit=lm(log(NTAV)~I(age^2)*sex,data=radial)
    #  predc="I(age^2)";modxc="sex";mod2c=NULL;jitter=NULL;show.error=FALSE
    #  add.loess=FALSE;angle=NULL;facetbycol=TRUE;facet.modx=FALSE;colorn=3;mode=1
    #
    #  fit=lm(mpg~hp*wt*vs,data=mtcars)
     # predictors=c("hp","engine")
     # fit=lm(mpg~log(hp)*wt*vs,data=mtcars)
     # fit=lm(weight~I(height^3)+I(height^2)+height+sex,data=radial)
    # fit=lm(NTAV~I(age^2)+sex-1,data=radial)
    # fit=lm(mpg~hp*wt+disp+gear+carb+am,data=mtcars)
    # fit=lm(100/mpg~hp*wt,data=mtcars)
    # predc="hp";modxc="wt";mod2c=NULL;jitter=NULL;show.error=FALSE
    #  add.loess=FALSE;angle=NULL;facetbycol=TRUE;facet.modx=FALSE;colorn=3;mode=1

    method=class(fit)[1]
    if(method=="loess"){
        yvar=attr(attr(fit$terms,"factors"),"dimnames")[[1]][1]
    } else{
        yvar=names(fit$model)[1]
    }

    if(method=="loess"){
        rawdata=cbind(fit$y,data.frame(fit$x))
        colnames(rawdata)[1]=yvar
    } else {
        rawdata=fit$model
        temp=grep("(weights)",names(fit$model))
        if(length(temp)>0) {
            rawdata=rawdata[-temp]
        }

    }


    checkVarname=FALSE
    predc <- quo_name(enexpr(pred))
    if(predc=="NULL"){
        pred<-NULL
        predc=names(rawdata)[2]
        checkVarname=TRUE
    } else{
        pred=enquo(pred)
    }
    modxc <- quo_name(enexpr(modx))
    modx<-enquo(modx)
    if(modxc=="NULL"){
        if(checkVarname & ncol(rawdata)>2){
            modxc=names(rawdata)[3]
        } else{
          modx<-NULL
          modxc<-NULL
        }
    }
    mod2c <- quo_name(enexpr(mod2))
    mod2<-enquo(mod2)
    if(mod2c=="NULL"){
        if(checkVarname & ncol(rawdata)>3){
            mod2c=names(rawdata)[4]
        } else{
        mod2<-NULL
        mod2c<-NULL
        }
    }

    depc <- quo_name(enexpr(dep))

    yvarOrig<-yvar
    if(depc!="NULL"){
         yvar=depc
    }

    if(!is.mynumeric(rawdata[[predc]])){
       if((!is.null(modxc)) && (is.mynumeric(rawdata[[modxc]]))){
            temp=predc
            predc=modxc
            modxc=temp
       } else if((!is.null(mod2c))&&(is.mynumeric(rawdata[[mod2c]]))){
         temp=predc
         predc=mod2c
         mod2c=temp
       }
    }


    predictors=c(predc,modxc,mod2c)
    predictors
    if(checkVarname){
    predictors=unique(restoreNames(predictors))
    predc<-modxc<-mod2c<-NULL
    predc=predictors[1]
    if(length(predictors)>1){
         modxc=predictors[2]
    }
    if(length(predictors)>2){
         mod2c=predictors[3]
    }
    predictors=c(predc,modxc,mod2c)
    # cat("predictors=",predictors,"\n")
    }
      # cat("predictors=",predictors,"\n")
    predc
    modxc
    mod2c
    rawdata
    # rawdata=data.frame(rawdata)
    rawdata=restoreData(rawdata)
    rawdata=restoreData2(rawdata)
    rawdata=restoreData3(rawdata)

    rawdata$yhat=predict(fit,newdata=rawdata)

    newdata=fit2newdata(fit,predictors,mode=mode,modx.values=modx.values,
                        mod2.values=mod2.values,colorn=colorn,maxylev=maxylev,summarymode=summarymode)
    # print(newdata)
    if(!is.null(mod2c)){
         if(is.mynumeric(rawdata[[mod2c]],maxylev=maxylev)) {
              x=number2group(rawdata[[mod2c]],mode=mode,values=mod2.values,
                             colorn=colorn,label=mod2c,silent=TRUE,digits=digits)
              rawdata$mod2group=x
              values=attr(x,"breaks")
              newdata$mod2group=number2group(newdata[[mod2c]],mode=mode,values=values,
                                             colorn=colorn,label=mod2c,silent=TRUE,digits=digits)
         } else{
              rawdata$mod2group=rawdata[[mod2c]]
              newdata$mod2group=newdata[[mod2c]]
         }
    }
    if(!is.null(modxc)){
         if(is.mynumeric(rawdata[[modxc]],maxylev = maxylev)) {
              x=number2group(rawdata[[modxc]],mode=mode,values=modx.values,colorn=colorn,
                             label=modxc,silent=TRUE,digits=digits)
              rawdata$modxgroup=x
              values=attr(x,"breaks")
              # print(modxc)
              # print(newdata[[modxc]])
              newdata$modxgroup=number2group(newdata[[modxc]],mode=mode,values=values,
                                             colorn=colorn,label=modxc,silent=TRUE,digits=digits)
         } else{
              rawdata$modxgroup=rawdata[[modxc]]
              newdata$modxgroup=newdata[[modxc]]
         }
    }

    fitted=newdata
    fitted
    names(fitted)


    # tempcount=4
    # if(!is.null(modxc)) tempcount=tempcount+1
    # if(!is.null(mod2c)) tempcount=tempcount+1
    # if(yvar!=yvarOrig) tempcount=tempcount+1
    # tempcount

    # temp1=setdiff(names(fitted)[1:(ncol(fitted)-tempcount)],predc)
    # temp1=setdiff(predictors,predc)
    # temp1=restoreNames(temp1)
    # predc

    exclude=c(predc,restoreNames(predc),yvar,yvarOrig,restoreNames(yvar),
              "modxgroup","mod2group","se.fit","ymax","ymin")
    temp1=setdiff(names(fitted),exclude)
    # exclude1=which(str_detect(temp1,"factor\\("))
    # if(length(exclude1)>0) temp1=temp1[-exclude1]
    temp1
    exclude2=which(str_detect(temp1,paste0("\\(",predc)))
    if(length(exclude2)>0) temp1=temp1[-exclude2]
    exclude3=str_replace_all(predc,"^.*\\(|\\)","")
    if(length(exclude3)>0) temp1=setdiff(temp1,exclude3)
    temp2=c()
    for(i in seq_along(temp1)){
        # if(!str_detect(temp1[i],"factor\\(")){
        #     temp2=c(temp2,temp1[i])
        # }

        if(is.numeric(fitted[[temp1[i]]])){
              temp2=c(temp2,temp1[i])
        }else if(length(unique(fitted[[temp1[i]]]))>1){
            temp2=c(temp2,temp1[i])
        }
    }
    temp2
    if(length(temp2)>0){
    temp3=paste0(temp2,collapse=",")
    temp3=paste0("group_by(fitted,",temp3,")")
    # cat("names(fitted)=",names(fitted),"\n")
    # cat("exclude=",exclude,"\n")
    # cat("temp3=",temp3,"\n")
    temp3
    fitted

    fitted<-eval(parse(text=temp3))
    }

    # str(fitted)
    fitted

    predictors=unique(c(modxc,mod2c))
    predictors2=c(predictors,setdiff(temp1,temp2))
    predictors2

    if(length(predictors2)>0){
         formulaString=getNewFormula(fit,predictors2)
         newFormula=as.formula(formulaString)
    } else{
        newFormula=fit$terms
        # if(attr(fit$terms,"intercept")==0){
        #     tempeq=as.character(fit$terms)
        #     tempeq[3]=str_replace(tempeq[3]," - 1","")
        #     tempeq=paste(tempeq[2],tempeq[1],tempeq[3])
        #     tempeq
        #     newFormula=as.formula(tempeq)
        # }

    }
     # fitted=data.frame(fitted)
               newFormula


    fitted
    temp=fitted %>% do(coef= lm(newFormula,data=.)$coef)
    temp$coef
    ## polynomial

    polynames=names(fit$model)[which(str_detect(names(fit$model),paste0("I\\(",predc,"\\^")))]
    polynames
    if(length(polynames)>0){
         temppredc=sort(polynames,decreasing=TRUE)[1]
    } else{
         if(predc %in% names(fit$model)) {
           temppredc=predc
         } else{
           temppredc=names(fit$model)[which(str_detect(names(fit$model),predc))][1]
         }
    }


    if(method=="lm") {
        fitted<-fitted %>% do(coef= lm(newFormula,data=.)$coef[c("(Intercept)",temppredc)])
    } else if(method=="glm") {
        fitted<-fitted %>% do(coef=glm(newFormula,data=.,family=fit$family$family)$coef[c("(Intercept)",temppredc)])
    } else{
        fitted<-fitted %>% do(coef=lm(newFormula,data=.)$coef[c("(Intercept)",temppredc)])
    }

    fitted
    temp4=setdiff(temp1,temp2)
    for(i in seq_along(temp4)){
         fitted[[temp4[i]]]=newdata[[temp4[[i]]]][1]
    }

    coef=unlist(fitted$coef)
    coef
    fitted$intercept=coef[seq(1,by=2,length.out=nrow(fitted))]
    fitted$slope=coef[seq(2,by=2,length.out=nrow(fitted))]
#
#     if(method!="loess"){
#          if((names(fit$coef)[1]!="(Intercept)")){
#            fitted$intercept=0
#          }
#     }


    # if(is.null(xpos)){
    #     if(method=="lm") xpos=0.7
    #     else if(method=="glm") xpos=0.4
    #     else xpos=0.5
    # }

modxc
predc
yvar
newdata
fitted
    if(is.null(modxc)){
    p<-ggplot(data=newdata,aes(x=.data[[predc]],y=.data[[yvar]]))
    }  else {
    p<-ggplot(data=newdata,aes(x=.data[[predc]],y=.data[[yvar]],color=.data[[modxc]],fill=.data[[modxc]],group=.data[[modxc]]))
    }
    p<-p+  geom_line()

    p

    if(is.null(jitter)){
        if(method=="glm") jitter=TRUE
        else jitter=FALSE
    }

    if(show.point==TRUE) {
        if(jitter) p<-p+geom_jitter(data=rawdata,width=0,height=0.05)
        else p<-p+geom_point(data=rawdata)
    }
    if(show.error) p<-p+geom_segment(data=rawdata,aes(xend=.data[[predc]],yend=.data[["yhat"]]),color=error.color)
    if(add.loess) p<-p+stat_smooth(data=rawdata,se=FALSE,color="red",fullrange = TRUE)

    if(se==TRUE) p<-p+ geom_ribbon(aes(ymax=.data[["ymax"]],ymin=.data[["ymin"]],color=NULL),alpha=alpha)
    faceteq="~"
    if(!is.null(modxc)){
          if(facet.modx){
               if(facetbycol) {
                    faceteq=paste0(faceteq,"modxgroup")
               } else{
                    faceteq=paste0("modxgroup",faceteq)
               }
          }
    }
    if(!is.null(mod2c)) {
         if(facetbycol) {
              if(faceteq=="~") faceteq="~."
              faceteq=paste0("mod2group",faceteq)
         } else{
              faceteq=paste0(faceteq,"mod2group")
         }
    } else{
         if((faceteq!="~")&(!facetbycol)) faceteq=paste0(faceteq,".")
    }
    faceteq
    if(faceteq !="~") p<-p+eval(parse(text=paste0("facet_grid(",faceteq,")")))
    p
    facetno<-1
    if(!is.null(modxc)) {
         if(facet.modx){
         if(facetbycol) {
              facetno=length(unique(rawdata[["modxgroup"]]))
         } else{
              facetno=1/length(unique(rawdata[["modxgroup"]]))
         }
         }
    }
    if(!is.null(mod2c)) {
         if(facetbycol) {
              facetno=facetno/length(unique(rawdata[["mod2group"]]))
         }else{
              facetno=facetno*length(unique(rawdata[["mod2group"]]))
         }
    }
    facetno

    fitted1<-fitted
    ytransform=0
    if(yvarOrig==paste0("log(",yvar,")")) {
        ytransform=1
    } else if(yvarOrig==paste0("exp(",yvar,")")) {
        ytransform=-1
    } else if(yvarOrig==paste0("sqrt(",yvar,")")) {
         ytransform=0.5
    }
    fitted
    fitted<-slope2angle(fitted,fit,ytransform=ytransform,predc,temppredc,modxc,yvar,p,method=method,xpos=xpos,vjust=vjust,digits=digits,
                        facetno=facetno,add.modx.values=add.modx.values)
    fitted
    if(!is.null(angle)) fitted$angle=angle
    if(!is.null(labels)){
        if(length(labels)==nrow(fitted)) fitted$label=labels
    }
    if(!is.null(modxc)) {
         times=nrow(fitted)/length(unique(newdata$modxgroup))
         fitted$modxgroup=rep(sort(unique(newdata$modxgroup)),each=times)
         fitted
    }
    if(!is.null(mod2c)) {
         times=nrow(fitted)/length(unique(newdata$mod2group))
         fitted$mod2group=rep(sort(unique(newdata$mod2group)),times)
         fitted
    }
    if(show.text) {
        if(method=="lm"){
            p <- p+ geom_text(data=fitted,
                          aes(x=.data$x,y=.data$y,angle=.data$angle,label=.data$label,vjust=.data$vjust),...)
            # p <- p+ geom_text(data=fitted,
            #        aes(x=.data$x,y=.data$y,angle=.data$angle,label=.data$label,vjust=.data$vjust))


        } else{
            p <- p+ geom_text(data=fitted,
                              aes(x=.data$x,y=.data$y,angle=.data$angle,label=.data$label,vjust=.data$vjust),
                              parse=TRUE,...)
        }
    }
    p

    if(method=="lm") p<-p+ coord_fixed(ratio=attr(fitted,"ratio"))
    p<-p+theme_bw()
    if(!is.null(attr(newdata,"caption"))) {
        p<-p+labs(caption=paste0("Analysis assuming ",attr(newdata,"caption")))
    }
    if(plot==TRUE) print(p)
    #class(p)=c("gg","ggplot","ggPredict")
    invisible(list(p=p,
         newdata=newdata,
         slope=fitted,
         aspectRatio=getAspectRatio(p)))
}

#'Make angle data with slope data
#'@param df A data.frame
#'@param fit An object of class "lm" or "glm"
#'@param ytransform Numeric. If 1, log transformation of dependent variable, If -1, exponential transformation
#'@param predc Name of predictor variable
#'@param temppredc Name of predictor variable in regression equation
#'@param modxc Name of moderator variable
#'@param yvar Name of dependent variable
#'@param p An object of class ggplot
#'@param method String. Choices are one of "lm" and "glm".
#'@param xpos The relative x-axis position of labels. Should be within 0 to 1
#'@param vjust vjust
#'@param digits integer indicating the number of decimal places
#'@param facetno The number of facets
#'@param add.modx.values Whether add name of moderator variable
#'@export
slope2angle=function(df,fit,ytransform=0,predc,temppredc,modxc,yvar,p,method="lm",xpos=NULL,vjust=NULL,digits=3,facetno=NULL,add.modx.values=TRUE){
     # digits=3;xpos=0.7
    # method="lm";xpos=NULL;vjust=NULL;digits=3;facetno=NULL;add.modx.values=TRUE

    info=getAspectRatio(p)
    p
    # print(info)
    ratio=info$ratio
    if(!is.null(facetno)) ratio=ratio*facetno
    df$slope2=df$slope*ratio
    df$radian=atan(df$slope2)
    df$angle=df$radian*180/pi

    df
    if(method=="lm"){
        df$label=paste0(round(df$slope,digits),temppredc)
        if(str_detect(temppredc,"I\\(")) df$label=paste0(round(df$slope,digits),str_replace(temppredc,"I\\(","\\("))
        polynames=names(fit$model)[which(str_detect(names(fit$model),paste0("I\\(",predc,"\\^")))]
        polynames
        if(length(polynames)>0){
             polynames=sort(polynames,decreasing=TRUE)
             polynames
             res=paste0(round(fit$coef[polynames[1]],digits),
                        str_replace_all(polynames[1],"I\\(|\\)$",""))
                        # str_replace_all(names(fit$model)[2],"I\\(|\\)$",""))
             if(length(polynames)>1){
                  for( i in 2:length(polynames)){
                       no=fit$coef[polynames[i]]
                       res=paste0(res,ifelse(no>=0," + "," - "),round(abs(no),digits),
                                  str_replace_all(polynames[i],"I\\(|\\)$",""))
                  }
             }
             if(predc %in% names(fit$coef)){
             no=fit$coef[predc]
             res=paste0(res,ifelse(no>=0," + "," - "),round(abs(no),digits),predc)
             }
             df$label=res

        }
        df$label

        df$intercept[is.na(df$intercept)]=0
        df$label=paste0(df$label,
                    ifelse(df$intercept==0,"",ifelse(df$intercept>0," + "," - ")),
                    ifelse(df$intercept==0,"",round(abs(df$intercept),digits)))

        if(ytransform==1) {
            df$label=paste0("exp(",df$label,")")
        } else if(ytransform==-1){
            df$label=paste0("log(",df$label,")")
        } else if(ytransform==0.5){
             df$label=paste0("(",df$label,")^2")
        }
    } else if(method=="glm"){

    df$label=paste0("frac(1,1+ plain(e)^(-(",round(df$slope,digits),"*",predc,
                    ifelse(df$intercept>=0,"+","-"),abs(round(df$intercept,digits)),")))")

    }

    if(is.numeric(df[[1]])){
        df$labels2=round(df[[1]],digits)
    } else if(is.factor(df[[1]])){
        df$labels2=levels(df[[1]])[df[[1]]]
    } else{
        df$labels2=df[[1]]
    }
    if(add.modx.values) {
        if(method=="lm"){
        if(colnames(df)[1]!="coef"){
             if(!is.null(modxc)){
                df$label=paste0(df$label," | ",modxc," = ",df$labels2)
             }
        }
        }
    }
    if(method=="loess"){
        if(colnames(df)[1]!="coef"){
         df$label=paste0(colnames(df)[1]," == ",df$labels2)
        } else{
            df$label=""
        }
    }
    count=nrow(df)
    # if(is.null(xpos)){
    #     yrange=c()
    #     xpos=c(0.3,0.7)
    #     for(j in 1:2){
    #     x=info$xmin+(info$xmax-info$xmin)*xpos[j]
    #     x=rep(x,count)
    #     y=c()
    #     for(i in seq_along(df$slope)){
    #     if(method=="lm"){
    #        if(ytransform==1){
    #            y=c(y,exp(df$slope[i]*x[i]+df$intercept[i]))
    #        } else if(ytransform==-1){
    #            y=c(y,log(df$slope[i]*x[i]+df$intercept[i]))
    #        } else {
    #            y=c(y,df$slope[i]*x[i]+df$intercept[i])
    #        }
    #     }else if(method=="glm"){
    #         y=c(y,1/(1+exp(-(df$slope[i]*x[i]+df$intercept[i]))))
    #     } else{
    #          y=c(y,1)
    #     }
    #     }
    #       yrange=c(yrange,max(y,na.rm=TRUE)-min(y,na.rm=TRUE))
    #     }
    #     # cat("yrange=",yrange,"\n")
    #     select=which.max(yrange)
    #     x=info$xmin+(info$xmax-info$xmin)*xpos[select]
    #     x=rep(x,count)
    #     y=c()
    #     for(i in seq_along(df$slope)){
    #         if(method=="lm"){
    #             if(ytransform==1){
    #                 y=c(y,exp(df$slope[i]*x[i]+df$intercept[i]))
    #             } else if(ytransform==-1){
    #                 y=c(y,log(df$slope[i]*x[i]+df$intercept[i]))
    #             } else {
    #                 y=c(y,df$slope[i]*x[i]+df$intercept[i])
    #             }
    #
    #         }else if(method=="glm"){
    #             y=c(y,1/(1+exp(-(df$slope[i]*x[i]+df$intercept[i]))))
    #         } else{
    #             y=c(y,1)
    #         }
    #     }
    #
    # } else{
    #     x=info$xmin+(info$xmax-info$xmin)*xpos
    #     if(length(xpos)==1){
    #         x=rep(x,count)
    #     }
    #     y=c()
    #     for(i in seq_along(df$slope)){
    #         if(method=="lm"){
    #             if(ytransform==1){
    #                 y=c(y,exp(df$slope[i]*x[i]+df$intercept[i]))
    #             } else if(ytransform==-1){
    #                 y=c(y,log(df$slope[i]*x[i]+df$intercept[i]))
    #             } else {
    #                 y=c(y,df$slope[i]*x[i]+df$intercept[i])
    #             }
    #         }else if(method=="glm"){
    #             y=c(y,1/(1+exp(-(df$slope[i]*x[i]+df$intercept[i]))))
    #         } else{
    #             y=c(y,1)
    #         }
    #     }
    # }

    # print(x)
    # print(y)

    if(is.null(xpos)) xpos=0.3
    x=info$xmin+(info$xmax-info$xmin)*xpos
    if(length(x)==1) x=rep(x,count)

    df$x=x

    if(is.null(vjust)) {
        vjust=rep(-0.5,count)
    }
    df$vjust=vjust
    df[[predc]]=df$x
    df=restoreData2(df)
     result <- predict(fit, newdata = df, type = "response",se=TRUE)
         # result <- predict(fit, newdata = newdf, type = "response",se.fit=TRUE)
     df$y<-result$fit

     if(ytransform==1){
          df$y=exp(df$y)
     } else if(ytransform==-1){
          df$y=log(df$y)
     } else if(ytransform==0.5){
          df$y=(df$y)^2
     }



    if(method!="lm") {
         df$angle=0
    } else if(predc!=names(fit$model)[2]){
         if(!(predc %in% names(fit$model))) df$angle=0
    }


    attr(df,"ratio")=ratio
    if(method!="loess"){
    ## y including arithmetic operator
    depc1=names(fit$model)[1]
    if(yvar!=depc1){

    depc2=restoreNames(depc1)
    keep=str_detect(depc1,"log|sqrt|exp")
    keep=keep | (depc1==depc2)
    if(!keep) {
      names(df)[names(df)=="y"]=str_replace(depc1,depc2,"y")
      df=restoreData3(df,changeLabel=TRUE)
    }
    df$angle=0
    }
    }
    df
}


#'Make new formula
#'@param fit An object of class lm or glm
#'@param predictors Names of variables to exclude
#'@importFrom stringr str_detect str_replace str_replace_all
#'@export
#'@examples
#'fit=lm(mpg~factor(cyl)*factor(am)+wt+carb,data=mtcars)
#'getNewFormula(fit,predictors=c("cyl","wt"))
#'fit=lm(Sepal.Length~Sepal.Width*Petal.Length+Species,data=iris)
#'getNewFormula(fit,predictors=c("Petal.Length"))
#'fit=lm(mpg~hp*wt*factor(cyl),data=mtcars)
#'getNewFormula(fit,predictors=c("hp","cyl"))
#'fit=loess(mpg~hp*wt,data=mtcars)
#'getNewFormula(fit,predictors=c("hp","wt"))
getNewFormula=function(fit,predictors=NULL){
    # predictors<-NULL
     # predictors=c("wt")
      # predictors=c("Species")
      # predictors=c("wt","cyl")
       # predictors=c("Petal.Length")
        # predictors=c("hp","wt")
      # fit$call
     # fit=lm(mpg~wt*hp-1,data=mtcars);predictors="hp"
     predictors=str_replace_all(predictors,".*\\(|\\)","")

     if("loess" %in% class(fit)){
          temp=attr(fit$terms,"term.labels")
          yvar=attr(attr(fit$terms,"factors"),"dimnames")[[1]][1]
     } else{
          if(names(fit$coef)[1]=="(Intercept)") {
               temp=names(fit$coef)[-1]
          } else {
               temp=names(fit$coef)
          }
          yvar=names(fit$model)[1]
     }
     temp
     exclude=c()
     if("loess" %in% class(fit)){
          df=cbind(fit$y,data.frame(fit$x))
     } else {
          df=fit$model
     }
     predictors=seekNamesDf(predictors,df)
     predictors
     for(i in seq_along(predictors)){
          if(!is.factor(df[[predictors[i]]])){
               exclude=c(exclude,predictors[i])
          } else{
               mylevels=levels(df[[predictors[i]]])
               exclude=c(exclude,paste0(predictors[i],mylevels))
          }
     }
     exclude
     exclude=string2pattern(exclude)
     for( i in seq_along(exclude)){
        select=which(!str_detect(temp,exclude[i]))
        temp=temp[select]
    }
    temp
    df

    temp=seekNamesDf(temp,df)

    result=paste0(yvar,"~",paste(temp,collapse="+"))

    # if(!("loess" %in% class(fit))){
    #       if(names(fit$coef)[1]!="(Intercept)") result=paste0(result,"-1")
    # }
    result

}

#'Get aspect information of a ggplot
#'@param p A ggplot object
#'@importFrom ggplot2 layer_scales
#'@export
#'@examples
#'p<-ggplot2::ggplot(data=mtcars,ggplot2::aes(x=.data$wt,y=.data$mpg))+ggplot2::geom_point()
#'getAspectRatio(p)
getAspectRatio=function(p){
     xmin=as.numeric(layer_scales(p)$x$range$range[1])
     xmax=as.numeric(layer_scales(p)$x$range$range[2])
     ymin=as.numeric(layer_scales(p)$y$range$range[1])
     ymax=as.numeric(layer_scales(p)$y$range$range[2])


     ratio=(xmax-xmin)/(ymax-ymin)
     list(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax,ratio=ratio)
}
Any scripts or data that you put into this service are public.
predict3d documentation built on Nov. 5, 2025, 6:34 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
predict3d
Draw Three Dimensional Predict Plot Using Package 'rgl'

R/ggPredict.R
In predict3d: Draw Three Dimensional Predict Plot Using Package 'rgl'

Defines functions ggPredict expand.grid2 fit2newdata seekNamesDf string2pattern restoreNames restoreData is.mynumeric number2group seq_range getMeans

Documented in expand.grid2 fit2newdata getMeans ggPredict is.mynumeric number2group restoreData restoreNames seekNamesDf seq_range string2pattern

Try the predict3d package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predict3d Draw Three Dimensional Predict Plot Using Package 'rgl'

R/ggPredict.R In predict3d: Draw Three Dimensional Predict Plot Using Package 'rgl'

Defines functions ggPredict expand.grid2 fit2newdata seekNamesDf string2pattern restoreNames restoreData is.mynumeric number2group seq_range getMeans

Documented in expand.grid2 fit2newdata getMeans ggPredict is.mynumeric number2group restoreData restoreNames seekNamesDf seq_range string2pattern

Try the predict3d package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predict3d
Draw Three Dimensional Predict Plot Using Package 'rgl'

R/ggPredict.R
In predict3d: Draw Three Dimensional Predict Plot Using Package 'rgl'