R/depricated.R

Defines functions grow_tree_simple grow_tree_complex node_var_test_rank initial_node_var_test_ranl

initial_node_var_test_ranl=function(c0,x,y,gamma){
  logit=1/(1+exp(-gamma*(x-c0)))
  b0=logit;b1=1-logit
  X=cbind(b0,b1)
  rank=Matrix::rankMatrix(t(X)%*%X,1e-7*(ncol(X)))
  if(rank<ncol(X)){
    return(c(Inf,rep(NA,ncol(X))))
  }
  b=tryCatch(stats::coef(stats::.lm.fit(X,y)),error=function(e)Inf)
  if(is.infinite(b[1])){
    return(c(b[1],rep(NA,ncol(X))))
  }
  c(sum((y-X%*%b)^2),b)
}

node_var_test_rank=function(c0,x,y,gamma,Pmat,terminal,middlenodes){
  logit=1/(1+exp(-gamma*(x-c0)))
  b0=logit*Pmat[,terminal]
  b1=(1-logit)*Pmat[,terminal]
  # b0=(1/(1+exp(-gamma*(x-c0))))*Pmat[,terminal]
  # b1=(1-b0)*Pmat[,terminal]
  X=cbind(b0,b1,Pmat[,-c(terminal,middlenodes)])
  rank=Matrix::rankMatrix(t(X)%*%X,tol=1e-7*(ncol(X)))
  if(rank<ncol(X)){
    return(c(Inf,rep(NA,ncol(X))))
  }
  b=tryCatch(stats::coef(stats::.lm.fit(X,y)),error=function(e)Inf)
  if(is.infinite(b[1])){
    return(c(b[1],rep(NA,ncol(X))))
  }
  e=sum((y-X%*%b)^2)
  c(e,b)
}

grow_tree_complex=function(x, y, p, d_max, gamma){
  bf=0
  variables = sample(ncol(x), round(p*ncol(x)))#sort(sample(1:ncol(x), max(round(p*ncol(x)),2)  ))
  gammai=gamma[sample(1:length(gamma),1)]

  fit=list()
  for(i in 1:length(variables)){
    xtest=unique(sort(x[,variables[i]]))
    if(length(xtest)>10){
      xtest=xtest[stats::quantile(1:length(xtest),probs = seq(0,1,0.1))]
    }
    gammascale=max(stats::IQR(x[,variables[i]]),0.1)
    ssr=sapply(xtest,initial_node_var_test,x=x[,variables[i]],y=y,gamma=gammai*gammascale)
    ssr=t(ssr)
    best=which.min(ssr[,1])
    res0=c(xtest[best],gammai*gammascale,ssr[best,])
    names(res0)=c("c0","gamma","val","b0","b1")
    fit[[i]]=res0

  }
  best=which.min(lapply(fit,function(x)x["val"]))
  node=fit[[best]]

  nodeleft=data.frame("side" = 1,"b" = node["b0"],"c0" = node["c0"],gamma = node["gamma"]
                      , "parent" = 0, "terminal" = "yes", variable = variables[best], id = 1)
  noderight=data.frame(side = 2,  "b" = node["b1"],"c0" = node["c0"],gamma = node["gamma"]
                       , "parent" = 0, "terminal" = "yes", variable = variables[best], id = 2)
  tree=rbind(nodeleft,noderight)
  tree$terminal=as.character(tree$terminal)

  Pmat=1/(1+exp(-node["gamma"]*(x[,variables[best]]-node["c0"])))
  Pmat=cbind(Pmat,1-Pmat)

  ################
  iter=1
  while(iter<=d_max){

    gammai=gamma[sample(1:length(gamma),1)]
    terminal=which(tree$terminal=="yes")
    variables=sample(ncol(x), round(p*ncol(x)))#sort(sample(1:ncol(x), max(round(p*ncol(x)),2)))
    test=expand.grid(variables,terminal)
    colnames(test)=c("variable","terminal")

    t1=Sys.time()
    fit=list()
    for(i in 1:nrow(test)){
      xt=x[,test[i,"variable"]]
      nodeobs=which(apply(Pmat,1,which.max)==test$terminal[i])
      xtest=unique(sort(xt[nodeobs]))

      nodep=tree[tree$id==test$terminal[i],]
      used=c()
      if(nodep$variable==test[i,"variable"]) used=c(used,nodep$c0)

      parent=ifelse(length(nodep$parent)!=0,nodep$parent,-1)
      while(length(parent)!=0){
        if(nodep$variable==test[i,"variable"]){
          used=c(used,nodep$c0)
        }
        nodep=tree[tree$id==parent,]
        parent=nodep$parent
      }
      xtest=setdiff(xtest,used)
      if(length(xtest)>10){
        xtest=xtest[stats::quantile(1:length(xtest),probs = seq(0,1,0.1))]
      }
      gammascale=max(stats::IQR(xt),0.1)
      middlenodes=which(is.na(colSums(Pmat)))
      if(length(xtest)<=1){
        fit[[i]]=c(val=Inf)
      }else{
        ssr=sapply(xtest,node_var_test,x=x[,test[i,"variable"]],y=y,gamma=gammai*gammascale,Pmat=Pmat,
                   terminal=test$terminal[i],middlenodes=middlenodes)
        ssr=t(ssr)
        ssr[is.nan(ssr)]=Inf
        best=which.min(ssr[,1])
        res=c(xtest[best],gammai*gammascale,ssr[best,])
        names(res)=c("c0","gamma","val","b0","b1")
        fit[[i]]=res
      }


    }
    Sys.time()-t1
    best=which.min(lapply(fit,function(x)x["val"]))
    node=fit[[best]]
    if(bf==5){
      iter=d_max+1
      next
    }
    if(is.infinite(node["val"])){
      bf=bf+1
      next
    }

    nodeleft=data.frame("side" = 1,"b" = node["b0"],"c0" = node["c0"],gamma = node["gamma"]
                        , "parent" = tree$id[test$terminal[best]], "terminal" = "yes", variable = test[best,"variable"],id=nrow(tree)+1)
    noderight=data.frame(side = 2,  "b" = node["b1"],"c0" = node["c0"],gamma = node["gamma"]
                         , "parent" = tree$id[test$terminal[best]], "terminal" = "yes", variable = test[best,"variable"],id=nrow(tree)+2)


    tree$terminal[test$terminal[best]]="no"

    tree=rbind(tree,nodeleft,noderight)

    p0=(1/(1+exp(-node["gamma"]*(x[,nodeleft$variable]-node["c0"]))))*Pmat[,test$terminal[best]]
    p1=(1-(1/(1+exp(-node["gamma"]*(x[,noderight$variable]-node["c0"])))))*Pmat[,test$terminal[best]]
    Pmat=cbind(Pmat,p0,p1)
    Pmat[,test$terminal[best]]=NA
    tree$b[tree$terminal=="yes"] =  node[ c(6:length(node),4,5)]
    iter=iter+1
  }
  Pmat[is.na(Pmat)]=0
  fitted=Pmat%*%tree$b
  result=list(tree=tree,fitted.values=fitted)
  return(result)
}

grow_tree_simple=function(x, y, p, d_max, gamma){
  bf=0
  variables = sample(ncol(x), round(p*ncol(x)))#sort(sample(1:ncol(x), max(round(p*ncol(x)),2)  ))
  gammai=gamma[sample(1:length(gamma),1)]

  fit=list()
  for(i in 1:length(variables)){
    xtest=x[,variables[i]]
    #xtest=stats::runif(20,min(xtest)-0.1*stats::sd(xtest),max(xtest)+0.1*stats::sd(xtest))
    xtest=sample(xtest,20)

    gammascale=max(stats::IQR(x[,variables[i]]),0.1)
    ssr=sapply(xtest,initial_node_var_test,x=x[,variables[i]],y=y,gamma=gammai*gammascale)
    ssr=t(ssr)
    best=which.min(ssr[,1])
    res0=c(xtest[best],gammai*gammascale,ssr[best,])
    names(res0)=c("c0","gamma","val","b0","b1")
    fit[[i]]=res0

  }
  best=which.min(lapply(fit,function(x)x["val"]))
  node=fit[[best]]

  nodeleft=data.frame("side" = 1,"b" = node["b0"],"c0" = node["c0"],gamma = node["gamma"]
                      , "parent" = 0, "terminal" = "yes", variable = variables[best], id = 1)
  noderight=data.frame(side = 2,  "b" = node["b1"],"c0" = node["c0"],gamma = node["gamma"]
                       , "parent" = 0, "terminal" = "yes", variable = variables[best], id = 2)
  tree=rbind(nodeleft,noderight)
  tree$terminal=as.character(tree$terminal)

  Pmat=1/(1+exp(-node["gamma"]*(x[,variables[best]]-node["c0"])))
  Pmat=cbind(Pmat,1-Pmat)

  ################
  iter=1
  while(iter<=d_max){

    gammai=gamma[sample(1:length(gamma),1)]
    terminal=which(tree$terminal=="yes")
    variables=sample(ncol(x), round(p*ncol(x)))#sort(sample(1:ncol(x), max(round(p*ncol(x)),2)))
    test=expand.grid(variables,terminal)
    colnames(test)=c("variable","terminal")

    t1=Sys.time()
    fit=list()
    for(i in 1:nrow(test)){
      xt=x[,test[i,"variable"]]
      #xtest=stats::runif(10,min(xt)-0.1*stats::sd(xt),max(xt)+0.1*stats::sd(xt))
      xtest=sample(xt,20,prob = Pmat[,test$terminal[i]]+0.01)

      gammascale=max(stats::IQR(xt),0.1)
      middlenodes=which(is.na(colSums(Pmat)))
      if(length(xtest)<=1){
        fit[[i]]=c(val=Inf)
      }else{
        ssr=sapply(xtest,node_var_test,x=x[,test[i,"variable"]],y=y,gamma=gammai*gammascale,Pmat=Pmat,
                   terminal=test$terminal[i],middlenodes=middlenodes)
        ssr=t(ssr)
        ssr[is.nan(ssr)]=Inf
        best=which.min(ssr[,1])
        res=c(xtest[best],gammai*gammascale,ssr[best,])
        names(res)=c("c0","gamma","val","b0","b1")
        fit[[i]]=res
      }


    }
    Sys.time()-t1
    best=which.min(lapply(fit,function(x)x["val"]))
    node=fit[[best]]
    if(bf==5){
      iter=d_max+1
      next
    }
    if(is.infinite(node["val"])){
      bf=bf+1
      next
    }

    nodeleft=data.frame("side" = 1,"b" = node["b0"],"c0" = node["c0"],gamma = node["gamma"]
                        , "parent" = tree$id[test$terminal[best]], "terminal" = "yes", variable = test[best,"variable"],id=nrow(tree)+1)
    noderight=data.frame(side = 2,  "b" = node["b1"],"c0" = node["c0"],gamma = node["gamma"]
                         , "parent" = tree$id[test$terminal[best]], "terminal" = "yes", variable = test[best,"variable"],id=nrow(tree)+2)


    tree$terminal[test$terminal[best]]="no"

    tree=rbind(tree,nodeleft,noderight)

    p0=(1/(1+exp(-node["gamma"]*(x[,nodeleft$variable]-node["c0"]))))*Pmat[,test$terminal[best]]
    p1=(1-(1/(1+exp(-node["gamma"]*(x[,noderight$variable]-node["c0"])))))*Pmat[,test$terminal[best]]
    Pmat=cbind(Pmat,p0,p1)
    Pmat[,test$terminal[best]]=NA
    tree$b[tree$terminal=="yes"] =  node[ c(6:length(node),4,5)]
    iter=iter+1
  }
  Pmat[is.na(Pmat)]=0
  fitted=Pmat%*%tree$b
  result=list(tree=tree,fitted.values=fitted)
  return(result)
}
gabrielrvsc/BooST documentation built on July 27, 2021, 3:08 p.m.