README.md
In sambiostat/RICWL:

Project Name

RICWL R package.

Robust index of confidence weighted learning for estimating optimal individual treatment rule when outcome has skwewed or heavy-tailed distributions or with outliers. By using a similarity weighted indicator as our index of confidence, it uses only direction of paired outcomes but not their absoluate values, thus achived robustness towards irrgular outcome distributions. Specially, we allow two types of simialrity functions, Cosine-based and Fraction-based. Those two are more reliable in high dimension compared to other similarity measurements.

Setup
Examples
Status
Contact

install.packages("devtools")
library(devtools)
install_github("sambiostat/RICWL")

Generate data with cauchy error term

library(RICWL)
library(caret)

#Generative Model 
sim<-function(N=500, p=5, sigma=0.3){
  X=matrix(runif(N*p,-1,1), nrow=N)
  colnames(X)=paste("X",c(1:p),sep="")
  A=sample(c(1,-1), N, replace=T, prob=c(0.5,0.5))
  Ym = 1+3*cos(X[,3])- 2*cos(X[,4])
  Yc = 4*(0.6- X[,1]^2 -X[,2]^2)

  #Homo cauchy
  noise=rcauchy(N, location = 0, scale = sigma)

  s2n=var(Yc) / (var(Ym)+ var(noise))
  Y=Ym + A*Yc + noise

  optA={Yc > 0}*2-1
  Y0=Ym - Yc #Everyone follows A=-1
  Y1=Ym + Yc #Everyone follows A=1
  optY=Ym + abs(Yc)

  out=list("X"=X, "A"=A, "Y"=Y, "optA"=optA,  "Y1"=Y1, "Y0"=Y0, "optY"=optY,
           "EY"=mean(optY),  "MedY"=median(optY))

  return(out)
}

#Set parameter for generative model
N=300
p=5
sgm=0.3
s=1
theta_list= c(0.25, 0.5, 0.75, 1)
sgm_list=c(0.001,0.01, 1,10,100,1000)
c_list=4^(-2:2)
m=5


#Generate test data 
test.data <- sim(N=10000, p=p, sigma=sgm)
X.test <- test.data$X
optTr.test <- factor(test.data$optA, levels=c(-1,1))
Y.test <- test.data$Y
Tr.test <- test.data$A
y1.test <- test.data$Y1
y0.test <- test.data$Y0
optY.test <- test.data$EY
optYmed.test <- test.data$MedY

tmpdata=rbind.data.frame(cbind.data.frame("Y"=y1.test, cond="Trt=1"),
              cbind.data.frame("Y"=y0.test, cond="Trt=-1"))

#plot the distribution of generated outcome Y under two treatment
ggplot(tmpdata, aes(x=Y, fill=cond)) +
  geom_histogram(binwidth=.5, alpha=.5, position="identity")

Histogram

#generate training data 
train.data <- sim(N=N, p=p, sigma=sgm)
X <- train.data[[1]]
Tr <- train.data[[2]]
y <- train.data[[3]]

*Fit 4 mdoels

#RICL_frac
pwl.frac.fit <- RICWL_CV(H=X, A=Tr, R2=y, pi=NULL, kernel='rbf', pentype = "lasso", 
        XS=NULL, sigmalst=sgm_list,m=m,  e=1e-5, clist=c_list,
         theta_list = theta_list ,frac.par=0.5, method="frac")

optTr.pwl.frac<- predict(pwl.frac.fit, x=X.test)
optTr.pwl.frac <- factor(optTr.pwl.frac, levels=c(-1,1))
cfm.pwl.frac <- caret::confusionMatrix(as.factor(optTr.pwl.frac), reference=as.factor(optTr.test))
pcd.pwl.frac<- cfm.pwl.frac$overall["Accuracy"]  
value.pwl.frac <- mean(c(y1.test[optTr.pwl.frac==1], y0.test[optTr.pwl.frac==-1]))
med.pwl.frac <- median(c(y1.test[optTr.pwl.frac==1], y0.test[optTr.pwl.frac==-1]))   


#RICL_cos
pwl.cos.fit <- RICWL_CV(H=X, A=Tr, R2=y, pi=NULL, kernel='rbf', pentype = "lasso", 
        XS=NULL, sigmalst=sgm_list,  m=m,  e=1e-5, clist=c_list, 
        theta_list = theta_list ,frac.par=0.5, method="cos")                            

optTr.pwl.cos<-predict(pwl.cos.fit, x=X.test)
optTr.pwl.cos <- factor(optTr.pwl.cos, levels=c(-1,1))
cfm.pwl.cos <- caret::confusionMatrix(as.factor(optTr.pwl.cos), reference=as.factor(optTr.test))
pcd.pwl.cos<- cfm.pwl.cos$overall["Accuracy"]
value.pwl.cos<- mean(c(y1.test[optTr.pwl.cos==1], y0.test[optTr.pwl.cos==-1]))
med.pwl.cos<- median(c(y1.test[optTr.pwl.cos==1], y0.test[optTr.pwl.cos==-1]))

##other rwl  
rwl.fit <- ROWL(X, Tr, y, pi=rep(0.5, N), kernel ="rbf", clinear=c_list, 
        sigma=sgm_list,m=m, residual=T)
optTr.rwl<-  predict(rwl.fit, X.test)
optTr.rwl<- factor(optTr.rwl, levels=c(-1,1))
cfm.rwl <- caret::confusionMatrix(as.factor(optTr.rwl), reference=as.factor(optTr.test))
pcd.rwl<- cfm.rwl$overall["Accuracy"]
value.rwl <- mean(c(y1.test[optTr.rwl==1], y0.test[optTr.rwl==-1]))
med.rwl <- median(c(y1.test[optTr.rwl==1], y0.test[optTr.rwl==-1]))

#owl
owl.fit <- ROWL(X, Tr, y, pi=rep(0.5, N), kernel ="rbf", clinear=c_list,
         sigma=sgm_list,m=m, residual=F)
optTr.owl <- predict(owl.fit, X.test)
optTr.owl<-factor(optTr.owl, levels=c(-1,1))
cfm.owl <- caret::confusionMatrix(as.factor(optTr.owl), reference=as.factor(optTr.test))
pcd.owl<- cfm.owl$overall["Accuracy"]
value.owl <- mean(c(y1.test[optTr.owl==1], y0.test[optTr.owl==-1]))
med.owl <- median(c(y1.test[optTr.owl==1], y0.test[optTr.owl==-1]))   


##combine the res
result <-cbind( c(value.pwl.frac, pcd.pwl.frac, med.pwl.frac),
         c(value.pwl.cos, pcd.pwl.cos, med.pwl.cos),
             c(value.owl, pcd.owl, med.owl),
             c(value.rwl, pcd.rwl, med.rwl),
             c(optY.test, 1, optYmed.test ))


result

Project is: finished

Created by jinchun zhang(jczhang0818@gmail.com) - feel free to contact me!

sambiostat/RICWL documentation built on Dec. 31, 2020, 3:17 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com