simulation/Simulation.R

source("Simulation/GenerateData.R")


simulation = function(L, n, beta = NULL, model = c("A", "B", "C", "D"), g=p, pro=0.1, 
                      da=0.8, db=0.5, 
                      useDataFile = FALSE,seed=2017,
                      method = "rkan", ...) {
    intercept=FALSE
    mcount <- length(model)
    
    # define output
    iw = rep(0, L)
    ib = rep(0,L)
    cfr <- rep(0, L)
    ofr <- rep(0, L)
    cfr2 <- rep(0, L)
    pdr <- rep(0, L)
    fdr <- rep(0, L)
    over_size <- 2
    msize <- rep(0, L)
    mses <- rep(0, L)
    times <- rep(0, L)
    nres <- array(list(), mcount)
    nlambda2 <- ifelse(is.null(list(...)$lambda2),20,length(list(...)$lambda2))
    nlambda1 <- ifelse(is.null(list(...)$lambda1),20,length(list(...)$lambda1))
    crit2 <- matrix(0,nrow=L,ncol=nlambda2)
    crit1 <- matrix(0,nrow=L,ncol=nlambda1)
    wdf <- array(0,dim=c(L,nlambda2,nlambda1))
    bdf <- array(0,dim=c(L,nlambda2,nlambda1))
    bic <- array(0,dim=c(L,nlambda2,nlambda1))
    bic2 <- array(0,dim=c(L,nlambda2,nlambda1))
    lam2 <- crit2
    lam1 <- crit1
    ks <- NULL
    opt.param <- matrix(0,nrow=L,ncol=3)
    dfw <- rep(0,L)
    dfb <- rep(0,L)
    
    zero <- 1e-7
    
    pb <- txtProgressBar(1, mcount * L, style = 3)
    for (j in 1:mcount) {
        # for each model
        
        # initiate
        if (!is.null(seed)) {
            set.seed(seed)
        }
        p <- length(beta)
        if (useDataFile) {
            f = paste("data\\", model[j], n, "X", p,"_",length(g),"_",da,"X",db,"_", pro, ".rda", sep = "")
            load(f)
            beta = data[[1]]$beta
            n = length(data[[1]]$y)
        }
        b = array(0, dim = c(L, ifelse(FALSE,p+1,p)))
        w = array(0, dim = c(L, n))
        gam = array(0, dim = c(L, n))
        # ROC
        # simulation
        for (i in 1:L) {
            # data
            if (useDataFile) {
                out = data[[i]]
                beta <- out$beta
            } else {
                out = GenerateDataByModel(n = n, beta = beta, model = model[j], pro=pro, g=g, a=da, b=db)
            }
            
            # try different methods
            if (method == "rkan") {
              ptm <- proc.time()
              res = rkan(x=out$x, y=out$y, ...)
              times[i] <- (proc.time() - ptm)[1]
              b[i, ] = ifelse(abs(res$beta) >zero, res$beta, 0)
              w[i, ] = res$w
              lam2[i,] =  res$lambda2
              lam1[i,] = res$lambda1
              if(is.null(ks)) ks <- matrix(0,nrow=L,ncol=length(res$k)) 
              ks[i,] <- res$k
              opt.param[i,] <- c(res$opt.lambda1,res$opt.lambda2,res$opt.k)
              iter=res$iter
            }else if (method == "rgkan") {
              ptm <- proc.time()
              res = rkan(x=out$x, y=out$y, g=g, ...)
              times[i] <- (proc.time() - ptm)[1]
              b[i, ] = ifelse(abs(res$beta) >zero, res$beta, 0)
              w[i, ] = res$w
              lam2[i,] =  res$lambda2
              lam1[i,] = res$lambda1
              if(is.null(ks)) ks <- matrix(0,nrow=L,ncol=length(res$k)) 
              ks[i,] <- res$k
              opt.param[i,] <- c(res$opt.lambda1,res$opt.lambda2,res$opt.k)
              iter=res$iter
            }else if (method == "gkan") {
              ptm <- proc.time()
              res = rkan(x=out$x, y=out$y, lambda2=100,g=g, ...)
              times[i] <- (proc.time() - ptm)[1]
              b[i, ] = ifelse(abs(res$beta) >zero, res$beta, 0)
              w[i, ] = res$w
              lam2[i,] =  res$lambda2
              lam1[i,] = res$lambda1
              if(is.null(ks)) ks <- matrix(0,nrow=L,ncol=length(res$k)) 
              ks[i,] <- res$k
              opt.param[i,] <- c(res$opt.lambda1,res$opt.lambda2,res$opt.k)
              iter=res$iter
            }else if (method == "kan") {
              ptm <- proc.time()
              res = rkan(x=out$x, y=out$y, lambda2=100, ...)
              times[i] <- (proc.time() - ptm)[1]
              b[i, ] = ifelse(abs(res$beta) >zero, res$beta, 0)
              w[i, ] = res$w
              lam2[i,] =  res$lambda2
              lam1[i,] = res$lambda1
              if(is.null(ks)) ks <- matrix(0,nrow=L,ncol=length(res$k)) 
              ks[i,] <- res$k
              opt.param[i,] <- c(res$opt.lambda1,res$opt.lambda2,res$opt.k)
              iter=res$iter
            }else if (method == "pawls") {
              ptm <- proc.time()
              res = pawls(x=out$x, y=out$y, intercept = intercept,...)
              times[i] <- (proc.time() - ptm)[1]
              b[i, ] = ifelse(abs(res$beta) >zero, res$beta, 0)
              w[i, ] = res$w
              if(length(lam2[i,]!=length(res$lamabda2))){
                lam2=matrix(0,nrow=L,ncol=length(res$lamabda2))
                lam1=matrix(0,nrow=L,ncol=length(res$lamabda1))
              }
              lam2[i,] =  res$lambda2
              lam1[i,] = res$lambda1
              iter=res$iter
            }  
            # record result

            if(intercept) res$beta <- res$beta[-1]
            true_set <- which(beta != 0)
            pnum <- length(true_set)
            active_set <- which(abs(res$beta) > zero)
            msize[i] <- length(active_set)
            common_size <- length(intersect(true_set, active_set))
            cfr[i] <- ifelse(common_size == pnum & msize[i] == pnum, 1, 0)  # correctly fit
            ofr[i] <- ifelse(common_size == pnum & msize[i] > pnum, 1, 0)  # over fit
            cfr2[i] <- ifelse(common_size == pnum & msize[i] <= pnum + over_size, 1, 0)  # over fit by at most 2
            pdr[i] <- common_size/pnum  # positive discover rate
            fdr[i] <- ifelse(msize[i]==0,0,(msize[i] - common_size)/msize[i])  #
            mses[i] <- sum((res$beta - beta)^2)
            setTxtProgressBar(pb, (j - 1) * L + i)
        }
        
        # compute measurement MSE
        MSE <- round(mean(mses), 3)
        # CFR, OFR, PDR, FDR, AN
        CFR <- round(mean(cfr), 3) * 100
        OFR <- round(mean(ofr), 3) * 100
        CFR2 <- round(mean(cfr2), 3) * 100
        PDR <- round(mean(pdr), 3) * 100
        FDR <- round(mean(fdr), 3) * 100
        AN <- round(mean(msize), 3)
        TIME <- sum(times)
        # outlier dectection
        OD <- "not applicable."
        if(method == "rkan"|| method=="rgkan" || method=="pawls"  ){
          if(model[j] == "A" || model[j] == "B"){
            curPro <- 0
          }
          else{
            curPro <- pro
          }
          OD <- OutlierSummary(w, curPro)
          roc <- ComputeROC(w,pro=curPro)
          OD$tpr <- roc$tpr
          OD$fpr <- roc$fpr
        }
       
        # BIC curve
        if(method=="rkan"||method=="rgkan"){
          nres[[j]] <- list(model = model[j], CFR = CFR, CFR2 = CFR2, OFR = OFR, PDR = PDR, FDR = FDR, 
                            AN = AN, MSE = MSE, mses=mses, TIME = TIME,iter=iter,OD=OD,
                            lam2=lam2,lam1=lam1,ks=ks,opt.param=opt.param,betas=b,ws=w)
        } else{
          nres[[j]] <- list(model = model[j], CFR = CFR, CFR2 = CFR2, OFR = OFR, PDR = PDR, FDR = FDR, 
                            AN = AN, MSE = MSE, mses=mses, TIME = TIME, iw=iw, ib=ib,OD=OD)
        }
    }
    # return
    nres
}

OutlierSummary = function(w, pro = 0.1) {
  n = dim(w)[1]
  m = dim(w)[2]
  num = round(m * pro)
  if (num == 0) {
    M = 0
    JD = 1
  } else {
    temp = apply(w[, 1:num] == 1, 1, sum)
    M = mean(temp/num)
    JD = mean(temp == 0)
  }
  S = mean(apply(w[, (num + 1):m] != 1, 1, sum)/(m - num))
  
  list(M = M, S = S, JD = JD)
  
}

ComputeROC= function(w, cutoff=seq(0,1.01,by=0.01), pro=0.1)
{
  l <- length(cutoff)
  L <- dim(w)[1]
  n <- dim(w)[2]
  ps <- NULL
  onum <- n*pro
  if(onum!=0) ps <-1:onum 
  tpr <- rep(0,l)
  fpr <- rep(0,l)
  for(j in 1 : L){
    for(i in 1 : l){
      ps_temp <- which(w[j,] < cutoff[i])
      onum_temp <- length(ps_temp)
      m <- length(intersect(ps,ps_temp))
      tpr[i] <- tpr[i] + m/ onum 
      fpr[i] <- fpr[i] + (onum_temp - m) / (n - onum)
    }
  }
  tpr <- tpr/L
  fpr <- fpr/L
  list(tpr=tpr,fpr=fpr)
}

PlotBICs=function(res,model=1,iter=0){
  if(iter==0){ # show grid iteration
    L <- dim(res[[model]]$crit2)[1]
    for(i in 1:L){
      PlotBICs(res,model,i)
      readline(prompt="Press [enter] to continue")
    }
    }else { #show paticular iteration
      res <- res[[model]]
      lam2 <- res$lam2[iter,]
      crit2 <- res$crit2[iter,]
      index2 <- res$iw[iter] 
      lam1 <- res$lam1[iter,]
      crit1 <- res$crit1[iter,]
      index1 <- res$ib[iter]
      par(mfrow=c(1,2))
      x1 <- log(lam2)
      y1 <- crit2
      plot(x1,y1,type="l",main=paste("w iter",iter,":dfw=",res$dfw[iter],"; dfb=", res$dfb[iter]-1,".", sep=""))
      abline(v=log(lam2[index2]), col="grey")
     
      x2 <- log(lam1)
      y2 <- crit1
      plot(x2,y2,type="l",main=paste("beta iter",iter,":dfw=",res$dfw[iter],"; dfb=", res$dfb[iter]-1,".", sep=""))
      abline(v=log(lam1[index1]), col="grey")
    }
}

PlotBIC2D=function(res,model=1,iter=0,tb=4,tw=5){
  if(iter==0){ # show grid iteration
    L <- dim(res[[model]]$bic)[1]
    for(i in 1:L){
      PlotBIC2D(res,model,i,tb=tb,tw=tw)
      readline(prompt="Press [enter] to continue")
    }
  }else { #show paticular iteration
    res <- res[[model]]
    crit <- res$bic[iter,,]
    crit1 <- res$bic2[iter,,]
    lam2 <- res$lam2[iter,]
    lam1 <- res$lam1[iter,]
    wdf <- res$wdf[iter,,]
    bdf <- res$bdf[iter,,]
    ib <- res$ib[iter]
    iw <- res$iw[iter]
    x11()
    par(mfrow = c(2, 3))
    image2D(crit,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",main="crit2")
    # x11()
    image2D(crit1,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",main="crit1")
    points(-log(lam2[res$iw[iter]]),-log(lam1[res$ib[iter]]),pch=24, col="black")
    # x11()
    image2D(wdf,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",
            main=paste("wdf=",wdf[iw,ib],sep = ""))
    points(-log(lam2[res$iw[iter]]),-log(lam1[res$ib[iter]]),pch=24, col="black")
    # x11()
    image2D(bdf,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",
            main=paste("bdf=",bdf[iw,ib],sep = ""))
    points(-log(lam2[res$iw[iter]]),-log(lam1[res$ib[iter]]),pch=24, col="black")
    # x11()
    image2D(bdf+wdf,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",main="w+bdf")
    points(-log(lam2[res$iw[iter]]),-log(lam1[res$ib[iter]]),pch=24, col="black")
    #last one
    n <- dim(bdf)[1]
    m <- dim(bdf)[2]
    df <- matrix(0, nrow=n, ncol=m)
    for(i in 1:n){
      for(j in 1:m){
        if(bdf[i,j]==tb & wdf[i,j]==tw){
          df[i,j]=1
        }else if( (bdf[i,j]>tb & bdf[i,j]<=tb*1.5) & (wdf[i,j]>tw & wdf[i,j]<= tw*1.5)){
          df[i,j]=0.7
        }else if( (bdf[i,j]>tb*1.5 & bdf[i,j]<=tb*2) & (wdf[i,j]>tw*1.5 & wdf[i,j]<= tw*2)){
          df[i,j]=0.4
        }else if( (bdf[i,j]>tb*2 & bdf[i,j]<=tb*3) & (wdf[i,j]>tw*2 & wdf[i,j]<= tw*3)){
          df[i,j]=0.2
        }else{
          df[i,j]=0
        }
      }
    }
    image2D(df,x=-log(lam2),y=-log(lam1),xlab="-log lambda2", ylab="-log lambda1",main="True df")
    points(-log(lam2[res$iw[iter]]),-log(lam1[res$ib[iter]]),pch=24, col="black")
  }
}

PlotEfficiency <- function(lambda,weight,x, sigma=2,main=NULL)
{
  L <- length(lambda)
  n <- dim(x)[1]
  p <- dim(x)[2]
  varb <- matrix(0,nrow=L,ncol=p)
  for(i in 1 : L){
    varb[i,] <- diag(solve(t(x) %*% diag(weight[i,]^2) %*% x))  * sigma^2
  }
  op <- apply(weight!=1,1,sum)/n
  #plot
  plot(op,varb[,p],type="n",xlim=c(0,0.6), 
       xlab="Percent of Outliers", main=main, ylab=expression(paste("Var(",hat(beta),")",sep = "")))
  qual_col_pals <-  brewer.pal.info[brewer.pal.info$category == 'qual',]
  col_vector <-  unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
  col <- sample(col_vector, p)
  for(i in 1:p){
    lines(op,varb[,i],lwd=2, lty=1,col=col[i])
  }
}
r08in/rkan documentation built on May 24, 2019, 6:13 a.m.