R/GenLUSample.R

Defines functions GenLUSample

Documented in GenLUSample

#' Generate L/U sample
#'
#' Draws a labelled sample (positives only) and an unlabelled sample from a
#' population. The rows of `X` are first split at random into two halves.
#' Labelled subjects are drawn from the rows of the first half with `y == 1`;
#' unlabelled subjects are drawn from the second half, so the labelled and
#' unlabelled samples never share a row.
#'
#' @param X Population covariates; a two-dimensional object (e.g. a matrix or
#'   data frame) with one row per subject
#' @param y Population response, one entry per row of `X`; entries equal to 1
#'   are treated as positive and entries equal to 0 as negative
#' @param nl number of labelled subjects
#' @param nu number of unlabelled subjects
#' @param replace if TRUE, sample with replacement
#' @return A list with elements
#'   \describe{
#'     \item{X, y}{the inputs, unchanged}
#'     \item{z}{0/1 vector of length `nrow(X)`; 1 marks rows drawn as labelled}
#'     \item{X_l}{rows of `X` drawn as labelled}
#'     \item{X_u, y_u}{rows of `X` and entries of `y` drawn as unlabelled}
#'     \item{X_n}{rows of `X` in the unlabelled sample with `y == 0`}
#'     \item{sampl, sampunl, sampn}{row indices behind `X_l`, `X_u`, `X_n`}
#'   }
#' @export
GenLUSample <- function(X, y, nl, nu, replace = FALSE) {
  n <- nrow(X)
  if (is.null(n)) {
    stop("`X` must have rows (a matrix or data frame).", call. = FALSE)
  }
  if (length(y) != n) {
    stop("`y` must have one entry per row of `X`.", call. = FALSE)
  }

  # Draw `size` elements from `pool` by position. Indexing through
  # sample.int() avoids the base::sample() behaviour where a length-one
  # numeric `x` is silently expanded to 1:x.
  draw <- function(pool, size, what) {
    too_few <- length(pool) == 0L || (!replace && size > length(pool))
    if (size > 0 && too_few) {
      stop(
        "Cannot draw ", size, " ", what, " subjects from a pool of ",
        length(pool), ".",
        call. = FALSE
      )
    }
    pool[sample.int(length(pool), size, replace = replace)]
  }

  # Randomly divide the data into two halves: choose which rows form the
  # first half from *all* rows, not just from the leading N0 rows.
  N0 <- round(n / 2)
  in_first <- logical(n)
  in_first[sample.int(n, N0)] <- TRUE

  # Labelled sample: positives that fell into the first half.
  z <- rep(0, n)
  pos_pool <- which(y == 1 & in_first)
  sampl <- draw(pos_pool, nl, "labelled")
  X_l <- X[sampl, , drop = FALSE]
  z[sampl] <- 1

  # Unlabelled sample: rows of the second half that were not labelled.
  unl_pool <- which(z != 1 & !in_first)
  sampunl <- draw(unl_pool, nu, "unlabelled")
  X_u <- X[sampunl, , drop = FALSE]
  y_u <- y[sampunl]

  # Negative subjects inside the unlabelled sample.
  sampn <- sampunl[which(y_u == 0)]
  X_n <- X[sampn, , drop = FALSE]

  list(
    X = X, y = y, z = z,
    X_l = X_l, X_u = X_u, X_n = X_n,
    y_u = y_u,
    sampunl = sampunl, sampl = sampl, sampn = sampn
  )
}
hsong1/PUlearning documentation built on May 16, 2017, 11:27 p.m.