data-raw/PSEUDO_PREDICT_HD.R

## Create Simulated data based on gen_dat.R -- add only event data
## True data are generated using a logstic function -- see true_omega_ft.R

library(usethis)
library(HDChangePoint)


pseudo.HD<-function(n=80, model="logist",  p=2, bb0=0.5, bb=0.06, x.sd=0.1, v1=5, v2=7, dist="normal", eps.sd=0.05, u.sd=0.05){



  #################################################################################
  ## gender is created: 30% for male and 70% for females based on the real data ###
  #################################################################################

  gender<-sample(c("Male", "Female"), size=n, replace=TRUE, prob=c(0.3,0.7))


  ########################################
  ## generate data for each subject i   ##
  ########################################

  ## list format of observed HD data
  datgen<-vector("list", n)

  ## vector of visit numbers: different for different patients; 30% for 5 visits, 60% for 6 visits, 10% for 7 visits
  visit.num<-sample(v1:v2, n, replace=TRUE, prob=c(0.3, 0.6, 0.1))


  raw.tms.data<-array(0, dim=c(sum(visit.num),8),
                      dimnames=list(paste("xx", 1:sum(visit.num), sep=""),c("SUBJID", "EVENT", "TOTAL_MOTOR_SCORE", "TRUE_INFL_POINT", "AGE", "CAG", "gender", "logAGE")
                      ))



  for (id in 1:n){

    ## vector of subject ID ###

    subj.visit.number<-visit.num[id]
    subj.id<-rep(id, subj.visit.number)

    ## event
    event<-seq(1, subj.visit.number, length.out=subj.visit.number)


    ## vector of gender  ###

    gender_level<-ifelse(gender[id]=="Male",1,0)
    sex<-rep(gender_level,visit.num[id])


    ###########################################
    ## Model for log of Inflection point T  ###
    ###########################################

    ## covariate W is uniform random number on (2,3)
    #p<-2
    ww<-runif(p-1,35, 50)

    ## add covariate values
    cov.W<-rep(ww, subj.visit.number)

    ## error terms
    if (dist=="normal"){
      ee<-rnorm(1,0,u.sd)
    }



    beta<-c(bb0,bb)   ##  input for beta components
    W<-c(1,ww)        ##  ww is true covariate assocaited with log inflection point

    ################################################
    ## estimate log of inflection points         ###
    ################################################
    betaW<-crossprod(beta,W)   #t(as.vector(beta))%*%(as.vector(W));
    true_z<-betaW+ee;


    ## true inflection points should be in the range of logage
    xx<-sort(rnorm(subj.visit.number,true_z,x.sd))
    age<-exp(xx)

    ## error term for longitudinal model,
    ##            which follows normal distribution with mean 0 and standard deviation sigma

    if (dist=="normal"){
      eps.star<-rnorm(subj.visit.number,0, eps.sd)
    }

    ## function omega generated by function w()
    true_z<-as.numeric(true_z)
    omega<-w(xx,true_z, model)

    ## longitudinal response: total motor score y
    yy<-omega+eps.star




    datgen[[id]]<-cbind(subj.id, event, yy, true_z, age, cov.W, sex, xx)


  }


  ## create data.frame from list type of data
  gendat<-do.call("rbind.data.frame", datgen)

  colnames(gendat)<-c("SUBJID","event" , "TOTAL_MOTOR_SCORE", "TRUE_INFL_POINT", "AGE", "CAG", "gender",  "logAGE")


  return(gendat=gendat)
}

PSEUDO_PREDICT_HD<-pseudo.HD(n=80, model="logist",  p=2, bb0=0.5, bb=0.06, x.sd=0.3, v1=5, v2=7, dist="normal", eps.sd=0.05, u.sd=0.05)



write.csv(PSEUDO_PREDICT_HD, file="data-raw/PSEUDO_PREDICT_HD.csv")
usethis::use_data(PSEUDO_PREDICT_HD, overwrite = TRUE)
save(PSEUDO_PREDICT_HD, file="data/PSEUDO_PREDICT_HD.rdata")
unkyunglee/HDChangePoint documentation built on Nov. 27, 2021, 2:57 p.m.