Nothing
#' Simulate preference data to apply CCRS
#'
#' @description Simulates artificial preference data containing content-based (and response-style-based) clusters.
#' @usage generate.rsdata(n=n,m=m,q=q,K.true=K.true,H.true=NULL,clustered.rs=FALSE,
#' cls.cont.vec=NULL,cls.rs.vec=NULL,savedata=FALSE)
#' @param n An integer indicating the number of respondents.
#' @param m An integer indicating the number of items.
#' @param q An integer indicating the maximum rating.
#' @param K.true An integer indicating the true number of content-based clusters for n respondents.
#' @param H.true An integer indicating the true number of response-style-based clusters for n respondents. This is needed when \code{clustered.rs=TRUE}.
#' @param clustered.rs A logical value indicating whether response-style-based cluster structure exists in generated data. If \code{TRUE}, coefficients of I-spline are generated by response-style-based clusters. The default is \code{clustered.rs=FALSE}.
#' @param cls.cont.vec A vector of integers (from 1:K.true) of length n indicating the content-based cluster to which each respondent is allocated in artificial data. If it's \code{NULL}, it is generated automatically.
#' @param cls.rs.vec A vector of integers (from 1:H.true) of length n indicating the response-style-based clusters. If it's \code{NULL} and clustered.rs==T, it is generated randomly.
#' @param savedata A logical value indicating whether artificial data are saved as csv files. The default is \code{savedata=FALSE}.
#' @return A list with the following elements:
#' \item{\code{X}}{An n by m matrix of categorical variables.}
#' \item{\code{X.star}}{An n by m matrix of true preference data \code{X^*}.}
#' \item{\code{X.nors}}{An n by m matrix of categorical variables transformed by reference boundaries.}
#' \item{\code{cls.cont.vec}}{A vector of integers (from 1:H.true) indicating content-based clusters used to generate artificial data.}
#' \item{\code{cls.rs.vec}}{A vector of integers (from 1:H.true) indicating response-style-based clusters used to generate artificial data.}
#' @seealso \code{\link{create.ccrsdata}}
#' @references Takagishi, M., Velden, M. van de & Yadohisa, H. (2019). Clustering preference data in the presence of response style bias, to appear in British Journal of Mathematical and Statistical Psychology.
#' @importFrom msm rtnorm
#' @importFrom cds ispline
#' @importFrom stats runif rnorm
#' @importFrom utils write.csv
#' @examples
#' #data setting
#' n <- 30 ; m <- 10 ; H.true <- 2 ; K.true <- 2 ; q <- 5
#' datagene <- generate.rsdata(n=n,m=m,K.true=K.true,H.true=H.true,q=q,clustered.rs = TRUE)
#' #obtain n x m data matrix
#' X <- datagene$X
#' @export
generate.rsdata <- function(n=n,m=m,q=q,K.true=K.true,H.true=NULL,clustered.rs=FALSE,
cls.cont.vec=NULL,cls.rs.vec=NULL,savedata=FALSE) {
######setting that's been fixed
sig.Xstar <- 0.1
sig.xi <- 0.01
sig <- 0.1
beta.order<-c(1,2,4,5,3)
Km <- 5 ; noisem <- 2
q4 <- 1/4
if(is.null(cls.cont.vec)){
cls.cont.vec <- rep(1:K.true,(n/K.true))
}
if(is.null(cls.rs.vec)){
if(clustered.rs){
cls.tr.b4narabi <- rep(1:H.true,rep((n/H.true),H.true))
clsnarabi <- sample(x=n,size=n,replace=FALSE)
cls.rs.vec <- cls.tr.b4narabi[clsnarabi]
}else{
cls.rs.vec <- seq(1,n)
}
}
if(clustered.rs){
Beta.tr <- matrix(runif((n*3),0,1),n,3)
Beta.tr <- t(apply(Beta.tr,1,function(x){(x/sum(x))}))
}else{####start clustered r.s.######
#1:no rs, 2:ac, 3:dis, 4:ext, 5:mid
rscls<-seq(1,5)[1:H.true]
Beta.tr <- matrix(0,H.true,3)
#####specify beta#######
Betasp.all <- matrix(0,5,2)
Betasp.all[beta.order[2],] <- c(3.9,0.1) #ac
Betasp.all[beta.order[3],] <- c(0.1,3.9) #dis
Betasp.all[beta.order[4],] <- c(0.1,0.1) #ext
Betasp.all[beta.order[5],] <- c(98,98) #mid
#####function generating beta#####
beta.gene<-function(x){
x1 <- x[1]
x2 <- x[2]
a2 <- solve((1/x1)+1+(1/x2))
a1 <- a2/x1
a3 <- a2/x2
return(c(a1,a2,a3))
}
###beta except no r.s.
Betasp<-matrix(0,(H.true-1),2)
Betasp<-Betasp.all[rscls[rscls!=1],]
if(any(rscls==1)){
if(H.true!=1){
if(H.true==2){
Beta.tr<-matrix(beta.gene(Betasp),(H.true-1),3,byrow=TRUE)
}else{
Beta.tr<-matrix(apply(Betasp,1,beta.gene),(H.true-1),3,byrow=TRUE)
}
Beta.tr<-rbind(q4*c(1,2,1),Beta.tr)
}else{
Beta.tr<-q4*c(1,2,1)
}
}else{
Beta.tr <- matrix(apply(Betasp,1,beta.gene),(H.true),3,byrow=TRUE)
}
}#####end clustered beta#####
###setting rs for middle and extreme
b2.hand <- matrix(0,2,q)
if(q==5){
b2.hand[1,]<-c(0.35,0.49,0.51,0.65,1) #rs extreme
b2.hand[2,]<-c(0.01,0.21,0.79,0.99,1) #rs middle
}else if(q==7){
b2.hand[1,]<-c(0.26,0.46,0.49,0.51,0.54,0.74,1) #rs extreme
b2.hand[2,]<-c(0.01,0.04,0.28,0.72,0.96,0.99,1) #rs middle
}
#################### make cluster structure ##########################
Xi <- matrix(0,K.true,m)
###grouping variables
nm <- 0 ; nom <- m-nm
if(nm==0){
mcls<-c(rep((nom/Km),Km))
mvar.grp<-rep(seq(1,Km),mcls)
}else{
mcls<-c(rep((nom/Km),Km),nm)
mvar.grp<-rep(seq(1,(Km+1)),mcls)
}
####upper and lower at each level#####
UL.m<-matrix(0,Km,2)
###divide [0,1] interval by Km
intbegin <- 1/Km
intvec <- seq(0,1,by=intbegin)
for(k in 1:Km){
#add sig
if(k==1){
UL.m[k,]<-c((intvec[k]),(intvec[k+1]+sig))
}else if(k==Km){
UL.m[k,]<-c((intvec[k]-sig),(intvec[k+1]))
}else if(k==(Km+1)){
UL.m[k,]<-c(0,1)
}else{
UL.m[k,]<-c((intvec[k]-sig),(intvec[k+1]+sig))
}
}
#########including noise variables##########
#"6 (Km+1)" is allocated to noise variables (which variable is noise is random)
noisevari.g <- sample(Km,noisem,replace=FALSE) #decide variable group by sample
noisevari <- which(mvar.grp %in% noisevari.g) #take out each number of variables in noise group
mvar.grp[mvar.grp %in% noisevari.g] <- Km+1
#######make Level (K.true x Km)##########
######kth row elements indicate the interval (level) corresponding to kth cluster
#(1:low, 2:middle, 3:high)
######function to make Level
createlevel <- function(Level) {
Level[1,] <- seq(1,Km)
if(K.true>1){
levelvec<-sample((Km-1),(K.true-1),replace=FALSE)
llee<-1
for(llee in 1:length(levelvec)){
rlev<-levelvec[llee]+1
Level[(llee+1),]<-seq(1,Km)[c(seq(rlev,Km),seq(1,(rlev-1)))]
}}
return(Level)
}
#####make Level. In K.true=2, if both 1&5 coexist in the same column (variable group), do it again.
Level <- matrix(0,K.true,Km)
Level <- createlevel(Level)
Level[,noisevari.g] <- (Km+1) ##insert noise number indicating which vari is noise
for(k in 1:K.true){
for(le in 1:(Km)){
levv <- Level[k,le]
v.k <- m/Km
if(levv!=(Km+1)){ #mean of the interval is mean of normal
Xi[k,which(mvar.grp==le)] <- rnorm(v.k,mean=mean(UL.m[levv,]),sd=sig.xi)
}
}
}
###trunced normal
X.star <- matrix(0,n,m)
for(i in 1:n){
k<-cls.cont.vec[i]
X.star[i,-noisevari] <- Xi[k,-noisevari]+rtnorm((m-length(noisevari)),mean=0,sd=sig.Xstar,lower=-1+sig,upper=1-sig)
X.star[i,noisevari] <- runif(length(noisevari),0,1)
}
#############################################
####create response style#####
#############################################
scales<-seq(1,q)
x.bounds<-c(0:q)/q
tvec<-c(0, 0.5, 1)
Mmat.01 <- ispline(x.bounds, tvec = tvec, intercept = FALSE)
r.s.func<-function(x){
b2<-Mmat.01%*%(x/sum(x))
return(b2)
}
if(H.true!=1){
b2<-apply(t(Beta.tr),2,r.s.func)
}else{
b2<-r.s.func(Beta.tr)
}
used.rs.num<-seq(1:5)[which(beta.order %in% c(1:H.true))]
handrs <- c(4,5)
if(any(used.rs.num %in% handrs)){
used.hand.beta<-which(used.rs.num %in% handrs)
which.hand<-which(handrs %in% used.rs.num)
b2[-1,used.hand.beta]<-t(b2.hand[which.hand,])
}
b2[1,] <- -Inf ; b2[(nrow(b2)),] <- Inf
######transform data to r.s. biased data########
X <- matrix(NA,n,m)
for(k in 1:H.true){
ind.cls <- which(cls.rs.vec==k)
X[ind.cls,] <- as.numeric(cut(X.star[ind.cls,], breaks = b2[,k], labels = seq(1,q)))
}
b.nors<-r.s.func(c(q4,(q4*2),q4))
b.nors[1,] <- -Inf
b.nors[(nrow(b.nors)),] <- Inf
X.nors <- matrix(as.numeric(cut(X.star, breaks =b.nors , labels = seq(1,q))),n,m)
#----------------------------------------------------------------------------------------
#######save data#############
#----------------------------------------------------------------------------------------
if(savedata) {
colnames(X) <- lapply(c(1:m),function(x){paste("X",x,sep="")})
colnames(X.star) <- lapply(c(1:m),function(x){paste("X.star",x,sep="")})
#colnames(Level) <- lapply(c(1:Km),function(x){paste("Level",x,sep="")})
#colnames(Xi) <- lapply(c(1:m),function(x){paste("Xi",x,sep="")})
location.data <- "crssdata.csv"
datalist.n <- cbind(X,X.star,cls.rs.vec,cls.cont.vec)
write.csv(datalist.n,location.data)
#others
#location.data2 <- "crssdata.others.csv"
#datalist.nigai <- cbind(Level,Xi)
#write.csv(datalist.nigai, location.data2)
}
return(list(X=X,X.star=X.star,X.nors=X.nors,#,Level=Level
cls.cont.vec=cls.cont.vec,cls.rs.vec=cls.rs.vec))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.