blupADC: blup-Associated Data Cleaner

#*************generate DIR. file **********
generate_DIR<-function(phe_col_names=NULL,
					   target_trait_name=NULL,
					   fixed_effect_name=NULL, #列表
					   random_effect_name=NULL, #列表，不包括永久环境效应
					   covariate_effect_name=NULL, #列表
					   random_regression_effect_name=NULL, #列表，随机回归效应   list(c("",""),c("",""))
					   maternal_effect_name=NULL, #列表
					   
					   include_social_effect=FALSE,    #是否评估social effect
					   group_effect_name=NULL,  #评估social effect时, group的名称,自动生成group_phe,												
					   integer_group_names=NULL,        #整型group的名称
					   real_group_names=NULL,            #实型group的名称
		   
					   provided_effect_file_path=NULL, #各个性状的效应文件
					   provided_effect_file_name="model_define.txt",
					   phe_path=NULL,
					   phe_name=NULL,
					   analysis_model=NULL,
					   ped_inbred_number=1, #构建系谱的时候，用近交的方法
					   genetic_effect_name="Id",
					   included_permanent_effect=FALSE, 
					   included_dominance_effect=FALSE,
					   missing_value= -9999,
					   iteration_criteria= 1.0e-7,
					   relationship_path=NULL,
					   relationship_name=NULL,
					   dmu_module=NULL,
					   dmu_algorithm_code=NULL,
					   provided_prior_file_path=NULL,
					   provided_prior_file_name=NULL,
					   integer_n=NULL,  #整型数目
					   residual_cov_trait=NULL,  #限定残差协方差的性质
					   output_DIR_path=NULL,
					   output_DIR_name=NULL,
					   IND_geno_file_name=NULL, #
					   IND_geno=NULL,
					   SSBLUP_omega=0.05,
					   EM_iter=10,
					   dmu5_iter=1000000,
					   gibbs_n_rounds=10000,
					   gibbs_n_burnin=1000,
					   gibbs_n_gaps=5,
					   gibbs_seeds=c(1994,1998),
					   #newly added parameters for the new feature in blupADC
						phe_file=NULL,
						kinship_file=NULL,
						prior_file=NULL
					   ){
#生成 social_effect的表型
#social effect其实可以看做是特殊情况的随机回归，需要将随机效应那一行设置为 1+1+1+1类似的形式即可

if(is.null(phe_file)){
phe_file=paste0(phe_path,"/",phe_name)
}


if(TRUE%in%include_social_effect){

cat("Performing social genetic effect analysis...... \n")
cat("Group effect should be included as random effect in the social genetic effect analysis \n")
cat("User must specify the integer_group_names and real_group_names or group_effect_name! \n")
if(!is.null(group_effect_name)){

cat("Generating phenotype of social genetic effect automatically......\n")

phe=fread(phe_file,data.table=F,header=F)
colnames(phe)=phe_col_names

social_result=generate_social_phe(phe=phe,
								  integer_n=integer_n,
					                social_effect_group_name=group_effect_name,
					                genetic_effect_name=genetic_effect_name)

phe=social_result$phe
integer_n=social_result$integer_n
integer_group_names=social_result$integer_group_names
real_group_names=social_result$real_group_names

phe_col_names=colnames(phe)
phe_name=paste0("Social_phe_",phe_name)
phe_path=output_DIR_path
fwrite(phe,paste0(phe_path,"/",phe_name),quote=F,sep=" ",row.names=F,col.names=F)

random_regression_effect_name=rep(list(paste0(real_group_names,"&",integer_group_names)),
                                          length(target_trait_name)) #单性状

for(i in 1:length(target_trait_name)){
random_effect_name[[i]]=c(random_effect_name[[i]],integer_group_names)  # 单性状
}

}else{

cat("Using user-provided social genetic effect phenotype......\n")

if(is.null(integer_group_names)|is.null(real_group_names)){stop("User must specify the integer_group_names or real_group_names when using provided phenotype!")}
integer_group_names=integer_group_names
real_group_names=real_group_names


random_regression_effect_name=rep(list(paste0(real_group_names,"&",integer_group_names)),
                                          length(target_trait_name)) #单性状

for(i in 1:length(target_trait_name)){
random_effect_name[[i]]=c(random_effect_name[[i]],integer_group_names)  # 单性状
}

}
}


if(!is.null(IND_geno)){
write.table(IND_geno,paste0(relationship_path[1],"/","IND_geno.txt"),quote=F,row.names=F,col.names=F,sep=" ")
IND_geno_file_name="IND_geno.txt"
}

					   
Trait_n=length(target_trait_name)
Variable_n=length(phe_col_names)
real_n=Variable_n-integer_n
Integer_col_names=phe_col_names[1:integer_n]
Real_col_names=phe_col_names[(integer_n+1):Variable_n]

library(data.table)
#读取各性状的固定效应、随机效应、协变量记录
if(!is.null(random_effect_name)&!is.null(provided_effect_file_path)){stop("Effect_file and  effect_names couldn't provided simultaneously! ")}
if(!is.null(provided_effect_file_path)&!is.null(provided_effect_file_name)){

model=fread(paste0(provided_effect_file_path,"/",provided_effect_file_name),data.table=F,fill=TRUE,header = F)

fixed_effect_name=NULL;random_effect_name=NULL;covariate_effect_name=NULL
for(i in 1:length(target_trait_name)){

n=match(target_trait_name[i],model[,1]) # which line  is  the trait

if(is.na(n)){stop(paste0(target_trait_name[i]," couldn't find in provided effect_file; "))}

effect_col=grep(pattern="*",as.character(model[n,]),fixed=T)
fixed_effect=as.character(model[n,c((effect_col[1]+1):(effect_col[2]-1))])
random_effect=as.character(model[n,c((effect_col[2]+1):(effect_col[3]-1))])
covariate_effect=as.character(model[n,c((effect_col[3]+1):(effect_col[4]-1))])

if("*"%in%fixed_effect){fixed_effect=NULL}
if("*"%in%random_effect){random_effect=NULL}
if("*"%in%covariate_effect){covariate_effect=NULL}

fixed_effect_name=c(fixed_effect_name,list(fixed_effect))
random_effect_name=c(random_effect_name,list(random_effect))
covariate_effect_name=c(covariate_effect_name,list(covariate_effect))
}
}


#software create DIR file 
COMMENT=matrix("",nrow=2,ncol=length(phe_col_names)+10)

ANALYSE=matrix("",nrow=2,ncol=length(phe_col_names)+10)

DATA=matrix("",nrow=2,ncol=length(phe_col_names)+10)

VARIABLE=matrix("",nrow=10,ncol=length(phe_col_names)+10)

VAR_STR=matrix("",nrow=4+length(relationship_name),ncol=length(phe_col_names)+10)

SOLUTION=matrix("",nrow=2,ncol=length(phe_col_names)+10)

MODEL=matrix("",nrow=1+Trait_n*(Trait_n+6),ncol=length(phe_col_names)+10)

PRIOR=matrix("",nrow=(Trait_n*(Trait_n+1)/2)*6,ncol=length(phe_col_names)+10)

PARAMETER=matrix("",nrow=10*length(target_trait_name),ncol=length(phe_col_names)+10)




#$COMMENT
COMMENT[1,1]="$COMMENT"

#$ANALYSE 
if(is.null(dmu_module)){dmu_module="dmuai"}
dmu_module_code=ifelse(dmu_module=="dmuai",1,
                   ifelse(dmu_module=="rjmc",2,
		          ifelse(dmu_module=="dmu4",11,
			      ifelse(dmu_module=="dmu5",12,NA))))
	
if(is.null(dmu_algorithm_code)){	
dmu_algorithm_code=ifelse(dmu_module=="dmuai",1,
                   ifelse(dmu_module=="rjmc",0,
		          ifelse(dmu_module=="dmu4",14,
			      ifelse(dmu_module=="dmu5",2,NA))))}		

		  
ANALYSE[1,1]=paste0("$ANALYSE ",dmu_module_code," ",dmu_algorithm_code," 0 0 ")

#$DATA 
DATA[1,1]=paste0("$DATA  ASCII (",integer_n,",",real_n," ,",missing_value,") ",phe_file)


#$VARIABLE
VARIABLE[1,1]="$VARIABLE"
VARIABLE[3,1:integer_n]=paste0("I",1:integer_n)
VARIABLE[4,1:integer_n]=Integer_col_names
VARIABLE[6,1:real_n]=paste0("R",1:real_n)
VARIABLE[7,1:real_n]=Real_col_names
VARIABLE[3,1]="#I1";VARIABLE[6,1]="#R1";


#将 I1.... R1.... 和变量名称对齐
for(i in 1:ncol(VARIABLE)){
integer_space=nchar(VARIABLE[4,i])
VARIABLE[3,i]=sprintf(paste0("%-",integer_space,"s"),VARIABLE[3,i])
real_space=nchar(VARIABLE[7,i])
VARIABLE[6,i]=sprintf(paste0("%-",real_space,"s"),VARIABLE[6,i])
}


#$MODEL
MODEL[1,1]="$MODEL"
MODEL[2,1]=paste0(Trait_n," ",Trait_n," 0 0 0")
MODEL[3:(2+Trait_n),1]="0"

if(!is.null(fixed_effect_name)){if(length(fixed_effect_name)!=Trait_n){stop("The number of fixed effect names doesn't match the number of traits")}}
if(!is.null(random_effect_name)){if(length(random_effect_name)!=Trait_n){stop("The number of random effect names doesn't match the number of traits")}}
if(!is.null(covariate_effect_name)){if(length(covariate_effect_name)!=Trait_n){stop("The number of covariate effect names doesn't match the number of traits")}}



#User_define模型下，添加多个遗传效应 放入 random_effect_name, 后续只需要在 trait_fixed_random_line 中，把遗传效应的位置替换成 Id的位置，其余不变
if(("User_define"%in%analysis_model)&length(relationship_name)>=2){
User_define_effect=paste0("User_define",1:(length(relationship_name)-1))
for(i in 1:length(random_effect_name)){
random_effect_name[[i]]=c(random_effect_name[[i]],User_define_effect)
random_effect_name[[i]]=c(genetic_effect_name,User_define_effect,setdiff(random_effect_name[[i]],c(genetic_effect_name,User_define_effect)))
}
Integer_col_names=c(Integer_col_names,User_define_effect)
}


#添加显性效应 放入 random_effect_name, 后续只需要在 trait_fixed_random_line 中，把dominance效应的位置替换成 Id的位置，其余不变
if(included_dominance_effect==TRUE){
dominance_effect="Added_dominance_effect"
for(i in 1:length(random_effect_name)){
random_effect_name[[i]]=c(random_effect_name[[i]],dominance_effect)
random_effect_name[[i]]=c(genetic_effect_name,dominance_effect,setdiff(random_effect_name[[i]],c(genetic_effect_name,dominance_effect)))
}
Integer_col_names=c(Integer_col_names,dominance_effect)
}


#添加永久环境效应 放入 random_effect_name, 后续只需要在 trait_fixed_random_line 中，把permanent的位置替换成 Id的位置，其余不变
if(identical(included_permanent_effect,TRUE)){included_permanent_effect=rep(list(included_permanent_effect),length(target_trait_name))}
if(identical(included_permanent_effect,FALSE)){included_permanent_effect=rep(list(included_permanent_effect),length(target_trait_name))}

permanent_effect="pe_effect"
if(permanent_effect%in%Integer_col_names){stop("pe_effect already stands for permant effect,please change this effect name in the phe_col_name!!! ")}
for(i in 1:length(random_effect_name)){

if(included_permanent_effect[[i]]==TRUE){

random_effect_name[[i]]=c(random_effect_name[[i]],permanent_effect)
random_effect_name[[i]]=c(genetic_effect_name,permanent_effect,setdiff(random_effect_name[[i]],c(genetic_effect_name,permanent_effect)))

}
}

if(TRUE %in% included_permanent_effect)Integer_col_names=c(Integer_col_names,permanent_effect)


#添加母性效应 放入 random_effect_name, 后续只需要将母性效应对应的随机效应编号 设置与 遗传效应的编号一样即可

if(!is.null(maternal_effect_name)){

for(i in 1:length(random_effect_name)){

maternal=maternal_effect_name[[i]] #列表

if(!is.null(maternal)){

if(!maternal%in%Integer_col_names){stop("Maternal effect name could not find in provided phe_col_names; ")}

random_effect_name[[i]]=c(random_effect_name[[i]],maternal)
random_effect_name[[i]]=c(genetic_effect_name,maternal,setdiff(random_effect_name[[i]],c(genetic_effect_name,maternal)))


}
}}



#记录所有性状的名称，并自定义随机效应序号

random_lev=as.factor(unique(do.call(c,random_effect_name))) #统计所有的随机效应名称，并将随机效应赋值
levels(random_lev)=c(genetic_effect_name,setdiff(as.character(random_lev),genetic_effect_name))
random_lev_number=as.numeric(random_lev)
random_lev_names=as.character(random_lev)
random_total_lev_number=random_lev_number[match(do.call(c,random_effect_name),random_lev_names)]


#添加残差效应序号：
residual_number=length(random_lev_number)+1
random_total_lev_number=c(random_total_lev_number,rep(residual_number,Trait_n))


#for assigning the names of AI-inv matrix
ai_random_lev_names=c(random_lev_names,"Res")
ai_random_total_lev_number=random_total_lev_number



#多个性状循环
for(i in 1:Trait_n){
trait=target_trait_name[i]
fixed=fixed_effect_name[[i]]
random=random_effect_name[[i]]
if(!genetic_effect_name%in%random){stop("Genetic effect name could not find in provided random effect name! ")}
random=c(genetic_effect_name,setdiff(random,c(genetic_effect_name)))#确保 genetic effect name 为随机效应第一位
covariate=covariate_effect_name[[i]]
regression=random_regression_effect_name[[i]]
maternal=maternal_effect_name[[i]] #列表

#判断性状名称、效应名称是否都在 提供的 phe_col_names 中
if(is.na(match(trait,Real_col_names))){
stop(paste0("Trait:",trait," could not find in provided phe_col_names; "))
}else{trait_pos=match(trait,Real_col_names)}



if(!is.null(fixed)){
fixed_pos=match(fixed,Integer_col_names)
if(NA%in%fixed_pos){stop(paste0("Fixed effect:",fixed[is.na(fixed_pos)]," could not find in provided phe_col_names; "))}
}else{fixed_pos=NULL}

if(!is.null(random)){

random_pos=match(random,Integer_col_names)

if(NA%in%random_pos){stop(paste0("Random effect:",random[is.na(random_pos)]," could not find in provided phe_col_names; "))}
}else{random_pos=NULL}


if(!is.null(covariate)){
covariate_pos=match(covariate,Real_col_names)
if(NA%in%covariate_pos){stop(paste0("Covariate effect:",covariate[is.na(covariate_pos)]," could not find in provided phe_col_names; "))}
}else {covariate_pos=NULL}


#随机回归效应-在covariate生成后进行修改
#嵌套的随机效应必须要出现在随机效应中
#嵌套的随机效应同时需要考虑在PRIOR文件中，每个随机效应下，有多少个对角的效应 eg.随机效应代码为，嵌套两个，那么就需要变为：
																				# 1 1 1 1
																				# 1 2 2 1
																				# 1 3 3 1
linked_number=0
if(!is.null(regression)){

if(TRUE %in% included_permanent_effect){cat("Permanent effect name in random regression model is:pe_effect! \n")}

##确定回归系数的种类
#regression_name_coef_set=sapply(regression,function(x)unlist(strsplit(x,split = "&"))[1])
#regression_name_coef_set=unique(regression_name_coef_set)
#n_regre=length(regression_name_coef_set)
#
#regression_split=strsplit(regression,split = "&")
#regression_name_effect_set=as.list(rep(NA,n_regre))
##确定每个回归系数匹配的效应
#for(i in 1:n_regre){
#
#for(j in 1:length(regression_split)){
#regression_name_coef=regression_split[[j]][1]       # 第一个名称是 多项式回归系数的名称
#regression_name_effect=regression_split[[j]][-1]    # 非第一个，剩余的是 该多项式回归系数嵌套的所有随机效应
#
#if(regression_name_coef==regression_name_coef_set[i]){
#regression_name_effect_set[[i]]=c(regression_name_effect_set[[i]],regression_name_effect)
#}
#}
#tmp_reg=unique(as.character(na.omit(regression_name_effect_set[[i]])))
#linked_number=linked_number+length(tmp_reg)
#pos_In_blanket=match(match(tmp_reg,Integer_col_names),
#                                   c(fixed_pos,random_pos)) # 嵌套的变量在括号中的位置
#pos_In_blanket=paste(pos_In_blanket,collapse = " ")    # 合并成一个字符串
#
#regression_name_coef_effect_pos=paste0(match(regression_name_coef,Real_col_names),
#                                                   "(",
#										   pos_In_blanket,
#										   ") "
#										   )
#print(regression_name_coef_effect_pos)										   
#covariate_pos=c(covariate_pos,regression_name_coef_effect_pos)
#}

reg_k=length(regression) #随机回归效应的个数

for(j in 1:reg_k){

regression_name=unlist(strsplit(regression[j],split = "&"))
  
regression_name_coef=regression_name[1]       # 第一个名称是 多项式回归系数的名称
regression_name_effect=regression_name[-1]             #非第一个，剩余的是 该多项式回归系数嵌套的所有随机效应

linked_number=linked_number+length(regression_name_effect)-1 #针对括号内有2个及以上嵌套效应的情况，需要将协变量的总数目进行修改

#if(FALSE%in%(regression_name_effect%in%random)){stop("Random effect in random_regression_effect_name don't exist in the random_effect_name!")}

if(FALSE%in%(regression_name_effect%in%c(fixed,random))){
stop("Regression effect in random_regression_effect_name don't exist in the random_effect_name or fixed_effect_name!")
}


pos_In_blanket=match(match(regression_name_effect,Integer_col_names),
                                   c(fixed_pos,random_pos)) # 嵌套的变量在括号中的位置
pos_In_blanket=paste(pos_In_blanket,collapse = " ")    # 合并成一个字符串

								   
regression_name_coef_effect_pos=paste0(match(regression_name_coef,Real_col_names),
                                                   "(",
										   pos_In_blanket,
										   ") "
										   )
covariate_pos=c(covariate_pos,regression_name_coef_effect_pos)
}

}


trait_fixed_random_line=paste0(trait_pos," 0 ",length(fixed_pos)+length(random_pos)," ",paste(c(fixed_pos,random_pos),collapse=" "))
random_line=paste(c(length(random),random_lev_number[match(random,random_lev_names)]),collapse=" ")
covariate_line=paste(c(length(covariate_pos)+linked_number,covariate_pos),collapse=" ")


#考虑永久环境效应，将  trait_fixed_random_line 中的 Added_permanent_effect 的位置替换成 Id 位置
if(TRUE%in%included_permanent_effect[[i]]){
permanent_effect_pos=match(permanent_effect,Integer_col_names)
random_pos[random_pos%in%permanent_effect_pos]=match(genetic_effect_name,Integer_col_names)
trait_fixed_random_line=paste0(trait_pos," 0 ",length(fixed_pos)+length(random_pos)," ",paste(c(fixed_pos,random_pos),collapse=" "))
}
 
#考虑显性效应，将  trait_fixed_random_line 中的 Added_dominance_effect 的位置替换成 Id 位置
if(included_dominance_effect==TRUE){
dominance_effect_pos=match(dominance_effect,Integer_col_names)
random_pos[random_pos%in%dominance_effect_pos]=match(genetic_effect_name,Integer_col_names)
trait_fixed_random_line=paste0(trait_pos," 0 ",length(fixed_pos)+length(random_pos)," ",paste(c(fixed_pos,random_pos),collapse=" "))
}

 
#考虑User_define模型，将  trait_fixed_random_line 中的 User_define_effect 的位置替换成 Id 位置
if(("User_define"%in%analysis_model)&length(relationship_name)>=2){
User_define_effect_pos=match(User_define_effect,Integer_col_names)
random_pos[random_pos%in%User_define_effect_pos]=match(genetic_effect_name,Integer_col_names)
trait_fixed_random_line=paste0(trait_pos," 0 ",length(fixed_pos)+length(random_pos)," ",paste(c(fixed_pos,random_pos),collapse=" "))
}


#考虑母性效应，将  random_line 中的 maternal effect 的随机效应编号替换为遗传效应的随机效应编号
if(!is.null(maternal)){

#将maternal effect的水平修改成 和  遗传效应水平一致
#其他随机效应的水平均减去1

tmp_random_lev_number=random_lev_number

tmp_random_lev_number[match(maternal,random_lev_names)]=tmp_random_lev_number[match(genetic_effect_name,random_lev_names)]

tmp_random_lev_number[tmp_random_lev_number!=1]=tmp_random_lev_number[tmp_random_lev_number!=1]-1

random_line=paste(c(length(random),tmp_random_lev_number[match(random,random_lev_names)]),collapse=" ")

}
 

#考虑social genetic effect, 需要额外对DIR文件中，固定效应-随机效应行 和 随机效应编号 行进行修改
if(TRUE%in%include_social_effect){
if(TRUE%in%include_social_effect[[i]]){

#修改固定效应-随机效应行， 在integer_group_name后添加(0)

random_pos[match(integer_group_names,random)]=paste0(random_pos[match(integer_group_names,random)],"(","0",")")
trait_fixed_random_line=paste0(trait_pos," 0 ",length(fixed_pos)+length(random_pos)," ",paste(c(fixed_pos,random_pos),collapse=" "))

#修改随机效应编号 行，形如 +1+1+1

temp=random_lev_number[match(random,random_lev_names)]

temp1=random_lev_number[match(setdiff(random,integer_group_names),random_lev_names)] #从随机效应名称中排除 integer_group_names
temp2=paste(rep(1,length(random)-length(temp1)),collapse = "+")

random_line=paste(c(length(random),temp1,temp2),collapse=" ")

}}



MODEL[i+2+Trait_n,1]=trait_fixed_random_line
MODEL[(i+2+Trait_n)+Trait_n,1]=random_line
MODEL[(i+2+Trait_n)+2*Trait_n,1]=covariate_line

}

#残差限定
if(is.null(residual_cov_trait)){
MODEL[(Trait_n+2+Trait_n)+2*Trait_n+1,1]="0"
}else{
if(length(residual_cov_trait)>Trait_n){stop("The number of residual covariance traits is larger than the number of analyse traits \n")}
residual_cov_trait_n=0

for(i in 1:length(residual_cov_trait)){

residual_cov_combination=residual_cov_trait[[i]]

if(!is.null(residual_cov_combination)){
residual_cov_trait_n=residual_cov_trait_n+1
trait1=residual_cov_combination[1]
trait2=residual_cov_combination[2]
MODEL[(Trait_n+2+Trait_n)+2*Trait_n+1+i,1]=paste0(match(trait1,target_trait_name)," ",match(trait2,target_trait_name))
}
}

MODEL[(Trait_n+2+Trait_n)+2*Trait_n+1,1]=residual_cov_trait_n
}


if(is.null(kinship_file)&!is.null(relationship_name)){

	
	kinship_file=paste0(relationship_path,"/",relationship_name)

}

#$VAR_STR
#定义 $VAR_STR 中的文件名称
if(is.null(relationship_name)&is.null(kinship_file)){
stop("Please provide relationship name!")
}else{

addtive_relationship_name=kinship_file[1]
if(analysis_model%in%c("GBLUP_AD")){dominance_relationship_name=kinship_file[2]}
if(analysis_model%in%c("SSBLUP_A")){SSBLUP_G_matrix_name=kinship_file[2]}
}

if(analysis_model=="PBLUP_A"){
VAR_STR[1,1]=paste0("$VAR_STR 1 PED ",ped_inbred_number," ASCII   ",addtive_relationship_name)
}else if (analysis_model=="PBLUP_AD"){
VAR_STR[1,1]=paste0("$VAR_STR 1 PED ",ped_inbred_number," ASCII   ",addtive_relationship_name)
VAR_STR[3,1]=paste0("$VAR_STR 2 COR ASCII     ",dominance_relationship_name)
}else if (analysis_model=="GBLUP_A"){
VAR_STR[1,1]=paste0("$VAR_STR 1 COR ASCII     ",addtive_relationship_name)
}else if (analysis_model=="GBLUP_AD"){
VAR_STR[1,1]=paste0("$VAR_STR 1 COR ASCII     ",addtive_relationship_name)
VAR_STR[3,1]=paste0("$VAR_STR 2 COR ASCII     ",dominance_relationship_name)
}else if (analysis_model=="SSBLUP_A"){

VAR_STR[1,1]=paste0("$VAR_STR 1 PGMIX 1 ASCII  ",addtive_relationship_name,"    ",
                      relationship_path,"/",IND_geno_file_name,"   ",
				   SSBLUP_G_matrix_name,"  ",
				   SSBLUP_omega,"  G-ADJUST")
}else if (analysis_model=="User_define"){

for(i in 1:length(relationship_name)){
VAR_STR[i,1]=paste0("$VAR_STR ",i," COR ASCII     ",kinship_file[i])
}}


#$SOLUTION
#if(dmu_module!="rjmc"){
SOLUTION[1,1]="$RESIDUALS ASCII"
SOLUTION[2,1]="$SOLUTION"
#}
#$PRIOR

if(include_social_effect==FALSE){  #social effect 不知道该如何构建PRIOR文件，

PRIOR[1,1]="$PRIOR"
if(!is.null(provided_prior_file_name)&!is.null(provided_prior_file_path)){
given_prior=fread(paste0(provided_prior_file_path,"/",provided_prior_file_name),data.table=F)
given_prior=as.matrix(given_prior)
PRIOR[2:(nrow(given_prior)+1),1:ncol(given_prior)]=given_prior
}else{

iteration_pos=1
for(i in 1:(length(random_lev_number)+1)){  #随机效应的数量 

	for(j in 1:sum(random_total_lev_number%in%i)){
	
		for(k in 1:j){
			iteration_pos=iteration_pos+1
			PRIOR[iteration_pos,1]=i
			PRIOR[iteration_pos,2]=j
			PRIOR[iteration_pos,3]=k
			if(j!=k){
			PRIOR[iteration_pos,4]=0.5
			}else {PRIOR[iteration_pos,4]=1}
	}}}
}	
}

#残差限定

for(i in 1:length(residual_cov_trait)){

residual_cov_combination=residual_cov_trait[[i]]
residual_number=residual_number

if(!is.null(residual_cov_combination)){

residual_cov_combination_pos=match(residual_cov_combination,target_trait_name)

trait1_pos=residual_cov_combination_pos[1]
trait2_pos=residual_cov_combination_pos[2]
PRIOR=PRIOR[!(PRIOR[,1]%in%residual_number&PRIOR[,2]%in%trait1_pos&PRIOR[,3]%in%trait2_pos),]
PRIOR=PRIOR[!(PRIOR[,1]%in%residual_number&PRIOR[,2]%in%trait2_pos&PRIOR[,3]%in%trait1_pos),]		
}}


#不显示 PRIOR结果， 因为针对随机回归模型，PRIOR结果存在些许问题，使用DMU默认值即可
#如果不是人为提供PRIOR文件的话，PRIOR设置为NULL
if(!is.null(provided_prior_file_name)&!is.null(provided_prior_file_path)){

}else{
PRIOR=NULL
}


# module_parameter

if(dmu_module=="dmuai"){

PARAMETER[1,1]="$DMUAI"
PARAMETER[2,1]=EM_iter
PARAMETER[3,1]=iteration_criteria
PARAMETER[4,1]="1.0d-6"
PARAMETER[5,1]="1"
PARAMETER[6,1]="0"

}else if(dmu_module=="dmu5"){
PARAMETER[1,1]="$DMU5"
PARAMETER[2,1]=paste0(dmu5_iter," 1e-9")
PARAMETER[3,1]="512"

}else if(dmu_module=="rjmc"){
PARAMETER[1,1]="$RJMC"
PARAMETER[2,1]="3 0"
PARAMETER[3,1]=paste(gibbs_seeds,collapse=" ")
PARAMETER[4,1]=paste(gibbs_n_burnin,gibbs_n_rounds,gibbs_n_gaps,collapse=" ")

if(!is.null(PRIOR)){
n_var=length(unique(PRIOR[,1]))
}else{
n_var=length(unique(random_lev))+1
}
PARAMETER[5:(5+n_var-1),1]=paste(1:n_var,6*length(target_trait_name))
}






#输出结果
#DIR=rbind(COMMENT,ANALYSE,DATA,VARIABLE,VAR_STR,MODEL,SOLUTION,PRIOR,PARAMETER)
#DIR=DIR[!DIR[,1]%in%"",]

COMMENT=COMMENT[!COMMENT[,1]%in%"",]
ANALYSE=ANALYSE[!ANALYSE[,1]%in%"",]
DATA=DATA[!DATA[,1]%in%"",]
VARIABLE=VARIABLE[!VARIABLE[,1]%in%"",]
VAR_STR=VAR_STR[!VAR_STR[,1]%in%"",]
MODEL=MODEL[!MODEL[,1]%in%"",]
SOLUTION=SOLUTION[!SOLUTION[,1]%in%"",]
PRIOR=PRIOR[!PRIOR[,1]%in%"",]
PARAMETER=PARAMETER[!PARAMETER[,1]%in%"",]

DIR=rbind(COMMENT,"",ANALYSE,"",DATA,"",VARIABLE,"",VAR_STR,"",MODEL,"",SOLUTION,"",PRIOR,"",PARAMETER)

setwd(output_DIR_path)
if(is.null(output_DIR_name)){output_DIR_name="Trait.DIR"}
write.table(DIR,output_DIR_name,quote=F,row.names=F,col.names=F,sep=" ")

return(list(DIR=union_dir(DIR),random_effect_name=random_effect_name,ai_name=list(lev=ai_random_total_lev_number,lev_name=ai_random_lev_names)))
}


union_dir<-function(data){
union_data=matrix(NA,nrow=nrow(data),ncol=1)
for(i in 1:nrow(data)){
union_data[i,1]=paste(data[i,],collapse=" ")
}
return(union_data)
}

#

generate_social_phe<-function(phe=NULL,
							  integer_n=3,
							  social_effect_group_name=NULL,
							  genetic_effect_name=NULL
							  ){

if(!social_effect_group_name%in%colnames(phe)){stop("Group name is not included in provided phe_col_names")}
if(!genetic_effect_name%in%colnames(phe)){stop("Genetic effect name is not included in provided phe_col_names")}
phe=as.data.frame(phe,stringsAsFactors=F)

phe_col_names=colnames(phe)
Variable_n=length(phe_col_names)
real_n=Variable_n-integer_n
Integer_col_names=phe_col_names[1:integer_n]
Real_col_names=phe_col_names[(integer_n+1):Variable_n]

res_Integer_col_names=setdiff(Integer_col_names,c(genetic_effect_name,social_effect_group_name)) #除遗传和group之外的整型变量名称
res_Real_col_names=Real_col_names #剩余的实型变量名称


#统计最大的group_size
group_size=as.data.frame(table(phe[,social_effect_group_name]),stringsAsFactors=F)
group_size[,1]=as.numeric(group_size[,1])
max_size=max(group_size[,2])-1
colnames(group_size)=c("Group","Size")

if(max_size==0){stop("The number of indivuals in each group is equal to 1, please check your group record!!!")}


#构建group_ind_set和group的对应列表
group_ind_set <- vector(mode = "list", length = nrow(group_size))
group_i_set=as.numeric(group_size$Group)
for(i in 1:nrow(group_size)){
group_ind_set[[i]]=phe[phe[,social_effect_group_name]%in%group_i_set[i],genetic_effect_name]
}


#group信息和 id名称所在的列
group_column=match(social_effect_group_name,colnames(phe))-1
id_column=match(genetic_effect_name,colnames(phe))-1


group_result=get_rest_id(ref=group_ind_set,ref_index=group_i_set,
                 phe=as.matrix(phe),column_n=max_size,
			  group_column=group_column ,id_column=id_column)

integer_group=group_result$integer_group
real_group=group_result$real_group
rm(group_result);gc();

integer_group_names=paste0("Gr_id",1:ncol(integer_group))
real_group_names=paste0("Status_Gr_id",1:ncol(real_group))

colnames(integer_group)=integer_group_names
colnames(real_group)=real_group_names

phe=cbind(phe,integer_group,real_group)


#按整型和实型将表型数据的列重新排列
phe=phe[,c(genetic_effect_name,res_Integer_col_names,social_effect_group_name,integer_group_names,
            res_Real_col_names,real_group_names)]

return(list(phe=phe,integer_n=integer_n+length(integer_group_names),
			integer_group_names=integer_group_names,real_group_names=real_group_names))			  
}