misc/phyloscan.Rakai.gender.R

RakaiFull.gender.171122.propfemalepos.communities.merged<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)	
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/community_hivprev_byGenderStudyVisit.rda"
	load(infile)
	df	<- as.data.table(melt(female.negative, varnames=c('COMM_NUM', 'VISIT'), value.name='FEMALE_NEG'))	
	tmp	<- as.data.table(melt(male.negative, varnames=c('COMM_NUM', 'VISIT'), value.name='MALE_NEG'))	
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)	
	tmp	<- as.data.table(melt(female.positive, varnames=c('COMM_NUM', 'VISIT'), value.name='FEMALE_POS'))
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)
	tmp	<- as.data.table(melt(male.positive, varnames=c('COMM_NUM', 'VISIT'), value.name='MALE_POS'))
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)
	set(df, NULL, 'COMM_NUM', df[, gsub('^107$|^16$','16m',gsub('^776$|^51$','51m',gsub('^4$|^24$','24m',gsub('^1$|^22$','22m',as.character(COMM_NUM)))))])
	df	<- subset(df, VISIT%in%c(15,15.1,16))
	
	#	select relevant communities
	tmp	<- sort(unique(c(as.character(rtpdm$MALE_COMM_NUM), as.character(rtpdm$MALE_COMM_NUM))))
	tmp	<- data.table(COMM_NUM=tmp)
	df	<- merge(df, tmp, by=c('COMM_NUM'))
	df[, FEMALE:= FEMALE_NEG+FEMALE_POS]
	df[, MALE:= MALE_NEG+MALE_POS]
	df[, POS:= MALE_POS+FEMALE_POS]
	df[, NEG:= MALE_NEG+FEMALE_NEG]
	df[, FEMALE_POS_P:= FEMALE_POS/POS]
	df	<- subset(df, MALE!=0 | FEMALE!=0)
	
	#	divide into groups by proportion of seropos who are female
	tmp	<- df[, list(FEMALE_POS_PM=mean(FEMALE_POS_P)), by='COMM_NUM']
	setkey(tmp, FEMALE_POS_PM)
	tmp[, FEMALE_POS_C:= cut(FEMALE_POS_PM, breaks=c(0.5, 0.55, 0.56, 0.58, 0.68, 0.69,  1), labels=c('a','b','c','d','e','f'))]
	
	ggplot(tmp, aes(x=FEMALE_POS_PM, fill=FEMALE_POS_C)) + geom_histogram()
	df	<- merge(df, subset(tmp, select=c(COMM_NUM, FEMALE_POS_C)), by='COMM_NUM')
	
	#	sum POS and FEMALE_POS by community groups	
	dg	<- df[, list(FEMALE_POS=sum(FEMALE_POS), POS=sum(POS)), by='FEMALE_POS_C']
	
	#	add community groups to rtpdm
	tmp	<- unique(subset(df, select=c(COMM_NUM,FEMALE_POS_C)))
	setnames(tmp, 'COMM_NUM', 'MALE_COMM_NUM')
	tmp	<- merge(subset(rtpdm, MALE_COMM_TYPE==FEMALE_COMM_TYPE), tmp, by='MALE_COMM_NUM')
	tmp	<- tmp[, {
				z<- as.numeric(binconf(length(which(grepl('mf',SELECT))), length(SELECT)))
				list(	MF_TRM	= length(which(grepl('mf',SELECT))),
						MF_TRM_RATIO	= length(which(grepl('mf',SELECT)))/length(which(grepl('fm',SELECT))),
						LMF_TRM_RATIO	= log(length(which(grepl('mf',SELECT)))/length(which(grepl('fm',SELECT)))),
						TRM		= length(SELECT),
						MF_MED	= z[1],
						MF_CL	= z[2],
						MF_CU	= z[3])
			}, by='FEMALE_POS_C']
	dg	<- merge(dg, tmp, by='FEMALE_POS_C')
	tmp	<- dg[, {
				z<- binconf(FEMALE_POS,POS)
				list(FEMALE_POS_M=z[1], FEMALE_POS_CL=z[2], FEMALE_POS_CU=z[3])
			}, by='FEMALE_POS_C']
	dg	<- merge(dg, tmp, by='FEMALE_POS_C')	
	

	mhi.4 	<- map2stan(
			alist(
					MF_TRM ~ dbinom(TRM, ptm),
					logit(ptm) <- a + b*logit(pdiagf[i]),
					FEMALE_POS ~ dbinom(POS, pdiagf),									 
					a ~ dnorm(0,100),
					b ~ dnorm(0,10),
					pdiagf ~ dnorm(0.5,1)
			),
			data=as.data.frame(dg), start=list(a=0, b=0, pdiagf=rep(0.5,6)),
			warmup=5e2, iter=5e3, chains=1, cores=4
	)	
	summary(mhi.4)	# b posterior median nearly 1; credibility intervals wide but this is good
	post	<- extract.samples(mhi.4)
	dummy	<- function(z) quantile(logistic(with(post, a+b*z)), prob=c(0.5, 0.025, 0.975))
	tmp		<- data.table(FEMALE_POS_P=seq(0.5,0.75,0.001))
	tmp		<- tmp[, 	{
				z<- dummy(logit(FEMALE_POS_P))
				list('MF_median'=z[1], 'MF_cl'=z[2], 'MF_cu'=z[3])
			}, by='FEMALE_POS_P']	
	ggplot(dg) +
			geom_ribbon(data=tmp, aes(x=FEMALE_POS_P, ymin=MF_cl, ymax=MF_cu), fill='black', alpha=0.25) +
			geom_line(data=tmp, aes(x=FEMALE_POS_P, y=MF_median), colour='grey50', size=1) +
			geom_abline(intercept=0, slope=1, lty=2) +
			geom_point(aes(x=FEMALE_POS_M, y=MF_MED, colour=FEMALE_POS_C), size=2) +
			geom_errorbar(aes(x=FEMALE_POS_M, ymin=MF_CL, ymax=MF_CU, colour=FEMALE_POS_C), width=0.03, size=0.9) +
			geom_errorbarh(aes(x=FEMALE_POS_M, y=MF_MED, xmin=FEMALE_POS_CL, xmax=FEMALE_POS_CU, colour=FEMALE_POS_C), height=0.05, size=0.9) +
			scale_x_continuous(labels=scales:::percent, breaks=seq(0,1,0.05)) +
			scale_y_continuous(labels=scales:::percent, breaks=seq(0,1,0.1)) +
			scale_colour_brewer(palette='Set2') +
			theme_bw() +
			labs(	x='\nproportion of females among newly diagnosed', 
					y='male to female transmission\namong phylogenetically inferred transmission events\n',
					colour='community group')
	ggsave(file=paste0(outfile.base,'_trmMF_vs_diagFM_by_commgroup.pdf'), w=6, h=4.5)
}

RakaiFull.gender.171122.propfemalepos.communitytype<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)	
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/community_hivprev_byGenderStudyVisit.rda"
	load(infile)
	df	<- as.data.table(melt(female.negative, varnames=c('COMM_NUM', 'VISIT'), value.name='FEMALE_NEG'))	
	tmp	<- as.data.table(melt(male.negative, varnames=c('COMM_NUM', 'VISIT'), value.name='MALE_NEG'))	
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)	
	tmp	<- as.data.table(melt(female.positive, varnames=c('COMM_NUM', 'VISIT'), value.name='FEMALE_POS'))
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)
	tmp	<- as.data.table(melt(male.positive, varnames=c('COMM_NUM', 'VISIT'), value.name='MALE_POS'))
	df	<- merge(df, tmp, by=c('COMM_NUM', 'VISIT'), all=TRUE)
	set(df, NULL, 'COMM_NUM', df[, gsub('^107$|^16$','16m',gsub('^776$|^51$','51m',gsub('^4$|^24$','24m',gsub('^1$|^22$','22m',as.character(COMM_NUM)))))])
	df	<- subset(df, VISIT%in%c(15,15.1,16))
	
	tmp	<- sort(unique(c(as.character(rtpdm$MALE_COMM_NUM), as.character(rtpdm$MALE_COMM_NUM))))
	tmp	<- data.table(COMM_NUM=tmp)
	df	<- merge(df, tmp, by=c('COMM_NUM'))
	df	<- df[, list(FEMALE_NEG=sum(FEMALE_NEG), MALE_NEG=sum(MALE_NEG), FEMALE_POS=sum(FEMALE_POS), MALE_POS=sum(MALE_POS)), by='COMM_NUM']
	
	tmp	<- unique(subset(rtpdm, select=c(MALE_COMM_NUM,MALE_COMM_TYPE)))
	setnames(tmp, c('MALE_COMM_NUM','MALE_COMM_TYPE'), c('COMM_NUM','COMM_TYPE'))
	df	<- merge(df, tmp, by='COMM_NUM')
	dg	<- df[, list(FEMALE_NEG=sum(FEMALE_NEG), MALE_NEG=sum(MALE_NEG), FEMALE_POS=sum(FEMALE_POS), MALE_POS=sum(MALE_POS)), by='COMM_TYPE']
	
	dg[, MALE_PROP_DIAG:= MALE_POS/(MALE_NEG+MALE_POS)]
	dg[, FEMALE_PROP_DIAG:= FEMALE_POS/(FEMALE_NEG+FEMALE_POS)]
	dg[, FM_DIAG_RATIO:=FEMALE_PROP_DIAG/MALE_PROP_DIAG]
	dg[, LFM_DIAG_RATIO:=log(FM_DIAG_RATIO)]
	dg[, FEMALE:= FEMALE_NEG+FEMALE_POS]
	dg[, MALE:= MALE_NEG+MALE_POS]
	dg[, POS:= MALE_POS+FEMALE_POS]
	dg[, NEG:= MALE_NEG+FEMALE_NEG]
	tmp	<- dg[, {
				z<- binconf(FEMALE_POS,POS)
				list(FM_DIAG_PROP_M=z[1], FM_DIAG_PROP_CL=z[2], FM_DIAG_PROP_CU=z[3])
			}, by='COMM_TYPE']
	dg	<- merge(dg, tmp, by='COMM_TYPE')	
	dg[, binconf(sum(FEMALE_POS), sum(POS))]				#	0.6008119 0.5901207 0.6114079
	dg[, exp(logit(binconf(sum(FEMALE_POS), sum(POS))))]	#	1.505085  1.439742  1.573393
	
	tmp	<- subset(rtpdm, MALE_COMM_TYPE==FEMALE_COMM_TYPE)
	tmp	<- tmp[, {
				z<- as.numeric(binconf(length(which(grepl('mf',SELECT))), length(SELECT)))
				list(	MF_TRM	= length(which(grepl('mf',SELECT))),
						MF_TRM_RATIO	= length(which(grepl('mf',SELECT)))/length(which(grepl('fm',SELECT))),
						LMF_TRM_RATIO	= log(length(which(grepl('mf',SELECT)))/length(which(grepl('fm',SELECT)))),
						TRM		= length(SELECT),
						MF_MED	= z[1],
						MF_CL	= z[2],
						MF_CU	= z[3])
			}, by='MALE_COMM_TYPE']
	setnames(tmp, 'MALE_COMM_TYPE', 'COMM_TYPE')
	dg	<- merge(dg, tmp, by='COMM_TYPE')
	dg[, COMM_TYPE_2:= as.integer(factor(COMM_TYPE))]
	
	ggplot(dg, aes(x=FM_DIAG_RATIO, y=MF_MED, ymin=MF_CL, ymax=MF_CU)) +
			geom_point() +
			geom_errorbar() +
			theme_bw()
	
	mh.2 	<- map2stan(
			alist(
					MF_TRM ~ dbinom(TRM, ptm),
					logit(ptm) <- a + b*pdiagf[i],
					FEMALE_POS ~ dbinom(POS, pdiagf),									 
					a ~ dnorm(0,100),
					b ~ dnorm(0,10),
					#pdiagf ~ dbeta(1,1),
					pdiagf ~ dnorm(0.5,1)
			),
			data=as.data.frame(dg), start=list(a=0, b=0, pdiagf=rep(0.5,3)),
			warmup=5e2, iter=5e3, chains=1, cores=4
	)	
	mh.3 	<- map2stan(
			alist(
					MF_TRM ~ dbinom(TRM, ptm),
					logit(ptm) <- a + b*logit(pdiagf[i]),
					FEMALE_POS ~ dbinom(POS, pdiagf),									 
					a ~ dnorm(0,100),
					b ~ dnorm(0,10),
					pdiagf ~ dnorm(0.5,1)
			),
			data=as.data.frame(dg), start=list(a=0, b=0, pdiagf=rep(0.5,3)),
			warmup=5e2, iter=5e3, chains=1, cores=4
	)
	
	summary(mh.3)	# b posterior median nearly 1; credibility intervals wide but this is good
	post	<- extract.samples(mh.3)
	dummy	<- function(z) quantile(logistic(with(post, a+b*z)), prob=c(0.5, 0.025, 0.975))
	tmp		<- data.table(FM_DIAG_PROP=seq(0.5,0.75,0.001))
	tmp		<- tmp[, {
				z<- dummy(logit(FM_DIAG_PROP))
				list('MF_median'=z[1], 'MF_cl'=z[2], 'MF_cu'=z[3])
			}, by='FM_DIAG_PROP']
	
	ggplot(dg) +
			geom_ribbon(data=tmp, aes(x=FM_DIAG_PROP, ymin=MF_cl, ymax=MF_cu), fill='black', alpha=0.25) +
			geom_line(data=tmp, aes(x=FM_DIAG_PROP, y=MF_median), colour='grey50', size=1) +
			geom_abline(intercept=0, slope=1, lty=2) +
			geom_point(aes(x=FM_DIAG_PROP_M, y=MF_MED, colour=COMM_TYPE), size=2) +
			geom_errorbar(aes(x=FM_DIAG_PROP_M, ymin=MF_CL, ymax=MF_CU, colour=COMM_TYPE), width=0.03, size=0.9) +
			geom_errorbarh(aes(x=FM_DIAG_PROP_M, y=MF_MED, xmin=FM_DIAG_PROP_CL, xmax=FM_DIAG_PROP_CU, colour=COMM_TYPE), height=0.05, size=0.9) +
			scale_x_continuous(labels=scales:::percent, breaks=seq(0,1,0.05)) +
			scale_y_continuous(labels=scales:::percent, breaks=seq(0,1,0.1)) +
			scale_colour_brewer(palette='Set2') +
			theme_bw() +
			labs(	x='\nproportion of females among newly diagnosed', 
					y='male to female transmission\namong phylogenetically inferred transmission events\n',
					colour='community type')
	ggsave(file=paste0(outfile.base,'_trmMF_vs_diagFM_by_commtype.pdf'), w=6, h=4.5)
}

RakaiFull.gender.171122.hh.are.femalerecipients.transmitters<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)
	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	dfr	<- subset(rtpdm, grepl('mf',SELECT), select=c(FEMALE_RID, SAMEHH))
	setnames(dfr, 'SAMEHH', 'SAMEHH_OF_FEMALE_RECIPIENT')
	tmp	<- subset(rtpdm, grepl('fm',SELECT))
	dfr	<- merge(dfr, tmp, by='FEMALE_RID', all.x=1)
	tmp	<- dfr[, list(N_TRMS= length(which(!is.na(MALE_RID))) ), by=c('FEMALE_RID','SAMEHH_OF_FEMALE_RECIPIENT')]
	tmp[, table(SAMEHH_OF_FEMALE_RECIPIENT, N_TRMS>=1)]
	
	fisher.test( matrix(c(6,1,83,83),2,2) )
	#	p-value = 0.1185
	#	alternative hypothesis: true odds ratio is not equal to 1
	#95 percent confidence interval:
   	#	0.6984958 278.8704511
	#	sample estimates:
	#odds ratio 
  	#	5.949555 
}

RakaiFull.gender.171122.hh.sources.of.transmitters<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)
	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	dft	<- subset(rtpdm, grepl('mf',SELECT), select=c(MALE_RID, SAMEHH))
	setnames(dft, 'SAMEHH', 'SAMEHH_OF_MALE_TRANSMITTER')
	tmp	<- subset(rtpdm, grepl('fm',SELECT))
	dft	<- merge(dft, tmp, by='MALE_RID', all.x=1)
	tmp	<- dft[, list(N_TRMS= length(which(!is.na(FEMALE_RID))) ), by=c('MALE_RID','SAMEHH_OF_MALE_TRANSMITTER')]
	tmp[, table(SAMEHH_OF_MALE_TRANSMITTER, N_TRMS>=1)]
	
	fisher.test( matrix(c(6,1,83,83),2,2) )
	#	p-value = 0.1185
	#	alternative hypothesis: true odds ratio is not equal to 1
	#95 percent confidence interval:
	#	0.6984958 278.8704511
	#	sample estimates:
	#odds ratio 
	#	5.949555 
}

RakaiFull.gender.171122.hh.multivariatemodelswithprevalence.stan.with.threshold<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)
	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	
	#rtpdm[, table(PAIR_COMM_TYPE)]
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	
	df		<- subset(rtpdm, FEMALE_HH_NUM==MALE_HH_NUM, select=c(	MALE_RID, FEMALE_RID, PTY_RUN, IDCLU, LINK_MF, POSTERIOR_SCORE_MF, COUPLE2, SAMEHH, PAIR_COMM_TYPE, FEMALE_COMM_NUM,
					MALE_RECENTVL, MALE_SEXP1YR, MALE_SEXP1OUT2, MALE_OCCUP_OLLI, MALE_OCAT, MALE_EDUCAT, MALE_CIRCUM,
					FEMALE_RECENTVL, FEMALE_SEXP1YR, FEMALE_SEXP1OUT2, FEMALE_OCCUP_OLLI, FEMALE_OCAT, FEMALE_EDUCAT 
			))
	#	missing data: fill in
	set(df, df[, which(is.na(MALE_RECENTVL))], 'MALE_RECENTVL', -1)
	set(df, df[, which(is.na(FEMALE_RECENTVL))], 'FEMALE_RECENTVL', -1)
	set(df, df[, which(is.na(MALE_CIRCUM))], 'MALE_CIRCUM', 'Unknown')
	set(df, df[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(df, df[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	#	add estimated HIV prevalences (medians)
	infile	<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30trmMF_estimated_prevalenceratio.rda"	
	load(infile)
	tmp		<- dcast.data.table(dgg, COMM_NUM~variable, value.var='M')
	setnames(tmp, 'COMM_NUM', 'FEMALE_COMM_NUM')
	df		<- merge(df, tmp, by='FEMALE_COMM_NUM')
	
	#for(x in colnames(df)) print( c(x, any(is.na(df[[x]]))) )
	#	prepare data for STAN
	df[, COMM_FISH:= as.integer(PAIR_COMM_TYPE=='fisherfolk')]
	df[, COMM_AGR:= as.integer(PAIR_COMM_TYPE=='agrarian')]
	df[, COMM_TRAD:= as.integer(PAIR_COMM_TYPE=='trading')]
	df[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	df[, FE_NOEDU:= as.integer(FEMALE_EDUCAT=='None')]
	df[, FE_NOEDU_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, MA_NOEDU:= as.integer(MALE_EDUCAT=='None')]
	df[, MA_NOEDU_MISS:= as.integer(MALE_EDUCAT=='Unknown')]	
	df[, MA_CIRCUM:= as.integer(MALE_CIRCUM=='Y')]
	df[, MA_CIRCUM_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, FE_SEXP1YR_G1:= as.integer(FEMALE_SEXP1YR!='1')]
	df[, MA_SEXP1YR_G1:= as.integer(MALE_SEXP1YR!='1')]
	df[, MA_OC_AGRO:= as.integer(MALE_OCAT=='Agro/House')]	
	df[, MA_OC_FISH:= as.integer(MALE_OCAT=='Fishing')]
	df[, MA_OC_TRAD:= as.integer(MALE_OCAT=='Trading/Shop keeper')]
	df[, MA_OC_OTH:= as.integer(MALE_OCAT%in%c('zother','Bar/waitress','Student','Boda/Trucking'))]	
	df[, FE_OC_AGRO:= as.integer(FEMALE_OCAT=='Agro/House')]
	df[, FE_OC_BAR:= as.integer(FEMALE_OCAT=='Bar/waitress')]			
	df[, FE_OC_TRAD:= as.integer(FEMALE_OCAT=='Trading/Shop keeper')]
	df[, FE_OC_OTH:= as.integer(FEMALE_OCAT%in%c('zother','Student'))]		
	df[, PF2:= PF-mean(PF)]
	df[, PM2:= PM-mean(PM)]
	df[, RFM2:= RFM-mean(RFM)]
	#
	#	STAN
	#
	
	#	same household definitely stronger effect than couples	
	ms.1 <- map2stan(
			alist(
					LINK_MF ~ dbinom(1, pmf),
					logit(pmf) <- base + 
							# comm_fish*COMM_FISH is base 
							comm_agr*COMM_AGR + comm_trad*COMM_TRAD + comm_mxd*COMM_MXD +
							noedu_f*FE_NOEDU + noedu_m*MA_NOEDU +
							sexp_f*FE_SEXP1YR_G1 + sexp_m*MA_SEXP1YR_G1 +
							circum_m*MA_CIRCUM +								
							prev_m*PM2 + prev_f*PF2 + prev_ratio_fm*RFM2 +
							# agro occupation is baseline
							mocc_other*MA_OC_OTH + mocc_fish*MA_OC_FISH + mocc_trad*MA_OC_TRAD +										
							focc_other*FE_OC_OTH + focc_bar*FE_OC_BAR +  focc_trad*FE_OC_TRAD,
					base ~ dnorm(0,100),								
					c(comm_agr, comm_trad, comm_mxd) ~ dnorm(0,10),
					c(noedu_f, noedu_m) ~ dnorm(0,10),								
					c(sexp_f, sexp_m, circum_m) ~ dnorm(0,10),
					c(prev_m, prev_f, prev_ratio_fm) ~ dnorm(0,10),
					c(mocc_other, mocc_fish, mocc_trad) ~ dnorm(0,10),
					c(focc_other, focc_bar, focc_trad) ~ dnorm(0,10)
			),
			data=as.data.frame(df), 
			start=list(	base=0, comm_agr=0, comm_trad=0, comm_mxd=0, noedu_f=0, noedu_m=0, sexp_f=0, sexp_m=0, circum_m=0,
					prev_m=0, prev_f=0, prev_ratio_fm=0,
					focc_other=0, focc_bar=0, focc_trad=0,
					mocc_other=0, mocc_fish=0, mocc_trad=0),			
			warmup=1e3, iter=5e3, chains=1, cores=4
	)		
	#plot(precis(ms.1, prob=0.95))
	#pairs(ms.1)
	post	<- extract.samples(ms.1)
	dp		<- data.table(	MC= seq_along(post$base),
			PI_COMM_FISH=logistic( post$base ),				
			PI_COMM_AGRO=logistic( post$base+post$comm_agr ), 											
			PI_COMM_TRAD=logistic( post$base+post$comm_trad ),							
			PI_COMM_MXD=logistic( post$base+post$comm_mxd ),
			PI_FEDU_NONE=logistic( post$base+post$noedu_f ),
			PI_MEDU_NONE=logistic( post$base+post$noedu_m ),							
			PI_FESEXP1YR_G1=logistic( post$base+post$sexp_f ),
			PI_MASEXP1YR_G1=logistic( post$base+post$sexp_m ),							
			PI_CIRC_YES=logistic( post$base+post$circum_m ),							
			PI_MOCC_OTH=logistic( post$base+post$mocc_other ),
			PI_MOCC_FISH=logistic( post$base+post$mocc_fish ),
			PI_MOCC_TRAD=logistic( post$base+post$mocc_trad ),
			PI_FOCC_OTH=logistic( post$base+post$focc_other ),
			PI_FOCC_BAR=logistic( post$base+post$focc_bar ),
			PI_FOCC_TRAD=logistic( post$base+post$focc_trad ),			
			PI_PREV_MALE=logistic( post$base+post$prev_m ),
			PI_PREV_FEMALE=logistic( post$base+post$prev_f ),
			PI_PREV_RATIOFM=logistic( post$base+post$prev_ratio_fm ),			
			OR_COMM_FISH=exp( post$base ),				
			OR_COMM_AGRO=exp( post$base+post$comm_agr ), 											
			OR_COMM_TRAD=exp( post$base+post$comm_trad ),							
			OR_COMM_MXD=exp( post$base+post$comm_mxd ),
			OR_FEDU_NONE=exp( post$base+post$noedu_f ),
			OR_MEDU_NONE=exp( post$base+post$noedu_m ),							
			OR_FESEXP1YR_G1=exp( post$base+post$sexp_f ),
			OR_MASEXP1YR_G1=exp( post$base+post$sexp_m ),
			OR_CIRC_YES=exp( post$base+post$circum_m ),							
			OR_MOCC_OTH=exp( post$base+post$mocc_other ),
			OR_MOCC_FISH=exp( post$base+post$mocc_fish ),
			OR_MOCC_TRAD=exp( post$base+post$mocc_trad ),
			OR_FOCC_OTH=exp( post$base+post$focc_other ),
			OR_FOCC_BAR=exp( post$base+post$focc_bar ),
			OR_FOCC_TRAD=exp( post$base+post$focc_trad ),			
			OR_PREV_MALE=exp( post$base+post$prev_m ),
			OR_PREV_FEMALE=exp( post$base+post$prev_f ),
			OR_PREV_RATIOFM=exp( post$base+post$prev_ratio_fm ),
			ORX_COMM_AGRO=exp( post$comm_agr ), 											
			ORX_COMM_TRAD=exp( post$comm_trad ),							
			ORX_COMM_MXD=exp( post$comm_mxd ),
			ORX_FEDU_NONE=exp( post$noedu_f ),
			ORX_MEDU_NONE=exp( post$noedu_m ),
			ORX_FESEXP1YR_G1=exp( post$sexp_f ),
			ORX_MASEXP1YR_G1=exp( post$sexp_m ),							
			ORX_CIRC_YES=exp( post$circum_m ),							
			ORX_MOCC_OTH=exp( post$mocc_other ),
			ORX_MOCC_FISH=exp( post$mocc_fish ),
			ORX_MOCC_TRAD=exp( post$mocc_trad ),
			ORX_FOCC_OTH=exp( post$focc_other ),
			ORX_FOCC_BAR=exp( post$focc_bar ),
			ORX_FOCC_TRAD=exp( post$focc_trad ),
			ORX_PREV_MALE=exp( post$prev_m ),
			ORX_PREV_FEMALE=exp( post$prev_f ),
			ORX_PREV_RATIOFM=exp( post$prev_ratio_fm )
	)
	dp		<- melt(dp, id.vars='MC')	
	dss.1	<- dp[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
					V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), by='variable']
	dss.1	<- dcast.data.table(dss.1, variable~STAT, value.var='V')
	dss.1[, MODEL:= 'multivariate ms3']
	ds		<- copy(dss.1)
	ds[, LABEL:= paste0(round(MED, d=2), '\n[', round(CL, d=2),'-', round( CU, d=2),']')]
	ds[, STAT:= factor(gsub('^([^_]+)_.*','\\1',variable), levels=c('PI','OR','ORX'), labels=c('proportion MF','odds MF','odds ratio'))]
	ds[, FACTOR:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\3',variable)]
	ds[, MOFA:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\2-\\3',variable)]
	ds[, MODELTYPE:= factor(grepl('univariate',MODEL), levels=c(TRUE,FALSE),labels=c('univariate','multivariate'))]	
	ds		<- subset(ds, STAT=='odds ratio')
	set(ds, NULL, 'MOFA2', ds[, factor(MOFA, levels=rev(c(
											"COMM-AGRO","COMM-TRAD","COMM-MXD",																						
											"FEDU-NONE", "MEDU-NONE",      
											"FESEXP1YR-G1","MASEXP1YR-G1", 
											"CIRC-YES",                   
											"FOCC-BAR","FOCC-TRAD","FOCC-OTH",
											"MOCC-FISH","MOCC-OTH","MOCC-TRAD"   
									)), labels=rev(c("Agrarian community vs. fishing community", "Trading community vs. fishing community", "Mixed vs. fishing community",
											"Female education: None vs. at least primary education", "Male education: None vs. at least primary education",
											"Female sex partners in last year: >1 vs. 1", "Male sex partners in last year: >1 vs. 1",
											"Male circumcised: yes vs. no",
											"Female occupation: bar/waitress vs. agricultural/house", "Female occupation: trading/shopkeeper vs. agricultural/house","Female occupation: other vs. agricultural/house", 
											"Male occupation: fishing vs. agricultural/house", "Male occupation: other vs. agricultural/house", "Male occupation: trading/shopkeeper vs. agricultural/house"
									)))])
	setkey(ds, MOFA)	
	dp		<- subset(ds, !MOFA%in%c('COMM-TRAD','COMM-MXD'))
	dp[, FILL:= '0']
	set(dp, dp[, which(IL>1 | IU<1)], 'FILL', '1')
	set(dp, dp[, which(CL>1 | CU<1)], 'FILL', '2')
	ggplot(dp, aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/10, 1/4,1/3,1/2,1,2,3,4,10), labels=c('1/10','1/4','1/3','1/2','1','2','3','4','10')) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip() +
			guides(fill=FALSE) +
			labs(x='Partner characteristics\n', y='\nOdds ratio in male to female transmission')
	ggsave(file=paste0(outfile.base,'_propmf_factors_univariate_samehh.pdf'), w=12, h=7)		
}

RakaiFull.gender.171122.hh.multivariatemodels.stan.with.threshold<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)
	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	
	#rtpdm[, table(PAIR_COMM_TYPE)]
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	
	df		<- subset(rtpdm, FEMALE_HH_NUM==MALE_HH_NUM, select=c(	MALE_RID, FEMALE_RID, PTY_RUN, IDCLU, LINK_MF, POSTERIOR_SCORE_MF, COUPLE2, SAMEHH, PAIR_COMM_TYPE, FEMALE_COMM_NUM,
					MALE_RECENTVL, MALE_SEXP1YR, MALE_SEXP1OUT2, MALE_OCCUP_OLLI, MALE_OCAT, MALE_EDUCAT, MALE_CIRCUM,
					FEMALE_RECENTVL, FEMALE_SEXP1YR, FEMALE_SEXP1OUT2, FEMALE_OCCUP_OLLI, FEMALE_OCAT, FEMALE_EDUCAT 
			))
	#	missing data: fill in
	set(df, df[, which(is.na(MALE_RECENTVL))], 'MALE_RECENTVL', -1)
	set(df, df[, which(is.na(FEMALE_RECENTVL))], 'FEMALE_RECENTVL', -1)
	set(df, df[, which(is.na(MALE_CIRCUM))], 'MALE_CIRCUM', 'Unknown')
	set(df, df[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(df, df[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')	
	#for(x in colnames(df)) print( c(x, any(is.na(df[[x]]))) )
	#	add estimated HIV prevalences (medians)
	infile	<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30trmMF_estimated_prevalenceratio.rda"	
	load(infile)
	tmp		<- dcast.data.table(dgg, COMM_NUM~variable, value.var='M')
	setnames(tmp, 'COMM_NUM', 'FEMALE_COMM_NUM')
	df		<- merge(df, tmp, by='FEMALE_COMM_NUM')

	#	prepare data for STAN
	df[, COMM_FISH:= as.integer(PAIR_COMM_TYPE=='fisherfolk')]
	df[, COMM_AGR:= as.integer(PAIR_COMM_TYPE=='agrarian')]
	df[, COMM_TRAD:= as.integer(PAIR_COMM_TYPE=='trading')]
	df[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	df[, FE_NOEDU:= as.integer(FEMALE_EDUCAT=='None')]
	df[, FE_NOEDU_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, MA_NOEDU:= as.integer(MALE_EDUCAT=='None')]
	df[, MA_NOEDU_MISS:= as.integer(MALE_EDUCAT=='Unknown')]	
	df[, MA_CIRCUM:= as.integer(MALE_CIRCUM=='Y')]
	df[, MA_CIRCUM_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, FE_SEXP1YR_G1:= as.integer(FEMALE_SEXP1YR!='1')]
	df[, MA_SEXP1YR_G1:= as.integer(MALE_SEXP1YR!='1')]
	df[, MA_OC_AGRO:= as.integer(MALE_OCAT=='Agro/House')]	
	df[, MA_OC_FISH:= as.integer(MALE_OCAT=='Fishing')]
	df[, MA_OC_TRAD:= as.integer(MALE_OCAT=='Trading/Shop keeper')]
	df[, MA_OC_OTH:= as.integer(MALE_OCAT%in%c('zother','Bar/waitress','Student','Boda/Trucking'))]	
	df[, FE_OC_AGRO:= as.integer(FEMALE_OCAT=='Agro/House')]
	df[, FE_OC_BAR:= as.integer(FEMALE_OCAT=='Bar/waitress')]			
	df[, FE_OC_TRAD:= as.integer(FEMALE_OCAT=='Trading/Shop keeper')]
	df[, FE_OC_OTH:= as.integer(FEMALE_OCAT%in%c('zother','Student'))]		
	df[, RFM_ABOVE_MEAN:=  as.integer( RFM>=subset(dgg, variable=='RFM')[, mean(M)] ) ]
	
	
	#
	#	STAN
	#
	
	#	same household definitely stronger effect than couples	
	ms.1 <- map2stan(
			alist(
					LINK_MF ~ dbinom(1, pmf),
					logit(pmf) <- base + 
							# comm_fish*COMM_FISH is base 
							comm_agr*COMM_AGR + comm_trad*COMM_TRAD + comm_mxd*COMM_MXD +
							noedu_f*FE_NOEDU + noedu_m*MA_NOEDU +
							sexp_f*FE_SEXP1YR_G1 + sexp_m*MA_SEXP1YR_G1 +
							circum_m*MA_CIRCUM +	
							prevratio_fm * RFM_ABOVE_MEAN +
							# agro occupation is baseline
							mocc_other*MA_OC_OTH + mocc_fish*MA_OC_FISH + mocc_trad*MA_OC_TRAD +										
							focc_other*FE_OC_OTH + focc_bar*FE_OC_BAR +  focc_trad*FE_OC_TRAD,
					base ~ dnorm(0,100),								
					c(comm_agr, comm_trad, comm_mxd) ~ dnorm(0,10),
					c(noedu_f, noedu_m) ~ dnorm(0,10),								
					c(sexp_f, sexp_m, circum_m, prevratio_fm) ~ dnorm(0,10),					
					c(mocc_other, mocc_fish, mocc_trad) ~ dnorm(0,10),
					c(focc_other, focc_bar, focc_trad) ~ dnorm(0,10)
			),
			data=as.data.frame(df), 
			start=list(	base=0, comm_agr=0, comm_trad=0, comm_mxd=0, noedu_f=0, noedu_m=0, sexp_f=0, sexp_m=0, circum_m=0,						
						prevratio_fm=0, focc_other=0, focc_bar=0, focc_trad=0, mocc_other=0, mocc_fish=0, mocc_trad=0),			
			warmup=1e3, iter=5e3, chains=1, cores=4
		)		
	#plot(precis(ms.1, prob=0.95))
	#pairs(ms.1)
	post	<- extract.samples(ms.1)
	dp		<- data.table(	MC= seq_along(post$base),
			PI_COMM_FISH=logistic( post$base ),				
			PI_COMM_AGRO=logistic( post$base+post$comm_agr ), 											
			PI_COMM_TRAD=logistic( post$base+post$comm_trad ),							
			PI_COMM_MXD=logistic( post$base+post$comm_mxd ),
			PI_FEDU_NONE=logistic( post$base+post$noedu_f ),
			PI_MEDU_NONE=logistic( post$base+post$noedu_m ),							
			PI_FESEXP1YR_G1=logistic( post$base+post$sexp_f ),
			PI_MASEXP1YR_G1=logistic( post$base+post$sexp_m ),							
			PI_CIRC_YES=logistic( post$base+post$circum_m ),							
			PI_MOCC_OTH=logistic( post$base+post$mocc_other ),
			PI_MOCC_FISH=logistic( post$base+post$mocc_fish ),
			PI_MOCC_TRAD=logistic( post$base+post$mocc_trad ),
			PI_FOCC_OTH=logistic( post$base+post$focc_other ),
			PI_FOCC_BAR=logistic( post$base+post$focc_bar ),
			PI_FOCC_TRAD=logistic( post$base+post$focc_trad ),
			PI_PREV_FMRATIO=logistic( post$base+post$prevratio_fm ),
			OR_COMM_FISH=exp( post$base ),				
			OR_COMM_AGRO=exp( post$base+post$comm_agr ), 											
			OR_COMM_TRAD=exp( post$base+post$comm_trad ),							
			OR_COMM_MXD=exp( post$base+post$comm_mxd ),
			OR_FEDU_NONE=exp( post$base+post$noedu_f ),
			OR_MEDU_NONE=exp( post$base+post$noedu_m ),							
			OR_FESEXP1YR_G1=exp( post$base+post$sexp_f ),
			OR_MASEXP1YR_G1=exp( post$base+post$sexp_m ),
			OR_CIRC_YES=exp( post$base+post$circum_m ),							
			OR_MOCC_OTH=exp( post$base+post$mocc_other ),
			OR_MOCC_FISH=exp( post$base+post$mocc_fish ),
			OR_MOCC_TRAD=exp( post$base+post$mocc_trad ),
			OR_FOCC_OTH=exp( post$base+post$focc_other ),
			OR_FOCC_BAR=exp( post$base+post$focc_bar ),
			OR_FOCC_TRAD=exp( post$base+post$focc_trad ),	
			OR_PREV_FMRATIO=exp( post$base+post$prevratio_fm ),
			ORX_COMM_AGRO=exp( post$comm_agr ), 											
			ORX_COMM_TRAD=exp( post$comm_trad ),							
			ORX_COMM_MXD=exp( post$comm_mxd ),
			ORX_FEDU_NONE=exp( post$noedu_f ),
			ORX_MEDU_NONE=exp( post$noedu_m ),
			ORX_FESEXP1YR_G1=exp( post$sexp_f ),
			ORX_MASEXP1YR_G1=exp( post$sexp_m ),							
			ORX_CIRC_YES=exp( post$circum_m ),							
			ORX_MOCC_OTH=exp( post$mocc_other ),
			ORX_MOCC_FISH=exp( post$mocc_fish ),
			ORX_MOCC_TRAD=exp( post$mocc_trad ),
			ORX_FOCC_OTH=exp( post$focc_other ),
			ORX_FOCC_BAR=exp( post$focc_bar ),
			ORX_FOCC_TRAD=exp( post$focc_trad ),
			ORX_PREV_FMRATIO=logistic( post$prevratio_fm )
	)
	dp		<- melt(dp, id.vars='MC')	
	dss.1	<- dp[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
					V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), by='variable']
	dss.1	<- dcast.data.table(dss.1, variable~STAT, value.var='V')
	dss.1[, MODEL:= 'multivariate ms2']
	ds		<- copy(dss.1)
	ds[, LABEL:= paste0(round(MED, d=2), '\n[', round(CL, d=2),'-', round( CU, d=2),']')]
	ds[, STAT:= factor(gsub('^([^_]+)_.*','\\1',variable), levels=c('PI','OR','ORX'), labels=c('proportion MF','odds MF','odds ratio'))]
	ds[, FACTOR:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\3',variable)]
	ds[, MOFA:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\2-\\3',variable)]
	ds[, MODELTYPE:= factor(grepl('univariate',MODEL), levels=c(TRUE,FALSE),labels=c('univariate','multivariate'))]	
	ds		<- subset(ds, STAT=='odds ratio')
	set(ds, NULL, 'MOFA2', ds[, factor(MOFA, levels=rev(c(
											"PREV-FMRATIO",
											"COMM-AGRO","COMM-TRAD","COMM-MXD",																						
											"FEDU-NONE", "MEDU-NONE",      
											"FESEXP1YR-G1","MASEXP1YR-G1", 
											"CIRC-YES",                   
											"FOCC-BAR","FOCC-TRAD","FOCC-OTH",
											"MOCC-FISH","MOCC-OTH","MOCC-TRAD"   
									)), labels=rev(c("female-to-male prevalence ratio above average vs. below average",
											"Agrarian community vs. fishing community", "Trading community vs. fishing community", "Mixed vs. fishing community",
											"Female education: None vs. at least primary education", "Male education: None vs. at least primary education",
											"Female sex partners in last year: >1 vs. 1", "Male sex partners in last year: >1 vs. 1",
											"Male circumcised: yes vs. no",
											"Female occupation: bar/waitress vs. agricultural/house", "Female occupation: trading/shopkeeper vs. agricultural/house","Female occupation: other vs. agricultural/house", 
											"Male occupation: fishing vs. agricultural/house", "Male occupation: other vs. agricultural/house", "Male occupation: trading/shopkeeper vs. agricultural/house"
									)))])
	setkey(ds, MOFA)	
	dp		<- subset(ds, !MOFA%in%c('COMM-TRAD','COMM-MXD'))
	dp[, FILL:= '0']
	set(dp, dp[, which(IL>1 | IU<1)], 'FILL', '1')
	set(dp, dp[, which(CL>1 | CU<1)], 'FILL', '2')
	ggplot(dp, aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Partner characteristics\n', y='\nOdds ratio of male-to-female transmission\nbetween partner characteristics')
	ggsave(file=paste0(outfile.base,'_propmf_factors_univariate_samehh.pdf'), w=8, h=3.5)		
}

RakaiFull.gender.171122.hh.multivariatemodels.stan.with.threshold.v2<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)
	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
	
	#rtpdm[, table(PAIR_COMM_TYPE)]
	rtpdm[, COUPLE2:= factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('no couple','couple'))]
	rtpdm[, SAMEHH:= factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('same hh','different hh'))]	
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	
	#	add immigrant
	load("~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/180322_sampling_by_gender_age.rda")
	tmp		<- subset(de, !is.na(RID), select=c(RID, INMIGRANT))
	infile				<- "~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/RakaiPangeaMetaData.rda"
	load(infile)
	tmp2	<- subset(as.data.table(inmigrant), select=c(RCCS_studyid,inmigrant))
	setnames(tmp2, 'RCCS_studyid', 'RID')
	tmp2	<- tmp2[, list(inmigrant=as.integer(any(inmigrant==1))), by='RID']
	tmp		<- merge(tmp, tmp2, all=TRUE, by='RID')
	#INMIGRANT and inmigrant do not agree.. 
	#subset(tmp, !is.na(inmigrant) & !is.na(INMIGRANT))[, table(inmigrant, INMIGRANT)]
	#...just set those with missing INMIGRANT to inmigrant
	set(tmp, NULL, 'INMIGRANT', tmp[, as.integer(INMIGRANT)])
	tmp2	<- tmp[, which(!is.na(inmigrant) & is.na(INMIGRANT))]
	set(tmp, tmp2, 'INMIGRANT', tmp[tmp2, inmigrant])	
	tmp		<- subset(tmp, select=c(RID, INMIGRANT))	
	setnames(tmp, colnames(tmp), paste0('MALE_',colnames(tmp)))
	rtpdm	<- merge(rtpdm, tmp, by='MALE_RID', all.x=TRUE)
	setnames(tmp, colnames(tmp), gsub('MALE_','FEMALE_',colnames(tmp)))
	rtpdm	<- merge(rtpdm, tmp, by='FEMALE_RID', all.x=TRUE)
	set(rtpdm, rtpdm[, which(is.na(MALE_INMIGRANT))], 'MALE_INMIGRANT', 0L)
	set(rtpdm, rtpdm[, which(is.na(FEMALE_INMIGRANT))], 'FEMALE_INMIGRANT', 0L)
	
	
	df		<- subset(rtpdm, FEMALE_HH_NUM==MALE_HH_NUM, select=c(	MALE_RID, FEMALE_RID, PTY_RUN, IDCLU, LINK_MF, POSTERIOR_SCORE_MF, COUPLE2, SAMEHH, PAIR_COMM_TYPE, FEMALE_COMM_NUM,
					MALE_RECENTVL, MALE_SEXP1YR, MALE_SEXP1OUT2, MALE_OCCUP_OLLI, MALE_OCAT, MALE_EDUCAT, MALE_CIRCUM, MALE_INMIGRANT,
					FEMALE_RECENTVL, FEMALE_SEXP1YR, FEMALE_SEXP1OUT2, FEMALE_OCCUP_OLLI, FEMALE_OCAT, FEMALE_EDUCAT, FEMALE_INMIGRANT 
			))
	#	missing data: fill in
	set(df, df[, which(is.na(MALE_RECENTVL))], 'MALE_RECENTVL', -1)
	set(df, df[, which(is.na(FEMALE_RECENTVL))], 'FEMALE_RECENTVL', -1)
	set(df, df[, which(is.na(MALE_CIRCUM))], 'MALE_CIRCUM', 'Unknown')
	set(df, df[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(df, df[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')	
	#for(x in colnames(df)) print( c(x, any(is.na(df[[x]]))) )
	#	add estimated HIV prevalences (medians)
	infile	<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30trmMF_estimated_prevalenceratio.rda"	
	load(infile)
	tmp		<- dcast.data.table(dgg, COMM_NUM~variable, value.var='M')
	setnames(tmp, 'COMM_NUM', 'FEMALE_COMM_NUM')
	df		<- merge(df, tmp, by='FEMALE_COMM_NUM')
	
	#	prepare data for STAN
	df[, COMM_FISH:= as.integer(PAIR_COMM_TYPE=='fisherfolk')]
	df[, COMM_AGR:= as.integer(PAIR_COMM_TYPE=='agrarian')]
	df[, COMM_TRAD:= as.integer(PAIR_COMM_TYPE=='trading')]
	df[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	df[, FE_NOEDU:= as.integer(FEMALE_EDUCAT=='None')]
	df[, FE_NOEDU_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, MA_NOEDU:= as.integer(MALE_EDUCAT=='None')]
	df[, MA_NOEDU_MISS:= as.integer(MALE_EDUCAT=='Unknown')]	
	df[, MA_CIRCUM:= as.integer(MALE_CIRCUM=='Y')]
	df[, MA_CIRCUM_MISS:= as.integer(FEMALE_EDUCAT=='Unknown')]
	df[, FE_SEXP1YR_G1:= as.integer(FEMALE_SEXP1YR!='1')]
	df[, MA_SEXP1YR_G1:= as.integer(MALE_SEXP1YR!='1')]
	df[, MA_OC_AGRO:= as.integer(MALE_OCAT=='Agro/House')]	
	df[, MA_OC_FISH:= as.integer(MALE_OCAT=='Fishing')]
	df[, MA_OC_TRAD:= as.integer(MALE_OCAT=='Trading/Shop keeper')]
	df[, MA_OC_OTH:= as.integer(MALE_OCAT%in%c('zother','Bar/waitress','Student','Boda/Trucking'))]	
	df[, FE_OC_AGRO:= as.integer(FEMALE_OCAT=='Agro/House')]
	df[, FE_OC_BAR:= as.integer(FEMALE_OCAT=='Bar/waitress')]			
	df[, FE_OC_TRAD:= as.integer(FEMALE_OCAT=='Trading/Shop keeper')]
	df[, FE_OC_OTH:= as.integer(FEMALE_OCAT%in%c('zother','Student'))]		
	df[, RFM_ABOVE_MEAN:=  as.integer( RFM>=subset(dgg, variable=='RFM')[, mean(M)] ) ]
	df[, MA_VL_HIGH:= as.integer(MALE_RECENTVL>=1e5)]
	df[, FE_VL_HIGH:= as.integer(FEMALE_RECENTVL>=1e5)]
	#
	#	STAN
	#
	mhh.1 <- map2stan(
			alist(
					LINK_MF ~ dbinom(1, pmf),
					logit(pmf) <- base +
							male_vlhigh*MA_VL_HIGH + female_vlhigh*FE_VL_HIGH +
							male_noedu*MA_NOEDU + female_noedu*FE_NOEDU +
							male_inmigrant*MALE_INMIGRANT + female_inmigrant*FEMALE_INMIGRANT +							
							female_sexp*FE_SEXP1YR_G1 + male_sexp*MA_SEXP1YR_G1 +															 														
							# occupation: other is baseline
							male_agro*MA_OC_AGRO + male_fish*MA_OC_FISH + male_shopkeeper*MA_OC_TRAD +  
							female_agro*FE_OC_AGRO + female_barworker*FE_OC_BAR + female_shopkeeper*FE_OC_TRAD,
					base ~ dnorm(0,100),													
					c(male_noedu, female_noedu, male_vlhigh, female_vlhigh) ~ dnorm(0, 1),								
					c(male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0, 1),					
					c(male_fish, male_shopkeeper, male_agro, female_barworker, female_shopkeeper, female_agro) ~ dnorm(0, 1)					
			),
			data=as.data.frame(df), 
			start=list(	base=0, male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0, male_vlhigh=0, female_vlhigh=0,						
						male_fish=0, male_shopkeeper=0, male_agro=0, female_barworker=0, female_shopkeeper=0, female_agro=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	precis(mhh.1, prob=0.95)	
	plot(precis(mhh.1, prob=0.95))
	#	get stuff from mhh.1
	post	<- as.data.table(extract.samples(mhh.1))
	post[, MC:= seq_len(nrow(post))]
	post	<- melt(post, id.vars='MC')
	tmp		<- post[, which(!grepl('sig',variable))]
	set(post, tmp, 'value', post[tmp, exp(value)])	
	tmp		<- post[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
					V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), 
			by=c('variable')]
	dhh1	<- dcast.data.table(tmp, variable~STAT, value.var='V')
	dhh1[, MODEL:= 'mf in hh']
	dhh1[, STAT:= as.character(factor(grepl('base',variable), levels=c(TRUE,FALSE), labels=c('odds','odds ratio')))]
	set(dhh1, NULL, 'FACTOR', dhh1[, factor(variable, levels=c('base','male_vlhigh','female_vlhigh','male_noedu','female_noedu','male_inmigrant','female_inmigrant',
																'male_sexp','female_sexp','male_fish','male_shopkeeper','male_agro','female_barworker','female_shopkeeper','female_agro'))])
	setkey(dhh1, FACTOR)
	#	save
	save(df, mhh.1, dhh1, file=paste0(outfile.base,'_interaction_runs_mf_followup.rda'))	
	
	#	make plots
	dp		<- subset(dhh1, !grepl('base',variable))	
	dp[, FILL:= '0']
	set(dp, dp[, which(IU<1)], 'FILL', '-1')
	set(dp, dp[, which(CU<1)], 'FILL', '-2')	
	set(dp, dp[, which(IL>1)], 'FILL', '1')
	set(dp, dp[, which(CL>1)], 'FILL', '2')
	ggplot(dp, aes(x=FACTOR)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +			
			scale_fill_manual(values=c('-2'='deepskyblue','-1'='cyan','0'='grey80', '1'='darkorange', '2'='red')) +
			#coord_flip(ylim=c(1/20,20)) +
			coord_flip(ylim=c(1/10,10)) +
			guides(fill=FALSE) +
			labs(x='Partner characteristics\n', y='\nOdds for male-to-female transmission\nin phylogenetically reconstructed transmission pairs') +
			facet_grid(STAT~., scales='free', space='free')	
	ggsave(file=paste0(outfile.base,'_interaction_runs_odds_noage_mf.pdf'), w=5, h=5)	
}


RakaiFull.transmitter.171122.get.data.set<- function()
{
	require(data.table)
	
	infile					<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_withmetadata.rda"
	outfile.base			<- gsub('_withmetadata.rda','',infile)	
	#zm		<- get_googlemap(center="rakai district uganda", zoom=10, maptype="hybrid")
	#zc		<- as.data.table(read.csv('~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/PANGEA_Rakai_community_anonymized_IDs.csv', stringsAsFactors=FALSE))
	load(infile)	
	setkey(rtp, MALE_RID, FEMALE_RID)
	rtp[, PAIRID:= seq_len(nrow(rtp))]
	rtpdm	<- subset(rtp, grepl('mf|fm',SELECT))
	rtpdm[, PAIR_COMM_TYPE:= FEMALE_COMM_TYPE]
	set(rtpdm, rtpdm[, which(PAIR_COMM_TYPE!='fisherfolk')], 'PAIR_COMM_TYPE', 'non-fishing')
	set(rtpdm, rtpdm[, which(FEMALE_COMM_TYPE!=MALE_COMM_TYPE)], 'PAIR_COMM_TYPE', 'mixed')
	set(rtpdm, rtpdm[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
	set(rtpdm, rtpdm[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')	
	rtpdm[, COUPLE:= as.integer(as.character(factor(COUPLE=='no couple', levels=c(TRUE,FALSE), labels=c('0','1'))))]
	rtpdm[, SAME_HH:= as.integer(as.character(factor(FEMALE_HH_NUM==MALE_HH_NUM, levels=c(TRUE,FALSE), labels=c('1','0'))))]
	rtpdm[, SAME_COMM:= as.integer(as.character(factor(FEMALE_COMM_NUM==MALE_COMM_NUM, levels=c(TRUE,FALSE), labels=c('1','0'))))]
	rtpdm[, PAIR_COMM:= MALE_COMM_NUM_A]
	set(rtpdm, rtpdm[, which(FEMALE_COMM_NUM_A!=MALE_COMM_NUM_A)], 'PAIR_COMM', 'mixed')
	rtpdm[, MALE_SEXP1OUT2:= factor(MALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(MALE_SEXP1OUT=='Unknown')], 'MALE_SEXP1OUT2', 'Unknown')
	rtpdm[, FEMALE_SEXP1OUT2:= factor(FEMALE_SEXP1OUT=='0', levels=c(TRUE,FALSE),labels=c('none','1+'))]
	set(rtpdm, rtpdm[, which(FEMALE_SEXP1OUT=='Unknown')], 'FEMALE_SEXP1OUT2', 'Unknown')
	rtpdm[, MALE_SEX:='M']
	rtpdm[, FEMALE_SEX:='F']
	rtpdm[, PAIRID:= seq_len(nrow(rtpdm))]
	set(rtpdm, NULL, 'MALE_CIRCUM', rtpdm[, as.integer(as.character(factor(MALE_CIRCUM, levels=c('Y','N'), labels=c('1','0'))))])
	
	#	add immigrant:
	#	was inmigrant within 1 year of first conc pos; within 2 years of first conc pos	
	tmp			<- subset(rtpdm, select=c(PAIRID, MALE_RID, FEMALE_RID, VISIT_FIRSTCONCPOS))
	tmp2		<- unique(subset(rd, select=c(RID, VISIT, DATE)))
	setnames(tmp2, colnames(tmp2), gsub('MALE_VISIT','VISIT_FIRSTCONCPOS',paste0('MALE_',colnames(tmp2))))
	tmp			<- merge(tmp, tmp2, by=c('MALE_RID','VISIT_FIRSTCONCPOS'), all.x=1)
	setnames(tmp2, colnames(tmp2), gsub('MALE_','FEMALE_', colnames(tmp2)))
	tmp			<- merge(tmp, tmp2, by=c('FEMALE_RID','VISIT_FIRSTCONCPOS'), all.x=1)
	set(tmp, tmp[, which(is.na(MALE_DATE))], 'MALE_DATE', -1)
	set(tmp, tmp[, which(is.na(FEMALE_DATE))], 'FEMALE_DATE', -1)
	#	argh there are entries where both have missing date in rd ..
	tmp2		<- subset(tmp, MALE_DATE==-1 & FEMALE_DATE==-1, select=c(MALE_RID, FEMALE_RID, VISIT_FIRSTCONCPOS, PAIRID))
	tmp2		<- merge(tmp2, subset(rtpdm, select=c(MALE_RID, FEMALE_RID, VISIT_FIRSTCONCPOS, PAIRID, MALE_DATE, FEMALE_DATE)), by=c('MALE_RID','FEMALE_RID','VISIT_FIRSTCONCPOS','PAIRID'))
	tmp			<- rbind(subset(tmp, !(MALE_DATE==-1 & FEMALE_DATE==-1)), tmp2)	
	set(tmp, NULL, 'DATE_FIRSTCONCPOS', tmp[, pmax(MALE_DATE, FEMALE_DATE)])
	stopifnot( nrow(subset(tmp, DATE_FIRSTCONCPOS==-1))==0 )
	set(tmp, NULL, c('VISIT_FIRSTCONCPOS','MALE_DATE','FEMALE_DATE'), NULL)
	
	#
	infile		<- "~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/RakaiPangeaMetaData_v2.rda"
	load(infile)
	inmigrant	<- subset(as.data.table(inmigrant), select=c(RCCS_studyid, date))	
	inmigrant[, date:= hivc.db.Date2numeric(date)]		
	setnames(inmigrant, c('RCCS_studyid','date'), c('MALE_RID','MALE_INMIGRATE_DATE'))
	tmp			<- merge(tmp, inmigrant, by='MALE_RID', all.x=1)
	setnames(inmigrant, colnames(inmigrant), gsub('MALE_','FEMALE_', colnames(inmigrant)))
	tmp			<- merge(tmp, inmigrant, by='FEMALE_RID', all.x=1)
	set(tmp, tmp[, which(is.na(MALE_INMIGRATE_DATE))], 'MALE_INMIGRATE_DATE', -1)
	set(tmp, tmp[, which(is.na(FEMALE_INMIGRATE_DATE))], 'FEMALE_INMIGRATE_DATE', -1)
	
	tmp			<- tmp[, list( MALE_INMIGRATE_1YR= as.numeric(any(DATE_FIRSTCONCPOS>=MALE_INMIGRATE_DATE & DATE_FIRSTCONCPOS-MALE_INMIGRATE_DATE<1)),
								MALE_INMIGRATE_2YR= as.numeric(any(DATE_FIRSTCONCPOS>=MALE_INMIGRATE_DATE & DATE_FIRSTCONCPOS-MALE_INMIGRATE_DATE<2)),
								FEMALE_INMIGRATE_1YR= as.numeric(any(DATE_FIRSTCONCPOS>=FEMALE_INMIGRATE_DATE & DATE_FIRSTCONCPOS-FEMALE_INMIGRATE_DATE<1)),
								FEMALE_INMIGRATE_2YR= as.numeric(any(DATE_FIRSTCONCPOS>=FEMALE_INMIGRATE_DATE & DATE_FIRSTCONCPOS-FEMALE_INMIGRATE_DATE<2))), by=c('FEMALE_RID','MALE_RID','PAIRID','DATE_FIRSTCONCPOS')]
	#	58 males ; 6 more males inmigrant within 2 years				
	#	76 females; 11 more females inmigrant within 2 years
	rtpdm		<- merge(rtpdm, tmp, by=c('FEMALE_RID','MALE_RID','PAIRID'))
	
	
	#	most recent vl within one year first conc pos
	vld			<- subset(as.data.table(vlData), select=c(RCCS_studyid, date, copies, newcopies))	
	set(vld, vld[, which(copies!=newcopies)], 'copies', vld[copies!=newcopies,newcopies])
	vld[, date:= hivc.db.Date2numeric(date)]
	set(vld, NULL, 'newcopies', NULL)
	vld			<- subset(vld, !is.na(copies))
	tmp			<- subset(rtpdm, select=c(PAIRID, MALE_RID, FEMALE_RID, DATE_FIRSTCONCPOS))
	setnames(vld, c('RCCS_studyid','date','copies'), c('MALE_RID','MALE_VL_DATE','MALE_VL'))
	tmp			<- merge(tmp, vld, by=c('MALE_RID'), all.x=1)
	setnames(vld, colnames(vld), gsub('MALE_','FEMALE_',colnames(vld)))
	tmp			<- merge(tmp, vld, by=c('FEMALE_RID'), all.x=1)
	tmp			<- tmp[, list( 	MALE_CLOSEST_VL_DATE= MALE_VL_DATE[which.min(abs(DATE_FIRSTCONCPOS-MALE_VL_DATE))],
								MALE_CLOSEST_VL= MALE_VL[which.min(abs(DATE_FIRSTCONCPOS-MALE_VL_DATE))],
								FEMALE_CLOSEST_VL_DATE= FEMALE_VL_DATE[which.min(abs(DATE_FIRSTCONCPOS-FEMALE_VL_DATE))],
								FEMALE_CLOSEST_VL= FEMALE_VL[which.min(abs(DATE_FIRSTCONCPOS-FEMALE_VL_DATE))]
								), by=c('FEMALE_RID','MALE_RID','PAIRID','DATE_FIRSTCONCPOS')]	
	set(tmp, tmp[, which(abs(DATE_FIRSTCONCPOS-MALE_CLOSEST_VL_DATE)<1)], 'MALE_CLOSEST_VL_1YR', tmp[abs(DATE_FIRSTCONCPOS-MALE_CLOSEST_VL_DATE)<1, MALE_CLOSEST_VL])
	set(tmp, tmp[, which(abs(DATE_FIRSTCONCPOS-MALE_CLOSEST_VL_DATE)<2)], 'MALE_CLOSEST_VL_2YR', tmp[abs(DATE_FIRSTCONCPOS-MALE_CLOSEST_VL_DATE)<2, MALE_CLOSEST_VL])
	set(tmp, tmp[, which(abs(DATE_FIRSTCONCPOS-FEMALE_CLOSEST_VL_DATE)<1)], 'FEMALE_CLOSEST_VL_1YR', tmp[abs(DATE_FIRSTCONCPOS-FEMALE_CLOSEST_VL_DATE)<1, FEMALE_CLOSEST_VL])
	set(tmp, tmp[, which(abs(DATE_FIRSTCONCPOS-FEMALE_CLOSEST_VL_DATE)<2)], 'FEMALE_CLOSEST_VL_2YR', tmp[abs(DATE_FIRSTCONCPOS-FEMALE_CLOSEST_VL_DATE)<2, FEMALE_CLOSEST_VL])
	tmp			<- subset(tmp, select=c(PAIRID, FEMALE_CLOSEST_VL_1YR, FEMALE_CLOSEST_VL_2YR, MALE_CLOSEST_VL_1YR, MALE_CLOSEST_VL_2YR))
	rtpdm		<- merge(rtpdm, tmp, by='PAIRID', all.x=1)
	
	#	age at midpoint
	set(rtpdm, rtpdm[, which(is.na(MALE_BIRTHDATE))], 'MALE_BIRTHDATE', rtpdm[, mean(MALE_BIRTHDATE, na.rm=TRUE)])
	set(rtpdm, rtpdm[, which(is.na(FEMALE_BIRTHDATE))], 'FEMALE_BIRTHDATE', rtpdm[, mean(FEMALE_BIRTHDATE, na.rm=TRUE)])
	rtpdm[, MALE_AGE_AT_MID:= 2013.25-MALE_BIRTHDATE]
	rtpdm[, FEMALE_AGE_AT_MID:= 2013.25-FEMALE_BIRTHDATE]
	rtpdm[, MALE_AGE_AT_MID_C:= rtpdm[,as.character(cut(MALE_AGE_AT_MID, breaks=c(15,20,25,30,35,40,45,52), right=FALSE, levels=c('15-19','20-24','25-29','30-34','35-39','40-44','45-50')))]]
	rtpdm[, FEMALE_AGE_AT_MID_C:= rtpdm[,as.character(cut(FEMALE_AGE_AT_MID, breaks=c(15,20,25,30,35,40,45,52), right=FALSE, levels=c('15-19','20-24','25-29','30-34','35-39','40-44','45-50')))]]

	#
	#	add extra variables
	infile	<- "~/Dropbox (SPH Imperial College)/Rakai Pangea Meta Data/Data for Fish Analysis Working Group/additionalVariables_rtp_23Apr2018.rda"
	load(infile)
	rtpde	<- subset(as.data.table(rtp), grepl('mf|fm',SELECT), select=c(MALE_RID, FEMALE_RID, FEMALE_GUD, MALE_GUD, male.sum.stable, male.sum.unstable, male.condom.no.stable, male.condom.no.unstable,
					male.condom.yes.stable, male.condom.yes.unstable, male.alc.no.stable, male.alc.no.unstable, male.alc.yes.stable, male.alc.yes.unstable, female.sum.stable, female.sum.unstable,
					female.condom.no.stable, female.condom.no.unstable, female.condom.yes.stable, female.condom.yes.unstable, female.alc.no.stable, female.alc.no.unstable, female.alc.yes.stable,
					female.alc.yes.unstable, pregnant))
	setnames(rtpde, colnames(rtpde), gsub('\\.','_',toupper(colnames(rtpde))))
	setnames(rtpde, 'PREGNANT', 'FEMALE_PREGNANT')
	set(rtpde, rtpde[, which(FEMALE_PREGNANT=='unknown')], 'FEMALE_PREGNANT', NA_character_)
	set(rtpde, NULL, 'FEMALE_PREGNANT', rtpde[, as.integer(FEMALE_PREGNANT=='yes')])
	set(rtpde, NULL, 'FEMALE_GUD', rtpde[, as.integer(FEMALE_GUD==1)])
	set(rtpde, NULL, 'MALE_GUD', rtpde[, as.integer(MALE_GUD==1)])	
	#	alcohol use: ever in last year
	set(rtpde, rtpde[, which(MALE_SUM_UNSTABLE==0 & is.na(MALE_ALC_YES_UNSTABLE))], 'MALE_ALC_YES_UNSTABLE', 0L)
	set(rtpde, rtpde[, which(MALE_SUM_STABLE==0 & is.na(MALE_ALC_YES_STABLE))], 'MALE_ALC_YES_STABLE', 0L)
	set(rtpde, rtpde[, which(FEMALE_SUM_UNSTABLE==0 & is.na(FEMALE_ALC_YES_UNSTABLE))], 'FEMALE_ALC_YES_UNSTABLE', 0L)
	set(rtpde, rtpde[, which(FEMALE_SUM_STABLE==0 & is.na(FEMALE_ALC_YES_STABLE))], 'FEMALE_ALC_YES_STABLE', 0L)
	rtpde[, MALE_ALCEVER_LASTYEAR:= as.integer((MALE_ALC_YES_UNSTABLE+MALE_ALC_YES_STABLE)>0)]
	rtpde[, FEMALE_ALCEVER_LASTYEAR:= as.integer((FEMALE_ALC_YES_UNSTABLE+FEMALE_ALC_YES_STABLE)>0)]	
	#	condom use: never in last year
	set(rtpde, rtpde[, which(MALE_SUM_UNSTABLE==0 & is.na(MALE_CONDOM_YES_UNSTABLE))], 'MALE_CONDOM_YES_UNSTABLE', 0L)
	set(rtpde, rtpde[, which(MALE_SUM_STABLE==0 & is.na(MALE_CONDOM_YES_STABLE))], 'MALE_CONDOM_YES_STABLE', 0L)
	set(rtpde, rtpde[, which(FEMALE_SUM_UNSTABLE==0 & is.na(FEMALE_CONDOM_YES_UNSTABLE))], 'FEMALE_CONDOM_YES_UNSTABLE', 0L)
	set(rtpde, rtpde[, which(FEMALE_SUM_STABLE==0 & is.na(FEMALE_CONDOM_YES_STABLE))], 'FEMALE_CONDOM_YES_STABLE', 0L)
	rtpde[, MALE_CONDOM_NEVER:= as.integer((MALE_CONDOM_YES_UNSTABLE+MALE_CONDOM_YES_STABLE)==0)]
	rtpde[, FEMALE_CONDOM_NEVER:= as.integer((FEMALE_CONDOM_YES_UNSTABLE+FEMALE_CONDOM_YES_STABLE)==0)]
	#	check against reporting
	set(rtpde, rtpde[, which((MALE_SUM_UNSTABLE+MALE_SUM_STABLE)==0)], c('MALE_ALCEVER_LASTYEAR','MALE_CONDOM_NEVER'), NA_integer_)
	set(rtpde, rtpde[, which((FEMALE_SUM_UNSTABLE+FEMALE_SUM_STABLE)==0)], c('FEMALE_ALCEVER_LASTYEAR','FEMALE_CONDOM_NEVER'), NA_integer_)
	
	rtpde	<- subset(rtpde, select=c(MALE_RID, FEMALE_RID, FEMALE_GUD, MALE_GUD, FEMALE_PREGNANT, MALE_ALCEVER_LASTYEAR, FEMALE_ALCEVER_LASTYEAR, MALE_CONDOM_NEVER, FEMALE_CONDOM_NEVER))
	rtpdm	<- merge(rtpdm, rtpde, by=c('MALE_RID','FEMALE_RID'))
	
	#	select
	setkey(rtpdm, MALE_RID,FEMALE_RID)
	df		<- subset(rtpdm, select=c(	MALE_RID, FEMALE_RID, PAIRID, PTY_RUN, LINK_MF, COUPLE, SAME_HH, SAME_COMM, PAIR_COMM_TYPE, DATE_FIRSTCONCPOS,					
										MALE_SEX, MALE_RECENTVL, MALE_SEXP1YR, MALE_SEXP1OUT2, MALE_OCAT, MALE_EDUCAT, MALE_CIRCUM,
										MALE_INMIGRATE_1YR, MALE_INMIGRATE_2YR, MALE_CLOSEST_VL_1YR, MALE_CLOSEST_VL_2YR, MALE_AGE_AT_MID, MALE_AGE_AT_MID_C, MALE_ARVSTARTDATE,
										FEMALE_SEX, FEMALE_RECENTVL, FEMALE_SEXP1YR, FEMALE_SEXP1OUT2, FEMALE_OCAT, FEMALE_EDUCAT, 
										FEMALE_INMIGRATE_1YR, FEMALE_INMIGRATE_2YR, FEMALE_CLOSEST_VL_1YR, FEMALE_CLOSEST_VL_2YR, FEMALE_AGE_AT_MID, FEMALE_AGE_AT_MID_C,
										FEMALE_GUD, MALE_GUD, FEMALE_PREGNANT, MALE_ALCEVER_LASTYEAR, FEMALE_ALCEVER_LASTYEAR, MALE_CONDOM_NEVER, FEMALE_CONDOM_NEVER, FEMALE_ARVSTARTDATE
										))
	
	#	missing data: fill in
	if(0)
	{
		set(df, df[, which(is.na(MALE_RECENTVL))], 'MALE_RECENTVL', -1)
		set(df, df[, which(is.na(FEMALE_RECENTVL))], 'FEMALE_RECENTVL', -1)
		set(df, df[, which(is.na(MALE_CIRCUM))], 'MALE_CIRCUM', 'Unknown')
		set(df, df[, which(is.na(FEMALE_EDUCAT))], 'FEMALE_EDUCAT', 'Unknown')
		set(df, df[, which(is.na(MALE_EDUCAT))], 'MALE_EDUCAT', 'Unknown')
		set(df, df[, which(is.na(MALE_BIRTHDATE))], 'MALE_BIRTHDATE', df[, mean(MALE_BIRTHDATE, na.rm=TRUE)])
		set(df, df[, which(is.na(FEMALE_BIRTHDATE))], 'FEMALE_BIRTHDATE', df[, mean(FEMALE_BIRTHDATE, na.rm=TRUE)])
		for(x in colnames(df)) print( c(x, any(is.na(df[[x]]))) )
	}
	
	#	add estimated HIV prevalences (medians)
	if(0)
	{
		infile	<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30trmMF_estimated_prevalenceratio.rda"	
		load(infile)
		tmp		<- dcast.data.table(dgg, COMM_NUM~variable, value.var='M')
		tmp[, RFM:=NULL]
		setnames(tmp, c('COMM_NUM','PF','PM'), c('FEMALE_COMM_NUM','FEMALE_HIVPREVF','FEMALE_HIVPREVM'))	
		df		<- merge(df, tmp, by='FEMALE_COMM_NUM')
		setnames(tmp, c('FEMALE_COMM_NUM','FEMALE_HIVPREVF','FEMALE_HIVPREVM'), c('MALE_COMM_NUM','MALE_HIVPREVF','MALE_HIVPREVM'))
		df		<- merge(df, tmp, by='MALE_COMM_NUM')		
	}
	
	#
	#	cast to TRM==1 and REC==0
	#
	dg		<- subset(df, LINK_MF==1)
	set(dg, NULL, colnames(dg)[grepl('FEMALE',colnames(dg))], NULL)
	setnames(dg, colnames(dg), gsub('^MALE_','',colnames(dg)))
	tmp		<- subset(df, LINK_MF==1)
	set(tmp, NULL, colnames(tmp)[grepl('^MALE',colnames(tmp))], NULL)
	setnames(tmp, colnames(tmp), gsub('^FEMALE_','',colnames(tmp)))
	tmp[, LINK_MF:=0L]
	dg		<- rbind(dg, tmp, fill=TRUE)	
	tmp		<- subset(df, LINK_MF==0)
	set(tmp, NULL, colnames(tmp)[grepl('FEMALE',colnames(tmp))], NULL)
	setnames(tmp, colnames(tmp), gsub('^MALE_','',colnames(tmp)))	
	dg		<- rbind(dg, tmp, fill=TRUE)
	tmp		<- subset(df, LINK_MF==0)
	set(tmp, NULL, colnames(tmp)[grepl('^MALE',colnames(tmp))], NULL)
	setnames(tmp, colnames(tmp), gsub('^FEMALE_','',colnames(tmp)))
	tmp[, LINK_MF:=1L]
	dg		<- rbind(dg, tmp, fill=TRUE)
	setnames(dg, 'LINK_MF','TRANSMITTER')
	
	#
	#	get into format transmitter TR and recipient REC
	#
	rmf		<- subset(df, LINK_MF==1 )
	rfm		<- subset(df, LINK_MF==0 )
	rtr		<- copy(rmf)
	setnames(rtr,colnames(rtr),gsub('FEMALE','REC',colnames(rtr)))
	setnames(rtr,colnames(rtr),gsub('MALE','TR',colnames(rtr)))
	tmp		<- copy(rfm)
	setnames(tmp,colnames(tmp),gsub('FEMALE','TR',colnames(tmp)))
	setnames(tmp,colnames(tmp),gsub('MALE','REC',colnames(tmp)))
	rtr		<- rbind(rtr,tmp,fill=TRUE)	
	
	save(df, dg, rtr, file= paste0(outfile.base, '_transmitterrecipientdata.rda'))
}

RakaiFull.transmitter.171122.gender.explore<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile			<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_transmitterrecipientdata.rda"
	outfile.base	<- gsub('data.rda','',infile)
	load(infile)
	#
	#	prepare variables for STAN
	#		
	dg	<- copy(df)
	#	prepare data for STAN
	dg[, COMM_FISH:= as.integer(COMM_TYPE=='fisherfolk')]
	dg[, COMM_AGR:= as.integer(COMM_TYPE=='agrarian')]
	dg[, COMM_TRAD:= as.integer(COMM_TYPE=='trading')]
	#dg[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	dg[, HH:= as.integer(SAMEHH=='same hh')]
	dg[, MALE:= as.integer(SEX=='M')]
	dg[, NOEDU:= as.integer(EDUCAT=='None')]
	dg[, NOEDU_MISS:= as.integer(EDUCAT=='Unknown')]
	dg[, CIRCUM2:= as.integer(CIRCUM=='Y')]
	dg[, SEXP1YR_G1:= as.integer(SEXP1YR!='1')]
	dg[, VL_HIGH:= as.integer(RECENTVL>=1e5)]
	#dg[, VL_SUPP:= as.integer(RECENTVL<=200)]
	dg[, OC_AGRO:= as.integer(OCAT=='Agro/House')]
	dg[, OC_BAR:= as.integer(OCAT=='Bar/waitress')]
	dg[, OC_BODA:= as.integer(OCAT=='Boda/Trucking')]
	dg[, OC_FISH:= as.integer(OCAT=='Fishing')]
	dg[, OC_STUD:= as.integer(OCAT=='Student')]
	dg[, OC_TRAD:= as.integer(OCAT=='Trading/Shop keeper')]
	dg[, OC_OTH:= as.integer(OCAT%in%c('zother'))]	
	dg[, AGE_AT_MID_C2:= dg[,as.integer(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,40,45,52), right=FALSE, levels=c('1','2','3','4','5','6','7')))]]
	dg[, AGE_AT_MID_D:= dg[,as.character(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, levels=c('15-19','20-24','25-29','30-34','35-50')))]]
	dg[, AGE_AT_MID_D2:= dg[,as.integer(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, levels=c('1','2','3','4','5')))]]
	#
	
	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SAMEHH','SEX')]
	#	         SAMEHH SEX        PT        OT   N
	#1:      same hh   M 0.6666667 2.0000000 126
	#2: different hh   M 0.5329341 1.1410256 167
	#3:      same hh   F 0.3333333 0.5000000 126
	#4: different hh   F 0.4670659 0.8764045 167

	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SAMEHH','SEX','AGE_AT_MID_C2','AGE_AT_MID_C')]
	setkey(tmp, SEX, SAMEHH, AGE_AT_MID_C2)
	
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SAMEHH','SEX','AGE_AT_MID_D2','AGE_AT_MID_D')]
	setkey(tmp, SEX, SAMEHH, AGE_AT_MID_D2)
	#	this is great and interesting:
	#	females within HH: bimodal (reaching 1.0); whereas outside HH: increasing
	#	males within HH: pretty constant; whereas outside HH: increasing

	
	#
	#	VIRAL LOAD (yes)
	#
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SAMEHH','SEX','VL_HIGH')]
	setkey(tmp, VL_HIGH, SEX, SAMEHH)
	tmp
	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('VL_HIGH')]
	#	   VL_HIGH        PT       OT   N
	#1:       0 0.4878049 0.952381 533
	#2:       1 0.6226415 1.650000  53
	#	--> that s great

	#
	#	CIRCUMCISION (no)
	#

	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','CIRCUM')]
	#	   SEX  CIRCUM        PT        OT   N
	#1:   M       N 0.6028037 1.5176471 214
	#2:   M       Y 0.5512821 1.2285714  78
	#	--> suggests not to include circumcision

	#
	#	INMIGRANT ( don t include or include with gender)
	#

	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('INMIGRANT')]
	#	   INMIGRANT        PT       OT   N
	#1:         0 0.5066667 1.027027 450
	#2:         1 0.4779412 0.915493 136	--> not much at all
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SAMEHH','INMIGRANT')]
	setkey(tmp, INMIGRANT, SAMEHH)
	tmp
	#	         SAMEHH INMIGRANT        PT        OT   N
	#1:      same hh         0 0.5130890 1.0537634 191
	#2: different hh         0 0.5019305 1.0077519 259
	#3:      same hh         1 0.4590164 0.8484848  61		--> could be sig
	#4: different hh         1 0.4933333 0.9736842  75
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','INMIGRANT')]
	setkey(tmp, SEX, INMIGRANT)
	tmp		
	#	   SEX INMIGRANT        PT       OT   N
	#1:   F         0 0.4186047 0.720000 215
	#2:   F         1 0.3846154 0.625000  78	--> not much down
	#3:   M         0 0.5872340 1.422680 235
	#4:   M         1 0.6034483 1.521739  58	--> not much up
	#	inmigrant: overall not very strong 
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','SAMEHH','INMIGRANT')]
	setkey(tmp, SEX, SAMEHH, INMIGRANT)
	tmp		
	#	  SEX     SAMEHH INMIGRANT        PT        OT   N
	#1:   F      same hh         0 0.3111111 0.4516129  90
	#2:   F      same hh         1 0.3888889 0.6363636  36
	#3:   F different hh         0 0.4960000 0.9841270 125
	#4:   F different hh         1 0.3809524 0.6153846  42	--> could be sig
	#5:   M      same hh         0 0.6930693 2.2580645 101
	#6:   M      same hh         1 0.5600000 1.2727273  25	--> more down
	#7:   M different hh         0 0.5074627 1.0303030 134
	#8:   M different hh         1 0.6363636 1.7500000  33	--> more up

	#
	#	NO EDU (include with gender)
	#

	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','SAMEHH','NOEDU')]
	setkey(tmp, SEX, SAMEHH, NOEDU)
	tmp	
	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','NOEDU')]
	#	   SEX NOEDU        PT        OT   N
	#1:   M     0 0.5886792 1.4311927 265
	#2:   M     1 0.6071429 1.5454545  28
	#3:   F     0 0.4220532 0.7302632 263
	#4:   F     1 0.3000000 0.4285714  30			--> interesting
	#	no education shows up among females, sample size but OK ish

	#
	#	SEXP1YR_G1 (include in interaction with gender and household)
	#
	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEXP1YR_G1')]
	#	   SEXP1YR_G1        PT        OT   N
	#1:          0 0.4634146 0.8636364 328
	#2:          1 0.5465116 1.2051282 258	--> this should be up and sig
	dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','SEXP1YR_G1')]
	#	   SEX SEXP1YR_G1        PT        OT   N
	#1:   M          0 0.5789474 1.3750000 114
	#2:   M          1 0.5977654 1.4861111 179
	#3:   F          0 0.4018692 0.6718750 214
	#4:   F          1 0.4303797 0.7555556  79	--> so the higher overall result is just confounded by more men reporting >1 sex partner
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','SEXP1YR_G1','SAMEHH')]
	setkey(tmp, SEX, SAMEHH, SEXP1YR_G1)
	tmp	
	#1:   F          0      same hh 0.3364486 0.5070423 107
	#2:   F          1      same hh 0.3157895 0.4615385  19
	#3:   F          0 different hh 0.4672897 0.8771930 107
	#4:   F          1 different hh 0.4666667 0.8750000  60
	#5:   M          0      same hh 0.6129032 1.5833333  62
	#6:   M          1      same hh 0.7187500 2.5555556  64	--> this should be sig
	#7:   M          0 different hh 0.5384615 1.1666667  52
	#8:   M          1 different hh 0.5304348 1.1296296 115

	#
	#	OCCUPATION
	#
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','OCAT')]
	setkey(tmp, SEX, OCAT)
	tmp	
	#SEX                OCAT        PT        OT   N
	#1:   F          Agro/House 0.3931034 0.6477273 145
	#2:   F        Bar/waitress 0.4878049 0.9523810  41 --> interesting, higher
	#3:   F             Student 0.2000000 0.2500000   5	--> put into other
	#4:   F Trading/Shop keeper 0.3888889 0.6363636  54
	#5:   F              zother 0.4375000 0.7777778  48
	#6:   M          Agro/House 0.6428571 1.8000000  42
	#7:   M        Bar/waitress 0.0000000 0.0000000   2 --> put into other
	#8:   M       Boda/Trucking 0.4285714 0.7500000   7 --> put into other
	#9:   M             Fishing 0.5606061 1.2758621 132 --> interesting, lower
	#10:   M             Student 0.5714286 1.3333333   7 --> put into other
	#11:   M Trading/Shop keeper 0.6136364 1.5882353  44
	#12:   M              zother 0.6440678 1.8095238  59

	#
	#	explore INMIGRANT COMM_TYPE
	#
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('INMIGRANT','COMM_TYPE')]
	setkey(tmp, INMIGRANT, COMM_TYPE)
	tmp	
	#	   INMIGRANT  COMM_TYPE        PT        OT   N
	#1:         0   agrarian 0.4822695 0.9315068 141
	#2:         0 fisherfolk 0.5070922 1.0287770 282
	#3:         0    trading 0.6296296 1.7000000  27
	#4:         1   agrarian 0.5208333 1.0869565  48
	#5:         1 fisherfolk 0.4444444 0.8000000  81
	#6:         1    trading 0.5714286 1.3333333   7
	tmp	<- dg[, list(PT= length(which(TRANSMITTER==1))/length(TRANSMITTER), OT= length(which(TRANSMITTER==1))/length(which(TRANSMITTER==0)), N=length(TRANSMITTER) ), by=c('SEX','INMIGRANT','COMM_TYPE')]
	setkey(tmp, SEX, INMIGRANT, COMM_TYPE)
	tmp	

}

RakaiFull.transmitter.171122.gender.interactionmodel.stan.with.threshold<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile			<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_transmitterrecipientdata.rda"
	outfile.base	<- gsub('data.rda','',infile)
	load(infile)
	#
	#	prepare variables for STAN
	#		
	dg	<- copy(df)
	#	prepare data for STAN
	dg[, COMM_FISH:= as.integer(COMM_TYPE=='fisherfolk')]
	dg[, COMM_AGR:= as.integer(COMM_TYPE=='agrarian')]
	dg[, COMM_TRAD:= as.integer(COMM_TYPE=='trading')]
	#dg[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	dg[, HH:= as.integer(SAMEHH=='same hh')]
	dg[, MALE:= as.integer(SEX=='M')]
	dg[, NOEDU:= as.integer(EDUCAT=='None')]
	dg[, NOEDU_MISS:= as.integer(EDUCAT=='Unknown')]
	dg[, SEXP1YR_G1:= as.integer(SEXP1YR!='1')]
	dg[, VL_HIGH:= as.integer(RECENTVL>=1e5)]
	dg[, MA_CIRCUM:= as.integer(!is.na(CIRCUM) & CIRCUM=='Y')]
	#dg[, VL_SUPP:= as.integer(RECENTVL<=200)]
	dg[, OC_AGRO:= as.integer(OCAT=='Agro/House')]
	dg[, OC_BAR:= as.integer(OCAT=='Bar/waitress')]
	dg[, OC_BODA:= as.integer(OCAT=='Boda/Trucking')]
	dg[, OC_FISH:= as.integer(OCAT=='Fishing')]
	dg[, OC_STUD:= as.integer(OCAT=='Student')]
	dg[, OC_TRAD:= as.integer(OCAT=='Trading/Shop keeper')]
	dg[, OC_OTH:= as.integer(OCAT%in%c('zother'))]	
	dg[, AGE_AT_MID_C2:= dg[,as.integer(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,40,45,52), right=FALSE, levels=c('1','2','3','4','5','6','7')))]]
	dg[, AGE_AT_MID_D:= dg[,as.character(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, levels=c('15-19','20-24','25-29','30-34','35-50')))]]
	dg[, AGE_AT_MID_D2:= dg[,as.integer(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, levels=c('1','2','3','4','5')))]]
	dg[, MA_OC_AGRO:= as.integer(OCAT=='Agro/House')]	
	dg[, MA_OC_FISH:= as.integer(OCAT=='Fishing')]
	dg[, MA_OC_TRAD:= as.integer(OCAT=='Trading/Shop keeper')]
	dg[, MA_OC_OTH:= as.integer(OCAT%in%c('zother','Bar/waitress','Student','Boda/Trucking'))]	
	dg[, FE_OC_AGRO:= as.integer(OCAT=='Agro/House')]
	dg[, FE_OC_BAR:= as.integer(OCAT=='Bar/waitress')]			
	dg[, FE_OC_TRAD:= as.integer(OCAT=='Trading/Shop keeper')]
	dg[, FE_OC_OTH:= as.integer(OCAT%in%c('zother','Student'))]		
	
	
	
	#	model 1 interaction model 
	#	MALE:HH + VL_HIGH + COMM_TYPE + MALE:NOEDU + MALE:INMIGRANT + MALE:SEXP1YR_G1
	mi.1 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- base + #	is female_in_hh
								  male_in_hh*MALE*HH + 
								  male_extra_hh*MALE*(1-HH) + female_extra_hh*(1-MALE)*(1-HH) +
								  vlhigh*VL_HIGH +
								  comm_fish*COMM_FISH + comm_trad*COMM_TRAD +
								  male_noedu*MALE*NOEDU + female_noedu*(1-MALE)*NOEDU +
								  male_inmigrant*MALE*INMIGRANT + female_inmigrant*(1-MALE)*INMIGRANT +
								  male_sexp*MALE*SEXP1YR_G1 + female_sexp*(1-MALE)*SEXP1YR_G1,
					base ~ dnorm(0,100),										
					c(comm_fish, comm_trad, vlhigh) ~ dnorm(0,10),
					c(male_in_hh, male_extra_hh, female_extra_hh, male_noedu, female_noedu, male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0,10)													
			),
			data=as.data.frame(dg), 
			start=list(	base=0, comm_fish=0, comm_trad=0, vlhigh=0,
						male_in_hh=0, male_extra_hh=0, female_extra_hh=0, male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	precis(mi.1, prob=0.95)	
	plot(precis(mi.1, prob=0.95))
	
	mi.2 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- male_in_hh*MALE*HH + female_in_hh*(1-MALE)*HH +
							male_extra_hh*MALE*(1-HH) + female_extra_hh*(1-MALE)*(1-HH) +
							vlhigh*VL_HIGH +							
							male_noedu*MALE*NOEDU + female_noedu*(1-MALE)*NOEDU +
							male_inmigrant*MALE*INMIGRANT + female_inmigrant*(1-MALE)*INMIGRANT +
							male_sexp*MALE*SEXP1YR_G1 + female_sexp*(1-MALE)*SEXP1YR_G1,															
					c(vlhigh) ~ dnorm(0,10),
					c(female_in_hh, male_in_hh, male_extra_hh, female_extra_hh, male_noedu, female_noedu, male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0,10)													
			),
			data=as.data.frame(dg), 
			start=list(	vlhigh=0,
						female_in_hh=0, male_in_hh=0, male_extra_hh=0, female_extra_hh=0, male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	#	mi.2 is good
	mi.4 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- male_in_hh*MALE*HH + female_in_hh*(1-MALE)*HH +
							male_extra_hh*MALE*(1-HH) + female_extra_hh*(1-MALE)*(1-HH) +
							vlhigh*VL_HIGH + 							
							male_noedu*MALE*NOEDU + female_noedu*(1-MALE)*NOEDU +
							male_inmigrant*MALE*INMIGRANT + female_inmigrant*(1-MALE)*INMIGRANT +
							male_sexp*MALE*SEXP1YR_G1 + female_sexp*(1-MALE)*SEXP1YR_G1 +
							# occupation: agro/house is baseline
							male_agro*MALE*MA_OC_AGRO + male_fish*MALE*MA_OC_FISH + male_shopkeeper*MALE*MA_OC_TRAD +  
							female_agro*(1-MALE)*FE_OC_AGRO + female_barworker*(1-MALE)*FE_OC_BAR + female_shopkeeper*(1-MALE)*FE_OC_TRAD,															
					c(vlhigh) ~ dnorm(0,10),
					c(male_fish, male_shopkeeper, male_agro, female_barworker, female_shopkeeper, female_agro) ~ dnorm(0,10),
					c(female_in_hh, male_in_hh, male_extra_hh, female_extra_hh, male_noedu, female_noedu, male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0,10)													
			),
			data=as.data.frame(dg), 
			start=list(	vlhigh=0,
						male_fish=0, male_shopkeeper=0, male_agro=0, female_barworker=0, female_shopkeeper=0, female_agro=0,
						female_in_hh=0, male_in_hh=0, male_extra_hh=0, female_extra_hh=0, male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	plot(mi.4)
	precis(mi.4, depth=2, prob=0.95)
	plot(precis(mi.4, depth=2, prob=0.95))
	
	
	mi.5 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- male_in_hh*MALE*HH + female_in_hh*(1-MALE)*HH +
							male_extra_hh*MALE*(1-HH) + female_extra_hh*(1-MALE)*(1-HH) +
							vlhigh*VL_HIGH + male_circ*MALE*MA_CIRCUM +							
							male_noedu*MALE*NOEDU + female_noedu*(1-MALE)*NOEDU +
							male_inmigrant*MALE*INMIGRANT + female_inmigrant*(1-MALE)*INMIGRANT +
							male_sexp*MALE*SEXP1YR_G1 + female_sexp*(1-MALE)*SEXP1YR_G1 +
							# occupation: agro/house is baseline
							male_agro*MALE*MA_OC_AGRO + male_fish*MALE*MA_OC_FISH + male_shopkeeper*MALE*MA_OC_TRAD +  
							female_agro*(1-MALE)*FE_OC_AGRO + female_barworker*(1-MALE)*FE_OC_BAR + female_shopkeeper*(1-MALE)*FE_OC_TRAD,															
					c(vlhigh, male_circ) ~ dnorm(0,10),
					c(male_fish, male_shopkeeper, male_agro, female_barworker, female_shopkeeper, female_agro) ~ dnorm(0,10),
					c(female_in_hh, male_in_hh, male_extra_hh, female_extra_hh, male_noedu, female_noedu, male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0,10)													
			),
			data=as.data.frame(dg), 
			start=list(	vlhigh=0, male_circ=0,
						female_in_hh=0, male_in_hh=0, male_extra_hh=0, female_extra_hh=0, 
						male_fish=0, male_shopkeeper=0, male_agro=0, female_barworker=0, female_shopkeeper=0, female_agro=0,
						male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	precis(mi.5, depth=2, prob=0.95)
	plot(precis(mi.5, depth=2, prob=0.95))
	#	cannot make sense out of male_circ < 0, leave out	
	
	mi.3 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- male_in_hh[AGE_AT_MID_D2]*MALE*HH + female_in_hh[AGE_AT_MID_D2]*(1-MALE)*HH +
							male_extra_hh[AGE_AT_MID_D2]*MALE*(1-HH) + female_extra_hh[AGE_AT_MID_D2]*(1-MALE)*(1-HH) +
							vlhigh*VL_HIGH +							
							male_noedu*MALE*NOEDU + female_noedu*(1-MALE)*NOEDU +
							male_inmigrant*MALE*INMIGRANT + female_inmigrant*(1-MALE)*INMIGRANT +
							male_sexp*MALE*SEXP1YR_G1 + female_sexp*(1-MALE)*SEXP1YR_G1,															
					c(vlhigh) ~ dnorm(0,10),					 
					c(male_noedu, female_noedu, male_inmigrant, female_inmigrant, male_sexp, female_sexp) ~ dnorm(0,10),
					female_in_hh[AGE_AT_MID_D2] ~ dnorm(0, sig_female_in_hh),
					male_in_hh[AGE_AT_MID_D2] ~ dnorm(0, sig_male_in_hh),
					female_extra_hh[AGE_AT_MID_D2] ~ dnorm(0, sig_female_extra_hh),
					male_extra_hh[AGE_AT_MID_D2] ~ dnorm(0, sig_male_extra_hh),
					c(sig_female_in_hh, sig_male_in_hh, sig_female_extra_hh, sig_male_extra_hh) ~ dexp(1)
			),
			data=as.data.frame(dg), 
			start=list(	female_in_hh=rep(0,5), male_in_hh=rep(0,5), male_extra_hh=rep(0,5), female_extra_hh=rep(0,5), 
						sig_female_in_hh=1, sig_male_in_hh=1, sig_female_extra_hh=1, sig_male_extra_hh=1,
						vlhigh=0, male_noedu=0, female_noedu=0, male_inmigrant=0, female_inmigrant=0, male_sexp=0, female_sexp=0),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	plot(mi.3)
	precis(mi.3, depth=2, prob=0.95)
	plot(precis(mi.3, depth=2, prob=0.95))
	
	#	extract stuff from mi.2
	post	<- as.data.table(extract.samples(mi.2))
	post[, MC:= seq_len(nrow(post))]
	post	<- melt(post, id.vars='MC')
	tmp		<- post[, which(!grepl('sig',variable))]
	set(post, tmp, 'value', post[tmp, exp(value)])	
	tmp		<- post[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
								V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), 
					  by=c('variable')]
	dmi2	<- dcast.data.table(tmp, variable~STAT, value.var='V')
	dmi2[, MODEL:= 'no age']
	dmi2[, STAT:= as.character(factor(grepl('extra_hh|in_hh',variable), levels=c(TRUE,FALSE), labels=c('odds','odds ratio')))]
	dmi2	<- dmi2[order(STAT, variable),]
	dmi2[, FACTOR:= seq_len(nrow(dmi2))]
	set(dmi2, NULL, 'FACTOR', dmi2[, factor(FACTOR, levels=FACTOR, labels=variable)])
	#	extract stuff from mi.4
	post	<- as.data.table(extract.samples(mi.4))
	post[, MC:= seq_len(nrow(post))]
	post	<- melt(post, id.vars='MC')
	tmp		<- post[, which(!grepl('sig',variable))]
	set(post, tmp, 'value', post[tmp, exp(value)])	
	tmp		<- post[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
					V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), 
			by=c('variable')]
	dmi4	<- dcast.data.table(tmp, variable~STAT, value.var='V')
	dmi4[, MODEL:= 'no age']
	dmi4[, STAT:= as.character(factor(grepl('extra_hh|in_hh',variable), levels=c(TRUE,FALSE), labels=c('odds','odds ratio')))]
	dmi4	<- dmi4[order(STAT, variable),]
	dmi4[, FACTOR:= seq_len(nrow(dmi4))]
	set(dmi4, NULL, 'FACTOR', dmi4[, factor(FACTOR, levels=FACTOR, labels=variable)])
	
	
	#	extract stuff from mi.3
	post	<- as.data.table(extract.samples(mi.3))
	post[, MC:= seq_len(nrow(post))]
	post	<- melt(post, id.vars='MC')
	post[, AGE_AT_MID_D2:= as.integer(gsub('^.*V([0-9])$','\\1',variable))]
	set(post, NULL, 'variable', post[, gsub('\\.V[0-9]','',variable)])
	post	<- merge(post, unique(subset(dg, select=c(AGE_AT_MID_D,AGE_AT_MID_D2))), by='AGE_AT_MID_D2', all.x=TRUE)
	tmp		<- post[, which(!grepl('sig',variable))]
	set(post, tmp, 'value', post[tmp, exp(value)])
	set(post, post[, which(is.na(AGE_AT_MID_D))], 'AGE_AT_MID_D', '')
	tmp		<- post[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
								V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), 
					  by=c('variable','AGE_AT_MID_D')]
	dmi3	<- dcast.data.table(tmp, variable+AGE_AT_MID_D~STAT, value.var='V')
	dmi3[, MODEL:= 'with age']
	dmi3[, STAT:= as.character(factor(grepl('extra_hh|in_hh',variable), levels=c(TRUE,FALSE), labels=c('odds','odds ratio')))]
	set(dmi3, dmi3[, which(grepl('sig_',variable))], 'STAT', 'xhyperparam')
	dmi3	<- dmi3[order(STAT, variable, AGE_AT_MID_D),]
	dmi3[, FACTOR:= seq_len(nrow(dmi3))]
	set(dmi3, NULL, 'FACTOR', dmi3[, factor(FACTOR, levels=FACTOR, labels=paste0(variable,'_',AGE_AT_MID_D))])
	
	#	save
	save(dg, mi.2, mi.3, mi.4, mi.5, dmi2, dmi3, dmi4, file=paste0(outfile.base,'_interaction_runs.rda'))	
	
	#	make plots
	dp		<- copy(dmi2)	
	dp[, FILL:= '0']
	set(dp, dp[, which(IU<1)], 'FILL', '-1')
	set(dp, dp[, which(CU<1)], 'FILL', '-2')	
	set(dp, dp[, which(IL>1)], 'FILL', '1')
	set(dp, dp[, which(CL>1)], 'FILL', '2')
	ggplot(dp, aes(x=FACTOR)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/6, 1/3,1/2,1,2,3, 6), labels=c('1/6','1/3','1/2','1','2','3','6'), expand=c(0,0)) +
			scale_fill_manual(values=c('-2'='deepskyblue','-1'='cyan','0'='grey80', '1'='darkorange', '2'='red')) +
			coord_flip(ylim=c(1/5,5)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs') +
			facet_grid(STAT~., scales='free', space='free')	
	ggsave(file=paste0(outfile.base,'_interaction_runs_odds_noage.pdf'), w=5, h=3.5)
	
	dp		<- copy(dmi4)	
	dp[, FILL:= '0']
	set(dp, dp[, which(IU<1)], 'FILL', '-1')
	set(dp, dp[, which(CU<1)], 'FILL', '-2')	
	set(dp, dp[, which(IL>1)], 'FILL', '1')
	set(dp, dp[, which(CL>1)], 'FILL', '2')
	ggplot(dp, aes(x=FACTOR)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/6, 1/3,1/2,1,2,3, 6), labels=c('1/6','1/3','1/2','1','2','3','6'), expand=c(0,0)) +
			scale_fill_manual(values=c('-2'='deepskyblue','-1'='cyan','0'='grey80', '1'='darkorange', '2'='red')) +
			coord_flip(ylim=c(1/5,5)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs') +
			facet_grid(STAT~., scales='free', space='free')	
	ggsave(file=paste0(outfile.base,'_interaction_runs_odds_noagewithocc.pdf'), w=5, h=5)
	
	
	dp		<- subset(dmi3, !grepl('sig', FACTOR))	
	dp[, FILL:= '0']
	set(dp, dp[, which(IU<1)], 'FILL', '-1')
	set(dp, dp[, which(CU<1)], 'FILL', '-2')	
	set(dp, dp[, which(IL>1)], 'FILL', '1')
	set(dp, dp[, which(CL>1)], 'FILL', '2')	
	ggplot(dp, aes(x=FACTOR)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('-2'='deepskyblue','-1'='cyan','0'='grey80', '1'='darkorange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs') +
			facet_grid(STAT~., scales='free', space='free')
	ggsave(file=paste0(outfile.base,'_interaction_runs_odds_withage.pdf'), w=5, h=8)
	
	dp		<- subset(dmi3, !grepl('sig', FACTOR) & grepl('in_hh', FACTOR))	
	dp[, FILL:= '0']
	set(dp, dp[, which(IU<1)], 'FILL', '-1')
	set(dp, dp[, which(CU<1)], 'FILL', '-2')	
	set(dp, dp[, which(IL>1)], 'FILL', '1')
	set(dp, dp[, which(CL>1)], 'FILL', '2')
	dp[, SEX:= gsub('([a-z]+)_([a-z]+_[a-z]+)','\\1',variable)]
	ggplot(dp, aes(x=AGE_AT_MID_D)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('-2'='deepskyblue','-1'='cyan','0'='grey80', '1'='darkorange', '2'='red')) +
			coord_flip(ylim=c(1/10,10)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs') +
			facet_grid(STAT+variable~., scales='free', space='free')
	ggsave(file=paste0(outfile.base,'_interaction_runs_odds_onluage.pdf'), w=5, h=4)
}

RakaiFull.transmitter.171122.gender.multivariatemodels.stan.with.threshold<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile			<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_transmitterrecipientdata.rda"
	outfile.base	<- gsub('data.rda','',infile)
	load(infile)
	#
	#	prepare variables for STAN
	#		
	dg	<- copy(df)
	#	prepare data for STAN
	dg[, COMM_FISH:= as.integer(COMM_TYPE=='fisherfolk')]
	dg[, COMM_AGR:= as.integer(COMM_TYPE=='agrarian')]
	dg[, COMM_TRAD:= as.integer(COMM_TYPE=='trading')]
	#dg[, COMM_MXD:= as.integer(PAIR_COMM_TYPE=='mixed')]
	dg[, HH:= as.integer(SAMEHH=='same hh')]
	dg[, MALE:= as.integer(SEX=='M')]
	dg[, NOEDU:= as.integer(EDUCAT=='None')]
	dg[, NOEDU_MISS:= as.integer(EDUCAT=='Unknown')]
	dg[, SEXP1YR_G1:= as.integer(SEXP1YR!='1')]
	dg[, VL_HIGH:= as.integer(RECENTVL>=1e5)]
	#dg[, VL_SUPP:= as.integer(RECENTVL<=200)]
	dg[, OC_AGRO:= as.integer(OCAT=='Agro/House')]
	dg[, OC_BAR:= as.integer(OCAT=='Bar/waitress')]
	dg[, OC_BODA:= as.integer(OCAT=='Boda/Trucking')]
	dg[, OC_FISH:= as.integer(OCAT=='Fishing')]
	dg[, OC_STUD:= as.integer(OCAT=='Student')]
	dg[, OC_TRAD:= as.integer(OCAT=='Trading/Shop keeper')]
	dg[, OC_OTH:= as.integer(OCAT%in%c('zother'))]	
	dg[, AGE_AT_MID_C2:= dg[,as.integer(cut(AGE_AT_MID, breaks=c(15,20,25,30,35,40,45,52), right=FALSE, levels=c('1','2','3','4','5','6','7')))]]
	#
	#	look only at transmitters among males
	#
	df	<- subset(dg, MALE==1)		
	#	model 2 with age (exchangeable)
	mma.2 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- base + age[AGE_AT_MID_C2] +   
							# agrarian community is base 
							comm_fish*COMM_FISH + comm_trad*COMM_TRAD + 							
							noedu*NOEDU + sexp*SEXP1YR_G1 +	vlhigh*VL_HIGH + hh*HH + inmigrant*INMIGRANT +							
							# other occupation is baseline
							occ_agro*OC_AGRO + occ_fish*OC_FISH + occ_boda*OC_BODA + occ_stud*OC_STUD + occ_trad*OC_TRAD,
					base ~ dnorm(0,100),
					age[AGE_AT_MID_C2] ~ dnorm(0, age_sig),					
					c(comm_fish, comm_trad) ~ dnorm(0,10),
					c(noedu, sexp, vlhigh, hh, inmigrant) ~ dnorm(0,10),								
					c(occ_agro, occ_boda, occ_stud, occ_trad, occ_fish) ~ dnorm(0,10),
					c(age_sig) ~ dexp(1)
			),
			data=as.data.frame(df), 
			start=list(	base=0, comm_fish=0, comm_trad=0, age=rep(0,7), noedu=0, sexp=0, vlhigh=0, hh=0, inmigrant=0,						
						occ_agro=0, occ_boda=0, occ_stud=0, occ_trad=0, occ_fish=0,
						age_sig=1),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	#plot(precis(mhh.2, prob=0.95, depth=2))
	#pairs(mhh.1)
	post	<- extract.samples(mma.2)
	dp		<- data.table(	MC= seq_along(post$base),
			PI_COMM_AGRO=logistic( post$base ),				
			PI_COMM_FISH=logistic( post$base+post$comm_fish ), 											
			PI_COMM_TRAD=logistic( post$base+post$comm_trad ),										
			PI_EDU_NONE=logistic( post$base+post$noedu ),										
			PI_SEXP1YR_G1=logistic( post$base+post$sexp ),
			PI_VL_HIGH=logistic( post$base+post$vlhigh ),			
			PI_MOBILITY_INMIGRANT=logistic( post$base+post$inmigrant ),
			PI_HH_SAME=logistic( post$base+post$hh ),			
			PI_OCC_AGRO=logistic( post$base+post$occ_agro ),
			#PI_OCC_BAR=logistic( post$base+post$occ_bar ),
			PI_OCC_BODA=logistic( post$base+post$occ_boda ),
			PI_OCC_FISH=logistic( post$base+post$occ_fish ),
			PI_OCC_STUD=logistic( post$base+post$occ_stud ),
			PI_OCC_TRAD=logistic( post$base+post$occ_trad ),
			PI_AGE_1=logistic( post$base+post$age[,1] ),
			PI_AGE_2=logistic( post$base+post$age[,2] ),
			PI_AGE_3=logistic( post$base+post$age[,3] ),
			PI_AGE_4=logistic( post$base+post$age[,4] ),
			PI_AGE_5=logistic( post$base+post$age[,5] ),
			PI_AGE_6=logistic( post$base+post$age[,6] ),
			PI_AGE_7=logistic( post$base+post$age[,7] ),
			OR_COMM_AGRO=exp( post$base ),				
			OR_COMM_FISH=exp( post$base+post$comm_fish ), 											
			OR_COMM_TRAD=exp( post$base+post$comm_trad ),										
			OR_EDU_NONE=exp( post$base+post$noedu ),										
			OR_SEXP1YR_G1=exp( post$base+post$sexp ),
			OR_VL_HIGH=exp( post$base+post$vlhigh ),
			OR_MOBILITY_INMIGRANT=exp( post$base+post$inmigrant ),
			OR_HH_SAME=exp( post$base+post$hh ),			
			OR_OCC_AGRO=exp( post$base+post$occ_agro ),
			#OR_OCC_BAR=exp( post$base+post$occ_bar ),
			OR_OCC_BODA=exp( post$base+post$occ_boda ),
			OR_OCC_FISH=exp( post$base+post$occ_fish ),
			OR_OCC_STUD=exp( post$base+post$occ_stud ),
			OR_OCC_TRAD=exp( post$base+post$occ_trad ),
			OR_AGE_1=exp( post$base+post$age[,1] ),
			OR_AGE_2=exp( post$base+post$age[,2] ),
			OR_AGE_3=exp( post$base+post$age[,3] ),
			OR_AGE_4=exp( post$base+post$age[,4] ),
			OR_AGE_5=exp( post$base+post$age[,5] ),
			OR_AGE_6=exp( post$base+post$age[,6] ),
			OR_AGE_7=exp( post$base+post$age[,7] ),
			ORX_COMM_FISH=exp( post$comm_fish ), 											
			ORX_COMM_TRAD=exp( post$comm_trad ),										
			ORX_EDU_NONE=exp( post$noedu ),										
			ORX_SEXP1YR_G1=exp( post$sexp ),
			ORX_VL_HIGH=exp( post$vlhigh ),
			ORX_MOBILITY_INMIGRANT=exp( post$inmigrant ),
			ORX_HH_SAME=exp( post$hh ),						
			ORX_OCC_AGRO=exp( post$occ_agro ),
			#ORX_OCC_BAR=exp( post$occ_bar ),
			ORX_OCC_BODA=exp( post$occ_boda ),
			ORX_OCC_FISH=exp( post$occ_fish ),
			ORX_OCC_STUD=exp( post$occ_stud ),
			ORX_OCC_TRAD=exp( post$occ_trad ),
			ORX_AGE_1=exp( post$age[,1] ),
			ORX_AGE_2=exp( post$age[,2] ),
			ORX_AGE_3=exp( post$age[,3] ),
			ORX_AGE_4=exp( post$age[,4] ),
			ORX_AGE_5=exp( post$age[,5] ),
			ORX_AGE_6=exp( post$age[,6] ),
			ORX_AGE_7=exp( post$age[,7] )
			)
	dp		<- melt(dp, id.vars='MC')	
	dma.2	<- dp[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
							V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), by='variable']
	dma.2	<- dcast.data.table(dma.2, variable~STAT, value.var='V')
	dma.2[, MODEL:= 'multivariate male with age']
	#
	#	look only at transmitters outside households
	#
	df	<- subset(dg, MALE==0)		
	mfe.2 <- map2stan(
			alist(
					TRANSMITTER ~ dbinom(1, ptr),
					logit(ptr) <- base + age[AGE_AT_MID_C2] +   
							# agrarian community is base 
							comm_fish*COMM_FISH + comm_trad*COMM_TRAD + 							
							noedu*NOEDU + sexp*SEXP1YR_G1 +	vlhigh*VL_HIGH + hh*HH + inmigrant*INMIGRANT +							
							# other occupation is baseline
							occ_agro*OC_AGRO + occ_bar*OC_BAR + occ_stud*OC_STUD + occ_trad*OC_TRAD,
					base ~ dnorm(0,100),
					age[AGE_AT_MID_C2] ~ dnorm(0, age_sig),					
					c(comm_fish, comm_trad) ~ dnorm(0,10),
					c(noedu, sexp, vlhigh, hh, inmigrant) ~ dnorm(0,10),								
					c(occ_agro, occ_bar, occ_stud, occ_trad) ~ dnorm(0,10),
					c(age_sig) ~ dexp(1)
			),
			data=as.data.frame(df), 
			start=list(	base=0, comm_fish=0, comm_trad=0, age=rep(0,7), noedu=0, sexp=0, vlhigh=0, hh=0, inmigrant=0,						
					occ_agro=0, occ_bar=0, occ_stud=0, occ_trad=0,
					age_sig=1),			
			warmup=1e3, iter=1e4, chains=1, cores=4
		)
	#plot(precis(mfe.2, prob=0.95, depth=2))
	#plot(mfe.2)
	post	<- extract.samples(mfe.2)
	dp		<- data.table(	MC= seq_along(post$base),
			PI_COMM_AGRO=logistic( post$base ),				
			PI_COMM_FISH=logistic( post$base+post$comm_fish ), 											
			PI_COMM_TRAD=logistic( post$base+post$comm_trad ),										
			PI_EDU_NONE=logistic( post$base+post$noedu ),										
			PI_SEXP1YR_G1=logistic( post$base+post$sexp ),
			PI_VL_HIGH=logistic( post$base+post$vlhigh ),			
			PI_MOBILITY_INMIGRANT=logistic( post$base+post$inmigrant ),
			PI_HH_SAME=logistic( post$base+post$hh ),			
			PI_OCC_AGRO=logistic( post$base+post$occ_agro ),
			PI_OCC_BAR=logistic( post$base+post$occ_bar ),
			PI_OCC_STUD=logistic( post$base+post$occ_stud ),
			PI_OCC_TRAD=logistic( post$base+post$occ_trad ),
			PI_AGE_1=logistic( post$base+post$age[,1] ),
			PI_AGE_2=logistic( post$base+post$age[,2] ),
			PI_AGE_3=logistic( post$base+post$age[,3] ),
			PI_AGE_4=logistic( post$base+post$age[,4] ),
			PI_AGE_5=logistic( post$base+post$age[,5] ),
			PI_AGE_6=logistic( post$base+post$age[,6] ),
			PI_AGE_7=logistic( post$base+post$age[,7] ),
			OR_COMM_AGRO=exp( post$base ),				
			OR_COMM_FISH=exp( post$base+post$comm_fish ), 											
			OR_COMM_TRAD=exp( post$base+post$comm_trad ),										
			OR_EDU_NONE=exp( post$base+post$noedu ),										
			OR_SEXP1YR_G1=exp( post$base+post$sexp ),
			OR_VL_HIGH=exp( post$base+post$vlhigh ),
			OR_MOBILITY_INMIGRANT=exp( post$base+post$inmigrant ),
			OR_HH_SAME=exp( post$base+post$hh ),			
			OR_OCC_AGRO=exp( post$base+post$occ_agro ),
			OR_OCC_BAR=exp( post$base+post$occ_bar ),
			OR_OCC_STUD=exp( post$base+post$occ_stud ),
			OR_OCC_TRAD=exp( post$base+post$occ_trad ),
			OR_AGE_1=exp( post$base+post$age[,1] ),
			OR_AGE_2=exp( post$base+post$age[,2] ),
			OR_AGE_3=exp( post$base+post$age[,3] ),
			OR_AGE_4=exp( post$base+post$age[,4] ),
			OR_AGE_5=exp( post$base+post$age[,5] ),
			OR_AGE_6=exp( post$base+post$age[,6] ),
			OR_AGE_7=exp( post$base+post$age[,7] ),
			ORX_COMM_FISH=exp( post$comm_fish ), 											
			ORX_COMM_TRAD=exp( post$comm_trad ),										
			ORX_EDU_NONE=exp( post$noedu ),										
			ORX_SEXP1YR_G1=exp( post$sexp ),
			ORX_VL_HIGH=exp( post$vlhigh ),
			ORX_MOBILITY_INMIGRANT=exp( post$inmigrant ),
			ORX_HH_SAME=exp( post$hh ),						
			ORX_OCC_AGRO=exp( post$occ_agro ),
			ORX_OCC_BAR=exp( post$occ_bar ),
			ORX_OCC_STUD=exp( post$occ_stud ),
			ORX_OCC_TRAD=exp( post$occ_trad ),
			ORX_AGE_1=exp( post$age[,1] ),
			ORX_AGE_2=exp( post$age[,2] ),
			ORX_AGE_3=exp( post$age[,3] ),
			ORX_AGE_4=exp( post$age[,4] ),
			ORX_AGE_5=exp( post$age[,5] ),
			ORX_AGE_6=exp( post$age[,6] ),
			ORX_AGE_7=exp( post$age[,7] )
			)
	dp		<- melt(dp, id.vars='MC')	
	dfe.2	<- dp[, list( 	STAT=c('MED','CL','IL','IU','CU'), 
					V= quantile(value, prob=c(0.5,0.025,0.25,0.75,0.975))), by='variable']
	dfe.2	<- dcast.data.table(dfe.2, variable~STAT, value.var='V')
	dfe.2[, MODEL:= 'multivariate female with age']
	
	
	ds		<- rbind(dfe.2, dma.2)
	ds[, LABEL:= paste0(round(MED, d=2), '\n[', round(CL, d=2),'-', round( CU, d=2),']')]
	ds[, STAT:= factor(gsub('^([^_]+)_.*','\\1',variable), levels=c('PI','OR','ORX'), labels=c('probability transmitter','odds transmitter','odds ratio'))]
	ds[, FACTOR:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\3',variable)]
	ds[, MOFA:=gsub('^([^_]+)_([^_]+)_([^_]+)','\\2-\\3',variable)]		
	#ds		<- subset(ds, STAT=='odds ratio')
	set(ds, NULL, 'MOFA2', ds[, factor(MOFA, levels=rev(c(
											"AGE-1","AGE-2","AGE-3","AGE-4","AGE-5","AGE-6","AGE-7",											
											"COMM-AGRO","COMM-FISH","COMM-TRAD","EDU-NONE","SEXP1YR-G1","VL-HIGH","MOBILITY-INMIGRANT","HH-SAME",
											"OCC-AGRO","OCC-BAR","OCC-BODA","OCC-FISH","OCC-STUD","OCC-TRAD"											
									)), labels=rev(c("age 15-19","age 20-24","age 25-29","age 30-34","age 35-39","age 40-44","age 45-50",											
											"Agrarian community", "Fishing site", "Trading centre",
											"No primary education","More than 1 sex partner in last year", "Viral load above 100,000 cps/ml","Inmigrant","Same household",											
											"Primary occupation: agricultural/house", "Primary occupation: bar worker/waitress","Primary occupation: Boda/trucking",											
											"Primary occupation: fishing", "Primary occupation: student", "Primary occupation: trading/shopkeeper"
									)))])
	setkey(ds, MOFA)	
	#
	#	save
	#
	save(dg, ds, dfe.2, dma.2, mma.2, mfe.2, file=paste0(outfile.base,'_gender_runs.rda'))
	
	dp		<- subset(ds, STAT=='odds transmitter')
	dp[, FILL:= '0']
	set(dp, dp[, which(IL>1 | IU<1)], 'FILL', '1')
	set(dp, dp[, which(CL>1 | CU<1)], 'FILL', '2')
	ggplot(subset(dp, MODEL=='multivariate hh with age'), aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs')
	ggsave(file=paste0(outfile.base,'_odds_samehh_withage.pdf'), w=6, h=8)
	ggplot(subset(dp, MODEL=='multivariate outside hh with age'), aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs')
	ggsave(file=paste0(outfile.base,'_odds_diffhh_withage.pdf'), w=6, h=8)
	
	dp		<- subset(ds, STAT=='odds ratio')
	dp[, FILL:= '0']
	set(dp, dp[, which(IL>1 | IU<1)], 'FILL', '1')
	set(dp, dp[, which(CL>1 | CU<1)], 'FILL', '2')	
	ggplot(subset(dp, MODEL=='multivariate hh no age'), aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds ratio for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs')
	ggsave(file=paste0(outfile.base,'_odds_samehh_noage.pdf'), w=6, h=5)
	ggplot(subset(dp, MODEL=='multivariate outside hh no age'), aes(x=MOFA2)) +
			geom_hline(yintercept=1, colour='grey50', lwd=1) +
			geom_boxplot(aes(middle=MED, lower=IL, upper=IU, ymin=CL, ymax=CU, fill=FILL), stat='identity') +
			theme_bw() +
			scale_y_continuous(trans='log', breaks=c(1/20, 1/10, 1/6, 1/3,1/2,1,2,3, 6,10,20), labels=c('1/20','1/10','1/6','1/3','1/2','1','2','3','6','10', '20'), expand=c(0,0)) +
			scale_fill_manual(values=c('0'='white', '1'='orange', '2'='red')) +
			coord_flip(ylim=c(1/20,20)) +
			guides(fill=FALSE) +
			labs(x='Individual characteristics\n', y='\nOdds ratio for being a transmitter vs. recipient\nin phylogenetically reconstructed transmission pairs')
	ggsave(file=paste0(outfile.base,'_odds_diffhh_noage.pdf'), w=6, h=5)
}

RakaiFull.transmitter.180423.mxmy.stan.oddsratio.seqsampling<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)		#rdirichlet
	require(rethinking)	# STAN wrapper
	
	library(boot) 
	library(reshape) 
	ds <- data.frame(Person = c(rep("A", 20), rep("B", 10)), Success = 
					c(rbinom(20, 1, 0.25), rbinom(10, 1, 0.75))) 
	da <- cast(Person ~ ., data = ds, value = "Success", fun = 
					list(mean, length)) 
	
	m0 <- glm(Success ~ 1, data = ds, family = binomial) 
	m1 <- glm(mean ~ 1, data = da, family = binomial, weights=length) 
	summary(m0)
	summary(m1)
	#	the coefficients and sd(coef) are the same in m0 and m1
	inv.logit(coef(m0)) 
	inv.logit(coef(m1)) 
	
	
	indir				<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run"
	#
	#	load inferred transmissions
	#	
	infile				<- file.path(indir,"todi_pairs_171122_cl25_d50_prior23_min30_transmitterrecipientdata.rda")
	outfile.base		<- gsub('data.rda','',infile)
	load(infile)
	setnames(df, c('LINK_MF'), c('MALE_IS_TR'))
	set(df, NULL, c('FEMALE_INMIGRATE_1YR','FEMALE_INMIGRATE_2YR','MALE_INMIGRATE_1YR','MALE_INMIGRATE_2YR'), NULL)	
	infile.inference	<- file.path(indir,"todi_pairs_180522_cl25_d50_prior23_min30_phylogeography_data_with_inmigrants.rda")		
	load(infile.inference)
	tmp					<- subset(rtpdm, select=c(FEMALE_RID, MALE_RID, PTY_RUN, FEMALE_COMM_NUM_A, MALE_COMM_NUM_A, FEMALE_INMIGRATE_1YR, MALE_INMIGRATE_1YR, FEMALE_INMIGRATE_2YR, MALE_INMIGRATE_2YR))
	df					<- merge(df, tmp, by=c('MALE_RID','FEMALE_RID','PTY_RUN'))
	#
	#	build model variables
	#
	df[, MALE_CLOSEST_VL_1YR_G10E3:= as.integer(MALE_CLOSEST_VL_1YR>10e3)]
	df[, FEMALE_CLOSEST_VL_1YR_G10E3:= as.integer(FEMALE_CLOSEST_VL_1YR>10e3)]
	set(df, df[, which(FEMALE_EDUCAT=='Unknown')], 'FEMALE_EDUCAT', NA_character_)
	set(df, df[, which(MALE_EDUCAT=='Unknown')], 'MALE_EDUCAT', NA_character_)	
	df[, FE_NOEDU:= as.integer(FEMALE_EDUCAT=='None')]	
	df[, MA_NOEDU:= as.integer(MALE_EDUCAT=='None')]
	set(df, df[, which(FEMALE_SEXP1YR=='Unknown')], 'FEMALE_SEXP1YR', NA_character_)
	set(df, df[, which(MALE_SEXP1YR=='Unknown')], 'MALE_SEXP1YR', NA_character_)	
	df[, FE_SEXP1YR_G1:= as.integer(FEMALE_SEXP1YR!='1')]
	df[, MA_SEXP1YR_G1:= as.integer(MALE_SEXP1YR!='1')]	
	set(df, NULL, 'FISH', df[, as.integer(PAIR_COMM_TYPE=='fisherfolk')])	
	df[, MALE_AGE_AT_MID_C:= df[,as.character(cut(MALE_AGE_AT_MID, breaks=c(15,25,30,35,52), right=FALSE, labels=c('15-24','25-29','30-34','35-50')))]]
	df[, MALE_AGE_AT_MID_C2:= as.integer(as.factor(MALE_AGE_AT_MID_C))]
	df[, FEMALE_AGE_AT_MID_C:= df[,as.character(cut(FEMALE_AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, labels=c('15-19','20-24','25-29','30-34','35-50')))]]
	df[, FEMALE_AGE_AT_MID_C2:= as.integer(as.factor(FEMALE_AGE_AT_MID_C))]
	df[, MALE_AGE_24:= as.integer(MALE_AGE_AT_MID_C=='15-24')]
	df[, MALE_AGE_29:= as.integer(MALE_AGE_AT_MID_C=='25-29')]
	df[, MALE_AGE_34:= as.integer(MALE_AGE_AT_MID_C=='30-34')]
	df[, FEMALE_AGE_19:= as.integer(FEMALE_AGE_AT_MID_C=='15-19')]
	df[, FEMALE_AGE_24:= as.integer(FEMALE_AGE_AT_MID_C=='20-24')]
	df[, FEMALE_AGE_29:= as.integer(FEMALE_AGE_AT_MID_C=='25-29')]
	df[, FEMALE_AGE_34:= as.integer(FEMALE_AGE_AT_MID_C=='30-34')]	
	#	binarize covariates for sampling models
	df[, MALE_AGE_YOUNG:= as.integer(MALE_AGE_AT_MID_C=='15-24')]
	df[, MALE_AGE_MID:= as.integer(MALE_AGE_AT_MID_C=='25-34')]
	df[, MALE_SEX2:= as.integer(MALE_SEX=='M')]
	df[, MALE_COMM_TYPE_F:= as.integer(substr(MALE_COMM_NUM_A,1,1)=='f')]
	df[, MALE_COMM_TYPE_T:= as.integer(substr(MALE_COMM_NUM_A,1,1)=='t')]
	df[, MALE_INMIGRATE_1YR:= as.integer(grepl('inmigrant',MALE_INMIGRATE_1YR))]
	df[, MALE_INMIGRATE_2YR:= as.integer(grepl('inmigrant',MALE_INMIGRATE_2YR))]
	df[, MALE_INMIGRANT:=MALE_INMIGRATE_2YR]
	df[, FEMALE_AGE_YOUNG:= as.integer(FEMALE_AGE_AT_MID_C=='15-24')]
	df[, FEMALE_AGE_MID:= as.integer(FEMALE_AGE_AT_MID_C=='25-34')]
	df[, FEMALE_SEX2:= as.integer(FEMALE_SEX=='M')]
	df[, FEMALE_COMM_TYPE_F:= as.integer(substr(FEMALE_COMM_NUM_A,1,1)=='f')]
	df[, FEMALE_COMM_TYPE_T:= as.integer(substr(FEMALE_COMM_NUM_A,1,1)=='t')]
	df[, FEMALE_INMIGRATE_1YR:= as.integer(grepl('inmigrant',FEMALE_INMIGRATE_1YR))]
	df[, FEMALE_INMIGRATE_2YR:= as.integer(grepl('inmigrant',FEMALE_INMIGRATE_2YR))]
	df[, FEMALE_INMIGRANT:=FEMALE_INMIGRATE_2YR]
	
	#infile.participation	<- file.path(indir,"participation_differences_180322_logisticmodels.rda")	
	#load(infile.participation)
	#mp1 <- mp2 <- mp4 <- NULL
	#	define community number to match STAN output for participation model
	#tmp		<- unique(subset(dg, select=c(COMM_NUM_A,COMM_NUM_B)))
	#setnames(tmp, colnames(tmp), paste0('MALE_',colnames(tmp)))
	#df		<- merge(df, tmp, by='MALE_COMM_NUM_A')
	#setnames(tmp, colnames(tmp), gsub('MALE_','FEMALE_',colnames(tmp)))
	#df		<- merge(df, tmp, by='FEMALE_COMM_NUM_A')
	#
	#	define community number to match STAN output for sequencing model
	infile.sequencing		<- file.path(indir,"sequencing_differences_180322_logisticmodels.rda")
	load(infile.sequencing)
	ms2 	<- ms3 <- ms4 <- NULL
	tmp		<- unique(subset(dg, select=c(COMM_NUM_A,COMM_NUM_B)))
	setnames(tmp, 'COMM_NUM_B', 'COMM_NUM_B2')
	setnames(tmp, colnames(tmp), paste0('MALE_',colnames(tmp)))
	df		<- merge(df, tmp, by='MALE_COMM_NUM_A')
	setnames(tmp, colnames(tmp), gsub('MALE_','FEMALE_',colnames(tmp)))
	df		<- merge(df, tmp, by='FEMALE_COMM_NUM_A')
	#	extract posterior median	
	#mps			<- extract.samples(mp3)
	mss		<- extract.samples(ms1)
	for(x in c("a","sig_comm","trading","fishing","inmigrant","male","young","midage"))
		mss[[x]]	<- median(mss[[x]])
	mss[['comm']]<- apply(mss[['comm']], 2, median)
	#	calculate sequencing probabilities on logit scale
	df[, MALE_SEQ:= with(mss, a + comm[MALE_COMM_NUM_B2] + trading*MALE_COMM_TYPE_T + fishing*MALE_COMM_TYPE_F + inmigrant*MALE_INMIGRANT + male*MALE_SEX2 + young*MALE_AGE_YOUNG + midage*MALE_AGE_MID)]
	df[, FEMALE_SEQ:= with(mss, a + comm[FEMALE_COMM_NUM_B2] + trading*FEMALE_COMM_TYPE_T + fishing*FEMALE_COMM_TYPE_F + inmigrant*FEMALE_INMIGRANT + male*FEMALE_SEX2 + young*FEMALE_AGE_YOUNG + midage*FEMALE_AGE_MID)]
	#	calculate probability that pair was sequenced
	df[, PAIR_SEQ:= exp(-(log1p(exp(-MALE_SEQ))+log1p(exp(-FEMALE_SEQ))))]
	df[, INV_PAIR_SEQ:= round(1/PAIR_SEQ)]
	df[, MALE_IS_TR_WEIGHTED:= as.integer(MALE_IS_TR*INV_PAIR_SEQ)]
	df[, DUMMY:= seq_len(nrow(df))]
	
	#
	#	complete case analysis
	#
	dh	<- subset(df, select=c(	PAIRID, MALE_IS_TR, MALE_CLOSEST_VL_1YR_G10E3, FEMALE_CLOSEST_VL_1YR_G10E3, FE_SEXP1YR_G1, MA_SEXP1YR_G1, FE_NOEDU, MA_NOEDU, MALE_CIRCUM,
					FEMALE_INMIGRATE_1YR, MALE_INMIGRATE_1YR, FISH, SAME_HH, SAME_COMM,										
					FEMALE_GUD, MALE_GUD, FEMALE_PREGNANT, MALE_ALCEVER_LASTYEAR, FEMALE_ALCEVER_LASTYEAR, MALE_CONDOM_NEVER, FEMALE_CONDOM_NEVER))	
	dh[, HAS_MISSING_COL:= apply(as.matrix(dh), 1, function(x) any(is.na(x)) )]
	dh	<- subset(dh, !HAS_MISSING_COL)
	#	103 data points
	mt.6cc 	<- map2stan(
			alist(
					MALE_IS_TR ~ dbinom(1, ptr),
					logit(ptr) <- base + male_vl_above10e3*MALE_CLOSEST_VL_1YR_G10E3 + female_vl_above10e3*FEMALE_CLOSEST_VL_1YR_G10E3 +
							female_edu_none*FE_NOEDU + male_edu_none*MA_NOEDU + female_sexp1yr_gr1*FE_SEXP1YR_G1 + male_sexp1yr_gr1*MA_SEXP1YR_G1 +
							male_inmigrate_lastyear*MALE_INMIGRATE_1YR + female_inmigrate_lastyear*FEMALE_INMIGRATE_1YR +							
							female_gud*FEMALE_GUD + male_gud*MALE_GUD + male_alcever_lastyear*MALE_ALCEVER_LASTYEAR + female_alcever_lastyear*FEMALE_ALCEVER_LASTYEAR + male_condom_never*MALE_CONDOM_NEVER + female_condom_never*FEMALE_CONDOM_NEVER +
							female_pregnant*FEMALE_PREGNANT + male_circum*MALE_CIRCUM +
							pair_commtype_notfish*(1-FISH) + pair_withincomm_yes*SAME_COMM + pair_withinhh_yes*SAME_HH,
					base ~ dnorm(0,100),
					c(male_vl_above10e3, female_vl_above10e3, female_edu_none, male_edu_none, female_sexp1yr_gr1, male_sexp1yr_gr1) ~ dnorm(0,1),
					c(male_inmigrate_lastyear, female_inmigrate_lastyear, pair_commtype_notfish, pair_withincomm_yes, pair_withinhh_yes) ~ dnorm(0,1),					
					c(female_gud, male_gud, male_alcever_lastyear, female_alcever_lastyear, male_condom_never, female_condom_never, female_pregnant, male_circum) ~ dnorm(0,1)
			),
			data=as.data.frame(dh), 
			start=list(	base=0, male_vl_above10e3=0, female_vl_above10e3=0, female_edu_none=0, male_edu_none=0, female_sexp1yr_gr1=0, male_sexp1yr_gr1=0,
					male_inmigrate_lastyear=0, female_inmigrate_lastyear=0, pair_commtype_notfish=0, pair_withincomm_yes=0, pair_withinhh_yes=0,					
					female_gud=0, male_gud=0, male_alcever_lastyear=0, female_alcever_lastyear=0, male_condom_never=0, female_condom_never=0, female_pregnant=0, male_circum=0),			
			warmup=2e3, iter=20e3, chains=1, cores=1)
	precis(mt.6cc, prob=0.8)
	#	extract number of individuals (n) and proportion (freq)
	ds	<- subset(melt(dh, id.vars=c('PAIRID','MALE_IS_TR')), !is.na(value))
	tmp	<- ds[, list(FREQ_MALE_TR= mean(MALE_IS_TR)), by=c('variable','value')]
	ds	<- ds[, list(yes= length(which(value==1)), no= length(which(value==0)) ), by='variable']
	ds	<- melt(ds, id.vars=c('variable'), variable.name='GROUP', value.name='N')	
	set(tmp, NULL, 'value', tmp[, as.character(factor(value, levels=c(0,1), labels=c('no','yes')))])
	setnames(tmp, 'value','GROUP')
	ds	<- merge(ds, tmp, by=c('variable','GROUP'))	
	set(ds, NULL, 'variable', ds[, gsub('FEMALE_CLOSEST_VL_1YR_G10E3','FEMALE_VL_ABOVE10E3',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MALE_CLOSEST_VL_1YR_G10E3','MALE_VL_ABOVE10E3',variable)])	
	set(ds, NULL, 'variable', ds[, gsub('FE_NOEDU','FEMALE_EDU_NONE',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MA_NOEDU','MALE_EDU_NONE',variable)])
	set(ds, NULL, 'variable', ds[, gsub('FE_SEXP1YR_G1','FEMALE_SEXP1YR_GR1',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MA_SEXP1YR_G1','MALE_SEXP1YR_GR1',variable)])	
	set(ds, NULL, 'variable', ds[, gsub('FEMALE_INMIGRATE_1YR','FEMALE_INMIGRATE_LASTYEAR',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MALE_INMIGRATE_1YR','MALE_INMIGRATE_LASTYEAR',variable)])
	set(ds, NULL, 'variable', ds[, gsub('SAME_HH','PAIR_WITHINHH_YES',variable)])
	set(ds, NULL, 'variable', ds[, gsub('SAME_COMM','PAIR_WITHINCOMM_YES',variable)])	
	tmp	<- ds[, which(variable=='FISH')]
	set(ds, tmp, 'GROUP', ds[tmp, gsub('xx','yes',gsub('yes','no',gsub('no','xx',GROUP)))])	
	set(ds, NULL, 'variable', ds[, gsub('FISH','PAIR_COMMTYPE_NOTFISH',variable)])		
	#	extract odds ratio and prob odds ratio > 1
	post	<- as.data.table(extract.samples(mt.6cc))
	post[, MC:= seq_len(nrow(post))]	
	set(post, NULL, colnames(post)[grepl('impute|_mean|_sigma|base', colnames(post))], NULL)
	setnames(post, colnames(post), toupper(colnames(post)))
	post	<- melt(post, id.vars='MC', value.name='COEFF')	
	set(post, NULL, 'OR_MF', post[, exp(COEFF)])
	set(post, NULL, 'OR_FM', post[, exp(-COEFF)])
	tmp		<- post[, list( 	GROUP='yes',
					STAT=c('OR_MF_MED','OR_MF_CL','OR_MF_IL','OR_MF_IU','OR_MF_CU'), 
					V= quantile(OR_MF, prob=c(0.5,0.025,0.1,0.9,0.975))), 
			by=c('variable')]						
	tmp		<- dcast.data.table(tmp, variable+GROUP~STAT, value.var='V')
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	tmp		<- post[, list( 	GROUP='yes',
					STAT=c('OR_FM_MED','OR_FM_CL','OR_FM_IL','OR_FM_IU','OR_FM_CU'), 
					V= quantile(OR_FM, prob=c(0.5,0.025,0.1,0.9,0.975))), 
			by=c('variable')]						
	tmp		<- dcast.data.table(tmp, variable+GROUP~STAT, value.var='V')
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	tmp		<- post[, list(GROUP='yes', PROB_OR_TAIL=max(mean(OR_MF>1), mean(OR_MF<1))), by='variable']
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	#ds[, unique(variable)]
	set(ds, NULL, 'variable', ds[, factor(variable, levels=c('MALE_IS_TR',
									'MALE_VL_ABOVE10E3','MALE_SEXP1YR_GR1','MALE_EDU_NONE','MALE_INMIGRATE_LASTYEAR','MALE_GUD','MALE_CONDOM_NEVER','MALE_ALCEVER_LASTYEAR','MALE_CIRCUM',
									# 'MALE_AGE_24','MALE_AGE_29','MALE_AGE_34',
									'FEMALE_VL_ABOVE10E3','FEMALE_SEXP1YR_GR1','FEMALE_EDU_NONE','FEMALE_INMIGRATE_LASTYEAR','FEMALE_GUD','FEMALE_CONDOM_NEVER','FEMALE_ALCEVER_LASTYEAR','FEMALE_PREGNANT',
									# 'FEMALE_AGE_19','FEMALE_AGE_24','FEMALE_AGE_29','FEMALE_AGE_34',	
									'PAIR_COMMTYPE_NOTFISH','PAIR_WITHINCOMM_YES','PAIR_WITHINHH_YES'))])	
	setkey(ds, variable, GROUP)	
	ds[, PRETTY_FREQ:= paste0(round(FREQ_MALE_TR*100, d=1), '%')]
	ds[, PRETTY_OR_MF:= paste0(round(OR_MF_MED, d=2),' [',round(OR_MF_IL, d=2),'-',round(OR_MF_IU, d=2),']')]
	ds[, PRETTY_OR_FM:= paste0(round(OR_FM_MED, d=2),' [',round(OR_FM_IL, d=2),'-',round(OR_FM_IU, d=2),']')]
	set(ds, ds[, which(is.na(OR_MF_MED))], c('PRETTY_OR','PRETTY_OR_MF','PRETTY_OR_FM'), '-')
	ans	<- subset(ds, select=c(variable, GROUP, N, PRETTY_FREQ, PRETTY_OR_MF, PRETTY_OR_FM))	
	write.csv(ans, file=paste0(outfile.base,'_stanmodels_mt6cc.csv'))
	
	
	#
	#	inverse probability weighting
	#
	dh		<- df[, lapply(.SD, rep, times=INV_PAIR_SEQ), by='DUMMY']
	dh		<- subset(dh, select=c(	PAIRID, MALE_IS_TR, MALE_CLOSEST_VL_1YR_G10E3, FEMALE_CLOSEST_VL_1YR_G10E3, FE_SEXP1YR_G1, MA_SEXP1YR_G1, FE_NOEDU, MA_NOEDU, MALE_CIRCUM,
					FEMALE_INMIGRATE_1YR, MALE_INMIGRATE_1YR, FISH, SAME_HH, SAME_COMM,										
					FEMALE_GUD, MALE_GUD, FEMALE_PREGNANT, MALE_ALCEVER_LASTYEAR, FEMALE_ALCEVER_LASTYEAR, MALE_CONDOM_NEVER, FEMALE_CONDOM_NEVER))		
	mt.6r 	<- map2stan(
			alist(
					MALE_IS_TR ~ dbinom(1, ptr),
					logit(ptr) <- base + male_vl_above10e3*MALE_CLOSEST_VL_1YR_G10E3 + female_vl_above10e3*FEMALE_CLOSEST_VL_1YR_G10E3 +
							female_edu_none*FE_NOEDU + male_edu_none*MA_NOEDU + female_sexp1yr_gr1*FE_SEXP1YR_G1 + male_sexp1yr_gr1*MA_SEXP1YR_G1 +
							male_inmigrate_lastyear*MALE_INMIGRATE_1YR + female_inmigrate_lastyear*FEMALE_INMIGRATE_1YR +							
							female_gud*FEMALE_GUD + male_gud*MALE_GUD + male_alcever_lastyear*MALE_ALCEVER_LASTYEAR + female_alcever_lastyear*FEMALE_ALCEVER_LASTYEAR + male_condom_never*MALE_CONDOM_NEVER + female_condom_never*FEMALE_CONDOM_NEVER +
							female_pregnant*FEMALE_PREGNANT + male_circum*MALE_CIRCUM +
							pair_commtype_notfish*(1-FISH) + pair_withincomm_yes*SAME_COMM + pair_withinhh_yes*SAME_HH,
					MALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					FEMALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					FE_SEXP1YR_G1 <- dnorm(sexp_mean, sexp_sigma),
					MA_SEXP1YR_G1 <- dnorm(sexp_mean, sexp_sigma),
					FE_NOEDU <- dnorm(edu_mean, edu_sigma),
					MA_NOEDU <- dnorm(edu_mean, edu_sigma),
					FEMALE_PREGNANT <- dnorm(miss_mean, miss_sigma),
					MALE_CIRCUM <- dnorm(miss_mean, miss_sigma), 
					MALE_ALCEVER_LASTYEAR <- dnorm(alc_mean, alc_sigma), 
					FEMALE_ALCEVER_LASTYEAR <- dnorm(alc_mean, alc_sigma),
					MALE_CONDOM_NEVER <- dnorm(cond_mean, cond_sigma),
					FEMALE_CONDOM_NEVER <- dnorm(cond_mean, cond_sigma),
					base ~ dnorm(0,100),
					c(vl_mean, edu_mean, sexp_mean, miss_mean, alc_mean, cond_mean) ~ dnorm(0.5, 1),
					c(vl_sigma, edu_sigma, sexp_sigma, miss_sigma, alc_sigma, cond_sigma) ~ dcauchy(0,1),
					c(male_vl_above10e3, female_vl_above10e3, female_edu_none, male_edu_none, female_sexp1yr_gr1, male_sexp1yr_gr1) ~ dnorm(0,1),
					c(male_inmigrate_lastyear, female_inmigrate_lastyear, pair_commtype_notfish, pair_withincomm_yes, pair_withinhh_yes) ~ dnorm(0,1),					
					c(female_gud, male_gud, male_alcever_lastyear, female_alcever_lastyear, male_condom_never, female_condom_never, female_pregnant, male_circum) ~ dnorm(0,1)
			),
			data=as.data.frame(dh), 
			start=list(	base=0, male_vl_above10e3=0, female_vl_above10e3=0, female_edu_none=0, male_edu_none=0, female_sexp1yr_gr1=0, male_sexp1yr_gr1=0,
					male_inmigrate_lastyear=0, female_inmigrate_lastyear=0, pair_commtype_notfish=0, pair_withincomm_yes=0, pair_withinhh_yes=0,					
					female_gud=0, male_gud=0, male_alcever_lastyear=0, female_alcever_lastyear=0, male_condom_never=0, female_condom_never=0, female_pregnant=0, male_circum=0,
					vl_mean=0.5, edu_mean=0.5, sexp_mean=0.5, miss_mean=0.5, alc_mean=0.5, cond_mean=0.5, vl_sigma=1, edu_sigma=1, sexp_sigma=1, miss_sigma=1, alc_sigma=1, cond_sigma=1),			
			warmup=2e3, iter=20e3, chains=1, cores=1)
	precis(mt.6r, prob=0.8)
	
	
	#	extract number of individuals (n) and proportion (freq)
	ds	<- subset(melt(dh, id.vars=c('PAIRID','MALE_IS_TR')), !is.na(value))
	tmp	<- ds[, list(FREQ_MALE_TR= mean(MALE_IS_TR)), by=c('variable','value')]
	ds	<- ds[, list(yes= length(which(value==1)), no= length(which(value==0)) ), by='variable']
	ds	<- melt(ds, id.vars=c('variable'), variable.name='GROUP', value.name='N')	
	set(tmp, NULL, 'value', tmp[, as.character(factor(value, levels=c(0,1), labels=c('no','yes')))])
	setnames(tmp, 'value','GROUP')
	ds	<- merge(ds, tmp, by=c('variable','GROUP'))	
	set(ds, NULL, 'variable', ds[, gsub('FEMALE_CLOSEST_VL_1YR_G10E3','FEMALE_VL_ABOVE10E3',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MALE_CLOSEST_VL_1YR_G10E3','MALE_VL_ABOVE10E3',variable)])	
	set(ds, NULL, 'variable', ds[, gsub('FE_NOEDU','FEMALE_EDU_NONE',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MA_NOEDU','MALE_EDU_NONE',variable)])
	set(ds, NULL, 'variable', ds[, gsub('FE_SEXP1YR_G1','FEMALE_SEXP1YR_GR1',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MA_SEXP1YR_G1','MALE_SEXP1YR_GR1',variable)])	
	set(ds, NULL, 'variable', ds[, gsub('FEMALE_INMIGRATE_1YR','FEMALE_INMIGRATE_LASTYEAR',variable)])
	set(ds, NULL, 'variable', ds[, gsub('MALE_INMIGRATE_1YR','MALE_INMIGRATE_LASTYEAR',variable)])
	set(ds, NULL, 'variable', ds[, gsub('SAME_HH','PAIR_WITHINHH_YES',variable)])
	set(ds, NULL, 'variable', ds[, gsub('SAME_COMM','PAIR_WITHINCOMM_YES',variable)])	
	tmp	<- ds[, which(variable=='FISH')]
	set(ds, tmp, 'GROUP', ds[tmp, gsub('xx','yes',gsub('yes','no',gsub('no','xx',GROUP)))])	
	set(ds, NULL, 'variable', ds[, gsub('FISH','PAIR_COMMTYPE_NOTFISH',variable)])	
	
	#	extract odds ratio and prob odds ratio > 1
	post	<- as.data.table(extract.samples(mt.6r))
	post[, MC:= seq_len(nrow(post))]	
	set(post, NULL, colnames(post)[grepl('impute|_mean|_sigma|base', colnames(post))], NULL)
	setnames(post, colnames(post), toupper(colnames(post)))
	post	<- melt(post, id.vars='MC', value.name='COEFF')	
	set(post, NULL, 'OR_MF', post[, exp(COEFF)])
	set(post, NULL, 'OR_FM', post[, exp(-COEFF)])
	tmp		<- post[, list( 	GROUP='yes',
					STAT=c('OR_MF_MED','OR_MF_CL','OR_MF_IL','OR_MF_IU','OR_MF_CU'), 
					V= quantile(OR_MF, prob=c(0.5,0.025,0.1,0.9,0.975))), 
			by=c('variable')]						
	tmp		<- dcast.data.table(tmp, variable+GROUP~STAT, value.var='V')
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	tmp		<- post[, list( 	GROUP='yes',
					STAT=c('OR_FM_MED','OR_FM_CL','OR_FM_IL','OR_FM_IU','OR_FM_CU'), 
					V= quantile(OR_FM, prob=c(0.5,0.025,0.1,0.9,0.975))), 
			by=c('variable')]						
	tmp		<- dcast.data.table(tmp, variable+GROUP~STAT, value.var='V')
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	tmp		<- post[, list(GROUP='yes', PROB_OR_TAIL=max(mean(OR_MF>1), mean(OR_MF<1))), by='variable']
	ds		<- merge(ds, tmp, by=c('variable','GROUP'), all.x=1)
	#ds[, unique(variable)]
	set(ds, NULL, 'variable', ds[, factor(variable, levels=c('MALE_IS_TR',
									'MALE_VL_ABOVE10E3','MALE_SEXP1YR_GR1','MALE_EDU_NONE','MALE_INMIGRATE_LASTYEAR','MALE_GUD','MALE_CONDOM_NEVER','MALE_ALCEVER_LASTYEAR','MALE_CIRCUM',
									# 'MALE_AGE_24','MALE_AGE_29','MALE_AGE_34',
									'FEMALE_VL_ABOVE10E3','FEMALE_SEXP1YR_GR1','FEMALE_EDU_NONE','FEMALE_INMIGRATE_LASTYEAR','FEMALE_GUD','FEMALE_CONDOM_NEVER','FEMALE_ALCEVER_LASTYEAR','FEMALE_PREGNANT',
									# 'FEMALE_AGE_19','FEMALE_AGE_24','FEMALE_AGE_29','FEMALE_AGE_34',	
									'PAIR_COMMTYPE_NOTFISH','PAIR_WITHINCOMM_YES','PAIR_WITHINHH_YES'))])	
	setkey(ds, variable, GROUP)
	
	ds[, PRETTY_FREQ:= paste0(round(FREQ_MALE_TR*100, d=1), '%')]
	ds[, PRETTY_OR_MF:= paste0(round(OR_MF_MED, d=2),' [',round(OR_MF_IL, d=2),'-',round(OR_MF_IU, d=2),']')]
	ds[, PRETTY_OR_FM:= paste0(round(OR_FM_MED, d=2),' [',round(OR_FM_IL, d=2),'-',round(OR_FM_IU, d=2),']')]
	ds[, PRETTY_OR_MF95:= paste0(round(OR_MF_MED, d=2),' [',round(OR_MF_CL, d=2),'-',round(OR_MF_CU, d=2),']')]
	ds[, PRETTY_OR_FM95:= paste0(round(OR_FM_MED, d=2),' [',round(OR_FM_CL, d=2),'-',round(OR_FM_CU, d=2),']')]
	
	set(ds, ds[, which(is.na(OR_MF_MED))], c('PRETTY_OR','PRETTY_OR_MF','PRETTY_OR_FM','PRETTY_OR_MF95','PRETTY_OR_FM95'), '-')
	ans	<- subset(ds, select=c(variable, GROUP, N, PRETTY_FREQ, PRETTY_OR_MF, PRETTY_OR_FM, PRETTY_OR_MF95, PRETTY_OR_FM95))
	
	write.csv(ans, file=paste0(outfile.base,'_stanmodels_mt6r.csv'))
	
	save(df, mt.6r, mt.6cc, file=paste0(outfile.base,'_stanmodels_cc_ipw.rda'))
}
	
RakaiFull.transmitter.180423.mxmy.stan.oddsratio.other<- function()
{
	require(data.table)
	require(scales)
	require(ggplot2)
	require(ggmap)
	require(grid)
	require(gridExtra)
	require(RColorBrewer)
	require(Hmisc)
	require(gtools)	#rdirichlet
	require(rethinking)	# STAN wrapper
	
	infile			<- "~/Dropbox (SPH Imperial College)/Rakai Fish Analysis/full_run/todi_pairs_171122_cl25_d50_prior23_min30_transmitterrecipientdata.rda"
	outfile.base	<- gsub('data.rda','',infile)
	load(infile)
	setnames(df, 'LINK_MF', 'MALE_IS_TR')
	
	#	build model with closest viral load greater 10e3 and 100e3
	df[, MALE_CLOSEST_VL_1YR_G10E3:= as.integer(MALE_CLOSEST_VL_1YR>10e3)]
	df[, FEMALE_CLOSEST_VL_1YR_G10E3:= as.integer(FEMALE_CLOSEST_VL_1YR>10e3)]
	#	add sex partners and education
	set(df, df[, which(FEMALE_EDUCAT=='Unknown')], 'FEMALE_EDUCAT', NA_character_)
	set(df, df[, which(MALE_EDUCAT=='Unknown')], 'MALE_EDUCAT', NA_character_)	
	df[, FE_NOEDU:= as.integer(FEMALE_EDUCAT=='None')]	
	df[, MA_NOEDU:= as.integer(MALE_EDUCAT=='None')]
	set(df, df[, which(FEMALE_SEXP1YR=='Unknown')], 'FEMALE_SEXP1YR', NA_character_)
	set(df, df[, which(MALE_SEXP1YR=='Unknown')], 'MALE_SEXP1YR', NA_character_)	
	df[, FE_SEXP1YR_G1:= as.integer(FEMALE_SEXP1YR!='1')]
	df[, MA_SEXP1YR_G1:= as.integer(MALE_SEXP1YR!='1')]
	#	add fishing, same household, same community, inmigrant
	set(df, NULL, 'FISH', df[, as.integer(PAIR_COMM_TYPE=='fisherfolk')])
	#	add age as random effect 
	#	because I don t think it s a linearly increasing relationship
	df[, MALE_AGE_AT_MID_C:= df[,as.character(cut(MALE_AGE_AT_MID, breaks=c(15,25,30,35,52), right=FALSE, labels=c('15-24','25-29','30-34','35-50')))]]
	df[, MALE_AGE_AT_MID_C2:= as.integer(as.factor(MALE_AGE_AT_MID_C))]
	df[, FEMALE_AGE_AT_MID_C:= df[,as.character(cut(FEMALE_AGE_AT_MID, breaks=c(15,20,25,30,35,52), right=FALSE, labels=c('15-19','20-24','25-29','30-34','35-50')))]]
	df[, FEMALE_AGE_AT_MID_C2:= as.integer(as.factor(FEMALE_AGE_AT_MID_C))]
	df[, MALE_AGE_24:= as.integer(MALE_AGE_AT_MID_C=='15-24')]
	df[, MALE_AGE_29:= as.integer(MALE_AGE_AT_MID_C=='25-29')]
	df[, MALE_AGE_34:= as.integer(MALE_AGE_AT_MID_C=='30-34')]
	df[, FEMALE_AGE_19:= as.integer(FEMALE_AGE_AT_MID_C=='15-19')]
	df[, FEMALE_AGE_24:= as.integer(FEMALE_AGE_AT_MID_C=='20-24')]
	df[, FEMALE_AGE_29:= as.integer(FEMALE_AGE_AT_MID_C=='25-29')]
	df[, FEMALE_AGE_34:= as.integer(FEMALE_AGE_AT_MID_C=='30-34')]	
	
	
	dh		<- subset(df, select=c(PAIRID, MALE_IS_TR, MALE_CLOSEST_VL_1YR_G10E3, FEMALE_CLOSEST_VL_1YR_G10E3))
	
	mt.1 <- map2stan(
			alist(
					MALE_IS_TR ~ dbinom(1, ptr),
					logit(ptr) <- base + male_vl_above10e3 * MALE_CLOSEST_VL_1YR_G10E3 + female_vl_above10e3 * FEMALE_CLOSEST_VL_1YR_G10E3,
					MALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					FEMALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					base ~ dnorm(0,100),
					vl_mean ~ dnorm(0.5, 1),
					vl_sigma ~ dcauchy(0,1),
					c(male_vl_above10e3, female_vl_above10e3) ~ dnorm(0,10)													
			),
			data=as.data.frame(dh), 
			start=list(base=0, male_vl_above10e3=0, female_vl_above10e3=0, vl_mean=0.5, vl_sigma=1),			
			warmup=5e2, iter=2e3, chains=1, cores=4, verbose=TRUE, debug=TRUE)
	precis(mt.1, prob=0.95)
	#	base                 0.66   0.29       0.10       1.26  1002 1.00
	#	male_vl_above10e3    0.80   0.37       0.08       1.53   859 1.00
	#	female_vl_above10e3 -1.38   0.35      -2.03      -0.67  1176 1.00
	#	looks good, from base Male is significant
			
	dh		<- subset(df, select=c(PAIRID, MALE_IS_TR, MALE_CLOSEST_VL_1YR_G10E3, FEMALE_CLOSEST_VL_1YR_G10E3, FE_SEXP1YR_G1, MA_SEXP1YR_G1, FE_NOEDU, MA_NOEDU))
	mt.2 	<- map2stan(
			alist(
					MALE_IS_TR ~ dbinom(1, ptr),
					logit(ptr) <- base + male_vl_above10e3*MALE_CLOSEST_VL_1YR_G10E3 + female_vl_above10e3*FEMALE_CLOSEST_VL_1YR_G10E3 +
									female_edu_none*FE_NOEDU + male_edu_none*MA_NOEDU + female_sexp1yr_gr1*FE_SEXP1YR_G1 + male_sexp1yr_gr1*MA_SEXP1YR_G1,
					MALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					FEMALE_CLOSEST_VL_1YR_G10E3 <- dnorm(vl_mean, vl_sigma),
					FE_SEXP1YR_G1 <- dnorm(sexp_mean, sexp_sigma),
					MA_SEXP1YR_G1 <- dnorm(sexp_mean, sexp_sigma),
					FE_NOEDU <- dnorm(edu_mean, edu_sigma),
					MA_NOEDU <- dnorm(edu_mean, edu_sigma),
					base ~ dnorm(0,100),
					c(vl_mean, edu_mean, sexp_mean) ~ dnorm(0.5, 1),
					c(vl_sigma, edu_sigma, sexp_sigma) ~ dcauchy(0,1),
					c(male_vl_above10e3, female_vl_above10e3, female_edu_none, male_edu_none, female_sexp1yr_gr1, male_sexp1yr_gr1) ~ dnorm(0,10)													
			),
			data=as.data.frame(dh), 
			start=list(	base=0, male_vl_above10e3=0, female_vl_above10e3=0, female_edu_none=0, male_edu_none=0, female_sexp1yr_gr1=0, male_sexp1yr_gr1=0,
						vl_mean=0.5, edu_mean=0.5, sexp_mean=0.5, vl_sigma=1, edu_sigma=1, sexp_sigma=1),			
			warmup=5e2, iter=4e3, chains=1, cores=4)
	plot(mt.2)
	precis(mt.2, prob