supplementAbomb.R
In SEERaBomb: SEER and Atomic Bomb Survivor Data Analysis Tools

rm(list=ls()) 
# The first step is to show that there are lower AIC EAR models than the CML ERR
# and EAR models in: Hsu, W. L., Preston, ... and Mabuchi, K. The Incidence of
# Leukemia, Lymphoma ... among Atomic Bomb Survivors: 1950-2001. Radiat. Res.
# 179, 361-382 (2013). To use this script please place lsshempy.csv in
# /data/abomb or change this line
d<-read.csv("~/data/abomb/lsshempy.csv",header=T)
head(d,2)
d=d[d$mar_an>=0,] # take only those with known doses 
sum(d$cml) # check to see that we have 75 cases of CML
# viewing d as an environment, use within to remove columns not needed here
d=within(d,{rm(distcat,agxcat, agecat, dcat, time,upyr,subjects,year,
               nhl,hl,mye,all,oll,alltot,cll,hcl,clltot,atl,aml,oml,amol,amltot,
               othleuk,noncll,leuktot,hldtot,mar_ag,mar_an)}) 
head(d,2)
# the next line uses within to transform columns, as in the beginning of the
# epicure log file in Hsu et al's supplement 3
d=within(d, {nic = as.numeric(gdist > 12000) # nic= not in city
             over4gy = 1 - un4gy  
             rm(gdist,un4gy)  # get rid of things no longer needed as we go
             tsx = (age -agex) 
             lt25 = log(tsx/25) 
             a = log(age/70) 
             a55 = log(age/55) ;
             age55=age-55
             agex30=agex-30
             ax30 = log(agex/30) ;
             py10k = pyr/10000 ;
             py = pyr;
             s=sex-1
             c=city-1
             sv=mar_ad10/1000;
             hiro = as.numeric(city == 1) 
             naga = as.numeric(city ==2)
             rm(sex,city,pyr,mar_ad10)
})


head(d)
bk=d[d$sv<0.05,]
library(bbmle)
# ERR model of CML in Table S2 (in Supplement 1) is:
# back=0.22*exp(-0.06*s + (1.38 + 1.75*s)*a)
# Induced=(5.24*d)*exp(-1.50*c - 1.59*lt25 - 1.42*a55)
# where
# c = 0 for Hiroshima and 1 for Nagasaki        
# s = 0 for men and 1 for women                 
# a = ln(age/70) 
# a55 = ln(age/55)              
# lt25 = ln(tsx/25) 

# first compare their polynomial in age background model to our preferred exponential in age model
summary(pow1<-mle2(cml~dpois(lambda=py10k*A*exp(cs*s+(ca+csa*s)*a)),
                     start=list(A=.22,cs=-0.06,ca=1.38,csa=1.75),data=bk) ) 
summary(exp1k<-mle2(cml~dpois(lambda=py10k*A*exp(cs*s+k*age55)),
                     start=list(A=.22,cs=-0.6,k=0.04),data=bk) ) 
summary(exp2k<-mle2(cml~dpois(lambda=py10k*A*exp(cs*s+(k+ks*s)*age55)),
                 start=list(A=.22,cs=-0.6,k=0.03,ks=.02),data=bk) ) 
AIC(pow1,exp1k,exp2k)
ICtab(pow1,exp1k,exp2k)
# AIC favors exp(k*age) over  age^k for background; also, sex diff in k is not justified
BIC(pow1,exp1k,exp2k) # BIC seems to penalize too much for adding a parameter

# now add in their ERR induced term
# Induced=(5.24*d)*exp(-1.50*c - 1.59*lt25 - 1.42*a55)  [according to Table S2 in Supplement 1], but 
# with extra terms according to the epicure log file in Supplement 3. 
# Since the Supplement 3 model exactly reproduces Table S9 in Supplement 2, we go with it:
summary(err<-mle2(cml~dpois(lambda=py10k*(A1*exp(c1s*s+c1a*a+c1sa*a*s+c1nic0*hiro*nic+c1nic1*naga*nic)*
                          (1+sv*A2*exp(c2c*c + c2t*lt25 + c2a*a55+c2o4g*over4gy)   ) ) ),
                 start=list(A1=.22,c1s=-0.06,c1a=1.38,c1sa=1.75,c1nic0=-0.2,c1nic1=-0.86,
                            A2=5.24,c2c=-1.50,c2t=-1.59,c2a=-1.42,c2o4g=-0.3),data=d) ) 
# note that many parameters are highly insignificant by the Wald test, so this looks overparameterized!

AIC(err)
logLik(err)
deviance(err)
prd=predict(err)
-2*sum(d$cml*log(prd))
-2*sum(d$cml*log(prd))+2*length(coef(err)) # 11-parameter model => diff of 22
# [1] 754.454 # exactly as in epicure log file of supplement. In computing the 
# deviance here, I'm OK with ditching the third term below since it only depends
# on the data, and since it vanishes anyway since the cells are all 0 or 1 (in this data)
# and the factorial takes these all to 1 which the log then takes to zero, but was wondering
# how they can leave out the second term, i.e. -prd
-2*sum(d$cml*log(prd)-prd-log(factorial(d$cml)))
-2*sum(d$cml*log(prd)-prd)
sum(prd) # my only guess is that the sum of the expected means = sum of observed cases
# so that it too really only depends on the data, at the level of the whole sum. 

# EAR model of CML. From Table S2 in Supplement 2
# back=0.25*exp(-0.28*s + 1.60*a + 1.38*(s*a))
# Induced=
# (0.68*d)*exp(-1.49*c - 1.63*lt25 - 0.20*a55) men
# (0.57*d)*exp(-1.49*c - 1.63*lt25 + 2.10*a55) women
# but the epicure log shows more to the model, as follows
summary(ear<-mle2(cml~dpois(lambda=py10k*(A1*exp(c1s*s+c1a*a+c1sa*a*s+c1nic0*hiro*nic+c1nic1*naga*nic)
                        +sv*A2*exp(c2c*c + c2s*s+c2t*lt25 + c2a*a55+c2sa*s*a55+c2o4g*over4gy)   ) ) ,
                 #                                        method="L-BFGS-B",
                 start=list(A1=.22,c1s=-0.06,c1a=1.38,c1sa=1.75,c1nic0=-0.2,c1nic1=-0.86,
                            A2=5.24,c2c=-1.50,c2s=-0.18,c2t=-1.59,c2a=-1.42,c2sa=2.3,c2o4g=-0.3),data=d) ) 
# still lots of insignificant parameters.
prd=predict(ear)
-2*sum(d$cml*log(prd))
-2*sum(d$cml*log(prd))+2*length(coef(ear)) # 13-parameter model, so add 26 => worse model 
# 753.7032  # also exactly as in the epicure log file supplement => a better fit than the ERR model

AIC(err,ear)
ICtab(err,ear) # err wins by AIC
anova(err,ear) # err wins by likelihood ratio test (i.e. deviance difference test)


################################################################################3
# The following EAR model beats out Hsu's ERR model (in terms of AICs)  
summary(s1<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
                                      +sv*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx)  ) ),
         start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,c2s=-0.18,c2t=-0.4,c2ts=0.3),data=d) ) 
prd=predict(s1)
-2*sum(d$cml*log(prd))
-2*sum(d$cml*log(prd))+2*length(coef(s1)) # 8 parameters, so add 16
ICtab(err,s1) # s1 wins by AIC
anova(err,s1) # s1 wins by likelihood ratio test (i.e. deviance difference test)


# # using this model there is no evidence for the need for kerma>4gy correction
# summary(s1ov<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
#                                       +sv*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx+c2o4g*over4gy)  ) ),
#                  start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
#                             c2s=-0.18,c2t=-0.4,c2ts=0.3,c2o4g=-0.3),data=d) ) 
# prd=predict(s1ov)
# -2*sum(d$cml*log(prd))
# -2*sum(d$cml*log(prd))+2*length(coef(s1ov))
# 

# checking if adding lage55 in the induced term helps, yes, it does 
summary(s1a55<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
                     +sv*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx + c2a*a55+c2sa*s*a55 )  ) ),
                    start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
                               c2s=-0.18,c2t=-0.4,c2ts=0.3,c2a=-1.42,c2sa=2.3),data=d) ) 
prd=predict(s1a55)
-2*sum(d$cml*log(prd))                       
-2*sum(d$cml*log(prd))+2*length(coef(s1a55)) # lage55 terms are justified! new best model
ICtab(err,s1a55) # new model wins by AIC diff of 3.4
anova(err,s1a55) # new model has less deviance and fewer parameters!

# keeping only the female component of lage55  
summary(s1a55f<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+(k+ks*s)*age)
                         +sv*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx +c2sa*s*a55 )  ) ),
                    start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
                               c2s=-0.18,c2t=-0.4,c2ts=0.3,c2sa=2.3),data=d) ) 

prd=predict(s1a55f)
-2*sum(d$cml*log(prd))                       
-2*sum(d$cml*log(prd))+2*length(coef(s1a55f)) # new best model 
ICtab(err,s1a55f) # now new model wins by AIC diff of 5.3
anova(err,s1a55f) # deviance loss was negligible, consistent with AIC drop of almost 2

# # do we still need a sex diff in exponential decays?  
# summary(m1a55f<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
#                                           +sv*exp(c20+c2c*c + c2s*s+c2t*tsx +c2sa*s*a55 )  ) ),
#                      start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
#                                 c2s=-0.18,c2t=-0.4,c2sa=2.3),data=d) ) 
# prd=predict(m1a55f)
# -2*sum(d$cml*log(prd))                       
# -2*sum(d$cml*log(prd))+2*length(coef(m1a55f)) #  yes, keep the difference in exponential decays

# # try switching to age at exposure in induced term doesn't help
# summary(s1a55x<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+(k+ks*s)*age)
#                                           +sv*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx +c2sa*s*agex )  ) ),
#                      start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
#                                 c2s=-0.18,c2t=-0.4,c2ts=0.3,c2sa=2.3),data=d) ) 
# prd=predict(s1a55x)
# -2*sum(d$cml*log(prd))                       
# -2*sum(d$cml*log(prd))+2*length(coef(s1a55x)) 
# ICtab(s1a55f,s1a55x) # fit is worse

# make exponential more realistic by bringing tsx out in front of induced term   
summary(s1tsx<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+(k+ks*s)*age)
                                          +sv*tsx*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx +c2sa*s*a55 )  ) ),
                     start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
                                c2s=-0.18,c2t=-0.4,c2ts=0.3,c2sa=2.3),data=d) ) 
# note that the parameter P values are now almost all signficant
prd=predict(s1tsx)
-2*sum(d$cml*log(prd))                       
-2*sum(d$cml*log(prd))+2*length(coef(s1tsx)) # new best model 
ICtab(s1tsx,s1a55f) # tsx in front wins by AIC diff of 0.4


# what about the age at exposure hump for HM only  
a55HMb<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)    +
      sv*tsx*exp(c20+c2c*c + c2s*s+c2t*tsx+c2ts*s*tsx +c2sa*s*a55+ c2x*(1-c)*(1-s)*abs(agex-30))  ) ),
                     start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
                                c2s=-0.18,c2t=-0.4,c2ts=0.3,c2x=.1,c2sa=2.3),data=d)  
prd=predict(a55HMb)
-2*sum(d$cml*log(prd))                       
-2*sum(d$cml*log(prd))+2*length(coef(a55HMb)) # 

summary(a55HMb)     #P=0.23 on c2x param is consistent with 0.21 below for model diffs
AIC(a55HMb,s1tsx)
ICtab(a55HMb,s1tsx) #AIC difference of 0.4 => deviance better by 1.6 (i.e. maybe something to it)
anova(a55HMb,s1tsx) # P=0.21 => HM hump at agex=30 is not statistically significant

#rewrite to estimate sex difference in EAR time constants 
summary(best<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
                                        +sv*tsx*exp(c20+c2c*c + c2s*s-tsx/(c2t+c2ts*s) +c2sa*s*a55 )  ) ),
                     start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
                                c2s=-0.18,c2t=5,c2ts=10,c2sa=2.3),data=d) ) 
prd=predict(best)
-2*sum(d$cml*log(prd))                       
-2*sum(d$cml*log(prd))+2*length(coef(best))  
# see that it still is the best
AIC(s1tsx,best) # i.e. these should be identical, and they are (this is just a reparameterization)
ICtab(err,best) 
anova(err,best) 

# show that can't remove ks by AIC, but perhaps by LRT and BIC
summary(best) # see that ks was not significant by Wald Test
summary(bestT<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age)
                                        +sv*tsx*exp(c20+c2c*c + c2s*s-tsx/(c2t+c2ts*s) +c2sa*s*a55 )  ) ),
                   start=list(c10=-12.4,k=0.025,c20=-10.5,c2c=-1.50,
                              c2s=-0.18,c2t=5,c2ts=10,c2sa=2.3),data=d) ) 
prd=predict(bestT)
-2*sum(d$cml*log(prd))                       
anova(best,bestT) 
BICtab(best,bestT)  # LRT and Bayes both say take out ks 
ICtab(best,bestT)  # but AIC says leave it in. 

# Since I would prefer to see agex over age in my induced term, I tried things
# like this that failed
# d$oldF=d$s*as.numeric(d$agex>40)
# head(d)
# summary(bestT<-mle2(cml~dpois(lambda=py*(exp(c10+c2c*c+c2s*s+k*age+ks*s*age)
#                                         +sv*tsx*exp(c20+c2c*c + c2s*s-tsx/(c2t+c2ts*s) +c2sa*oldF )  ) ),
#                    start=list(c10=-12.4,k=0.025,ks=0,c20=-10.5,c2c=-1.50,
#                               c2s=-0.18,c2t=5,c2ts=log(10),c2sa=2.3),data=d) ) 
# prd=predict(bestT)
# -2*sum(d$cml*log(prd))                       
# ICtab(bestT,best) # all of my attempts to get age out of the induced term failed
# anova(bestT,best) 

#OK, so we stop at this point and now make tables and figures for the paper
(sb=summary(best))
(outs=round(coef(sb),4))
rownames(outs)[c(1,4:8)]<-c("c1hm","c2hm","cn","cs","tau","taus")
outs  # parameter names mapped to those in paper before sending to the table
library(hwriter)
# setwd("/users/radivot/downloads/sachs")
# hwrite(outs,"table2.html")

sd=coef(sb)["c2ts",2]
mn=coef(best)["c2ts"]
(tauDiff=round(2*c(mn,mn-1.96*sd,mn+1.96*sd),2)) # mean is twice tau for gamma with t^1 in front
sprintf("%s (%s, %s)",tauDiff[1],tauDiff[2],tauDiff[3])
(Taum=coef(best)["c2t"])  #so twice these are the mean male and female waiting times
(Tauf=Taum + mn)

# now get amplitude parameter ratio (M/F) estimates and CI 
sdA=coef(sb)["c2s",2]
mnA=coef(best)["c2s"]
(MovF=round((Taum/Tauf)^2*exp(-c(mnA,mnA+1.96*sdA,mnA-1.96*sdA)),2))
sprintf("%s (%s, %s)",MovF[1],MovF[2],MovF[3])
# note that the (Taum/Tauf)^2 is needed because the normalized density
# function is (1/Tau^2)*t*exp(-t/Tau)

###############################################################################

# The next goal is to make something like the dots in Fig 4B of Hsu et al, but for tsx decays
# To do this we replace the tsx exponential decays with a "nonparameteric" model (not distribution),
# i.e. this is non-parameteric like a one-way anova is of a time course. 
d=transform(d,tsxf=cut(tsx,6),sex=as.factor(s))
head(d)
summary(s3<-mle2(cml~dpois(lambda=py*(exp(c10 + c2c*c + c2s*s + k*age + ks*s*age)
                                +sv*exp(c2c*c  + f + c2sa*s*a55 )  ) ),
                 parameters=list(f~-1 + tsxf:sex),
                 start=list(c10=-12.4,k=0.025,ks=0,c2c=-1.50,
                              c2s=-0.18,f=-5,c2sa=2),data=d) ) 
prd=predict(s3)
-2*sum(d$cml*log(prd))  # fit is much improved, with deviance now at 746.07 (lowest of all)
-2*sum(d$cml*log(prd))+2*length(coef(s3))# but the extra parameters are not justified: this is OK, 
# the point here is to plot the points, not say that this is a better model. 
AIC(s3,best)
ICtab(s3,best) 
anova(s3,best) 

ss3=summary(s3)
wait=exp(coef(ss3)[6:17,1])*1e4
waitL=exp(coef(ss3)[6:17,1]-1.96*coef(ss3)[6:17,2])*1e4
waitU=exp(coef(ss3)[6:17,1]+1.96*coef(ss3)[6:17,2])*1e4
(lvls=levels(d$tsxf))
(Lvls=strsplit(lvls,","))
substring("(4.23",2)
(lows=sapply(Lvls,function(x) as.numeric(substring(x[1],2))))
(ups=sapply(Lvls,function(x) as.numeric(substring(x[2],1,4))))
(mids=round(apply(rbind(lows,ups),2,mean),2))
(dfc=data.frame(mids,wait,waitL,waitU,Sex=gl(2,6,labels=c("Male","Female") ) ) )

graphics.off()
if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
windows(width=7,height=6)

library(ggplot2)
pd <- position_dodge(1) 
(p=ggplot(dfc, aes(x=mids, y=wait, shape=Sex, col=Sex, ymax=10)) + 
   geom_point(size=6,position=pd) ) # + ylim(0, 10) ) # scale_y_log10(limits=c(1e-5,10)) 
(p=p+labs(title="IR-to-CML Latency",x="Years since exposure",
          y=expression(paste("Cases per ",10^4," Person-Year-Sv") ) ) )    
(p=p+geom_errorbar(aes(ymin=waitL, ymax=waitU),width=.01,position=pd))
(p=p+theme(plot.title = element_text(size = rel(2.3)),
           axis.title = element_text(size = rel(2.3)),
           axis.text = element_text(size = rel(2.3)))  )
(p=p+theme(legend.position = c(0.8, .5), 
           legend.title = element_text(size = rel(2)) ,
           legend.text = element_text(size = rel(2))  ) )  

waitm=exp(coef(ss3)[6:11,1])
waitf=exp(coef(ss3)[12:17,1])
(MovF<-format(sum(waitm)/sum(waitf),digits=3)) # ratio of sums (like AUCs)

# now we still need the M/F CI, 
mns=coef(ss3)[6:17,1]
sds=coef(ss3)[6:17,2]
X=exp(matrix(rnorm(12*100000,mns,sds),byrow=T, ncol=12))
M=X[,1:6]; F=X[,7:12]
Ms=apply(M,1,sum)
Fs=apply(F,1,sum)
MovF=Ms/Fs
MF=round(quantile(MovF,c(0.5,0.025,0.975)),2)
(MF=sprintf("%s (%s, %s)",MF[1],MF[2],MF[3]))

pm=waitm/sum(waitm)
pf=waitf/sum(waitf)
(taum<-round(mids%*%pm,2))
(tauf<-round(mids%*%pf,2))

(p=p+annotate("text",x=15,y=10, hjust=0, label = paste("M/F =",MF),size=9) )
(lb1=paste0("tau[m] == ",taum,"*~Yrs"))
(p=p+annotate("text",x=15,y=9, hjust=0, label = lb1,size=9,parse=T) )
(lb1=paste0("tau[f] == ",tauf,"*~Yrs"))
(p=p+annotate("text",x=15,y=8, hjust=0, label = lb1,size=9,parse=T) )
# This next block calculates the 95% CI of the difference in waiting times
rdiscrete <- function(n, probs,values) {
  cumprobs <- cumsum(probs)
  singlenumber <- function() {
    x <- runif(1)
    N <- sum(x > cumprobs)
    N
  }
  values[replicate(n, singlenumber())+1]
}
x=rdiscrete(1e5,pf,mids)
summary(as.factor(x))
y=rdiscrete(1e5,pm,mids)
summary(as.factor(y))
(delT=quantile(x-y,c(0.025,0.975)))
(lb1=paste0("Delta*tau == ",tauf-taum,"(",delT[1],", ",delT[2],")"))
#(lb1=paste0("Delta*tau == ",tauf-taum,"~group(\"(\",",delT[1],",",delT[2],",\")\")")) # attempt with group() failed
# quartz(height=7,width=8)  # set back to square
(p=p+annotate("text",x=15,y=7, hjust=0, label = lb1,size=7,parse=T) ) #just show since can't add space before ( 
sprintf("%s (%s, %s)",tauf-taum,delT[1],delT[2])
# ggsave(p,file="~/IR2CML.eps")


### Figure 7: interpretations ##############
# graphics.off()
# quartz(height=6,width=6) 

# setEPS()
# postscript('~/concept.eps', height = 6, width = 6)

k=0.025
Tp=22.1
Rp=1.73
Tf=-15:23
fR=function(Tf) exp(-k*(Tf-Tp))
R=fR(Tf)


par(mar=c(4.5,4.5,0,.5))
plot(Tf,R,type="l",lwd=2.5,cex.lab=1.6,cex.axis=1.5,
     ylab="(male risk)/(female risk) M/F",
     xlab="extra female latency time T in years",axes=F,font=2)
mtext(side=3,line=-4,"Continuum of SEER CML\n Sex Difference Interpretations",cex=1.5,font=1)
# axis(1);axis(2)
points(x=c(0,Tp),y=c(Rp,1),pch=1,cex=3,lwd=3)
abline(h=1,v=0,lty=3)
fT=function(R) Tp-log(R)/k
# Tx=3.86
# Rx=1.26
# # points(x=c(fT(Rx),Tx),y=c(Rx,fR(Tx)),pch="+",cex=3)
# # plot(1:20,pch=1:20)
# x=seq(Tx,fT(Rx),0.1)
# x=seq(delT[1],delT[2],0.1)
# y=fR(x)
# points(x,y,type="l",lwd=6)

x=seq(tauDiff[2],tauDiff[3],0.1)
y=fR(x)
points(x,y,type="l",lwd=7)



rect(-1,1.13*Rp,23,1.26*Rp,lwd=2)
text(12,1.2*Rp,"interpretations by\n a single cause",cex=1.4,font=1,bty="o")
arrows(x0=0,y0=1.13*Rp,x1=0,y1=Rp+0.05,lwd=2,angle=20)
arrows(x0=Tp,y0=1.13*Rp,x1=Tp,y1=1.05,lwd=2,angle=20)
text(4,1.04*Rp,"higher\nmale\nrisk",cex=1.4,font=1)
text(10,0.96*Rp,"or",cex=1.4,font=2)
text(18,0.9*Rp,"shorter\nmale\nlatency",cex=1.4,font=1)
text(-9,1.38,
     "interpretations\nconsistent with\ntime-since-\nexposure data\n(heavy line)",
     cex=1.4,font=1)
arrows(x0=-2,y0=1.4,fT(1.4),y1=1.4,lwd=2,angle=20)
# dev.off() 


###############################################33


# Age at exposure: Figure 8
graphics.off()
if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
windows(width=8,height=7)


head(d)
d$Dose<-cut(d$sv,c(-1,.02,1,100),labels=c("Low","Moderate","High"))
d$Dose=factor(d$Dose,levels=c("High","Moderate","Low"))


d$agexc<-cut(d$agex,c(0,20,40,180),labels=c("10","30","50"))
d$Sex<-factor(d$s,labels=c("Male","Female"))
head(d)
library(plyr) 
(d2<-ddply(subset(d,c==0), .(Dose,Sex,agexc), summarise,
           PY=sum(py),cases=sum(cml),agex=weighted.mean(agex,py) )) #
(d2=within(d2,{incid=1e5*cases/PY}))

library(ggplot2)
(p <- ggplot(d2,aes(x=agex,y=incid,shape=Dose,col=Dose,group=Dose))+geom_point(size=5) +geom_line()
 + labs(title="Hiroshima A-bomb Survivors",x="Age-at-exposure (PY-weighted)",
        y=expression(paste("CML Cases per ",10^5," Person-Years")))    
 + scale_y_log10(limits=c(.1,130)) +xlim(8,52) )
(p=p + facet_grid(. ~ Sex))
(p=p+theme(
#   legend.position = "none", 
   legend.position = c(0.67, .85), 
           legend.title = element_text(size = rel(2)) ,
           legend.text = element_text(size = rel(1.3))  ) )  

(p=p+theme(plot.title = element_text(size = rel(2)),
           strip.text = element_text(size = rel(2)),
           axis.title.y = element_text(size = rel(1.7)),
           axis.title.x = element_text(size = rel(1.7)),
           axis.text = element_text(size = rel(2)))  )

# ggsave(p,file="~/agex.eps")
# ggsave(p,file="/users/radivot/case/grants/sachs/HSCepi/figs/agex.png")

########### make a similar plot for age, not agex. 

#This plot shows just how crazy the Hiroshima Females really are, i.e. it shows
#that we should try to stay away from them.

# graphics.off()
if(length(grep("linux",R.Version()$os))) windows <- function( ... ) X11( ... )
if(length(grep("darwin",R.Version()$os))) windows <- function( ... ) quartz( ... )
windows(width=8,height=7)

head(d)
d$agec<-cut(d$age,c(0,30,60,180),labels=c("15","45","75"))
d$Dose<-cut(d$sv,c(-1,.02,1,100),labels=c("Low","Moderate","High"))
d$Sex<-factor(d$s,labels=c("Male","Female"))
head(d)
library(plyr) 
(d3<-ddply(subset(d,c==0), .(Dose,Sex,agec), summarise,
           PY=sum(py),cases=sum(cml),age=weighted.mean(age,py) )) #
(d3=within(d3,{incid=1e5*cases/PY}))

library(ggplot2)
(p <- ggplot(d3,aes(x=age,y=incid,shape=Dose,group=Dose))+geom_point(size=5) +geom_line()
 + labs(title="Hiroshima A-bomb Survivors",x="Attained-age (PY-weighted)",
        y=expression(paste("CML Cases per ",10^5," Person-Years")))    
 + scale_y_log10(limits=c(.1,130)) +xlim(8,90) )
(p=p + facet_grid(. ~ Sex))
(p=p+theme(legend.position = c(0.67, .85), 
           legend.title = element_text(size = rel(2)) ,
           legend.text = element_text(size = rel(1.7))  ) )  

(p=p+theme(plot.title = element_text(size = rel(2.5)),
           strip.text = element_text(size = rel(2)),
           axis.title.y = element_text(size = rel(2.5)),
           axis.title.x = element_text(size = rel(2.3)),
           axis.text = element_text(size = rel(2.3)))  )



# Skip below 

# ###############################################33
# # for b test bin up times since exposure in groups of two to make fewer parameters
# dHM=subset(d,Sex=="Male"&c==0)
# head(dHM)
# summary(hm0<-mle2(cml~dpois(lambda=py*(exp(c10 + k*age)+sv*exp(f)) ),
#                   parameters=list(f~-1 + tsxf),
#                   start=list(c10=-12.4,k=0.025,f=-10),data=dHM ) )
# 
# summary(hm1<-mle2(cml~dpois(lambda=py*(exp(c10 + k*age)+
#                                          exp(-b*abs(agex-30)/28.85)*sv*exp(f)) ),
#                   parameters=list(f~-1 + tsxf),
#                   start=list(c10=-12.4,k=0.025,f=-10,b=0.5),data=dHM ) )
# anova(hm0,hm1) # P=0.12  HM only => no longer can claim significance
# 


# #################### try full model
# summary(s3<-mle2(cml~dpois(lambda=py*(exp(c10 + c2c*c + c2s*s + k*age + ks*s*age)
#                                       +sv*exp(c2c*c  + f + c2sa*s*a55 )  ) ),
#                  parameters=list(f~-1 + tsxf:sex),
#                  start=list(c10=-12.4,k=0.025,ks=0,c2c=-1.50,
#                             c2s=-0.18,f=-5,c2sa=2),data=d) ) 
# 
# summary(s3x<-mle2(cml~dpois(lambda=py*(exp(c10 + c2c*c + c2s*s + k*age + ks*s*age)
#                     +sv*exp(c2c*c  + f + c2sa*s*a55 + c2x*(1-c)*(1-s)*abs(agex-30) )  ) ),
#                  parameters=list(f~-1 + tsxf:sex),
#                  start=list(c10=-12.4,k=0.025,ks=0,c2c=-1.50,
#                             c2s=-0.18,f=-5,c2sa=2,c2x=.05),data=d) ) 
# summary(a55HMb)
# AIC(s3,s3x)
# ICtab(s3,s3x)
# anova(s3,s3x)
Any scripts or data that you put into this service are public.
SEERaBomb documentation built on Dec. 16, 2019, 1:21 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
SEERaBomb
SEER and Atomic Bomb Survivor Data Analysis Tools

inst/docs/papers/attic/REB2014/supplementAbomb.R
In SEERaBomb: SEER and Atomic Bomb Survivor Data Analysis Tools

Try the SEERaBomb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SEERaBomb SEER and Atomic Bomb Survivor Data Analysis Tools

inst/docs/papers/attic/REB2014/supplementAbomb.R In SEERaBomb: SEER and Atomic Bomb Survivor Data Analysis Tools

Try the SEERaBomb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SEERaBomb
SEER and Atomic Bomb Survivor Data Analysis Tools

inst/docs/papers/attic/REB2014/supplementAbomb.R
In SEERaBomb: SEER and Atomic Bomb Survivor Data Analysis Tools