R/tissue_masses_flows.R
In httk: High-Throughput Toxicokinetics

Documented in tissue_masses_flows

#'Given a data.table describing a virtual population by the NHANES quantities, 
#'generates HTTK physiological parameters for each individual.
#'
#'@param tmf_dt A data.table generated by
#'  \code{gen_age_height_weight()}, containing variables \code{gender},
#'  \code{reth}, \code{age_months}, \code{age_years}, \code{weight}, and
#'  \code{height}.
#'  
#'@param add_variability An option to add variability to calculated masses and
#'  flows. Default is TRUE; use FALSE for repeatable calculations.
#'  
#'@return The same data.table, with aditional variables describing tissue masses
#'  and flows.
#'  
#'@keywords httk-pop
#'
#'@author Caroline Ring
#'
#'@references 
#' Barter, Zoe E., et al. "Scaling factors for the extrapolation of in vivo 
#' metabolic drug clearance from in vitro data: reaching a consensus on values 
#' of human micro-somal protein and hepatocellularity per gram of liver." Current 
#' Drug Metabolism 8.1 (2007): 33-45.
#'
#' Birnbaum, L., et al. "Physiological parameter values for PBPK models." 
#' International Life Sciences Institute, Risk Science Institute, Washington, 
#' DC (1994).
#'
#' Geigy Pharmaceuticals, "Scientific Tables", 7th Edition, 
#' John Wiley and Sons (1970)
#'
#' McNally, Kevin, et al. "PopGen: a virtual human population generator." 
#' Toxicology 315 (2014): 70-85.
#'
#' \insertRef{ring2017identifying}{httk}
#'
#'@import stats

tissue_masses_flows <- function(tmf_dt,
                                add_variability = TRUE) {
  
  #R CMD CHECK throws notes about "no visible binding for global variable", for
  #each time a data.table column name is used without quotes. To appease R CMD
  #CHECK, a variable has to be created for each of these column names and set to
  #NULL. Note that within the data.table, these variables will not be NULL! Yes,
  #this is pointless and annoying.
  id <- mass_mean <- height_ref <- height <- mass_ref <- tissue <- NULL
  gender <- age_years <- age_months <- weight <- bonemass_mean <- NULL
  BSA <- mass_dist <- mass <- mass_cv <- flow_mean <- flow_ref <- NULL
  flow_frac <- flow <- flow_cv <- CO <- Adipose <- Bone <- NULL
  org_mass_sum <- Blood <- Other_mass <- Adipose_mass <- NULL
  org_flow_check <- CO_flow <- weight_adj <- BSA_adj <- NULL
  million.cells.per.gliver <- NULL
  #End R CMD CHECK appeasement.
  tmf_dt <- copy(tmf_dt) #to avoid altering the original object outside the function
  
  #Set an id variable for later ordering
  tmf_dt[, id:=1:nrow(tmf_dt)]
  
  #Add reference values to data table
  tmp_dt <- merge(tmf_dt,
                  mcnally_dt,
                  by='gender',
                  allow.cartesian=TRUE)
  
  #Allometric scaling of tissue masses for most things
  #(the non-allometric ones will be replaced)
  tmp_dt[, 
         mass_mean:=tissue_scale(height_ref=height_ref,
                                 height_indiv=height,
                                 tissue_mean_ref=mass_ref)]
  #Non-allometrically scaled tissue masses:
  #Brain mass
  tmp_dt[tissue=='Brain', 
         mass_mean:=brain_mass(gender=gender, 
                               age_years=age_years)]
  
  #Bone mass
  tmp_dt[tissue=='Bone', 
         mass_mean:=bone_mass_age(age_years=age_years,
                                  age_months=age_months,
                                  height=height,
                                  weight=weight,
                                  gender=gender)]
  #Convert bone mass to skeleton mass 
  bone_mass_mean <- tmp_dt[tissue=='Bone', list(id, mass_mean)]
  setnames(bone_mass_mean, 'mass_mean', 'bonemass_mean')
  tmp_dt <- merge(tmp_dt,
                  bone_mass_mean,
                  by=('id'))
  tmp_dt[tissue=='Skeleton', 
         mass_mean:=bonemass_mean/0.5]
  tmp_dt[, bonemass_mean:=NULL]
  rm(bone_mass_mean)
  
  #Muscle mass
  tmp_dt[tissue=='Muscle', 
         mass_mean:=skeletal_muscle_mass(smm=mass_mean,
                                         age_years=age_years,
                                         height=height,
                                         gender=gender)]
  
  #For individuals under age 18:
  #Liver mass
  tmp_dt[tissue=='Liver' &
           age_years<=18, 
         mass_mean:=liver_mass_children(height=height,
                                        weight=weight,
                                        gender=gender)]
  #Kidney mass
  tmp_dt[tissue=='Kidney' &
           age_years<=18,
         mass_mean:=kidney_mass_children(weight=weight,
                                         height=height,
                                         gender=gender)]
  #Pancreas mass
  tmp_dt[tissue=='Pancreas' &
           age_years<=18,
         mass_mean:=pancreas_mass_children(height=height,
                                           weight=weight,
                                           gender=gender)]
  
  #Spleen mass
  tmp_dt[tissue=='Spleen' &
           age_years<=18,
         mass_mean:=spleen_mass_children(height=height,
                                         weight=weight,
                                         gender=gender)]
  
  #Lung mass
  tmp_dt[tissue=='Lung' &
           age_years<=18,
         mass_mean:=lung_mass_children(height=height,
                                       weight=weight,
                                       gender=gender)]
  
  #For all individuals: skin and blood mass calculated from body surface area
  #First compute body surface area
  tmp_dt[, BSA:=body_surface_area(BW=weight, 
                                  H=height,
                                  age_years=age_years)]
  
  #Then compute skin mass
  tmp_dt[tissue=='Skin',
         mass_mean:=skin_mass_bosgra(BSA=BSA)]
  
  #And blood mass
  tmp_dt[tissue=='Blood',
         mass_mean:=blood_weight(BSA=BSA/(100^2),
                                 gender=gender)]
  #If blood mass based on BSA and gender is negative or very small,
  #then just default to the mean blood mass by age
  #(Geigy Scientific Tables, 7th ed.)
  tmp_dt[tissue=='Blood' &
           mass_mean<0.2,
         mass_mean:=blood_mass_correct(blood_mass=mass_mean, 
                                       age_months=age_months,
                                       age_years=age_years,
                                       gender=gender,
                                       weight=weight)]
  #Add variability to tissue masses
  #For those tissues with normal distribution of residual variability,
  #draw individual tissue mass from a normal distribution
  if (add_variability) {
    tmp_dt[mass_dist=='Normal',
           mass:=truncnorm::rtruncnorm(n=length(mass_dist),
                                       a=0, #truncated at zero below (because mass can't be negative)
                                       mean=mass_mean, #with mean = mass predicted above,
                                       sd=mass_cv*mass_mean)]  #cv given by the mass_cv from McNally et al, (2014)
  } else {
    tmp_dt[mass_dist=='Normal',
           mass:=mass_mean]
  }
  
  #For those tissues with log-normal distribution of residual variability:
  #draw individual tissue mass from a log-normal distribution,
  #with mean = log(mass predicted above),
  #sd derived from mass_cv.
  if (add_variability) {
    tmp_dt[mass_dist=='Log-normal',
           mass:=exp(rnorm(n=length(mass_dist),
                           mean=log(mass_mean),
                           sd=sqrt(log(mass_cv^2+1))))]
  } else {
    tmp_dt[mass_dist=='Log-normal',
           mass:=mass_mean]
  }
  
  
  #Flows: allometrically scaled
  tmp_dt[tissue=='CO',
         flow_mean:=tissue_scale(height_ref=height_ref,
                                 height_indiv=height,
                                 tissue_mean_ref=1.05*flow_ref)*#scale back to age 25; reference value is for age 35
           (1-pmax(0,0.005*(age_years-25)))] #age scaling of CO
  #Split off CO into a separate column
  CO_flow_mean <- tmp_dt[tissue=='CO', list(id, flow_mean)]
  setnames(CO_flow_mean, 'flow_mean', 'CO_flow_mean')
  tmp_dt <- merge(tmp_dt,
                  CO_flow_mean,
                  by='id',
                  allow.cartesian=TRUE)
  tmp_dt[tissue!='CO', 
         flow_mean:=flow_frac*CO_flow_mean]
  #Add variability to flows (normal distribution)
  #note: exclude CO and lung flow
  if (add_variability) {
    tmp_dt[tissue!='CO' & 
             tissue!='Lung' &
             !is.na(flow_mean), 
           flow:=truncnorm::rtruncnorm(n=length(flow_mean),
                                       a=0,
                                       mean=flow_mean,
                                       sd=flow_cv*flow_mean)]
  } else {
    tmp_dt[tissue!='CO' & 
             tissue!='Lung' &
             !is.na(flow_mean), 
           flow:=flow_mean]
  }
  
  
  #Lung flow is a fixed fraction of CO
  tmp_dt[tissue=='Lung', 
         flow:=flow_frac*
           CO_flow_mean]
  
  #No variability in CO itself
  tmp_dt[tissue=='CO', 
         flow:=CO_flow_mean]
  
  #Now cast tmp_dt so each tissue mass and flow has its own column
  #First do masses
  mass_cast <- data.table::dcast.data.table(tmp_dt,
                                            id~tissue,
                                            value.var='mass')
  mass_cast[, CO:=NULL] #cardiac output doesn't have a mass
  mass_cast[, Adipose:=NULL] #adipose mass is assigned later
  mass_cast[, Bone:=NULL] #already counted in Skeleton
  setnames(mass_cast,
           names(mass_cast)[names(mass_cast)!='id'],
           paste(names(mass_cast)[names(mass_cast)!='id'],
                 'mass',
                 sep='_'))
  #Get sum of enumerated tissue masses
  mass_cast[, org_mass_sum:=Reduce('+', .SD), 
            .SDcols=grep(x=names(mass_cast),
                         pattern='mass',
                         value=TRUE)]
  
  #Then cast flows
  flow_cast <- data.table::dcast.data.table(tmp_dt,
                                            id~tissue,
                                            value.var='flow')
  flow_cast[, Blood:=NULL] #no flow to blood
  flow_cast[, Bone:=NULL] #already counted in Skeleton flow
  setnames(flow_cast,
           names(flow_cast)[names(flow_cast)!='id'],
           paste(names(flow_cast)[names(flow_cast)!='id'],
                 'flow',
                 sep='_'))
  
  #Add the tissue mass columns to tmf_dt
  tmf_dt <- merge(tmf_dt,
                  mass_cast,
                  by='id')
  
  #Add the tissue flow columns to tmf_dt
  tmf_dt<- merge(tmf_dt, 
                 flow_cast,
                 by='id')
  
  #To compute adipose weight: first calculate sum of non-adipose tissues.
  #Include GI tract contents (1.4% of body weight) 
  #and rest of body (3.3% of body weight)
  #(Birnbaum et al. 1994)
  tmf_dt[, Other_mass:=(0.033+0.014)*weight]
  tmf_dt[, org_mass_sum:=org_mass_sum+Other_mass]
  
  #Then the rest of weight not accounted for by
  #the sum of non-adipose tissues must be adipose.
  #Include log-normal residual variability.
  if (add_variability) {
    tmf_dt[(weight-org_mass_sum)>1, 
           Adipose_mass:=exp(rnorm(n=length(weight),
                                   mean=log(weight-org_mass_sum),
                                   sd=sqrt(log(0.42^2+1)) #CV from McNally et al. (2014) table 5
           ))]
  } else {
    tmf_dt[(weight-org_mass_sum)>1, 
           Adipose_mass:=(weight-org_mass_sum)]
  }
  
  
  #If non-adipose tissues accounted for all of body mass,
  #or more than body mass,
  #then set adipose mass to 0 -- this will be checked and rejected later
  tmf_dt[(weight-org_mass_sum)<=1, 
         Adipose_mass:=0]
  
  #Check to see what percentage of CO the flows add up to
  #If they add up to more than 100% of CO, that individual will be
  #rejected later.
  tmf_dt[, org_flow_check:=Reduce('+', .SD),
         .SDcols=names(flow_cast)[!(names(flow_cast) %in% 
                                      c('CO_flow',
                                        'id'))]]
  tmf_dt[, org_flow_check:=org_flow_check/CO_flow]
  
  #Adjust body weight to be sum of all tissues including adipose
  tmf_dt[, weight_adj:=org_mass_sum + Adipose_mass]
  
  #Adjust BSA using adjusted body weight
  tmf_dt[, BSA_adj:=body_surface_area(BW=weight_adj, 
                                      H=height,
                                      age_years=age_years)] #in cm^2
  #And do hepatocellularity
  #   million.cells.per.gliver  Millions cells per gram of liver tissue.
  #From Barter et al. 2007
  #geometric mean = exp(mean (log million.cells.per.gliver)) = 99
  #95% CI for observations is 23-444
  #log(444)-log(99) = 2sigma (approximately)
  mu <- log(10^(-0.66*log10(tmf_dt[,age_years])+3.10)) #From Figure 5 of Barter et al. (2007))
  mu[tmf_dt[,age_years<20]] <- log(10^(-0.66*log10(19)+3.10)) #For people below age 20, just set it at the age-19 prediction
  #(because data in Barter et al. (2007) only goes back to age 20)
  sigma.total <- ((log(444) -log(99))/2 + 
                    (log(99)-log(23))/2)/2 #Estimate variance from overall population (from Barter et al. 2007)
  #This gives TOTAL variance of ln data
  #Get R2 from linear fit to log10 data
  Fval <- qf(0.012/2, 
             df1=1, 
             df2=26, 
             lower.tail=FALSE) #=var unexplained/var explained
  R2 <- Fval/(1+Fval) #=var explained / var total
  sigma <- sqrt((1-R2)*sigma.total^2) #remaining variance of ln data
  if (add_variability) {
    tmf_dt[, million.cells.per.gliver:=exp(rnorm(n=nrow(tmf_dt),
                                                 mean=mu,
                                                 sd=sigma))]
  } else {
    tmf_dt[, million.cells.per.gliver:=mu]
  }
  
  
  #Harmonize names
  setnames(tmf_dt,
           c('Kidney_mass',
             'Kidney_flow',
             'CO_flow'),
           c('Kidneys_mass',
             'Kidneys_flow',
             'CO')
  )
  
  #Delete id column
  tmf_dt[, id:=NULL]
  
  #And we're done!
  return(tmf_dt)
}