R/polygenic.lib.R

Defines functions solar_polygenic hasHousehold extractCovariateFrame extractVarCompFrame extractLoglik get_proc_write_globals

#----------------------------------
# Polygenic functions
#----------------------------------

solar_polygenic <- function(dir, out)
{
  stopifnot(file.exists(dir))
  
  ### make `cmd`
  
  # fix `covlist`: term `1`
  # - it may be given in formula
  # - to be omitted in SOLAR
  covlist2 <- out$covlist
  # filter out term 1
  ind <-  grep("^1$", covlist2)
  if(length(ind) > 0) { 
    covlist2 <- covlist2[-ind] 
  }

  # proc  
  cmd.proc_def <- c(get_proc_write_globals(),
    get_proc_write_covariate())

  cmd.proc_call <- c("write_covariate",
    "write_globals")
  
  if(length(out$traits) == 1) {
    cmd.proc_def <- c(cmd.proc_def, get_proc_write_param_univar(),
      get_proc_write_varcomp_univar())
    cmd.proc_call <- c(cmd.proc_call, 
      paste("write_param_univar ", "\"", out$solar$model.filename, "\"", sep = ""), 
      "write_varcomp_univar")
  } else if(length(out$traits) == 2) {
    cmd.proc_def <- c(cmd.proc_def, get_proc_write_param_bivar())
    cmd.proc_call <- c(cmd.proc_call, 
      paste("write_param_bivar ", "\"", out$solar$model.filename, "\"", sep = "")) 
  }
  
  # cmd
  cmd <- c(paste("trait", paste(out$traits, collapse = " ")),
    paste("covariate", paste(covlist2, collapse = " ")),
    out$polygenic.settings,
    paste("polygenic", out$polygenic.options),
    ifelse(length(covlist2) == 0 | length(out$traits) > 1, "", "residual -fewest -needped")) # 
  
  ### run solar    
  ret.solar <- solar(c(cmd.proc_def, cmd, cmd.proc_call), dir, result = TRUE)
  
  solar.ok <- any(grepl("Output files and models are in directory", ret.solar))
    
  ### update `out`
  out$solar$cmd <- cmd
  out$solar$ret <- ret.solar
  out$solar$ok <- solar.ok

  ### terminate if an error
  if(!solar.ok) {
    return(out)
  }

  ### model dir
  model.dir <- paste(out$traits, collapse = ".")  
  model.path <- file.path(dir, model.dir)
  
  ### move "out.*" files (generated by R function `solar_polygenic`, not SOLAR)
  filenames.out <- list.files(dir, "^out\\.*")

  ret <- file.rename(
    file.path(dir, filenames.out), file.path(model.path, filenames.out)) 
  # `ret` logical vector
  stopifnot(all(ret))

  ### read output files
  files.model <- solarReadFiles(model.path)
  
  ### extract vars from output files
  out$cf <- suppressWarnings(try({
    extractCovariateFrame(model.path, out)
  }, silent = TRUE))
  out$vcf <- suppressWarnings(try({
    extractVarCompFrame(model.path, out)
 }, silent = TRUE))
  out$lf <- suppressWarnings(try({
    extractLoglik(model.path, out)
  }, silent = TRUE))
  
  ### update `out`
  out$solar$files <- list(model = files.model)
    
  return(out)
}

#----------------------------------
# Check functions
#----------------------------------
hasHousehold <- function(cnames) "HHID" %in% matchIdNames(cnames)

#----------------------------------
# Extract functions
#----------------------------------
extractCovariateFrame <- function(dir, out)
{
  vals <- scan(file.path(dir, "out.covariate"), character(), quiet = TRUE)
  stopifnot(vals[1] == "covariate")
  
  if(length(vals) == 1) {
    return(data.frame())
  }
  
  cov <- vals[-1]
  ncov <- length(cov)
  
  cf <- data.frame(covariate = cov)
  
  if(length(out$traits) == 1) {
    tab <- read.table(file.path(dir, "out.param.univar"), colClasses = c("character", "numeric"))
#       V1            V2
#1 loglike -2.108689e+02
#2     h2r  8.061621e-01
#3   h2rSE  1.100465e-01
#4      e2  1.938379e-01
#5    e2SE  1.100465e-01
#6    bage  4.591117e-04
#7  bageSE  1.483783e-02
#8    bsex -4.559509e-01
#9  bsexSE  3.125045e-01
    
    # Estimate of betas
    bnames <- paste("b", cov, sep = "")
    stopifnot(all(bnames %in% tab[, 1]))

    ind <- which(tab[, 1] %in% bnames)
    cf$Estimate <- tab[ind, 2]
    
    # SE of betas
    SEnames <- paste("b", cov, "SE", sep = "")
    stopifnot(all(SEnames %in% tab[, 1]))

    ind <- which(tab[, 1] %in% SEnames)
    cf$SE <- tab[ind, 2]
    
    ### read `out.globals` to get p-values of covariates
    lines <- readLines(file.path(dir, "out.globals"))
#> lines
#[1] "SOLAR_Individuals 174"               "SOLAR_H2r_P 6.1167535e-10"          
#[3] "SOLAR_Kurtosis -0.3603"              "SOLAR_Covlist_P 0.9753082 0.1455341"
#[5] "SOLAR_Covlist_Chi 0.0010 2.1184"     "SOLAR_RhoP "                        
#[7] "SOLAR_RhoP_SE 0"                     "SOLAR_RhoP_P "                      
#[9] "SOLAR_RhoP_OK 0"    
    
    # Chi-values of betas
    line <- grep("SOLAR_Covlist_Chi", lines, value = TRUE)
    stopifnot(length(line) == 1)

    vals <- strsplit(line, "\\s")[[1]]
    stopifnot(vals[1] == "SOLAR_Covlist_Chi")
    if(length(vals) > 1) {
      stopifnot(length(vals) == ncov + 1)
      cf$Chi <- as.numeric(vals[-1])
    } else {
      cf$Chi <- as.numeric(NA)
    }

    # P-values of betas
    line <- grep("SOLAR_Covlist_P", lines, value = TRUE)
    stopifnot(length(line) == 1)

    vals <- strsplit(line, "\\s")[[1]]
    stopifnot(vals[1] == "SOLAR_Covlist_P")
    if(length(vals) > 1) {
      stopifnot(length(vals) == ncov + 1)
      cf$pval <- as.numeric(vals[-1])
    } else {
      cf$pval <- as.numeric(NA)
    }
    
    # Add mean
    if(all(c("mean", "meanSE") %in% tab[, 1])) {
      ind1 <- which(tab[, 1] %in% "mean")
      ind2 <- which(tab[, 1] %in% "meanSE")
      cf.mean <- data.frame(covariate = "mean", Estimate = tab[ind1, 2], 
        SE = tab[ind2, 2], Chi = NA, pval = NA)
      
      cf <- rbind(cf.mean, cf)
    }
  }  
  
  return(cf)
}

extractVarCompFrame <- function(dir, out)
{
  vcf <- data.frame()
  
  if(length(out$traits) == 1) {
    vals <- scan(file.path(dir, "out.varcomp.univar"), character(), quiet = TRUE)
    stopifnot(vals[1] == "varcomp")
  
    if(length(vals) == 1) {
      return(data.frame())
    }
  
    varcomp <- vals[-1]
    nvarcomp <- length(varcomp)
  
    vcf <- data.frame(varcomp = varcomp)
  

    tab <- read.table(file.path(dir, "out.param.univar"), colClasses = c("character", "numeric"))
#       V1            V2
#1 loglike -2.108689e+02
#2     h2r  8.061621e-01
#3   h2rSE  1.100465e-01
#4      e2  1.938379e-01
#5    e2SE  1.100465e-01
#6    bage  4.591117e-04
#7  bageSE  1.483783e-02
#8    bsex -4.559509e-01
#9  bsexSE  3.125045e-01
    
    # Estimate of var
    varnames <- varcomp
    stopifnot(all(varnames %in% tab[, 1]))

    ind <- which(tab[, 1] %in% varnames)
    vcf$Var <- tab[ind, 2]
    
    # SE of var
    SEnames <- paste(varcomp, "SE", sep = "")
    stopifnot(all(SEnames %in% tab[, 1]))

    ind <- which(tab[, 1] %in% SEnames)
    vcf$SE <- tab[ind, 2]
    
   ### read `out.globals` to get p-values of varcomp
   vcf$pval <- as.numeric(NA)
       
   lines <- readLines(file.path(dir, "out.globals"))
#> lines
#[1] "SOLAR_Individuals 174"               "SOLAR_H2r_P 6.1167535e-10"          
#[3] "SOLAR_Kurtosis -0.3603"              "SOLAR_Covlist_P 0.9753082 0.1455341"
#[5] "SOLAR_Covlist_Chi 0.0010 2.1184"     "SOLAR_RhoP "                        
#[7] "SOLAR_RhoP_SE 0"                     "SOLAR_RhoP_P "                      
#[9] "SOLAR_RhoP_OK 0"    
    
    # p-value of `h2r`
    stopifnot("h2r" %in% vcf$varcomp)
    ind <- which(vcf$varcomp == "h2r")
    
    line <- grep("SOLAR_H2r_P", lines, value = TRUE)
    stopifnot(length(line) == 1)

    vals <- strsplit(line, "\\s")[[1]]
    stopifnot(vals[1] == "SOLAR_H2r_P")
    
    
    if(length(vals) > 1) {
      stopifnot(length(vals) == 2)
      vcf$pval[ind] <- as.numeric(vals[2])
    }
  }

  if(length(out$traits) == 2) {
    tab <- read.table(file.path(dir, "out.param.bivar"), colClasses = c("character", "numeric"))
#           V1        V2
#1 h2r(trait1) 0.7953058
#2  e2(trait1) 0.2046942
#3 h2r(trait2) 0.6074364
#4  e2(trait2) 0.3925636
#5        rhog 0.9691782
#6        rhoe 0.4524437
    
    # Estimate/SE of varcomp
    indSE <- grep("SE$", tab[, 1])
    vcf <- data.frame(varcomp = tab[-indSE, 1], Estimate = tab[-indSE, 2], SE = tab[indSE, 2])
    
    Estimate <- SE <- NULL # due to R CMD check: no visible binding
    vcf <- mutate(vcf, zscore = Estimate / SE)
  }  
  
  return(vcf)
}

extractLoglik <- function(dir, out)
{
  file.path <- file.path(dir, "polygenic.logs.out")
  stopifnot(file.exists(file.path))

  lines <- readLines(file.path)
  
  out <- list()
  out2 <- list()
  
  ### sporadic 
  line <- grep("Loglikelihood of sporadic model is", lines, value = TRUE)
  if(length(line) == 1) {
    out <- c(out, list(list(model = "sporadic",
      loglik = as.numeric(strsplit(line, "model is")[[1]][2]))))
  }

  ### sporadic 
  line <- grep("Loglikelihood of polygenic model is", lines, value = TRUE)
  if(length(line) == 1) {
    out <- c(out, list(list(model = "polygenic",
      loglik = as.numeric(strsplit(line, "model is")[[1]][2]))))
  }
  
  ### rhoe constrained to 0
  ind <- grep("Loglikelihood of model with rhoe constrained to 0", lines)
  if(length(ind) == 1) {
    line <- lines[ind]
    out <- c(out, list(list(model = "rhoe0",
      loglik = as.numeric(strsplit(line, "to 0 is")[[1]][2]))))
    
    line <- lines[ind+1]
    vals <- strsplit(line, "=|,")[[1]]
    out2 <- c(out2, list(list(model = "rhoe0",
      Chi = as.numeric(vals[2]), deg = as.numeric(vals[4]),
      pval = as.numeric(vals[6]))))
  }
  
  ### rhog constrained to 0
  ind <- grep("Loglikelihood of model with rhog constrained to 0", lines)
  if(length(ind) == 1) {
    line <- lines[ind]
    out <- c(out, list(list(model = "rhog0",
      loglik = as.numeric(strsplit(line, "to 0 is")[[1]][2]))))
    
    line <- lines[ind+1]
    vals <- strsplit(line, "=|,")[[1]]
    out2 <- c(out2, list(list(model = "rhog0",
      Chi = as.numeric(vals[2]), deg = as.numeric(vals[4]),
      pval = as.numeric(vals[6]))))
  }

  ### rhog constrained to 1
  ind <- grep("Loglikelihood of model with rhog constrained to 1", lines)
  if(length(ind) == 1) {
    line <- lines[ind]
    out <- c(out, list(list(model = "rhog1",
      loglik = as.numeric(strsplit(line, "to 0 is")[[1]][2]))))
    
    line <- lines[ind+1]
    vals <- strsplit(line, "=|,")[[1]]
    out2 <- c(out2, list(list(model = "rhog1",
      Chi = as.numeric(vals[2]), deg = as.numeric(vals[4]),
      pval = as.numeric(vals[6]))))
  }
  
  ### rhoc constrained to 0
  ind <- grep("Loglikelihood of model with rhoc constrained to 0", lines)
  if(length(ind) == 1) {
    line <- lines[ind]
    out <- c(out, list(list(model = "rhoc0",
      loglik = as.numeric(strsplit(line, "to 0 is")[[1]][2]))))
    
    line <- lines[ind+1]
    vals <- strsplit(line, "=|,")[[1]]
    out2 <- c(out2, list(list(model = "rhoc0",
      Chi = as.numeric(vals[2]), deg = as.numeric(vals[4]),
      pval = as.numeric(vals[6]))))
  }

  ### rhop constrained to 0
  ind <- grep("Loglikelihood of model with rhop constrained to 0", lines)
  if(length(ind) == 1) {
    line <- lines[ind]
    out <- c(out, list(list(model = "rhop0",
      loglik = as.numeric(strsplit(line, "to 0 is")[[1]][2]))))
    
    line <- lines[ind+1]
    vals <- strsplit(line, "=|,")[[1]]
    out2 <- c(out2, list(list(model = "rhop0",
      Chi = as.numeric(vals[2]), deg = as.numeric(vals[4]),
      pval = as.numeric(vals[6]))))
  }      
  
  if(length(out) == 0) {
    df <- data.frame()
  } else {
    df <- ldply(out, function(x) data.frame(model = x$model, loglik = x$loglik))
  }
  
  if(length(out2) > 0) {
    df2 <- ldply(out2, function(x) data.frame(model = x$model, Chi = x$Chi,
      deg = x$deg, pval = x$pval))
    
    df <- plyr::join(df, df2, by = "model")      
  }
  
  return(df)
}  

#----------------------------------
# Solar proc
#----------------------------------

get_proc_write_globals <- function() 
{
"proc write_globals {} {\
\
  global SOLAR_Individuals\
  global SOLAR_Kurtosis\
  global SOLAR_Covlist_P\
  global SOLAR_Covlist_Chi\
  global SOLAR_RhoP\
	global SOLAR_RhoP_P\
  global SOLAR_RhoP_SE\
	global SOLAR_RhoP_OK\
\
  ### write to file\
	set f [open \"out.globals\" \"w\"]\
\
  puts $f \"SOLAR_Individuals $SOLAR_Individuals\"\
  if {[if_global_exists SOLAR_H2r_P]} {\
    global SOLAR_H2r_P\
    puts $f \"SOLAR_H2r_P $SOLAR_H2r_P\"\
  } else {\
    puts $f \"SOLAR_H2r_P \"\
  }\
  puts $f \"SOLAR_Kurtosis $SOLAR_Kurtosis\"\
  puts $f \"SOLAR_Covlist_P $SOLAR_Covlist_P\"\
  puts $f \"SOLAR_Covlist_Chi $SOLAR_Covlist_Chi\"\
\
  # Phenotypic Correlation\
  puts $f \"SOLAR_RhoP $SOLAR_RhoP\"\
  puts $f \"SOLAR_RhoP_SE $SOLAR_RhoP_SE\"\
  puts $f \"SOLAR_RhoP_P $SOLAR_RhoP_P\"\
  puts $f \"SOLAR_RhoP_OK $SOLAR_RhoP_OK\"\
\
	close $f\
}\
"
}

get_proc_write_covariate <- function() 
{
"proc write_covariate {} {\
\
 	set covariates [covariate]\
\
  ### write to file\
	set f [open \"out.covariate\" \"w\"]\
\
  puts $f \"covariate $covariates\"\
\
	close $f\
}\
"
}

get_proc_write_varcomp_univar <- function() 
{
"proc write_varcomp_univar {} {\
\
  ### write to file\
	set f [open \"out.varcomp.univar\" \"w\"]\
  puts $f \"varcomp\"\
\
 	if {[if_parameter_exists h2r]} {\
    puts $f \"h2r\"\
  }\
 	if {[if_parameter_exists c2]} {\
    puts $f \"c2\"\
  }\
 	if {[if_parameter_exists e2]} {\
    puts $f \"e2\"\
  }\
\
	close $f\
}\
"
}

# @ http://helix.nih.gov/Documentation/solar-6.6.2-doc/91.appendix_1_text.html#read_model
# @ see any null0.mod
get_proc_write_param_univar <- function()
{
"proc write_param_univar {model} {\
\
	set loglike [read_model $model loglike]\
	set h2r [read_model $model h2r]\
	set h2rSE [read_model $model h2r -se]\
\
	if {[if_parameter_exists c2]} {\
    set c2 [read_model $model c2]\
    set c2SE [read_model $model c2 -se]\
	}\
	set e2 [read_model $model e2]\
	set e2SE [read_model $model e2 -se]\
\
  ### write to file\
	set f [open \"out.param.univar\" \"w\"]\
\
  puts $f \"loglike $loglike\"\
  puts $f \"h2r $h2r\"\
  puts $f \"h2rSE $h2rSE\"\
\
	if {[if_parameter_exists c2]} {\
    puts $f \"c2 $c2\"\
    puts $f \"c2SE $c2SE\"\
	}\
  puts $f \"e2 $e2\"\
  puts $f \"e2SE $e2SE\"\
\
 	set cov [covariate]\
  set ncov [llength $cov]\
  for {set i 0} {$i < $ncov} {incr i} {\
  	set covi [lindex $cov $i]\
  	if {[if_parameter_exists b$covi]} {\
      set bi [read_model $model b$covi]\
      set biSE [read_model $model b$covi -se]\
      puts $f \"b$covi $bi\"\
      puts $f \"b${covi}SE $biSE\"\
     }\
  }\
\
  # mean\
	if {[if_parameter_exists mean]} {\
    set mean [read_model $model mean]\
    set meanSE [read_model $model mean -se]\
    puts $f \"mean $mean\"\
    puts $f \"meanSE $meanSE\"\
  }\
\
	close $f\
}\
"
}

get_proc_write_param_bivar <- function() 
{
"proc write_param_bivar {model} {\
\
 	set traits [trait]\
\
  ### write to file\
	set f [open \"out.param.bivar\" \"w\"]\
\
  for {set i 0} {$i < 2} {incr i} {\
  	set ti [lindex $traits $i]\
\
  	set h2ri \"h2r(${ti})\"\
  	if {[if_parameter_exists $h2ri]} {\
      set vali [read_model $model $h2ri]\
      puts $f \"$h2ri $vali\"\
\
      set vali [read_model $model $h2ri -se]\
      puts $f \"${h2ri}SE $vali\"\
     }\
\
  	set e2i \"e2(${ti})\"\
  	if {[if_parameter_exists $e2i]} {\
      set vali [read_model $model $e2i]\
      puts $f \"$e2i $vali\"\
\
      set vali [read_model $model $e2i -se]\
      puts $f \"${e2i}SE $vali\"\
     }\
\
  	set c2i \"c2(${ti})\"\
  	if {[if_parameter_exists $c2i]} {\
      set vali [read_model $model $c2i]\
      puts $f \"$c2i $vali\"\
\
      set vali [read_model $model $c2i -se]\
      puts $f \"${c2i}SE $vali\"\
     }\
  }\
\
 	if {[if_parameter_exists rhog]} {\
    set val [read_model $model rhog]\
    puts $f \"rhog $val\"\
\
    set val [read_model $model rhog -se]\
    puts $f \"rhogSE $val\"\
  }\
\
 	if {[if_parameter_exists rhoc]} {\
    set val [read_model $model rhoc]\
    puts $f \"rhoc $val\"\
\
    set val [read_model $model rhoc -se]\
    puts $f \"rhocSE $val\"\
  }\
\
 	if {[if_parameter_exists rhoe]} {\
    set val [read_model $model rhoe]\
    puts $f \"rhoe $val\"\
\
    set val [read_model $model rhoe -se]\
    puts $f \"rhoeSE $val\"\
  }\
\
	close $f\
}\
"
}

Try the solarius package in your browser

Any scripts or data that you put into this service are public.

solarius documentation built on May 2, 2019, 2:43 a.m.