# Custom SHG Call
#
# The SHG is written in Python and is maintained seperately from this package by CISNET.
# This function writes a custom "run_tests.py" into the current director for the SHG so
# that only selected birth cohort years can be run instead of all of the 1890 - 2110 range
# that is hardcoded into the current run_tests.py distributed with SHG 6.3.4. This function
# is called by runSHG() and does not need to be called directly. It creates a python script
# in the SHG directory, called 'run_custom_tests.py'
# @param birth_cohort the year of birth for the cohort (ie: 1950)
# @keywords smoking history generator run_tests
# @examples
# setwd("~/SHG/") # navigate to the SHG directory first
# SHG_output <- makeCustomRun(birth_cohort = 1950)
makeCustomRun <- function(birth_cohort) {
# working directory should have been set as SHG directory before this
# write the following code into a custom python
run_custom_tests <- c(
stringr::str_c("from __future__ import division"),
stringr::str_c("from ipdb import set_trace"),
stringr::str_c("import numpy as np"),
stringr::str_c("import os"),
stringr::str_c("from shutil import move"),
stringr::str_c("import socket"),
stringr::str_c("from sys import platform"),
stringr::str_c(""),
stringr::str_c(""),
stringr::str_c("def recompile_source():"),
stringr::str_c(" if platform == 'linux' or platform == 'linux2':"),
stringr::str_c(" os.system('sh install.sh')"),
stringr::str_c(" try:"),
stringr::str_c(" move('lbc_smokehist.exe', 'lbc_smokehist_linux.exe')"),
stringr::str_c(" except:"),
stringr::str_c(" pass"),
stringr::str_c(" elif platform == 'darwin':"),
stringr::str_c(" os.system('sh install.sh')"),
stringr::str_c(" try:"),
stringr::str_c(" move('lbc_smokehist.exe', 'lbc_smokehist_osx.exe')"),
stringr::str_c(" except:"),
stringr::str_c(" pass"),
stringr::str_c(" elif platform == 'win32' or platform == 'cygwin':"),
stringr::str_c(" os.system('install.sh')"),
stringr::str_c(" try:"),
stringr::str_c(" move('lbc_smokehist.exe', 'lbc_smokehist_win.exe')"),
stringr::str_c(" except:"),
stringr::str_c(" pass"),
stringr::str_c(""),
stringr::str_c("def delete_old_io_files():"),
stringr::str_c(" [os.remove(test_file) for test_file in os.listdir('.') if test_file.startswith('input') or test_file.startswith('output') or test_file.startswith('errors')]"),
stringr::str_c(""),
stringr::str_c("def create_input_file(cohort):"),
stringr::str_c(" seed1 = np.int(np.random.random() * 2147483647); seed2 = np.int(np.random.random() * 2147483647)"),
stringr::str_c(" seed3 = np.int(np.random.random() * 2147483647); seed4 = np.int(np.random.random() * 2147483647)"),
stringr::str_c(" open('input_{cohort}.txt'.format(**locals()), 'w').write(open('template_input_05032017.txt').read().format(**locals()))"),
stringr::str_c(""),
stringr::str_c("def run_shg(cohort):"),
stringr::str_c(" if platform == 'linux' or platform == 'linux2':"),
stringr::str_c(" command = './lbc_smokehist_linux.exe input_{cohort}.txt'.format(**locals())"),
stringr::str_c(" elif platform == 'darwin':"),
stringr::str_c(" command = './lbc_smokehist_osx.exe input_{cohort}.txt'.format(**locals())"),
stringr::str_c(" print command"),
stringr::str_c(" elif platform == 'win32' or platform == 'cygwin':"),
stringr::str_c(" command = 'lbc_smokehist_win.exe input_{cohort}.txt'.format(**locals())"),
stringr::str_c(" os.system(command)"),
stringr::str_c(""),
stringr::str_c("def parse_results(year):"),
stringr::str_c(" global sims"),
stringr::str_c(" people = []"),
stringr::str_c(" sims = open('output_' + str(year) + '.out', 'r').read()"),
stringr::str_c(" if sims.find('<RUN>') != -1:"),
stringr::str_c(" sims = sims[sims.find('<RUN>') + 5:sims.find('</RUN>')].split('", "\\", "n')"),
stringr::str_c(" else:"),
stringr::str_c(" sims = sims.split('","\\","n')"),
stringr::str_c(" sims = [sim.split(';')[0:-1] for sim in sims if len(sim) != 0]"),
stringr::str_c(" for sim in sims:"),
stringr::str_c(" person = {}"),
stringr::str_c(" person['race'] = sim[0]"),
stringr::str_c(" person['sex'] = sim[1]"),
stringr::str_c(" person['yob'] = sim[2]"),
stringr::str_c(" person['init_age'] = sim[3]"),
stringr::str_c(" person['cess_age'] = sim[4]"),
stringr::str_c(" person['ocd_age'] = sim[5]"),
stringr::str_c(" if person['init_age'] != '-999':"),
stringr::str_c(" smoking_history = np.array(sim[6:])"),
stringr::str_c(" years = int(smoking_history.shape[0] / 2)"),
stringr::str_c(" person['smoking_history'] = smoking_history.reshape(years, 2)"),
stringr::str_c(" people.append(person)"),
stringr::str_c(" return people"),
stringr::str_c(""),
stringr::str_c("def comment_on_prevalence(people, year):"),
stringr::str_c(" smokers = 0"),
stringr::str_c(" for person in people:"),
stringr::str_c(" if 'smoking_history' in person.keys():"),
stringr::str_c(" if person['ocd_age'] > person['smoking_history'][0][0] or person['ocd_age'] == '-999':"),
stringr::str_c(" smokers += 1"),
stringr::str_c(" print 'year = ' + str(year)"),
stringr::str_c(" print 'prevalence = ' + str(smokers / len(people))"),
stringr::str_c(""),
stringr::str_c("def comment_on_ages(people, key, year):"),
stringr::str_c(" init_ages = [int(person[key]) for person in people]"),
stringr::str_c(" init_ages = [d for d in init_ages if d != -999]"),
stringr::str_c(" local_min = min(init_ages)"),
stringr::str_c(" local_max = max(init_ages)"),
stringr::str_c(" local_median = np.median(init_ages)"),
stringr::str_c(""),
stringr::str_c(" if key is 'smoking_history':"),
stringr::str_c(" local_min = min(init_ages)"),
stringr::str_c(" local_max = max(init_ages)"),
stringr::str_c(" local_median = np.median(init_ages)"),
stringr::str_c(""),
stringr::str_c(" print key + ' = ' + str(local_min) + ', ' + str(local_max) + ', ' + str(local_median) + ' (min, max, median)'"),
stringr::str_c(""),
stringr::str_c(" # Addressing minimums"),
stringr::str_c(" # Minimum init age is normally within some delta of 8 years old, using 6 years here"),
stringr::str_c(" if key is 'init_age' and abs(local_min - 8) > 7:"),
stringr::str_c(" print 'WARNING: weird min init age, ' + str(local_min)"),
stringr::str_c(" # Minimum cess age is normally within some delta of 15 years old, using 6 years here"),
stringr::str_c(" if key is 'cess_age' and abs(local_min - 15) > 7:"),
stringr::str_c(" print 'WARNING: weird min cess age, ' + str(local_min)"),
stringr::str_c(" # Infant mortality in the first year is normally unavoidable"),
stringr::str_c(" if key is 'ocd_age' and local_min is not 0:"),
stringr::str_c(" print 'WARNING: weird min ocd age, ' + str(local_min)"),
stringr::str_c(""),
stringr::str_c(" # Addressing maximums"),
stringr::str_c(" # Maximum init age is normally within some delta of 8 years old, using 6 years here"),
stringr::str_c(" # Maximum cess age is normally within some delta of 15 years old, using 6 years here"),
stringr::str_c(" if key is 'cess_age' and abs(local_max - 99) > 9 and local_max + year != 2050:"),
stringr::str_c(" print 'WARNING: weird max cess age, ' + str(local_max)"),
stringr::str_c(" # Infant mortality in the first year is normally unavoidable"),
stringr::str_c(" if key is 'ocd_age' and abs(local_max - 99) > 9 and local_max + year != 2050:"),
stringr::str_c(" print 'WARNING: weird max ocd age, ' + str(local_max)"),
stringr::str_c(""),
stringr::str_c("def flatten(list_of_lists):"),
stringr::str_c(" return [item for sublist in list_of_lists for item in sublist]"),
stringr::str_c(""),
stringr::str_c("def comment_on_smoking_histories(people, key, year):"),
stringr::str_c(" smoking_histories = [person['smoking_history'] for person in people if 'smoking_history' in person.keys()]"),
stringr::str_c(" smoking_ages = [list(np.array(smoking_history[:, 0], dtype='int')) for smoking_history in smoking_histories]"),
stringr::str_c(" smoking_amounts = [list(np.array(smoking_history[:, 1], dtype='float')) for smoking_history in smoking_histories]"),
stringr::str_c(" smoking_averages = [np.mean(np.array(smoking_amount)) for smoking_amount in smoking_amounts]"),
stringr::str_c(" num_switchers = len([d for d in smoking_averages if float(d) != int(d)])"),
stringr::str_c(" min_smoking_age = min(flatten(smoking_ages))"),
stringr::str_c(" max_smoking_age = max(flatten(smoking_ages))"),
stringr::str_c(" min_smoking_amount = min(flatten(smoking_amounts))"),
stringr::str_c(" average_smoking_amount = np.mean(np.array(flatten(smoking_amounts)))"),
stringr::str_c(" max_smoking_amount = max(flatten(smoking_amounts))"),
stringr::str_c(" fraction_of_switchers = num_switchers / len(smoking_histories)"),
stringr::str_c(" print 'Smoking ages: min = {min_smoking_age}, max = {max_smoking_age}'.format(**locals())"),
stringr::str_c(" print 'Smoking amounts: min = {min_smoking_amount}, max = {max_smoking_amount}, mean = {average_smoking_amount}'.format(**locals())"),
stringr::str_c(" print 'Fraction of switchers = {fraction_of_switchers}'.format(**locals())"),
stringr::str_c(""),
stringr::str_c("if __name__ == '__main__':"),
stringr::str_c(" delete_old_io_files()"),
stringr::str_c(" if socket.gethostname() == 'BenRacine-PC': ## Add name of your pc in order to rebuild"),
stringr::str_c(" recompile_source()"),
stringr::str_c(" # for year in range(1890, 2110, 10):"),
stringr::str_c(" year = ",birth_cohort),
stringr::str_c(" create_input_file(year)"),
stringr::str_c(" print"),
stringr::str_c(" print"),
stringr::str_c(" run_shg(year)"),
stringr::str_c(" people = parse_results(year)"),
stringr::str_c(" comment_on_prevalence(people, year)"),
stringr::str_c(" comment_on_ages(people, 'init_age', year)"),
stringr::str_c(" comment_on_ages(people, 'cess_age', year)"),
stringr::str_c(" comment_on_ages(people, 'ocd_age', year)"),
stringr::str_c(" comment_on_smoking_histories(people, 'ocd_age', year)"),
stringr::str_c(" print"),
stringr::str_c("")
)
# write to file
fileConn <- file("run_custom_tests.py")
writeLines(run_custom_tests, fileConn)
close(fileConn)
}
#' Runs Smoking History Generator
#'
#' The SHG is written in Python and is maintained seperately from this package by CISNET. This function calls the SHG from within R,
#' creates the SHG output_19X0.out file, and also returns the SHG cohort. The SHG needs to be installed properly prior to use.
#' To use runSHG(), you pass it the directory where the SHG is installed. Tested using SHG6.3.4 and Python v2.7.10 on MacOSX.
#' SHG6.3.4.zip was obtained from cisnet.flexkb.net, see their readme.md for setup instructions. In python, you'll likely want these
#' dependencies: six, ipdb, enum, ipython_genutils, ipython. To install the SHG, run install.sh. To run the SHG from R, run:
#' (1) python run_tests.py directly, or use this R wrapper function to create a custom 'run_custom_tests.py'. No object is returned
#' by this wrapper function. You will be prompted to look for the appropriate output_19X0.out, input_19X0.txt, and errors_19X0.txt
#' in the SHG directory.
#' If you do not wish to use the SHG, you can load a processed smoking history dataset by typing: data(smkhist) that is included
#' with this package. The smkhist dataset is for a 1950 birth cohort of 10000 men from SHG3.6.4.
#' @param path a string indicating the directory that the SHG is installed in (ie: "~/SHG6.3.4")
#' @param n the size of the cohort (ie: 10000)
#' @param gender the gender (ie: "M" == 0 or "F" == 1)
#' @param birth_cohort the year of birth for the cohort (ie: 1950 or 1960)
#' @param seed the seed for random number generator
#' @keywords smoking history generator
#' @export
#' @examples
#' library(LCRFsim)
#' runSHG("~/SHG3.6.4", 10000, "F", 1950, 1) # runs SHG to create output_1950.out in SHG directory
#' smoking_history <- processSHG(file = "~/SHG3.6.4/output_1950.out", birth_cohort = 1950)
#' data(smkhist) # or you can load a sample processed SHG output
runSHG <- function(path, n, gender, birth_cohort, seed){
oldwd <- getwd()
# the path of the SHG
setwd(path)
if (gender %in% c("F","f","female")) {SEX = 1}
if (gender %in% c("M","m","male")) {SEX = 0}
# fix weird 5x10e6 problem
n <- as.integer(n)
cat(stringr::str_c("\nrunning SHG for n = ",n,", gender = ",SEX," (",gender,")",", cohort birth year = ",birth_cohort, ", seed = ",seed,"... "))
# runs twice, once for M, once for F
# ---------------------------------------------------------
# step 0: produce the template input file that the SHG uses
# filename: template_input_05032017.txt
# ---------------------------------------------------------
CONFIG <- c(stringr::str_c("SEED_INIT=",seed),
stringr::str_c("SEED_CESS=",seed+1),
stringr::str_c("SEED_OCD=",seed+2),
stringr::str_c("SEED_MISC=",seed+3),
stringr::str_c("RACE=0"),
stringr::str_c("SEX=",SEX),
stringr::str_c("YOB=", birth_cohort),
stringr::str_c("CESSATION_YEAR=0"),
stringr::str_c("REPEAT=", n),
stringr::str_c("INIT_PROB=data/05_03_2017/lbc_shg_initiation05032017.txt"),
stringr::str_c("CESS_PROB=data/05_03_2017/lbc_shg_cessation05032017.txt"),
stringr::str_c("OCD_PROB=data/05_03_2017/lbc_smokehist_oc_mortality_02212016.txt"),
stringr::str_c("CPD_DATA=data/05_03_2017/lbc_shg_cpd05032017.txt"),
stringr::str_c("OUTPUTFILE=output_{cohort}.out"),
stringr::str_c("ERRORFILE=errors_{cohort}.txt"))
# write to file
fileConn <- file("template_input_05032017.txt")
writeLines(CONFIG, fileConn)
close(fileConn)
# ---------------------------------------------------------
# step 1 - 3: run_tests.py, decompose.py, create_SH.R
# ---------------------------------------------------------
# make a "run_custom_tests.py" in SHG directory for the user selected brith cohort
makeCustomRun(birth_cohort)
# call_SHG <- "run_tests.py"
call_SHG <- "run_custom_tests.py" # ASSUMED TO BE IN SHG DIR
# --------------------------------
# step 1: run_tests.py (ships with SHG), actually run_custom_tests.py for birth_cohort year
cmd <- "python"
system2(cmd, call_SHG, stdout=TRUE) # for one birth_cohort year
cat("DONE!\n")
cat("check the SHG directory for the appopriate SHG output files: \noutput_", birth_cohort,".out\ninput_", birth_cohort,".txt\nerrors_", birth_cohort,".txt\n", sep="")
setwd(oldwd)
}
#' Process raw SHG output
#'
#' The SHG creates an out_1950.out file which is the raw output. The following
#' function runs python code that decomposes that output into a few other files
#' that will create an R-readable table. A SHG file is saved to "smoking_histories.csv"
#' in the working directory. The function also returns the smoking history as a data frame object in R.
#' If you have a output_XXXX.out file from your own SHG simulations, you can process it using
#' this function to load an R object that can be used for simulating the risk factors.
#'
#' @param file the location of the output_1950.out file (ie: "shg/output_1950.out")
#' @param birth_cohort the year of birth for the cohort (ie: 1950)
#' @keywords smoking history generator run_tests process processSHG
#' @export
#' @examples
#' smoking_history <- processSHG(file = "~/SHG3.6.4/output_1950.out", birth_cohort = 1950)
processSHG <- function(file, birth_cohort){
# decompose output output_19X0.out into a file using Iakovos's python code
cat("processing SHG output file ... \n")
# --------------------------------
# step 2: decompose_output_files in R package /inst folder
decompose_out <- paste(system.file(package="LCRFsim"), "decompose_output_files.py", sep="/")
# run the python code that decomposes the SHG output
cmd <- "python"
system2(cmd, c(decompose_out, file, birth_cohort), stdout=TRUE)
# creates 3 files: people_ages_mat_1950.txt, people_cpd_mat_1950.txt, people_mat1950.txt
# ---------------------------------
# step 3: run create_smoking_histories.R file (genderless version) for birth_cohort year
result <- createSmokingHistories(birth_cohort)
cat("smoking history saved as 'smoking_histories_", birth_cohort,".csv'\n", sep="")
# if never started smoking, NA, so set to 0
result[is.na(result)] <- 0
# turn it into a data.frame
result <- data.frame(result)
# return the smoking history
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.