R/PInfDataFunctions.r

#______________________________________________________________________________
#Functions used for Peer Influence data processing and analysis
#______________________________________________________________________________
# Naming conventions and abbreviatons:
# NQ: 'NetQ', the short survey comprising demographics,
#      the network questions, and several problem-behavior
#      inventories (Antisocial, bullying, victimization,
#      Alc, Tobacco and MJ use). 'Implicit Consent' used, so
#      response rates are typically over 80% for these questions.
# FQ: 'FullQ', the long survey added to the NetQ if participant gave explicit
#      consent. Questions include more significant antisocial behavior,
#      sexual behavior, sexual maturity, relationships w/parents.
# pWavVec: ordered integer vector of wave #'s for which data are requested
#      Waves 2,6, and 10 are all summer waves, where no NQ or FQ data is
#        collected (only EMA data)
#      Wave 1 is Spring of 2014 (8th grade, end of middle school) for Cohort
#         1 schools, Spring of 2015 for Cohort 2
#      Waves 3, 7 and 11 are Fall assessments (about October)
#      Waves 4 and 8 are Winter assessments (about January)
#      Waves 1, 5, and 9 are Spring assessments (about May)
# pSchVec: ordered integer vector of School IDs for which data are requested
# SID: Subject ID number
# SchID: School ID number
#______________________________________________________________________________

# DB Layer Fns ----------------------------------------------------------

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> updateStuEligWindow <<
# _____________________________________________________________________________
#' Returns TRUE if the Student Eligibility Window (S.E.W.) needs updating.
#'
#' The table 'dbo.StudentEligibilityWindow in the database table has 1 row
#'     for each eligibility period (startdate, enddate)
#'     for each student who was ever survey-eligible. If Eligibility has
#'     not expired, the enddate will be set to the last date the update
#'     sproc (called dbo.StudentEligibilityWindows) was run.
#'
#' @details There are no parameters; the sproc simply obtains the date
#'     that the S.E.W. was last updated, and then the
#'     most recent date of any survey in the table dbo.SWave (which records
#'     wave-specific survey data by participating student), and compares
#'     the two. If the last S.E.W. was more recent
#'     than the most recent survey, no update is needed (return FALSE);
#'     otherwise, it is needed (return TRUE).
#'
#' @note This function is mainly to be used internally, but a user might
#'     consider calling it out of curiosity, as to when the last S.E.W.
#'     table update occurred.
#'
#' @return A logical variable
#' @export
checkEligUpdate <- function(){
#' @import RODBC
#' @import dplyr
#' @import data.table

cat("Checking whether Student Eligibility Window update is needed",
    "\n")
#______________________________________________________________
  sewQuery <- paste("SELECT Max(EndDate)",
                    " From PInf1.dbo.StudentEligibilityWindow")
  conn <- RODBC::odbcConnect(dsn="PInf1")
  dt <- data.table(RODBC::sqlQuery(conn,sewQuery))[1,V1]
  dtlastUpdate <- as.character(dt) %>%
                  as.POSIXct() %>%
                  as.Date()
#_____________________________________________________________
  wvQuery <- paste("SELECT Max(AssessmentDateTime)",
                   " From PInf1.dbo.SWave")
  dt <- data.table(RODBC::sqlQuery(conn,wvQuery))[1,V1]
  dtlastSurvey <- as.character(dt) %>%
                  as.POSIXct() %>%
                  as.Date()
  if (dtlastSurvey > dtlastUpdate) {
    cat("SEW update required...", "\n")
    return(TRUE)
  }
  if(dtlastSurvey == dtlastUpdate){
    cat("SEW updated today & up to date if no other surveys\n",
        "have occurred since.\n")
    return(FALSE)
  }
  if (dtlastSurvey < dtlastUpdate){
  # if dtlastSurvey < dtlastUpdate
  cat("SEW is up to date.\n")
  return(FALSE)}
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getDOB <<
# _____________________________________________________________________________
# Uses: RODBC
# _____________________________________________________________________________
#' Returns a data.table with DOBs for all SIDs input
#'
#' This function can be used to calculate an age variable
#'
#' @param pSIDVec A vector of SIDs for which DOB is wanted. These will
#'     normally be part of an 'Analysis Set', i.e. generated by
#'     'getNetworkSet' or 'getSIDSet'.
#' @return A data.table with two columns: SID and DOB. SID is integer,
#'     DOB is POSIXct/POSIxt
#' @details Makes a call to the database using RODBC
#' @examples
#' # Gets DOB for a couple of specific SIDs
#' aFewDOBs <- getDOB(c(10, 11, 12))
#' @note Draws data from vStudentCurrent (DB View), not the other 3 main
#'     data tables, so it is included to provide this necessary DB
#'     access, if DOB is required.
#' @export
getDOB <- function(pSIDVec){
#' @import RODBC
  # ________________
  dobQuery <- paste ("SELECT SID,DOB",
                     " From vStudentCurrent",
                     " WHERE SID In (",paste(pSIDVec,collapse=","),
                     ") ORDER BY SID ")
  # Extract data
  conn <- odbcConnect(dsn="PInf1")
  dobData<- sqlQuery(conn,dobQuery)
  return(dobData)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getFixedCovs <<
# _____________________________________________________________________________
# This function gets all SMaster records for these schools & waves.
# _____________________________________________________________________________
# Obtains dataframe of fixed NQ data (from the table 'SMaster' in the
# database 'PInf1'), for one " (pSchVec)"school cohort".
# To get only the SIDs for a given set of waves, you have to select the
# relevant SIDs first with other code, then select from this data frame.)
# _____________________________________________________________________________
# Uses: RODBC
# _____________________________________________________________________________
#' Returns a dataframe of fixed covariates for a set of input schools
#'
#' This is generally used to pull this data from the database, and
#'     subsequently subset it for a particular Analysis Set.
#'
#' @param pSchVec A vector of (integer) School IDs; any existing fixed
#'     covariate rows in the database where the participant is shown
#'     as currently in one of these schools will be included.
#' @return A dataframe that has the same format as the database table
#'     'PInf.dbo.SMaster'. Column names fairly accurately describe
#'     column contents, but one should consult the qq codebook for
#'     precise definition.
#' @details If a participant switches schools (which happens regularly),
#'     this function will only pull his/her data if his/her *current*
#'     school is in the input vector.
#' @examples
#' # Gets table of fixed covariats for all the schools in 1 district
#' fc <- getFixedCovs(c(3,4,5,6,30)
#' @export
getFixedCovs <- function(pSchVec){
#' @import RODBC
# _______________
  smQuery <- paste ("SELECT * From PInf1.dbo.SMaster WHERE SID IN ",
                    "(SELECT Distinct SID From PInf1.dbo.qqelig ",
                    "WHERE SchID In (",paste(pSchVec,collapse=","),
                    "))")
  # Extract data
  conn <- odbcConnect("PInf1")
  smData <- sqlQuery(conn,smQuery)
  return(smData)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getNetwork <<
#______________________________________________________________________________
# Uses: pkg 'RODBC'
# Used by: getNetworkSet
# _____________________________________________________________________________
#' Returns a dataframe of network edges for input schools and waves
#'
#' This is generally used to pull this data from the database, and
#'     subsequently subset it for a particular Analysis Set. It is
#'     currently used by 'getNetworkSet'.
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A vector of (integer) School IDs.
#' @return A dataframe that has the same format as the database table
#'     'PInf.dbo.SAffiliation'. Column names fairly accurately describe
#'     column contents, but one should consult the qq codebook for
#'     precise definition.
#' @details Chooser was asked for more information on an individual (if any)
#'     designated as Chooser's 'very best friend'. These variables are
#'     missing for any alters not so designated, or if Chooser did not
#'     make this designation at all.
#' @examples
#' # Gets edgelist (with addtional info on some alters) for individuals in
#' # waves 1, 3, and 4, schools 3:6 and 30 (one school district).
#' net <- getNetwork(c(1,3,4), c(3,4,5,6,30))
#' @export
getNetwork <- function(pWavVec,pSchVec){
#' @import RODBC
# ____________
  dbQuery <- paste("SELECT * From PInf1.dbo.SAffiliation ",
                   "WHERE SchID In (",paste(pSchVec,collapse=","),
                   ") AND WID IN (",paste(pWavVec,collapse=","),")")
  conn <- odbcConnect(dsn="PInf1")
  outNet <- sqlQuery(conn, dbQuery)
  return(outNet)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getNQTVCs <<
#______________________________________________________________________________
# Uses: pkgs 'RODBC', 'dtplyr', 'dplyr', 'data.table'
# Used by: 'makeTVTbl'
# _____________________________________________________________________________
#' Returns a data.table of Short-Survey time-varying covariates
#'
#' This is generally used to pull this data from the database, and
#'     subsequently subset it for a particular Analysis Set. It is
#'     currently used by 'makeTVTbl'.
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A vector of (integer) School IDs.
#' @return A dataframe that is a subset of the database table
#'     'PInf.dbo.SWave'., containing only the items asked on the 'Short Survey'.
#'     Column names fairly accurately describe
#'     column contents, but one should consult the qq codebook for
#'     precise definition.
#' @examples
#' # Gets a long-form data.table (each row represents an individual and a wave)
#' # of survey data from the 'Short Survey', for
#' # waves 1, 3, and 4, schools 3:6 and 30 (one school district).
#' tvcs <- getNQTVCs(c(1, 3, 4), c(3, 4, 5, 6, 30))
#' @export
getNQTVCs <- function(pWavVec,pSchVec){
#' @import RODBC
#' @import data.table
#' @import dtplyr
  # _________________
  nqQuery <- paste ("SELECT SchID, SID, WID, ULied,UHit,UMean,USkip,UDamage,
                    ULate,UFam,","UOKids,ONames,OHit,OThreat,ONoTalk,OExclu,
                    OGossip,OEncourg,",
                    "YNoTalk,YExclu,YGossip,YLies,YEncourg,TobLife,Tob30Day,",
                    "ETobLife,ETob30Day,ChewLife,Chew30Day,AlcLife,Alc30Day,",
                    "BngLife,Bng30Day,MJLife,MJ30Day,OptOutNow ",
                    "FROM PInf1.dbo.SWave ",
                    "WHERE SchID In (",paste(pSchVec,collapse=","),
                    ") AND WID IN (",paste(pWavVec,collapse=","),")")
  # Extract data
  conn <- odbcConnect(dsn="PInf1")
  nqData <- data.table(sqlQuery(conn,nqQuery), key = "SID")
  # Calls 'zero30dSU to set unanswered 30day SU questions to 0 if no lifetime
  #    use was reported, returns data.table from there.
  return(zero30dSU(nqData))
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getSWNodes <<
# _____________________________________________________________________________
# Uses: pkgs 'RODBC', 'dtplyr', 'dplyr', 'data.table'
# Used by: getEligNodes, getEligXWave, getMSRIDVecs
# _____________________________________________________________________________
#' Returns a vector of SIDs for Short or Long-form survey-eligibles
#'
#' Output from this function is useful in creating subsets of data meeting
#'     survey-eligibility requirements.
#'
#' @param pWav A specific survey wave.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSch A specific School ID.
#' @param pNQW  If 1, short-survey-eligible SIDs in this wave and school
#'     are returned (this would also include long-survey-eligibles as a
#'     subset).
#' @param pFQ If 1, only long-survey-eligible SIDs in this wave and school are
#'     returned.
#' @return An integer vector of SIDs
#' @note This function will not necessarily return correct SIDs if the
#'     table dbo.StudentEligibilityWindow is not up to date. The calling
#'     code should always check this first! If 'checkEligUpdate' returns
#'     FALSE, you're OK. If not, the code should first run
#'     'updateStuEligWin'
#' @examples
#' # Returns a vector of SIDs of individuals who were short-survey (NQ)
#' # eligible at wave 1, from school #3.
#' nodes <- getNQTVCs(pWav = 1, pSch = 3, pNQW = 1)
#' @export
getSWNodes <- function(pWav, pSch, pNQW = 1, pFQW = 0){
#' @import RODBC
#' @import data.table
#' @import dtplyr
  # _____________
  # Infer cohort from SchID
  coh1Schools <-c(1,2,3,4,5,6,10,20,30,31)
  coh2Schools <-c(7,8,9,70,80,101,102,110,111,112,113,120)
  if (pSch %in% coh1Schools){pCoh<-1}
  if (pSch %in% coh2Schools){pCoh<-2}
  if (!(pSch %in% coh1Schools | pSch %in% coh2Schools)){
    stop("School ",pSch," does not exist.")
  }
  # Infer period from WID and Cohort
  pPer <- ifelse (pCoh==1,pWav,pWav+4)
  if ((pCoh==1 & pPer>11) | (pCoh==2 & pPer>15) | pWav %in% c(2,6,10)){
    stop("Wave # ",pWav," does not exist or is a summer wave (no data)")
  }
  #_____________
  # Extract data
  conn <- odbcConnect("PInf1")
  # The sproc called here creates a DB table of survey-eligible kids for this
  # cohort and wave. Then the correct school is selected from that table.
  # ____________________________________________________
  tblOfNodesQuery <- paste("exec PInf1.dbo.StudentSIDInWave @CohortID = ",pCoh,",@PeriodID = ",
                           pPer,",@Wave = ",
                           pWav,",@NQWanted = ",pNQW,",@FQWanted = ",pFQW)
  eligNums <- sqlQuery(conn,tblOfNodesQuery)
  nodeQuery <- paste("SELECT SID,SchID From PInf1.dbo.NQEligTbl ")
  nodes_temp <- data.table(sqlQuery(conn,nodeQuery), key = "SID")
  # _____________________________________________________
  # If the query returned no records, return a length 0 vector
  if (dim(nodes_temp)[1] == 0){
    return (integer())
  } else {
    # Since kids could have been eligible in multiple schools,
    # check each against the kid's SWave record (if he/she has one). If the kid
    # shows a different SchID in SWave then pSch, mark that node for
    # removal.
    # ____________________________________________________
    sWaveQuery <- paste("SELECT SID, SchID From PInf1.dbo.SWave ",
                        " WHERE SchID =",pSch, " AND WID=",pWav)
    sWaveSIDSch <- data.table(sqlQuery(conn,sWaveQuery),key = "SID")
    # select any rows for which school ID's in Elig and Actual Survey (sWave)
    #  DTs do not match; these SIDs should not be in this school.
    SIDDifSch <- sWaveSIDSch[SchID != pSch,.(SID)]
    nodes <- nodes_temp[SID %w/o% SIDDifSch & !is.na(SID)] # The 'eligible' data.table

    return (nodes[SchID == pSch,SID])
  }
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> updateStuEligWin <<
#______________________________________________________________________________
#' Runs DB sproc to update the Student Eligibility Windows table
#'
#' The table 'dbo.StudentEligibilityWindow' contains a row for each time
#'     period any ever-eligible participant's eligibility status changed.
#'
#' @details The sproc 'dbo.StudentEligibilityWindows' (note final 's'!)
#'     is executed.
#' @return A completion code is returned. If it is 0, the sproc executed OK.
#'     If it is 100 there was some kind of error.
#' @export
updateStuEligWin <- function(){
#' @import RODBC
#' @import data.table
exeQuery <- paste()
conn <- odbcConnect("PInf1")
# The sproc updates the table dbo.StudentEligibilityWindow.
# ____________________________________________________
cat ("Updating Student Eligibility Window; this takes a minute or two...", "\n")
#_____________________________________________________
exeQuery <- paste("exec PInf1.dbo.StudentEligibilityWindows")
retcode <- sqlQuery(conn, exeQuery, errors = TRUE)
# If 'check' returns FALSE, table is updated (or requires no update)
if(checkEligUpdate() == FALSE) {
  cat("SEW update completed successfully.", "\n")
  return(0)
} else {
  cat ("WARNING: SEW update failed; there may be a problem with the database.",
       "\n")
  return(100)
  }
}

# Middle Layer Object Fns -----------------------------------------------

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getDiditXWave <<
# _____________________________________________________________________________
# Uses: getNQTVCs, AllNQNotNA, pkg 'RODBC' (indirect), pkg 'dtplyr'
#       pkg 'dplyr'
# _____________________________________________________________________________
#' Returns dataframe with flags indicating survey completion
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A numeric vectorof School IDs to be included in the analysis.
#' @param pElig The minimum number of eligible waves for SID to be included.
#' @param pDid The minimum number of survey-completed waves for SID to be
#'     included.
#' @return A (#eligible SIDs x length(pWavVec)+1) dataframe. Col 1 is SID,
#'     Cols 2 on (each named 'wK', where K is the kth ordered wave in pWavVec)
#'     are flags with the integer 1 if individual SID 'completed' a survey
#'     for that wave (completion is determined in the function 'allNQQNotNA')
#' @examples
#' # gets dataframe including SIDs for waves 1, 3, 4, and 5, schools 3-6 and
#' # 30 (one school district); each included SID was eligible for the Short
#' # survey at least once, and completed at least 1 survey in these waves.
#' sidVec <- getSIDSet(c(1, 3, 4, 5), c(3, 4, 5, 6, 30), 1, 1)
#' @export
getDiditXWave <- function(pWavVec,pSchVec){
#' @import data.table
#' @import dtplyr
  # __________________
  cat("Obtaining survey completion info...", "\n")
  #Get a node list of all eligible, so the output DF records participation
  #  for all eligible at any wave
  allNodes <- getEligNodes(pWavVec,pSchVec)
  # Construct output DF
  outDF <- data.frame(allNodes)
  names(outDF)[1] <- "SID"
  # Add cols to output DF for each wave, 1 if participant 'did it', else 0
  tvc <- allNQQNotNA(getNQTVCs(pWavVec,pSchVec)) # WATCH OUT, this is a
                                                 # data.table !!
  for (i in 1:length(pWavVec)){
    vWvN <- paste("wv",toString(pWavVec[i]),sep="") # new col name
    diditNodesi <- unique(tvc[DidIt == 1 & WID == pWavVec[i],SID])
    outDF[,i+1] <- ifelse(match(allNodes,diditNodesi,nomatch=0) ==0,0,1)
    names(outDF)[i+1] <- vWvN
  }
  cat("Done", "\n")
  return(outDF[order(outDF$SID),])
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getEligNodes <<
# _____________________________________________________________________________
# Uses:
#    getSWNodes, pkgs 'RODBC' (indirect)
# _____________________________________________________________________________
#' Returns a vector of SIDs who were Short Survey eligible on any input wave
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A numeric vectorof School IDs to be included in the analysis.
#' @return A (#eligible SIDs)-length numerical vector of SIDs.
#' @details Used by other functions in this package (e.g. getNetworkSet,
#'     makeTVTbl, )
#' @note This function just repeatedly calls 'getSWNodes' for each requested
#'     school & wave combination
#' @examples
#' # gets vector of SIDs for waves 1, 3, 4, and 5, schools 3-6 and 30 (one
#' # school district)
#' sidVec <- getSIDSet(c(1, 3, 4, 5), c(3, 4, 5, 6, 30), 1, 1)
#' @export
getEligNodes <- function(pWavVec, pSchVec){
  # __________
  # Check Student Eligibility Window status
  if (checkEligUpdate()) retCode <- updateStuEligWin()
  # __________
  # For efficiency, we create a list of
  # length w x s and create a vector of SIDs for each school/wave combination
  cat("Fetching eligible nodes...", "\n")
  sidHolder <- vector("list", length(pWavVec)*length(pSchVec))
  for (i in seq_along(pWavVec)){
    for (j in seq_along(pSchVec)){
      # This call gets the SIDs of kids in school j, survey eligible in wave i
      sidHolder[[((i-1)*length(pSchVec))+j]] <- getSWNodes(pWav = pWavVec[i],
                                                           pSch = pSchVec[j])
    }
  }
  cat("Done", "\n")
  # Combines unique SIDs into a sorted vector
  return (sort(unique(unlist(sidHolder))))
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getEligXWave <<
# _____________________________________________________________________________
# Uses: getSWNodes, pkg 'RODBC' (indirect)
# _____________________________________________________________________________
#' Returns a dataframe with flags x wave for Short Survey eligibility
#'
#'     The function is used to determine analyses sets (viz., by
#'     'getNetworkSet', 'makeTVTbl', 'getFixedCovs', and 'makeCCVec').
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A numeric vector of School IDs to be included in the analysis.
#' @return A (#eligible SIDs x length(pWavVec)+1) dataframe. Col 1 is SID,
#'     Cols 2 on (each named 'wK', where K is the kth ordered wave in pWavVec)
#'     are flags with the integer 1 if individual SID was Short Survey (NetQ)
#'     eligible for that wave.
#' @details A sproc 'PInf1.dbo.StudentSIDInWave' in the SQL Server database
#'     PInf1 obtains eligible IDs. It is wapped by the function 'getSWNodes'
#'     in this package, called by the present function.
#' @examples
#' # Gets dataframe including SIDs for waves 1, 3, 4, and 5, schools 3-6 and
#' # 30 (one school district); each included SID was eligible for the Short
#' # survey at least once, and completed at least 1 survey in these waves.
#' EligXWave <- getEligXWave(c(1,3,4,5), c(3,4,5,6,30))
#' @export
getEligXWave <- function(pWavVec,pSchVec){
  # Check Student Eligibility Window status
  #__________
  if (checkEligUpdate()) retCode <- updateStuEligWin()
  # Get the eligible node list for each wave
  # __________
  cat("Obtaining survey eligibility info...", "\n")
  nodeList <- vector("list",length(pWavVec))
  allNodes <- numeric()
  for (i in 1:length(pWavVec)){
    wavNodes <- numeric()
    for (j in 1:length(pSchVec)){
      # execute only if school and wave combination were observed
      if (schWvExists(pWavVec[i],pSchVec[j])){
      cat("Pulling eligible SIDs from database W",
          pWavVec[i],",Sch",pSchVec[j], "\n")
      # We can't use 'getEligNodes', because we need the wave-
      # level eligibility to construct the output DF.
      nodesij <- getSWNodes(pWavVec[i],pSchVec[j],1,0)
      wavNodes <- append(wavNodes, nodesij, after=length(wavNodes))
      allNodes <- append(allNodes, nodesij, after=length(allNodes))
      }
      # ...otherwise go on to the next i,j pair.
    }
    # Each list element is the eligible nodes for that wave
    nodeList[[i]]<-wavNodes
  }

  # Construct Output Dataframe
  allNodesUnique <- unique(allNodes) # All Elig SIDs, all waves
  outDF <- data.frame(allNodesUnique)
  names(outDF)[1] <- "SID"

  # Will hold output
  # vWvNames <-character(length(pWavVec))
  cat("Saving elig info...", "\n")
  for (i in 1:length(pWavVec)){
    vWvN <- paste("wv",toString(pWavVec[i]),sep="") # new col name
    outDF[,i+1] <- ifelse(match(allNodesUnique,nodeList[[i]],nomatch=0) == 0,0,1)
    names(outDF)[i+1] <- vWvN
  }
  cat("Done", "\n")
  return(outDF[order(outDF$SID),])
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getMSRIDVect <<
# _____________________________________________________________________________
# Uses: getSWNodes
# _____________________________________________________________________________
#' Returns a list of *Row* ID vectors for each middle school requested
#'
#'     The function is used for multiple middle school districts to
#'     add structural zeros to a wave 1 network edge table.
#'
#' @param mSchVec A numeric vector of middle school IDs to be separated
#'     at Wave 1 (only, because in this study, participants are in middle
#'     school only for Wave 1) by structural zeros.
#' @param sidRowID A dataframe-format crosswalk of the SIDs and corresponding
#'     Row IDs in the analysis set being developed by the function that
#'     calls this one.
#' @return A length(mSchVec) list of Row IDs of individuals within
#'     each of the middle schools with IDs in 'mSchVec'.
#' @details Adding structural zeros is an option of 'getNetworkSet', which
#'     is where this function is currently used. In the PInf study, this is
#'     only meaningful for middle schools from the same school district at
#'     Wave 1.
#' @examples
#' # Gets dataframe including SIDs for the wave vector 'mSchVec' and the
#' # two-column dataframe 'sidRowID'.
#' msIDList <- getMSRIDVecs (mSchVec, sidRowID)
#' @note Internal Function, NOT EXPORTED
getMSRIDVecs <- function(mSchVec, sidRowID){
  # Check Student Eligibility Window status
  #__________
  if (checkEligUpdate()) retCode <- updateStuEligWin()
  # __________
  outList <- vector(mode = "list", length = length(mSchVec))
  for(i in 1:length(mSchVec)){
    holdr1 <- getSWNodes(1,mSchVec[i])
    holdr2 <- sidRowID$RID[match(holdr1,sidRowID$SID)]
    outList[[i]] <- holdr2[!is.na(holdr2)]
  }
  return(outList)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getNetworkSet <<
#______________________________________________________________________________
# Uses:  getNetwork, pkgs 'network', 'Matrix', 'RODBC'
# _____________________________________________________________________________
#' Returns a list of networks forming a longitudinal Analysis Set
#'
#' An Analysis Set is a set of observations satisfying a particular set of
#'     criteria to be included in an analysis. Requiring complete
#'     data is too stringent for most longitudinal analysis. This is true in
#'     both descriptive network studies (where even individuals who come
#'     and go still form part of the social context) and also SAOM and
#'     related network modeling methods (which can include partial data on
#'     individuals if MAR assumptions are met)
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec ordered integer vector of school IDs
#' @param pElig the minimum number of eligible waves for SID to be included
#' @param pDid  the minimum number of completed waves for SID to be included
#' @param pTyp  string, defines type of network (BF => 'one of my best
#'               friends', FT => 'spent free time with')
#' @param pOut string, type of network format for output (SP =>
#'              dgTMatrix-class sparse matrix (pkg 'Matrix), NT =>
#'              network-class matrix (pkg 'network'))
#' @param pS0 string; if "S0", pOut = "SP", and pWavVec includes wave 1,
#'             structural zeros are inserted in the edge list of the w1
#'             networks between individuals in different middle schools.
#' @return A <pWavVec>-length list of either sparse matrix (dgTMatrix class)
#'     networks, one list element per wave, with RowIDs identifying chooser
#'     and chosen, -- OR -- 'network' class networks (pkg 'network'),
#'     one list element per wave (also with RowIDs). The last element of
#'     the list is a vector of the SIDs to be included in the analysis
#'      (this has many uses in other package functions).
#' @details The schools specified will normally comprise a single
#'  network, for descriptive or analysis purposes. If they are not, for some
#'  waves, then structural zeros should be included for pairs of nodes that
#'  cannot form relationships on particular waves (e.g. if the kids went to
#'  separate middle schools, and then all attend the same high school)
#' The 'length(pWavVec)+1st' element of the output contains a vector of
#'   the SIDs considered to be 'in the analysis' by the selection criteria
#'   implied by the input parameters. It's useful for obtaining
#'   corresponding variables such as vertex properties, RSIena covariates,
#'   and so on.
#' This function can be used to pull networks suitable for descriptive
#' analysis (e.g. by later converting them to 'network' objects and using 'sna')
#' or by combining them into a SAOM network object using sienaNet (from
#' 'RSiena') as shown in the example below.
#' @examples
#' # Creates a list of length 4; the first 3 elements of the list
#' # are dgTSparse-class matrices, one for each wave, for all the
#' # middle and high schools in one school district of the PInf study.
#' # Inclusion criteria are: eligible for 2 or more surveys, completed
#' # 2 or more surveys. Networks are defined as 'Best Friends'. Output
#' # networks are in dgTSparse format, and the Wave 1 middle schools
#' # have structural zeros between individuals in different schools.
#' netList <- getNetworkSet(pWavVec = c(1, 3, 4),
#'                          pSchVec = c(3, 4, 5, 6, 30),
#'                          pElig = 2, pDid = 2, pTyp = "BF",
#'                          pOut = "SP", pS0 = "S0")
#' # Create SAOM longitudinal network object 'myNet'
#' myNet <- sienaNet(netList[1:3], sparse=T)
#' # Even easier way, using 'makeSAOMNet' wrapper function
#' myNet2 <- makeSAOMNet(netList)
#' @export
getNetworkSet <- function(pWavVec, pSchVec, pElig=1, pDid=1, pTyp="BF",
                          pOut = "SP", pS0 = ""){
#' @import data.table
#' @import dtplyr
#' @import network
#' @import Matrix
  # ______________________
  # Check parameters
  # ______________________
  if (pElig > length(pWavVec)){
    stop("Error: waves of eligibility cannot exceed # of waves.")
  }
  if (pDid > length(pWavVec)){
    stop("Error: number of completed surveys cannot exceed # of waves.")
  }
  if (pDid > pElig){
    stop("Error: # ofcompleted surveys cannot exceed # of eligible waves.")
  }
  if (!(pTyp %in% c("BF","FT"))){
    stop("Error: relationship type must be BF or FT")
  }
  if (!(pOut %in% c("SP","NT"))){
    stop("Error: output type must be SP or NT")
  }
  if (pS0 != "" & pOut != "SP"){
    stop("Error: Structural zeros only work with sparse output")
  }
  if (pS0 == "S0" & pWavVec[1] != 1){
    stop("Error: struc zeros only pertain to wave 1, which is not included")
  }
  # ___________________________________________________________________________
    cat("Begin obtaining SID Set for Analysis (this may take a minute or two)",
        "\n")
  subSIDs <- getSIDSet(pWavVec, pSchVec, pElig, pDid)
    cat("Done obtaining SID Set for Analysis", "\n")
  # Make a crosswalk of SIDs and row IDs
  sidRowID <- data.frame(subSIDs, seq(1, length(subSIDs)))
  names(sidRowID) <- c("SID", "RID")

  # Get the associated network from 'sAffiliation'--process to "network"
  #   object.
  # ___________________________________________________________________________
  net <- getNetwork(pWavVec, pSchVec) # Raw network
  rawNet <- with(net, net[SID %in% subSIDs & AffSID %in% subSIDs, ]) # Subset
  edgNet <- with(rawNet, rawNet[, c("SID", "AffSID", "bff", "WID")]) #Edge format
  rownames(edgNet) <- seq(length=nrow(edgNet))

  # Substitute RowIDs for SIDs ('network' objects require this)
  edgNet$SID    <- sidRowID$RID[match(edgNet$SID,sidRowID$SID)]
  edgNet$AffSID <- sidRowID$RID[match(edgNet$AffSID,sidRowID$SID)]
  names(edgNet) <- c("RIDOut", "RIDIn", "bff", "WID") #Names reflect rows now

  #++++++++++++++++
  cat("Got raw network, substituted Row IDs for SIDs", "\n")
  #++++++++++++++++

  # Create a new ** DATA.TABLE ** version of the networ from 'edgNet',
  #   with ones where the edges represent the type of relationship
  #   requested (currently BF of FT)
  if (pTyp=="BF"){
    # "One of my best friends" -- involves only selecting for bff>0
    rlpNet <- data.table(with(edgNet, edgNet[bff>0, ]))
    #++++++++++++++++
    cat("Relationship values coded ('best friend')", "\n")
    #++++++++++++++++
  } else {
    if (pTyp=="FT"){
      # "Spend free time with" -- set 0=1; all others are already 1
      rlpNet <- data.table(edgNet)
      rlpNet$bff <- ifelse(rlpNet$bff==0, 1, rlpNet$bff)
      #++++++++++++++++
      cat("Relationship values coded ('spent free time with')", "\n")
      #++++++++++++++++
    }
  }

  #++++++++++++++++

  # Find the individuals who do not select anyone
  # nor are selected by anyone in the dataset
  isolates<-sidRowID %>%
    filter(!RID %in% unique(as.numeric(rlpNet$RIDOut)) &
             !RID %in% unique(as.numeric(rlpNet$RIDIn)))
  notisolates<-sidRowID %>%
    filter(RID %in% unique(as.numeric(rlpNet$RIDOut)) |
             RID %in% unique(as.numeric(rlpNet$RIDIn)))
  # Isolates at the end of the subject list are not added
  # automatically by the network package, and must be added
  # manually.
  end_isolates<-table((isolates$RID>(notisolates$RID[[length(notisolates$RID)]])))["TRUE"]
  num_end_isolates<-as.numeric(end_isolates)
  if (is.na(num_end_isolates)){
    num_end_isolates<-0
  }

  #++++++++++++++++

  # Split networks by wave, if more than one wave
  # _____________________________________________
  ll <- length(pWavVec)
  # will hold the output networks, plus the combined SID:
  outList <- vector(mode = "list", length = ll+1)
  nnodes <- length(subSIDs)
  #ccccccccccccccc
  cat("Begin constructing network output, type = ", pOut, "\n")
  #ccccccccccccccc
  for (i in 1:ll){
    # NOTE: the selection of wave ('rlpNet$WID') effectively reduces the
    #       raw edgelist to a 'triplet' (RIDOut, RIDIn, value), as required by
    #       both the 'spMatrix' and 'network' functions.
    if (pOut == "SP"){
      if(pS0 == "S0" & pWavVec[i] == 1){
        # Add Structural Zeros between any elementary schools, if requested
        # Select just the elementary schools
        mSchVec <- pSchVec[pSchVec %in% c(1:9,101,102)]
        if (length(mSchVec) > 1){
          # This is where the structural zeros are inserted into 'rlpNet'
            cat("Begin inserting structural zeros", "\n")
          # _______________________________
          # Create vectors of RowIDs associated with each input middle school
          msIDList <- getMSRIDVecs (mSchVec, sidRowID)
          # Get the new rows (rlpNet format) with S0's inserted between MS's
          rlpNetS0 <- s0Assemble(msIDList)
          # Add the new rows to rlpNet (notice the 'data.table' syntax...)
          rlpNet2 <- do.call(rbind,list(rlpNet,
                                        rlpNetS0[,.(RIDOut,RIDIn,bff,WID)]))
          #_________________________________
            cat("Done insterting structural zeros","\n")

        } else{
          cat("Warning: <2 middle schools given; struc 0 request ignored","\n")
        }
        # This one has structural 0s
        outNeti <- with (rlpNet2[rlpNet2$WID == pWavVec[i], ],
                         spMatrix(nnodes, nnodes,
                                  RIDOut, RIDIn, x = bff))
        itemName <- paste("wv", toString(pWavVec[i]), sep = "")
        outList[[i]] <- outNeti
        names(outList)[i] <- itemName
      }
      else{
        # This one has no structural 0s
        outNeti <- with (rlpNet[rlpNet$WID == pWavVec[i], ],
                         spMatrix(nnodes, nnodes,
                                  RIDOut, RIDIn, x = bff))
        itemName <- paste("wv", toString(pWavVec[i]), sep = "")
        outList[[i]] <- outNeti
        names(outList)[i] <- itemName
      }
    } else {
      # 'network' format
      if (pOut == "NT"){
        mtx <- data.matrix(rlpNet[rlpNet$WID == pWavVec[i],
                                  .(RIDOut,RIDIn,bff)])
        outNeti <- network(mtx,
                         directed = TRUE,
                         matrix.type = "edgelist")
        if (num_end_isolates>0){
          network::add.vertices(outNeti,num_end_isolates)
          network.vertex.names(outNeti) = sidRowID$SID
          itemName <- paste("wv", toString(pWavVec[i]), sep = "")
          outList[[i]] <- outNeti
          names(outList)[i] <- itemName
        } else {
          network.vertex.names(outNeti) = sidRowID$SID
          itemName <- paste("wv", toString(pWavVec[i]), sep = "")
          outList[[i]] <- outNeti
          names(outList)[i] <- itemName
        }
      }
    }
  }
  #cccccccccccccccc
  cat("Done constructing network output", "\n")
  #cccccccccccccccc
  outList [[ll+1]] <- subSIDs # This is returned as a convenience
  names (outList)[ll+1] <- c("subSIDs")
  return (outList)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getSIDSet <<
# _____________________________________________________________________________
# Uses:  getEligXWave, getDidItXWave, pkgs 'network', 'Matrix'
# Used by: getNetworkSet
# _____________________________________________________________________________
#' Returns a vector of SIDs that are consistent with some Analysis Set
#'
#' This function is a bit slow, probably because it uses calls to
#'     'getEligXWave' and 'getDiditXWave'...but it still runs in a couple
#'     of minutes even for fairly large Analysis Sets, and this design
#'     has the advantage of not requiring the output from these functions
#'     to be created ahead of time and passed in.
#'
#' @param pWavVec An ordered numeric vector of wave SIDs to be included in the
#'     analysis.
#'     Waves 2, 6, and 10 are summer waves; no Short Survey or Long Survey data
#'     were collected. Hence these waves should not ever be specified.
#' @param pSchVec A numeric vectorof School IDs to be included in the analysis.
#' @param pElig the minimum number of eligible waves for SID to be included
#' @param pDid  the minimum number of completed waves for SID to be included
#' @return An Analysis Set-length numerical vector of SIDs.
#' @details Used by other functions in this package (e.g. getNetworkSet).
#'     Could be used stand-alone, e.g. if you want to draw an Analysis Set
#'     without having to create a list of networks with 'getNetworkSet'.
#' @examples
#' # Obtains the SID set for a network analysis:
#' SIDVec <- getSIDSet(c(1, 3, 4), c(3, 4, 5, 6, 30))
#' @export
getSIDSet <- function(pWavVec, pSchVec, pElig=1, pDid=1){
#' @import network
#' @import Matrix
  #_______________________
  # require(network, Matrix)
  #_______________________
  ll <- length(pWavVec) # saves some line space below
  if (pElig < pDid){
    stop("Error: You cannot ask for more completed surveys than elig waves")
  }
  if (pElig>ll | pDid>ll){
    stop("Error: More completed surveys or elig waves than total waves ")
  }
  # Create an eligibility and suvery-completion ("did") dataframe; SID, then
  #  'll' cols for Elig flag, then ll cols for 'did' flags
  bothDF <- merge(getEligXWave(pWavVec,pSchVec),getDiditXWave(pWavVec,pSchVec),
                  by = "SID")

  # Determine the node set based on input parameters
  # (column calcs count 1's in the cols for eligibility x wave and then
  #  for survey completion by wave, contained in the last 2*ll cols).
  # This set determines who will be considered 'part of the analysis',
  # (although in RSiena, 'composition change' can be used to alter this
  #  wave by wave).
  # ___________________________________________________________________________
  if (ll>1){
    # (this is necessary because rowSums will not work if it's only
    #  summing 1 column. Is that stupid, or what??)
    subSIDs <- sort(bothDF[rowSums(bothDF[,c(2:(ll+1))]) >= pElig
                           & rowSums(bothDF[,c((ll+2):(1+(2*ll)))]) >= pDid
                           ,"SID"])
  } else {
    subSIDs <- sort(bothDF[bothDF[,2]>=pElig & bothDF[,3]>=pDid,"SID"])
  }
  return(subSIDs)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> makeSAOMNet <<
# _____________________________________________________________________________
# Uses: pkgs 'Matrix, 'RSiena' (or 'RSienaTest')
# _____________________________________________________________________________
#' Returns a longitudinal 'sienaNet' object directly from 'getNetworkSet'
#'
#' This function is a very thin wrapper for RSiena::sienaDependent (or
#'     equivalently, 'sienaNet'), saving maybe half a line of code. It takes
#'     the output from 'getNetworkSet' with no additional parameters or
#'     modification (BUT the networks included in that output must be of
#'     class dgTSparse), and creates a longitudinal network dependent
#'     variable object from it, using whatever networks it is given (w of
#'     them).The last element of the 'getNetworkSet' function output, a
#'     vector of the SIDs included in the analysis set, is ignored.
#'
#' @param pNetInput The list of length (w+1) generated by 'getNetworkSet',
#'     where the first w elements *must be* class dgTSparse networks.
#' @return A 'sienaNet' object, ready to be used as an endogenous network
#'     variable in a SAOM.
#' @examples
#' # 'net' will be a 'sienaNet' network dependent variable:
#'   net <- makeSAOMNet(getNetworkSet(pWavVec = c(1,3,4),
#'     pSchVec = c(3, 4, 5, 6, 30),
#'     pElig = 2, pDid = 2, pTyp = "BF", pOut = "SP", pS0 = "S0"))
#' @export
makeSAOMNet <- function(pNetInput){
#' @import RSiena
  lenlst <- length(pNetInput) # how long is the list?
  if (class(pNetInput[[lenlst]])[1] != "dgTMatrix"){
    lenlst <- lenlst - 1 #Ignore last list element (it's an SID vector)
  }
  if (class(pNetInput[[1]])[1] != "dgTMatrix"){
    stop("Error: Input must begin with a list of sparse matrices")
  }

  outSienaNet <- sienaDependent(pNetInput[1:lenlst],sparse=TRUE)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> makeTVTbl <<<
# _____________________________________________________________________________
# Uses: getTVCCols, pkgs dplyr, dtplyr (combines data.table and dplyr)
# _____________________________________________________________________________
#' Returns a data.table of a time varying behavior variable
#'
#' @param pCCTbl A dataframe or data.table of all the time-varying covariates
#'  in long-form (1 row per subject per wave) produced by the function
#'     'getNQTVCs'. It is an image of the database table PInf1.dbo.SWave.
#' @param pSIDs A numeric vector of SIDs of the individuals in this Analysis
#'     Set (i.e. for example, created by 'getNetworkSet'). Ensures the table
#'     created is consistent with such a set.
#' @param pVar A string giving the name of the variable to create. These are
#'     defined in 'getNQTVCs'. Options are listed below.
#' @param pCut  A numeric vector of cut points for classifying values of
#'     the requested variable (see Base R function 'cut'). Intervals are
#'     closed on the left, open on the right.
#' @param pMaxNA The maximum number of NAs among the variable's items
#'     before the returned value for the particular subject-wave is set to NA
#' @return The output table format is w+1 columns (w=#waves present in
#'     pCCTbl); first column is an ordered set of SIDs, columns 2:(w+1)
#'     are the scored (mean of items) value of the requested behaivor
#'     variable for the w waves.
#' @details May be used to create tables that can be easily converted to SAOM
#'     dependent behavior or time-varying covariate (predictor) variables by
#'     just selecting the last w columns (see example below). Generally such
#'     variables will be consistent with a network Analysis Set (of
#'     individuals) produced by 'getNetworkSet', in which case 'pSIDs' can
#'     simply be taken from the last list element of the output of the
#'     latter function. Of course, if you wanted to create variables for any
#'     longitudinal analysis (i.e. score them, select the individuals and
#'     waves you wanted), you could use this function as well, but you would
#'     need to generate the pSIDs by directly executing 'getSIDSet'.
#' @note Choices for pVar are defined in 'getTVCCols'. They are:
#'     AB (Antisocial behavior),
#'     OV (Others victimization of you),
#'     YV (You victimizing others),
#'     T3 (Tobacco, freq of use in last 30 days),
#'     E3 (E-tobacco, freq of use in last 30 days),
#'     C3 (Chewing Tobacco, freq of use in last 30 days),
#'     A3 (Alcohol, freq of use in last 30 days),
#'     B3 (Binge drinking, freq in last 30 days),
#'     M3 (Marijuana use, freq in last 30 days),
#'     AL (Alcohol, freq of use, lifetime up to now).
#'     BL (Binge drinking, freq of use, lifetime up to now).
#'     ML (Marijuana use, freq of use, lifetime up to now).
#' @examples
#' # When the network part of an analysis set is created by 'getNetworkSet',
#' # the last element of the output is a vector of participating SIDs. We
#' # use that as part of the input to the present function.
#' netList <- getNetworkSet(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30),
#'     pElig = 2, pDid = 2, pTyp = "BF", pOut = "SP", pS0 = "S0")
#' # Create the time varying behavior table
#' ccTbl <- getNQTVCs(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30))
#' # Creates a data.table of antisocial behavior variables x subject & wave,
#' # for the 3 waves & other criteria used above, grouped into 4 categories,
#' # and with no more than 3 NAs out of the 6 items.
#' abTbl <- makeTVTbl(ccTbl, netList[[4]], pVar = "AB",
#'     pCut = c(0, .5, 1, 5, 20), pMaxNA = 3)
#'
#' @importFrom stats reshape
#' @export
makeTVTbl <- function(pTVTbl, pSIDs, pVar="X", pCut = c(0), pMaxNA = 1){
#' @import data.table
#' @import dtplyr
# _____________________
  if (pVar == "X"){
    cat("Warning: No var name supplied; name defaults to X", "\n")
  }
  # Create a name for the binned (final form, scaled) variable:
  binVar <- paste(pVar,"B", sep = "") # Name of binned variable (e.g."ABB", ..)
  # Select only the SIDs from the input data.table to be used in the analysis
  # (dplyr used for 'filter')
  tvDT <- data.table(dplyr::filter(pTVTbl,SID %in% pSIDs))
  setkey(tvDT, SID, WID)

  # Get the vector (length >= 2) of variables implied by pVar
  items4TVC <- getTVCCols(pVar)

  # Calculate rowmeans
  tvDT[[pVar]] <- rowMeans(tvDT[, items4TVC, with = F],na.rm = T)
  # find rows for which the pVar needs to be set to NA. Only looks at the sub-
  #  dataframe with col names in 'items4TVCs'. naSums contains #NAs per row.
  naSums <- apply(tvDT[, items4TVC, with = F], 1, function(x) sum(is.na(x)))
  tvDT[[pVar]][which(naSums > pMaxNA)] <- NA

  # Bin 'pVar' according to the cut points in 'pCut'; place in 'binVar'
  tvDT[[binVar]] <- as.numeric(cut(tvDT[[pVar]], pCut, right=F,
                                     include.highest = T,
                                     labels = c(1:(length(pCut)-1))))


  # Build the DT for this TVC. The target is a length(subSIDs) x
  #  length(pWavVec)+1 DT. The last length(pWavVec) set of cols is then
  #  input as a matrix to 'sienaDependent' or 'varCovar'.
  # The whole DT is returned.
  tvDTLong <- data.table(tvDT$SID)
  tvDTLong$WID <- tvDT$WID
  tvDTLong[[pVar]] <- tvDT[[binVar]] # output name will be the original
  names(tvDTLong) <- c("SID", "WID", pVar)
    tvDTLong[[pVar]] <- tvDTLong[[pVar]] - 1 #makes scales 0-based
  tvDTWide <- reshape(tvDTLong,timevar = "WID", idvar = "SID",
                      direction = "wide")
  return(tvDTWide)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> schWvExists <<
# _____________________________________________________________________________
#' Returns logical TRUE if requested wave includes requested school
#'
#' This information is inherent in the study design; middle schools are only
#'     observed in Wave 1 (and high schools are not), whereas high schools
#'     are only observed in waves 3:5, 7:9, and 11.
#'
#' @param pWav The wave number of interest. Cannot be 2, 6, or 10.
#' @param pSch The school ID number of interest.
#' @return Logical variable; TRUE if the requested school and wave has any
#'     observations, otherwise FALSE.
#' @note: Used internally only-- NOT EXPORTED
schWvExists <- function(pWav,pSch){
  midSchools <- c(1:9,101,102)
  hiSchools <- c(10,20,30,70,80,110,120)
  # Initialize return value
  if (!(pWav %in% c(1,3:5,7:9,11))){
    stop("Error: Illegal wave requested. Must be 1, 3-5, 7-9, or 11.")
  }
  if ((pWav == 1 & pSch %in% midSchools) |
      (pWav > 2 & pSch %in% hiSchools)){
    return(TRUE)
  } else {
    return(FALSE)
  }
}


#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> s0Assemble <<
# _____________________________________________________________________________
# Uses: s0Make, data.table
# Used by: getNetworkSet
# _____________________________________________________________________________
#' Returns a data.table with structural 0's for w1 middle schools
#'
#' This is a 'helper' function which, along with s0Make, adds rows of
#'     structural zeros for individuals from different middle schools
#'     who, however, will be going to the same high school the next
#'     year...and are thus considered part of the same network, for
#'     analysis purposes.
#'
#' @param pMSchVec A list of vectors, each containing the Wave 1 Analysis Set
#'     RowIDs (because Network Sets use RowIDs, as required by RSiena and
#'     network) for a specific middle school.
#' @return A data.table in edgelist format; cols are RowID (chooser),
#'     RowID (chosen), bff (value of the relationship, an integer >= 0),
#'     and WID (wave ID #)
#' @note This function should not normally be used stand-alone. To see how it
#'     is used in context, consult the source code for 'getNetworkSet'.
#'     NOT EXPORTED
s0Assemble <- function(pMSchVec){
  # Calculate number of combinations (order irrelevant)
#' @import data.table
#______________________
# (actually it's s0Make that uses DTs ...)
  ll <- choose(length(pMSchVec),2)
  numSch <- length(pMSchVec)
  tblList <- vector(mode = "list", length = ll)
  k <- 1
  for (i in 1:(numSch-1)){
    for(j in (i+1):numSch){
      tblList[[k]] <- s0Make(pMSchVec[[i]],pMSchVec[[j]])
      k <- k+1
    }
  }
  # Collapse listed DTs into one
  # (this is actually very fast...surprisingly!)
  outTbl <- do.call(rbind,tblList)
  return(outTbl)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> s0make <<
# _____________________________________________________________________________
# Used by: s0Assemble (which is called by getNetworkSet)
# Uses: pkg 'data.table'
# _____________________________________________________________________________
#' Returns a data.table of structural 0 edges between 2 schools
#'
#' This is a 'helper' function which, along with s0Assemble, adds rows of
#'     structural zeros for individuals from different middle schools
#'     who, however, will be going to the same high school the next
#'     year...and are thus considered part of the same network, for
#'     analysis purposes.
#'
#' @param pVec1 A vector of the RowIDs (note: not SIDs!) in middle school 1.
#' @param pVec2 A vector of the RowIDs in middle school 2.
#' @return A data.table in edgelist format; cols are RowID (chooser),
#'     RowID (chosen), bff (value of the relationship, an integer >= 0),
#'     and WID (wave ID #).
#' @details To make connections between two wave 1 middle schools impossible,
#'     all you need to do is create an edge with value '10 (structural zero,
#'     in RSiena terminology) between each pair of individuals in the two
#'     schools. This function accomplishes that in a simple but efficient way.
#'     The function 's0Assemble' takes all the data.table objects
#'     created here and assembles them into one big table to return to the
#'     calling function.
#' @note This function should not normally be used stand-alone. To see how it
#'     is used in context, consult the source code for 'getNetworkSet'.
#'     NOT EXPORTED
s0Make <- function(pVec1, pVec2){
#' @import data.table
#' @import dtplyr
  # ________________
  # calculate number of rows needed (all combinations ofIDs, in
  #  both directions)
  numrows <- length(pVec1) * length(pVec2)*2
  # This code gives you two data frames, the first with all
  # combinations of pVec1 x pVec2, the second with these
  # reversed. These comprise all the combinations requiring
  # structural zeros
  DTpart1 <- expand.grid(pVec1,pVec2)
  DTpart2 <- expand.grid(pVec2,pVec1)
    names(DTpart1) <- names(DTpart2) <- c("RIDOut","RIDIn")
  # Combine them into a single data table, and add the other
  # two cols. This code is VERY efficient, btw ... :D
  outDT <- data.table(rNum = c(1:numrows),
                      RIDOut=c(DTpart1$RIDOut,DTpart2$RIDOut),
                      RIDIn =c(DTpart1$RIDIn, DTpart2$RIDIn), bff=10, WID=1)
  setkey(outDT,rNum) #Gives it a key, for binary searching
 return(outDT)
}

#  Create/Score Vars ----------------------------------------------------------

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> allNQQNotNA <<
#______________________________________________________________________________
# Used by: getDiditXWave
#______________________________________________________________________________
#' Returns input dataframe of short survey questions with 'DitIt' flag.
#'
#' Used by 'getDididXWave' to create a wide-format dataframe indicating survey
#'     completion for each of a selected set of waves, by SID.
#'
#' @param pDF The dataframe produced by 'getNQTVCs'.
#' @return The long-form dataframe of time varying covariates (returned by
#'     getNQTVCs) with a column 'DidIt' added, which has the value one if
#'     any of the questions addressed below (lifetime substance use questions)
#'     were answered, 0 otherwise.
#' @details Used by 'getDidItXWave' to set values for the returned dataframe.
#' @note Internal function, NOT EXPORTED
allNQQNotNA <- function(pDF){
  pDF$DidIt <- ifelse(!is.na(pDF$ULied) | !is.na(pDF$UHit)
                      | !is.na(pDF$TobLife)
                      | !is.na(pDF$AlcLife)
                      | !is.na(pDF$BngLife)
                      | !is.na(pDF$MJLife),1,0)
  return(pDF)
}

# FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> createOnset <<
# _____________________________________________________________________________
#' Returns data.table of binary, up-only 'onset' variables x wave
#'
#' This is a stand-alone scoring function that takes a wide-form data.table
#'     or dataframe of 'lifetime use' variables for a (presumably consecutive)
#'     set of waves, and transforms them into up-only variables with the value
#'     1 when the participant first reports any lifetime use.
#'
#' @param pInDT A data.table (or dataframe) containing SID plus a lifetime use
#'     (NA, 0, >0) variable for each of c-1 ordered time periods, where c is
#'     the number of columns in the DF.
#' @param pTHold The threshold (numerical value) for lifetime use; if the input
#'     use variable value is >= pTHold, then the event 'occurred'.
#' @return A data.table of the same dimension as the input data table, with
#'     rows transformed into up-only variables suitable for use in proportional
#'     hazard models, or onset/rate models (in RSiena).
#' @details The logic assigns 1's to the output variables for all waves after
#'     the first *known* positive instance (KPI; value >= pTHold) of the event.
#'     It also ignores instances where a participant reports the event at wave
#'     w, but then does not report any lifetime use at some wave w+k. Instead,
#'     the value of the onset variable will be 1 from the first instance of
#'     the event on. For waves prior to the KPI, NAs are changed to 0 if they
#'     preceded a non-KPI event (value < pTHold), but left as NAs otherwise.
#' @note The effect of this coding logic is as follows: (a) If the 'onset'
#'     variable is NA for either wave w or wave w+1, then that individual
#'     will be included in target statistics for onset between those two
#'     waves, but with a value equal to the overall mean (typically 0, since
#'     the variable will be centered). (b) The individual will
#'     participate in the simulations, with the assumption that the missing
#'     value had the sample mean value at whichever wave it was missing.
#' @examples
#' # Returns a table of up-only variables which have the value 0 before the
#' # first known lifetime alcohol use (if any such times are available), and 1
#' # thereafter. The pTHold value of 1 means *any* use.
#' # Get network 'Analysis Set'; last list element is unique SIDs
#' netList <- getNetworkSet(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30))
#' # Get table of TVCs for these waves and schools
#' ccTbl <- getNQTVCs(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30))
#' # Make TVC table for 'lifetime alc freq of use'
#' alcLife <- makeTVTbl(ccTbl, netList[[4]], pVar = "AL",
#'     pCut = c(0, .5, 1, 5, 20), pMaxNA = 3)
#' # Create onset variable
#' alcOnset <- createOnset(alcLife,pTHold = 1)
#' @export
createOnset <- function(pInDT,pTHold=1){
#' @import data.table
#' @import dtplyr
# ____________________
  colnum <- dim(pInDT)[2]
  rownum <- dim(pInDT)[1]
  outDT  <- pInDT
  outDT[,2:colnum] <- NA
  for (i in 1:rownum){
    foundEvent <- 0    #Flag gets set if any 'event' is found
    foundNonEvent <- 0 #Flag gets set if non-NA value < pTHold is found
  for (j in 2:colnum){
    if (!is.na(pInDT[[i,j]])){
    if (pInDT[[i,j]]>=pTHold){
      foundEvent <- 1
      # Fill 1's from here forward
      for (k in j:colnum){
        outDT[[i,k]] <- 1
      }
      # Fill 0's back from the event (assumes any
      # intervening NA is a zero..but at leats you
      # know there was a definite 0 at some point)
      if(foundNonEvent >= 2){
      for (k in 2:(j-1)){
        outDT[[i,k]] <- 0
      }
      }
      # Move to next row
      break
    } else {
      foundNonEvent <- j # A "0" (nonevent)
    }
      } #nested Ifs
   }#j loop -- end of table row scan
    if(foundEvent == 0 & foundNonEvent > 0){
      # The event never occurred
      # In this case we can fill with 0's
      # backwards from the last 0
      for (k in 2:foundNonEvent){
        outDT[[i,k]] <- 0
      }
    }
  } #i loop
  return(outDT)
}

# FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> createNewOnset
# _____________________________________________________________________________
#' Returns data.table of lag-1 onset variables with 3 different values
#'
#' This stand-alone scoring function takes a wide-form data.table
#'     or dataframe of 'lifetime use' variables for a (presumably consecutive)
#'     set of waves, and transforms them into variables with the value 0 prior
#'     to first known alcohol use, 1 if the individual is about to onset in the
#'     next wave, and 2 if he/she had already onset at some earlier wave. The
#'     onset event is lagged one wave (as for 'createNbhdOnset').
#' The variables returned by this function provide a particularly nice
#'     descriptive context for onset events--if you graph them in a network
#'     plot and assign, say, different node colors to each value, you can see
#'     who had or had not started (drinking, say) as of
#'     wave w, along with affiliations of everyone who was about to start in
#'     the next wave.
#'
#' @param pInDT A data.table (or dataframe) containing SID plus a lifetime use
#'     (NA, 0, >0) variable for each of c-1 ordered time periods, where c is
#'     the number of columns in the DF.
#' @param pTHold The threshold (numerical value) for lifetime use; if the input
#'     use variable value is >= pTHold, then the event 'occurred'.
#' @return A data.table of the same dimension as the input data table, with rows
#'     transformed into up-only variables suitable for descriptive use.
#' @details The value of the output for an individual is coded 2 from the wave
#'     he/she onsets to the final wave. IF (and only if) an affirmative
#'     'no onset' (value < pTHold) response is found, the previous wave is
#'     then given the value 1 ("ABOUT to onset"). An NA that follows only
#'     other NAs or 'no onset' values is always coded NA. An NA that follows
#'     an event (value >= pTHold) is always coded 2. An NA that precedes a
#'     'no onset' value is coded 0. An NA that precedes an event is an NA,
#'     unless it is followed by a 'no onset' before the event occurs, in
#'     which case it is a zero, or if JUST before the event, a 1.
#' @examples
#' # Returns a table of 1-period lagged up-only variables which have the value 0
#' # before the first known lifetime alcohol use (if any such times are
#' # available), 1 on the occasion prior to onset, and 2 thereafter.
#' # The pTHold value of 1 means *any* use.
#' netList <- getNetworkSet(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30))
#' ccTbl <- getNQTVCs(pWavVec = c(1,3,4), pSchVec = c(3,4,5,6,30))
#' alcLifeLag <- makeTVTbl(ccTbl, netList[[4]], pVar = "AL",
#'     pCut = c(0, .5, 1, 5, 20), pMaxNA = 3)
#' alcNewOnset <- createNewOnset(alcLifeLag, pTHold = 1)
#' @export
createNewOnset <- function(pInDT,pTHold=1){
#' @import data.table
#' @import dtplyr
# ____________________
  # pInDT is a data.table (or dataframe) with 1 row for each subject.
  # The first column is SID
  # The second through cth columns are 0 if the event has not yet occurred
  #   up to time (c-1), > pTHold if it has.
  colnum <- dim(pInDT)[2]
  rownum <- dim(pInDT)[1]
  outDT  <- pInDT
  outDT[,2:colnum] <- 0
  for (i in 1:rownum){
    foundEvent <- 0 #Flag gets set if any 'event' is found
    found0 <- 0     #Flag gets set if a specific non-event is found
    for (j in 2:colnum){
      if (!is.na(pInDT[[i,j]])){
        if (pInDT[[i,j]]>=pTHold){
          foundEvent <- 1
          outDT[[i,j]] <- 2
          if (found0 == 1){
            # Set ABOUT TO onset at prev wave, *iff* we have affirmed
            # 0 on the behavior in a previous wave
            outDT[[i,j-1]] <- 1
          }
          # Fill right with 2's
          for (k in (j+1):colnum){
            # (j+1)> colnum means no cols remain, so...
            if(k <= colnum) outDT[[i,k]] <- 2
          }
          break # <<< go to next i
        } else {
          # the value is <= the threshold & no event has
          # yet been found...so back fill with zeros
          found0 <- 1
            for(k in 2:j){
              outDT[[i,k]] <- 0
            }
        }

        } else {
        # If the value IS NA...
          if(foundEvent == 1){
                outDT[[i,j]] <- 2
          } else{
          outDT[[i,j]] <- NA
        }
      } # if NA block
    }#i loop
  } #j loop
  return(outDT)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getTVCCols <<
# _____________________________________________________________________________
#' Returns a vector of column (item) names for a scale or variable
#'
#' Used by 'getNQTVCs' to obtain the items necessary for a scale (or in some
#'     cases, just a single items, e.g. 'number of days drank, last 30'). It
#'     has the effect of generalizing 'getNQTVCs' so that the call to that
#'     function is compact, and does not require the user to go find the
#'     names of the items in any given scale.
#'
#' @param pVar A string giving one of the abbreviations for a variable
#'     understood by the function. See below for a list of possible values.
#' @return A vector of strings corresponding to the names of the items
#'     comprising a variable in the database table 'PInf1.dbo.SWave'
#' @note Choices for pVar are:
#'     AB (Antisocial behavior),
#'     OV (Others victimization of you),
#'     YV (You victimizing others),
#'     T3 (Tobacco, freq of use in last 30 days),
#'     E3 (E-tobacco, freq of use in last 30 days),
#'     C3 (Chewing Tobacco, freq of use in last 30 days),
#'     A3 (Alcohol, freq of use in last 30 days),
#'     B3 (Binge drinking, freq in last 30 days),
#'     M3 (Marijuana use, freq in last 30 days),
#'     AL (Alcohol, freq of use, lifetime)
#'     BL (Binge, freq of use, lifetime)
#'     ML (Marijuana, freq of use, lifetime)
#' @examples
#' # Internal function, not normally available to users. See 'makeTVTbl'
#' # source code to see how it is used.
#' @note NOT EXPORTED
getTVCCols <- function(pVar){
  if (!(pVar %in% c("AB", "OV", "YV", " T3", "E3", "C3", "A3",
                    "B3", "M3","AL","BL","ML"))){
    stop("Error: Var must be AB, OV, YV, T3, E3, C3, A3, B3, M3, AL, BL, ML")
  }
  if (pVar == "AB"){
    # Antisocial behavior
    outCols <- c("ULied", "UHit","UMean", "USkip", "UDamage", "ULate")
  }
  if (pVar == "OV"){
    # Others victimize you
    outCols <- c("ONames", "OHit", "OThreat", "ONoTalk",
                 "OExclu", "OGossip", "OEncourg")
  }
  if (pVar == "YV"){
    # You victimize others
    outCols <-c("YNoTalk", "YExclu", "YGossip", "YLies", "YEncourg")
  }
  if (pVar == "T3"){
    # Tobacco use last 30 days
    outCols <- c("Tob30Day")
  }
  if (pVar == "E3"){
    # E-cig use last 30 days
    outCols <- c("ETob30Day")
  }
  if (pVar == "C3"){
    # Chew tobacco last 30 days
    outCols <- c("Chew30Day")
  }
  if (pVar == "A3"){
    # Alcohol freq last 30-days
    outCols <- c("Alc30Day")
  }
  if (pVar == "B3"){
    # Binge drinking freq last 30 days
    outCols <- c("Bng30Day")
  }
  if (pVar == "M3"){
    # MJ freq last 30 days
    outCols <- c("MJ30Day")
  }
  if (pVar == "AL"){
    # Alc Lifetime Freq
    outCols <- c("AlcLife")
  }
  if (pVar == "BL"){
    # Binge drinking Lifetime Freq
    outCols <- c("BngLife")
  }
  if (pVar == "ML"){
    # Marijuana Lifetime Freq
    outCols <- c("MJLife")
  }
  return(outCols)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> makeCCVec <<
# ____________________________________________________________________________
#' Returns a list of vectors representing RSiena composition change.
#'
#' The output of this function may be input directly to the RSiena function
#'     'sienaCompositionChange'.
#'
#' @param pElig  An n x 1+w dataframe, with 1 row for every SID who was
#'     *survey-eligible* for any of the waves of interest, and 1 col for
#'     each wave, containing 1 if the SID was survey-eligible that wave,
#'     and 0 if not (col 1 is SID).
#' @return A list with n elements (one for each row of the inut DF). Each
#'     element is a vector of pairs of arrival and departure times from
#'     the network, as explained in the RSiena manual under 'method of
#'     changing composition'.
#' @details The input dataframe must be the object returned by
#'     'getEligXWave', or the equivalent.
#' @examples
#' # Returns a list of composition change vectors for the SIDs
#' # in waves 1, 3, and 4, and schools 3, 4, 5, 6, and 30.
#' ccDF <- makeCCVec(getEligXWave(pWavVec = c(1,3,4),
#'     pSchVec = c(3,4,5,6,30)))
#' @export
makeCCVec <- function(pElig){
  wv<-dim(pElig)[2] # cols (waves-1)
  nn<-dim(pElig)[1] # rows (nodes)
  CCVec <- vector("list", nn) # empty list with nn elements
  #
  for (i in 1:nn){
    for (j in 2:wv) {
      # -- Wave is 1st wave
      #browser()
      if (j==2 & pElig[i,j]==1) {CCVec[[i]]<-c(1)}
      # -- any other wave than 1st or last
      if (j>2 & j<wv) {
        # switch from 1 to 0
        if(pElig[i,j]==0 & pElig[i,(j-1)]==1){
          CCVec[[i]]<-c(CCVec[[i]],c(j-1.5))}
        if(pElig[i,j]==1 & pElig[i,(j-1)]==0){
          CCVec[[i]]<-c(CCVec[[i]],c(j-1.5))}
      } # if j>2,<wv
      # -- if last wave
      if (j==wv) {
        if(pElig[i,wv]==0 & pElig[i,(wv-1)]==1){
          CCVec[[i]]<-c(CCVec[[i]],wv-1.5)}
        if(pElig[i,wv]==1 & pElig[i,(wv-1)]==0){
          CCVec[[i]]<-c(CCVec[[i]],c(wv-1.5,wv-1))}
        if(pElig[i,wv]==1 & pElig[i,(wv-1)]==1){
          CCVec[[i]]<-c(CCVec[[i]],c(wv-1))}
      } # if j==wv
    } # j loop
  } # i loop

  return(CCVec)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
#  >> make01MemberMatrix <<
#______________________________________________________________________________
#' Creates a matrix indicating membership in a group based on a variable
#'
#' Membership is assumed to be described by a set of integers stored in
#' the variable.
#'
#' @param pInDT a data.table with two columns: SID and some integer
#'   variable
#' @param pPrefix a string indicating the prefix to give to each
#'    category-specific column of the output
#' @return A data.table with the same two columns as the input DT, plus
#'    k new columns, one for each unique category (i.e. value) in the
#'    2nd column of pInDT. Let 'str' be the value of 'pPrefix'; and suppose
#'    the categories are numbered 1, 2, ... then
#'    the columns will have the names str1, str2, etc.
#' @export
make01MemberMatrix <- function(pInDT,pPrefix){
  #' @import data.table
  categoryVec <- sort(as.vector(unique(pInDT[[2]])))
  memberMatrix <- pInDT
  for(i in 1:length(categoryVec)){
    colName <- paste(pPrefix, i, sep="")
    memberMatrix <- mutate(memberMatrix, newCol =
                             ifelse(pInDT[[2]] == categoryVec[i],1,0))
    names(memberMatrix)[2+i]<- colName
  }
  return(memberMatrix)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> netvtxAttr <<
# _____________________________________________________________________________
#' Returns a dataframe DF with col.2 a suitable 'network' node attribute.
#'
#' Standalone function for creating pkg 'network' node (vertex) attribute.
#'
#' @param pAttrMasterDF A two-column dataframe with SID and the values of
#'     some numerical variable 'x' as the columns. It should include all
#'     SIDs for some school and wave range (e.g. as generated by 'getSWNodes')
#'     for which there are data (if no data, attribute will be 'NA'). The SIDs
#'     actually used are selected from the next parameter.
#' @param pSIDVec An ordered vector of SIDs exactly matching the 'node set'
#'     for the network of interest; e.g. if the network has n nodes, the length
#'     of the returned dataframe will be n.
#' @param pNetwork The 'network' object to assign the vertex attribute to. This
#'     is not actually used to do the assignment, but rather, for some
#'     consistency checking.
#' @return A dataframe with two columns: SID (integer) and the associated values
#'     of some variable (numerical).
#' @examples
#' # AL134DF is a dataframe as output by 'createOnset'.
#' # Because this analysis is descriptive, this code creates a vertex
#' # attribute for just one wave, and in the 2nd statement, assigns it as
#' # a 'network' vertex attribute, naming it 'AlcOns'
#' ALDF.w1 <- netVtxAttr(AL134DF[,c("SID","al.1")],ds.winsSID1,ds.winsOBF1.no)
#' ds.winsOBF1.no %v% "AlcOns" <- ALDF.w1$al.1
#' @export
netVtxAttr <- function(pAttrMasterDF,pSIDVec,pNetwork) {
#' @import data.table
#' @import dtplyr
# ___________________ #NOTE: does not use data.table YET ...
  # Check a few things
  if (length(pAttrMasterDF)!=2){
    stop("Dataframe containing attributes does not have 2 cols.")
  }
  if (length(pNetwork$oel)!=length(pSIDVec)){
    stop("Network vertex count not equal to # SIDs (pSIDVec)")
  }
  # make sure everything is in order, because vertex attributes are
  # assigned based on some original order of the nodes, which in our
  # case will correspond to SID order
  pAttrMasterDF <- pAttrMasterDF[order(pAttrMasterDF$SID),]
  pSIDVec <- sort(pSIDVec)
  targDF <- merge(as.data.frame(pSIDVec),pAttrMasterDF,
                  by.x = getNameAsString(pSIDVec), by.y = "SID", all.x = T)
  names(targDF)[1] <- "SID"
  return(targDF)
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> zero30dSU <<
# _____________________________________________________________________________
# Used by: makeTVTbl
# _____________________________________________________________________________
#' Adjusts 30 day SU variables to zero if lifetime use is zero
#'
#' @param pDF A data.table in the format returned by 'getNQTVCs', i.e. an
#'     image of the database table 'SWave' that includes at least the
#'     variables for lifetime and 30 day substance use included in the
#'     Short Survey.
#' @return A data.table in the same format as the input DF, with 30 day
#'     substance use variables recoded to 0 if no lifetime use of the
#'     corresponding substance has been reported concurrently.
#' @details Survey skip logic skips over 30 day SU questions if no lifetime
#'     use of the substance is reported concurrently. This recode allows
#'     the 30 day variables to be used in analyses under such circumstances,
#'     instead of being missing.
#' @note This function is a helper function for 'makeTVTbl'.
#' @export
zero30dSU <- function(pDT){
#' @import data.table
#' @import dtplyr
#___________________
  # The pDF has to be a data.table in the format created
  # by 'getNQVTCs' or equivalent

  # NOTE: The syntax for multiple in-place assignment with data.table
  #       is a bit awkward; I think this form is less awkward (but it's
  #       still kind of ugly)
  pDT[, ':=' (Tob30Day =  ifelse(TobLife  == 0, 0, Tob30Day),
              ETob30Day = ifelse(ETobLife == 0, 0, ETob30Day),
              Chew30Day = ifelse(ChewLife == 0, 0, Chew30Day),
              Alc30Day =  ifelse(AlcLife  == 0, 0, Alc30Day),
              Bng30Day =  ifelse(BngLife  == 0, 0, Bng30Day),
              MJ30Day =   ifelse(MJLife   == 0, 0, MJ30Day))]
  return (pDT)
}

# Utility Fns -----------------------------------------------------------------

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
# >> getNameAsString <<
# _____________________________________________________________________________
#' Returns the name of a variable as a string.
#'
#' This function just removes the need to look up how to do this (fairly
#'     common) operation, which I seem to have to do all the time, because
#'     I can't remember the (little used) syntax.
#' @param pVarName A variable name (no quotes or anything)
#' @return The variable name as a string
#' @examples
#' # Returns c("myVariable")
#' vStr <- getNameAsString(myVariable)
#' @note Internal Function; NOT EXPORTED
getNameAsString <- function(pVarName){
  deparse(substitute(pVarName))
}

#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
#' 'Without' operator (complement of %in%):
#' @details A binary operator, i.e. x %w/o% y (x and y vectors, or lists) gives
#'    the elements of x that are NOT IN y.
#' @export
"%w/o%" <- function(x,y) x[!x %in% y] #Bin operator, x NOT IN y (vectors)
#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
johninpdx/RSTools documentation built on May 19, 2019, 5:15 p.m.