R/source.R

Defines functions getSankeyTblTime getSKChart runAppSK

Documented in runAppSK

library(data.table)
library(shiny)
library(testthat)
library(assertthat)
library(inline)
library(stringr)
library(stringi)
library(magrittr)
library(R.utils)
library(Rcpp)

#' @import data.table shiny testthat assertthat inline stringr stringi magrittr R.utils Rcpp
NULL

#' @exportPattern ^[^\.]
NULL

#' @useDynLib shiny.alluvial, .registration = TRUE
NULL

#' @importFrom Rcpp evalCpp
NULL

#' Launch a webpage that shows the alluvial visualization 
#'
#' @param zTbl timestamped, sessionized table in proper format
#' @param ... Additional arguments passed to shiny::runApp
#'
#' @return None
#'
#' @examples
#' runAppSK(sessionTbl)
#'
#' @export
runAppSK <- function(zTbl, port=3343, ...) {
	.SAPATH = system.file(package="shiny.alluvial")
	.sessionTbl <<- copy(zTbl)
	addResourcePath('shiny_alluvial', .SAPATH) 
	shiny::runApp(.SAPATH, host="0.0.0.0", port=port, ...)
}

getSKChart <- function(tbl, exactTimeP=F, width, height) {
	ixs = tbl[, .I[moving == 'backward']]
	resTbl = copy(tbl)
	resTbl[, grep('^cID', colnames(resTbl), value=T) := NULL]
	resTbl

	resTbl[ixs, `:=`(tTarget=abs(tTarget), tSource=abs(tSource), dt=abs(dt))]
	capMinTimes(resTbl)
	if (exactTimeP == F) {
		resTbl = relaxTimesToSteps(resTbl)
	}
	resTbl
	resTbl[grepl('^ *$', target), `:=`(dt=0, tTarget=tSource)]
	resTbl[ixs, `:=`(tTarget=max(tTarget) - tTarget, tSource=max(tTarget) - tSource)]
	swapVals <- function(nm1, nm2) {
		resTbl[ixs, nm.orig := get(nm1)
		       ][ixs, (nm1) := get(nm2)
		       ][ixs, (nm2) := nm.orig
		       ][, nm.orig := NULL
		       ]
	}
	swapVals('source', 'target')
	swapVals('sourceCID', 'targetCID')
	swapVals('tSource', 'tTarget')
	swapVals('countRowSource', 'countRowTarget')
	swapVals('valueRowSource', 'valueRowTarget')

	makeUniqueTS(resTbl)
        sankeyPlot = list()
        sankeyPlot$params = 
                list(
                     data = resTbl,
                     nodeWidth = 25,
                     nodePadding = 30,
                     layout = 32,
                     width = width,
                     height = height,
                     labelFormat = ".1%"
                     )
	sankeyPlot
}

getSankeyTblTime <- function(zTbl, stepMax=3, groupField='', normalizeGroupsP=F) {
	getSankeyTblTimeGrp <- function(e2Tbl, stepMax, labelGroup) {
		eTbl = addSessionEnd(e2Tbl)
		if (labelGroup != '') {
			eTbl = addSessionStart(eTbl, labelGroup=labelGroup)
		}
		setorder(eTbl, sID, nS)
		eTbl[, tStart := iDateTime[1], sID]
		eTbl[, dt := as.numeric(iDateTime - tStart, units='secs')]
		eTbl[, tStart := NULL]
		skTbl = eTbl[, .(sID, nS, label, iDateTime, dt, NS)]
		addCIDs(skTbl, stepMax)
		resTbl = getSkSumTbl(skTbl)
		updateDrops(resTbl)
		resTbl[, moving := ifelse(min(tTarget) < 0, 'backward', 'forward')]
		resTbl
	}
	e2Tbl = copy(zTbl)
	if (groupField == '') {
		e2Tbl[, labelGroup := '']
	} else {
		e2Tbl[, labelGroup := get(groupField)]
	}
	resTbl = e2Tbl[, getSankeyTblTimeGrp(.SD, stepMax, labelGroup), .(labelGroup)]
	resTbl[, countGroup := sum(count[step == 2]), labelGroup]
	resTbl[order(-countGroup, labelGroup), rankGroup := rleid(labelGroup)]
	incrementCIDs <- function(val, cRank) {
		cNames = grep('cid', colnames(resTbl), value=T, ignore.case=T)
		for (cName in cNames) {
			resTbl[rankGroup == cRank, (cName) := get(cName) + val]
		}
		resTbl
	}
	if (resTbl[, max(rankGroup)] > 1) {
		for (cRank in 2:resTbl[, max(rankGroup)]) {
			incrementCIDs(resTbl[rankGroup == cRank - 1, max(targetCID)], cRank)
		}
	}
	resTbl = addCountRow(resTbl, normalizeGroupsP)
}

getSankeyTblTimeAnchor <- function(xTbl, anchor, direction='after', ...) {
	if (direction == 'both') {
		return(getSankeyTblTimeAnchorBoth(xTbl, anchor, ...))
	}
	yTbl = copy(xTbl)
	zTbl = data.table()
	yTbl[, .SD
	     ][label == anchor
	     ][, .(nSMin=min(nS), nSMax=max(nS)), .(sID, NS)
	     ][direction == 'after', nSMax := NS
	     ][direction == 'before', nSMin :=1
	     ][, yTbl[.SD, `:=`(sActiveP=T, snSMin=i.nSMin, snSMax=i.nSMax), on='sID']
	     ][sActiveP == T
	     ][nS >= snSMin & nS <= snSMax
	     ][, nS := 1:.N, sID
	     ][, NS := .N, sID
	     ][, updateDtPrev(copy(.SD))
	     ][, {zTbl <<- copy(.SD); .SD}
	     ]
	if (direction == 'before') {
		zTbl[, nS := rev(nS), sID]
		setorder(zTbl, uuid, sID, nS)
		updateDtPrev(zTbl)
	}
	getSankeyTblTime(zTbl, ...)
}

getSankeyTblTimeAnchorBoth <- function(xTbl, anchor, ...) {
	xbTbl = getSankeyTblTimeAnchor(xTbl, anchor, 'before', ...)
	xaTbl = getSankeyTblTimeAnchor(xTbl, anchor, 'after', ...)
	xaTbl[, step := step - min(step) + 1 + xbTbl[, max(step)]]
	xaTbl[, tSource := tSource + xbTbl[, max(abs(tTarget))]]
	xaTbl[, tTarget := tTarget + xbTbl[, max(abs(tTarget))]]
	xbTbl[step == 2]
	assert_that(all(xbTbl[step == 2, sourceCID] == 1))
	assert_that(all(xaTbl[step == min(step), sourceCID] == 1))
	adjVal = xbTbl[, max(targetCID)] - 1
	xaTbl[step > min(step), sourceCID := sourceCID + adjVal]
	xaTbl[, targetCID := targetCID + adjVal]
	xaTbl[, cID1 := NULL]
	cNames = grep('^cID', colnames(xaTbl), value=T)
	newNames = sprintf('cID%s', seq(from=xbTbl[, max(step)] + 1, length.out=length(cNames)))
	setnames(xaTbl, cNames, newNames)
	for (cName in newNames) {
		xaTbl[, (cName) := get(cName) + adjVal]
	}
	rbind(xbTbl, xaTbl, fill=T)
}

addSessionStart <- function(zTbl, labelGroup) {
	bTbl = copy(zTbl)
	bTbl[, nS := nS + 1]
	bTbl[, NS := NS + 1]
	bTbl[, .SD
	     ][, .(nS=1:(.N+1), NS=.N+1), sID
	     ][, bTbl[copy(.SD), on=c('sID', 'nS', 'NS')]
	     ][order(sID, nS)
	     ][, `:=`(iDateTimeMin=iDateTime[2], itimeMin=itime[2], idateMin=idate[2]), .(sID, NS)
	     ][nS == 1, `:=`(iDateTime=iDateTimeMin, itime=itimeMin, idate=idateMin)
	     ][, updateDtPrev(copy(.SD))
	     ][nS == 1, label := labelGroup
	     ][, c('iDateTimeMin', 'itimeMin', 'idateMin') := NULL
	     ]
}

addSessionEnd <- function(bTbl, endLabel='') {  
	bTbl[, .(nS=1:(.N+1), NS=.N), sID 
	     ][, bTbl[.SD, on=c('sID', 'nS', 'NS')] 
	     ][order(sID, nS) 
	     ][, `:=`(iDateTimeMax=iDateTime[NS], itimeMax=itime[NS], idateMax=idate[NS]), .(sID, NS) 
	     ][, NS := .N, sID 
	     ][nS == NS, `:=`(iDateTime=iDateTimeMax, itime=itimeMax, idate=idateMax) 
	     ][, updateDtPrev(copy(.SD)) 
	     ][nS == NS, label := endLabel 
	     ][, c('iDateTimeMax', 'itimeMax', 'idateMax') := NULL 
	     ] 
} 

updateDtPrev <- function(fTbl) {
	fTbl[, dtPrev := iDateTime - c(iDateTime[1], head(iDateTime, -1))]
	fTbl[nS == 1, dtPrev := NA]
	fTbl
}

updateDtNext <- function(fTbl) {
	fTbl[, dtNext := c(tail(iDateTime, -1), tail(iDateTime, 1)) - iDateTime]
	fTbl[nS == NS, dtNext := NA]
	fTbl
}

addCIDs <- function(skTbl, maxStep) {
	skTbl[, cIDCur := 1]
	for (step in 1:(min(c(maxStep, skTbl[, max(NS)])))) {
		addCID(skTbl, step)
	}
	makeUniqueCID(skTbl)
}

addCID <- function(skTbl, step) {
        #print(sprintf("addCID %s", step))
	skTbl[, .SD
	      ][nS <= step
	      ][, dcast(.SD, cIDCur + sID ~ nS, value.var='label')
	      ][, `:=`(cIDNew=.GRP, NSIDs=.N), by=eval(c('cIDCur', as.character(c(1:step))))
	      ][order(-NSIDs, cIDNew)
	      ][, {skTbl[.SD, `:=`(cIDNew=as.numeric(i.cIDNew)), on='sID']; .SD}
	      ]
	skTbl[, cIDCur := cIDNew]
	setnames(skTbl, 'cIDNew', sprintf('%s%s', 'cID', step))
	skTbl
}

makeUniqueCID <- function(skTbl) {
	cNames = sort(setdiff(grep('^cID', colnames(skTbl), value=T), 'cIDCur'))
	cNames
	cID = 0
	for (cName in cNames) {
		jTbl = skTbl[, .(cIDNew=.GRP + cID), by=eval(cName)]
		jTbl
		skTbl
		cName
		skTbl[jTbl, (cName) := i.cIDNew, on=cName]
		cID = cID + skTbl[, max(get(cName))]
	}
	skTbl[, cIDCur := get(tail(cNames, 1))]
	skTbl
}

getSkSumTbl <- function(skTbl) {
        #print(sprintf("getSkSumTbl"))
	steps = grep('^cID', colnames(skTbl), value=T) %>% setdiff(c('cIDCur', 'cID1')) %>% str_extract('[0-9]+$') %>% as.numeric
	resTbl = rbindlist(lapply(as.list(steps), function(c) getTsAtStep(skTbl, c)), use.names=T, fill=T)
	resTbl
	resTbl[, .(target, targetCID, tTarget)][resTbl[, .(source, sourceCID, tSource)], on=c(target='source', targetCID='sourceCID'), nomatch=0][tSource != tTarget][, nrow(.SD) == 0]
	resTbl[, .(source, sourceCID, tSource)][resTbl[, .(target, targetCID, tTarget)], on=c(source='target', sourceCID='targetCID'), nomatch=0][tSource != tTarget][, nrow(.SD) == 0]
	resTbl
}

getTsAtStep <- function(skTbl, step) {
        #print(sprintf("getTsAtStep", step))
	byCols = c(sprintf('%s%s', 'cID', 1:step))
	butlastByCols = head(byCols, -1)
	lastByCol = tail(byCols, 1)
	nextToLastByCol = tail(butlastByCols, 1)
	tTbl = data.table()
	skTbl[nS == step
	      ][, c(list(target=label, targetCID=get(lastByCol), dtTarget=dt, sID=sID), .SD[, byCols, with=F])
	      ][, {tTbl <<- copy(.SD); .SD}
	      ]
	sTbl = data.table()
	skTbl[nS == (step - 1)
	      ][, c(list(source=label, sourceCID=get(nextToLastByCol), dtSource=dt, sID=sID), .SD[, butlastByCols, with=F])
	      ][, {sTbl <<- copy(.SD); .SD}
	      ]
	sTbl
	tTbl
	tTbl[sTbl, dtSource := i.dtSource, on='sID']
	sTbl = sTbl[, .(dtSource=mean(dtSource)), eval(c(butlastByCols, 'sourceCID', 'source'))]
	resTbl = tTbl[, .(tTarget=mean(dtTarget), sdDt=sd(dtTarget-dtSource), count=.N, step), by=eval(c(byCols, 'target', 'targetCID'))]
	resTbl
	resTbl[sTbl, `:=`(source=i.source, tSource=dtSource, sourceCID=i.sourceCID), on=butlastByCols]
	resTbl[, dt := tTarget - tSource]
	cIDCols = grep('cID', colnames(resTbl), value=T)
	setcolorder(resTbl, c(setdiff(colnames(resTbl), cIDCols), cIDCols))
	resTbl
}

updateDrops <- function(resTbl) {
	resTbl[target == '', `:=`(tTarget=tSource, dt=0)]
	resTbl
}

addCountRow <- function(zTbl, normalizeGroupsP) {       
        #print(sprintf("addCountRow %s", normalizeGroupsP))
	addCountRowGrp <- function(resTbl, labelGroup) {
		cIDCols = grep('^cID', colnames(resTbl), value=T)
		resTbl[, countRowTarget := 0]
		resTbl[, endP := grepl('^ *$', target)]
		resTbl[, .(sSum=sum(count)), .(step, source)
		       ][order(-sSum)
		       ][, sRank := 1:.N
		       ][, resTbl[.SD, sRank := i.sRank, on=c('step', 'source')]
		       ]
		setorder(resTbl, step, sRank, endP, -count)
		resTbl[, sRank := NULL]
		resTbl[, endP := NULL]
		for (i in 1:(length(cIDCols))) {
			byCols = cIDCols[1:i]
			nextCol = cIDCols[i+1]
			resTbl[, .SD
			       ][(i == 1 & step <= i + 1) | step == i, .(count=sum(count)), by=eval(byCols)
			       ][!is.na(get(tail(byCols, n=1)))
			       ][, V3 := shift(count, fill=0), by=eval(head(byCols, n=-1))
			       ][, V4 := cumsum(V3), by=eval(head(byCols, n=-1))
			       ][, resTbl[.SD, `:=`(countRowTarget=countRowTarget + i.V4), on=byCols]
			       ]
		}
		wTbl = resTbl[, rbind(data.table(label=target, labelID=targetCID, countRowTarget=countRowTarget),
				      data.table(label=source, labelID=sourceCID, countRowTarget=countRowTarget))]
		wTbl[, .(countRowSource=min(countRowTarget)), .(label, labelID)
		     ][, wTbl <<- copy(.SD)
		     ]
		resTbl[wTbl, countRowSource := i.countRowSource, on=c(source='label', sourceCID='labelID')]
		resTbl[order(countRowSource, countRowTarget)]
	}
	zTbl
	resTbl = zTbl[, addCountRowGrp(copy(.SD), labelGroup), labelGroup]
	incrementCounts <- function(val, cRank) {
		cNames = grep('countRow', colnames(resTbl), value=T, ignore.case=T)
		for (cName in cNames) {
			resTbl[rankGroup == cRank, (cName) := get(cName) + val]
		}
		resTbl
	}
	if (resTbl[, max(rankGroup)] > 1) {
		for (cRank in 2:resTbl[, max(rankGroup)]) {
			incrementCounts(resTbl[rankGroup == cRank - 1 & step == 2, sum(count) + min(countRowSource)], cRank)
		}
	}
	updateSKValues(resTbl, normalizeGroupsP)
}

capMinTimes <- function(tbl, padProp=.01) {
	tPad = tbl[, max(tTarget) * padProp]
	while (tbl[tTarget < (tSource + tPad), .N] > 0) {
		tbl[tTarget < (tSource + tPad)
		    ][, .(i.targetCID=targetCID, i.tSource=tSource)
		    ][, {tbl[.SD, tTarget := i.tSource + tPad, on=c(targetCID='i.targetCID')]; tbl[.SD, tSource := i.tSource + tPad, on=c(sourceCID='i.targetCID')]}
		    ]
	}
}

relaxTimesToSteps <- function(tbl) {
	tbl[, .N, step]
	tbl[, dt := 1]
	tbl[, tTarget := step - 1]
	tbl[, tSource := step - 2]
	tbl[, .N, .(tSource, tTarget)]
	tbl[grepl('^ *$', target), `:=`(dt=0, tTarget=tSource)]
	tbl
}

updateSKValues <- function(resTbl, normalizeGroupsP) {
        #print(sprintf("updateSKValues %s", normalizeGroupsP))
	NSs = resTbl[step == 2, sum(count)]
	resTbl[, value := count/NSs]
	resTbl[, valueRowSource := countRowSource/NSs]
	resTbl[, valueRowTarget := countRowTarget/NSs]
	if (normalizeGroupsP == T) {
		normalizeSKValues(resTbl)
	}
	resTbl
}

normalizeSKValues <- function(resTbl) {
	resTbl[, .SD
	       ][step == 2
	       ][, .(count=sum(count)), rankGroup
	       ][, resTbl[.SD, NSs := i.count, on=.(rankGroup)]
	       ]
	resTbl[, .SD
	       ][step == 2
	       ][, .(count=min(countRowSource)), rankGroup
	       ][, resTbl[.SD, BSs := i.count, on=.(rankGroup)]
	       ]
	resTbl[, value := count / NSs / uniqueN(rankGroup)]
	resTbl[, max(value), rankGroup]
	resTbl[, valueRowSource := (countRowSource - BSs) / NSs / uniqueN(rankGroup) + (rankGroup - 1) / uniqueN(rankGroup)]
	resTbl[, valueRowTarget := (countRowTarget - BSs) / NSs / uniqueN(rankGroup) + (rankGroup - 1) / uniqueN(rankGroup)]
	resTbl[, max(valueRowSource), rankGroup]
	resTbl[, c('NSs', 'BSs') := NULL]
	resTbl
}

makeUniqueTS <- function(resTbl) {
	spTbl = data.table()
	resTbl[, rbind(data.table(label=target, labelCID=targetCID), data.table(label=source, labelCID=sourceCID))
	       ][, unique(.SD, by=c('label', 'labelCID'))
	       ][, .(labelCID, grp=0:(.N-1)), .(label)
	       ][, spTbl <<- copy(.SD)
	       ]
	spTbl
	resTbl
	resTbl[spTbl, sourceSp := i.grp, on=c(source='label', sourceCID='labelCID')]
	resTbl[spTbl, targetSp := i.grp, on=c(target='label', targetCID='labelCID')]
	resTbl[, rowID := 1:.N]
	resTbl[, target := sprintf('%s%s', target, insertNB(targetSp)), rowID]
	resTbl[, source := sprintf('%s%s', source, insertNB(sourceSp)), rowID]
	resTbl[, `:=`(sourceSp=NULL, targetSp=NULL, rowID=NULL)]
	resTbl
}

insertNB <- function(n) paste(rep(' ', n), collapse='', sep='')

addSessionRolling <- function(fTbl, keyv, maxDt=15, sKeyP=F) {
        setorderv(fTbl, keyv)
        fTbl[, sIDOuter := .GRP, by=eval(head(keyv, -1))]
        fTbl[, dtPrev := iDateTime - shift(iDateTime)]
        fTbl[, dsIDPrev := sIDOuter - shift(sIDOuter)]
        fTbl[!dsIDPrev %in% c(NA, 0, 1)][, assert_that(nrow(.SD) == 0)]
        fTbl[, sID := cumsum(is.na(dtPrev) | dtPrev >= maxDt | dsIDPrev == 1)]
        fTbl[, dsIDPrev := NULL]
        fTbl[, sIDOuter := NULL]
        fTbl[, NS := .N, sID]
        fTbl[, nS := rowid(sID)]
        updateDtPrev(fTbl)
        if (sKeyP == T) {
                fTbl[, sKey := jsonlite::toJSON(list(key=keyv, maxDt=maxDt))]
        }
        fTbl
}


updateDtPrev <- function(fTbl) {
        fTbl[, dtPrev := iDateTime - c(iDateTime[1], head(iDateTime, -1))]
        fTbl[nS == 1, dtPrev := NA]
        fTbl
}

updateDtNext <- function(fTbl) {
        fTbl[, dtNext := c(tail(iDateTime, -1), tail(iDateTime, 1)) - iDateTime]
        fTbl[nS == NS, dtNext := NA]
        fTbl
}

test_that("addSessionRolling", {
        cTbl = data.table(uuid=c(1,1,2,2,3,3), iDateTime=as.Date(c(1,10,1,2,2,3), origin='1970-01-01'))
        addSessionRolling(cTbl, c('uuid', 'iDateTime'), 1.5)[, assert_that(all(sID == c(1, 2, 3, 3, 4, 4)))]
        cTbl = data.table(uuid=c(1,1,1,2,3,3), iDateTime=as.Date(c(1,10,1,2,2,3), origin='1970-01-01'))
        addSessionRolling(cTbl, c('uuid', 'iDateTime'), 1.5)
        cTbl[, assert_that(all(sID == c(1, 1, 2, 3, 4, 4)))]
        cTbl[, assert_that(all(uuid == c(1,1,1,2,3,3)))]
        cTbl[, assert_that(all(iDateTime == as.Date(c(1,1,10,2,2,3), origin='1970-01-01')))]
        cTbl[, expect_identical(dtPrev, as.difftime(c(NA, 0, NA, NA, NA, 1), units='days'))]
        cTbl
})

addSLabels <- function(zTbl) {
        zTbl
        zTbl[grepl(',', label)][, assert_that(nrow(.SD) == 0)]
        zTbl[, sLabel := paste0(label, collapse=',', sep=''), sID]
        zTbl[, sUptoLabel := accumpaste(label, sID)]
        zTbl
}

ensureSLabels <- function(zTbl) {
        if (! 'sUptoLabel' %in% colnames(zTbl)) {
                addSLabels(zTbl)
        }
        zTbl
}

grepForSessions <- function(zTbl, patt, invert=F, col='sUptoLabel') {
#        print(sprintf('grepping for: %s, %s, %s', patt, invert, col))
        zTbl[, assert_that("sKey" %in% colnames(zTbl))]
        key = jsonlite::fromJSON(zTbl[, sKey[1]])$key
        maxDt = jsonlite::fromJSON(zTbl[, sKey[1]])$maxDt
        zTbl[, .SD
             ][grepl(patt, get(col)) != invert
             ][, addSessionRolling(copy(.SD), key, maxDt=maxDt)
             ][, {tempTbl <<- copy(.SD); .SD}
             ][, .SD
             ]
}
claytontstanley/shiny.alluvial documentation built on May 5, 2022, 11:01 a.m.