#' Convenience function to recode/combine cluster codes in a p2p_wrap solution
combine_clusters<-function(p2p_wrap_object,
which_solution = NULL,
codes_to_combine = NULL
) {
# Pull out the requested cluster solution (an IDs x cluster-code matrix)
tempCluster<-p2p_wrap_object$Solutions[[paste("cluster",which_solution,sep="")]]
# Recode all of the requested codes to the smallest of them
tempCluster[tempCluster[,2]%in%codes_to_combine,2]<-min(codes_to_combine)
return(tempCluster)
}
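# Minimal usage sketch (hypothetical object and solution names; assumes p2p_wrap has already been run):
# recoded<-combine_clusters(my_wrap, which_solution=8, codes_to_combine=c(3,5))
# my_wrap$Solutions$cluster8<-recoded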
#' Remove specific journey points from the data and re-pack the remaining events into the correct format
remove_journey_points<-function(SeqDataPath,
WhichEventCodesRemove=NULL,
TerminalEventCode,
ReplaceTerminalWithCode=NULL) {
library(foreign)
if (is.character(SeqDataPath)) Data<-read.spss(SeqDataPath,use.value.labels = F, to.data.frame = T)
if (is.data.frame(SeqDataPath)) Data<-SeqDataPath
whichDP<-apply(Data[,2:51],1,function(x) which(x%in%WhichEventCodesRemove))
DataDP<-Data
for (i in 1:dim(Data)[1]) DataDP[i,whichDP[[i]]+1]<-NA
whichValid<-apply(DataDP,1,function(x) which(!is.na(x)))
DataDPFinal<-matrix(,ncol=51,nrow=dim(Data)[1])
for (i in 1:dim(Data)[1]) {
DataDPFinal[i,1:length(whichValid[[i]])]<-as.matrix(DataDP[i,whichValid[[i]]])
if (!is.null(ReplaceTerminalWithCode)) DataDPFinal[i,which(DataDPFinal[i,]%in%TerminalEventCode)]<-ReplaceTerminalWithCode
}
colnames(DataDPFinal)<-colnames(Data)
return(as.data.frame(DataDPFinal))
}
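# Minimal usage sketch (hypothetical file path and event codes):
# cleaned<-remove_journey_points("sequences.sav", WhichEventCodesRemove=c(12,13),
#                                TerminalEventCode=20, ReplaceTerminalWithCode=19)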
#' Apply a set of coefficients from the wrapper to a new dataset
#'
apply_dfacoefs<-function(
SeqDataPath,
priorP2PwrapObj, # Object from p2p_wrap
whichCluster=NULL
)
{
if (is.null(whichCluster)) stop("Specify from which cluster you want to extract the DFA coefficients.")
# Load required libraries
library(foreign)
library(TraMineR)
# Retrieve stuff from the priorP2PwrapObj
CapSeqLength<-priorP2PwrapObj$CapSeqLength
SeqMinLength<-priorP2PwrapObj$SeqMinLength
SingleState<-priorP2PwrapObj$SingleState
DFACoefs<-priorP2PwrapObj$DFACoefs[[paste("cluster",whichCluster,sep="")]]
#######################
## Data manipulation ##
#######################
# Create dummy variables out of the factor
CreateDummy<-function(CategoricalVector,Varname,CreateBaseline=1)
{
UniqueCategories<-unique(CategoricalVector)
Store<-matrix(0,ncol=length(UniqueCategories),nrow=length(CategoricalVector))
for (i in 1:length(CategoricalVector)) {
Store[i,which(UniqueCategories%in%CategoricalVector[i])]<-1
}
#Process labels
UniqueCategories<-as.numeric(gsub("-","_", UniqueCategories))
Store<-Store[,order(UniqueCategories)]
UniqueCategories<-UniqueCategories[order(UniqueCategories)]
colnames(Store)<-paste(Varname,UniqueCategories,sep="_")
if (CreateBaseline!=0) {
tempColnames<-colnames(Store)[-CreateBaseline]
Store<-as.matrix(Store[,-CreateBaseline])
colnames(Store)<-tempColnames
}
return(Store)
}
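# Illustrative sketch of what CreateDummy returns (input values assumed for illustration):
# CreateDummy(c(2,1,3,1),"FirstEvent") yields a 4 x 2 indicator matrix with columns
# FirstEvent_2 and FirstEvent_3 (FirstEvent_1 is dropped as the baseline):
#   row 1 (code 2): 1 0
#   row 2 (code 1): 0 0
#   row 3 (code 3): 0 1
#   row 4 (code 1): 0 0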
# Non sequence group
NonSequenceGroup<-list()
# Load data
message("Loading in data")
suppressWarnings(Data<-read.spss(SeqDataPath, use.value.labels = F, to.data.frame = T))
# retrieve event type labels
tpLabels<-names(attr(Data[,2],"value.labels"))[order(as.numeric(attr(Data[,2],"value.labels")))]
# Remove rows with only missing data
if (length(which(apply(Data[,-c(1,2)],1,function(x) all(is.na(x)))))>0) Data<-Data[-which(apply(Data[,-c(1,2)],1,function(x) all(is.na(x)))),]
# If a cap has been placed on sequence length, remove sequences shorter than the minimum or longer than the cap
if (!is.null(CapSeqLength)) {
NonSequenceGroup[[length(NonSequenceGroup)+1]]<-Data[which(apply(Data[,-1],1,function(x) table(!is.na(x))[2]<SeqMinLength)),1]
Data<-Data[which(apply(Data[,-1],1,function(x) table(!is.na(x))[2]<=CapSeqLength & table(!is.na(x))[2]>=SeqMinLength)),]
Data<-Data[,c(1:(CapSeqLength+1))]
}
# Record the ids of everyone who remains
IDs<-Data[,1]
# Retrieve the max code for the touchpoints
maxtpCode=max(Data[,-1],na.rm=T)
TerminalEventLabel<-tpLabels[maxtpCode]
# Start building sequences
if (!is.null(CapSeqLength)) SeqData<-seqdef(Data,var=colnames(Data)[2:(CapSeqLength+1)])
if (is.null(CapSeqLength)) SeqData<-seqdef(Data[,-1])
# Check to see if single state is turned on, and if so, revise sequence data
if (SingleState) SeqData<-seqdss(SeqData)
# Check to see if there are any single-step sequences. If so, remove them from the sequences
Any1<-any(seqlength(SeqData)%in%1)
if (Any1) {
message("Removing single-step sequences")
Which1<-which(seqlength(SeqData)%in%1)
NonSequenceGroup[[length(NonSequenceGroup)+1]]<-Data[Which1,1]
# Truncate sequences to those that are multi-step and those that are single step
SeqDataSingle<-SeqData[Which1,] # can return this later
SeqData<-SeqData[-(Which1),]
IDs<-IDs[-(Which1)]
}
########################
## Apply Coefficients ##
########################
# Retrieve individual-level measurements on the sequences
Length<-seqlength(SeqData)
FirstEvent<-CreateDummy(SeqData[,1],"FirstEvent")
LastEvent<-matrix(,ncol=1,nrow=length(Length))
for(i in 1:length(Length)) LastEvent[i]<-SeqData[i,Length[i]]
if (length(table(LastEvent))==1) for(i in 1:length(Length)) LastEvent[i]<-SeqData[i,Length[i]-1]
LastEvent<-CreateDummy(LastEvent,"LastEvent")
CountOfEachEvent<-seqistatd(SeqData)
colnames(CountOfEachEvent)<-paste("CountOfEventType",colnames(CountOfEachEvent),sep="")
# Bind together the candidate predictors (the DFA coefficient rownames determine which columns are actually used)
DFAData<-cbind(FirstEvent,LastEvent,CountOfEachEvent)
# Apply
WhichCols<-match(rownames(DFACoefs),colnames(DFAData))
AlgoData<-DFAData[,WhichCols[2:length(WhichCols)]]
apply_coef<-function(tData,Coefs) Coefs[1]+sum(tData*Coefs[2:length(Coefs)])
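# apply_coef scores one respondent against one group's classification function:
# score = constant + sum(predictor values * weights); each respondent is then
# assigned to the group with the highest score (max.col below).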
storeScores<-list()
for (hmc in 1:whichCluster) storeScores[[hmc]]<-apply(AlgoData,1,apply_coef,DFACoefs[,hmc])
storeScores<-Reduce("cbind",storeScores)
Assigned<-rbind(cbind(IDs,max.col(storeScores)),cbind(unlist(NonSequenceGroup),99))
colnames(Assigned)<-c("IDs","currSol")
return(Assigned)
}
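# Minimal usage sketch (hypothetical paths/objects; assumes p2p_wrap was run with DFA=TRUE):
# wrap<-p2p_wrap("training_sequences.sav", DFA=TRUE, HowManyClusters=8)
# new_assignments<-apply_dfacoefs("new_sequences.sav", priorP2PwrapObj=wrap, whichCluster=8)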
#' Path to purchase wrapper
#'
#' @param SeqDataPath A character path to an SPSS file containing the sequence data (a
#' data.frame of the same layout is also accepted). The data should already be cleaned,
#' and the first column should contain respondent IDs.
#'
#' @param DFA A logical; default is TRUE. Should the cluster assignment be based on a DFA
#' algorithm? If FALSE, clusters are taken directly from the Ward hierarchical clustering
#' solution. The DFA algorithm is based on characteristics of the sequences: first event,
#' last event, frequency of each event type, and the presence/frequency of the top 10
#' common subsequences for each cluster type. If DFA is TRUE, the information needed to
#' reproduce the clusters is also included in the return value.
#'
#' @param CapSeqLength A numeric. The maximum number of events at which sequences are capped, if any.
#'
#' @param SingleState A logical. Should sequences be defined by single-state transitions, or
#' should the same event type be allowed to repeat multiple times in a row?
#'
#' @param HowManyClusters A numeric vector or scalar giving the cluster solution sizes to
#' iterate through. Default is 5:10.
#'
#' @param costMatrix An n x n substitution-cost matrix, overriding the default costs.
#'
#' @param tpLabels A character vector of event-type labels. If NULL, labels are derived from
#' the SPSS value labels of the first event column.
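#' @examples
#' \dontrun{
#' ## Illustrative sketch only -- the file paths and cluster counts are hypothetical.
#' wrap <- p2p_wrap("sequences.sav", DFA = TRUE, CapSeqLength = 20,
#'                  HowManyClusters = 6:8, ExcelOutPath = "p2p_tables.xls")
#' wrap$DFAModelAccuracy
#' }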
#' @export
p2p_wrap<-function(SeqDataPath,
DFA = TRUE,
CapSeqLength = 20,
SeqMinLength = 3,
tpLabels = NULL,
SingleState= FALSE,
HowManyClusters=5:10,
SeriesTitle=NULL,
costMatrix=NULL,
OtherProfileBinariesPath=NULL, # A path to an SPSS file that contains profile variable binaries, first column should be IDs.
ExcelOutPath=NULL,
ConvertOutToSPSS=TRUE, # Convert output to an SPSS conformable means table?
#Group=NULL, # Either Null, or a two slot list, one with variable name, one with variable value to select on, e.g. list("DV_Category_PipeIn",2). Creates sequences within the group.
skewThreshold=.3, # Threshold for skews to qualify as a connection in the journey map
CustomCluster=NULL, # Nx2 dataframe, first column containing IDs, second column the labels.
ReportBtwWithinPlot=TRUE,
CustomClusterIsJourneyGroup=TRUE # is the custom cluster the journey group (allows us to generate full tables with other subgroups if FALSE)
)
{
# Load external libraries
message("Loading built-in functions")
library(TraMineR)
library(foreign)
library(cluster)
library(MASS)
library(klaR)
library(igraph)
# Load custom functions
# Extract the classification coefficients
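# (This computes Fisher's linear classification functions from the pooled within-group
# covariance: for group i, constant_i = -0.5 * m_i' V^{-1} m_i + log(prior_i), and the
# variable weights are V^{-1} m_i, where m_i is the group mean vector and V the pooled
# within-group covariance matrix -- matching the calculations in the function below.)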
coeffLDA<-function(LDAmodel,DV)
{
ModelData<-model.matrix(LDAmodel)[,-1]
priorsGet<-LDAmodel$prior
gr <- length(table(DV)) ## groups might be factors or numeric
v <- dim(ModelData)[2] ## variables
m <- LDAmodel$means ## group means
w <- array(NA, dim = c(v, v, gr))
for(i in 1:gr){
tmp <- scale(subset(ModelData, DV == unique(DV)[i]), scale = FALSE)
w[,,i] <- t(tmp) %*% tmp
}
W <- w[,,1]
for(i in 2:gr)
W <- W + w[,,i]
V <- W/(nrow(ModelData) - gr)
iV <- solve(V)
class.funs <- matrix(NA, nrow = v + 1, ncol = gr)
colnames(class.funs) <- paste(names(table(DV)))
rownames(class.funs) <- c("constant", colnames(ModelData))
for(i in 1:gr) {
class.funs[1, i] <- -.5 * (t(m[i,]) %*% iV %*% (m[i,])) + log(priorsGet)[i] # Intercept
class.funs[2:(v+1) ,i] <- iV %*% (m[i,])
}
return(class.funs)
}
# Create dummy variables out of the factor
CreateDummy<-function(CategoricalVector,Varname,CreateBaseline=1)
{
UniqueCategories<-unique(CategoricalVector)
Store<-matrix(0,ncol=length(UniqueCategories),nrow=length(CategoricalVector))
for (i in 1:length(CategoricalVector)) {
Store[i,which(UniqueCategories%in%CategoricalVector[i])]<-1
}
#Process labels
UniqueCategories<-as.numeric(gsub("-","_", UniqueCategories))
Store<-Store[,order(UniqueCategories)]
UniqueCategories<-UniqueCategories[order(UniqueCategories)]
colnames(Store)<-paste(Varname,UniqueCategories,sep="_")
if (CreateBaseline!=0) {
tempColnames<-colnames(Store)[-CreateBaseline]
Store<-as.matrix(Store[,-CreateBaseline])
colnames(Store)<-tempColnames
}
return(Store)
}
# Assign Sequence moments to a given cluster.
assigner<-function(MeansTable,Thresh)
{
rownames(MeansTable)<-lapply(lapply(strsplit(rownames(MeansTable)," ",""),function(x) x[-1]),paste, collapse=" ")
store<-list(); storeMeans<-list()
tempMeansTable<-MeansTable[,1:(dim(MeansTable)[2]-1)]
for (mtm in 1:dim(tempMeansTable)[2]) {
store[[mtm]]<-rownames(MeansTable[which(tempMeansTable[,mtm]>Thresh),])
storeMeans[[mtm]]<-MeansTable[which(tempMeansTable[,mtm]>Thresh),mtm]
}
return(list(Arcs=store,ArcSizes=storeMeans))
}
# Function to create a simple baliaviz visualization from a converted edgelist output
gen_baliaviz_fromconvertedgelist<-function(theConvertedEdgeList)
{
LINKS<-as.data.frame(theConvertedEdgeList$Numeric)
colnames(LINKS)<-c("source","target","value")
NODES<-as.data.frame(theConvertedEdgeList$Character)
colnames(NODES)<-c("name","group","size")
NODES$group<-as.numeric(as.character(NODES$group))
NODES$size<-as.numeric(as.character(NODES$size))
LINKS$source<-as.numeric(as.character(LINKS$source))
LINKS$target<-as.numeric(as.character(LINKS$target))
LINKS$value<-as.numeric(as.character(LINKS$value))
NODES$group<-as.numeric(as.character(NODES$group))
NODES$size<-as.numeric(as.character(NODES$size))
if (!is.null(theConvertedEdgeList$Title)) return(baliaviz::forceNetwork(Nodes = NODES, Links = LINKS,title=theConvertedEdgeList$Title))
if (is.null(theConvertedEdgeList$Title)) return(baliaviz::forceNetwork(Nodes = NODES, Links = LINKS))
}
# Non sequence group
NonSequenceGroup<-list()
NoJourneyGroup<-list()
# Load data
message("Loading in data")
if (is.character(SeqDataPath))
{
suppressWarnings(Data<-read.spss(SeqDataPath, use.value.labels = F, to.data.frame = T))
# retrieve event type labels
if (is.null(tpLabels)) tpLabels<-names(attr(Data[,2],"value.labels"))[order(as.numeric(attr(Data[,2],"value.labels")))]
}
if (is.data.frame(SeqDataPath)) Data<-SeqDataPath
# Remove rows with only missing data
if (length(which(apply(Data[,-c(1,2)],1,function(x) all(is.na(x)))))>0 && CustomClusterIsJourneyGroup) {
NoJourneyGroup[[1]]<-Data[which(apply(Data[,-c(1,2)],1,function(x) all(is.na(x)))),1]
Data<-Data[-which(apply(Data[,-c(1,2)],1,function(x) all(is.na(x)))),]
}
# Remove sequences shorter than the minimum length or longer than the cap
if (!is.null(SeqMinLength)) {
NonSequenceGroup[[length(NonSequenceGroup)+1]]<-Data[which(apply(Data[,-1],1,function(x) table(!is.na(x))[2]<SeqMinLength)),1]
Data<-Data[which(apply(Data[,-1],1,function(x) table(!is.na(x))[2]<=CapSeqLength & table(!is.na(x))[2]>=SeqMinLength)),]
Data<-Data[,c(1:(CapSeqLength+1))]
}
# If customcluster has been specified and there are 98s or 99s as part of that solution, remove those codes
#if (!is.null(CustomCluster) && any(CustomCluster[,2]%in%c(98,99))) {
# NonSequenceGroup[[length(NonSequenceGroup)+1]]<-Data[which(CustomCluster[,2]%in%c(98,99)),1]
# Data<-Data[-which(CustomCluster[,2]%in%c(98,99)),]
# CustomCluster<-CustomCluster[-which(CustomCluster[,2]%in%c(98,99)),]
# }
# Record the ids of everyone who remains
IDs<-Data[,1]
# Retrieve the max code for the touchpoints
maxtpCode=max(Data[,-1],na.rm=T)
TerminalEventLabel<-tpLabels[maxtpCode]
# Start building sequences
if (!is.null(CapSeqLength)) SeqData<-seqdef(Data,var=colnames(Data)[2:(CapSeqLength+1)])
if (is.null(CapSeqLength)) SeqData<-seqdef(Data[,-1])
# Check to see if single state is turned on, and if so, revise sequence data
if (SingleState) SeqData<-seqdss(SeqData)
# Check to see if there are any single-step sequences. If so, remove them from the sequences
Any1<-any(seqlength(SeqData)%in%1)
if (Any1 && CustomClusterIsJourneyGroup) {
message("Removing single-step sequences")
Which1<-which(seqlength(SeqData)%in%1)
NonSequenceGroup[[length(NonSequenceGroup)+1]]<-Data[Which1,1]
# Truncate sequences to those that are multi-step and those that are single step
SeqDataSingle<-SeqData[Which1,] # can return this later
SeqData<-SeqData[-(Which1),]
IDs<-IDs[-(Which1)]
}
###############################
## Start of cluster analysis ##
###############################
if (is.null(costMatrix)) Cost<-seqsubm(SeqData, method = "CONSTANT", with.miss = TRUE)
if (!is.null(costMatrix)) Cost<-costMatrix
message("Calculating distances between paths")
Distances<-seqdist(SeqData,method="OM",indel=1,norm="auto",sm=Cost[-length(tpLabels),-length(tpLabels)],with.missing=T)
if (is.null(CustomCluster)) {
message("Clustering paths")
clusterward <- agnes(Distances, diss = TRUE, method = "ward")
################################################
### Create within vs. between distance plot. ###
################################################
if (ReportBtwWithinPlot) {
message("Generating Between vs. Within Cluster Distances Plot")
storeClusterDistances<-list()
# Create plot metrics
DistWithin<-matrix(,ncol=1,nrow=length(1:50))
DisBtw<-matrix(,ncol=1,nrow=length(1:50))
for (i in 2:50) {
curCut<-cutree(clusterward, k=i)
# Avg distance within
tempStore<-matrix(,ncol=1,nrow=i)
for (kk in 1:i) {
tempDist<-Distances[which(curCut==kk),which(curCut==kk)]
tempStore[kk]<-mean(tempDist[lower.tri(tempDist)])
}
DistWithin[i]<-mean(tempStore)
# Distances between
tempGroupDistances<-matrix(0,i,i)
for (kk1 in 1:i) {
for (kk2 in 1:i) {
if (kk1!=kk2) tempGroupDistances[kk1,kk2]<-mean(Distances[which(curCut==kk1),which(curCut==kk2)])
}
}
# Store the cluster distance matrix if it's specified in HowManyClusters
if (any(i==HowManyClusters)) storeClusterDistances[[paste("cluster",i,"_dists",sep="")]]<-tempGroupDistances
DisBtw[i]<-mean(tempGroupDistances[lower.tri(tempGroupDistances)])
}
# Look at rate of change of btw vs within cluster
DiffBtwWith<-diff(DisBtw-DistWithin)
storeZ<-matrix(,ncol=1,nrow=length(DiffBtwWith))
for (ll in 2:length(DiffBtwWith)) {
storeZ[ll]<-(DiffBtwWith[ll]-mean(DiffBtwWith[1:ll],na.rm=T))/sd(DiffBtwWith[1:ll],na.rm=T)
}
library(ggplot2)
plotData<-as.data.frame(cbind(DiffBtwWith,1:length(DiffBtwWith)))
colnames(plotData)<-c("Distances","Clusters")
plotter<-ggplot(plotData)
microClusterPlot<-plotter+geom_line(aes(x=Clusters,y=Distances))+
stat_smooth(aes(x=Clusters,y=Distances),se=F)+
ggtitle("Difference Between vs. Within Cluster Distances")
}
################################################
################################################
# Use HowManyClusters to control the search space
if (is.null(HowManyClusters)) stop("Please specify the number(s) of cluster(s) you want to create")
message(paste("Creating", HowManyClusters[1], "to",HowManyClusters[length(HowManyClusters)], "cluster groups"))
for (i in 1:length(HowManyClusters)) {
eval(parse(text=paste("cluster",HowManyClusters[i]," <- cutree(clusterward, k=", HowManyClusters[i],")",sep="")))
}
# If DFA is TRUE, generate a classification algorithm for each clustering solution based on first event, last event, and the count of each event type
if (DFA) {
# Retrieve individual-level measurements on the sequences
Length<-seqlength(SeqData)
FirstEvent<-CreateDummy(SeqData[,1],"FirstEvent")
LastEvent<-matrix(,ncol=1,nrow=length(Length))
for(i in 1:length(Length)) LastEvent[i]<-SeqData[i,Length[i]]
if (length(table(LastEvent))==1) for(i in 1:length(Length)) LastEvent[i]<-SeqData[i,Length[i]-1]
LastEvent<-CreateDummy(LastEvent,"LastEvent")
CountOfEachEvent<-seqistatd(SeqData)
colnames(CountOfEachEvent)<-paste("CountOfEventType",colnames(CountOfEachEvent),sep="")
# Check for any constants and remove them
anyConstant<-any(apply(CountOfEachEvent,2,var)==0)
if (anyConstant) CountOfEachEvent<-CountOfEachEvent[,-which(apply(CountOfEachEvent,2,var)==0)]
# Remove highly correlated variables at > .6
#DFAData<-cbind(FirstEvent,LastEvent,CountOfEachEvent,SubSeqPresenceType)
DFAData<-cbind(FirstEvent,LastEvent,CountOfEachEvent)
corDFAData<-cor(DFAData)
diag(corDFAData)<-0
cordumDFAData<-matrix(0,dim(corDFAData)[1],dim(corDFAData)[2])
colnames(cordumDFAData)<-colnames(corDFAData)
rownames(cordumDFAData)<-colnames(corDFAData)
cordumDFAData[abs(corDFAData)>.6]<-1
corGraph<-graph_from_adjacency_matrix(cordumDFAData)
corGraph<-as_edgelist(corGraph)
# remove one variable from each highly correlated pair
DFAData<-DFAData[,-which(colnames(DFAData)%in%apply(corGraph,1,function(x) x[1]))]
clusterModelAccuracy<-list()
clusterModelCoefs<-list()
clusterSubSeqs<-list()
storeClusterDistances<-list()
for (hmc in 1:length(HowManyClusters)) {
message(paste("Evaluating DFA for cluster",HowManyClusters[hmc],sep=""))
eval(parse(text=paste("StepDFA<-greedy.wilks(cluster",HowManyClusters[hmc],"~DFAData)",sep="")))
KeepFromStep<-gsub("DFAData","",as.character(StepDFA[[1]]$vars))
# build model with gathered information
eval(parse(text=paste("testModel<-lda(cluster",HowManyClusters[hmc],"~DFAData[,KeepFromStep])",sep="")))
# store DFA version of cluster
eval(parse(text=paste("DFA_cluster",HowManyClusters[hmc],"<-predict(testModel)$class",sep="")))
# retrieve cross table, predicted vs observed
eval(parse(text=paste("AccTable<-table(cluster",HowManyClusters[hmc],",DFA_cluster",HowManyClusters[hmc],")",sep="")))
# retrieve % accuracy
clusterModelAccuracy[[paste("cluster",HowManyClusters[hmc],sep="")]]<-sum(diag(AccTable))/sum(AccTable)
# store coefficients
eval(parse(text=paste("tempCoefs<-coeffLDA(testModel,cluster",HowManyClusters[hmc],")",sep="")))
rownames(tempCoefs)<-gsub("[]]","",gsub("[[, ]","",gsub("KeepFromStep","",gsub("DFAData","",rownames(tempCoefs)))))
clusterModelCoefs[[paste("cluster",HowManyClusters[hmc],sep="")]]<-tempCoefs
# Recompute and store the cluster-level distances using the DFA-based groupings
tempGroupDistances<-matrix(0,HowManyClusters[hmc],HowManyClusters[hmc])
for (kk1 in 1:HowManyClusters[hmc]) {
for (kk2 in 1:HowManyClusters[hmc]) {
if (kk1!=kk2) eval(parse(text=paste("tempGroupDistances[kk1,kk2]<-mean(Distances[which(DFA_cluster",HowManyClusters[hmc],"==kk1),which(DFA_cluster",HowManyClusters[hmc],"==kk2)])",sep="")))
}
}
# Store the cluster distance matrix for this cluster solution
storeClusterDistances[[paste("DFA_cluster",HowManyClusters[hmc],"_dists",sep="")]]<-tempGroupDistances
}
}
}
#################################################
## Report the various profiles on the clusters ##
#################################################
# Import and process OtherProfileBinariesPath, if included
if (!is.null(OtherProfileBinariesPath)) {
suppressWarnings(OthProfileData<-read.spss(OtherProfileBinariesPath, use.value.labels = F, to.data.frame = T))
OPLabels<-as.character(attr(OthProfileData,"variable.labels"))
FilteredProfileData<-OthProfileData[match(IDs,OthProfileData[,1]),]
FilteredProfileData<-FilteredProfileData[-1]
OPVarNames<-colnames(FilteredProfileData)
OPLabels<-OPLabels[-1]
}
if (!DFA && is.null(CustomCluster)) ClusterNames<-paste("cluster",HowManyClusters,sep="")
if (DFA && is.null(CustomCluster)) ClusterNames<-paste("DFA_cluster",HowManyClusters,sep="")
if (!is.null(CustomCluster)) {
ClusterNames<-"CustomCluster"
CustomCluster<-CustomCluster[match(IDs,CustomCluster[,1]),2]
}
if (ConvertOutToSPSS) SPSSOut<-list()
BindedTablesStore<-list()
storeProto<-list()
storeClusterSolutions<-list()
masterStoreGraphs<-list()
masterStoreGraphTables<-list()
storeBaseSizes<-list()
storeCompositeProtos<-list()
storeCompositeProtosBaliaviz<-list()
storeCompositeProtoDistances<-list()
storeSimplyCompositeProtoDistances<-list()
storeStandCPSDists<-list()
storeStandClassAssign<-list()
storeCPSClassAssignProp<-list()
for (cN in ClusterNames) {
## Compute means for profiling
currSol<-eval(parse(text=cN))
if (class(currSol)=="integer") currSol<-as.factor(currSol)
maxCurrSol<-max(as.numeric(as.character(currSol)))
if (length(unlist(NonSequenceGroup))==0) storeClusterSolutions[[cN]]<-cbind(IDs,currSol)
if (length(unlist(NonSequenceGroup))>0 & length(unlist(NoJourneyGroup))==0) storeClusterSolutions[[cN]]<-rbind(cbind(IDs,currSol),cbind(unlist(NonSequenceGroup),99))
if (length(unlist(NonSequenceGroup))==0 & length(unlist(NoJourneyGroup))>0) storeClusterSolutions[[cN]]<-rbind(cbind(IDs,currSol),cbind(unlist(NoJourneyGroup),98))
if (length(unlist(NonSequenceGroup))>0 & length(unlist(NoJourneyGroup))>0) storeClusterSolutions[[cN]]<-rbind(cbind(IDs,currSol),cbind(unlist(NoJourneyGroup),98),cbind(unlist(NonSequenceGroup),99))
## Identify PROTOTYPICAL sequences
#if (is.null(CustomCluster)) {
DistanceFromCenter<-disscenter(Distances,currSol)
#DistanceFromCenter[which(currSol==1)]
tempStoreProto<-list()
for (rNg in 1:maxCurrSol) {
#currProto<-unique(head(SeqData[currSol%in%rNg,][order(DistanceFromCenter[currSol%in%rNg]),],20))
currProto<-SeqData[currSol%in%rNg,][order(DistanceFromCenter[currSol%in%rNg]),]
currProto<-apply(currProto,2,as.character)
storeCurrProto<-list()
if (!is.null(dim(currProto))) for (prt in 1:dim(currProto)[1]) storeCurrProto[[prt]] <-suppressWarnings(paste(tpLabels[as.numeric(currProto[prt,])][!is.na(tpLabels[as.numeric(currProto[prt,])])],collapse=">"))
if (is.null(dim(currProto))) storeCurrProto[[1]] <-suppressWarnings(paste(tpLabels[as.numeric(currProto)][!is.na(tpLabels[as.numeric(currProto)])],collapse=">"))
tempStoreProto[[rNg]]<-unlist(storeCurrProto)
}
storeProto[[paste("cluster",cN,sep="")]]<-tempStoreProto
#}
# Map each prototype as a (temporally) directed graph - composite prototypes
#AvgProtoSeqLength<-unlist(lapply(lapply(lapply(tempStoreProto,function(x) strsplit(x,">","")),function(x) lapply(x,length)),function(x) floor(mean(unlist(x)))))
#ProtoLengths<-lapply(lapply(lapply(lapply(tempStoreProto,function(x) strsplit(x,">","")),function(x) lapply(x,length)),unlist),table)
#OverIndexWithinGroup<-lapply(WithinGroupDistributionOfJourneys,function(x) x[x>1.5*mean(x)])
#max2<-unlist(lapply(ProtoLengths,function(x) as.numeric(names(x)[x%in%max(x[-which(x%in%max(x))])])))
#longest<-unlist(ProtoLengths,function(x) as.numeric(names(x)[length(x)]))
WithinGroupDistributionOfJourneys<-lapply(lapply(tempStoreProto,function(x) strsplit(x,">","")),function(x) table(unlist(x)))
MoreThan5pctWithinGroup<-lapply(WithinGroupDistributionOfJourneys,function(x) names(x[(x/sum(x))>.05]))
MaxJourneyLengthWithinGroup<-unlist(lapply(lapply(tempStoreProto,function(x) strsplit(x,">","")),function(x) max(unlist(lapply(x,length)))))
CompositeProto<-list()
for (clu in 1:maxCurrSol) {
pieceCompProto<-list()
protoSplit<-lapply(tempStoreProto[[clu]],function(x) unlist(strsplit(x,">","")))
protSplit_haveTerminalEvent<-unlist(lapply(protoSplit,function(x) any(x%in%TerminalEventLabel)))
protoSplit_clean<-protoSplit
for (psc in 1:length(protoSplit_clean)) if (protSplit_haveTerminalEvent[psc]) protoSplit_clean[[psc]]<-protoSplit[[psc]][-which(protoSplit[[psc]]%in%TerminalEventLabel)]
for (apsl in 1:MaxJourneyLengthWithinGroup[clu]) {
#Truncate proto split by the ones that are of a certain length
protoSplit_clean_trunc<-protoSplit_clean[unlist(lapply(protoSplit_clean,function(x) length(x)))>=apsl]
#Create frequency table
tempTab<-table(unlist(lapply(protoSplit_clean_trunc,function(x) x[apsl])))
#Keep only the high frequency journey points
tempTab<-tempTab[names(tempTab)%in%unlist(MoreThan5pctWithinGroup[clu])]
if (length(tempTab)>0) {
namesTempTab<-names(tempTab)
maxTempTab<-max(tempTab)
tempNamesStore<-namesTempTab[which(tempTab%in%maxTempTab)]
#if (any(tempNamesStore%in%TerminalEventLabel)) tempNamesStore<-tempNamesStore[-which(tempNamesStore%in%TerminalEventLabel)]
pieceCompProto[[apsl]]<-tempNamesStore
}
}
# Put the purchase event as the last event, and remove it from any other slot
#pieceCompProto[[length(pieceCompProto)+1]]<-TerminalEventLabel
if (any(unlist(lapply(pieceCompProto,length))==0)) pieceCompProto<-pieceCompProto[-which(unlist(lapply(pieceCompProto,length))==0)]
#piece edge list together
adder<-1
pieceCompProtoEdge<-list()
simplyCompProtoEdge<-list()
for (cc in 1:(length(pieceCompProto)-1)) {
for (pcp in 1:length(pieceCompProto[[cc]])) {
pieceCompProtoEdge[[adder]]<-paste(pieceCompProto[[cc]][pcp],pieceCompProto[[cc+1]],sep=">")
simplyCompProtoEdge[[adder]]<-paste(paste("(",1:length(pieceCompProto[[cc]][pcp]),") ",pieceCompProto[[cc]],sep=""),paste(" (",1:length(pieceCompProto[[cc+1]]),") ",pieceCompProto[[cc+1]],sep=""),sep=" >>> ",collapse="")
adder<-adder+1
}
}
CompositeProto[[clu]]<-Reduce("rbind",strsplit(unlist(pieceCompProtoEdge),">",""))
# Add in the terminal event
if (length(CompositeProto[[clu]])>2) CompositeProto[[clu]]<-rbind(CompositeProto[[clu]],cbind(unique(matrix(CompositeProto[[clu]])),TerminalEventLabel))
if (length(CompositeProto[[clu]])==2) CompositeProto[[clu]]<-rbind(CompositeProto[[clu]],cbind(unique(CompositeProto[[clu]]),TerminalEventLabel))
# Remove duplicated edges
CompositeProto[[clu]]<-Reduce("rbind",strsplit(unique(apply(CompositeProto[[clu]],1,paste,collapse=">")),">",""))
if (length(CompositeProto[[clu]])==2) CompositeProto[[clu]]<-matrix(CompositeProto[[clu]],ncol=2,byrow=T)
}
# Convert the character edgelist for mapping
storeCompositeProtos[[cN]]<-lapply(lapply(CompositeProto,balialab::convert_edgelist),function(x) list(Numeric=x$Numeric-1,Character=x$Character))
# Identify distances between micro-clusters
tempGroupDistances<-matrix(0,maxCurrSol,maxCurrSol)
for (kk1 in 1:maxCurrSol) {
for (kk2 in 1:maxCurrSol) {
if (kk1!=kk2) tempGroupDistances[kk1,kk2]<-mean(Distances[which(currSol==kk1),which(currSol==kk2)])
}
}
meanDist<-mean(tempGroupDistances[lower.tri(tempGroupDistances)])
sdDist<-sd(tempGroupDistances[lower.tri(tempGroupDistances)])
standDist<-round((tempGroupDistances-meanDist)/sdDist,digits=2)
diag(standDist)<-""
standDist[upper.tri(standDist)]<-""
standDist<-as.data.frame(standDist)
CPSNames<-paste("cps",1:maxCurrSol,sep="")
colnames(standDist)<-CPSNames
rownames(standDist)<-CPSNames
# store it
storeStandCPSDists[[cN]]<-standDist
# Explore sequence class membership (the range of class counts is selected automatically, around 1/5 of the number of micro-clusters)
if (is.null(CustomCluster)) {
RangeClassesExplore<-ceiling(maxCurrSol/5):(ceiling(maxCurrSol/5)+ceiling(maxCurrSol/5))
if (any(RangeClassesExplore==1)) RangeClassesExplore<-RangeClassesExplore[-1] # drop the single-class option, since a one-class cut is not possible
tempStoreClassAssign<-matrix(,ncol=length(RangeClassesExplore),nrow=length(currSol))
tempTempStoreCPSClassAssignProp<-list()
for (rce in RangeClassesExplore) {
currClassMembership<-cutree(agnes(as.dist(tempGroupDistances), diss = TRUE, method = "ward"),k=rce)
tempStoreCPSClassAssignProp<-list()
for (tg in 1:rce) {
tempStoreClassAssign[which(currSol%in%which(currClassMembership%in%tg)),which(RangeClassesExplore%in%rce)]<-tg
tempTab<-table(currSol[currSol%in%which(currClassMembership%in%tg)])
tempStoreCPSClassAssignProp[[tg]]<-round(tempTab/sum(tempTab),digits=2)
}
colnames(tempStoreClassAssign)<-paste("numClasses",RangeClassesExplore,sep="")
toStore<-Reduce("rbind",tempStoreCPSClassAssignProp)
rownames(toStore)<-paste("class",1:rce,sep="")
colnames(toStore)<-CPSNames
tempTempStoreCPSClassAssignProp[[rce]]<-toStore
}
# store it
storeCPSClassAssignProp[[cN]]<-tempTempStoreCPSClassAssignProp
storeStandClassAssign[[cN]]<-cbind(IDs,tempStoreClassAssign)
}
# ... continue with profiling
FirstList<-list()
LastList<-list()
TotalList<-list()
TransitionList<-list()
BaseSizes<-list()
for (rNg in 1:(maxCurrSol+1)) {
if (rNg<=maxCurrSol) currClusSeqs<-SeqData[which(currSol==rNg),]
if (rNg==(maxCurrSol+1)) currClusSeqs<-SeqData
BaseSizes[[rNg]]<-dim(currClusSeqs)[1]
currClusSeqsLength<-seqlength(currClusSeqs)
# First and Last event
FirstEventTable<-round(table(factor(currClusSeqs[,1],levels=1:maxtpCode))/dim(currClusSeqs)[1],digits=5)
LastEventTable<-matrix(,ncol=1,nrow=length(currClusSeqsLength))
for (i in 1:length(currClusSeqsLength)) LastEventTable[i]<-currClusSeqs[i,currClusSeqsLength[i]]
if (var(LastEventTable)==0) for (i in 1:length(currClusSeqsLength)) LastEventTable[i]<-currClusSeqs[i,currClusSeqsLength[i]-1]
LastEventTable<-table(factor(LastEventTable,levels=1:maxtpCode))/dim(currClusSeqs)[1]
# Total frequency
TotalPercent<-table(factor(matrix(as.matrix(currClusSeqs)),levels=1:maxtpCode))/sum(table(factor(matrix(as.matrix(currClusSeqs)),levels=1:maxtpCode)))
# Transition rates
TransitionRate<-matrix(t(seqtrate(currClusSeqs)))
rownames(TransitionRate)<-matrix(t(seqetm(currClusSeqs)))
# Store
FirstList[[rNg]]<-FirstEventTable
LastList[[rNg]]<-LastEventTable
TotalList[[rNg]]<-TotalPercent
TransitionList[[rNg]]<-TransitionRate
##################################################################################################################
## Go back to composite prototype tables to fill in those tables with profiling information about the sequences ##
##################################################################################################################
if (rNg<=maxCurrSol) {
tempNet<-storeCompositeProtos[[cN]][[rNg]]$Numeric+1
tempNodes<-storeCompositeProtos[[cN]][[rNg]]$Character
tempNet[,1]<-match(tempNodes,tpLabels)[tempNet[,1]]
tempNet[,2]<-match(tempNodes,tpLabels)[tempNet[,2]]
tempNet<-apply(tempNet,1,paste,collapse=">")
tempLinkSizes<-TransitionRate[match(tempNet,rownames(TransitionRate))]
if (any(is.na(tempLinkSizes))) tempLinkSizes[is.na(tempLinkSizes)]<-.5
tempNodeSizes<-TotalPercent[match(tempNodes,tpLabels)]
# Remove an edge if the transition probability is 0
if (any(tempLinkSizes%in%0)) {
whichtoremove<-which(tempLinkSizes%in%0)
tempLinkSizes<-tempLinkSizes[-whichtoremove]
storeCompositeProtos[[cN]][[rNg]]$Numeric<-storeCompositeProtos[[cN]][[rNg]]$Numeric[-whichtoremove,]
}
storeCompositeProtos[[cN]][[rNg]]$Numeric<- cbind(storeCompositeProtos[[cN]][[rNg]]$Numeric,2-(2-10)*((tempLinkSizes-min(tempLinkSizes,na.rm=T))/(max(tempLinkSizes,na.rm=T)-min(tempLinkSizes,na.rm=T))))
storeCompositeProtos[[cN]][[rNg]]$Character<-cbind(tempNodes,c(5,rep(1,length(tempNodes)-2),3),2-(2-10)*((tempNodeSizes-min(tempNodeSizes))/(max(tempNodeSizes)-min(tempNodeSizes))))
if (!is.null(SeriesTitle)) storeCompositeProtos[[cN]][[rNg]]$Title<-paste(SeriesTitle,"_",cN,"_gr",rNg,sep="")
}
}
# Create the baliaviz visual
storeCompositeProtosBaliaviz[[cN]]<-lapply(storeCompositeProtos[[cN]],gen_baliaviz_fromconvertedgelist)
# Store
FirstStore<-as.data.frame(Reduce("cbind",FirstList))
rownames(FirstStore)<-paste("FEV",1:length(tpLabels)," ",tpLabels,sep="")
colnames(FirstStore)<-c(paste("JourneyType_",1:maxCurrSol,sep=""),"Total")
LastStore<-as.data.frame(Reduce("cbind",LastList))
rownames(LastStore)<-paste("LEV",1:length(tpLabels)," ",tpLabels,sep="")
colnames(LastStore)<-c(paste("JourneyType_",1:maxCurrSol,sep=""),"Total")
TotalStore<-as.data.frame(Reduce("cbind",TotalList))
rownames(TotalStore)<-paste("TEV",1:length(tpLabels)," ",tpLabels,sep="")
colnames(TotalStore)<-c(paste("JourneyType_",1:maxCurrSol,sep=""),"Total")
TransitionStore<-as.data.frame(Reduce("cbind",TransitionList))
colnames(TransitionStore)<-c(paste("JourneyType_",1:maxCurrSol,sep=""),"Total")
# process labels
TransSplit<-strsplit(rownames(TransitionStore),">","")
for (i in 1:length(TransSplit)) {
pasterwhich<-as.numeric(TransSplit[[i]])
if (length(pasterwhich)>1) TransSplit[[i]]<-paste(tpLabels[pasterwhich],collapse=">")
if (length(pasterwhich)==1) TransSplit[[i]]<-paste(tpLabels[pasterwhich],tpLabels[pasterwhich],sep=">")
}
rownames(TransitionStore)<-paste("TRA",1:length(unlist(TransSplit))," ",unlist(TransSplit),sep="")
# Bind the tables together
BindedTables<-rbind(FirstStore,LastStore,TotalStore,TransitionStore)
BindedTablesStore[[cN]]<-BindedTables
storeBaseSizes[[cN]]<-cbind(1:maxCurrSol,unlist(BaseSizes)[-length(unlist(BaseSizes))])
###########################################################################################
## If there are additional profile variables to append, add those and obtain their means ##
###########################################################################################
if (!is.null(OtherProfileBinariesPath)) {
#temp data
#OthProfileData<-as.data.frame(cbind(Data[,1],matrix(sample(c(0,1),dim(Data)[1]*dim(Data)[2],replace=T,prob=c(.8,.2)),nrow=dim(Data)[1],ncol=dim(Data)[2])))
storeOtherProfileMeans<-list()
for (op in 1:dim(FilteredProfileData)[2]){
eval(parse(text=paste("storeOtherProfileMeans[[OPVarNames[op]]]<-tapply(FilteredProfileData[,op],",cN,",mean,na.rm=T)",sep="")))
}
#Reduce
storeOtherProfileMeans<-Reduce("rbind",storeOtherProfileMeans)
storeOtherProfileMeans<-cbind(storeOtherProfileMeans,apply(FilteredProfileData,2,mean,na.rm=T))
rownames(storeOtherProfileMeans)<-paste(OPVarNames," ",OPLabels,sep="")
colnames(storeOtherProfileMeans)<-c(paste("JourneyType_",1:maxCurrSol,sep=""),"Total")
# append the means
BindedTables<-rbind(BindedTables,storeOtherProfileMeans)
BindedTablesStore[[cN]]<-rbind(BindedTablesStore[[cN]],storeOtherProfileMeans)
}
######################################################################################
## If convert to SPSS out is specified, convert the means table to SPSS conformable ##
######################################################################################
if (ConvertOutToSPSS) {
Out<-list()
for (j in 1:dim(BindedTables)[2]) Out[[j]]<-cbind(BindedTables[,j],BaseSizes[[j]],0)
Out<-Reduce("cbind",Out)
rownames(Out)<-rownames(BindedTables)
BlankRow<-rep("",(maxCurrSol+1)*3)
SecondRow<-BlankRow
SecondRow[1]<-cN
ThirdRow<-BlankRow
ThirdRow[seq(1,(maxCurrSol+1)*3,by=3)]<-c(1:maxCurrSol,"Total")
FourthRow<-rep(c("Mean","N","Std. Deviation"),maxCurrSol+1)
StoreMeans<-rbind(BlankRow,SecondRow,ThirdRow,FourthRow,Out)
rownames(StoreMeans)[1:4]<-c("Report","a","b","c")
colnames(StoreMeans)<-BlankRow
SPSSOut[[cN]]<-StoreMeans
}
} # close main cluster loop
############
## Return ##
############
if (!is.null(ExcelOutPath)) {
library(WriteXLS)
for (spj in 1:length(SPSSOut)) eval(parse(text=paste("SPSS",ClusterNames[spj],"<-as.data.frame(SPSSOut[[spj]])",sep="")))
WriteXLS(paste("SPSS",ClusterNames,sep=""),ExcelOutPath,row.names=T,col.names=F)
}
Return<-list()
Return[["MainTable"]]<-BindedTablesStore
if (is.null(CustomCluster)) Return[["Prototypes"]]<-storeProto
Return[["TPLabels"]]<-tpLabels
Return[["Solutions"]]<-storeClusterSolutions
Return[["BaseSizes"]]<-storeBaseSizes
if (ConvertOutToSPSS) Return[["SPSSMainTable"]]<-SPSSOut
if (DFA) {
Return[["DFACoefs"]]<-clusterModelCoefs
Return[["DFAModelAccuracy"]]<-clusterModelAccuracy
Return[["SubSeqsForDFA"]]<-clusterSubSeqs
}
if (ReportBtwWithinPlot) Return[["microClusterPlot"]]<-microClusterPlot
Return[["CompositeProtos"]]<-storeCompositeProtos
Return[["CompositeProtosBaliaviz"]]<-storeCompositeProtosBaliaviz
Return[["CPSDistances"]]<-storeStandCPSDists
if (is.null(CustomCluster)) Return[["ClassAssigned"]]<-storeStandClassAssign
if (is.null(CustomCluster)) Return[["ClassAssignProps"]]<-storeCPSClassAssignProp
Return[["CapSeqLength"]]<-CapSeqLength
Return[["SeqMinLength"]]<-SeqMinLength
Return[["SingleState"]]<-SingleState
return(Return)
}
#' Survival-based drivers for sequence data
sequence_driver<-function(SPSSPath,
TPVarNames,
TimeVarNames,
DurationVarName,
#DecayTime=35,
DecayRate=.95,
tpLabels=NULL,
TermEventNumCode=NULL,
JourneySubgroups=NULL, # this should be a specific Solutions slot from the p2p_wrap
ExcelOutPath=NULL
)
{
if (is.null(TermEventNumCode)) stop("Please specify the numeric code for the terminal event")
# decay function
decay_func<-function(timelength,rate) {
vect<-1
for (i in 2:timelength) vect<-c(vect,vect[length(vect)]*rate)
return(vect)
}
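# Worked example (values assumed for illustration): with rate = .95,
# decay_func(4, .95) returns c(1, 0.95, 0.9025, 0.857375),
# i.e. each later time step retains 95% of the previous step's weight.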
library(foreign)
# Data read
suppressWarnings(Data<-read.spss(SPSSPath, use.value.labels = F, to.data.frame = T))
if (is.null(tpLabels)) tpLabels<-names(attr(Data[,TPVarNames][,1],"value.labels"))[order(as.numeric(attr(Data[,TPVarNames][,1],"value.labels")))]
# Remove cases that are classified as having short journey groups
if (!is.null(JourneySubgroups)) {
GroupCodesAtData<-JourneySubgroups[match(Data[,1],JourneySubgroups[,1]),2]
if (any(JourneySubgroups[,2]%in%99)) JourneySubgroups<-JourneySubgroups[-which(JourneySubgroups[,2]%in%99),]
Data<-Data[-which(GroupCodesAtData%in%99 | is.na(GroupCodesAtData)),]
}
Duration<-Data[,DurationVarName]
# Grab decay
Decay<-decay_func(max(Duration),DecayRate)
MaxCode<-max(Data[,TPVarNames],na.rm=T)
MaxTime<-max(Data[,TimeVarNames],na.rm=T)
storeRspMats<-list()
storeSerials<-list()
message("Expanding survival data with decay")
###########################
## Start respondent loop ##
for (rsp in 1:dim(Data)[1]) {
storeSerials[[rsp]]<-Data[rsp,1]
rspTP<-Data[rsp,TPVarNames]
rspTP<-rspTP[!is.na(rspTP)]
rspTime<-Data[rsp,TimeVarNames]
rspTime<-rspTime[!is.na(rspTime)]
Serials<-Data[rsp,1]
if (all(rspTime==0)) rspTime[length(rspTime)]<-MaxTime
# Adjust all the times if the max time is not MaxTime
if (max(rspTime)!=MaxTime) {
adjustment<-MaxTime/max(rspTime)
rspTime<-floor(rspTime*adjustment)
if (max(rspTime)!=MaxTime) rspTime[length(rspTime)]<-MaxTime
}
# Make the time slider into a proportion
rspTime<-rspTime/100
# Initialize time decaying object for each event type
rspMatrix<-matrix(0,nrow=Duration[rsp]+1,ncol=MaxCode)
# Map
Map<-Duration[rsp]*rspTime
# Map floored - to be able to enter into matrix, add 1
MapFloored<-floor(Map)
if (min(MapFloored)==0) MapFloored[MapFloored==0]<-1
# Store events
for (p in 1:length(MapFloored)) {
if (!(rspTP[p]%in%TermEventNumCode)) rspMatrix[MapFloored[p],rspTP[p]]<-rspMatrix[MapFloored[p],rspTP[p]]+1
if (rspTP[p]%in%TermEventNumCode) rspMatrix[dim(rspMatrix)[1],rspTP[p]]<-1
}
# Apply decay to event matrix
for (j in 1:dim(rspMatrix)[2]) {
# check to see if there are any events of this type
AnyNot0<-any(rspMatrix[,j]!=0)
if (AnyNot0) {
WhichNot0<-which(rspMatrix[,j]!=0)
if (!all(WhichNot0%in%dim(rspMatrix)[1])) {
storeEventDecay<-list()
for (wn0 in 1:length(WhichNot0)) {
initMat<-matrix(0,nrow=dim(rspMatrix)[1],ncol=1)
initMat[WhichNot0[wn0]]<-rspMatrix[WhichNot0[wn0],j]
if (WhichNot0[wn0]!=length(initMat)) {
decayloc<-2
for (lp in (WhichNot0[wn0]+1):length(initMat)) {
initMat[lp]<-initMat[lp-1]*Decay[decayloc]
decayloc<-decayloc+1
}
}
#store initMat
storeEventDecay[[wn0]]<-initMat
}
rspMatrix[,j]<-apply(Reduce("cbind",storeEventDecay),1,sum)
} # close if whichnot doesn't equal the length of time
}
} # close for j (the decay)
# Add the duration variable to the matrix
rspMatrix<-cbind(rspMatrix,0:Duration[rsp])
#Store the respondent matrix
storeRspMats[[rsp]]<-rspMatrix
} # close the rsp loop (looping across respondents)
### Reduce the data to a single data.frame
StoreResults<-list()
message("Running total level survival impact")
survivalData<-as.data.frame(Reduce("rbind",storeRspMats))
whichLowVar<-which(apply(survivalData,2,var,na.rm=T)<.009)
if (length(whichLowVar)>0) survivalData[,whichLowVar]<-0
colnames(survivalData)<-c(paste("JourneyType_",1:MaxCode,sep=""),"Duration")
survivalVars<-colnames(survivalData)
VarsShouldModel<-colnames(survivalData)
VarsShouldModel<-VarsShouldModel[-which(VarsShouldModel%in%c("Duration",paste("JourneyType_",TermEventNumCode,sep="")))]
eval(parse(text=paste("Model<-glm(",survivalVars[length(survivalVars)-1],"~.,survivalData,family='binomial')",sep="")))
tempResults<-coef(summary(Model))[,c(1,4)]
tempResults<-tempResults[match(VarsShouldModel,rownames(tempResults)),]
tempResults<-cbind(tempResults[,1],abs(tempResults[,1]),tempResults[,2],round(100*abs(tempResults[,1])/mean(abs(tempResults[,1]),na.rm=T),digits=0))
colnames(tempResults)<-paste(paste("Gr_Total",sep=""),c("Coef","absCoef","sig","Indexed"),sep="")
rownames(tempResults)<-tpLabels[-TermEventNumCode]
StoreResults[["TotalImpact"]]<-tempResults
if (!is.null(JourneySubgroups)) {
message("Running journey/sub group level impact")
JourneySubgroups<-as.data.frame(JourneySubgroups)
HowManyGroups<-max(JourneySubgroups$currSol)
GroupCodesAtData<-JourneySubgroups[match(Data[,1],JourneySubgroups[,1]),2]
for (gr in 1:HowManyGroups) {
message(paste("Group ", gr, " of ",HowManyGroups, sep=""))
gr_filter<-which(GroupCodesAtData%in%gr)
survivalData<-as.data.frame(Reduce("rbind",storeRspMats[gr_filter]))
colnames(survivalData)<-c(paste("JourneyType_",1:MaxCode,sep=""),"Duration")
whichLowVar<-which(apply(survivalData,2,var,na.rm=T)<.009)
if (length(whichLowVar)>0) survivalData[,whichLowVar]<-0
survivalVars<-colnames(survivalData)
eval(parse(text=paste("Model<-glm(",survivalVars[length(survivalVars)-1],"~.,survivalData,family='binomial')",sep="")))
tempResults<-coef(summary(Model))[,c(1,4)]
tempResults<-tempResults[match(VarsShouldModel,rownames(tempResults)),]
tempResults<-cbind(tempResults[,1],abs(tempResults[,1]),tempResults[,2],round(100*abs(tempResults[,1])/mean(abs(tempResults[,1]),na.rm=T),digits=0))
colnames(tempResults)<-paste(paste("Gr_",gr,sep=""),c("Coef","absCoef","sig","Indexed"),sep="")
rownames(tempResults)<-tpLabels[-TermEventNumCode]
StoreResults[[paste("JGImpact",gr,sep="")]]<-tempResults
}
}
Return<-as.data.frame(Reduce("cbind",StoreResults))
#colnames(Return)<-matrix(matrix(c(paste(colnames(Return)[1],names(StoreResults),sep="_"),paste(colnames(Return)[2],names(StoreResults),sep="_")),nrow=2,ncol=length(StoreResults),byrow=T))
if (!is.null(ExcelOutPath)) {
library(WriteXLS)
WriteXLS("Return",ExcelOutPath,row.names=T,col.names=T)
}
return(Return)
}
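# Minimal usage sketch (hypothetical path and variable names; JourneySubgroups assumes a
# prior p2p_wrap run whose Solutions slot is named DFA_cluster8):
# drivers<-sequence_driver("sequences_with_times.sav",
#                          TPVarNames=paste("event",1:20,sep="_"),
#                          TimeVarNames=paste("time",1:20,sep="_"),
#                          DurationVarName="journey_days",
#                          TermEventNumCode=20,
#                          JourneySubgroups=wrap$Solutions$DFA_cluster8)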