## getPercentilePlottingPositions.r file
##
## Author: David J. Silkworth
## (c)2014-2018 OpenReliability.org
##
getPercentilePlottingPositions<-function(x, s=NULL, interval=NULL, ppos="beta", aranks="Johnson", ties="none") {
if(is.vector(x)) {
F<-length(x)
N<-F+length(s)
## create the event vector
if(!missing(s)) {
## if(length(s)>0) {
## suspension data has been provided
data<-c(x,s)
event<-c(rep(1,F),rep(0,N-F))
prep_df<-data.frame(time=data,event=event)
## now sort the dataframe on data values
NDX<-order(prep_df[,1])
prep_df<-prep_df[NDX,]
}else{
## this is simply a complete failure set
data<-sort(x)
event<-rep(1,F)
prep_df<-data.frame(time=data,event=event)
}
}else{
if(is(x, "data.frame")) {
## this test is drawn from Abrem.R
if(is.null(x$time) || is.null(x$event)){
stop(': Argument \"x\" is missing $time and/or ",
"$event columns...')
}
## verify positive time values
if (anyNA(x$time)) {
stop("NA in failure or suspension data")
}
if (any(x$time<= 0)) {
stop("non-positive values in failure or suspension data")
}
## verify 1's and 0's only in event
## using Jurgen's validation code
ev_info <- levels(factor(x$event))
if(identical(ev_info,c("0","1")) || identical(ev_info,"1")){
# okay x is holding event indicators
}else{
stop("event column not '1' or '0' ")
}
if(length(s)>0) {
warning("argument 's' ignored when time-event dataframe provided")
}
failures<-x[x$event==1,]
if(is.null(x$qty)) {
qty<-rep(1,nrow(failures))
failures<-cbind(failures,qty)
}else{
# if(any(!is.integer(x$qty))) stop("non-integers in input object qty column")
# is.integer gives unexpected results
## Just make sure that the qty column contains integers
x$qty<-ceiling(x$qty)
}
F<-sum(failures$qty)
suspensions<-NULL
if(any("0" %in% ev_info)) {
suspensions<-x[x$event==0,]
if(is.null(x$qty)) {
qty<-rep(1,nrow(suspensions))
suspensions <-cbind(suspensions,qty)
}
N<-F+sum(suspensions$qty)
}else{
N<-F
}
prep_df<-rbind(failures,suspensions)
## now sort the dataframe on data values
NDX<-order(prep_df[,1])
prep_df<-prep_df[NDX,]
## expand the qty values, if present
if(!is.null(x$qty)) {
exp_df<-data.frame(time=rep(prep_df$time[1],prep_df$qty[1]), event=rep(prep_df$event[1],prep_df$qty[1]))
for(da_line in 2:nrow(prep_df)) {
this_df<-data.frame(time=rep(prep_df$time[da_line],prep_df$qty[da_line]), event=rep(prep_df$event[da_line],prep_df$qty[da_line]))
exp_df<-rbind(exp_df,this_df)
}
prep_df<-exp_df
}else{
prep_df<-prep_df[,-3]
}
}else{
stop("error in x argument type")
}
}
if(aranks=="Johnson") {
## adjust ranks using Leonard Johnson's method
# adj_rank<-.Call("adjustedRank", prep_df$event, package="WeibullR")
adj_rank<-.Call(adjustedRank, prep_df$event)
## eliminate suspensions and the event column
ftime<-prep_df$time[prep_df$event==1]
## combine ftime and adj_rank as columns in re-defined prep_df
prep_df<-data.frame(time=ftime, adj_rank=adj_rank)
## following code has been replaced by code above due to severe bottleneck with large data sets.
# ## start with extra element to reference zero as previous rank to first
# adj_rank<-0
# for(k in 1:N) {
# rr=N-k+1
# if(prep_df$event[k]>0) {
# this_rank<-(rr*adj_rank[k]+N+1)/(rr+1)
# adj_rank<-c(adj_rank, this_rank)
# }else{
# adj_rank<-c(adj_rank,adj_rank[k])
# }
# }
# prep_df<-cbind(prep_df,adj_rank=adj_rank[-1])
# ## now eliminate the suspension data
# prep_df<-prep_df[sapply(prep_df$event, function(x) x>0),c(1,3)]
}else{
if(aranks=="KMestimator") {
## adjust ranks using David Silkworth's adaptation of the modified
## Kaplan-Meier estimator used by Minitab as "nonparametric"
## start with extra element to reference zero as previous rank to first
adj_rank<-0
for(k in 1:N) {
if(prep_df$event[k]>0) {
this_rank<-1-((1-adj_rank[k])*(N-k)/(N-k+1))
adj_rank<-c(adj_rank, this_rank)
}else{
adj_rank<-c(adj_rank,adj_rank[k])
}
}
prep_df<-cbind(prep_df,adj_rank=adj_rank[-1])
## now eliminate the suspension data
prep_df<-prep_df[sapply(prep_df$event, function(x) x>0),c(1,3)]
## Now provide a modification for the final element if it was a failure
## This adjustment is used by Minitab
if(prep_df$adj_rank[F]==1) {
prep_df$adj_rank[F]=1-((1-prep_df$adj_rank[F-1])*1/10)
}
## Finally reverse the Kaplan-Meier plotting position to reveal usable
## adj_rank for any plotting position method.
prep_df$adj_rank<-prep_df$adj_rank*N
}else{
stop("aranks argument not recognized")
}
}
## now to handle ties, if called for
if(ties!="none") {
## if(length(ties)>0) {
test_hi<-prep_df$time-c(prep_df$time[-1],0)
test_lo<-prep_df$time-c(1,prep_df$time[1:(F-1)])
highest<-prep_df[sapply(test_hi, function(y) y!=0),]
lowest<-prep_df[sapply(test_lo, function(y) y!=0),]
if(ties=="highest") {
prep_df<-highest
}else{
if(ties=="lowest") {
prep_df<-lowest
}else{
if(ties=="mean") {
prep_df<-data.frame(time=highest$time,adj_rank=(highest$adj_rank+lowest$adj_rank)/2)
}else{
if(ties=="sequential") {
seq_adj<-cumsum(highest$adj_rank-lowest$adj_rank)
prep_df<-data.frame(time=highest$time, adj_rank=highest$adj_rank-seq_adj)
}else{
stop("ties argument not recognized")
}
}
}
}
}
## special note: the number of data entries N remains as originally identified
## it CANNOT be changed due to removal of suspensions or ties
## prep_df now contains the data to be plotted with their final adjusted ranks
## finally we get the probability plotting positions
if(ppos=="Benard"||ppos=="beta"||ppos=="mean") {
if(ppos=="Benard") {
ppp<-(prep_df$adj_rank-0.3)/(N+0.4)
}else{
## the incomplete beta function is what
## Benard was approximating
## we can simply use this directly
if(ppos=="beta") {
ppp<-qbeta(0.5, prep_df$adj_rank,N-prep_df$adj_rank+1)
}else{
if(ppos=="mean") {
## mean ranks (aka Herd-Johnson) give even spacing
## mean ranks were originally proposed by Weibull, but later
## he agreed that the Benard approximation gave
## more pleasing results when used with fatigue failure data
ppp<-prep_df$adj_rank/(N+1)
}
}
}
}else{
if(ppos=="Hazen"||ppos=="Kaplan-Meier"||ppos=="Blom") {
if(ppos=="Hazen") {
## aka modified Kaplan-Meier
ppp<-(prep_df$adj_rank-0.5)/N
}else{
if(ppos=="Kaplan-Meier") {
## This is method used by SuperSMITH with Johnson aranks
ppp<-(prep_df$adj_rank)/N
K<-length(prep_df[,1])
if(prep_df$adj_rank[K]==N) {
ppp[K]<-K/(N+0.001)
}
}else{
if(ppos=="Blom") {
ppp<-(prep_df$adj_rank-0.375)/(N+0.25)
}
}
}
}else{
stop("ppos argument not recognized")
}
}
outDF<-cbind(prep_df$time,data.frame(ppp),prep_df$adj_rank)
colnames(outDF)<-c("time","ppp","adj_rank")
return(outDF)
}
## assign the alias
getPPP<-getPercentilePlottingPositions
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.