#' @title explore formula for probability and total points formula
#' @description explore the probability formula to total points and get the best power.
#'
#' @param nomogram nomogram after nomogram command in rms package
#' @param power if missing, power will be choose automatically up to 100 based on all R2 equealling to 1
#' @param digits default is 6
#' @importFrom utils sessionInfo
#' @importFrom stats as.formula lm predict t.test
#' @importFrom graphics legend par plot points
#' @return a global variable Formula_probability, the formula of probability and total points
#' @export
#'
#' @examples
#' \donttest{
#' library(rms)
#' set.seed(2018)
#' n <-2019
#' age <- rnorm(n,60,20)
#' sex <- factor(sample(c('female','male'),n,TRUE))
#' sex <- as.numeric(sex)
#' weight <- sample(50:100,n,replace = TRUE)
#' time <- sample(50:800,n,replace = TRUE)
#' units(time)="day"
#' death <- sample(c(1,0,0),n,replace = TRUE)
#' df <- data.frame(time,death,age,sex,weight)
#' ddist <- datadist(df)
#' options(datadist='ddist')
#' f <- cph(formula(Surv(time,death)~sex+age+weight),data=df,
#' x=TRUE,y=TRUE,surv=TRUE,time.inc=3)
#' surv <- Survival(f)
#' nomo <- nomogram(f,
#' lp=TRUE,
#' fun=list(function(x) surv(365,x),
#' function(x) surv(365*2,x)),
#' funlabel=c("1-Year Survival Prob",
#' "2-Year Survival Prob"))
#' library(nomogramFormula)
#' formula_probability(nomogram = nomo)
#' formula_probability(nomogram = nomo,power = 2)
#' formula_probability(nomogram = nomo,power = 3)
#' }
formula_probability <- function(nomogram,power,digits=6){
options(digits=digits)
#get probability part
if ("lp" %in% names(nomogram)){
lp_location=("lp" %==% names(nomogram))
prob_part=nomogram[(lp_location+1):length(nomogram)]
}else if("total.points" %in% names(nomogram)){
ttp_location=("total.points" %==% names(nomogram))
prob_part=nomogram[(ttp_location+1):length(nomogram)]
}
ChiCheck=any(grepl("Chinese",sessionInfo()))
id = 0
cat("\n")
if (ChiCheck) cat(crayon::red$bold(tmcn::toUTF8("\u62DF\u5408\u65B9\u7A0B: \u6839\u636E\u603B\u5F97\u5206\u8BA1\u7B97\u6982\u7387")))
if (!ChiCheck) cat(crayon::red$bold("Formula: caculate probability based on total points"))
cat("\n")
if (missing(power)){
#missing power : choose power automatically
power = 0
test=data.frame(R2=0.5)
while (any(test$R2<1)) {
if (power==100) {
if (ChiCheck) stop(tmcn::toUTF8("power\u503C\u5DF2\u8FBE\u5230\u6781\u9650100,\u8BF7\u624B\u52A8\u8BBE\u7F6Epower\u503C"))
if (!ChiCheck) stop("power gets limit 100, R2 still lower1, please choose power by hand.")
}
power=power+1
for (i in 1:length(prob_part)) {
if (i==1){
nomo.reslut=data.frame()
test=data.frame()
real_fit_list=list()
}
######get each 3 variables
nomo.i=prob_part[i]
var.i=names(nomo.i)
#change name of nomo.i to a, to get list points
names(nomo.i)="a"
points.i=nomo.i$a$x
names(points.i)=NULL
value.i=nomo.i$a$x.real
######caculate
formu=as.formula(paste0('value.i~',inner_Add_Symbol(paste0("I(points.i^",1:power,")"))))
reg=lm(formu)
fit.i=predict(reg)
value.i=value.i
diff=round(value.i-fit.i,6)
real_fit=t(data.frame(nomogram=value.i,fit=fit.i,diff))
colnames(real_fit)=round(points.i,6)
real_fit.i=list(real_fit)
names(real_fit.i)=var.i
real_fit_list=c(real_fit_list,real_fit.i)
R2=suppressWarnings(summary(reg)$r.squared)
RMSE=(mean(predict(reg)-value.i)^2)^(1/2)
test.i=data.frame(R2,RMSE)
test=rbind(test,test.i)
coef=reg$coefficients
lm.result=data.frame(t(coef))
rownames(lm.result)=var.i
colnames(lm.result)=c("b0",paste0("x^",1:power))
nomo.reslut=rbind(nomo.reslut,lm.result)
}
}
cat("\n")
id = id +1
if (ChiCheck)
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,
tmcn::toUTF8(". power\u503C:"))),power,"\n")
if (!ChiCheck)
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,
". power chooses:")),power,"\n")
}else{
#exist power
if (power<1) stop("power must not be less 1")
for (i in 1:length(prob_part)) {
if (i==1){
nomo.reslut=data.frame()
test=data.frame()
real_fit_list=list()
}
######get each 3 variables
nomo.i=prob_part[i]
var.i=names(nomo.i)
#change name of nomo.i to a, to get list points
names(nomo.i)="a"
points.i=nomo.i$a$x
names(points.i)=NULL
value.i=nomo.i$a$x.real
######caculate
formu=as.formula(paste0('value.i~',inner_Add_Symbol(paste0("I(points.i^",1:power,")"))))
reg=lm(formu)
fit.i=predict(reg)
value.i=value.i
diff=round(value.i-fit.i,6)
real_fit=t(data.frame(nomogram=value.i,
fit=fit.i,diff))
colnames(real_fit)=round(points.i,6)
real_fit.i=list(real_fit)
names(real_fit.i)=var.i
real_fit_list=c(real_fit_list,real_fit.i)
R2=suppressWarnings(summary(reg)$r.squared)
RMSE=(mean(predict(reg)-value.i)^2)^(1/2)
test.i=data.frame(R2,RMSE)
test=rbind(test,test.i)
coef=reg$coefficients
lm.result=data.frame(t(coef))
rownames(lm.result)=var.i
colnames(lm.result)=c("b0",paste0("x^",1:power))
nomo.reslut=rbind(nomo.reslut,lm.result)
}
}
rownames(test)=rownames(nomo.reslut)
if (ChiCheck){
#formula
Formula_probability<<-nomo.reslut
id = id + 1
cat("\n")
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,'.'),
tmcn::toUTF8("\u5F97\u5230\u7684\u65B9\u7A0B")),"\n")
for (k in 1:nrow(nomo.reslut)) {
predic.i=nomo.reslut[k,]
cat(crayon::black$bgWhite(rownames(predic.i)),"\n")
rownames(predic.i)="total points"
predic.i = predic.i[,!is.na(predic.i)]
print(predic.i)
}
#r2 test
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,'.'),
tmcn::toUTF8("\u62DF\u5408\u65B9\u7A0B\u7684R\u65B9\u548CRMSE")),"\n")
print(test)
#diff
if (length(diff)>50){
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),
crayon::red$bold(paste0(id,'.'),
tmcn::toUTF8("\u753B\u56FE")),"\n")
par(mfrow=c(length(real_fit_list),2))
for (i in 1:length(real_fit_list)){
predic.i=real_fit_list[i]
#cat(crayon::black$bgWhite(names(predic.i)),"\n")
#print(predic.i[[1]])
length.check=length(predic.i[[1]]["nomogram",])
cex.points=ifelse(length.check<100,1,
ifelse(length.check<500,0.5,0.3))
plot(x=colnames(predic.i[[1]]),
y=predic.i[[1]]["nomogram",],
main = paste0(names(predic.i),"\n",
"probability of nomogram and fit"),
xlab = "points",
ylab="probability",
pch=16,cex=cex.points)
points(x = colnames(predic.i[[1]]),
y=predic.i[[1]]["fit",],
pch=16,cex=cex.points,col="red")
legend("top",legend = c("nomogram","fit"),
col=c("black","red"),pch=16,
box.col = "transparent",bg="transparent")
plot(x = colnames(predic.i[[1]]),
y=predic.i[[1]]["diff",],
main = paste0(names(predic.i)," probability difference\nbebween nomogram and fit"),
xlab = "points",
ylab="P.nomogram - P.fit",
pch=16,cex=cex.points)
}
}else{
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),
crayon::red$bold(paste0(id,'.'),
tmcn::toUTF8("\u6BD4\u8F83nomogram\u7684\u6982\u7387\u548C\u62DF\u5408\u6982\u7387")),"\n")
for (i in 1:length(real_fit_list)){
predic.i=real_fit_list[i]
cat(crayon::black$bgWhite(names(predic.i)),"\n")
print(predic.i[[1]])
}
}
}else{
#formula
id = id + 1
Formula_probability<<-nomo.reslut
cat("\n")
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,'. Formula')),"\n")
for (k in 1:nrow(nomo.reslut)) {
predic.i=nomo.reslut[k,]
cat(crayon::black$bgWhite(rownames(predic.i)),"\n")
rownames(predic.i)="total points"
predic.i = predic.i[,!is.na(predic.i)]
print(predic.i)
}
#r2 test
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),crayon::red$bold(paste0(id,
'. R2 and RMSE for Formula')),"\n")
print(test)
#diff
if (length(diff)>50){
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),
crayon::red$bold(paste0(id,'. plot')),"\n")
par(mfrow=c(length(real_fit_list),2))
for (i in 1:length(real_fit_list)){
predic.i=real_fit_list[i]
#cat(crayon::black$bgWhite(names(predic.i)),"\n")
#print(predic.i[[1]])
length.check=length(predic.i[[1]]["nomogram",])
cex.points=ifelse(length.check<100,1,
ifelse(length.check<500,0.5,0.3))
plot(x=colnames(predic.i[[1]]),
y=predic.i[[1]]["nomogram",],
main = paste0(names(predic.i),"\n",
"probability of nomogram and fit"),
xlab = "points",
ylab="probability",
pch=16,cex=cex.points)
points(x = colnames(predic.i[[1]]),
y=predic.i[[1]]["fit",],
pch=16,cex=cex.points,col="red")
legend("top",legend = c("nomogram","fit"),
col=c("black","red"),pch=16,
box.col = "transparent",bg="transparent")
plot(x = colnames(predic.i[[1]]),
y=predic.i[[1]]["diff",],
main = paste0(names(predic.i)," probability difference\nbebween nomogram and fit"),
xlab = "points",
ylab="P.nomogram - P.fit",
pch=16,cex=cex.points)
}
}else{
id = id +1
cat("\n")
cat(crayon::black$bgCyan(" "),
crayon::red$bold(paste0(id,'. difference between nomogram and fit probability ')),"\n")
for (i in 1:length(real_fit_list)){
predic.i=real_fit_list[i]
cat(crayon::black$bgWhite(names(predic.i)),"\n")
print(predic.i[[1]])
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.