R/coe_gam.R

coe_gam <-
function(data, dist=3, trans="log", output=TRUE, intg.length=0,
                    func.name="double.weibull.b", xlab="Length", ylab="Integral",
                    file.folder=getwd(), file.name, auto.smooth=TRUE){
  
#   data <- coe_lm_FEB_5KS30_ls_1_360$nls
#   dist=3
#   trans="log"
#   output=F
#   intg.length=0
#   func.name="double.weibull.b"
#   xlab="Length"
#   ylab="Integral"
  
  num_para <- length(unique(data$para))
  
  num <- nrow(data)/num_para
  
  if (trans=="log"){
    data$estimates <- log(data$estimates)
  } else if (trans=="log10") {
    data$estimates <- log10(data$estimates) 
  } else if (trans=="sqrt") {
    data$estimates <- sqrt(data$estimates)
  } else if (!missing(trans)){
    writeLines("The supplied type of transformation is probably not supported. Thus no transformation is done.")
  }
  
  data$gam_o <- c()
  data$gam_r <- c()
  data$gam_r_se <- c()
  outliers <- c()
  
  op <- par(no.readonly = TRUE)
  par(mfrow=c(2,num_para))
  
  for (i in 1:num_para){
    #i=1
    para_data <- data[(1+(i-1)*num):(i*num),]
    
    para_gam_1<- gam(estimates~s(length), data=para_data)
    para_pred_se <- predict(para_gam_1,para_data, se.fit=TRUE)
    
    plot(para_data$length, para_data$estimates,
         xlab=xlab, ylab="Parameter Value",
         main=paste("Original GAM fit of ", data$para[1+(i-1)*num], sep=""))
    lines(para_data$length, para_pred_se$fit, col=2)
    lines(para_data$length, para_pred_se$fit+para_pred_se$se, col=3)
    lines(para_data$length, para_pred_se$fit-para_pred_se$se, col=3)
    lines(para_data$length, para_pred_se$fit+dist*(para_pred_se$se+sqrt(para_gam_1$sig2)), col=4)
    lines(para_data$length, para_pred_se$fit-dist*(para_pred_se$se+sqrt(para_gam_1$sig2)), col=4)
    
    para_data_outer <- para_data[(para_data$estimates>(para_pred_se$fit+
      dist*(para_pred_se$se+sqrt(para_gam_1$sig2))) |
      para_data$estimates<(para_pred_se$fit-dist*(para_pred_se$se+sqrt(para_gam_1$sig2)))),]
    para_data_re <- para_data[(para_data$estimates<=(para_pred_se$fit+
      dist*(para_pred_se$se+sqrt(para_gam_1$sig2))) &
      para_data$estimates>=(para_pred_se$fit-dist*(para_pred_se$se+sqrt(para_gam_1$sig2)))),]
    
    para_gam_2 <- gam(estimates~s(length), data=para_data_re)
    para_pred_se_re <- predict(para_gam_2,para_data, se.fit=TRUE)
    
    plot(para_data_re$length, para_data_re$estimates, 
         xlab=xlab, ylab="Parameter Value",
         main=paste("Refit after outlier-out of ", data$para[1+(i-1)*num], sep=""))
    lines(para_data$length, para_pred_se_re$fit, col=2)
    lines(para_data$length, para_pred_se_re$fit+para_pred_se_re$se, col=3)
    lines(para_data$length, para_pred_se_re$fit-para_pred_se_re$se, col=3)
    lines(para_data$length, para_pred_se_re$fit+dist*(para_pred_se_re$se+sqrt(para_gam_1$sig2)), col=4)
    lines(para_data$length, para_pred_se_re$fit-dist*(para_pred_se_re$se+sqrt(para_gam_1$sig2)), col=4)
    
    data$gam_o[(1+(i-1)*num):(i*num)] <- para_pred_se$fit
    data$gam_r[(1+(i-1)*num):(i*num)] <- para_pred_se_re$fit
    data$gam_r_se[(1+(i-1)*num):(i*num)] <- para_pred_se_re$se.fit
    outliers <- rbind(outliers, para_data_outer[,c(1:3)])
  }
  
  par(op)
  
  if (trans=="log"){
    data$estimates <- exp(data$estimates)
    data$gam_o <- exp(data$gam_o)
    data$gam_r <- exp(data$gam_r)
  } else if (trans=="log10"){
    data$estimates <- 10^(data$estimates)
    data$gam_o <- 10^(data$gam_o)
    data$gam_r <- 10^(data$gam_r)
  } else if (trans=="sqrt"){
    data$estimates <- (data$estimates)^2
    data$gam_o <- (data$gam_o)^2
    data$gam_r <- (data$gam_r)^2
  }
  
  true_gam_re_intg <- data.frame(length=data$length[1:num],
                                 true_intg=c(NA), gam_o_intg=c(NA),
                                 gam_re_intg=c(NA))
  
  if (func.name=="double.weibull.b"){
    for (i in 1:num){
      len<-true_gam_re_intg$length[i]
      
      t <- 1:len
      true_gam_re_intg$true_intg[i]<-
        sum(double.weibull.b(t, a1=data$estimates[i], 
                             b1=data$estimates[i+num], 
                             a2=data$estimates[i+2*num], 
                             b2=data$estimates[i+3*num], 
                             duration=len))
      
      true_gam_re_intg$gam_o_intg[i] <-
        sum(double.weibull.b(t, a1=data$gam_o[i], 
                             b1=data$gam_o[i+num], 
                             a2=data$gam_o[i+2*num], 
                             b2=data$gam_o[i+3*num], 
                             duration=len))
      
      true_gam_re_intg$gam_re_intg[i] <-
        sum(double.weibull.b(t, a1=data$gam_r[i], 
                     b1=data$gam_r[i+num], 
                     a2=data$gam_r[i+2*num], 
                     b2=data$gam_r[i+3*num], 
                     duration=len)) 
    }
  } else if (func.name=="weibull"){
    
    if (intg.length==0){
      cat("Integration is done by dynamic length.\n")
    } else {
      cat("integration is done by fixed length of", intg.length, ".\n")
    }
    
    for (i in 1:num){
      if (intg.length==0){
        len<-true_gam_re_intg$length[i]
        t <- 1:len
      } else {
        t <- 1:intg.length
      }
      true_gam_re_intg$true_intg[i]<-
        sum(weibull(t, a=data$estimates[i], 
                    b=data$estimates[i+num]))
      
      true_gam_re_intg$gam_o_intg[i] <-
        sum(weibull(t, a=data$gam_o[i], 
                    b=data$gam_o[i+num]))
      
      true_gam_re_intg$gam_re_intg[i] <-
        sum(weibull(t, a=data$gam_r[i], 
                    b=data$gam_r[i+num])) 
    }
  } else if (func.name=="weib2"){
    
    if (intg.length==0){
      cat("Integration is done by dynamic length.\n")
    } else {
      cat("integration is done by fixed length of", intg.length, ".\n")
    }
    
    for (i in 1:num){
      if (intg.length==0){
        len<-true_gam_re_intg$length[i]
        t <- 1:len
      } else {
        t <- 1:intg.length
      }
      true_gam_re_intg$true_intg[i]<-
        sum(weib2(t, a=data$estimates[i], 
                    b=data$estimates[i+num]))
      
      true_gam_re_intg$gam_o_intg[i] <-
        sum(weib2(t, a=data$gam_o[i], 
                    b=data$gam_o[i+num]))
      
      true_gam_re_intg$gam_re_intg[i] <-
        sum(weib2(t, a=data$gam_r[i], 
                    b=data$gam_r[i+num])) 
    }
  } else if (func.name=="weib.line") {
    
    if (intg.length==0){
      cat("Integration is done by dynamic length.\n")
    } else {
      cat("integration is done by fixed length of", intg.length, ".\n")
    }
    
    for (i in 1:num){
      if (intg.length==0){
        len<-true_gam_re_intg$length[i]
        t <- 1:len
      } else {
        t <- 1:intg.length
      }
      true_gam_re_intg$true_intg[i]<-
        sum(weib.line(t, a=data$estimates[i], 
                    b=data$estimates[i+num],
                      beta=data$gam_r[i+2*num]))
      
      true_gam_re_intg$gam_o_intg[i] <-
        sum(weib.line(t, a=data$gam_o[i], 
                    b=data$gam_o[i+num],
                      beta=data$gam_r[i+2*num]))
      
      true_gam_re_intg$gam_re_intg[i] <-
        sum(weib.line(t, a=data$gam_r[i], 
                    b=data$gam_r[i+num],
                      beta=data$gam_r[i+2*num])) 
    }
  } else if (func.name=="weib.line2") {
    
    if (intg.length==0){
      cat("Integration is done by dynamic length.\n")
    } else {
      cat("integration is done by fixed length of", intg.length, ".\n")
    }
    
    for (i in 1:num){
      if (intg.length==0){
        len<-true_gam_re_intg$length[i]
        t <- 1:len
      } else {
        t <- 1:intg.length
      }
      true_gam_re_intg$true_intg[i]<-
        sum(weib.line2(t, a=data$estimates[i], 
                      b=data$estimates[i+num],
                      beta=data$gam_r[i+2*num]))
      
      true_gam_re_intg$gam_o_intg[i] <-
        sum(weib.line2(t, a=data$gam_o[i], 
                      b=data$gam_o[i+num],
                      beta=data$gam_r[i+2*num]))
      
      true_gam_re_intg$gam_re_intg[i] <-
        sum(weib.line2(t, a=data$gam_r[i], 
                      b=data$gam_r[i+num],
                      beta=data$gam_r[i+2*num])) 
    }
  } else {
    stop("Not supported survival model function")
  }
  
  # true_gam_re_intg[1:100,]
  intg_lines_melt <- data.frame(Length=rep(true_gam_re_intg$length, 2),
                                Type=rep(c("Original_Fit", "Re_fit"), each=num),
                                Integral=c(true_gam_re_intg$gam_o_intg, true_gam_re_intg$gam_re_intg))
  if (auto.smooth) {
    integer_plot <- 
      ggplot(data=true_gam_re_intg, aes(x=length, y=true_intg))+
      geom_point()+
      stat_smooth()+
      geom_line(data=intg_lines_melt, aes(x=Length, y=Integral, color=Type), size=1)+
      ylab(ylab)+xlab(xlab)
  } else {
    integer_plot <- 
      ggplot(data=true_gam_re_intg, aes(x=length, y=true_intg))+
      geom_point()+
      geom_line(data=intg_lines_melt, aes(x=Length, y=Integral, color=Type), size=1)+
      ylab(ylab)+xlab(xlab)
  }
  
  print(integer_plot)
  
  if (output){
    write.csv(true_gam_re_intg, 
              file=paste(file.folder, "/", file.name, "_intg.csv", sep=""), 
              row.names=F, na="")
    write.csv(data, 
              file=paste(file.folder, "/", file.name, ".csv", sep=""),
              row.names=F, na="")
  }
  
  return(list(coe_smooth=data, integer_fit=true_gam_re_intg, 
              integer_plot=integer_plot))
}
fzwaeustc/pcrfn documentation built on May 16, 2019, 4:06 p.m.