pcrfn: Peng's Collection of R Functions at Nielsen

coe_lm <-
function(mat, weights, picked, func, xlabel,
                   para.initials=list(a1 = 0.03, b1 = 1, a2=0.3, b2=0.6)){
  #1.Be care the weights, it should have the length and exactly matches 
  #  the order of all lengthes in the input matrix, or weights will be assigned
  #  incorectly;
  #2.The adjust should be equal to the start length -1. If the start length in
  #  the input matrix is 60, adjust=59. If 1, adjust=0;
  #   
  #   mat=FEB_5KS30_ls_1_360_3D
  #   picked <- c(60, 100, 155, 215, 250, 300, 360, 367)
  #   func <- weibull
  #   para.initials <- list(a = 0.03, b = 1)
  
  nls_fit_1 <- nls_1d_fit(mat=mat, picked=picked, func=func,
                          para.initials=para.initials)
  
  #nls_fit_1$effective_picked: all the cases in both picked and the dataframe range;
  #eft_len: all the above length - the problematic ones;
  eft_len <- length(nls_fit_1$effective_picked)-length(nls_fit_1$problematic_length)
  if (eft_len<=0)
    stop("Probably all picked cases have numerical problems in nls() fitting")
  
  #since in nls_1d_fit you can only store the fit_coe when there is no numerical
  #problem, there seem to be no way to avoid this;
  #eft_picked: those don't have numerical problems;
  eft_picked <- nls_fit_1$effective_picked[! nls_fit_1$effective_picked %in% nls_fit_1$problematic_length]
  
  coe_result <- data.frame(length=c(eft_picked, picked[! picked %in% eft_picked]), 
                           matrix(NA, ncol=length(para.initials), 
                                  nrow=length(picked)))
  names(coe_result)<- c("length", names(para.initials))
  
  #Store the fitting result into the coe_result
  for (i in 1:eft_len)
    coe_result[i,(1:length(para.initials)+1)] <- nls_fit_1[[2]][[i]][,1]
  
  coe_result <- coe_result[order(coe_result$length),]
  #melt the coe_result for ggplot2 ploting
  coe_result_melt <- melt(data=coe_result, id="length", variable="para",
                          value.name="estimates")
  
  if (!missing(weights))
    weights <- weights[eft_picked-adjust]
  
  #store the coe of the linear fitting of the 4 coe
  coe_coe <- list()
  #Store the fitted value of the lines for the 4 coe
  coe_fit <- rep(NA, nrow(coe_result_melt))
  
  for (i in 1:length(para.initials)){
    if (missing(weights)){
      fit <- eval(substitute(lm(para ~ length, data=coe_result),  
                             list(para=as.name(names(coe_result)[i+1]))))
    } else {
      fit <- eval(substitute(lm(para ~ length, data=coe_result, weights=weights),  
                             list(para=as.name(names(coe_result)[i+1]))))
    }
    
    coe_fit[((i-1)*nrow(coe_result)+1):(i*nrow(coe_result))]<-
      predict(fit, list(length = coe_result[,1]))
    
    eval(substitute(coe_coe$para <- summary(fit)$coe, 
                    list(para=as.name(names(coe_result)[i+1]))))
  }
  
  all_para=c()
  for (j in 1:length(para.initials)){
    all_para<-c(all_para, coe_coe[[j]][,1])
  }
  
  names(all_para)<-paste(rep(names(para.initials), each=2), 
                         sep="_", c("intercept", "slope"))
  
  coe_result_melt$fitted <- coe_fit
  
  #You already have this after melt
  #coe_result_melt$para <- rep(c("a1", "b1", "a2", "b2"), each=nrow(coe_result))
  
  coe_result_melt$residuals <- coe_result_melt$estimates-coe_result_melt$fitted
  
  if (tolower(xlabel)=="qh"){
    len_breaks <- seq(floor(min(picked)/4)*4, max(picked), by=4)
    len_labels <- paste(len_breaks/4, "h", sep="")
    #     len_breaks <- picked
    #     len_labels <- as.character(picked)
    xlab=c("QH in Daytime")
  } else {
    len_breaks <- seq(floor(min(eft_picked)/30)*30, max(eft_picked), by=30)
    len_labels <- paste(len_breaks/60, "h", sep="")
    xlab=c("length")
  }
  
  func1_melt <- subset(coe_result_melt, para %in% names(para.initials)[1:(length(para.initials)/2)])
  func2_melt <- subset(coe_result_melt, para %in% names(para.initials)[(floor(length(para.initials)/2)+1):length(para.initials)])
  
  coe_result_plot_func1 <- 
    ggplot(data=func1_melt)+
    facet_grid(para~., scales="free_y")+
    geom_point(aes(x=length, y=estimates))+
    geom_line(aes(x=length, y=fitted), size=2, color="red")+
    geom_smooth(aes(x=length, y=estimates), color="blue")+
    scale_x_continuous(breaks=len_breaks, labels=len_labels)+xlab(xlab)
  
  coe_result_plot_func2 <- 
    ggplot(data=func2_melt)+
    facet_grid(para~., scales="free_y")+
    geom_point(aes(x=length, y=estimates))+
    geom_line(aes(x=length, y=fitted), size=2, color="red")+
    geom_smooth(aes(x=length, y=estimates), color="blue")+
    scale_x_continuous(breaks=len_breaks, labels=len_labels)+xlab(xlab)
  
  coe_result_plot_all <- 
    ggplot(data=coe_result_melt)+
    geom_point(aes(x=length, y=estimates, color=para))+
    geom_line(aes(x=length, y=fitted, color=para), size=2)+
    scale_colour_brewer(palette="Dark2")+
    scale_x_continuous(breaks=len_breaks, labels=len_labels)+xlab(xlab)
  
  coe_residual_plot <- 
    ggplot(data=coe_result_melt)+
    geom_line(aes(x=length, y=residuals))+
    geom_point(aes(x=length, y=residuals))+
    facet_grid(para~., scales="free_y")+
    geom_hline(yintercept=0, colour="red", size=2)+
    scale_x_continuous(breaks=len_breaks, labels=len_labels)+xlab(xlab)
  
  return(list(coe_summary=all_para, coe_details=coe_coe, 
              nls_fitting_all=coe_result_melt,
              fit_plot=nls_fit_1$plot, 
              residual_plot=coe_residual_plot, 
              coe_result_plot_func1=coe_result_plot_func1,
              coe_result_plot_func2=coe_result_plot_func2,
              coe_result_plot_all=coe_result_plot_all,
              problematic_lengths=nls_fit_1$problematic_length))
}