R/analCI.R

#' @title Summary from the data which fulfill the condition of interest
#' @description This function provides a summary for each experiment and writes
#'     a single .csv file which stores a summary over all assessed experiments, where
#'     the cells (measurement units) taken into account to get a summary value for each
#'     experimental unit are those which fulfill a condition of interest, such as a
#'     threshold value (for instance, an oscillation index greater than or equal to 1.12),
#'     or equal to 1 if the column chosen as condition is the column which reports 1 when
#'     the measurement unit is considered a stimulus-responsive cell and 0 otherwise.
#' @details First, you have to store the data of every measurement unit within each experimental
#'     unit into a .csv file called datosExperimentalUnitName.csv, which has to have the
#'     following columns:
#'      - CELL ID: Each ROI is a measurement unit, such as a cell within an experiment.
#'      - Response variable: For instance, the area under the curve related to a stimulus of
#'        interest.
#'      - Threshold values: This column can also be the response variable, in case you are
#'        concerned with those measurement units whose response variable is greater than or equal
#'        to a threshold value, which you establish in the corresponding parameter of this function.
#'        Alternatively, the threshold-values column can be another column where a value equal to 1
#'        means a stimulus-responsive cell and 0 otherwise, in which case you have to type the value
#'        1 in the corresponding parameter of this function. In order to provide a summary value of
#'        the response variable among the measurement units within each experiment, this function
#'        only takes into account those cells whose value in the threshold-values column is greater
#'        than or equal to the threshold considered.
#' @author Enrique Perez_Riesgo & Maria Elena Hernando-Perez
#' @param column Column with the response values for each measurement unit (for instance, the area for each cell).
#' @param variables The column where the threshold values are looked up. If you are concerned with
#'                  those cells or measurement units which are stimulus-responsive, variables
#'                  has to be a column where stimulus-responsive cells show the value 1 and 0
#'                  otherwise. Conversely, if you are concerned with those measurement units whose
#'                  value is greater than or equal to a threshold, then variables is the column which
#'                  stores the response variable of interest, such as the area under the curve.
#' @param threshold The threshold value established. If you are concerned with stimulus-responsive
#'                  cells (coded 1 for responsive cells and 0 otherwise) you will type 1. If you are
#'                  concerned with those measurement units whose variables column is greater than or
#'                  equal to a value of interest, you will type that value.
#' @param direction The direction related to the threshold, i.e. whether you want to keep values
#'                  greater than or equal to it ("up") or lower than or equal to it ("down").
#' @param category Factor or character vector which stores the factor level of each experimental
#'                 unit. For instance, if you are dealing with two kinds of mice, WT and KO, the
#'                 vector might have the following shape: c("WT", "KO", "KO", "WT", ...).
#' @param centr.par This parameter allows you to pick the location estimator ("median" or "mean")
#'                  used to summarize the responses among all measurement units within each
#'                  experimental unit.
#' @param disp.par This parameter allows you to pick the dispersion estimator related to the summary
#'                 value of each experimental unit, such as the mad or the sd of the responses among
#'                 all measurement units within the experimental unit.
#' @param folder The name of the folder which stores the files called datosExperimentalUnitName.csv,
#'               used as a relative path with respect to the working directory where R is running,
#'               which you can get with getwd().
#' @param cut.X If not NULL, responses greater than cut.X are discarded before the density plot and
#'              the quantiles are computed.
#' @param minus Value subtracted from the responses before the histograms are drawn.
#' @param breaks Breakpoints passed to hist(); when histogram = TRUE, a numeric vector of cut points
#'               is expected, and responses above the last break are collapsed into it.
#' @param max.hist Upper limit of the y axis of the histograms.
#' @param histogram Logical; if TRUE, a histogram per category is written to a .pdf file.
#'
#' @return .csv files (summary, raw data and quantiles) and density/histogram .pdf files written to disk
#' @export recopilationROI
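#' @examples
#' \dontrun{
#' # A minimal, hypothetical sketch: the folder "resultados" is assumed to contain
#' # files such as datosExp1.csv and datosExp2.csv with an "oscilation.index" column,
#' # and category is assumed to follow the (alphabetical) order in which dir() lists
#' # those files.
#' recopilationROI(column = "oscilation.index", variables = "oscilation.index",
#'                 threshold = 1.12, direction = "up",
#'                 category = c("WT", "KO"),
#'                 centr.par = "median", disp.par = "mad",
#'                 folder = "resultados")
#' }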

recopilationROI <- function(column = "oscilation.index", variables = "oscilation.index",
                            threshold = 0, category = category,
                            centr.par = "median", disp.par = "mad", folder = "resultados",
                            cut.X = NULL, direction = "up", minus = 1, breaks = "scott",
                            max.hist = 6, histogram = FALSE){
  # Working directory and subdirectory where the data sets to be analyzed are stored
  directory <- getwd()
  datosfich <- file.path(directory, folder)
  ficheros <- dir(datosfich)
  # The function which generates the data sets creates not only "datosExperiment.name.csv",
  # but also others such as "datosOutExperiment.name.csv"; these have to be removed.
  if(length(grep("datosO", ficheros)) != 0){
    ficheros <- ficheros[-grep("datosO", ficheros)]
  }
  ficheros.datos <- ficheros[grep("datos", ficheros)]

  # Building matrix which will store whole data analysis summary from each experiment
  datos <- data.frame(matrix(0, nrow = length(ficheros.datos), ncol = 4))
  colnames(datos) <- c("Experiment", "category", "No", "Yes")

  # Building vector which will store localization parameters from each experiment
  media <- as.vector(matrix(0, nrow = length(ficheros.datos), ncol = 1))

  # Building vector which will store response from each experiment
  response <- vector(mode = "numeric")

  # Building vector which will store categorical variable from each experiment
  categories <- vector(mode = "character")

  # Building vector which will store the name of the i experiment
  exp.name.vector <- vector(mode = "character")

  # Building vector which will store the index of those experiments which do not contain the variable of interest
  indice.na <- vector(mode = "numeric")

  values <- data.frame(Variable = NULL, Phenotype = NULL, Experiment = NULL)
  for(i in 1:length(ficheros.datos)){
    datos.tabla <- read.csv2(file.path(datosfich, ficheros.datos[i]))
    # Experiment name
    exp.name <- sub("datos", x = ficheros.datos[i], replacement = "")
    exp.name <- sub(".csv", x = exp.name, replacement = "")
    if(length(grep(variables, colnames(datos.tabla))) != 0){
      # Select the variable of interest on which the condition is assessed
      variable <- datos.tabla[, variables]
      # column is the column, related to variable, on which the summary is evaluated.
      # Sometimes they can be the same column.
      if(centr.par == "median"){
        if(direction == "up"){
          media[i] <- median(datos.tabla[variable >= threshold, column])
          #vectors with measurements and categories
          response <- c(response, datos.tabla[variable >= threshold, column])
          categories <- c(categories,
                          rep(as.character(category[i]),
                              length(datos.tabla[variable >= threshold, column])))
          tabla <- c(sum(variable < threshold), sum(variable >= threshold))
          exp.name.vector <- c(exp.name.vector,
                               rep(as.character(exp.name),
                                   length(datos.tabla[variable >= threshold, column])))
        }
        if(direction == "down"){
          media[i] <- median(datos.tabla[variable <= threshold, column])
          #vectors with measurements and categories
          response <- c(response, datos.tabla[variable <= threshold, column])
          categories <- c(categories,
                          rep(as.character(category[i]),
                              length(datos.tabla[variable <= threshold, column])))
          exp.name.vector <- c(exp.name.vector,
                               rep(as.character(exp.name),
                                   length(datos.tabla[variable <= threshold, column])))
          tabla <- c(sum(variable > threshold), sum(variable <= threshold))
        }
      }
      if(centr.par == "mean"){
        if(direction == "up"){
          media[i] <- mean(datos.tabla[variable >= threshold, column])
          #vectors with measurements and categories
          response <- c(response, datos.tabla[variable >= threshold, column])
          categories <- c(categories,
                          rep(as.character(category[i]),
                              length(datos.tabla[variable >= threshold, column])))
          exp.name.vector <- c(exp.name.vector,
                               rep(as.character(exp.name),
                                   length(datos.tabla[variable >= threshold, column])))
          tabla <- c(sum(variable < threshold), sum(variable >= threshold))
        }
        if(direction == "down"){
          media[i] <- mean(datos.tabla[variable <= threshold, column])
          #vectors with measurements and categories
          response <- c(response, datos.tabla[variable <= threshold, column])
          categories <- c(categories,
                          rep(as.character(category[i]),
                              length(datos.tabla[variable <= threshold, column])))
          exp.name.vector <- c(exp.name.vector,
                               rep(as.character(exp.name),
                                   length(datos.tabla[variable <= threshold, column])))
          tabla <- c(sum(variable > threshold), sum(variable <= threshold))
        }
      }
      datos[i, ] <- c(exp.name, as.character(category[i]), tabla)
    }else{
      indice.na <- c(indice.na, i)
    }
  }
  # Remove the rows of those experiments where the variable of interest has not been assessed
  if(length(indice.na) > 0){
    datos <- datos[- indice.na, ]
    media <- media[- indice.na]
  }

  categories <- as.factor(categories)
  if(centr.par == "median"){
    datos <- cbind(datos, Median = media)
    datos <- rbind(datos,
                   n= c(NA, NA, sum(as.numeric(datos$No)), sum(as.numeric(datos$Yes)), NA),
                   Median = c(NA, NA, NA, NA, median(datos$Median, na.rm = TRUE)),
                   Sd = c(NA, NA, NA, NA, mad(datos$Median, na.rm = TRUE)))
    rownames(datos) <- c(1: (nrow(datos) - 3), "n", "Median", "mad")
  }
  if(centr.par == "mean"){
    datos <- cbind(datos, Mean = media)
    datos <- rbind(datos,
                   n= c(NA, NA, sum(as.numeric(datos$No)), sum(as.numeric(datos$Yes)), NA),
                   Mean = c(NA, NA, NA, NA, mean(datos$Mean, na.rm = TRUE)),
                   Sd = c(NA, NA, NA, NA, sd(datos$Mean, na.rm = TRUE)))
    rownames(datos) <- c(1: (nrow(datos) - 3), "n", "Mean", "Sd")
  }

  write.csv2(datos, file = file.path(directory,
                                     paste("summary", column, centr.par, direction,".csv", sep = "")))
  write.csv2(data.frame(Respuesta = response,
                        Fenotipo = categories,
                        Experimento = exp.name.vector),
             file = file.path(directory,
                              paste("RAWdata", column, centr.par, direction,".csv", sep = "")))
  if(histogram == TRUE){
    pdf(file.path(directory,
                  paste(folder,"/histogram.", column, centr.par, direction, ".pdf", sep = "")))
    for(i in unique(categories)){
      hist(ifelse((response - minus)[categories == i] > breaks[length(breaks)],
                  breaks[length(breaks)],
                  (response - minus)[categories == i]),
           breaks = breaks,
           probability = TRUE,
           ylim = c(0, max.hist),
           xlab = variables,
           main = paste(i, ": ", variables, sep = ""))
    }
    dev.off()
  }

  pdf(file.path(directory,
                paste(folder,"/density.", column, centr.par, direction, ".pdf", sep = "")))
  if(!is.null(cut.X)){
    categories <- categories[response <= cut.X]
    response <- response[response <= cut.X]
  }
  plot(density(response[categories == levels(categories)[1]]),
       xlim = c(0, max(response)))
  colour = 1
  for(i in levels(categories)[-1]){
    colour = colour +1
    lines(density(response[categories == i]),
          col = colour)
  }
  legend("topright",
         legend = as.character(levels(categories)),
         col = 1:length(levels(categories)),
         pch = 16)
  write.csv2(quantile(response),
             file = file.path(directory, paste("Cuantiles", column, centr.par, direction,".csv", sep = "")))
  dev.off()
}



#' @title Wave length
#' @description This function estimates, for each ROI, the wave length of the oscillations,
#'     together with their amplitude, the oscillation area and the dispersion of the signal
#'     around a linear trend.
#' @author Enrique Perez_Riesgo
#' @param interval Numeric vector of length two with the time interval to analyze; if NULL, c(0, 1) is used.
#' @param data Data frame whose first column is time and whose remaining columns are the ROI traces.
#' @return data.frame with one row per ROI and columns WL (wave length), Amplitud (amplitude),
#'     OA (oscillation area) and Dispersion (residual standard deviation)
#' @export wave.length
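#' @examples
#' \dontrun{
#' # Illustrative sketch with made-up traces; the pracma and lmtest packages must be installed.
#' t <- seq(0, 1, by = 0.05)
#' tr <- data.frame(Time = t, ROI1 = sin(2 * pi * t), ROI2 = cos(4 * pi * t))
#' wave.length(interval = c(0, 1), data = tr)
#' }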

wave.length <- function(interval = NULL, data){

  if(is.null(interval)){ # If interval is NULL, the default interval [0, 1] is used
    interval <- c(0, 1)
  }
  data <- data[data[, 1] <= interval[2] &  data[, 1] >= interval[1], ]
  sigma <- apply(data[, -1], MARGIN = 2, function(X){summary(lm(X ~ data[, 1]))$sigma})
  resid <- apply(data[, -1], MARGIN = 2, function(X){residuals(lm(X ~ data[, 1]))})
  DW <- apply(data[, -1], MARGIN = 2, function(X){lmtest::dwtest(data[, 1] ~ X)$p.value})
  autoc <- DW <= 0.05

  resid.lag <- rbind(c(rep(NA, ncol(resid))), resid[- nrow(resid), ])
  resid.sign <- sign(resid.lag) + sign(resid)
  resid.sign2 <- apply(resid.sign[-1, ], 2, function(x){
    results <- NULL
    for(i in 1:(length(x))){results <- c(results, ifelse(x[i] == 0 & x[(i + 1)] == 0,x[(i - 1)] , x[(i)]))}
    return(results)
  })
  resid.sign2[nrow(resid.sign2), ] <- resid.sign[nrow(resid.sign), ]
  wave.length <- apply(resid.sign2, 2, function(x){sum(x != 0)/sum(x == 0)})*(data[2, 1] - data[1, 1])
  amplitud <-  apply(resid, 2, function(x){abs(max(x) - min(x))})


  areas <- apply(abs(resid), 2, function(x){pracma::trapz(x = data[, 1], y = x)})

  return(data.frame(WL = wave.length, Amplitud = amplitud, OA = areas, Dispersion = sigma))
}



#' @title Oscillation index
#' @description This function provides the oscillation index of each ROI, computed as the arc
#'     length of the trace in the time-response plane divided by the length of the time interval.
#' @author Enrique Perez_Riesgo
#' @param interval Numeric vector of length two with the time interval to analyze; if NULL, c(0, 1) is used.
#' @param data Data frame whose first column is time and whose remaining columns are the ROI traces.
#' @return Named numeric vector with the oscillation index of each ROI
#' @export OI
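#' @examples
#' # Illustrative sketch with made-up traces: a flat trace gives an index equal to 1
#' # (only the time component contributes), an oscillating trace a larger value.
#' tr <- data.frame(Time = seq(0, 1, by = 0.05),
#'                  ROI1 = sin(2 * pi * seq(0, 1, by = 0.05)),
#'                  ROI2 = rep(0.5, 21))
#' OI(interval = c(0, 1), data = tr)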

OI <- function(interval = NULL, data){

  if(is.null(interval)){ # If interval is NULL, the default interval [0, 1] is used to estimate the oscillation index
    interval <- c(0, 1)
  }
  data <- data[data[, 1] <= interval[2] &  data[, 1] >= interval[1], ]
  data.lag <- data[- nrow(data), ]
  diferencia <- data[-1, ]- data.lag
  diferencia2 <- diferencia ^ 2
  d2 <- diferencia2[, -1] + diferencia2[, 1]
  distancia <- sqrt(d2)
  IO <- colSums(distancia)/(data[nrow(data), 1]- data[1, 1])
  return(IO)
  #prueba <- NULL
  #for(i in 2:nrow(as.matrix(dist(data[, c(1, 3)])))){prueba <- c(prueba, as.matrix(dist(data[, c(1, 3)]))[i, (i-1)])}
  #sum(prueba)
}



#' @title Oscillation index as mean increment
#' @description This function provides the oscillation index computed as the average absolute
#'     increment between every two consecutive points of each ROI trace.
#' @author Enrique Perez_Riesgo
#' @param interval Numeric vector of length two with the time interval to analyze; if NULL, c(0, 1) is used.
#' @param data Data frame whose first column is time and whose remaining columns are the ROI traces.
#' @return Named numeric vector with the oscillation index of each ROI
#' @export OIl
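#' @examples
#' # Illustrative sketch with made-up traces: the flat ROI2 gives 0, the oscillating
#' # ROI1 a positive mean absolute increment per sampling step.
#' tr <- data.frame(Time = seq(0, 1, by = 0.05),
#'                  ROI1 = sin(2 * pi * seq(0, 1, by = 0.05)),
#'                  ROI2 = rep(0.5, 21))
#' OIl(interval = c(0, 1), data = tr)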

OIl <- function(interval = NULL, data){

  if(is.null(interval)){ # If interval is NULL, the default interval [0, 1] is used to estimate the oscillation index
    interval <- c(0, 1)
  }
  data <- data[data[, 1] <= interval[2] &  data[, 1] >= interval[1], ]
  data.lag <- data[- nrow(data), ]
  diferencia <- abs(data[-1, ]- data.lag)
  IOl <- colSums(diferencia)/((nrow(data)-1))
  IOl <- IOl[-1]
  return(IOl)
}




#' @title Multivariate outliers.
#' @description Outlier detection based on Mahalanobis distances, where the mean vector and
#'     covariance matrix are re-estimated using only the observations whose initial Mahalanobis
#'     distance is below the third quartile of all distances.
#' @author Enrique Perez_Riesgo
#' @param X Numeric matrix or data frame with one row per observation (ROI) and one column per variable.
#' @return Numeric vector with the row indices of the observations flagged as outliers
#' @export mahOutlier
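#' @examples
#' # Illustrative sketch with simulated data: two extreme rows appended to a
#' # homogeneous cloud are expected to be flagged (MASS must be installed).
#' set.seed(1)
#' X <- rbind(matrix(rnorm(90), ncol = 3),
#'            matrix(rnorm(6, mean = 10), ncol = 3))
#' mahOutlier(X)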

mahOutlier <- function(X){
  # Rank of X
  QR.desc <- qr(X)
  rangoX <- QR.desc$rank
  # Mean of all the data, the centre
  centro <- colMeans(X)
  # Mahalanobis distance to the centre
  #if(rangoX < ncol(X)){
    dm <- mahalanobis(X, centro, MASS::ginv(cov(X)), inverted = TRUE)
  #}else{
    #dm <- mahalanobis(X, centro, cov(X))
  #}
  # Keep the observations whose Mahalanobis distance is below the third quartile of all distances
  X.menordm <- X[dm <= quantile(dm)[4],]
  # Reduced estimators
  centroR <- colMeans(X.menordm)
  covarianzaR <- cov(X.menordm)
  # Mahalanobis distances to the reduced centre
  p <- ncol(X)
  #if(rangoX < ncol(X)){
    dmR <- mahalanobis(X, centroR, MASS::ginv(covarianzaR), inverted = TRUE)
  #}else{
   # dmR <- mahalanobis(X, centroR, covarianzaR)
  #}

  outliers <- as.numeric(which(dmR > p + 3 * sqrt(2 * p)))
  return(outliers)
}




#' @title Calcium imaging data analysis.
#' @description This function carries out a comprehensive calcium imaging data
#'   analysis. Notably, the folder which stores the .txt files also has to store a
#'   .csv file (estimulos.csv) where each row corresponds to a stimulus, such that the first
#'   column stores the name of each stimulus, the second column the stimulus start
#'   time, in minutes, and the third column the stimulus end time. Additional rows
#'   can be typed:
#'   cut: the interval of the experiment which you want to remove.
#'   IO: the interval where the oscillation index is assessed.
#'   Nisoldipina: the interval of the experiment where nisoldipine,
#'   an antagonist of VOCCs, is employed in order to remove the masking effect of
#'   calcium entry through VOCCs when you are concerned with assessing SOCE.
#'   Optionally, a remove.csv file with a column named remove listing the ROI numbers to
#'   discard can also be placed in the same folder.
#' @author Enrique Perez_Riesgo
#' @param grupos Vector with the desired number of groups for each experiment folder; if NULL,
#'               three groups per experiment are used. It is ignored when agrupacion = "silueta".
#' @param time.lapse.max If you want to fix an interval in which the maximum of the response is taken,
#'                       set this parameter to the upper bound of that interval, measured from the
#'                       stimulus onset (which the program resets to zero, regardless of the .csv
#'                       built by the researcher).
#' @param agrupacion If "silueta", the number of groups per experiment is chosen by maximizing the
#'                   average silhouette width.
#' @param modo Clustering assignment used for the group plots and tables: "Kmedioids" (PAM) or
#'             "cluster" (hierarchical clustering cut at the chosen number of groups).
#' @param outlier Logical; if TRUE, multivariate outliers are removed with mahOutlier and the results
#'                are written to "resultadosOUT", otherwise to "resultados".
#' @param directory Directory which contains one folder per experiment; defaults to getwd().
#' @param skip Number of lines to skip when reading the .txt files.
#' @param data.scale Logical passed to prcomp as the scale. argument.
#' @param legend.ROIs Logical; if TRUE, a legend with the ROI names is added to the traces plot.
#' @param interval Interval where the oscillation index is computed; overridden by an IO row in estimulos.csv.
#' @param Units Time units of the .txt files, "ms" or "s"; time is converted to minutes.
#' @param Smooth. Logical; if TRUE, the traces are smoothed with smooth().
#' @param y.int Lower and upper y-axis limits for the trace plots.
#' @param min.threshold ROIs whose signal ever falls to or below this value are discarded.
#' @param slope.conf Confidence level of the confidence intervals for the slopes.
#' @param factor.error Factor multiplying the residual dispersion used to build the detection threshold.
#' @param OscInd Oscillation index estimator, "OI" or "OIl".
#' @return plots (.pdf) and tables (.csv) written to the results folder for each experiment
#' @export analCI
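#' @examples
#' \dontrun{
#' # A minimal, hypothetical sketch: each experiment folder under the working directory
#' # is assumed to contain the exported .txt traces plus an estimulos.csv (read with
#' # read.csv2, times in minutes), for instance:
#' #   estimulo;inicio;fin
#' #   Carbacol;1;3
#' #   2fCarbacol;3;6
#' #   IO;0;1
#' analCI(grupos = NULL, outlier = TRUE, directory = getwd(),
#'        Units = "ms", Smooth. = TRUE, OscInd = "OI")
#' }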

# Image analysis

analCI <- function(grupos = NULL, time.lapse.max = NULL, agrupacion = "silueta",
                   modo = "Kmedioids", outlier = TRUE,directory = NULL, skip = 5,
                   data.scale = TRUE, legend.ROIs = TRUE, interval = NULL, Units = "ms",
                   Smooth. = TRUE, y.int =c(0, 1.5), min.threshold = 0, slope.conf = 0.95,
                   factor.error = 1.645, OscInd = "OI") {
  #directories
  if(is.null(directory)){
    directory <- getwd()
  }
  archivos <- dir(directory)

  if(outlier == TRUE){
    if(length(grep(pattern = "^resultadosOUT$", archivos)) == 0){
      dir.create(file.path(directory, "resultadosOUT"))
    }
    results.dir <- file.path(directory, "resultadosOUT")
  }
  if(outlier == FALSE){
    if(length(grep(pattern = "^resultados$", archivos)) == 0){
      dir.create(file.path(directory, "resultados"))
    }
    results.dir <- file.path(directory, "resultados")
  }


  if(length(grep("resultados", archivos)) != 0){archivos <- archivos[-(grep("resultados", archivos))]}
  if(length(grep(".Rmd", archivos)) != 0){archivos <- archivos[-(grep(".Rmd", archivos))]}
  if(length(grep(".csv", archivos)) != 0){archivos <- archivos[-(grep(".csv", archivos))]}
  if(length(grep(".xls", archivos)) != 0){archivos <- archivos[-(grep(".xls", archivos))]}
  if(length(grep(".txt", archivos)) != 0){archivos <- archivos[-(grep(".txt", archivos))]}
  grupo.numero <- 0
  #read .txt
  for(z in archivos){
    tequiste <- dir(file.path(directory, z))
    tequiste <-tequiste[grep(".txt", tequiste)]
    if(length(tequiste) > 1){
      datos <- read.table(file.path(file.path(directory, z), tequiste[1]), header = FALSE, skip = skip)
      for(i in tequiste[-1]){
        datos2 <- read.table(file.path(file.path(directory, z), i), header = FALSE, skip = skip)
        datos <- cbind(datos, datos2)
      }

    }else{

      datos <- read.table(file.path(file.path(directory, z), tequiste), header = FALSE, skip = skip)
    }


    colnames(datos) <- c("Time", paste("ROI", 1:(dim(datos)[2]-1)))
    # Units
    Unidades <- c("ms", "s")
    unidades <- c(6*10^4, 6*10)
    datos$Time <- datos$Time/unidades[grep(paste("^", Units, sep = ""), Unidades)]
    #remove those ROIs whose response is bad
    if(length(grep("remove", dir(file.path(directory, z)))) != 0){
      remove.exp <- read.csv2(file.path(file.path(directory, z), "remove.csv"), header = TRUE)
      datos <- datos[, -(as.numeric(remove.exp$remove)+1)]
    }

    # Stimuli
    estimulos <- read.csv2(file.path(file.path(directory, z), "estimulos.csv"), header = TRUE)

    # Trim the recording where required, labelled as "cut" in estimulos
    if(length(grep("cut", estimulos[,1])) != 0){
      cut <- estimulos[grep("cut", estimulos[, 1]), ]
      estimulos <- estimulos[- grep("cut", estimulos[,1]), ]
      datos <- datos[datos[, 1] <= as.numeric(cut[2]), ]

    }
    if(length(grep("IO", estimulos[,1])) != 0){
      interval <- as.numeric(estimulos[grep("IO", estimulos[, 1]), -1])
      estimulos <- estimulos[- grep("IO", estimulos[,1]), ]
    }
    Nisoldipina <- NULL
    if(length(grep("Nisoldipin", estimulos[,1])) != 0){
      Nisoldipina <- as.numeric(estimulos[grep("Nisoldipina", estimulos[, 1]), -1])
      Nisoldipina <- c(datos[sum(datos$Time <= Nisoldipina[1]), 1], datos[sum(datos$Time < Nisoldipina[2]) + 1, 1])
      estimulos <- estimulos[- grep("Nisoldipina", estimulos[,1]), ]
    }

    # Remove ROIs whose signal ever falls to or below min.threshold
    encima <- apply(datos[, -1] <= min.threshold, MARGIN = 2, sum)
    datos <- datos[, c(TRUE, encima == 0)]


    # pdf with the raw (unsmoothed) traces
    pdf(paste(results.dir,"/Graficos", z, ".pdf", sep = ""))
    plot(datos$Time, datos[,2], type = "l", col = 2, xlab = "tiempo", ylab = "Ratio F340/380", ylim = c(ifelse(is.null(y.int[1]), -0.1, y.int[1]), ifelse(is.null(y.int[2]), max(datos[,-1])*1.25, y.int[2]*1.25)) , main = z, axes = FALSE)
    axis(side = 2, at = seq(0, round(max(datos[,-1])+max(datos[,-1])*0.25, 0), by = 0.1))
    for(i in 3:dim(datos)[2]){
      lines(datos$Time, datos[,i], type = "l", col = i, lwd = 2)
    }
    if(legend.ROIs == TRUE){
      legend("topleft", legend = colnames(datos)[-1], col = 2:dim(datos)[2], cex = 0.5, lty = 1, ncol = 2)
    }

    color <- estimulos[,1]
    for(i in 1:dim(estimulos)[1]){
      lines(estimulos[i,2:3], c(0,0), lty = 1, col = as.numeric(color)[i], lwd = 10)
      text(mean(as.numeric(estimulos[i,2:3])), c(-0.05, -0.05), labels = estimulos[i,1])
    }
    lines(c(max(datos[,1])-1.5, max(datos[,1])-0.5), c(max(datos[,-1])+0.15*max(datos[,-1]), max(datos[,-1])+max(datos[,-1])*0.15), lty = 1, col = "black", lwd = 10)
    text(mean(c(max(datos[,1])-1.5, max(datos[,1])-0.5)),c(max(datos[,-1])+0.20*max(datos[,-1]), max(datos[,-1])+max(datos[,-1])*0.20), labels = "1min")
    dev.off()

    # Smoothed data (datos) and raw data (datosraw)
    datosraw <- datos
    if(Smooth. == TRUE){
      datos <- data.frame(apply(datos, 2, function(x){smooth(x)}))
    }

    # Stimulus axes
    require(pracma)
    y.estimulosS <- data.frame(matrix(0,nrow=dim(estimulos)[1], ncol=dim(datos)[2]-1)) # matrix of dimension stimuli x ROIs
    y.estimulosE <- data.frame(matrix(0,nrow=dim(estimulos)[1], ncol=dim(datos)[2]-1)) # matrix of dimension stimuli x ROIs
    if(estimulos[dim(estimulos)[1], dim(estimulos)[2]] > datos$Time[length(datos$Time)]){
      estimulos[dim(estimulos)[1], dim(estimulos)[2]] <- datos$Time[length(datos$Time)]
    } # If the last stimulus ends later than the recorded time, it is set to end when the data time ends
    if(estimulos[1, 2] == 0){
      estimulos[1, 2] <- datos$Time[2]
    } # If the first stimulus starts at time zero, it is shifted to the next time point to avoid errors later on
    for(i in 1:dim(estimulos)[1]){
      posicionS <- datos[datos$Time < estimulos[i,2],] # TRUE for all rows whose time is earlier than the stimulus onset
      y.estimulosS[i,] <- as.numeric(posicionS[dim(posicionS)[1],-1]) # response value at the last of those rows, i.e. the baseline when the stimulus starts
      rownames(y.estimulosS)[i] <- as.character(posicionS[dim(posicionS)[1],1]) # stores the recorded time at which the stimulus starts
      y.estimulosE[i,] <- as.numeric(datos[datos$Time > estimulos[i,3],-1][1,]) # here the first matching row corresponds to the stimulus end
      if(sum(is.na(y.estimulosE[i,])) != 0){
        y.estimulosE[i,] <- as.numeric(datos[datos$Time >= estimulos[i,3],-1][1,])
        rownames(y.estimulosE)[i] <- as.character(datos[datos$Time >= estimulos[i,3],1][1]) # if there are no later times, greater-or-equal is used instead of strictly greater
      }else {
        rownames(y.estimulosE)[i] <- as.character(datos[datos$Time > estimulos[i,3],1][1])
      }
    }
    # Heights
    alturas <- data.frame(matrix(0,ncol=(dim(estimulos)[1]+2), nrow = dim(datos)[2]-1)) # Matrix of dimension ROIs x (stimuli + 2)


    # Define objects to store slopes, maxima, regression coefficients, ...
    pendientesL <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    pendientesU <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    pendientes <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    intercepto <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    pendientecoef <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    maximos1 <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    alturas1 <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))
    areas1 <- data.frame(matrix(0,ncol=(dim(estimulos)[1]), nrow = dim(datos)[2]-1))

    # Name the previous objects (columns, etc.)
    colnames(alturas1) <- paste("ALTURA", as.character(estimulos[, 1]), sep = " ")
    colnames(areas1) <- paste("AREA", as.character(estimulos[, 1]), sep = " ")


    y.estimulos <- rbind(y.estimulosS, y.estimulosE) # binds the stimulus-onset matrix with the stimulus-end matrix for each ROI
    tiempos <- as.numeric(rownames(y.estimulos)) # recorded onset and end times of the stimuli
    alturas[,(dim(alturas)[2]-1)] <- as.numeric(datos[1,-1]) # baseline data at the start
    alturas[,dim(alturas)[2]] <- as.numeric(datos[nrow(datos),-1]) # baseline data at the end
    colnames(alturas)[(dim(alturas)[2]-1)] <- "BASAL.Principio"
    colnames(alturas)[dim(alturas)[2]] <- "BASAL.Final"


    for(i in 1:dim(estimulos)[1]){
      # Select the stimulus interval and look for the maximum
      intervalo <- c(as.numeric(estimulos[i, 2]), as.numeric(estimulos[i, 3]))
      altura.total <- apply(datos[sum(datos$Time < estimulos[i,2]):(sum(datos$Time < estimulos[i,3])+1),-1], 2, max)
      if(!is.null(Nisoldipina)){ # First check whether Nisoldipina is NULL and, if not, whether stimulus i lies within the nisoldipine interval
        print(Nisoldipina)
        if(Nisoldipina[1] <= datos[sum(datos$Time < estimulos[i,2]), 1]  & Nisoldipina[2] >= datos[sum(datos$Time < (estimulos[i,3])+1), 1])
        {
          altura.total <- as.numeric(datos[(sum(datos$Time < estimulos[i,3])),-1])
        }
      }
      ### NEW
      # Select the data within the interval of stimulus i and create an object with them
      datos.int <- datos[datos[, 1] > intervalo[1] & datos[, 1] < intervalo[2], ]

      # Select the minimum response value, per ROI, between half a minute before and a quarter of a minute after the stimulus onset
      datos.int.min <- datos[datos[, 1] > (intervalo[1]-0.5) & datos[, 1] < (intervalo[1] + 0.25), ]
      datos.int.min <- apply(datos.int.min, MARGIN = 2, min)

      # Set the minimum time of datos.int to zero
      datos.int <- rbind(datos.int.min, datos.int)
      datos.int[, 1] <- datos[datos[, 1] >= datos[(which(datos[, 1] > intervalo[1]) - 1), 1][1] & datos[, 1] < intervalo[2], 1]

      # Subtract the computed minimum value from the interval data
      datos.int <- apply(datos.int, 2, function(x){
        x - x[1]
      })


      Y <- datos.int[,1]
      # Polynomial model
      datostiempo <- as.matrix(data.frame(T = datos.int[, 1], T2 = datos.int[, 1]^2, T3 = datos.int[, 1]^3, T4 = datos.int[, 1]^4, T5 = datos.int[, 1]^5, T6 = datos.int[, 1]^6))

      modelo6 <- apply(datos.int[,-1], 2, function(X){
        model <- lm(X ~ datostiempo)
        u <- seq(0, intervalo[2] - intervalo[1], by = 0.1)
        C6 <- coef(model)
        colnames(datostiempo) <- c("datostiempoT", paste("datostiempoT", 2:6, sep=""))
        maximos <- max(predict(model)[1: sum(Y<=1)])
      })


      pendientesL[i] <- apply(datos.int[,-1], 2, function(X){confint(lm(X ~ Y), parm = "Y", level = slope.conf)})[1, ]
      pendientesU[i] <- apply(datos.int[,-1], 2, function(X){confint(lm(X ~ Y), parm = "Y", level = slope.conf)})[2, ]
      pendientes[i] <- sign(pendientesL[i]) + sign(pendientesU[i])
      # In case the maximum predicted by modelo6 is not wanted
      if(!is.null(time.lapse.max)){
        maximos1[i] <- apply(datos.int[1: sum(Y<=time.lapse.max), -1], 2, max)
      }else{
        maximos1[i] <- apply(datos.int[, -1], 2, max)
      }
      # Maximum predicted by modelo6 (this overrides the previous assignment)
      maximos1[i] <- modelo6

      # If the maximum is required to lie within an interval of time.lapse.max from the stimulus onset
      if(!is.null(time.lapse.max)){
        alturas1[i] <- apply(datos.int[1: sum(Y<=time.lapse.max),-1], 2, max)
      }else{
        alturas1[i] <- apply(datos.int[,-1], 2, max)
      }

      intercepto[i] <- apply(datos.int[,-1], 2, function(X){coef(lm(X ~ Y))[1]})
      pendientecoef[i] <- apply(datos.int[,-1], 2, function(X){coef(lm(X ~ Y))[2]})
      areas1[i] <- apply(datos.int[,-1], 2, function(x){trapz(x = Y, y = x)})/max(Y)

      # Select the stimulus interval and look for the minimum
      altura.total.min <- apply(datos[sum(datos$Time < estimulos[i,2]):(sum(datos$Time < estimulos[i,3])+1),-1], 2, min)
      # Here it is taken into account whether the stimulus lowers or raises the signal
      alturas[,i] <- apply(rbind(altura.total, altura.total.min, as.numeric(y.estimulosS[i,])), 2, FUN = function(x){ifelse(abs(x[1] - x[3]) >= abs(x[2] - x[3]), x[1] - x[3], x[2] - x[3])}) # corrected by the (baseline) response value just before the stimulus
      colnames(alturas)[i] <- paste("ALTURA", i, sep="")
    }

    write.csv2(cbind(pendientesL, pendientes, pendientesU), paste(results.dir,"/pendientes", z, ".csv", sep = ""))

    rownames(alturas) <- colnames(datos)[-1] # named after the ROIs
    # Height variables
    datos.basal <- datos
    # Area variables
    areas <- data.frame(matrix(0,ncol=dim(estimulos)[1], nrow = dim(datos)[2]-1))
    for(i in 1:dim(estimulos)[1]){
      area.total <- apply(datos.basal[sum(datos.basal$Time < estimulos[i,2]):(sum(datos.basal$Time < estimulos[i,3])+1),-1], 2, function(x){trapz(x = datos.basal[sum(datos.basal$Time < estimulos[i,2]):(sum(datos.basal$Time < estimulos[i,3])+1),1], y = x)})
      area.restar <- apply(y.estimulos[c(i,(i+dim(estimulos)[1])),], 2, function(x){trapz(x = tiempos[c(i,(i+dim(estimulos)[1]))], y = x)})
      areas[,i] <- area.total- area.restar
      colnames(areas)[i] <- paste("AREA", i, sep="")
    }
    rownames(areas) <- colnames(datos)[-1]

    # Oscillation index
    if(OscInd == "OI"){
      oscilation.index <- OI(interval = interval, data = datosraw)
    }
    if(OscInd == "OIl"){
      oscilation.index <- OIl(interval = interval, data = datosraw)
    }
    longitud.onda <- wave.length(interval = interval, data = datosraw)


    #table
    write.csv2(cbind(areas1, alturas1, longitud.onda, oscilation.index), paste(results.dir,"/datos", z, ".csv", sep = ""))


    # Decide whether or not there is a signal
    dispersion <- longitud.onda$Dispersion * factor.error ## error?
    datos.responden <- cbind(alturas1[, -c((ncol(alturas) - 1): ncol(alturas))], dispersion, maximos1, intercepto, pendientecoef, pendientes)
    colnames(datos.responden)[1:length(estimulos[, 1])] <- as.character(estimulos[, 1])
    colnames(datos.responden)[(length(estimulos[, 1])+2):(2*length(estimulos[, 1])+1)] <- paste("Max", as.character(estimulos[, 1]))
    colnames(datos.responden)[(2*length(estimulos[, 1])+2):(3*length(estimulos[, 1])+1)] <- paste("interc", as.character(estimulos[, 1]))
    colnames(datos.responden)[(3*length(estimulos[, 1])+2):(4*length(estimulos[, 1])+1)] <- paste("pend", as.character(estimulos[, 1]))
    colnames(datos.responden)[(4*length(estimulos[, 1])+2):(5*length(estimulos[, 1])+1)] <- paste("signPend", as.character(estimulos[, 1]))
    phase2 <- grep("2f", estimulos[, 1])

    decision <- t(apply(datos.responden, 1, function(x){
      LOQ <- x[(length(estimulos[, 1])+2):(2*length(estimulos[, 1])+1)] >= as.numeric(x[(nrow(estimulos)+1)])*3.29


      slopes <-  x[(4*length(estimulos[, 1])+2):(5*length(estimulos[, 1])+1)] <= 0
      slopesp <-  x[(4*length(estimulos[, 1])+2):(5*length(estimulos[, 1])+1)] >= 0
      a <- as.numeric(c(x[(2*length(estimulos[, 1])+2):(3*length(estimulos[, 1])+1)]))
      b <- as.numeric(c(x[(3*length(estimulos[, 1])+2):(4*length(estimulos[, 1])+1)]))
      LOD2 <- ((estimulos[, 3]-estimulos[,2] <= (as.numeric(x[length(estimulos[, 1])+1]*2) - a)/as.numeric(b)) & slopes) | ((0 >= (as.numeric(x[length(estimulos[, 1])+1]*2) - a)/as.numeric(b)) & slopesp) # LOD2 is the crossing point of the estimated line with Y = LOD
      #LOD2 <- estimulos[, 3]-estimulos[,2] <= (as.numeric(x[length(estimulos[, 1])+1]*2) - a)/as.numeric(b)
      #signal <- LOQ * slopes
      signal <- as.numeric(LOQ)
      if(length(phase2) != 0){
        #signal[phase2] <- LOQ * slopes * LOD2  # for a second phase to exist, the first one must also be present
        signal[phase2] <- as.numeric(LOQ[phase2] * LOD2[phase2])
      }

      return(signal)
    }))
    colnames(decision) <- as.character(estimulos[, 1])

    # datos.responden table
    write.csv2(datos.responden, paste(results.dir,"/dat.responden", z, ".csv", sep = ""))

    # full table
    write.csv2(cbind(areas1, alturas1, longitud.onda, oscilation.index, decision), paste(results.dir,"/datos", z, ".csv", sep = ""))

    # Outliers: it is checked that the number of observations is not lower than the number of variables; otherwise, outliers are not computed
    datosO <- datos # datosO, instead of datos, is used in the final outlier-free plot, in case outliers have been removed
    if(outlier == TRUE && nrow(areas) > ncol(cbind(areas, alturas))){
      X = cbind(areas, alturas)
      outliers <- mahOutlier(X)
      if(length(outliers) != 0){ # if there are no outliers, nothing is removed
        areas <- areas[-outliers, ]
        alturas <- alturas[-outliers, ]
        areas1 <- areas1[-outliers, ]
        alturas1 <- alturas1[-outliers, ]
        longitud.onda <- longitud.onda[-outliers, ]
        oscilation.index <- oscilation.index[-outliers]
        datosO <- datos[,-(outliers+1)]
        decision <- decision[-outliers, ]
      }
    }


    # Distances
    distancias <- dist(scale(cbind(areas1, alturas1, oscilation.index)), method = "euclidean")
    grupo.numero <- grupo.numero + 1
    if(is.null(grupos)){
      grupos = rep(3, length(archivos))
    }
    if(agrupacion == "silueta"){
      grupos = rep(0, length(archivos))
      tope <- ifelse(ncol(datosO[,-1]) >= 6, 5, ncol(datosO)-2)
      siluetas <- as.numeric(vector(length = tope-1))
      for (i in 2:tope) {
        silueta.media <- cluster::silhouette(cluster::pam(scale(cbind(areas1, alturas1)), k = i), grupos[grupo.numero], dist = distancias)
        siluetas[(i-1)] <- mean(silueta.media[,3])
      }
      grupos[grupo.numero] <- which(siluetas == max(siluetas))+1
    }
    pdf(paste(results.dir,"/Cluster", z, ".pdf", sep = ""))
    plot(hclust(distancias, method = "ward.D2"), main = z)
    dev.off()
    kmedioides <- cluster::pam(scale(cbind(areas1, alturas1, oscilation.index)), grupos[grupo.numero])
    grupos2 <- cutree(hclust(distancias), grupos[grupo.numero])

    # full table
    write.csv2(cbind(areas1, alturas1, longitud.onda, oscilation.index, grupos.Kmedioids = kmedioides$clustering, grupos.Cluster = grupos2, decision), paste(results.dir,"/datosOut", z, ".csv", sep = ""))

    # Groups according to the selection in the modo argument
    if(modo == "Kmedioids"){
      grupitos = as.numeric(table(kmedioides$clustering))
      asignacion <- kmedioides$clustering
    }
    if(modo == "cluster"){
      grupitos = as.numeric(table(grupos2))
      asignacion <- grupos2
    }
    require(ggplot2)
    require(ggfortify)
    require(cluster)
    PCA <- prcomp(cbind(areas1, alturas1, oscilation.index), scale. = data.scale)
    pdf(paste(results.dir,"/PCA", z, ".pdf", sep = ""))
    #autoplot(PCA, shape = FALSE, label.size = 3)
    plot(PCA$x[,1], PCA$x[,2], xlab = paste("PC1", round(PCA$sdev[1]^2/sum(PCA$sdev^2)*100,2),"%"), ylab = paste("PC2", round(PCA$sdev[2]^2/sum(PCA$sdev^2)*100,2),"%"), main = z, col = 0)
    text(PCA$x[,1], PCA$x[,2], labels = colnames(datosO)[-1], col = as.numeric(asignacion))
    dev.off()
    tabla.medias <- apply(cbind(areas1, alturas1, longitud.onda, oscilation.index), MARGIN = 2, FUN = tapply, INDEX=asignacion, mean)
    tabla.desviaciones <- apply(cbind(areas1, alturas1, longitud.onda, oscilation.index), MARGIN = 2, FUN = tapply, INDEX=asignacion, sd)/sqrt(grupitos)

    # HEIGHTS
    pdf(paste(results.dir,"/Barras.Altura", z, ".pdf", sep = ""))
    media.altura <- tabla.medias[,grep("ALTURA", colnames(tabla.medias))]
    media.altura[is.na(media.altura)] <- 0
    desviacion.altura <- tabla.desviaciones[,grep("ALTURA", colnames(tabla.desviaciones))]
    desviacion.altura[is.na(desviacion.altura)] <- 0

    # bar plots
    barras <- barplot(media.altura, beside = TRUE, las = 2, cex.names = 1, ylim = c(min(pretty(media.altura - desviacion.altura)), max(media.altura + desviacion.altura)*1.2), col = 2:(length(grupitos)+1), main = z, names.arg = estimulos[,1])
    arrows(barras, media.altura + desviacion.altura, barras, media.altura - desviacion.altura, angle = 90, code = 3)
    legend("topright", legend = paste("n = ", grupitos), fill = 2:(length(grupitos)+1))
    box(bty = "l")
    dev.off()

    #OI
    pdf(paste(results.dir,"/Barras.OI", z, ".pdf", sep = ""))
    media.OI <- tabla.medias[,grep("oscilation.index", colnames(tabla.medias))]
    media.OI[is.na(media.OI)] <- 0
    desviacion.OI <- tabla.desviaciones[,grep("oscilation.index", colnames(tabla.desviaciones))]
    desviacion.OI[is.na(desviacion.OI)] <- 0

    # bar plots
    barras <- barplot(media.OI, beside = TRUE, las = 2, cex.names = 1, ylim = c(min(pretty(media.OI - desviacion.OI)), max(pretty(media.OI + desviacion.OI*1.2))), col = 2:(length(grupitos)+1), main = z, xpd = F)
    arrows(barras, media.OI + desviacion.OI, barras, media.OI - desviacion.OI, angle = 90, code = 3)
    legend("topright", legend = paste("n = ", grupitos), fill = 2:(length(grupitos)+1))
    box(bty = "l")
    dev.off()

    #OA
    pdf(paste(results.dir,"/Barras.OA", z, ".pdf", sep = ""))
    media.OA <- tabla.medias[,grep("OA", colnames(tabla.medias))]
    media.OA[is.na(media.OA)] <- 0
    desviacion.OA <- tabla.desviaciones[,grep("OA", colnames(tabla.desviaciones))]
    desviacion.OA[is.na(desviacion.OA)] <- 0

    # bar plots
    barras <- barplot(media.OA, beside = TRUE, las = 2, cex.names = 1, ylim = c(min(pretty(media.OA - desviacion.OA)), max(pretty(media.OA + desviacion.OA*1.2))), col = 2:(length(grupitos)+1), main = z, xpd = F)
    arrows(barras, media.OA + desviacion.OA, barras, media.OA - desviacion.OA, angle = 90, code = 3)
    legend("topright", legend = paste("n = ", grupitos), fill = 2:(length(grupitos)+1))
    box(bty = "l")
    dev.off()

    #Dispersion
    pdf(paste(results.dir,"/Barras.sigma", z, ".pdf", sep = ""))
    media.Dispersion <- tabla.medias[,grep("Dispersion", colnames(tabla.medias))]
    media.Dispersion[is.na(media.Dispersion)] <- 0
    desviacion.Dispersion <- tabla.desviaciones[,grep("Dispersion", colnames(tabla.desviaciones))]
    desviacion.Dispersion[is.na(desviacion.Dispersion)] <- 0

    # bar plots
    barras <- barplot(media.Dispersion, beside = TRUE, las = 2, cex.names = 1, ylim = c(min(pretty(media.Dispersion - desviacion.Dispersion)), max(pretty(media.Dispersion + desviacion.Dispersion*1.2))), col = 2:(length(grupitos)+1), main = z, xpd = F)
    arrows(barras, media.Dispersion + desviacion.Dispersion, barras, media.Dispersion - desviacion.Dispersion, angle = 90, code = 3)
    legend("topright", legend = paste("n = ", grupitos), fill = 2:(length(grupitos)+1))
    box(bty = "l")
    dev.off()


    # Tables of means and deviations
    descriptiva <- matrix(0, ncol = (2*length(grupitos)+2), nrow = (length(estimulos[,1])+4))
    rownames(descriptiva) <- c(as.character(estimulos[,1]), "n", "OI", "OA", "sigma")
    colnames(descriptiva) <- c(paste(rep(c("media", "desviación"), length(grupitos)), rep(1:length(grupitos), each = 2)), "Media Global", "Desviación Global")
    descriptiva <- data.frame(descriptiva)
    for(i in 1:length(grupitos)){
      descriptiva[,(2*(i-1)+1)] <- c(signif(t(media.altura)[,i],2),grupitos[i], t(media.OI)[,i], t(media.OA)[,i], t(media.Dispersion)[,i])
      descriptiva[,(2*(i))] <- c(signif(t(desviacion.altura)[,i],2)," ", t(desviacion.OI)[,i], t(desviacion.OA)[,i] , t(desviacion.Dispersion)[,i])
    }
    if(length(estimulos[,1]) > 1){
      descriptiva[,(dim(descriptiva)[2]-1)] <- c(signif(apply(alturas1[,grep("ALTURA", colnames(alturas1))], MARGIN = 2, mean), 2), sum(grupitos), mean(oscilation.index), mean(longitud.onda$OA), mean(longitud.onda$Dispersion))
      descriptiva[,(dim(descriptiva)[2])] <- c(signif(apply(alturas1[,grep("ALTURA", colnames(alturas1))], MARGIN = 2, sd),2), "", sd(oscilation.index), sd(longitud.onda$OA), sd(longitud.onda$Dispersion))
    }else{
      descriptiva[,(dim(descriptiva)[2]-1)] <- c(signif(mean(alturas1[,grep("ALTURA", colnames(alturas1))]),2), sum(grupitos), mean(oscilation.index), mean(longitud.onda$OA), mean(longitud.onda$Dispersion))
      descriptiva[,(dim(descriptiva)[2])] <- c(signif(sd(alturas1[,grep("ALTURA", colnames(alturas1))]),2), "", sd(oscilation.index), sd(longitud.onda$OA), sd(longitud.onda$Dispersion))
    }

    write.csv2(descriptiva, paste(results.dir,"/descriptiva", z, ".csv", sep = ""))

    # pdf of raw data, grouped and without outliers
    pdf(paste(results.dir,"/Graficos_Grupos", z, ".pdf", sep = ""))
    color <- as.numeric(asignacion) + 1
    plot(datos$Time, datosO[,2], type = "l", col = color[1], xlab = "tiempo", ylab = "Ratio F340/380", ylim = c(ifelse(is.null(y.int[1]), -0.1, y.int[1]), ifelse(is.null(y.int[2]), max(datosO[,-1])*1.25, y.int[2]*1.25)), main = z, axes = FALSE)
    axis(side = 2, at = seq(0, round(max(datosO[,-1])+max(datosO[,-1])*0.25, 1), by = 0.1))
    for(i in 3:dim(datosO)[2]){
      lines(datosO$Time, datosO[,i], type = "l", col = color[(i-1)], lwd = 2)
    }
    legend("topleft", legend = paste("n = ", grupitos), fill = unique(color))

    color <- estimulos[,1]
    for(i in 1:dim(estimulos)[1]){
      lines(estimulos[i,2:3], c(0,0), lty = 1, col = as.numeric(color)[i], lwd = 10)
      text(mean(as.numeric(estimulos[i,2:3])), c(-0.05, -0.05), labels = estimulos[i,1])
    }
    lines(c(max(datosO[,1])-1.5, max(datosO[,1])-0.5), c(max(datosO[,-1])+0.15*max(datosO[,-1]), max(datosO[,-1])+max(datosO[,-1])*0.15), lty = 1, col = "black", lwd = 10)
    text(mean(c(max(datosO[,1])-1.5, max(datosO[,1])-0.5)),c(max(datosO[,-1])+0.20*max(datosO[,-1]), max(datosO[,-1])+max(datosO[,-1])*0.20), labels = "1min")
    dev.off()
    print(paste("Exp", z, "(", grep(z, archivos), "of", length(archivos),")"))
  }
}