R/to_sort/helper_outliers-1.R

# library(fields)


##############################################################################################
########################### DISTANCE - BASED ####################################

# This function returns the indexes of streamline identified as outliers using a distance-based approach.
# In particular for every streamline, the barycenter is projected on the xy plane (and then on the yz).
# On this plane(s) outliers are identified using the basic distance-based approach (Knorr and Ng 1997)
# Then the same procedure is repeated using the spatial median of the streamlines
get_outliers_distance <- function (tract)
{
  percentage=0.01

  ##################### SPATIAL MEDIAN (sp)
  epsilon_sp=5
  dataset_Median = get_spatial_median(tract$data)
  dataset_MedianXY = dataset_Median[,-3]
  dataset_MedianYZ = dataset_Median[,-1]

  ### XY
  # distance.matrix_sp_XY <- rdist(dataset_MedianXY)
  # card_neighbourhood_sp_xy = apply(distance.matrix_sp_XY, 1, check_neighbourhood, epsilon_sp)

  distance.matrix_sp_XY <- dist(dataset_MedianXY, method = "euclidean")
  card_neighbourhood_sp_xy = check_neighbourhood_dist_cycle(distance.matrix_sp_XY, epsilon_sp)

  n=dim(dataset_MedianXY)[1]

  outliers.sp_xy = which(card_neighbourhood_sp_xy/(n-1) < percentage)

  # outliers.sp_xy <- NULL
  # points.sp_xy <- NULL
  # for (i in 1:n) {
  #   count=-1
  #   for (j in 1:n) {
  #     if (distance.matrix_XY[i,j]<epsilon)
  #       count=count+1
  #   }
  #
  #   if (count/(n-1) <percentage){
  #     outliers.sp_xy <- c(outliers.sp_xy, i) #Punti che hanno meno di percentage punti a distanza minore di epsilon
  #     points.sp_xy=rbind(points.sp_xy, dataset_MedianXY[i,])
  #   }
  # }

  ### YZ
  # distance.matrix_sp_YZ <- rdist(dataset_MedianYZ)
  # card_neighbourhood_sp_yz = apply(distance.matrix_sp_YZ, 1, check_neighbourhood, epsilon_sp)

  n=dim(dataset_MedianYZ)[1]

  distance.matrix_sp_YZ <- dist(dataset_MedianYZ, method = "euclidean")
  card_neighbourhood_sp_yz = check_neighbourhood_dist_cycle(distance.matrix_sp_YZ, epsilon_sp)


  # outliers.sp_yz <- NULL
  # points.sp_yz <- NULL
  # for (i in 1:n) {
  #   count=-1
  #   for (j in 1:n) {
  #     if (distance.matrixYZ[i,j]<epsilon)
  #       count=count+1
  #   }
  #
  #   if (count/(n-1) <percentage){
  #     outliers.sp_yz <- c(outliers.sp_yz, i) #Punti che hanno meno di percentage punti a distanza minore di epsilon
  #     points.sp_yz=rbind(points.sp_yz, dataset_MedianYZ[i,])
  #   }
  # }

  outliers.sp_yz = which(card_neighbourhood_sp_yz/(n-1) < percentage)

  #####################  BARYCENTER (bar)
  epsilon_bar=3

  dataset_Barycenter = get_barycenter(tract$data)
  dataset_BarycenterXY = dataset_Barycenter[,-3]
  dataset_BarycenterYZ = dataset_Barycenter[,-1]

  ### XY
  # distance.matrix_XY <- as.matrix(dist(dataset_BarycenterXY, method = "euclidean", diag = TRUE, upper = TRUE, p = 2))
  # distance.matrix_bar_XY <- rdist(dataset_BarycenterXY)
  # card_neighbourhood_bar_xy = apply(distance.matrix_bar_XY, 1, check_neighbourhood, epsilon_bar)

  n=dim(dataset_BarycenterXY)[1]

  distance.matrix_bar_XY <- dist(dataset_BarycenterXY, method = "euclidean")
  card_neighbourhood_bar_xy = check_neighbourhood_dist_cycle(distance.matrix_bar_XY, epsilon_bar)

  outliers.bar_xy = which(card_neighbourhood_bar_xy/(n-1) < percentage)


  ### YZ

  # distance.matrix_bar_YZ <- rdist(dataset_BarycenterYZ)
  # card_neighbourhood_bar_yz = apply(distance.matrix_bar_YZ, 1, check_neighbourhood,epsilon_bar)
  n=dim(dataset_BarycenterYZ)[1]

  distance.matrix_bar_YZ <- dist(dataset_BarycenterYZ, method = "euclidean")
  card_neighbourhood_bar_yz = check_neighbourhood_dist_cycle(distance.matrix_bar_YZ, epsilon_bar)

  outliers.bar_yz = which(card_neighbourhood_bar_yz/(n-1) < percentage)

  outliers = unique(c(outliers.sp_xy, outliers.sp_yz, outliers.bar_yz, outliers.bar_xy))

  return (sort(outliers))
}


check_neighbourhood = function (row_dist_matrix, epsilon) {
  return(sum(row_dist_matrix<epsilon)-1)
}

myfun <- function(i, dist_object) {
  n <- attr(dist_object, "Size")
  count <- 0
  for (j in 1:n) {
    if (j > i)
      dij <- dist_object[n * (i - 1) - i * (i - 1) / 2 + j - i]   # Distanza tra riga i e j (con j maggiore di i e minore o uguale a n)
    else if (j < i)
      dij <- dist_object[n * (j - 1) - j * (j - 1) / 2 + i - j]
    if (i != j && dij < epsilon)
      count <- count + 1
  }
  count
}

myfun2 <- function(i, dist_object) {
  n <- attr(dist_object, "Size")
  count <- 0
  for (j in 1:(i-1)) {
    dij <- dist_object[n * (j - 1) - j * (j - 1) / 2 + i - j]
    if (dij < epsilon)
      count <- count + 2
  }
  count
}

myfun3 <- function(i, dist_object, epsilon) {
  n <- attr(dist_object, "Size")
	j_indices <- 1:(i-1)
	linear_indices <- n * (j_indices - 1) - j_indices * (j_indices - 1) / 2 + i - j_indices
	dvalues <- dist_object[linear_indices]
	2L * sum(dvalues < epsilon)
}

check_neighbourhood_dist_cycle = function (dist_object, epsilon) {
  n <- attr(dist_object, "Size")
  purrr::map_int(1:n, check_neighborhood_of_i, dist_object=dist_object, epsilon=epsilon, n=n)
}

check_neighborhood_of_i <- function(i, dist_object, epsilon, n) {
  j_indices_before= NULL
  j_indices_after= NULL
  if (i!=1)
    j_indices_before = 1:(i-1)
  if (i!=n)
    j_indices_after = (i+1):n
  distance_indices_before = n * (j_indices_before - 1) - j_indices_before * (j_indices_before - 1) / 2 + i - j_indices_before
  distance_indices_after = n * (i - 1) - i * (i - 1) / 2 + j_indices_after - i
  dvalues_before = dist_object[distance_indices_before]
  dvalues_after = dist_object[distance_indices_after]
  sum(dvalues_before < epsilon)+sum(dvalues_after < epsilon)
}

##############################################################################################
########################### DEPTH - BASED ####################################
library(depth)

# Returns the index of the streamline with the barycenter closest to the point outlier
index_min_distance <- function (outlier, dataset) {
  n=dim(dataset)[1]
  dist_vec=rdist.vec(dataset,cbind(rep(outlier[1], n), rep(outlier[2], n)))
  return(which.min(dist_vec))
}

# Given a set of points (spatial median or barycenters) identified as outliers,
# returns the indexes of the corresponding streamlines
idx_outliers <- function( dataset, outliers)
{
  idx= apply(as.matrix(outliers), 1, index_min_distance, dataset=dataset)
  return(idx)

}

# This function returns the indexes of streamline identified as outliers using a depth-based approach.
# In particular for every streamline, the barycenter is projected on the xy plane (and then on the yz).
# On this plane(s) outliers are identified using the basic distance-based approach (Knorr and Ng 1997)
# Then the same procedure is repeated using the spatial median of the streamlines
get_outliers_depth <- function (tract){

  #### Spatial median

  dataset_Median = get_spatial_median(tract$data)
  dataset_MedianXY = dataset_Median[,-3]
  dataset_MedianYZ = dataset_Median[,-1]

  # XY
  #isodepth(dataset_MedianXY, dpth =c(1,2,3), output = FALSE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = c('red','green', 'blue', 'orange', 'purple'))
  outliers_XY = isodepth(dataset_MedianXY, dpth =c(1,2,3), output = TRUE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = NULL)

  outliers1_XY = as.data.frame(outliers_XY$Contour1)
  outliers2_XY = as.data.frame(outliers_XY$Contour2)
  outliers3_XY = as.data.frame(outliers_XY$Contour3)

  idx1 = idx_outliers(dataset_MedianXY, outliers1_XY)
  idx2 = idx_outliers(dataset_MedianXY, outliers2_XY)
  idx3 = idx_outliers(dataset_MedianXY, outliers3_XY)

  # YZ
  #isodepth(dataset_MedianYZ, dpth =c(1,2,3, 4), output = FALSE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = c('red','green', 'blue', 'orange', 'purple'))
  outliers_YZ = isodepth(dataset_MedianYZ, dpth =c(1,2,3, 4), output = TRUE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = NULL)

  outliers1_YZ = as.data.frame(outliers_YZ$Contour1)
  outliers2_YZ = as.data.frame(outliers_YZ$Contour2)
  outliers3_YZ = as.data.frame(outliers_YZ$Contour3)
  outliers4_YZ = as.data.frame(outliers_YZ$Contour4)

  idx4 = idx_outliers(dataset_MedianYZ, outliers1_YZ)
  idx5 = idx_outliers(dataset_MedianYZ, outliers2_YZ)
  idx6 = idx_outliers(dataset_MedianYZ, outliers3_YZ)
  idx7 = idx_outliers(dataset_MedianYZ, outliers4_YZ)

  outliers_depth_Median = sort(unique(c(idx1, idx2, idx4, idx5, idx6, idx7)))

  #### Barycenter

  dataset_Barycenter = get_barycenter(tract$data)
  dataset_BarycenterXY = dataset_Barycenter[,-3]
  dataset_BarycenterYZ = dataset_Barycenter[,-1]

  # XY
  #isodepth(dataset_BarycenterXY, dpth =c(1,2,3), output = FALSE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = c('red','green', 'blue', 'orange', 'purple'))
  outliers_XY = isodepth(dataset_BarycenterXY, dpth =c(1,2,3), output = TRUE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = NULL)

  outliers1_XY = as.data.frame(outliers_XY$Contour1)
  outliers2_XY = as.data.frame(outliers_XY$Contour2)
  outliers3_XY = as.data.frame(outliers_XY$Contour3)

  idx1 = idx_outliers(dataset_BarycenterXY, outliers1_XY)
  idx2 = idx_outliers(dataset_BarycenterXY, outliers2_XY)
  idx3 = idx_outliers(dataset_BarycenterXY, outliers3_XY)

  # YZ
  #isodepth(dataset_BarycenterYZ, dpth =c(1,2,3, 4), output = FALSE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = c('red','green', 'blue', 'orange', 'purple'))
  outliers_YZ = isodepth(dataset_BarycenterYZ, dpth =c(1,2,3, 4), output = TRUE, twodim = TRUE,mustdith = FALSE, maxdith = 10, dithfactor = 10,trace.errors = TRUE, eps = 1e-8, factor = 0.8, xlab = "X",ylab = "Y", zlab = "Tukey's depth", colcontours = NULL)

  outliers1_YZ = as.data.frame(outliers_YZ$Contour1)
  outliers2_YZ = as.data.frame(outliers_YZ$Contour2)
  outliers3_YZ = as.data.frame(outliers_YZ$Contour3)
  outliers4_YZ = as.data.frame(outliers_YZ$Contour4)

  idx4 = idx_outliers(dataset_BarycenterYZ, outliers1_YZ)
  idx5 = idx_outliers(dataset_BarycenterYZ, outliers2_YZ)
  idx6 = idx_outliers(dataset_BarycenterYZ, outliers3_YZ)
  idx7 = idx_outliers(dataset_BarycenterYZ, outliers4_YZ)

  outliers_depth_Barycenter = sort(unique(c(idx1, idx2, idx4, idx5, idx6, idx7)))

  return (sort(unique(c(outliers_depth_Barycenter, outliers_depth_Median))))

}
astamm/fdatractography documentation built on May 12, 2019, 5:37 a.m.