R/Network_map.R

Defines functions Network_map

Documented in Network_map

#' A function to map statistics (i.e., genetic differentiation) between points as a network on a map.
#'
#' @param dat Data frame or character string that supplies the input data. If it is a character string, the file should be a csv. If it is a csv, the 1st row should contain the individual/population names. The columns should also be named in this fashion.
#' @param pops Data frame or character string that supplies the input data. If it is a character string, the file should be a csv. The columns should be named Sample, containing the sample IDs; Population indicating the population assignment of the individual; Long, indicating the longitude of the sample; Lat, indicating the latitude of the sample. Alternatively, see the Longitude_col and Latitude_col arguments.
#' @param neighbors Numeric or character. The number of neighbors to plot connections with, or the specific relationship that you want to visualize. Names should match those in the population assignment file and be seperated by an underscore. If I want to visualize the relationship between East and West, for example, I would set neighbors = "East_West".
#' @param col Character vector indicating the colors you wish to use for plotting.
#' @param statistic Character indicating the statistic being plotted. This will be used to title the legend. The legend title will be blank if left as NULL.
#' @param breaks Numeric. The breaks used to generate the color ramp when plotting. Users should supply 3 values if custom breaks are desired.
#' @param Lat_buffer Numeric. A buffer to customize visualization.
#' @param Long_buffer Numeric. A buffer to customize visualization.
#' @param Latitude_col Numeric. The number of the column indicating the latitude for each sample. If this is not null, PopGenHelpR will use this column instead of looking for the Lat column.
#' @param Longitude_col Numeric. The number of the column indicating the longitude for each sample. If this is not null, PopGenHelpR will use this column instead of looking for the Long column.
#'
#'
#' @return A list containing the map and the matrix used to plot the map.
#'
#' @author Keaka Farleigh
#'
#' @export
#'
#' @examples
#' \donttest{
#' data(Fst_dat)
#' Fst <- Fst_dat[[1]]
#' Loc <- Fst_dat[[2]]
#' Test <- Network_map(dat = Fst, pops = Loc,
#' neighbors = 2,col = c('#4575b4', '#91bfdb', '#e0f3f8','#fd8d3c','#fc4e2a'),
#' statistic = "Fst", Lat_buffer = 1, Long_buffer = 1)}
Network_map <- function(dat, pops, neighbors, col, statistic = NULL, breaks = NULL, Lat_buffer = 1, Long_buffer = 1, Latitude_col = NULL, Longitude_col = NULL){
  X1 <- X2 <- X3 <- X4 <- Dif <- Long <- Lat <- alpha <- NULL
  ################### Get the data for mapping
  # Get map data
  map <- spData::world["continent"]
  states <- spData::us_states["REGION"]
  ### Make a base map for the countries of interest
  base_map <- ggplot2::ggplot() + ggplot2::geom_sf(data = map, fill = "#f4f4f4") +
    ggplot2::geom_sf(data = states, fill = ggplot2::alpha("#f4f4f4", 0))


  # Read in files
  if(is.data.frame(dat) | is.matrix(dat)){
    Dif_mat <- dat
  }
  else if(is.character(dat) == TRUE){
    Dif_mat <- utils::read.csv(dat, row.names = 1)
  }
  else{
    stop("Please supply a dataframe or .csv file name for analysis")
  }
  if(is.data.frame(pops) == TRUE){
    Pops <- pops
  }
  else if(is.character(pops) == TRUE){
    Pops <- utils::read.csv(pops)
  }
  else{
    stop("Please supply a population assignment file as a dataframe or .csv file name")
  }

  # Make sure that the diagonal of the diferentiation matrix is 0
  diag(Dif_mat) <- 0


  if(!is.null(Latitude_col)){
    colnames(Div_mat)[Latitude_col] <- "Lat"
  }

  if(!is.null(Longitude_col)){
    colnames(Div_mat)[Longitude_col] <- "Long"
  }

  # Pull coordinates

  if(!is.null(Latitude_col) | !is.null(Longitude_col)){
    Coords <- Pops[,c(Longitude_col, Latitude_col)]
  } else{
    Coords <- Pops[,3:4]
  }

  # Get coordinates for each population
  Mapping <- Pops[!duplicated(Pops[,2]),]

  # Get the combination of all points
  All_pts <- data.frame(t(apply(utils::combn(seq_len(nrow(Mapping)), 2), 2, function(x) c(Mapping[x,]$Long, Mapping[x,]$Lat))))
  # Get the midpoint between each point pair
  Midpts<- data.frame(rowMeans(All_pts[,1:2]), rowMeans(All_pts[,3:4]))
  colnames(Midpts) <- c('x', 'y')

  # Get the names for each comparison
  Pop_combos <- utils::combn(Mapping$Population, 2, paste, collapse = "_")
  rownames(All_pts) <- Pop_combos
  Midpts$Combo <- Pop_combos

  # Reorganize diagonal matrix into long format
  Dif_Mapping <- data.frame(
    value=Dif_mat[lower.tri(Dif_mat,diag=T)],
    col=rep(1:ncol(Dif_mat),ncol(Dif_mat):1),
    row=unlist(sapply(1:nrow(Dif_mat),function(x) x:nrow(Dif_mat))))
  Dif_Mapping <- Dif_Mapping[-c(which(Dif_Mapping$value == 0)),]
  Dif_Mapping$labels <- Pop_combos

  # Make sure that Dif_Mapping is in the same order as All_pts
  if(any(table(Dif_Mapping$labels == rownames(All_pts)) == FALSE)){
    stop("The matrices are not in the correct order, please contact the package authors if you see this error")
  }
  # Append the Dif statistic to the all pts matrix
  All_pts$Dif <- Dif_Mapping$value
  # Set breaks
  if(missing(col)){
    col <- c('#4575b4', '#91bfdb', '#e0f3f8','#fd8d3c','#fc4e2a')
  }
  if(is.null(breaks) == TRUE){
    Breaks <- summary(All_pts$Dif)
    Breaks <- as.numeric(Breaks[1:5])
  }
  else{
    Breaks <- breaks
    Breaks <- as.numeric(Breaks)
  }
  if(missing(neighbors)){
    Coords <- as.matrix(Mapping[,3:4])
    Neigh <- spdep::knearneigh(Coords, k = 1, longlat = TRUE, use_kd_tree = FALSE)
    nNeigh <- as.data.frame(Neigh$nn)
    row.names(nNeigh) <- Mapping$Population
    nNeigh$Label <- Mapping$Population[nNeigh$V1]
    nNeigh$Comp1 <- paste(row.names(nNeigh), nNeigh$Label, sep = '_')
    nNeigh$Comp2 <- paste(nNeigh$Label, row.names(nNeigh), sep = '_')
    All_pts_toplot1 <- All_pts[which(rownames(All_pts) %in% nNeigh$Comp1),]
    All_pts_toplot2 <- All_pts[which(rownames(All_pts) %in% nNeigh$Comp2),]
    All_pts_toplot <- rbind(All_pts_toplot1, All_pts_toplot2) }
  else if(is.numeric(neighbors) == TRUE){
    Coords <- as.matrix(Mapping[,3:4])
    Neigh <- spdep::knearneigh(Coords, k = neighbors, longlat = TRUE, use_kd_tree = FALSE)
    nNeigh <- as.data.frame(Neigh$nn)
    row.names(nNeigh) <- Mapping$Population
    combs1 <- c()
    combs2 <- c()
    for (i in seq(1:ncol(nNeigh))){
      nNeigh[,neighbors+i] <- Mapping$Population[nNeigh[,i]]
      combs1 <- c(combs1, paste(row.names(nNeigh), nNeigh[,neighbors+i], sep = '_'))
      combs2 <- c(combs2, paste(nNeigh[,neighbors+i], row.names(nNeigh), sep = '_'))}

    combs_tot <- c(combs1,combs2)
    combs_tot <- unique(combs_tot)
    All_pts_toplot <- All_pts[which(rownames(All_pts) %in% combs_tot),]
    All_pts_toplot$Comp <- rownames(All_pts_toplot)
  }
  else if(is.character(neighbors) == TRUE){
    combs1 <- neighbors
    combs2 <- c()
    for (i in seq(1:length(neighbors))) {
      nsplit <- strsplit(neighbors[i], "_")
      combs2 <- c(combs2, paste(rev(nsplit[[1]]), collapse = "_"))
    }
    combs_tot <- c(combs1,combs2)
    All_pts_toplot <- All_pts[which(row.names(All_pts) %in% combs_tot),]
    All_pts_toplot$Comp <- rownames(All_pts_toplot)
    All_pts_toplot <- All_pts_toplot[!duplicated(All_pts_toplot$Comp),]
  }
  Breaks <- round(Breaks,2)
  All_pts_toplot$Dif <- round(All_pts_toplot$Dif,2)
  # Add buffers for mapping
  Lat_Min <- min(All_pts_toplot[,3:4]) - Lat_buffer
  Lat_Max <- max(All_pts_toplot[,3:4]) + Lat_buffer
  Long_Min <- min(All_pts_toplot[,1:2]) - Long_buffer
  Long_Max <- max(All_pts_toplot[,1:2]) + Long_buffer

  # Map it
  if(is.null(statistic)){
  Dif_map <- base_map + ggplot2::coord_sf(xlim = c(Long_Min, Long_Max),  ylim = c(Lat_Min, Lat_Max)) +
    ggplot2::geom_segment(data = All_pts_toplot, ggplot2::aes(x = X1, xend = X2, y = X3, yend = X4), color = 'black', linewidth= 1.05) +
    ggplot2::geom_segment(data = All_pts_toplot, ggplot2::aes(x = X1, xend = X2, y = X3, yend = X4, color = Dif), linewidth = 1) +
    ggplot2::scale_color_gradientn(colors = col, breaks = Breaks, name = ggplot2::element_blank()) +
    ggplot2::geom_point(data = Mapping, ggplot2::aes(x = Long, y = Lat), size = 3, shape = 21, fill = 'gray', color = "black") +
    ggplot2::theme(panel.grid=ggplot2::element_blank(), legend.position = "right") + ggplot2::xlab('Longitude') + ggplot2::ylab('Latitude')
  } else{
    Dif_map <- base_map + ggplot2::coord_sf(xlim = c(Long_Min, Long_Max),  ylim = c(Lat_Min, Lat_Max)) +
      ggplot2::geom_segment(data = All_pts_toplot, ggplot2::aes(x = X1, xend = X2, y = X3, yend = X4), color = 'black', linewidth= 1.05) +
      ggplot2::geom_segment(data = All_pts_toplot, ggplot2::aes(x = X1, xend = X2, y = X3, yend = X4, color = Dif), linewidth = 1) +
      ggplot2::scale_color_gradientn(colors = col, breaks = Breaks, name = statistic) +
      ggplot2::geom_point(data = Mapping, ggplot2::aes(x = Long, y = Lat), size = 3, shape = 21, fill = 'gray', color = "black") +
      ggplot2::theme(panel.grid=ggplot2::element_blank(), legend.position = "right") + ggplot2::xlab('Longitude') + ggplot2::ylab('Latitude')
  }

  Output_difmap <- list(All_pts_toplot[,c("Dif","Comp")], Dif_map)
  names(Output_difmap) <- c("Statistic Matrix", "Map")
  return(Output_difmap)
}

Try the PopGenHelpR package in your browser

Any scripts or data that you put into this service are public.

PopGenHelpR documentation built on Sept. 11, 2024, 7:51 p.m.