##################################################################################
#This function creates a dataframe with HBDsegments used for plottting graphs #
#by segments #
# #
#!!! Submaps : the list of objects #
#!!! HBD_recap : a dataframe with HBD probabilities #
#!!! n.consecutive.markers : the number of consecutive HBD probabilities > #
# threshold #
# #
#*** return a dataframe with HBDsegments #
##################################################################################
HBDSegmentsBySegments <- function(submaps, HBD_recap, n.consecutive.markers, threshold)
{
l <- list()#liste des individus
un.ids <- rownames(HBD_recap) # famid:id
status <- submaps@bedmatrix@ped$pheno[ match( un.ids, uniqueIds(submaps@bedmatrix@ped$famid, submaps@bedmatrix@ped$id) ) ]
individuals_name <- get.id(un.ids)
family_id <- get.famid(un.ids)
# ### fin modifs ####
#find the name of the marker to find the index of the segments in the chromosome
marker_names <- submaps@submaps_list[[1]]@map$id
#to which chromosomes these markers correspond
correspondance <- match(marker_names, submaps@bedmatrix@snps$id)
chromosome <- submaps@bedmatrix@snps$chr[correspondance]
#find the positions and distances of all the segmetn
segmentSummary <- segmentsListSummary(submaps@segments_list)
shift <- cumsum(segmentSummary$number_of_segments)
shift <- append(0, shift)#0 because I add one after
max <- shift[length(shift)]
start_dist <- numeric(max)
end_dist <- numeric(max)
start_pos <- numeric(max)
end_pos <- numeric(max)
for(i in seq_along(submaps@segments_list))
{
segment_chr <- submaps@segments_list[[i]]#get the list of segments in the chromosome i
start <- numeric(length(segment_chr)) #a vector with the number of segments for the chr i
end <- numeric(length(segment_chr)) #a vector with the number of segments for the chr i
for(j in seq_along(segment_chr)) #loop over the segments
{
if(length(segment_chr[[j]])== 0) next()
start[j] <- segment_chr[[ j ]][ 1 ] # first marker of the segment j
end[j] <- segment_chr[[ j ]][ length( segment_chr[[ j ]] ) ] # last marker of the segment j
}
#segment start
start_dist[(shift[i]+1):shift[i+1]] <- submaps@bedmatrix@snps$dist[ start ]
#segment end -> last marker
end_dist[(shift[i]+1):shift[i+1]] <- submaps@bedmatrix@snps$dist[ end ]
#segment start
start_pos[(shift[i]+1):shift[i+1]] <- submaps@bedmatrix@snps$pos[ start ]
#segment end-> last marker
end_pos[(shift[i]+1):shift[i+1]] <- submaps@bedmatrix@snps$pos[ end ]
}
min_segment_size = n.consecutive.markers
#create the dataframe for every individual
for(i in seq_len(nrow(HBD_recap)))
{
data<-c(HBD_recap[i,])
test<- as.vector(data >= threshold)#test
all_segments<-rle(test)
good_segments<-as.numeric( which( all_segments$length >= min_segment_size & all_segments$value )-1 )
if(length(good_segments) == 0)
next()
if(good_segments[1] == 0)
good_segments[1] <- 1
good_segments_length <-as.numeric( all_segments$length[ good_segments+1 ] )
good_segments_start<- as.numeric( cumsum(all_segments$lengths)[ good_segments ] +1)
good_segments_end<-as.numeric( good_segments_start+good_segments_length -1)
segment_dataframe<-data.frame(id = rep(individuals_name[i], length(good_segments_start)),
famid = rep(family_id[i], length(good_segments_start)),
pheno = rep(status[i], length(good_segments_start)),
start = good_segments_start,
end = good_segments_end,
size = good_segments_length,
chromosome = chromosome[good_segments_start],
start_pos = start_pos[good_segments_start],
end_pos = end_pos[good_segments_end],
start_dist = start_dist[good_segments_start],
end_dist = end_dist[good_segments_end])
#treating the case when segments overlaps two different chromosomes
overlap <- which(segment_dataframe$start_dist > segment_dataframe$end_dist)
#treating the case when segments overlaps two different chromosomes
if(length(overlap) != 0)
{
segment_dataframe <- segment_dataframe[-overlap,,drop=F]
}
l[[i]] <- segment_dataframe
}
return(l)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.