match_seq <- function(a,b, min){
y <- lapply(b, '==', a)
cond <- lapply(seq_along(y), function(x){
y[[x]][x:(length(a) - length(b) + x)]
})
cond <- Reduce('+', cond)
res <- which(cond == length(b))
res[res > min]
}
diff_text <- function(a, b) {
# Voir si besoin de unlist
cond <- lapply(tokenize_words(b), "==", tokenize_words(a)) %>%
unlist
if(any(cond)) {
diff_lcs(a, b)
} else {
a <- tokenize_words(a)
b <- tokenize_words(b)
diff <- data.frame(
a = seq_along(a),
b = rep(NA, length(a)),
mot = a,
status = rep("-", length(a))
)
data.frame(
a = rep(NA, length(b)),
b = seq_along(b),
mot = b,
status = rep("+", length(b))
) %>% rbind(diff)
}
}
diff_lcs <- function(a, b) {
align <- align_local(a, b, progress = FALSE)
a <- tokenize_words(a)
b <- tokenize_words(b)
lcs <- sapply(align[1:2], str_split, " ") %>%
sapply(str_to_lower) %>%
matrix(ncol = 2, byrow = F) %>%
as.data.frame(stringsAsFactors = FALSE)
names(lcs) <- c("a_edits", "b_edits")
# Groupage de lcs par sous-sequences
lcs$group <- cumsum((substr(lcs[, 1], 1, 1) == "#") | (substr(lcs[, 2], 1, 1) == "#"))
tmp <- rle(lcs$group)
tmp$values <- tmp$length > 1
tmp$values[tmp$values] <- cumsum(tmp$values[tmp$values])
lcs$group <- inverse.rle(tmp)
lcs <- lcs[(substr(lcs[, 1], 1, 1) != "#") & (substr(lcs[, 2], 1, 1) != "#"), ]
min <- 0
lcsA <- foreach(i = unique(lcs$group)) %do% {
text <- lcs$a_edits[lcs$group == i]
m <- match_seq(a, text, min) %>% min
res <- seq(m, m + length(text) - 1, by = 1)
min <- max(res)
res
} %>% unlist
min <- 0
lcsB <- foreach(i = unique(lcs$group)) %do% {
text <- lcs$a_edits[lcs$group == i]
m <- match_seq(b, text, min) %>% min
res <- seq(m, m + length(text) - 1, by = 1)
min <- max(res)
res
} %>% unlist
diff <- data.frame(
a = lcsA,
b = lcsB,
mot = b[lcsB],
status = rep("=", length(lcsA)),
stringsAsFactors = FALSE
)
a_seq <- seq_along(a)[!seq_along(a) %in% lcsA]
b_seq <- seq_along(b)[!seq_along(b) %in% lcsB]
diff <- data.frame(
a = a_seq,
b = rep(NA, length(a_seq)),
mot = a[a_seq],
status = rep("-", length(a_seq)),
stringsAsFactors = FALSE
) %>% rbind(diff)
diff <- data.frame(
a = rep(NA, length(b_seq)),
b = b_seq,
mot = b[b_seq],
status = rep("+", length(b_seq)),
stringsAsFactors = FALSE
) %>% rbind(diff)
diff
}
set_id <- function(tbl_version) {
tbl_version$id <- NA
tbl_version$id[tbl_version$step == 1] <- seq(1, (tbl_version$step == 1) %>% cumsum %>% max, by = 1)
tbl_past <- filter(tbl_version, tbl_version$step == 1)
start_id <- max(tbl_version$id, na.rm = TRUE)
tbl_version_id <- foreach(i = unique(tbl_version$step)[-1], .combine= rbind) %do% {
tbl_now <- filter(tbl_version, step == i)
tbl_now$id <- tbl_past$id[match(tbl_now$a, tbl_past$b)]
if(anyNA(tbl_now$a)) {
tbl_now[is.na(tbl_now$a), "id"] <- seq(1, cumsum(is.na(tbl_now$a)) %>% max, by = 1) + start_id + 1
}
tbl_past <- tbl_now
max_id <- max(tbl_past$id, na.rm = TRUE)
start_id <- ifelse(max_id > start_id, max_id, start_id)
tbl_now
}
rbind(
tbl_version[tbl_version$step == 1, ],
tbl_version_id
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.