R/hdnaO.R

Defines functions hdnaO hdnaO_main hdnaO_base

Documented in hdnaO

hdnaO_base <- function(a){
  # replace spaces with nothing
  aSpace <- gsub("-", "", a)
  a <- aSpace

  # main strand

  a25 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\25\\24\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a24 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\24\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a23 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a22 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a21 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a20 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a19 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a18 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a17 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a16 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a15 <- "([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])([AGR])\\w{0,500}\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a45 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\25\\24\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a44 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\24\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a43 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\23\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a42 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\22\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a41 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\21\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a40 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\20\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a39 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a38 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\18\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a37 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\17\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a36 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\16\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"
  a35 <- "([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])([CTY])\\w{0,500}\\15\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1"

  #a35
  a35b <- gregexpr(a35, a, ignore.case = TRUE, perl = T)
  sequence_position <- a35b[[1]][1:length(a35b[[1]])]
  if(a35b[[1]][1] == -1){
    resultClean35 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_35 <- regmatches(a, a35b)
    sequence <- a8_35[[1]][1:length(a8_35[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_35 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_35[,1]) == 0){
      resultClean35 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean35 <- resultClean4_35
    }
  }

  #a36
  a36b <- gregexpr(a36, a, ignore.case = TRUE, perl = T)
  sequence_position <- a36b[[1]][1:length(a36b[[1]])]
  if(a36b[[1]][1] == -1){
    resultClean36 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_36 <- regmatches(a, a36b)
    sequence <- a8_36[[1]][1:length(a8_36[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_36 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_36[,1]) == 0){
      resultClean36 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean36 <- resultClean4_36
    }
  }

  #a37
  a37b <- gregexpr(a37, a, ignore.case = TRUE, perl = T)
  sequence_position <- a37b[[1]][1:length(a37b[[1]])]
  if(a37b[[1]][1] == -1){
    resultClean37 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_37 <- regmatches(a, a37b)
    sequence <- a8_37[[1]][1:length(a8_37[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_37 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_37[,1]) == 0){
      resultClean37 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean37 <- resultClean4_37
    }
  }

  #a38
  a38b <- gregexpr(a38, a, ignore.case = TRUE, perl = T)
  sequence_position <- a38b[[1]][1:length(a38b[[1]])]
  if(a38b[[1]][1] == -1){
    resultClean38 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_38 <- regmatches(a, a38b)
    sequence <- a8_38[[1]][1:length(a8_38[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_38 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_38[,1]) == 0){
      resultClean38 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean38 <- resultClean4_38
    }
  }

  #a39
  a39b <- gregexpr(a39, a, ignore.case = TRUE, perl = T)
  sequence_position <- a39b[[1]][1:length(a39b[[1]])]
  if(a39b[[1]][1] == -1){
    resultClean39 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_39 <- regmatches(a, a39b)
    sequence <- a8_39[[1]][1:length(a8_39[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_39 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_39[,1]) == 0){
      resultClean39 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean39 <- resultClean4_39
    }
  }

  #a40
  a40b <- gregexpr(a40, a, ignore.case = TRUE, perl = T)
  sequence_position <- a40b[[1]][1:length(a40b[[1]])]
  if(a40b[[1]][1] == -1){
    resultClean40 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_40 <- regmatches(a, a40b)
    sequence <- a8_40[[1]][1:length(a8_40[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_40 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_40[,1]) == 0){
      resultClean40 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean40 <- resultClean4_40
    }
  }

  #a41
  a41b <- gregexpr(a41, a, ignore.case = TRUE, perl = T)
  sequence_position <- a41b[[1]][1:length(a41b[[1]])]
  if(a41b[[1]][1] == -1){
    resultClean41 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_41 <- regmatches(a, a41b)
    sequence <- a8_41[[1]][1:length(a8_41[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_41 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_41[,1]) == 0){
      resultClean41 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean41 <- resultClean4_41
    }
  }

  #a42
  a42b <- gregexpr(a42, a, ignore.case = TRUE, perl = T)
  sequence_position <- a42b[[1]][1:length(a42b[[1]])]
  if(a42b[[1]][1] == -1){
    resultClean42 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_42 <- regmatches(a, a42b)
    sequence <- a8_42[[1]][1:length(a8_42[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_42 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_42[,1]) == 0){
      resultClean42 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean42 <- resultClean4_42
    }
  }

  #a43
  a43b <- gregexpr(a43, a, ignore.case = TRUE, perl = T)
  sequence_position <- a43b[[1]][1:length(a43b[[1]])]
  if(a43b[[1]][1] == -1){
    resultClean43 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_43 <- regmatches(a, a43b)
    sequence <- a8_43[[1]][1:length(a8_43[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_43 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_43[,1]) == 0){
      resultClean43 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean43 <- resultClean4_43
    }
  }

  #a44
  a44b <- gregexpr(a44, a, ignore.case = TRUE, perl = T)
  sequence_position <- a44b[[1]][1:length(a44b[[1]])]
  if(a44b[[1]][1] == -1){
    resultClean44 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_44 <- regmatches(a, a44b)
    sequence <- a8_44[[1]][1:length(a8_44[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_44 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_44[,1]) == 0){
      resultClean44 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean44 <- resultClean4_44
    }
  }

  #a45
  a45b <- gregexpr(a45, a, ignore.case = TRUE, perl = T)
  sequence_position <- a45b[[1]][1:length(a45b[[1]])]
  if(a45b[[1]][1] == -1){
    resultClean45 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_45 <- regmatches(a, a45b)
    sequence <- a8_45[[1]][1:length(a8_45[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_45 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_45[,1]) == 0){
      resultClean45 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean45 <- resultClean4_45
    }
  }

  #a15
  a15b <- gregexpr(a15, a, ignore.case = TRUE, perl = T)
  sequence_position <- a15b[[1]][1:length(a15b[[1]])]
  if(a15b[[1]][1] == -1){
    resultClean15 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_15 <- regmatches(a, a15b)
    sequence <- a8_15[[1]][1:length(a8_15[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_15 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_15[,1]) == 0){
      resultClean15 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean15 <- resultClean4_15
    }
  }

  #a16
  a16b <- gregexpr(a16, a, ignore.case = TRUE, perl = T)
  sequence_position <- a16b[[1]][1:length(a16b[[1]])]
  if(a16b[[1]][1] == -1){
    resultClean16 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_16 <- regmatches(a, a16b)
    sequence <- a8_16[[1]][1:length(a8_16[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_16 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_16[,1]) == 0){
      resultClean16 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean16 <- resultClean4_16
    }
  }

  #a17
  a17b <- gregexpr(a17, a, ignore.case = TRUE, perl = T)
  sequence_position <- a17b[[1]][1:length(a17b[[1]])]
  if(a17b[[1]][1] == -1){
    resultClean17 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_17 <- regmatches(a, a17b)
    sequence <- a8_17[[1]][1:length(a8_17[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_17 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_17[,1]) == 0){
      resultClean17 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean17 <- resultClean4_17
    }
  }

  #a18
  a18b <- gregexpr(a18, a, ignore.case = TRUE, perl = T)
  sequence_position <- a18b[[1]][1:length(a18b[[1]])]
  if(a18b[[1]][1] == -1){
    resultClean18 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_18 <- regmatches(a, a18b)
    sequence <- a8_18[[1]][1:length(a8_18[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_18 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_18[,1]) == 0){
      resultClean18 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean18 <- resultClean4_18
    }
  }

  #a19
  a19b <- gregexpr(a19, a, ignore.case = TRUE, perl = T)
  sequence_position <- a19b[[1]][1:length(a19b[[1]])]
  if(a19b[[1]][1] == -1){
    resultClean19 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_19 <- regmatches(a, a19b)
    sequence <- a8_19[[1]][1:length(a8_19[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_19 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_19[,1]) == 0){
      resultClean19 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean19 <- resultClean4_19
    }
  }

  #a20
  a20b <- gregexpr(a20, a, ignore.case = TRUE, perl = T)
  sequence_position <- a20b[[1]][1:length(a20b[[1]])]
  if(a20b[[1]][1] == -1){
    resultClean20 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_20 <- regmatches(a, a20b)
    sequence <- a8_20[[1]][1:length(a8_20[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_20 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_20[,1]) == 0){
      resultClean20 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean20 <- resultClean4_20
    }
  }

  #a21
  a21b <- gregexpr(a21, a, ignore.case = TRUE, perl = T)
  sequence_position <- a21b[[1]][1:length(a21b[[1]])]
  if(a21b[[1]][1] == -1){
    resultClean21 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_21 <- regmatches(a, a21b)
    sequence <- a8_21[[1]][1:length(a8_21[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_21 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_21[,1]) == 0){
      resultClean21 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean21 <- resultClean4_21
    }
  }

  #a22
  a22b <- gregexpr(a22, a, ignore.case = TRUE, perl = T)
  sequence_position <- a22b[[1]][1:length(a22b[[1]])]
  if(a22b[[1]][1] == -1){
    resultClean22 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_22 <- regmatches(a, a22b)
    sequence <- a8_22[[1]][1:length(a8_22[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_22 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_22[,1]) == 0){
      resultClean22 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean22 <- resultClean4_22
    }
  }

  #a23
  a23b <- gregexpr(a23, a, ignore.case = TRUE, perl = T)
  sequence_position <- a23b[[1]][1:length(a23b[[1]])]
  if(a23b[[1]][1] == -1){
    resultClean23 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_23 <- regmatches(a, a23b)
    sequence <- a8_23[[1]][1:length(a8_23[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_23 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_23[,1]) == 0){
      resultClean23 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean23 <- resultClean4_23
    }
  }

  #a24
  a24b <- gregexpr(a24, a, ignore.case = TRUE, perl = T)
  sequence_position <- a24b[[1]][1:length(a24b[[1]])]
  if(a24b[[1]][1] == -1){
    resultClean24 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_24 <- regmatches(a, a24b)
    sequence <- a8_24[[1]][1:length(a8_24[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_24 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_24[,1]) == 0){
      resultClean24 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean24 <- resultClean4_24
    }
  }

  #a25
  a25b <- gregexpr(a25, a, ignore.case = TRUE, perl = T)
  sequence_position <- a25b[[1]][1:length(a25b[[1]])]
  if(a25b[[1]][1] == -1){
    resultClean25 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }else{
    a8_25 <- regmatches(a, a25b)
    sequence <- a8_25[[1]][1:length(a8_25[[1]])]
    sequence_length <- nchar(sequence)
    resultClean4_25 <- data.frame(cbind(sequence_position, sequence, sequence_length))
    if(length(resultClean4_25[,1]) == 0){
      resultClean25 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
    }else{
      resultClean25 <- resultClean4_25
    }
  }

  resultCleanR <- rbind(resultClean25, resultClean24, resultClean23, resultClean22, resultClean21, resultClean20, resultClean19, resultClean18, resultClean17, resultClean16, resultClean15, resultClean35, resultClean36, resultClean37, resultClean38, resultClean39, resultClean40, resultClean41, resultClean42, resultClean43, resultClean44, resultClean45)
  resultCleanU <- unique(resultCleanR)
  if(nrow(resultCleanU) > 1){
    resultClean4 <- resultCleanU[!(resultCleanU$sequence == "-"),]
  }else{
    resultClean4 <- resultCleanU
  }
  if(nrow(resultClean4[!(resultClean4$sequence == "-"),]) > 0){
    resultClean5 <- resultClean4[order(as.integer(as.character(resultClean4[,1])), -as.integer(as.character(resultClean4[,3]))),]
    rownames(resultClean5) <- 1:nrow(resultClean5)
  }else{
    resultClean5 <- data.frame("sequence_position" = "-", "sequence" = "-", "sequence_length" = "-")
  }

  return(resultClean5)
}



hdnaO_main <- function(b){
  if(length(b) == 1){
    #remove newlines
    b <- gsub("[\r\n]", "", b)
    b <- gsub(" ", "", b)
    # exit if unacceptable characters exist
    if(grepl("[^acgtryswkmbdhvnACGTRYSWKMBDHVN-]", b) == "TRUE"){
      b1 <- data.frame("sequence_position" = "!", "sequence" = "Error: Non-DNA character(s) in input", "sequence_length" = "!")
      return(b1)
    }else{
      # else continue
      b1 <- hdnaO_base(b)
      return(b1)
    }

  }else{
    # exit if unacceptable characters exist
    input_pos = 0
    q <- data.frame("input_ID" = integer(0), "sequence_position" = character(0), "sequence" = character(0), "sequence_length" = character(0))
    for(i in b){
      #remove newlines
      b <- gsub("[\r\n]", "", i)
      b <- gsub(" ", "", i)
      # exit if unacceptable characters exist
      if(grepl("[^acgtryswkmbdhvnACGTRYSWKMBDHVN-]", i) == "TRUE"){
        b1 <- data.frame("sequence_position" = "!", "sequence" = "Error: Non-DNA character(s) in input", "sequence_length" = "!")
      }else{
        b1 <- hdnaO_base(i)
      }
      input_pos = input_pos + 1
      b2 <- cbind(input_ID = input_pos, b1)
      b2[,c(2,4)] <- sapply(b2[,c(2,4)],as.character)
      q <- rbind(q, b2)
    }
    return(q)
  }
}


#' Predicting intramolecular triplexes (H-DNA) including overlaps
#'
#' This function predicts H-DNA
#' in 'x' DNA sequence like the hdna function, but includes overlaps.
#' DNA sequence can be provided in raw or fasta format or as GenBank accession number(s).
#' Internet is needed to connect to GenBank database, if accession number(s) is given as argument.
#'
#' @param x DNA sequence(s) in raw format or a fasta file or a GenBank accession number(s); from which H-DNA (including overlaps) will be predicted.
#'  If the fasta file name does not contain an absolute path, the file name is relative to the current working directory.
#' @param xformat a character string specifying the format of x : default (raw), fasta, GenBank (GenBank accession number(s)).
#' @return A dataframe of H-DNA' position, sequence and length. If more than one DNA sequence is provided as argument, an input ID is returned for motif(s) predicted from each input sequence.
#' @author Hannah O. Ajoge
#' @details
#' This function predicts H-DNA in DNA sequences, including overlaps and provide the position, sequence and length of the predicted motif(s), if any.
#' @export
#' @importFrom ape read.GenBank
#' @importFrom seqinr read.fasta
#' @importFrom seqinr getSequence
#' @references Paper on gquad and the web application (Non-B DNA Predictor) is under review, see draft in vignettes
#' @seealso hdna
#' @examples
#' ## Predicting H-DNA (including overlaps) from raw DNA sequences
#' E1 <- c("TCTTCCCCCCTTTTTYYYYYGCTYYYYYTTTTTCCCCCCGAAT", "taggtgctgggaggtagagacaggatatcct")
#' hdnaO(E1)
#'
#' ## Predicting H-DNA (including overlaps) from DNA sequences in fasta file
#' ## Not run: hdnaO(x="Example.fasta", xformat = "fasta")
#'
#' ## Predicting H-DNA (including overlaps) from DNA sequences,
#' ## using GenBank accession numbers.
#' ## Internet connectivity is needed for this to work.
#' ## Not run: hdnaO(c("BH114913", "AY611035"), xformat = "GenBank")



hdnaO <- function(x, xformat = "default"){
  if(xformat == "default"){
    x1 <- hdnaO_main(x)
    return(x1)
  }

  if(xformat == "GenBank"){
    x2 <- read.GenBank(x, as.character = TRUE)
    x3 <- sapply(x2, paste, collapse="")
    x4 <- hdnaO_main(x3)
    return(x4)
  }

  if(xformat == "fasta"){
    x5 <-read.fasta(x)
    x6 <- getSequence(x5, as.string = TRUE)
    x7 <- unlist(x6)
    x8 <- hdnaO_main(x7)
    return(x8)
  }else{
    stop("Unacceptable option for argument 'xformat'")
  }

}

Try the gquad package in your browser

Any scripts or data that you put into this service are public.

gquad documentation built on Nov. 29, 2022, 9:05 a.m.