# R/charHack.R

# #### BROKEN!! The function below is commented out.
# CharHack <- function(inp)
# {
#   qualCharMat <- matrix(c(
#     #Add more pairs of "special characters", "encodings" here:
#     "&","&amp;"
#   ),
#   ncol = 2, byrow=T)
#   for (i in 1:dim(qualCharMat)[1])
#   {
#     inp <- str_replace_all(inp,fixed(qualCharMat[i,1]),qualCharMat[i,2])
#   }
#
#   return(inp)
# }
#
#
# ####################################################
# #Takes care of additional problematic url conversion
# ####################################################
# #The text seems to be transcoded to UTF-8 correctly, but R and MTurk then
# #interpret it as Windows-1252: http://www.w3schools.com/tags/ref_urlencode.asp
# # charHack <- function(inp)
# # {
# #   charMat <- matrix(c("%E2%82%AC","%80",#€
# #                       "%E2%80%9A","%82",#‚
# #                       "%C6%92","%83",#ƒ
# #                       "%E2%80%9E","%84",#„
# #                       "%E2%80%A6","%85",#…
# #                       "%E2%80%A0","%86",#†
# #                       "%E2%80%A1","%87",#‡
# #                       "%CB%86","%88",#ˆ
# #                       "%E2%80%B0","%89",#‰
# #                       "%C5%A0","%8A",#Š
# #                       "%E2%80%B9","%8B",#‹
# #                       "%C5%92","%8C",#Œ
# #                       "%C5%8D","%8D",#
# #                       "%C5%BD","%8E",#Ž
# #                       "%C2%90","%90",#
# #                       "%E2%80%98","%91",#‘
# #                       "%E2%80%99","%92",#’
# #                       "%E2%80%9C","%93",#“
# #                       "%E2%80%9D","%94",#”
# #                       "%E2%80%A2","%95",#•
# #                       "%E2%80%93","%96",#–
# #                       "%E2%80%94","%97",#—
# #                       "%CB%9C","%98",#˜
# #                       "%E2%84%A2","%99",#™
# #                       "%C5%A1","%9A",#š
# #                       "%E2%80%BA","%9B",#›
# #                       "%C5%93","%9C",#œ
# #                       "%C5%BE","%9E",#ž
# #                       "%C5%B8","%9F",#Ÿ
# #                       "%C2%A0","%A0",#non-breaking space (U+00A0)
# #                       "%C2%A1","%A1",#¡
# #                       "%C2%A2","%A2",#¢
# #                       "%C2%A3","%A3",#£
# #                       "%C2%A4","%A4",#¤
# #                       "%C2%A5","%A5",#¥
# #                       "%C2%A6","%A6",#¦
# #                       "%C2%A7","%A7",#§
# #                       "%C2%A8","%A8",#¨
# #                       "%C2%A9","%A9",#©
# #                       "%C2%AA","%AA",#ª
# #                       "%C2%AB","%AB",#«
# #                       "%C2%AC","%AC",#¬
# #                       "%C2%AD","%AD",#soft hyphen (U+00AD)
# #                       "%C2%AE","%AE",#®
# #                       "%C2%AF","%AF",#¯
# #                       "%C2%B0","%B0",#°
# #                       "%C2%B1","%B1",#±
# #                       "%C2%B2","%B2",#²
# #                       "%C2%B3","%B3",#³
# #                       "%C2%B4","%B4",#´
# #                       "%C2%B5","%B5",#µ
# # #                       "%C2%B6","%B6",#¶
# # #                       "%C2%B7","%B7",#·
# # #                       "%C2%B8","%B8",#¸
# # #                       "%C2%B9","%B9",#¹
# # #                       "%C2%BA","%BA",#º
# # #                       "%C2%BB","%BB",#»
# #                       "%C2%BC","%BC",#¼
# #                       "%C2%BD","%BD",#½
# #                       "%C2%BE","%BE",#¾
# # #                       "%C2%BF","%BF",#¿
# # #                       "%C3%80","%C0",#À
# # #                       "%C3%81","%C1",#Á
# # #                       "%C3%82","%C2",#Â
# # #                       "%C3%83","%C3",#Ã
# # #                       "%C3%84","%C4",#Ä
# # #                       "%C3%85","%C5",#Å
# # #                       "%C3%86","%C6",#Æ
# # #                       "%C3%87","%C7",#Ç
# # #                       "%C3%88","%C8",#È
# # #                       "%C3%89","%C9",#É
# # #                       "%C3%8A","%CA",#Ê
# # #                       "%C3%8B","%CB",#Ë
# # #                       "%C3%8C","%CC",#Ì
# # #                       "%C3%8D","%CD",#Í
# # #                       "%C3%8E","%CE",#Î
# # #                       "%C3%8F","%CF",#Ï
# # #                       "%C3%90","%D0",#Ð
# # #                       "%C3%91","%D1",#Ñ
# # #                       "%C3%92","%D2",#Ò
# # #                       "%C3%93","%D3",#Ó
# # #                       "%C3%94","%D4",#Ô
# # #                       "%C3%95","%D5",#Õ
# # #                       "%C3%96","%D6",#Ö
# # #                       "%C3%97","%D7",#×
# # #                       "%C3%98","%D8",#Ø
# # #                       "%C3%99","%D9",#Ù
# # #                       "%C3%9A","%DA",#Ú
# # #                       "%C3%9B","%DB",#Û
# # #                       "%C3%9C","%DC",#Ü
# # #                       "%C3%9D","%DD",#Ý
# # #                       "%C3%9E","%DE",#Þ
# #                       "%C3%9F","%DF",#ß
# # #                       "%C3%A0","%E0",#à
# # #                       "%C3%A1","%E1",#á
# # #                       "%C3%A2","%E2",#â
# # #                       "%C3%A3","%E3",#ã
# # #                       "%C3%A4","%E4",#ä
# # #                       "%C3%A5","%E5",#å
# # #                       "%C3%A6","%E6",#æ
# # #                       "%C3%A7","%E7",#ç
# # #                       "%C3%A8","%E8",#è
# # #                       "%C3%A9","%E9",#é
# # #                       "%C3%AA","%EA",#ê
# # #                       "%C3%AB","%EB",#ë
# # #                       "%C3%AC","%EC",#ì
# # #                       "%C3%AD","%ED",#í
# # #                       "%C3%AE","%EE",#î
# # #                       "%C3%AF","%EF",#ï
# # #                       "%C3%B0","%F0",#ð
# # #                       "%C3%B1","%F1",#ñ
# # #                       "%C3%B2","%F2",#ò
# # #                       "%C3%B3","%F3",#ó
# # #                       "%C3%B4","%F4",#ô
# # #                       "%C3%B5","%F5",#õ
# # #                       "%C3%B6","%F6",#ö
# # #                       "%C3%B7","%F7",#÷
# # #                       "%C3%B8","%F8",#ø
# # #                       "%C3%B9","%F9",#ù
# # #                       "%C3%BA","%FA",#ú
# # #                       "%C3%BB","%FB",#û
# # #                       "%C3%BC","%FC",#ü
# # #                       "%C3%BD","%FD",#ý
# # #                       "%C3%BE","%FE",#þ
# # #                       "%C3%BF","%FF",#ÿ
# #                       "%CE%BA", "kappa",  #kappa
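# #                       "%E2%89%A5", "%3E%3D", #greater than or equal (U+2265), replaced by URL-encoded ">="
# #                       "%E2%89%A4", "%26lt%3B%3D", #less than or equal (U+2264), replaced by URL-encoded "&lt;="
# #                       "%E2%80%8A", "%20", #hair space (U+200A), replaced by a plain space
# #                       "%E2%80%89", "%20", #thin space (U+2009), replaced by a plain space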
# #                       "%CE%B1", "alpha",  #alpha
# #                       "%CF%81", "rho"), #rho
# #                     ncol = 2, byrow=T)
# #
# #   for (i in 1:dim(charMat)[1])
# #   {
# #     inp <- str_replace_all(inp,fixed(charMat[i,1]),charMat[i,2])
# #   }
# #
# #   return(inp)
# #
# # }
#
# #################
# ### New CharHack
# #####################
# #From table: http://www.idautomation.com/product-support/ascii-chart-char-set.html
# # charMat <- matrix(c(
# #   #"","&nbsp;",
# #   "\"","&quot;",
# #   "&","&amp;",
# #   "<","&lt;",
# #   ">","&gt;",
# #   #"","&#127;",
# #   "€","&#128;",
# #   #"","&#129;",
# #   "‚","&#130;",
# #   "ƒ","&#131;",
# #   "„","&#132;",
# #   "…","&#133;",
# #   "†","&#134;",
# #   "‡","&#135;",
# #   "ˆ","&#136;",
# #   "‰","&#137;",
# #   "Š","&#138;",
# #   "‹","&#139;",
# #   "Œ","&#140;",
# #   #"","&#141;",
# #   "Ž","&#142;",
# #   #"","&#143;",
# #   #"","&#144;",
# #   "‘","&#145;",
# #   "’","&#146;",
# #   "“","&#147;",
# #   "”","&#148;",
# #   "•","&#149;",
# #   "–","&#150;",
# #   "—","&#151;",
# #   "˜","&#152;",
# #   "™","&#153;",
# #   "š","&#154;",
# #   "›","&#155;",
# #   "œ","&#156;",
# #   #"","&#157;",
# #   "ž","&#158;",
# #   "Ÿ","&#159;",
# #   #" ","&#160;",
# #   "¡","&#161;",
# #   "¢","&#162;",
# #   "£","&#163;",
# #   "¤","&#164;",
# #   "¥","&#165;",
# #   "¦","&#166;",
# #   "§","&#167;",
# #   "¨","&#168;",
# #   "©","&#169;",
# #   "ª","&#170;",
# #   "«","&#171;",
# #   "¬","&#172;",
# #   #"","&#173;",
# #   "®","&#174;",
# #   "¯","&#175;",
# #   "°","&#176;",
# #   "±","&#177;",
# #   "²","&#178;",
# #   "³","&#179;",
# #   "´","&#180;",
# #   "µ","&#181;",
# #   "¶","&#182;",
# #   "·","&#183;",
# #   "¸","&#184;",
# #   "¹","&#185;",
# #   "º","&#186;",
# #   "»","&#187;",
# #   "¼","&#188;",
# #   "½","&#189;",
# #   "¾","&#190;",
# #   "¿","&#191;",
# #   "À","&#192;",
# #   "Á","&#193;",
# #   "Â","&#194;",
# #   "Ã","&#195;",
# #   "Ä","&#196;",
# #   "Å","&#197;",
# #   "Æ","&#198;",
# #   "Ç","&#199;",
# #   "È","&#200;",
# #   "É","&#201;",
# #   "Ê","&#202;",
# #   "Ë","&#203;",
# #   "Ì","&#204;",
# #   "Í","&#205;",
# #   "Î","&#206;",
# #   "Ï","&#207;",
# #   "Ð","&#208;",
# #   "Ñ","&#209;",
# #   "Ò","&#210;",
# #   "Ó","&#211;",
# #   "Ô","&#212;",
# #   "Õ","&#213;",
# #   "Ö","&#214;",
# #   "×","&#215;",
# #   "Ø","&#216;",
# #   "Ù","&#217;",
# #   "Ú","&#218;",
# #   "Û","&#219;",
# #   "Ü","&#220;",
# #   "Ý","&#221;",
# #   "Þ","&#222;",
# #   "ß","&#223;",
# #   "à","&#224;",
# #   "á","&#225;",
# #   "â","&#226;",
# #   "ã","&#227;",
# #   "ä","&#228;",
# #   "å","&#229;",
# #   "æ","&#230;",
# #   "ç","&#231;",
# #   "è","&#232;",
# #   "é","&#233;",
# #   "ê","&#234;",
# #   "ë","&#235;",
# #   "ì","&#236;",
# #   "í","&#237;",
# #   "î","&#238;",
# #   "ï","&#239;",
# #   "ð","&#240;",
# #   "ñ","&#241;",
# #   "ò","&#242;",
# #   "ó","&#243;",
# #   "ô","&#244;",
# #   "õ","&#245;",
# #   "ö","&#246;",
# #   "÷","&#247;",
# #   "ø","&#248;",
# #   "ù","&#249;",
# #   "ú","&#250;",
# #   "û","&#251;",
# #   "ü","&#252;",
# #   "ý","&#253;",
# #   "þ","&#254;",
# #   "ÿ","&#255;"),
# #   ncol = 2, byrow=T)
# #
# # charHack <- function(inp,charMat)
# # {
# #   for (i in 1:dim(charMat)[1])
# #   {
# #     inp <- str_replace_all(inp,fixed(charMat[i,1]),charMat[i,2])
# #   }
# #
# #   return(inp)
# #
# # }
# andrewbrownphd/MetaTurkR documentation built on Nov. 23, 2019, 4:17 p.m.