# R/to_suffix.r

to_suffix <- function(incomingstring) {

  strip <- c( "location [:digit:]*",
              "play area",
              "industrial area",
              "residential area",
              "islamic center",
              "slaughter house",
              "dispensary",
              "community graveyard",
              "hostels",
              "industrial area",
              "country club",
              "number three dam",
              "number one dam",
              "grazing area",
              "district road",
              "ranching estate",
              "housing estate",
              "tea estate",
              "fruit and vegetable farm",
              "national farm",
              "experimental farm",
              "agriculture experimental farm",
              "demonstration farm",
              "pontoon bridge",
              "detention camp",
              "fishing camp",
              "boat channel",
              "foot hill",
              "Number Two Dam",
              "Number Two Dam",
              "forest guard post",
              "Mental Hospital",
              "Game Reserve",
              "National Reserve",
              "Land Unit",
              "Grazing Scheme",
              "Forest Guard Post",
              "sub-surface dam",
              "railway landies",
              "Settlement Area",
              "headwaters",
              "NORTHERN GRAZING AREA",
              "dairy farm",
              "railway station",
              "police post",
              "south reserve",
              "forest area",
              "settled area",
              "timber yard",
              "coffee estate",
              "mission dispenssary",
              "golf club",
              "sports club",
              "river area",
              "agricultureal department seed farm",
              "forest station",
              "escarp forest",
              "forest area",
              "kenya meat commission",
              "timber mill",
              "police post",
              "government road",
              "squatter",
              "police station",
              "labour line",
              "escarpment forest reserve",
              "escarpment forest",
              "european primary school",
              "power station",
              "royal national park",
              "nyeri",
              "h.g post",
              "hg post",
              "home guard post",
              "fort hall",
              "s.n.r.",
              "ngobit",
              "nanyuki",
              "mweiga",
              "embu",
              "murungaini",
              "\\, nakuru",
              ", makuyu",
              "rv.",
              "loc",
              "south nyeri reserve",
              "forest nursery","pass","peak","plateau","house","lagh",
              "sub-location",
              "hills",
              "hill",
              "location",
              "dam",
              "settlement",
              "estates",
              "estate",
              "forest reserve",
              "forest",
              "rocks",
              "ranch",
              "passage",
              "group ranch",
              "river",
              "ranch",
              "nursery",
              "island",
              "farm",
              "road",
              "station",
              "ridge",
              "stream",
              "summit",
              "channel",
              "reserve",
              "lake",
              "bay",
              "point",
              "swamp",
              "school",

              "plains",
              "plain",
              "range",
              "spring",

              "valley",
              "district",
              "escarpment",

              "sub-section",
              "section",
              "area",
              "landing strip",
              "airstrip",
              "aerodrome",
              "airfield",
              "research station",
              "camp",
              "region",
              #"falls",
              "creek",
              "bridge",
              "tea factory",
              "factory",
              "village",
              "mission",
              "market",
              "mountain",
              "canning factory",
              "trading company",
              "pump",
              "settlement",
              "airfield",
              "labour camp"   ,
              "labour camps"  ,
              "commission",
              "depot",
              "flats",
              "foothills",
              "inn",
              "peak",
              "parklands",
              "office",
              "rabate",
              "racecourse",
              "roadhouse"  ,
              "routes" ,
              "section",
              "settled" ,
              "ditch",
              "temple",
              "crater",
              "lake",
              "woods",
              "yard" ,
              "church",
              "dispenssary",
              "park"        ,
              "lands",
              "ppla",
              "pplu",
              "pplx",
              "store",
              "town"  ,
              "club"   ,
              "hospital"  ,
              "plot",
              "ravine",
              "ridge"  ,
              "rivers"  ,
              "sawmill"  ,
              "track",
              "labour lines" ,
              "ltd"    ,
              "swamp",
              "uplands",
              "hotel"    ,
              "border"    ,
              "escarpment",
              "estates" ,
              "street",
              "bridge" ,
              "post"  ,
              "dykes"  ,
              "house"  ,
              "camp",
              "mission",
              "village",
              "sawmills",
              "(ppl)" ,
              "district",
              "market"   ,
              "police station",
              "division",
              "school",
              "township",
              "valley"   ,
              #"falls"     ,
              "range",
              "forest reserve",
              "reserve",
              "road"   ,
              "river",
              "sub-location",
              "area",
              "ppl"  ,
              "location",
              #"hall" , that's just fort hall
              "estate",
              "forest" ,
              "farm",
              'hill','hll','market','mkt','post','river','AIRFIELD CAMP','fall','lake','swamp','school','district','interschool','forest','estate','farm','dairy',
              'area','escarpment','ditch','road','bridge','railway','airfield','village','vill',
              'township','town','mill','sawmill', 'border','mount','reserve','mission','house', 'hut','hospital','building','valley','rivr','river','street','police station','salvation army school',
              'ridge','detention camp','est','inn','location','city','ridge','near','national park','station',
              'welfare centre','flat','inter.school','church','foothill','native','settled','prison')

  strip <- unique(strip)

  df <- data.frame(incomingstring, stringsAsFactors = F)
  df$incomingstring_nosuffix <- df$incomingstring
  df$incomingstring_suffix <- NA

  #df <- subset(df, !duplicated(incomingstring))


  for(q in strip){
    print(q)
    df$incomingstring_nosuffix <- str_trim(tolower(df$incomingstring_nosuffix))
    df$incomingstring_nosuffix  <- gsub("\\s+"," ", df$incomingstring_nosuffix  )

    condition <- grepl(paste0(" ",q,"$"), df$incomingstring_nosuffix , ignore.case = T); print(table(condition))
    df$incomingstring_suffix[condition] <- q

    df$incomingstring_nosuffix[condition] <- gsub(paste0(" ",q,"$"),"", df$incomingstring_nosuffix[condition], ignore.case = T)


  }

  return(df)
}
# rexdouglass/MeasuringLandscape documentation built on May 13, 2019, 6:16 p.m.