diff --git a/.gitignore b/.gitignore
index ba74660..d0f89f2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,7 @@ docs/_build/
 
 # PyBuilder
 target/
+
+# MacOS
+
+.DS_Store
diff --git a/CHANGES.txt b/CHANGES.txt
index 0548c48..c312a72 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,15 @@
 Changelog
 **********
 
+2.0 (2020-04-18)
+----------------
+
+- Major refactoring & cleanup (e.g. #16)
+- Optimizations
+- new APIs
+- Python3 only (#46)
+- Better Unicode matching (#45)
+
 1.3 (9.9. 2015)
 ----------------
 
diff --git a/README.md b/README.md
index 1f5c90d..e805011 100644
--- a/README.md
+++ b/README.md
@@ -17,36 +17,39 @@ countries.
 ## How do I install it?
 Just use 'pip install cleanco' if you have pip installed (as most systems do). Or download the zip distribution from this site, unzip it and then:
 
-* Mac: `cd` into it, and enter `sudo python setup.py install` along with your system password.
-* Windows: Same thing but without `sudo`.
+* Mac: `cd` into it, and enter `sudo python3 setup.py install` along with your system password.
+* Windows: `python setup.py install`.
 
 ## How does it work?
-Let's look at some sample code.  First, create an instance of the module:
+If you only want a clean version of the company name, first pull in the terms:
 
-    >>> from cleanco import cleanco
+    >>> terms = get_terms()
 
-Prepare a string of a company name that you want to process:
+Then, run the string and the terms through the "basename" function:
 
-    >>> business_name = "Some Big Pharma, LLC"
+    >>> basename("Daddy & Sons, Ltd.", terms)
+    Daddy & Sons
 
-Throw it into the instance:
+If you want to classify the name by business entity type, first build the list of type sources:
 
-    >>> x = cleanco(business_name)
+    >>> classification_sources = typesources()
 
-You can now get the company types:
+Then, run the string and the classification sources through the "matches" function:
 
-    >>> x.type()
-    ['Limited Liability Company']
+    >>> matches("MyCompany Ltd", classification_sources)
+    ['Limited']
 
-...the possible countries...
+If you want to classify the name by possible countries, first build the list of country sources:
 
-    >>> x.country()
-    ['United States of America', 'Philippines']
+    >>> classification_sources = countrysources()
+
+Then, run the string and the classification sources through the "matches" function:
 
-...and a clean version of the company name.
+    >>> matches("MyCompany Ltd", classification_sources)
+    ['United States of America', 'Philippines']
 
-    >>> x.clean_name()
-    'Some Big Pharma'
+## Compatibility with previous versions
+cleanco's API was simplified in version 2.0. While previous functions are still compatible, they are not preferred.
 
 ## Are there bugs?
 See the issue tracker. If you find a bug or have enhancement suggestion or question, please file an issue and provide a PR if you can. For example, some of the company suffixes may be incorrect or there may be suffixes missing.
@@ -55,5 +58,5 @@ To run tests, simply install the package and run `python setup.py test`. To run
 
 ## Special thanks to:
 
-- Wikipedia's [Types of Business Entity article](http://en.wikipedia.org/wiki/Types_of_business_entity), where I spent hours of research.
+- Wikipedia's [Types of Business Entity article](http://en.wikipedia.org/wiki/Types_of_business_entity).
 - Contributors: Petri Savolainen <petri.savolainen@koodaamo.fi>
diff --git a/cleanco.py b/cleanco.py
deleted file mode 100644
index 9cc813e..0000000
--- a/cleanco.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Note that this script is geared towards identifying businesses in terms of the US/UK
-
-from collections import OrderedDict
-import re
-
-from termdata import terms_by_country as country_dict, terms_by_type as type_dict
-
-
-# Sorted business types / abbreviation by length of business type
-sorted_types = []
-for business_type in type_dict:
-	for item in type_dict[business_type]:
-		temp_tuple = [business_type, item]
-		sorted_types.append(temp_tuple)
-sorted_types = sorted(sorted_types, key=lambda part: len(part[1]), reverse=True)
-
-# Sorted business countries / type abbreviations by length of business type abbreviations
-sorted_countries = []
-for country in country_dict:
-	for item in country_dict[country]:
-		temp_tuple = [country, item]
-		sorted_countries.append(temp_tuple)
-sorted_countries = sorted(sorted_countries, key=lambda part: len(part[1]), reverse=True)
-
-# All of the suffixes sorted by length
-all_sorted = sorted_types + sorted_countries
-suffix_sort = []
-for item in all_sorted:
-	suffix_sort.append(item[1])
-suffix_sort = sorted(suffix_sort, key=lambda part: len(part), reverse=True)
-
-
-class cleanco(object):
-
-	def __init__(self, business_name):
-		# always do non-visible cleanup, but store the original just in case
-		self.business_name = ' '.join(business_name.split())
-		self._original = business_name
-
-	def string_stripper(self, business_name):
-
-		# Get rid of extra prefix-, suffix- & in-between spaces
-		business_name = " ".join(business_name.split())
-
-		# Get rid of all trailing non-letter symbols except '.'
-		match = re.search(r'[^\.\w]+$', business_name, flags=re.UNICODE)
-		if match is not None:
-			business_name = business_name[:match.span()[0]]
-
-		return business_name
-
-	def end_strip(self, a_set):
-
-		end_set = []
-		business_name = self.business_name
-		business_name = self.string_stripper(business_name)
-
-		for key, suffix in a_set:
-			if ((business_name.lower()).endswith(" " + suffix)):
-				end_set.append(key)
-
-		end_set = list(OrderedDict.fromkeys(end_set))
-
-		if end_set != []:
-			return end_set
-		else:
-			return None
-
-
-	def clean_name(self, suffix=True, prefix=False, middle=False, multi=False):
-		"return cleared version of the business name"
-
-		name = self.business_name
-
-		# Run it through the string_stripper once more
-		name = self.string_stripper(name)
-		loname = name.lower()
-		
-		# return name without suffixed/prefixed/middle type term(s)
-
-		for item in suffix_sort:
-			if suffix:
-				if loname.endswith(" " + item):
-					start = loname.find(item)
-					end = len(item)
-					name = name[0:-end-1]
-					name = self.string_stripper(name)
-					if multi==False:
-						break
-			if prefix:
-				if loname.startswith(item+' '):
-					name = name[len(item)+1:]
-					if multi==False:
-						break
-			if middle:
-				term = ' ' + item + ' '
-				if term in loname:
-					start = loname.find(term)
-					end = start + len(term)
-					name = name[:start] + " " + name[end:]
-					if multi==False:
-						break
-
-		return self.string_stripper(name)
-
-
-	def type(self):
-		self.type = self.end_strip(sorted_types)
-		return self.type
-
-	def country(self):
-		self.country = self.end_strip(sorted_countries)
-		return self.country
diff --git a/cleanco/__init__.py b/cleanco/__init__.py
new file mode 100644
index 0000000..750f103
--- /dev/null
+++ b/cleanco/__init__.py
@@ -0,0 +1 @@
+from .cleanco import cleanco
diff --git a/cleanco/classify.py b/cleanco/classify.py
new file mode 100644
index 0000000..9c53304
--- /dev/null
+++ b/cleanco/classify.py
@@ -0,0 +1,60 @@
+"""
+Functions to help classify business names by country or type, based on legal terms.
+
+Examples of use:
+
+>> # check name for its possible business type(s)
+>> classification_sources = typesources()
+>> matches("MyCompany Ltd", classification_sources)
+['Limited']
+>>
+
+>> # check name for its possible jurisdictions, usually countries
+>> classification_sources = countrysources()
+>> matches("MyCompany Ltd", classification_sources)
+['New Zealand', 'United Kingdom of Great Britain and Northern Ireland', 'United States of America']
+>>
+
+"""
+
+from termdata import terms_by_country, terms_by_type
+from clean import strip_tail, normalized
+
+
+def typesources():
+   "list (business type, term) pairs, ordered from longest term to shortest"
+   pairs = [
+       (business_type, term)
+       for business_type, terms in terms_by_type.items()
+       for term in terms
+   ]
+   return sorted(pairs, key=lambda pair: len(pair[1]), reverse=True)
+
+def countrysources():
+   "list (country, term) pairs, ordered from longest term to shortest"
+   pairs = [
+       (country, term)
+       for country, terms in terms_by_country.items()
+       for term in terms
+   ]
+   return sorted(pairs, key=lambda pair: len(pair[1]), reverse=True)
+
+def matches(name, sources):
+    """Return the classifiers whose legal term occurs as a word in name.
+
+    name is a business name; sources is an iterable of (classifier, term)
+    pairs such as those built by typesources() or countrysources().
+    Words are compared after caseless NFKD normalization, so only
+    single-word terms can match. Each classifier is reported once, in the
+    order it is first encountered in sources.
+    """
+    # a set gives constant-time lookups for the many terms tested below
+    words = {normalized(part) for part in strip_tail(name).split()}
+    found = []
+
+    for classifier, term in sources:
+        if normalized(term) in words and classifier not in found:
+            found.append(classifier)
+
+    return found
+
diff --git a/cleanco/clean.py b/cleanco/clean.py
new file mode 100644
index 0000000..62d5ec1
--- /dev/null
+++ b/cleanco/clean.py
@@ -0,0 +1,75 @@
+"""Functions to help clean & normalize business names.
+
+See http://www.unicode.org/reports/tr15/#Normalization_Forms_Table for details
+on Unicode normalization and the NFKD normalization used here.
+
+Basic usage:
+
+>> terms = get_terms()
+>> basename("Daddy & Sons, Ltd.", terms)
+Daddy & Sons
+
+"""
+
+import functools
+import operator
+from collections import OrderedDict
+import re
+import unicodedata
+from termdata import terms_by_type, terms_by_country
+
+
+tail_removal_rexp = re.compile(r"[^\.\w]+$", flags=re.UNICODE)  # run of characters other than "." and word characters, anchored at the end
+
+
+def get_terms():
+    "collect the distinct legal terms listed in both termdata tables"
+    by_type = terms_by_type.values()
+    by_country = terms_by_country.values()
+    return set().union(*by_type, *by_country)
+    
+
+def strip_tail(name):
+    "Drop the trailing run of characters that are neither word characters nor dots"
+    tail = tail_removal_rexp.search(name)
+    if tail is None:
+        return name
+    return name[: tail.start()]
+
+
+def normalized(text):
+    "caseless NFKD form of text: decompose first so compatibility characters expose their cased letters, casefold, then decompose again"
+    return unicodedata.normalize("NFKD", unicodedata.normalize("NFKD", text).casefold())
+
+
+def basename(name, terms, suffix=True, prefix=False, middle=False, multi=False):
+    """Return name with legal terms removed at the selected positions.
+
+    terms is an iterable of legal terms, e.g. the result of get_terms().
+    suffix, prefix and middle select whether a term is removed when it is
+    the last word, the first word, or found anywhere in the name (first
+    occurrence). Unless multi is true, processing stops after the first
+    removal. Words are compared one by one after caseless normalization,
+    so terms that contain spaces never match.
+    """
+    parts = strip_tail(name).split()
+    nparts = [normalized(p) for p in parts]
+
+    for term in (normalized(t) for t in terms):
+        removed = False
+        # the emptiness checks keep blank or fully stripped names from raising
+        if suffix and nparts and nparts[-1] == term:
+            del nparts[-1], parts[-1]
+            removed = True
+        if (multi or not removed) and prefix and nparts and nparts[0] == term:
+            del nparts[0], parts[0]
+            removed = True
+        if (multi or not removed) and middle and term in nparts:
+            idx = nparts.index(term)
+            del nparts[idx], parts[idx]
+            removed = True
+        # stop only after an actual removal; a miss must not end the search
+        if removed and not multi:
+            break
+
+    return strip_tail(" ".join(parts))
diff --git a/cleanco/cleanco.py b/cleanco/cleanco.py
new file mode 100644
index 0000000..52c651d
--- /dev/null
+++ b/cleanco/cleanco.py
@@ -0,0 +1,20 @@
+from clean import get_terms, basename
+from classify import typesources, countrysources, matches
+
+
+class cleanco:
+   "backwards compatibility wrapper: give the name once to the constructor (1.x style) or per call; prefer basename() and matches()"
+
+   def __init__(self, name=None):
+      self._name = name  # optional default, so 1.x style cleanco(name).type() works
+      self._types, self._countries = typesources(), countrysources()
+      self._terms = get_terms()
+
+   def clean_name(self, name=None, **options):  # options (suffix, prefix, middle, multi) are forwarded to basename()
+      return basename(self._name if name is None else name, self._terms, **options)
+
+   def country(self, name=None):  # classifiers from countrysources() matching the name
+      return matches(self._name if name is None else name, self._countries)
+
+   def type(self, name=None):  # classifiers from typesources() matching the name
+      return matches(self._name if name is None else name, self._types)
diff --git a/termdata.py b/cleanco/termdata.py
similarity index 90%
rename from termdata.py
rename to cleanco/termdata.py
index 5f6bfb3..072f3dd 100644
--- a/termdata.py
+++ b/cleanco/termdata.py
@@ -19,7 +19,7 @@
       'lda.', 'tov', 'pp'
    ],
    'Limited Liability Company': ['pllc', 'llc', 'l.l.c.', 'plc.', 'plc', 'hf.', 'oyj',
-      'a.e.', 'nyrt.', 'p.l.c.', 'sh.a.', 's.a.', 's.r.l.', 'srl.', 'aat', '3at', 'd.d.',
+      'a.e.', 'nyrt.', 'p.l.c.', 'sh.a.', 's.a.', 's.r.l.', 'srl.', 'srl', 'aat', '3at', 'd.d.',
       's.r.o.', 'spol. s r.o.', 's.m.b.a.', 'smba', 'sarl', 'nv', 'sa', 'aps',
       'a/s', 'p/s', 'sae', 'sasu', 'eurl', 'ae', 'cpt', 'as', 'ab', 'asa', 'ooo', 'dat',
       'vat', 'zat', 'mchj', 'a.d.'
@@ -50,10 +50,10 @@
    'Australia': ['nl', 'pty. ltd.', 'pty ltd'],
    'Austria': ['e.u.', 'stg', 'gesbr', 'a.g.', 'ag', 'og', 'kg', 'aktiengesellschaft'],
    'Belarus': ['aat', '3at'],
-   'Belgium': ['esv', 'vzw', 'vof', 'snc', 'comm.v', 'scs', 'bvba', 'sprl', 'cbva',
+   'Belgium': ['esv', 'vzw', 'vof', 'snc', 'comm.v', 'scs', 'bvba', 'sprl', 'cvba',
       'cvoa', 'sca', 'sep', 'gie'
    ],
-   'Bosnia / Herzegovina': ['d.d.', 'a.d.', 'd.n.o.', 'd.o.o.', 'k.v.', 's.p.'],
+   'Bosnia and Herzegovina': ['d.d.', 'a.d.', 'd.n.o.', 'd.o.o.', 'k.v.', 's.p.'],
    'Brazil': ['ltda', 's.a.', 'pllc', 'ad', 'adsitz', 'ead', 'et', 'kd', 'kda', 'sd'],
    'Bulgaria': ['ad', 'adsitz', 'ead', 'et', 'kd', 'kda', 'sd'],
    'Cambodia': ['gp', 'sm pte ltd.', 'pte ltd.', 'plc ltd.', 'peec', 'sp'],
@@ -61,13 +61,13 @@
    'Chile': ['eirl', 's.a.', 'sgr', 's.g.r.', 'ltda', 's.p.a.', 'sa', 's. en c.',
       'ltda.'
    ],
-   'Columbia': ['s.a.', 'e.u.', 's.a.s.', 'suc. de descendants', 'sca'],
+   'Colombia': ['s.a.', 'e.u.', 's.a.s.', 'suc. de descendants', 'sca'],
    'Croatia': ['d.d.', 'd.o.o.', 'obrt'],
-   'Czech Republic': ['a.s.', 'akc. spol.', 's.r.o.', 'spol. s r.o.', 'v.o.s.', u've\xc5\x99. obch. spol.', 'a spol.', 'k.s.', 'kom. spol.', 'kom. spol.'],
+   'Czechia': ['a.s.', 'akc. spol.', 's.r.o.', 'spol. s r.o.', 'v.o.s.', u've\xc5\x99. obch. spol.', 'a spol.', 'k.s.', 'kom. spol.', 'kom. spol.'],
    'Denmark': ['i/s', 'a/s', 'k/s', 'p/s', 'amba', 'a.m.b.a.', 'fmba', 'f.m.b.a.', 'smba',
       's.m.b.a.', 'g/s'
    ],
-   'Dominican Republic': ['c. por a.', 'cxa', 's.a.', 's.a.s.', 'srl.', 'eirl.', 'sa',
+   'Dominican Republic': ['c. por a.', 'cxa', 's.a.', 's.a.s.', 'srl.', 'srl', 'eirl.', 'sa',
       'sas'
    ],
    'Ecuador': ['s.a.', 'c.a.', 'sa', 'ep'],
@@ -98,8 +98,8 @@
    'Latvia': ['as', 'sia', 'ik', 'ps', 'ks'],
    'Lebanon': ['sal'],
    'Lithuania': ['uab', 'ab', 'ij', 'mb'],
-   'Luxemborg': ['s.a.', 's.a.r.l.', 'secs'],
-   'Macedonia': ['d.o.o.', 'd.o.o.e.l', 'k.d.a.', 'j.t.d.', 'a.d.', 'k.d.'],
+   'Luxembourg': ['s.a.', 's.a.r.l.', 'secs'],
+   'North Macedonia': ['d.o.o.', 'd.o.o.e.l', 'k.d.a.', 'j.t.d.', 'a.d.', 'k.d.'],
    'Malaysia': ['bhd.', 'sdn. bhd.'],
    'Mexico': ['s.a.', 's. de. r.l.', 's. en c.', 's.a.b.', 's.a.p.i.'],
    'Mongolia': ['xk', 'xxk'],
@@ -118,7 +118,7 @@
    'Poland': ['p.p.', 's.k.a.', 'sp.j.', 'sp.k.', 'sp.p.', 'sp. z.o.o.', 's.c.', 's.a.'],
    'Portugal': ['lda.', 'crl', 's.a.', 's.f.', 'sgps'],
    'Romania': ['s.c.a.', 's.c.s.', 's.n.c.', 's.r.l.', 'o.n.g.', 's.a.'],
-   'Russia': ['ooo', 'oao', 'zao', '3ao'],
+   'Russian Federation': ['ooo', 'oao', 'zao', '3ao'],
    'Serbia': ['d.o.o.', 'a.d.', 'k.d.', 'o.d.'],
    'Singapore': ['bhd', 'pte ltd', 'sdn bhd', 'llp', 'l.l.p.', 'ltd.', 'pte'],
    'Slovenia': ['d.d.', 'd.o.o.', 'd.n.o.', 'k.d.', 's.p.'],
@@ -130,7 +130,7 @@
    'Switzerland': ['ab', 'sa', 'gmbh', 'g.m.b.h.', 'sarl', 'sagl'],
    'Turkey': ['koop.'],
    'Ukraine': ['dat', 'fop', 'kt', 'pt', 'tdv', 'tov', 'pp', 'vat', 'zat', 'at'],
-   'United Kingdom': ['plc.', 'plc', 'cic', 'cio', 'l.l.p.', 'llp', 'l.p.', 'lp', 'ltd.',
+   'United Kingdom of Great Britain and Northern Ireland': ['plc.', 'plc', 'cic', 'cio', 'l.l.p.', 'llp', 'l.p.', 'lp', 'ltd.',
       'ltd', 'limited'
    ],
    'United States of America': ['llc', 'inc.', 'corporation', 'incorporated', 'company',
diff --git a/setup.py b/setup.py
index 403e5c5..c1f1141 100755
--- a/setup.py
+++ b/setup.py
@@ -6,20 +6,19 @@
 
 setup(name='cleanco',
       description='Python library to process company names',
-      version='1.361',
+      version='2.0',
       license="MIT",
       classifiers = [
          "Topic :: Office/Business",
          "Development Status :: 4 - Beta",
          "Intended Audience :: Developers",
          "License :: OSI Approved :: MIT License",
-         "Programming Language :: Python :: 2.7",
-         "Programming Language :: Python :: 3.5"
+         "Programming Language :: Python :: 3"
       ],
       url='https://github.com/psolin/cleanco',
       author='Paul Solin',
       author_email='paul@paulsolin.com',
-      py_modules=['cleanco', 'termdata'],
+      packages=["cleanco"],
       setup_requires=['pytest-runner'],
       tests_require=['pytest', 'tox'],
-      )
+)
diff --git a/tests/test_cleanname.py b/tests/test_cleanname.py
index 4ee2e9a..f997bb0 100644
--- a/tests/test_cleanname.py
+++ b/tests/test_cleanname.py
@@ -1,7 +1,11 @@
 # encoding: utf-8
+import pytest
+from cleanco.clean import get_terms, basename
 
-from cleanco import cleanco
 
+@pytest.fixture
+def terms():
+   return get_terms()  # the set of legal terms that each test passes to basename()
 
 # Tests that demonstrate stuff is stripped away
 
@@ -14,11 +18,11 @@
    "name w/ ws suffix dot ws": " Hello World ltd. ",
 }
 
-def test_basic_cleanups():
+def test_basic_cleanups(terms):
    expected = "Hello World"
    errmsg = "cleanup of %s failed"
    for testname, variation in basic_cleanup_tests.items():
-      assert cleanco(variation).clean_name() == expected, errmsg % testname
+      assert basename(variation, terms) == expected, errmsg % testname
 
 multi_cleanup_tests = {
    "name + suffix":          "Hello World Oy",
@@ -29,38 +33,38 @@ def test_basic_cleanups():
    "name w/ mid + suffix":   "Hello Oy World Ab"
 }
 
-def test_multi_type_cleanups():
+def test_multi_type_cleanups(terms):
    expected = "Hello World"
    errmsg = "cleanup of %s failed"
    for testname, variation in multi_cleanup_tests.items():
-      result = cleanco(variation).clean_name(prefix=True, suffix=True, middle=True, multi=True)
+      result = basename(variation, terms, prefix=True, suffix=True, middle=True, multi=True)
       assert result == expected, errmsg % testname
 
 
 # Tests that demonstrate organization name is kept intact
 
 preserving_cleanup_tests = {
-   "name with comma": (u"Hello, World, ltd.", u"Hello, World"),
-   "name with dot": (u"Hello. World, Oy", u"Hello. World")
+   "name with comma": ("Hello, World, ltd.", "Hello, World"),
+   "name with dot": ("Hello. World, Oy", "Hello. World")
 }
 
-def test_preserving_cleanups():
+def test_preserving_cleanups(terms):
    errmsg = "preserving cleanup of %s failed"
    for testname, (variation, expected) in preserving_cleanup_tests.items():
-      assert cleanco(variation).clean_name() == expected, errmsg % testname
+      assert basename(variation, terms) == expected, errmsg % testname
 
 # Test umlauts
 
 
 unicode_umlaut_tests = {
-   "name with umlaut in end": (u"Säätämö Oy", u"Säätämö"),
-   "name with umlauts & comma": (u"Säätämö, Oy", u"Säätämö"),
-   "name with no ending umlaut": (u"Säätämo Oy", u"Säätämo"),
-   "name with beginning umlaut": (u"Äätämo Oy", u"Äätämo"),
-   "name with just umlauts": (u"Äätämö", u"Äätämö")
+   "name with umlaut in end": ("Säätämö Oy", "Säätämö"),
+   "name with umlauts & comma": ("Säätämö, Oy", "Säätämö"),
+   "name with no ending umlaut": ("Säätämo Oy", "Säätämo"),
+   "name with beginning umlaut": ("Äätämo Oy", "Äätämo"),
+   "name with just umlauts": ("Äätämö", "Äätämö")
 }
 
-def test_with_unicode_umlauted_name():
+def test_with_unicode_umlauted_name(terms):
    errmsg = "preserving cleanup of %s failed"
    for testname, (variation, expected) in unicode_umlaut_tests.items():
-      assert cleanco(variation).clean_name() == expected, errmsg % testname
+      assert basename(variation, terms) == expected, errmsg % testname
diff --git a/tox.ini b/tox.ini
index 6677f4e..a814859 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py27,py35
+envlist = py35, py36, py37, py38
 
 [testenv]
 deps=pytest