# example.py
#
# Example of unicode normalization

# Two strings
s1 = 'Spicy Jalape\u00f1o'
s2 = 'Spicy Jalapen\u0303o'

# (a) Print them out (usually looks identical)
print(s1)
print(s2)

# (b) Examine equality and length
print('s1 == s2', s1 == s2)
print(len(s1), len(s2))

# (c) Normalize and try the same experiment
import unicodedata

n_s1 = unicodedata.normalize('NFC', s1)
n_s2 = unicodedata.normalize('NFC', s2)

print('n_s1 == n_s2', n_s1 == n_s2)
print(len(n_s1), len(n_s2))

# (d) Example of normalizing to a decomposed form and stripping accents
t1 = unicodedata.normalize('NFD', s1)
print(''.join(c for c in t1 if not unicodedata.combining(c)))