You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					465 lines
				
				16 KiB
			
		
		
			
		
	
	
					465 lines
				
				16 KiB
			| 
											8 years ago
										 | #!/usr/bin/env python
 | ||
|  | # -*- coding: utf-8 -*-
 | ||
|  | # :Copyright: © 2011, 2017 Günter Milde.
 | ||
|  | # :License: Released under the terms of the `2-Clause BSD license`_, in short:
 | ||
|  | #
 | ||
|  | #    Copying and distribution of this file, with or without modification,
 | ||
|  | #    are permitted in any medium without royalty provided the copyright
 | ||
|  | #    notice and this notice are preserved.
 | ||
|  | #    This file is offered as-is, without any warranty.
 | ||
|  | #
 | ||
|  | # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
 | ||
|  | 
 | ||
|  | # :Id: $Id: generate_punctuation_chars.py 8016 2017-01-17 15:06:17Z milde $
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | """(Re)generate the utils.punctuation_chars module."""
 | ||
|  | 
 | ||
|  | # (re)generate the utils.punctuation_chars module
 | ||
|  | # ===============================================
 | ||
|  | #
 | ||
|  | # The category of some characters can change with the development of the
 | ||
|  | # Unicode standard. This tool checks the patterns in `utils.punctuation_chars`
 | ||
|  | # against a re-calculation based on the "unicodedata" stdlib module
 | ||
|  | # which may give different results for different Python versions.
 | ||
|  | #
 | ||
|  | # Updating the module with changed `unicode_punctuation_categories` (due to
 | ||
|  | # a new Python or Unicode standard version is an API cange (may render valid
 | ||
|  | # rST documents invalid). It should only be done for "feature releases" and
 | ||
|  | # requires also updating the specification of `inline markup recognition
 | ||
|  | # rules`_ in ../../docs/ref/rst/restructuredtext.txt.
 | ||
|  | #
 | ||
|  | # .. _inline markup recognition rules:
 | ||
|  | #     ../../docs/ref/rst/restructuredtext.html#inline-markup
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Setup::
 | ||
|  | 
 | ||
|  | import sys, re
 | ||
|  | import unicodedata
 | ||
|  | 
 | ||
|  | if sys.version_info >= (3,):
 | ||
|  |     unichr = chr # unichr not available in Py3k
 | ||
|  | else:
 | ||
|  |     import codecs
 | ||
|  |     sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Template for utils.punctuation_chars
 | ||
|  | # ------------------------------------
 | ||
|  | #
 | ||
|  | # Problem: ``ur`` prefix fails with Py 3.5 ::
 | ||
|  | 
 | ||
|  | module_template = u'''#!/usr/bin/env python
 | ||
|  | # -*- coding: utf-8 -*-
 | ||
|  | # :Id: $Id: generate_punctuation_chars.py 8016 2017-01-17 15:06:17Z milde $
 | ||
|  | # :Copyright: © 2011, 2017 Günter Milde.
 | ||
|  | # :License: Released under the terms of the `2-Clause BSD license`_, in short:
 | ||
|  | #
 | ||
|  | #    Copying and distribution of this file, with or without modification,
 | ||
|  | #    are permitted in any medium without royalty provided the copyright
 | ||
|  | #    notice and this notice are preserved.
 | ||
|  | #    This file is offered as-is, without any warranty.
 | ||
|  | #
 | ||
|  | # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
 | ||
|  | #
 | ||
|  | # This file is generated by
 | ||
|  | # ``docutils/tools/dev/generate_punctuation_chars.py``.
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | import sys, re
 | ||
|  | import unicodedata
 | ||
|  | 
 | ||
|  | """Docutils character category patterns.
 | ||
|  | 
 | ||
|  |    Patterns for the implementation of the `inline markup recognition rules`_
 | ||
|  |    in the reStructuredText parser `docutils.parsers.rst.states.py` based
 | ||
|  |    on Unicode character categories.
 | ||
|  |    The patterns are used inside ``[ ]`` in regular expressions.
 | ||
|  | 
 | ||
|  |    Rule (5) requires determination of matching open/close pairs. However, the
 | ||
|  |    pairing of open/close quotes is ambiguous due to  different typographic
 | ||
|  |    conventions in different languages. The ``quote_pairs`` function tests
 | ||
|  |    whether two characters form an open/close pair.
 | ||
|  | 
 | ||
|  |    The patterns are generated by
 | ||
|  |    ``docutils/tools/dev/generate_punctuation_chars.py`` to  prevent dependence
 | ||
|  |    on the Python version and avoid the time-consuming generation with every
 | ||
|  |    Docutils run. See there for motives and implementation details.
 | ||
|  | 
 | ||
|  |    The category of some characters changed with the development of the
 | ||
|  |    Unicode standard. The current lists are generated with the help of the
 | ||
|  |    "unicodedata" module of Python %(python_version)s (based on Unicode version %(unidata_version)s).
 | ||
|  | 
 | ||
|  |    .. _inline markup recognition rules:
 | ||
|  |       http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
 | ||
|  | """
 | ||
|  | 
 | ||
|  | %(openers)s
 | ||
|  | %(closers)s
 | ||
|  | %(delimiters)s
 | ||
|  | if sys.maxunicode >= 0x10FFFF: # "wide" build
 | ||
|  | %(delimiters_wide)s
 | ||
|  | closing_delimiters = u'\\\\\\\\.,;!?'
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Matching open/close quotes
 | ||
|  | # --------------------------
 | ||
|  | 
 | ||
|  | quote_pairs = {# open char: matching closing characters # usage example
 | ||
|  |                u'\\xbb':   u'\\xbb',         # » » Swedish
 | ||
|  |                u'\\u2018': u'\\u201a',       # ‘ ‚ Albanian/Greek/Turkish
 | ||
|  |                u'\\u2019': u'\\u2019',       # ’ ’ Swedish
 | ||
|  |                u'\\u201a': u'\\u2018\\u2019', # ‚ ‘ German ‚ ’ Polish
 | ||
|  |                u'\\u201c': u'\\u201e',       # “ „ Albanian/Greek/Turkish
 | ||
|  |                u'\\u201e': u'\\u201c\\u201d', # „ “ German „ ” Polish
 | ||
|  |                u'\\u201d': u'\\u201d',       # ” ” Swedish
 | ||
|  |                u'\\u203a': u'\\u203a',       # › › Swedish
 | ||
|  |               }
 | ||
|  | """Additional open/close quote pairs."""
 | ||
|  | 
 | ||
|  | def match_chars(c1, c2):
 | ||
|  |     """Test whether `c1` and `c2` are a matching open/close character pair.
 | ||
|  | 
 | ||
|  |     Matching open/close pairs are at the same position in
 | ||
|  |     `punctuation_chars.openers` and `punctuation_chars.closers`.
 | ||
|  |     The pairing of open/close quotes is ambiguous due to  different
 | ||
|  |     typographic conventions in different languages,
 | ||
|  |     so we test for additional matches stored in `quote_pairs`.
 | ||
|  |     """
 | ||
|  |     try:
 | ||
|  |         i = openers.index(c1)
 | ||
|  |     except ValueError:  # c1 not in openers
 | ||
|  |         return False
 | ||
|  |     return c2 == closers[i] or c2 in quote_pairs.get(c1, u'')\
 | ||
|  | '''
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Generation of the  character category patterns
 | ||
|  | # ----------------------------------------------
 | ||
|  | #
 | ||
|  | # Unicode punctuation character categories
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # For details about Unicode categories, see
 | ||
|  | # http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | unicode_punctuation_categories = {
 | ||
|  |     # 'Pc': 'Connector', # not used in Docutils inline markup recognition
 | ||
|  |     'Pd': 'Dash',
 | ||
|  |     'Ps': 'Open',
 | ||
|  |     'Pe': 'Close',
 | ||
|  |     'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage
 | ||
|  |     'Pf': 'Final quote', # may behave like Ps or Pe depending on usage
 | ||
|  |     'Po': 'Other'
 | ||
|  |     }
 | ||
|  | """Unicode character categories for punctuation"""
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # generate character pattern strings
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | def unicode_charlists(categories, cp_min=0, cp_max=None):
 | ||
|  |     """Return dictionary of Unicode character lists.
 | ||
|  | 
 | ||
|  |     For each of the `catagories`, an item contains a list with all Unicode
 | ||
|  |     characters with `cp_min` <= code-point <= `cp_max` that belong to
 | ||
|  |     the category.
 | ||
|  | 
 | ||
|  |     The default values check every code-point supported by Python
 | ||
|  |     (`sys.maxint` is 0x10FFFF in a "wide" build and 0xFFFF in a "narrow"
 | ||
|  |     build, i.e. ucs4 and ucs2 respectively).
 | ||
|  |     """
 | ||
|  |     # Determine highest code point with one of the given categories
 | ||
|  |     # (may shorten the search time considerably if there are many
 | ||
|  |     # categories with not too high characters):
 | ||
|  |     if cp_max is None:
 | ||
|  |         cp_max = max(x for x in range(sys.maxunicode+1)
 | ||
|  |                     if unicodedata.category(unichr(x)) in categories)
 | ||
|  |         # print(cp_max) # => 74867 for unicode_punctuation_categories
 | ||
|  |     charlists = {}
 | ||
|  |     for cat in categories:
 | ||
|  |         charlists[cat] = [unichr(x) for x in range(cp_min, cp_max+1)
 | ||
|  |                             if unicodedata.category(unichr(x)) == cat]
 | ||
|  |     return charlists
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Character categories in Docutils
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | def character_category_patterns():
 | ||
|  | 
 | ||
|  |     """Docutils character category patterns.
 | ||
|  | 
 | ||
|  |     Return list of pattern strings for the categories "Open", "Close",
 | ||
|  |     "Delimiters" and "Closing-Delimiters" used in the `inline markup
 | ||
|  |     recognition rules`_.
 | ||
|  |     """
 | ||
|  | 
 | ||
|  |     cp_min = 160 # ASCII chars have special rules for backwards compatibility
 | ||
|  |     ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min)
 | ||
|  |     """Strings of characters in Unicode punctuation character categories"""
 | ||
|  | 
 | ||
|  |     # match opening/closing characters
 | ||
|  |     # --------------------------------
 | ||
|  |     # Rearange the lists to ensure matching characters at the same
 | ||
|  |     # index position.
 | ||
|  | 
 | ||
|  |     # low quotation marks are also used as closers (e.g. in Greek)
 | ||
|  |     # move them to category Pi:
 | ||
|  |     ucharlists['Ps'].remove(u'‚') # 201A  SINGLE LOW-9 QUOTATION MARK
 | ||
|  |     ucharlists['Ps'].remove(u'„') # 201E  DOUBLE LOW-9 QUOTATION MARK
 | ||
|  |     ucharlists['Pi'] += [u'‚', u'„']
 | ||
|  | 
 | ||
|  |     ucharlists['Pi'].remove(u'‛') # 201B  SINGLE HIGH-REVERSED-9 QUOTATION MARK
 | ||
|  |     ucharlists['Pi'].remove(u'‟') # 201F  DOUBLE HIGH-REVERSED-9 QUOTATION MARK
 | ||
|  |     ucharlists['Pf'] += [u'‛', u'‟']
 | ||
|  | 
 | ||
|  |     # 301F  LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant:
 | ||
|  |     ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d')
 | ||
|  | 
 | ||
|  |     # print(u''.join(ucharlists['Ps']).encode('utf8')
 | ||
|  |     # print(u''.join(ucharlists['Pe']).encode('utf8')
 | ||
|  |     # print(u''.join(ucharlists['Pi']).encode('utf8')
 | ||
|  |     # print(u''.join(ucharlists['Pf']).encode('utf8')
 | ||
|  | 
 | ||
|  |     # The Docutils character categories
 | ||
|  |     # ---------------------------------
 | ||
|  |     #
 | ||
|  |     # The categorization of ASCII chars is non-standard to reduce
 | ||
|  |     # both false positives and need for escaping. (see `inline markup
 | ||
|  |     # recognition rules`_)
 | ||
|  | 
 | ||
|  |     # allowed before markup if there is a matching closer
 | ||
|  |     openers = [u'"\'(<\\[{']
 | ||
|  |     for category in ('Ps', 'Pi', 'Pf'):
 | ||
|  |         openers.extend(ucharlists[category])
 | ||
|  | 
 | ||
|  |     # allowed after markup if there is a matching opener
 | ||
|  |     closers = [u'"\')>\\]}']
 | ||
|  |     for category in ('Pe', 'Pf', 'Pi'):
 | ||
|  |         closers.extend(ucharlists[category])
 | ||
|  | 
 | ||
|  |     # non-matching, allowed on both sides
 | ||
|  |     delimiters = [u'\\-/:']
 | ||
|  |     for category in ('Pd', 'Po'):
 | ||
|  |         delimiters.extend(ucharlists[category])
 | ||
|  | 
 | ||
|  |     # non-matching, after markup
 | ||
|  |     closing_delimiters = [r'\\.,;!?']
 | ||
|  | 
 | ||
|  |     return [u''.join(chars) for chars in (openers, closers, delimiters,
 | ||
|  |                                             closing_delimiters)]
 | ||
|  | 
 | ||
|  | def separate_wide_chars(s):
 | ||
|  |     """Return (s1,s2) with characters above 0xFFFF in s2"""
 | ||
|  |     maxunicode_narrow = 0xFFFF
 | ||
|  |     l1 = [ch for ch in s if ord(ch) <= maxunicode_narrow]
 | ||
|  |     l2 = [ch for ch in s if ord(ch) > maxunicode_narrow]
 | ||
|  |     return ''.join(l1), ''.join(l2)
 | ||
|  | 
 | ||
|  | def mark_intervals(s):
 | ||
|  |     """Return s with shortcut notation for runs of consecutive characters
 | ||
|  | 
 | ||
|  |     Sort string and replace 'cdef' by 'c-f' and similar.
 | ||
|  |     """
 | ||
|  |     l =[]
 | ||
|  |     s = [ord(ch) for ch in s]
 | ||
|  |     s.sort()
 | ||
|  |     for n in s:
 | ||
|  |         try:
 | ||
|  |             if l[-1][-1]+1 == n:
 | ||
|  |                 l[-1].append(n)
 | ||
|  |             else:
 | ||
|  |                 l.append([n])
 | ||
|  |         except IndexError:
 | ||
|  |             l.append([n])
 | ||
|  | 
 | ||
|  |     l2 = []
 | ||
|  |     for i in l:
 | ||
|  |         i = [unichr(n) for n in i]
 | ||
|  |         if len(i) > 2:
 | ||
|  |             i = i[0], u'-', i[-1]
 | ||
|  |         l2.extend(i)
 | ||
|  | 
 | ||
|  |     return ''.join(l2)
 | ||
|  | 
 | ||
|  | def wrap_string(s, startstring= "(u'",
 | ||
|  |                     endstring = "')", wrap=67):
 | ||
|  |     """Line-wrap a unicode string literal definition."""
 | ||
|  |     c = len(startstring)
 | ||
|  |     contstring = "'\n" + ' ' * (len(startstring)-2) + "u'"
 | ||
|  |     l = [startstring]
 | ||
|  |     for ch in s.replace("'", r"\'"):
 | ||
|  |         c += 1
 | ||
|  |         if ch == '\\' and c > wrap:
 | ||
|  |             c = len(startstring)
 | ||
|  |             ch = contstring + ch
 | ||
|  |         l.append(ch)
 | ||
|  |     l.append(endstring)
 | ||
|  |     return ''.join(l)
 | ||
|  | 
 | ||
|  | 
 | ||
|  | def print_differences(old, new, name):
 | ||
|  |     """List characters missing in old/new."""
 | ||
|  |     if old != new:
 | ||
|  |         print('new %s:' % name)
 | ||
|  |         for c in new:
 | ||
|  |             if c not in old:
 | ||
|  |                 print('  %04x'%ord(c), unicodedata.name(c))
 | ||
|  |         print('removed %s:' % name)
 | ||
|  |         for c in old:
 | ||
|  |             if c not in new:
 | ||
|  |                 print('  %04x'%ord(c), unicodedata.name(c))
 | ||
|  |     else:
 | ||
|  |         print('%s unchanged' % name)
 | ||
|  | 
 | ||
|  | def print_quote_pairs():
 | ||
|  |     pairs = [(o,c) for o,c in quote_pairs.items()]
 | ||
|  |     for o,c in sorted(pairs):
 | ||
|  |         print((u'%s %s' % (o,c)).encode('utf8'))
 | ||
|  | 
 | ||
|  |     # # Test open/close matching:
 | ||
|  |     # for i in range(min(len(openers),len(closers))):
 | ||
|  |     #     print('%4d    %s    %s' % (i, openers[i].encode('utf8'),
 | ||
|  |     #                                closers[i].encode('utf8'))
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Output
 | ||
|  | # ------
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  | if __name__ == '__main__':
 | ||
|  | 
 | ||
|  |     import argparse
 | ||
|  |     parser = argparse.ArgumentParser(description=__doc__)
 | ||
|  |     parser.add_argument('-t', '--test', action="store_true",
 | ||
|  |                         help='test for changed character categories')
 | ||
|  |     args = parser.parse_args()
 | ||
|  | 
 | ||
|  | # (Re)create character patterns
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  |     (o, c, d, cd) = character_category_patterns()
 | ||
|  | 
 | ||
|  | # Characters in the upper plane require a "wide" build::
 | ||
|  | 
 | ||
|  |     o, o_wide = separate_wide_chars(o)
 | ||
|  |     c, c_wide = separate_wide_chars(c)
 | ||
|  |     d, d_wide = separate_wide_chars(d)
 | ||
|  | 
 | ||
|  | # delimiters: sort and use shortcut for intervals (saves ~150 characters)
 | ||
|  | # (`openers` and `closers` must be verbose and keep order
 | ||
|  | # because they are also used in `match_chars()`)::
 | ||
|  | 
 | ||
|  |     d = d[:5] + mark_intervals(d[5:])
 | ||
|  |     d_wide = mark_intervals(d_wide)
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # Test: compare module content with re-generated definitions
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  |     if args.test:
 | ||
|  | 
 | ||
|  | # Import the punctuation_chars module from the source
 | ||
|  | # or Py3k build path for local Python modules::
 | ||
|  | 
 | ||
|  |         if sys.version_info < (3,):
 | ||
|  |             sys.path.insert(0, '../../docutils')
 | ||
|  |         else:
 | ||
|  |             sys.path.insert(0, '../../build/lib')
 | ||
|  | 
 | ||
|  |         from docutils.utils.punctuation_chars import (openers, closers,
 | ||
|  |                                           delimiters, closing_delimiters)
 | ||
|  | 
 | ||
|  |         print('Check for differences between the current `punctuation_chars`'
 | ||
|  |               ' module\n and a regeneration based on Unicode version %s:'
 | ||
|  |               % unicodedata.unidata_version)
 | ||
|  | 
 | ||
|  |         print_differences(openers, o, 'openers')
 | ||
|  |         if o_wide:
 | ||
|  |             print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8'))
 | ||
|  |         print_differences(closers, c, 'closers')
 | ||
|  |         if c_wide:
 | ||
|  |             print('+ closers-wide = ur"""%s"""' % c_wide.encode('utf8'))
 | ||
|  | 
 | ||
|  |         print_differences(delimiters, d + d_wide, 'delimiters')
 | ||
|  |         print_differences(closing_delimiters, cd, 'closing_delimiters')
 | ||
|  | 
 | ||
|  |         sys.exit()
 | ||
|  | 
 | ||
|  | # Print re-generation of the punctuation_chars module
 | ||
|  | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # The output can be copied to docutils/utils if an update is wanted
 | ||
|  | # (API change, see Intro).
 | ||
|  | 
 | ||
|  | # Replacements::
 | ||
|  | 
 | ||
|  |     substitutions = {
 | ||
|  |         'python_version': '.'.join(str(s) for s in sys.version_info[:3]),
 | ||
|  |         'unidata_version': unicodedata.unidata_version,
 | ||
|  |         'openers': wrap_string(o.encode('unicode-escape').decode(),
 | ||
|  |                                startstring="openers = (u'"),
 | ||
|  |         'closers': wrap_string(c.encode('unicode-escape').decode(),
 | ||
|  |                                startstring="closers = (u'"),
 | ||
|  |         'delimiters': wrap_string(d.encode('unicode-escape').decode(),
 | ||
|  |                                   startstring="delimiters = (u'"),
 | ||
|  |         'delimiters_wide': wrap_string(
 | ||
|  |                             d_wide.encode('unicode-escape').decode(),
 | ||
|  |                             startstring="    delimiters += (u'")
 | ||
|  |         }
 | ||
|  | 
 | ||
|  |     print(module_template % substitutions)
 | ||
|  | 
 | ||
|  | 
 | ||
|  | # test prints
 | ||
|  | # ~~~~~~~~~~~
 | ||
|  | #
 | ||
|  | # For interactive use in development you may uncomment the following
 | ||
|  | # definitions::
 | ||
|  | 
 | ||
|  |     # print "wide" Unicode characters:
 | ||
|  |     # ucharlists = unicode_charlists(unicode_punctuation_categories)
 | ||
|  |     # for key in ucharlists:
 | ||
|  |     #     if key.endswith('wide'):
 | ||
|  |     #         print key, ucharlists[key]
 | ||
|  | 
 | ||
|  |     # print 'openers = ', repr(openers)
 | ||
|  |     # print 'closers = ', repr(closers)
 | ||
|  |     # print 'delimiters = ', repr(delimiters)
 | ||
|  |     # print 'closing_delimiters = ', repr(closing_delimiters)
 | ||
|  | 
 | ||
|  |     # ucharlists = unicode_charlists(unicode_punctuation_categories)
 | ||
|  |     # for cat, chars in ucharlists.items():
 | ||
|  |     #     # print cat, chars
 | ||
|  |     #     # compact output (visible with a comprehensive font):
 | ||
|  |     #     print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
 | ||
|  | 
 | ||
|  | # verbose print
 | ||
|  | #
 | ||
|  | # ::
 | ||
|  | 
 | ||
|  |     # print 'openers:'
 | ||
|  |     # for ch in openers:
 | ||
|  |     #     print ch.encode('utf8'), unicodedata.name(ch)
 | ||
|  |     # print 'closers:'
 | ||
|  |     # for ch in closers:
 | ||
|  |     #     print ch.encode('utf8'), unicodedata.name(ch)
 | ||
|  |     # print 'delimiters:'
 | ||
|  |     # for ch in delimiters:
 | ||
|  |     #     print ch.encode('utf8'), unicodedata.name(ch)
 | ||
|  |     # print 'closing_delimiters:'
 | ||
|  |     # for ch in closing_delimiters:
 | ||
|  |     #     print ch.encode('utf8'), unicodedata.name(ch)
 |