You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							571 lines
						
					
					
						
							17 KiB
						
					
					
				
			
		
		
	
	
							571 lines
						
					
					
						
							17 KiB
						
					
					
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# :Id: $Id: latex2mathml.py 7995 2016-12-10 17:50:59Z milde $
# :Copyright: © 2010 Günter Milde.
#             Based on rst2mathml.py from the latex_math sandbox project
#             © 2005 Jens Jørgen Mortensen
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause


"""Convert LaTeX math code into presentational MathML"""

# Based on the `latex_math` sandbox project by Jens Jørgen Mortensen
|  | |
| import docutils.utils.math.tex2unichar as tex2unichar | |
|  | |
# Accent commands (TeX name without backslash) mapped to the accent
# character that is placed over the base.  Where a spacing variant of the
# accent exists it is used; the trailing comment then names the
# corresponding combining character.
over = {
    'acute': u'\u00B4',               # combining: u'\u0301'
    'bar': u'\u00AF',                 # combining: u'\u0304'
    'breve': u'\u02D8',               # combining: u'\u0306'
    'check': u'\u02C7',               # combining: u'\u030C'
    'dot': u'\u02D9',                 # combining: u'\u0307'
    'ddot': u'\u00A8',                # combining: u'\u0308'
    'dddot': u'\u20DB',
    'grave': u'`',                    # combining: u'\u0300'
    'hat': u'^',                      # combining: u'\u0302'
    'mathring': u'\u02DA',            # combining: u'\u030A'
    'overleftrightarrow': u'\u20e1',
    # 'overline' is not supported     (combining: u'\u0305')
    'tilde': u'\u02DC',               # combining: u'\u0303'
    'vec': u'\u20D7',
}
|  | |
# Capital Greek letters (upright in TeX style), keyed by command name.
Greek = {
    'Phi': u'\u03a6',
    'Xi': u'\u039e',
    'Sigma': u'\u03a3',
    'Psi': u'\u03a8',
    'Delta': u'\u0394',
    'Theta': u'\u0398',
    'Upsilon': u'\u03d2',
    'Pi': u'\u03a0',
    'Omega': u'\u03a9',
    'Gamma': u'\u0393',
    'Lambda': u'\u039b',
}
|  | |
# Commands looked up here are emitted as <mi> elements by handle_keyword().
letters = tex2unichar.mathalpha

# Commands looked up here are emitted as <mo> elements by handle_keyword().
# The table is built in a *copy* of ``tex2unichar.mathbin``: calling
# update() on the imported dict itself would modify the tex2unichar
# module's table in place for every other user of that module.
special = dict(tex2unichar.mathbin)   # Binary symbols
special.update(tex2unichar.mathrel)   # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)   # Miscellaneous symbols
special.update(tex2unichar.mathop)    # Variable-sized symbols
special.update(tex2unichar.mathopen)  # Braces
special.update(tex2unichar.mathclose) # Braces
special.update(tex2unichar.mathfence)

# String of the operator characters whose sub-/superscripts the parser
# places as under-/over-scripts (munder, mover, munderover).
sumintprod = ''.join(special[symbol]
                     for symbol in ('sum', 'int', 'oint', 'prod'))
|  | |
# Names of the function-like commands (``\sin``, ``\lim``, ...) that
# handle_keyword() emits as an <mo> element containing the name itself.
functions = (
    'arccos arcsin arctan arg cos cosh '
    'cot coth csc deg det dim '
    'exp gcd hom inf ker lg '
    'lim liminf limsup ln log max '
    'min Pr sec sin sinh sup '
    'tan tanh '
    'injlim varinjlim varlimsup '
    'projlim varliminf varprojlim'
).split()
|  | |
|  | |
# Characters used for ``\mathbb{X}``, keyed by the upper-case ASCII letter.
# Seven letters use a code point from the Basic Multilingual Plane; the
# others come from the supplementary plane (eight-digit escapes).
mathbb = {
    'A': u'\U0001D538', 'B': u'\U0001D539',
    'C': u'\u2102',
    'D': u'\U0001D53B', 'E': u'\U0001D53C', 'F': u'\U0001D53D',
    'G': u'\U0001D53E',
    'H': u'\u210D',
    'I': u'\U0001D540', 'J': u'\U0001D541', 'K': u'\U0001D542',
    'L': u'\U0001D543', 'M': u'\U0001D544',
    'N': u'\u2115',
    'O': u'\U0001D546',
    'P': u'\u2119', 'Q': u'\u211A', 'R': u'\u211D',
    'S': u'\U0001D54A', 'T': u'\U0001D54B', 'U': u'\U0001D54C',
    'V': u'\U0001D54D', 'W': u'\U0001D54E', 'X': u'\U0001D54F',
    'Y': u'\U0001D550',
    'Z': u'\u2124',
}
|  | |
# Characters used for ``\mathscr{x}`` and ``\mathcal{x}``, keyed by the
# ASCII letter (upper and lower case).
mathscr = {
    # upper case
    'A': u'\U0001D49C',
    'B': u'\u212C',      # bernoulli function
    'C': u'\U0001D49E', 'D': u'\U0001D49F',
    'E': u'\u2130', 'F': u'\u2131',
    'G': u'\U0001D4A2',
    'H': u'\u210B',      # hamiltonian
    'I': u'\u2110',
    'J': u'\U0001D4A5', 'K': u'\U0001D4A6',
    'L': u'\u2112',      # lagrangian
    'M': u'\u2133',      # physics m-matrix
    'N': u'\U0001D4A9', 'O': u'\U0001D4AA', 'P': u'\U0001D4AB',
    'Q': u'\U0001D4AC',
    'R': u'\u211B',
    'S': u'\U0001D4AE', 'T': u'\U0001D4AF', 'U': u'\U0001D4B0',
    'V': u'\U0001D4B1', 'W': u'\U0001D4B2', 'X': u'\U0001D4B3',
    'Y': u'\U0001D4B4', 'Z': u'\U0001D4B5',
    # lower case
    'a': u'\U0001D4B6', 'b': u'\U0001D4B7', 'c': u'\U0001D4B8',
    'd': u'\U0001D4B9',
    'e': u'\u212F',
    'f': u'\U0001D4BB',
    'g': u'\u210A',
    'h': u'\U0001D4BD', 'i': u'\U0001D4BE', 'j': u'\U0001D4BF',
    'k': u'\U0001D4C0', 'l': u'\U0001D4C1', 'm': u'\U0001D4C2',
    'n': u'\U0001D4C3',
    'o': u'\u2134',      # order of
    'p': u'\U0001D4C5', 'q': u'\U0001D4C6', 'r': u'\U0001D4C7',
    's': u'\U0001D4C8', 't': u'\U0001D4C9', 'u': u'\U0001D4CA',
    'v': u'\U0001D4CB', 'w': u'\U0001D4CC', 'x': u'\U0001D4CD',
    'y': u'\U0001D4CE', 'z': u'\U0001D4CF',
}
|  | |
# Operators that may follow ``\not``, mapped to their negated character.
negatables = {
    '=': u'\u2260',
    '\\in': u'\u2209',
    '\\equiv': u'\u2262',
}
|  | |
| # LaTeX to MathML translation stuff: | |
class math:
    """Base class for MathML elements.

    An element keeps its children in ``self.children``; a child gets a
    ``parent`` attribute when it is appended.  The tag name used in the
    XML output is the name of the element's class.
    """

    # Number of children after which the element counts as "full".
    # The large default effectively means "no limit".
    nchildren = 1000000

    def __init__(self, children=None, inline=None):
        """math([children]) -> MathML element

        children can be one child or a list (or tuple) of children.

        If `inline` is given, the element is written as the top-level
        ``<math>`` element: inline math for a true value, displayed math
        for a false one.
        """

        self.children = []
        if children is not None:
            # isinstance() also accepts list subclasses and tuples, which
            # an exact type comparison would wrongly append as one child.
            if isinstance(children, (list, tuple)):
                for child in children:
                    self.append(child)
            else:
                # Only one child:
                self.append(children)

        if inline is not None:
            self.inline = inline

    def __repr__(self):
        # Subclasses may skip math.__init__() and so have no children.
        if hasattr(self, 'children'):
            return self.__class__.__name__ + '(%s)' % \
                   ','.join([repr(child) for child in self.children])
        else:
            return self.__class__.__name__

    def full(self):
        """Return True if the element cannot take any more children."""

        return len(self.children) >= self.nchildren

    def append(self, child):
        """append(child) -> element

        Appends child and returns self if self is not full or first
        non-full parent."""

        assert not self.full()
        self.children.append(child)
        child.parent = self
        node = self
        while node.full():
            node = node.parent
        return node

    def delete_child(self):
        """delete_child() -> child

        Delete last child and return it."""

        child = self.children[-1]
        del self.children[-1]
        return child

    def close(self):
        """close() -> parent

        Close element and return first non-full element."""

        parent = self.parent
        while parent.full():
            parent = parent.parent
        return parent

    def xml(self):
        """xml() -> list of XML string fragments

        The fragments are meant to be joined with ``''.join()``."""

        return self.xml_start() + self.xml_body() + self.xml_end()

    def xml_start(self):
        """Return the opening tag as a one-element list."""
        # Only the top-level element has the `inline` attribute.
        if not hasattr(self, 'inline'):
            return ['<%s>' % self.__class__.__name__]
        xmlns = 'http://www.w3.org/1998/Math/MathML'
        if self.inline:
            return ['<math xmlns="%s">' % xmlns]
        else:
            return ['<math xmlns="%s" mode="display">' % xmlns]

    def xml_end(self):
        """Return the closing tag as a one-element list."""
        return ['</%s>' % self.__class__.__name__]

    def xml_body(self):
        """Return the concatenated XML fragments of all children."""
        xml = []
        for child in self.children:
            xml.extend(child.xml())
        return xml
|  | |
class mrow(math):
    """Element whose opening tag is written on a new line."""

    def xml_start(self):
        tag = self.__class__.__name__
        return ['\n<' + tag + '>']
|  | |
class mtable(math):
    """Table element; its opening tag is written on a new line."""

    def xml_start(self):
        tag = self.__class__.__name__
        return ['\n<' + tag + '>']
|  | |
class mtr(mrow):
    """Table row (output like `mrow`, with its own tag name)."""


class mtd(mrow):
    """Table cell (output like `mrow`, with its own tag name)."""
|  | |
class mx(math):
    """Base class for mo, mi, and mn

    These elements take no children; they wrap a string instead.
    """

    nchildren = 0

    def __init__(self, data):
        # math.__init__() is not called: there is no ``children`` list.
        self.data = data

    def xml_body(self):
        """Return the wrapped string as a one-element list."""
        content = self.data
        return [content]
|  | |
class mo(mx):
    """Operator element.

    Characters with a special meaning in XML are written as entity
    references, so that e.g. a "less than" sign does not result in
    malformed markup.
    """

    # Operator character -> replacement used in the XML output.
    translation = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}

    def xml_body(self):
        """Return the (escaped) operator as a one-element list."""
        return [self.translation.get(self.data, self.data)]
|  | |
class mi(mx):
    """Identifier element (the parser uses it for letters)."""


class mn(mx):
    """Number element (the parser uses it for digits)."""
|  | |
class msub(math):
    """Subscript element, full after two children."""
    nchildren = 2


class msup(math):
    """Superscript element, full after two children."""
    nchildren = 2


class msqrt(math):
    """Square root element, full after one child."""
    nchildren = 1


class mroot(math):
    """Root element, full after two children."""
    nchildren = 2


class mfrac(math):
    """Fraction element, full after two children."""
    nchildren = 2
|  | |
class msubsup(math):
    """Element with three children (base and two scripts).

    With ``reversed=True`` the second and third child are exchanged once,
    when the element is first converted to XML.  The parser sets this
    when it builds the element from an existing `msup`.
    """

    nchildren = 3

    def __init__(self, children=None, reversed=False):
        math.__init__(self, children)
        self.reversed = reversed

    def xml(self):
        """Return XML fragments; swap the two scripts first if required."""
        if self.reversed:
            kids = self.children
            kids[1], kids[2] = kids[2], kids[1]
            # Reset the flag so that a second call does not swap back.
            self.reversed = False
        return math.xml(self)
|  | |
class mfenced(math):
    """Fenced expression created for ``\\left<par> ... \\right<par>``.

    The opening delimiter is passed to the constructor.  The closing
    delimiter is stored in the `closepar` attribute by the ``\\right``
    handler in handle_keyword().
    """

    # LaTeX delimiter -> character for the open/close attribute values.
    translation = {'\\{': '{', '\\langle': u'\u2329',
                   '\\}': '}', '\\rangle': u'\u232A',
                   '.': ''}

    # Stays None until a matching ``\right`` has been parsed.
    closepar = None

    def __init__(self, par):
        self.openpar = par
        math.__init__(self)

    def xml_start(self):
        """Return the opening tag with the translated delimiters.

        Raises SyntaxError if no closing delimiter has been set, i.e. the
        input had a ``\\left`` without a matching ``\\right``.
        """
        if self.closepar is None:
            # Report unbalanced input like the other input errors of this
            # module instead of failing with an AttributeError.
            raise SyntaxError(u'Missing "\\right" for "\\left%s"!'
                              % self.openpar)
        opening = self.translation.get(self.openpar, self.openpar)
        closing = self.translation.get(self.closepar, self.closepar)
        return ['<mfenced open="%s" close="%s">' % (opening, closing)]
|  | |
class mspace(math):
    """Space element; takes no children."""
    nchildren = 0
|  | |
class mstyle(math):
    """Style element; keyword arguments become XML attributes.

    `nchildren`, if given, overrides the number of children after which
    the element counts as full.
    """

    def __init__(self, children=None, nchildren=None, **kwargs):
        # Set the limit first: math.__init__() appends the children and
        # append() checks it.
        if nchildren is not None:
            self.nchildren = nchildren
        math.__init__(self, children)
        self.attrs = kwargs

    def xml_start(self):
        """Return the opening tag with all attributes.

        Attributes are separated by a space; without a separator, more
        than one attribute would give malformed XML.
        """
        attributes = ' '.join(['%s="%s"' % item
                               for item in self.attrs.items()])
        return ['<mstyle ', attributes, '>']
|  | |
class mover(math):
    """Element with two children (base and over-script).

    With ``reversed=True`` the order of the children is inverted once,
    when the element is first converted to XML.  handle_keyword() uses
    this for accents, where the accent is added before its base.
    """

    nchildren = 2

    def __init__(self, children=None, reversed=False):
        math.__init__(self, children)
        self.reversed = reversed

    def xml(self):
        """Return XML fragments; reverse the children first if required."""
        if not self.reversed:
            return math.xml(self)
        self.children.reverse()
        # Reset the flag so that a second call does not reverse again.
        self.reversed = False
        return math.xml(self)
|  | |
class munder(math):
    """Element with two children (base and under-script)."""
    nchildren = 2
|  | |
class munderover(math):
    """Element with three children (base, under- and over-script)."""

    nchildren = 3

    def __init__(self, children=None):
        # Unlike the base class, no `inline` argument is accepted.
        math.__init__(self, children)
|  | |
class mtext(math):
    """Text element; wraps a string and takes no children."""

    nchildren = 0

    def __init__(self, text):
        # math.__init__() is not called: there is no ``children`` list.
        self.text = text

    def xml_body(self):
        """Return the text as a one-element list."""
        content = self.text
        return [content]
|  | |
def parse_latex_math(string, inline=True):
    """parse_latex_math(string [,inline]) -> MathML-tree

    Returns a MathML-tree parsed from string.  inline=True is for
    inline math and inline=False is for displayed math.

    tree is the whole tree and node is the current element.

    Raises SyntaxError for input that is not understood (unknown
    backslash sequence, illegal character, sub- or superscript without a
    base, or an error reported by handle_keyword()).
    """

    # Normalize white-space:
    string = ' '.join(string.split())

    if inline:
        node = mrow()
        tree = math(node, inline=True)
    else:
        # Displayed math is wrapped in a table with one row and one cell.
        node = mtd()
        tree = math(mtable(mtr(node)), inline=False)

    while string:
        n = len(string)
        c = string[0]
        c2 = string[1:2]  # next character, '' at the end of the input
        skip = 1  # number of characters consumed
        if c == ' ':
            pass
        elif c == '\\':
            # The test for a non-empty c2 is required: '' is "in" every
            # string, so a trailing backslash would otherwise be accepted
            # and produce an empty operator.
            if c2 and c2 in '{}':
                node = node.append(mo(c2))
                skip = 2
            elif c2 == ' ':
                node = node.append(mspace())
                skip = 2
            elif c2 == ',': # TODO: small space
                node = node.append(mspace())
                skip = 2
            elif c2.isalpha():
                # We have a LaTeX-name:
                i = 2
                while i < n and string[i].isalpha():
                    i += 1
                name = string[1:i]
                node, skip = handle_keyword(name, node, string[i:])
                # handle_keyword() only counts what follows the name.
                skip += i
            elif c2 == '\\':
                # End of a row:
                entry = mtd()
                row = mtr(entry)
                node.close().close().append(row)
                node = entry
                skip = 2
            else:
                raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2))
        elif c.isalpha():
            node = node.append(mi(c))
        elif c.isdigit():
            node = node.append(mn(c))
        elif c in "+-*/=()[]|<>,.!?':;@":
            node = node.append(mo(c))
        elif c == '_':
            if not node.children:
                raise SyntaxError(u'Missing base for "%s"!' % c)
            # The previous element becomes the base of the new element.
            child = node.delete_child()
            if isinstance(child, msup):
                # Superscript came first: children are swapped on output.
                sub = msubsup(child.children, reversed=True)
            elif isinstance(child, mo) and child.data in sumintprod:
                sub = munder(child)
            else:
                sub = msub(child)
            node.append(sub)
            node = sub
        elif c == '^':
            if not node.children:
                raise SyntaxError(u'Missing base for "%s"!' % c)
            # The previous element becomes the base of the new element.
            child = node.delete_child()
            if isinstance(child, msub):
                sup = msubsup(child.children)
            elif isinstance(child, mo) and child.data in sumintprod:
                sup = mover(child)
            elif (isinstance(child, munder) and
                  child.children[0].data in sumintprod):
                sup = munderover(child.children)
            else:
                sup = msup(child)
            node.append(sup)
            node = sup
        elif c == '{':
            row = mrow()
            node.append(row)
            node = row
        elif c == '}':
            node = node.close()
        elif c == '&':
            # Next table cell:
            entry = mtd()
            node.close().append(entry)
            node = entry
        else:
            raise SyntaxError(u'Illegal character: "%s"' % c)
        string = string[skip:]
    return tree
|  | |
|  | |
def handle_keyword(name, node, string):
    """Process the LaTeX command `name` and return ``(node, skip)``.

    `name` is the command name without the backslash, `node` the current
    element and `string` the input following the command name.

    Returns the new current element and the number of characters of
    `string` that were consumed (including one leading space, if any).

    Raises SyntaxError for unknown commands and malformed arguments.
    """
    skip = 0
    # One space after the command name only terminates the name.
    if len(string) > 0 and string[0] == ' ':
        string = string[1:]
        skip = 1
    if name == 'begin':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Environment not supported! '
                              u'Supported environment: "matrix".')
        skip += 8  # len('{matrix}')
        entry = mtd()
        table = mtable(mtr(entry))
        node.append(table)
        node = entry
    elif name == 'end':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Expected "\\end{matrix}"!')
        skip += 8  # len('{matrix}')
        # Leave cell, row, and table:
        node = node.close().close().close()
    elif name in ('text', 'mathrm'):
        # startswith() is also safe if nothing follows the command.
        if not string.startswith('{'):
            raise SyntaxError(u'Expected "\\text{...}"!')
        i = string.find('}')
        if i == -1:
            raise SyntaxError(u'Expected "\\text{...}"!')
        node = node.append(mtext(string[1:i]))
        skip += i + 1
    elif name == 'sqrt':
        sqrt = msqrt()
        node.append(sqrt)
        node = sqrt
    elif name == 'frac':
        frac = mfrac()
        node.append(frac)
        node = frac
    elif name == 'left':
        for par in ['(', '[', '|', '\\{', '\\langle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing left-brace!')
        fenced = mfenced(par)
        node.append(fenced)
        row = mrow()
        fenced.append(row)
        node = row
        skip += len(par)
    elif name == 'right':
        for par in [')', ']', '|', '\\}', '\\rangle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing right-brace!')
        # Close the row; the closing delimiter is stored on the element
        # returned by close().
        node = node.close()
        node.closepar = par
        node = node.close()
        skip += len(par)
    elif name == 'not':
        for operator in negatables:
            if string.startswith(operator):
                break
        else:
            raise SyntaxError(u'Expected something to negate: "\\not ..."!')
        node = node.append(mo(negatables[operator]))
        skip += len(operator)
    elif name == 'mathbf':
        style = mstyle(nchildren=1, fontweight='bold')
        node.append(style)
        node = style
    elif name == 'mathbb':
        # The length test and the table lookup make truncated input and
        # unsupported characters a SyntaxError.
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
            or string[1] not in mathbb):
            raise SyntaxError(u'Expected something like "\\mathbb{A}"!')
        node = node.append(mi(mathbb[string[1]]))
        skip += 3
    elif name in ('mathscr', 'mathcal'):
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
            or string[1] not in mathscr):
            raise SyntaxError(u'Expected something like "\\mathscr{A}"!')
        node = node.append(mi(mathscr[string[1]]))
        skip += 3
    elif name == 'colon': # "normal" colon, not binary operator
        node = node.append(mo(':')) # TODO: add ``lspace="0pt"``
    elif name in Greek:   # Greek capitals (upright in "TeX style")
        node = node.append(mo(Greek[name]))
        # TODO: "ISO style" sets them italic. Could we use a class argument
        # to enable styling via CSS?
    elif name in letters:
        node = node.append(mi(letters[name]))
    elif name in special:
        node = node.append(mo(special[name]))
    elif name in functions:
        node = node.append(mo(name))
    elif name in over:
        # The accent is added first; mover swaps the children on output.
        ovr = mover(mo(over[name]), reversed=True)
        node.append(ovr)
        node = ovr
    else:
        raise SyntaxError(u'Unknown LaTeX command: ' + name)

    return node, skip
|  | |
def tex2mathml(tex_math, inline=True):
    """Convert the LaTeX math code `tex_math` to a MathML string.

    A true value for `inline` selects inline math, a false value
    displayed math.
    """
    tree = parse_latex_math(tex_math, inline=inline)
    fragments = tree.xml()
    return ''.join(fragments)
|  | |
|     
 | |
| 
 |