Module pyparsing

Source Code for Module pyparsing

   1  #-*- coding: utf-8 -*-
 
   2  # module pyparsing.py
 
   3  #
 
   4  # Copyright (c) 2003-2018  Paul T. McGuire
 
   5  #
 
   6  # Permission is hereby granted, free of charge, to any person obtaining
 
   7  # a copy of this software and associated documentation files (the
 
   8  # "Software"), to deal in the Software without restriction, including
 
   9  # without limitation the rights to use, copy, modify, merge, publish,
 
  10  # distribute, sublicense, and/or sell copies of the Software, and to
 
  11  # permit persons to whom the Software is furnished to do so, subject to
 
  12  # the following conditions:
 
  13  #
 
  14  # The above copyright notice and this permission notice shall be
 
  15  # included in all copies or substantial portions of the Software.
 
  16  #
 
  17  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  18  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  19  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 
  20  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 
  21  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 
  22  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 
  23  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
  24  #
 
  25  
 
  26  __doc__ = \
 
  27  """
 
  28  pyparsing module - Classes and methods to define and execute parsing grammars
 
  29  =============================================================================
 
  30  
 
  31  The pyparsing module is an alternative approach to creating and executing simple grammars,
 
  32  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
 
  33  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
 
  34  provides a library of classes that you use to construct the grammar directly in Python.
 
  35  
 
  36  Here is a program to parse "Hello, World!" (or any greeting of the form 
 
  37  C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 
 
  38  (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
 
  39  L{Literal} expressions)::
 
  40  
 
  41      from pyparsing import Word, alphas
 
  42  
 
  43      # define grammar of a greeting
 
  44      greet = Word(alphas) + "," + Word(alphas) + "!"
 
  45  
 
  46      hello = "Hello, World!"
 
  47      print (hello, "->", greet.parseString(hello))
 
  48  
 
  49  The program outputs the following::
 
  50  
 
  51      Hello, World! -> ['Hello', ',', 'World', '!']
 
  52  
 
  53  The Python representation of the grammar is quite readable, owing to the self-explanatory
 
  54  class names, and the use of '+', '|' and '^' operators.
 
  55  
 
  56  The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
 
  57  object with named attributes.
 
  58  
 
  59  The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 
  60   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 
  61   - quoted strings
 
  62   - embedded comments
 
  63  
 
  64  
 
  65  Getting Started -
 
  66  -----------------
 
  67  Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
 
  68  classes inherit from. Use the docstrings for examples of how to:
 
  69   - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
 
  70   - construct character word-group expressions using the L{Word} class
 
  71   - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
 
  72   - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
 
  73   - associate names with your parsed results using L{ParserElement.setResultsName}
 
  74   - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
 
  75   - find more useful common expressions in the L{pyparsing_common} namespace class
 
  76  """ 
  77  
 
  78  __version__ = "2.3.0" 
  79  __versionTime__ = "28 Oct 2018 01:57 UTC" 
  80  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  81  
 
  82  import string 
  83  from weakref import ref as wkref 
  84  import copy 
  85  import sys 
  86  import warnings 
  87  import re 
  88  import sre_constants 
  89  import collections 
  90  import pprint 
  91  import traceback 
  92  import types 
  93  from datetime import datetime 
  94  try: 
  95      # Python 3
 
  96      from itertools import filterfalse 
  97  except ImportError: 
  98      from itertools import ifilterfalse as filterfalse 
  99  
 
 100  try: 
 101      from _thread import RLock 
 102  except ImportError: 
 103      from threading import RLock 
 104  
 
 105  try: 
 106      # Python 3
 
 107      from collections.abc import Iterable 
 108      from collections.abc import MutableMapping 
 109  except ImportError: 
 110      # Python 2.7
 
 111      from collections import Iterable 
 112      from collections import MutableMapping 
 113  
 
 114  try: 
 115      from collections import OrderedDict as _OrderedDict 
 116  except ImportError: 
 117      try: 
 118          from ordereddict import OrderedDict as _OrderedDict 
 119      except ImportError: 
 120          _OrderedDict = None 
 121  
 
 122  try: 
 123      from types import SimpleNamespace 
 124  except ImportError: 
125 - class SimpleNamespace: pass
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

# Public API of the module (what "from pyparsing import *" exports).
__all__ = [
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
    'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
    'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
]

# (major, minor, micro) of the running interpreter
system_version = tuple(sys.version_info)[:3]
PY_3 = (system_version[0] == 3)

if PY_3:
    # Python 3: alias the Python 2 names used throughout the module
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        A unicode object is returned unchanged.  Otherwise str(obj) is tried
        first; if that fails with a UnicodeEncodeError, the unicode form of
        the object is encoded with the default encoding using XML character
        references, and each such reference is then rewritten as a
        backslash-u hex escape.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj),
            # so it won't break any existing code.
            return str(obj)
        except UnicodeEncodeError:
            # Else encode it, then convert the "&#NNN;" references
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            charref_expr = Regex(r'&#\d+;')
            charref_expr.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref_expr.transformString(encoded)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        # skip any builtin that this interpreter version does not provide
        if hasattr(__builtin__, fname):
            singleArgBuiltins.append(getattr(__builtin__, fname))

# the type of a generator expression, used for type checks elsewhere in the module
_generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data."""
    # The ampersand must be replaced first, otherwise the "&" of the entity
    # references produced by the later replacements would be escaped again.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, entity)
    return escaped
# Character-set strings intended for building Word() and similar expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable ASCII character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Store the failure location, message, input string and element.

        When only one string is given (C{msg} is None) that string is taken
        to be the message and the input string is recorded as empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

        C{column} is accepted as an alias of C{col}; any other name raises
        AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        """Format the message together with char offset, line and column."""
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        """List the computed attributes ahead of the regular class attributes."""
        return ["lineno", "col", "line"] + dir(type(self))
270
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
291
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
296
class ParseSyntaxException(ParseFatalException):
    """Just like L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is
       to stop immediately because an unbacktrackable syntax error has been
       found."""
302
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        """Remember the list of parse elements that make up the trace."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        """Return the class name followed by the recorded element trace."""
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
323
class _ParseResultsWithOffset(object):
    """Pairs a value with an integer offset, stored as the 2-tuple C{tup}.

    Indexing (and therefore tuple-style unpacking) is delegated to that
    tuple: index 0 is the value, index 1 is the offset.
    """

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value is shown; the offset is left out
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with C{i}, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
333
334 -class ParseResults(object):
335 """ 336 Structured parse results, to provide multiple means of access to the parsed data: 337 - as a list (C{len(results)}) 338 - by list index (C{results[0], results[1]}, etc.) 339 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 340 341 Example:: 342 integer = Word(nums) 343 date_str = (integer.setResultsName("year") + '/' 344 + integer.setResultsName("month") + '/' 345 + integer.setResultsName("day")) 346 # equivalent form: 347 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 348 349 # parseString returns a ParseResults object 350 result = date_str.parseString("1999/12/31") 351 352 def test(s, fn=repr): 353 print("%s -> %s" % (s, fn(eval(s)))) 354 test("list(result)") 355 test("result[0]") 356 test("result['month']") 357 test("result.day") 358 test("'month' in result") 359 test("'minutes' in result") 360 test("result.dump()", str) 361 prints:: 362 list(result) -> ['1999', '/', '12', '/', '31'] 363 result[0] -> '1999' 364 result['month'] -> '12' 365 result.day -> '31' 366 'month' in result -> True 367 'minutes' in result -> False 368 result.dump() -> ['1999', '/', '12', '/', '31'] 369 - day: 31 370 - month: 12 371 - year: 1999 372 """
373 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
374 if isinstance(toklist, cls): 375 return toklist 376 retobj = object.__new__(cls) 377 retobj.__doinit = True 378 return retobj
379 380 # Performance tuning: we construct a *lot* of these, so keep this 381 # constructor as small and fast as possible
382 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
383 if self.__doinit: 384 self.__doinit = False 385 self.__name = None 386 self.__parent = None 387 self.__accumNames = {} 388 self.__asList = asList 389 self.__modal = modal 390 if toklist is None: 391 toklist = [] 392 if isinstance(toklist, list): 393 self.__toklist = toklist[:] 394 elif isinstance(toklist, _generatorType): 395 self.__toklist = list(toklist) 396 else: 397 self.__toklist = [toklist] 398 self.__tokdict = dict() 399 400 if name is not None and name: 401 if not modal: 402 self.__accumNames[name] = 0 403 if isinstance(name,int): 404 name = _ustr(name) # will always return a str, but use _ustr for consistency 405 self.__name = name 406 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 407 if isinstance(toklist,basestring): 408 toklist = [ toklist ] 409 if asList: 410 if isinstance(toklist,ParseResults): 411 self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0) 412 else: 413 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 414 self[name].__name = name 415 else: 416 try: 417 self[name] = toklist[0] 418 except (KeyError,TypeError,IndexError): 419 self[name] = toklist
420
421 - def __getitem__( self, i ):
422 if isinstance( i, (int,slice) ): 423 return self.__toklist[i] 424 else: 425 if i not in self.__accumNames: 426 return self.__tokdict[i][-1][0] 427 else: 428 return ParseResults([ v[0] for v in self.__tokdict[i] ])
429
430 - def __setitem__( self, k, v, isinstance=isinstance ):
431 if isinstance(v,_ParseResultsWithOffset): 432 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 433 sub = v[0] 434 elif isinstance(k,(int,slice)): 435 self.__toklist[k] = v 436 sub = v 437 else: 438 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 439 sub = v 440 if isinstance(sub,ParseResults): 441 sub.__parent = wkref(self)
442
443 - def __delitem__( self, i ):
444 if isinstance(i,(int,slice)): 445 mylen = len( self.__toklist ) 446 del self.__toklist[i] 447 448 # convert int to slice 449 if isinstance(i, int): 450 if i < 0: 451 i += mylen 452 i = slice(i, i+1) 453 # get removed indices 454 removed = list(range(*i.indices(mylen))) 455 removed.reverse() 456 # fixup indices in token dictionary 457 for name,occurrences in self.__tokdict.items(): 458 for j in removed: 459 for k, (value, position) in enumerate(occurrences): 460 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 461 else: 462 del self.__tokdict[i]
463
464 - def __contains__( self, k ):
465 return k in self.__tokdict
466
467 - def __len__( self ): return len( self.__toklist )
468 - def __bool__(self): return ( not not self.__toklist )
469 __nonzero__ = __bool__
470 - def __iter__( self ): return iter( self.__toklist )
471 - def __reversed__( self ): return iter( self.__toklist[::-1] )
472 - def _iterkeys( self ):
473 if hasattr(self.__tokdict, "iterkeys"): 474 return self.__tokdict.iterkeys() 475 else: 476 return iter(self.__tokdict)
477
478 - def _itervalues( self ):
479 return (self[k] for k in self._iterkeys())
480
481 - def _iteritems( self ):
482 return ((k, self[k]) for k in self._iterkeys())
483 484 if PY_3: 485 keys = _iterkeys 486 """Returns an iterator of all named result keys (Python 3.x only).""" 487 488 values = _itervalues 489 """Returns an iterator of all named result values (Python 3.x only).""" 490 491 items = _iteritems 492 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 493 494 else: 495 iterkeys = _iterkeys 496 """Returns an iterator of all named result keys (Python 2.x only).""" 497 498 itervalues = _itervalues 499 """Returns an iterator of all named result values (Python 2.x only).""" 500 501 iteritems = _iteritems 502 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 503
504 - def keys( self ):
505 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 506 return list(self.iterkeys())
507
508 - def values( self ):
509 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 510 return list(self.itervalues())
511
512 - def items( self ):
513 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 514 return list(self.iteritems())
515
516 - def haskeys( self ):
517 """Since keys() returns an iterator, this method is helpful in bypassing 518 code that looks for the existence of any defined results names.""" 519 return bool(self.__tokdict)
520
521 - def pop( self, *args, **kwargs):
522 """ 523 Removes and returns item at specified index (default=C{last}). 524 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 525 argument or an integer argument, it will use C{list} semantics 526 and pop tokens from the list of parsed tokens. If passed a 527 non-integer argument (most likely a string), it will use C{dict} 528 semantics and pop the corresponding value from any defined 529 results names. A second default return value argument is 530 supported, just as in C{dict.pop()}. 531 532 Example:: 533 def remove_first(tokens): 534 tokens.pop(0) 535 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 536 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 537 538 label = Word(alphas) 539 patt = label("LABEL") + OneOrMore(Word(nums)) 540 print(patt.parseString("AAB 123 321").dump()) 541 542 # Use pop() in a parse action to remove named result (note that corresponding value is not 543 # removed from list form of results) 544 def remove_LABEL(tokens): 545 tokens.pop("LABEL") 546 return tokens 547 patt.addParseAction(remove_LABEL) 548 print(patt.parseString("AAB 123 321").dump()) 549 prints:: 550 ['AAB', '123', '321'] 551 - LABEL: AAB 552 553 ['AAB', '123', '321'] 554 """ 555 if not args: 556 args = [-1] 557 for k,v in kwargs.items(): 558 if k == 'default': 559 args = (args[0], v) 560 else: 561 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 562 if (isinstance(args[0], int) or 563 len(args) == 1 or 564 args[0] in self): 565 index = args[0] 566 ret = self[index] 567 del self[index] 568 return ret 569 else: 570 defaultvalue = args[1] 571 return defaultvalue
572
573 - def get(self, key, defaultValue=None):
574 """ 575 Returns named result matching the given key, or if there is no 576 such name, then returns the given C{defaultValue} or C{None} if no 577 C{defaultValue} is specified. 578 579 Similar to C{dict.get()}. 580 581 Example:: 582 integer = Word(nums) 583 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 584 585 result = date_str.parseString("1999/12/31") 586 print(result.get("year")) # -> '1999' 587 print(result.get("hour", "not specified")) # -> 'not specified' 588 print(result.get("hour")) # -> None 589 """ 590 if key in self: 591 return self[key] 592 else: 593 return defaultValue
594
595 - def insert( self, index, insStr ):
596 """ 597 Inserts new element at location index in the list of parsed tokens. 598 599 Similar to C{list.insert()}. 600 601 Example:: 602 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 603 604 # use a parse action to insert the parse location in the front of the parsed results 605 def insert_locn(locn, tokens): 606 tokens.insert(0, locn) 607 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 608 """ 609 self.__toklist.insert(index, insStr) 610 # fixup indices in token dictionary 611 for name,occurrences in self.__tokdict.items(): 612 for k, (value, position) in enumerate(occurrences): 613 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
614
615 - def append( self, item ):
616 """ 617 Add single element to end of ParseResults list of elements. 618 619 Example:: 620 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 621 622 # use a parse action to compute the sum of the parsed integers, and add it to the end 623 def append_sum(tokens): 624 tokens.append(sum(map(int, tokens))) 625 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 626 """ 627 self.__toklist.append(item)
628
629 - def extend( self, itemseq ):
630 """ 631 Add sequence of elements to end of ParseResults list of elements. 632 633 Example:: 634 patt = OneOrMore(Word(alphas)) 635 636 # use a parse action to append the reverse of the matched strings, to make a palindrome 637 def make_palindrome(tokens): 638 tokens.extend(reversed([t[::-1] for t in tokens])) 639 return ''.join(tokens) 640 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 641 """ 642 if isinstance(itemseq, ParseResults): 643 self += itemseq 644 else: 645 self.__toklist.extend(itemseq)
646
647 - def clear( self ):
648 """ 649 Clear all elements and results names. 650 """ 651 del self.__toklist[:] 652 self.__tokdict.clear()
653
654 - def __getattr__( self, name ):
655 try: 656 return self[name] 657 except KeyError: 658 return "" 659 660 if name in self.__tokdict: 661 if name not in self.__accumNames: 662 return self.__tokdict[name][-1][0] 663 else: 664 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 665 else: 666 return ""
667
668 - def __add__( self, other ):
669 ret = self.copy() 670 ret += other 671 return ret
672
673 - def __iadd__( self, other ):
674 if other.__tokdict: 675 offset = len(self.__toklist) 676 addoffset = lambda a: offset if a<0 else a+offset 677 otheritems = other.__tokdict.items() 678 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 679 for (k,vlist) in otheritems for v in vlist] 680 for k,v in otherdictitems: 681 self[k] = v 682 if isinstance(v[0],ParseResults): 683 v[0].__parent = wkref(self) 684 685 self.__toklist += other.__toklist 686 self.__accumNames.update( other.__accumNames ) 687 return self
688
689 - def __radd__(self, other):
690 if isinstance(other,int) and other == 0: 691 # useful for merging many ParseResults using sum() builtin 692 return self.copy() 693 else: 694 # this may raise a TypeError - so be it 695 return other + self
696
697 - def __repr__( self ):
698 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
699
700 - def __str__( self ):
701 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
702
703 - def _asStringList( self, sep='' ):
704 out = [] 705 for item in self.__toklist: 706 if out and sep: 707 out.append(sep) 708 if isinstance( item, ParseResults ): 709 out += item._asStringList() 710 else: 711 out.append( _ustr(item) ) 712 return out
713
714 - def asList( self ):
715 """ 716 Returns the parse results as a nested list of matching tokens, all converted to strings. 717 718 Example:: 719 patt = OneOrMore(Word(alphas)) 720 result = patt.parseString("sldkj lsdkj sldkj") 721 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 722 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 723 724 # Use asList() to create an actual list 725 result_list = result.asList() 726 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 727 """ 728 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
729
730 - def asDict( self ):
731 """ 732 Returns the named parse results as a nested dictionary. 733 734 Example:: 735 integer = Word(nums) 736 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 737 738 result = date_str.parseString('12/31/1999') 739 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 740 741 result_dict = result.asDict() 742 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 743 744 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 745 import json 746 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 747 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 748 """ 749 if PY_3: 750 item_fn = self.items 751 else: 752 item_fn = self.iteritems 753 754 def toItem(obj): 755 if isinstance(obj, ParseResults): 756 if obj.haskeys(): 757 return obj.asDict() 758 else: 759 return [toItem(v) for v in obj] 760 else: 761 return obj
762 763 return dict((k,toItem(v)) for k,v in item_fn())
764
765 - def copy( self ):
766 """ 767 Returns a new copy of a C{ParseResults} object. 768 """ 769 ret = ParseResults( self.__toklist ) 770 ret.__tokdict = dict(self.__tokdict.items()) 771 ret.__parent = self.__parent 772 ret.__accumNames.update( self.__accumNames ) 773 ret.__name = self.__name 774 return ret
775
776 - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
777 """ 778 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 779 """ 780 nl = "\n" 781 out = [] 782 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 783 for v in vlist) 784 nextLevelIndent = indent + " " 785 786 # collapse out indents if formatting is not desired 787 if not formatted: 788 indent = "" 789 nextLevelIndent = "" 790 nl = "" 791 792 selfTag = None 793 if doctag is not None: 794 selfTag = doctag 795 else: 796 if self.__name: 797 selfTag = self.__name 798 799 if not selfTag: 800 if namedItemsOnly: 801 return "" 802 else: 803 selfTag = "ITEM" 804 805 out += [ nl, indent, "<", selfTag, ">" ] 806 807 for i,res in enumerate(self.__toklist): 808 if isinstance(res,ParseResults): 809 if i in namedItems: 810 out += [ res.asXML(namedItems[i], 811 namedItemsOnly and doctag is None, 812 nextLevelIndent, 813 formatted)] 814 else: 815 out += [ res.asXML(None, 816 namedItemsOnly and doctag is None, 817 nextLevelIndent, 818 formatted)] 819 else: 820 # individual token, see if there is a name for it 821 resTag = None 822 if i in namedItems: 823 resTag = namedItems[i] 824 if not resTag: 825 if namedItemsOnly: 826 continue 827 else: 828 resTag = "ITEM" 829 xmlBodyText = _xml_escape(_ustr(res)) 830 out += [ nl, nextLevelIndent, "<", resTag, ">", 831 xmlBodyText, 832 "</", resTag, ">" ] 833 834 out += [ nl, indent, "</", selfTag, ">" ] 835 return "".join(out)
836
837 - def __lookup(self,sub):
838 for k,vlist in self.__tokdict.items(): 839 for v,loc in vlist: 840 if sub is v: 841 return k 842 return None
843
844 - def getName(self):
845 r""" 846 Returns the results name for this token expression. Useful when several 847 different expressions might match at a particular location. 848 849 Example:: 850 integer = Word(nums) 851 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 852 house_number_expr = Suppress('#') + Word(nums, alphanums) 853 user_data = (Group(house_number_expr)("house_number") 854 | Group(ssn_expr)("ssn") 855 | Group(integer)("age")) 856 user_info = OneOrMore(user_data) 857 858 result = user_info.parseString("22 111-22-3333 #221B") 859 for item in result: 860 print(item.getName(), ':', item[0]) 861 prints:: 862 age : 22 863 ssn : 111-22-3333 864 house_number : 221B 865 """ 866 if self.__name: 867 return self.__name 868 elif self.__parent: 869 par = self.__parent() 870 if par: 871 return par.__lookup(self) 872 else: 873 return None 874 elif (len(self) == 1 and 875 len(self.__tokdict) == 1 and 876 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 877 return next(iter(self.__tokdict.keys())) 878 else: 879 return None
880
881 - def dump(self, indent='', depth=0, full=True):
882 """ 883 Diagnostic method for listing out the contents of a C{ParseResults}. 884 Accepts an optional C{indent} argument so that this string can be embedded 885 in a nested display of other data. 886 887 Example:: 888 integer = Word(nums) 889 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 890 891 result = date_str.parseString('12/31/1999') 892 print(result.dump()) 893 prints:: 894 ['12', '/', '31', '/', '1999'] 895 - day: 1999 896 - month: 31 897 - year: 12 898 """ 899 out = [] 900 NL = '\n' 901 out.append( indent+_ustr(self.asList()) ) 902 if full: 903 if self.haskeys(): 904 items = sorted((str(k), v) for k,v in self.items()) 905 for k,v in items: 906 if out: 907 out.append(NL) 908 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 909 if isinstance(v,ParseResults): 910 if v: 911 out.append( v.dump(indent,depth+1) ) 912 else: 913 out.append(_ustr(v)) 914 else: 915 out.append(repr(v)) 916 elif any(isinstance(vv,ParseResults) for vv in self): 917 v = self 918 for i,vv in enumerate(v): 919 if isinstance(vv,ParseResults): 920 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 921 else: 922 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 923 924 return "".join(out)
925
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results, in list form, using the C{pprint} module.
    Any extra positional or keyword arguments are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_list = self.asList()
    pprint.pprint(as_list, *args, **kwargs)
948 949 # add support for pickle protocol
def __getstate__(self):
    """
    Return the picklable state: a pair of the token list and a tuple holding
    a copy of the token dictionary, the parent object (or C{None}), the
    accumulated names and the results name.
    """
    parent = None
    if self.__parent is not None:
        # the stored parent is a callable reference; call it to get the object
        # itself, normalizing a dead or falsy result to None
        parent = self.__parent() or None
    extras = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, extras)
956
def __setstate__(self,state):
    """
    Restore the object from the C{(toklist, (tokdict, parent, accumNames, name))}
    pair produced by C{__getstate__}. A non-C{None} parent is stored again as a
    weak reference.
    """
    self.__toklist = state[0]
    tokdict, par, inAccumNames, name = state[1]
    self.__tokdict = tokdict
    self.__name = name
    # keep a private copy rather than sharing the pickled mapping
    self.__accumNames = dict(inAccumNames)
    self.__parent = wkref(par) if par is not None else None
969
def __getnewargs__(self):
    """
    Return the arguments used to re-create the object when unpickling:
    token list, results name, as-list flag and modal flag.
    """
    ctor_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return ctor_args
972
def __dir__(self):
    """
    List the attributes of the class followed by the results names currently
    defined on this instance (as reported by C{keys()}).
    """
    names = dir(type(self))
    names.extend(self.keys())
    return names
975 976 MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    s = strg
    # a location strictly inside the string that directly follows a newline
    # is reported as column 1
    if 0 < loc < len(s) and s[loc-1] == '\n':
        return 1
    # otherwise: distance from the last newline before loc (rfind gives -1 if none)
    last_newline = s.rfind("\n", 0, loc)
    return loc - last_newline
990
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # every newline before loc starts a new line; lines are numbered from 1
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1002
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # the line starts just after the last newline before loc (or at 0 if none)
    start = strg.rfind("\n", 0, loc) + 1
    # ...and ends at the first newline at or after loc (or at the end of the string)
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
1012
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the expression,
    the location and the corresponding (line,column) pair."""
    pieces = [
        "Match ",
        _ustr(expr),
        " at loc ",
        _ustr(loc),
        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ),
    ]
    print ("".join(pieces))
1015
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
    expression and the matched tokens in list form."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)
1018
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the
    exception."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
1021
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns C{None}.
    """
    return None
1025 1026 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 1027 #~ 'decorator to trim function calls to match the arity of the target' 1028 #~ def _trim_arity(func, maxargs=3): 1029 #~ if func in singleArgBuiltins: 1030 #~ return lambda s,l,t: func(t) 1031 #~ limit = 0 1032 #~ foundArity = False 1033 #~ def wrapper(*args): 1034 #~ nonlocal limit,foundArity 1035 #~ while 1: 1036 #~ try: 1037 #~ ret = func(*args[limit:]) 1038 #~ foundArity = True 1039 #~ return ret 1040 #~ except TypeError: 1041 #~ if limit == maxargs or foundArity: 1042 #~ raise 1043 #~ limit += 1 1044 #~ continue 1045 #~ return wrapper 1046 1047 # this version is Python 2.x-3.x cross-compatible 1048 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap parse action C{func} so it can always be called with the full
    C{(s, loc, toks)} argument list, whatever number of those arguments
    C{func} actually accepts.

    The returned wrapper calls C{func(*args[limit:])}, starting with
    C{limit == 0}. When that call raises C{TypeError} at the call line itself,
    C{limit} is incremented (while it is C{<= maxargs}) and the call is
    retried with one fewer leading argument. After the first successful call
    the arity is considered found and any later C{TypeError} is re-raised
    unchanged. A C{TypeError} whose traceback does not end at the call line
    is also re-raised, since it did not come from the arity probing.

    Callables listed in C{singleArgBuiltins} are special-cased and wrapped as
    C{lambda s,l,t: func(t)}.

    The wrapper's C{__name__} is set from C{func} so debug output stays readable.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # single-element lists act as mutable cells shared with the nested wrapper
    # (this version avoids 'nonlocal' to stay Python 2 compatible)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the (file, line) of the last extracted traceback
                        # entry with the synthesized location of the func call above
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                # arity mismatch at the call itself: drop one more leading argument
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1113 -class ParserElement(object):
1114 """Abstract base level parser element class.""" 1115 DEFAULT_WHITE_CHARS = " \n\t\r" 1116 verbose_stacktrace = False 1117 1118 @staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of whitespace characters, stored in
    C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    new_default = chars
    ParserElement.DEFAULT_WHITE_CHARS = new_default
1132 1133 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain strings that are combined with a
    parser element (stored in C{ParserElement._literalStringClass}).

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
1153
def __init__( self, savelist=False ):
    """Initialize the attributes shared by all parser elements.

    C{savelist} is stored as C{saveAsList}; every other attribute starts at
    its default value.
    """
    # actions and callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # naming / results
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing behavior flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
1176
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} that has its own parse
    action and ignore-expression lists, so different parse actions can be
    attached to the same parsing pattern. If C{copyDefaultWhiteChars} is set,
    the copy picks up the current C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    clone = copy.copy( self )
    # give the clone independent lists so later additions don't leak back
    clone.parseAction = list(self.parseAction)
    clone.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return clone
1199
def setName( self, name ):
    """
    Give this expression a name; the name is used to build the
    C{"Expected <name>"} error message, which makes debugging and exception
    messages clearer. Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    # keep an already-created exception object in step with the new message
    if hasattr(self,"exception"):
        self.exception.msg = self.errmsg
    return self
1213
def setResultsName( self, name, listAllMatches=False ):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    named.resultsName = name
    # modal results keep only the last match; cumulative results list them all
    named.modalResults = not accumulate
    return named
1241
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable. Returns C{self}.
    """
    if not breakFlag:
        # restore the wrapped parse method, if a breakpoint wrapper is installed
        current = self._parse
        if hasattr(current,"_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember what was wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
1259
def setParseAction( self, *fns, **kwargs ):
    """
    Define one or more actions to perform when successfully matching parse element definition.
    Any previously defined parse actions are replaced.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # normalize every action to the full (s, loc, toks) calling convention
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1297
def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See L{I{setParseAction}<setParseAction>}.

    The C{callDuringTry} keyword argument can only switch the flag on; an
    already-set flag is kept.

    See examples in L{I{copy}<copy>}.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1307
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate becomes its own parse action; when a predicate returns a
    false value, the configured exception is raised at the match location.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if true, turns on running parse actions during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(predicate):
        # Each predicate gets its own scope. A closure defined directly in the
        # loop below would look up the loop variable when called, so every
        # action would end up testing only the last predicate passed in.
        # The arity wrapper is also built once here instead of on every call.
        predicate = _trim_arity(predicate)
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}."""
    on_failure = fn
    self.failAction = on_failure
    return self
1347
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{ignoreExprs}.

    The ignore expressions are tried repeatedly, in order, until a full pass
    over all of them matches nothing; the resulting location is returned.
    """
    skipped_something = True
    while skipped_something:
        skipped_something = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches of this expression; the loop is
                # left through the ParseException raised when it stops matching
                while True:
                    loc, _tokens = ignorable._parse( instring, loc )
                    skipped_something = True
            except ParseException:
                pass
    return loc
1360
def preParse( self, instring, loc ):
    """Return the location at which matching should start: C{loc} advanced
    past ignorable expressions (when any are defined) and then past leading
    whitespace characters (when C{skipWhitespace} is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc
1372
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation of the match step: consumes nothing and returns
    the unchanged location together with an empty token list."""
    no_tokens = []
    return loc, no_tokens
1375
def postParse( self, instring, loc, tokenlist ):
    """Base implementation of the post-match hook: returns the token list
    it was given, unchanged."""
    result = tokenlist
    return result
1378 1379 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core match routine (uncached): pre-parse, match, post-process, then run
    parse actions.

    Steps, as implemented below:
     - optionally call C{preParse} (only when both the C{callPreParse}
       argument and C{self.callPreparse} are true) to find the start location
     - call C{parseImpl}; an C{IndexError} from it is converted into a
       C{ParseException} located at the end of the input
     - pass the tokens through C{postParse} and wrap them in C{ParseResults}
     - if parse actions are defined and C{doActions} or C{self.callDuringTry}
       is true, call each action as C{fn(instring, tokensStart, retTokens)};
       a return value that is neither C{None} nor the same results object
       replaces the results

    When C{self.debug} is set, the custom debug actions (start, success,
    exception) are invoked if defined; C{self.failAction}, if defined, is
    called when the match raises a C{ParseBaseException}.

    Returns a C{(loc, ParseResults)} tuple.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slower path: exceptions are intercepted so that the debug/fail
        # callbacks can be notified before the exception is re-raised
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when the element may raise one, or
        # when the start location is already at/after the end of the input
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same action loop as the non-debugging branch below, but with the
            # exception debug action notified on failure
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn( instring, tokensStart, retTokens )
                    except IndexError as parse_action_exc:
                        # an IndexError inside a parse action is reported as a
                        # ParseException, keeping the original as its cause
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn( instring, tokensStart, retTokens )
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1462
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} without running parse actions and return
    the location following the match.

    A C{ParseFatalException} raised during the attempt is converted into an
    ordinary C{ParseException} at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return outcome[0]
1468
def canParseNext(self, instring, loc):
    """Return C{True} if this expression matches at C{loc}, C{False} if the
    attempt raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1476
class _UnboundedCache(object):
    """Packrat cache without a size limit.

    The storage dict lives in a closure; C{get}, C{set}, C{clear} and
    C{__len__} are bound onto the instance as methods. C{get} returns the
    C{not_in_cache} sentinel for unknown keys.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def cache_get(self, key):
            return cache.get(key, not_in_cache)

        def cache_set(self, key, value):
            cache[key] = value

        def cache_clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(cache_get, self)
        self.set = types.MethodType(cache_set, self)
        self.clear = types.MethodType(cache_clear, self)
        # NOTE: bound on the instance, so call cache.__len__() explicitly;
        # the len() builtin looks the method up on the type, not the instance
        self.__len__ = types.MethodType(cache_len, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """Packrat cache holding at most C{size} entries; when the limit is
        exceeded the oldest inserted entries are discarded first."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def cache_get(self, key):
                return cache.get(key, not_in_cache)

            def cache_set(self, key, value):
                cache[key] = value
                while len(cache) > size:
                    try:
                        # last=False pops the oldest entry
                        cache.popitem(False)
                    except KeyError:
                        pass

            def cache_clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(cache_get, self)
            self.set = types.MethodType(cache_set, self)
            self.clear = types.MethodType(cache_clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Fallback FIFO cache for interpreters without an ordered dict:
        a plain dict plus a deque recording key insertion order."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Deliberately unbounded: a deque created with maxlen=size would
            # silently drop its oldest key on append, so the eviction loop
            # below would never run and the dict would grow without limit.
            key_fifo = collections.deque()

            def cache_get(self, key):
                return cache.get(key, not_in_cache)

            def cache_set(self, key, value):
                # a key keeps its original position when its value is replaced
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def cache_clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(cache_get, self)
            self.set = types.MethodType(cache_set, self)
            self.clear = types.MethodType(cache_clear, self)
            self.__len__ = types.MethodType(cache_len, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are cached under the key
    C{(self, instring, loc, callPreParse, doActions)}. Both successful results
    and C{ParseBaseException}s are cached; a cached exception is raised again
    on a hit, and a cached result is returned with a copy of its tokens.
    Hit/miss counters are kept in C{ParserElement.packrat_cache_stats}.
    The whole lookup-and-parse sequence runs while holding
    C{ParserElement.packrat_cache_lock}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            outcome = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        # store a copy of the tokens so the caller's object is not shared with the cache
        cache.set(key, (outcome[0], outcome[1].copy()))
        return outcome
1586 1587 _parse = _parseNoCache 1588 1589 @staticmethod
def resetCache():
    """Empty the packrat cache and zero its statistics counters (in place)."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
1593 1594 _packratEnabled = False 1595 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       Only the first call has any effect; later calls return without
       changing the cache.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # route all parsing through the memoizing entry point from now on
    ParserElement._parse = ParserElement._parseCache
1629
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end_loc, results = self._parse( instring, 0 )
        if parseAll:
            # nothing but skippable text may remain after the match
            end_loc = self.preParse( instring, end_loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, end_loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return results
1679
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator yielding C{(tokens, start, end)} tuples.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the methods used in the scanning loop to local names
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        # '<=' lets one attempt be made at the very end of the input
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case) is a separate variable
                        # from 'nextLoc' - it is the pre-parsed position of loc, and
                        # only decides between jumping to the end of the match and
                        # advancing by a single character
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward so the scan
                    # cannot stall
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1750
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} on the expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # unmatched text between the previous match and this one is kept as is
            pieces.append( instring[prev_end:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks,list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1793
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Returns a C{ParseResults} built from the tokens of every match; the
    match locations are discarded.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1820
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between consecutive matches, then the text after the last match.
    When C{includeSeparators} is true, the first token of each separator match is
    yielded right after the text that precedes it.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator (start of the next text piece)
    last = 0
    for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:start]
        if includeSeparators:
            yield tokens[0]
        last = end
    # whatever follows the final separator (the whole string if nothing matched)
    yield instring[last:]
1842
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}. A string operand is first
    converted with C{ParserElement._literalStringClass} (L{Literal} by default).
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1862
def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass} and then
    added to C{self}; any other operand type warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1874
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop: the result is
    C{self + And._ErrorStop() + other}.  A string operand is converted with
    C{ParserElement._literalStringClass}; any other non-C{ParserElement} operand
    warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1886
def __rsub__(self, other ):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass} and the
    result is C{other - self}; any other operand type warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1898
def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of ints/C{None},
    and C{ValueError} for a negative count, a maximum smaller than the minimum,
    or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # optElements becomes the number of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # the type names are interpolated into the message (they used to be
            # passed as extra exception args, leaving the '%s' placeholders unformatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    delegates to C{self.__mul__(other)}.
    """
    return self.__mul__(other)
1969
def __or__(self, other ):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.  A string operand is
    converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1981
def __ror__(self, other ):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass} and the
    result is C{other | self}; any other operand type warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1993
def __xor__(self, other ):
    """
    Implementation of ^ operator - returns C{L{Or}}.  A string operand is
    converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2005
def __rxor__(self, other ):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass} and the
    result is C{other ^ self}; any other operand type warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2017
def __and__(self, other ):
    """
    Implementation of & operator - returns C{L{Each}}.  A string operand is
    converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2029
def __rand__(self, other ):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted with C{ParserElement._literalStringClass} and the
    result is C{other & self}; any other operand type warns and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2041
def __invert__( self ):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny( self )
    return negated
2047
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    C{name} is handed unchanged to C{setResultsName}; per that method's
    documentation, a trailing C{'*'} character in C{name} requests
    C{listAllMatches=True}.

    If C{name} is omitted (C{None}), same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2066
def suppress( self ):
    """
    Returns this C{ParserElement} wrapped in a C{Suppress}, which suppresses its
    output; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress( self )
    return wrapped
2073
def leaveWhitespace( self ):
    """
    Turns off C{skipWhitespace} for this C{ParserElement}, so that whitespace is
    no longer skipped before matching the characters in its defined pattern.
    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2082
def setWhitespaceChars( self, chars ):
    """
    Overrides the default whitespace chars: stores C{chars} as C{whiteChars},
    (re-)enables whitespace skipping and clears C{copyDefaultWhiteChars}.

    Returns C{self}, so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2091
def parseWithTabs( self ):
    """
    Sets C{keepTabs}, overriding the default behavior of expanding C{<TAB>}s to
    spaces before parsing the input string.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
2100
def ignore( self, other ):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in C{Suppress}.  A C{Suppress} expression is added to
    C{ignoreExprs} only if it is not already present; any other expression is
    copied, wrapped in C{Suppress} and appended.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2123
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Enable display of debugging messages while doing pattern matching, using the
    given actions.  Any action passed as C{None} (or another false value) is
    replaced by the corresponding module default debug action.

    Stores the three actions as the C{debugActions} tuple
    C{(start, success, exception)}, sets C{debug} and returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
2133
def setDebug( self, flag=True ):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installs the default debug actions through
    L{setDebugActions}), False to disable.  Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
2174
def __str__( self ):
    """
    String form of the expression: its C{name} attribute.
    """
    label = self.name
    return label
2177
def __repr__( self ):
    """
    Same text as the string form of the expression, obtained through C{_ustr}.
    """
    text = _ustr(self)
    return text
2180
def streamline( self ):
    """
    Marks this expression as streamlined and discards the cached string
    representation (C{strRepr}).  Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2185
def checkRecursion( self, parseElementList ):
    """
    Recursion check hook; this implementation does nothing and returns C{None}.
    """
    return None
2188
def validate( self, validateTrace=None ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation only starts a recursion check with an empty element list;
    C{validateTrace} is accepted for signature compatibility and is not used here.
    Its default is C{None} rather than a shared mutable list.
    """
    self.checkRecursion( [] )
2194
def parseFile( self, file_or_filename, parseAll=False ):
    """
    Execute the parse expression on the given file or filename.
    The argument is first treated as a file object (its C{read()} is called); if that
    raises C{AttributeError} it is treated as a filename, and
    the entire file is opened, read, and closed before parsing.

    Returns the result of C{parseString(contents, parseAll)}.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
2214
def __eq__(self,other):
    """
    Equality test.  Against another C{ParserElement}: true for the same object or
    when both have equal instance attributes (C{vars}).  Against a string: true
    when this expression matches the string (see L{matches}).  For any other
    operand the comparison is made between a C{super} proxy and C{other}.
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other
2222
def __ne__(self,other):
    """
    Inequality test: the negation of C{self == other}.
    """
    same = (self == other)
    return not same
2225
def __hash__(self):
    """
    Identity-based hash: the hash of this object's C{id()}.
    """
    identity = id(self)
    return hash(identity)
2228
def __req__(self,other):
    """
    Returns the result of C{self == other}.
    """
    same = (self == other)
    return same
2231
def __rne__(self,other):
    """
    Returns the negation of C{self == other}.
    """
    same = (self == other)
    return not same
2234
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Returns C{True} when C{parseString} succeeds and C{False} when it raises a
    C{ParseBaseException}.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2253
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    C{(test string, parse result or raised exception)} pairs, one per test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    allResults = []
    pendingComments = []
    success = True
    for test in tests:
        # comment lines (and blank lines following a comment) are collected and
        # printed in front of the next real test
        isComment = comment is not None and comment.matches(test, False)
        if isComment or (pendingComments and not test):
            pendingComments.append(test)
            continue
        if not test:
            continue

        report = ['\n'.join(pendingComments), test]
        pendingComments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            test = test.replace(r'\n','\n')
            result = self.parseString(test, parseAll=parseAll)
            report.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = ""
            if isinstance(pe, ParseFatalException):
                fatal = "(FATAL)"
            if '\n' not in test:
                report.append(' '*pe.loc + '^' + fatal)
            else:
                # multi-line test: show the offending line with the caret under the error column
                report.append(line(pe.loc, test))
                report.append(' '*(col(pe.loc,test)-1) + '^' + fatal)
            report.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            report.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                report.append('')
            print('\n'.join(report))

        allResults.append((test, result))

    return success, allResults
2384
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__( self ):
        """Initializes the base C{ParserElement} with C{savelist=False}."""
        super(Token,self).__init__( savelist=False )
2392
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__( self ):
        """Names the token C{"Empty"} and flags it as possibly returning an empty match."""
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2403
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__( self ):
        """Names the token C{"NoMatch"} and sets its fixed error message."""
        super(NoMatch,self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fails: raises C{ParseException} at C{loc}."""
        raise ParseException(instring, loc, self.errmsg, self)
2418
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__( self, matchString ):
        """
        Stores the string to match, its length and its first character.  An empty
        C{matchString} triggers a C{SyntaxWarning} and turns this instance into
        an L{Empty} token.
        """
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # nothing to match against: behave like Empty from now on
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """
        Returns C{(loc + matchLen, match)} when C{instring} contains the literal
        at C{loc}; raises C{ParseException} otherwise.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# short module-level alias for Literal
_L = Literal
# class used by the ParserElement operator methods (+, -, |, ^, &) to convert
# plain string operands into parser expressions
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        """
        Stores the keyword, its length and first character, and the set of
        identifier characters that may not adjoin a match.  In caseless mode the
        upper-cased keyword and upper-cased identifier characters are stored for
        comparison.  An empty C{matchString} only triggers a C{SyntaxWarning}.
        """
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Matches the keyword at C{loc}, requiring that neither the character after
        the match nor the character before C{loc} is an identifier character.
        Returns C{(end location, keyword)} or raises C{ParseException}.
        """
        matchEnd = loc + self.matchLen
        identChars = self.identChars
        if self.caseless:
            matched = (
                instring[ loc:matchEnd ].upper() == self.caselessmatch
                and (matchEnd >= len(instring) or instring[matchEnd].upper() not in identChars)
                and (loc == 0 or instring[loc-1].upper() not in identChars)
            )
        else:
            matched = (
                instring[loc] == self.firstMatchChar
                and (self.matchLen == 1 or instring.startswith(self.match, loc))
                and (matchEnd >= len(instring) or instring[matchEnd] not in identChars)
                and (loc == 0 or instring[loc-1] not in identChars)
            )
        if matched:
            return matchEnd, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Returns a copy whose C{identChars} is reset to the current
        C{Keyword.DEFAULT_KEYWORD_CHARS} string.
        """
        # NOTE(review): this discards any custom identChars given to the
        # constructor (and the upper-casing done in caseless mode) - confirm
        # whether that is intended before relying on copies of customized keywords.
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2525
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__( self, matchString ):
        """
        Initializes the underlying L{Literal} with the upper-cased string used
        for comparison, and keeps the string as given for the returned token.
        """
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compares the upper-cased input slice at C{loc} with the stored upper-cased
        literal; on success returns C{(end location, defining string)}, otherwise
        raises C{ParseException}.
        """
        matchEnd = loc + self.matchLen
        if instring[ loc:matchEnd ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return matchEnd, self.returnString
2548
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Matching is performed by the inherited C{Keyword.parseImpl} in caseless mode,
    so the keyword must be delimited by non-identifier characters on BOTH sides.
    (A separate C{parseImpl} override used to skip the check of the preceding
    character, which allowed a match in the middle of a longer word.)

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        """
        Same as C{Keyword(matchString, identChars, caseless=True)}.
        """
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
2566
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        """
        Stores the reference string and the mismatch budget, and builds the
        error message reported when no close match is found.
        """
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compares C{instring[loc:loc+len(match_string)]} with C{match_string}
        character by character.  Returns C{(end location, results)} when no more
        than C{maxMismatches} positions differ; raises C{ParseException} when the
        budget is exceeded or the input is too short.
        """
        start = loc
        maxloc = start + len(self.match_string)

        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for pos, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The whole candidate was compared without exceeding the budget.
                # The end location must be relative to 'start', not to the
                # beginning of the input, so the match works at any offset.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2626
2627 2628 -class Word(Token):
2629 """ 2630 Token for matching words composed of allowed character sets. 2631 Defined with string containing all allowed initial characters, 2632 an optional string containing allowed body characters (if omitted, 2633 defaults to the initial character set), and an optional minimum, 2634 maximum, and/or exact length. The default value for C{min} is 1 (a 2635 minimum value < 1 is not valid); the default values for C{max} and C{exact} 2636 are 0, meaning no maximum or exact length restriction. An optional 2637 C{excludeChars} parameter can list characters that might be found in 2638 the input C{bodyChars} string; useful to define a word of all printables 2639 except for one or two characters, for instance. 2640 2641 L{srange} is useful for defining custom character set strings for defining 2642 C{Word} expressions, using range notation from regular expression character sets. 2643 2644 A common mistake is to use C{Word} to match a specific literal string, as in 2645 C{Word("Address")}. Remember that C{Word} uses the string argument to define 2646 I{sets} of matchable characters. This expression would match "Add", "AAA", 2647 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 2648 To match an exact literal string, use L{Literal} or L{Keyword}. 2649 2650 pyparsing includes helper strings for building Words: 2651 - L{alphas} 2652 - L{nums} 2653 - L{alphanums} 2654 - L{hexnums} 2655 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2656 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 
2657 - L{printables} (any non-whitespace character) 2658 2659 Example:: 2660 # a word composed of digits 2661 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2662 2663 # a word with a leading capital, and zero or more lowercase 2664 capital_word = Word(alphas.upper(), alphas.lower()) 2665 2666 # hostnames are alphanumeric, with leading alpha, and '-' 2667 hostname = Word(alphas, alphanums+'-') 2668 2669 # roman numeral (not a strict parser, accepts invalid mix of characters) 2670 roman = Word("IVXLCDM") 2671 2672 # any string of non-whitespace characters, except for ',' 2673 csv_value = Word(printables, excludeChars=",") 2674 """
2675 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
2676 super(Word,self).__init__() 2677 if excludeChars: 2678 initChars = ''.join(c for c in initChars if c not in excludeChars) 2679 if bodyChars: 2680 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2681 self.initCharsOrig = initChars 2682 self.initChars = set(initChars) 2683 if bodyChars : 2684 self.bodyCharsOrig = bodyChars 2685 self.bodyChars = set(bodyChars) 2686 else: 2687 self.bodyCharsOrig = initChars 2688 self.bodyChars = set(initChars) 2689 2690 self.maxSpecified = max > 0 2691 2692 if min < 1: 2693 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2694 2695 self.minLen = min 2696 2697 if max > 0: 2698 self.maxLen = max 2699 else: 2700 self.maxLen = _MAX_INT 2701 2702 if exact > 0: 2703 self.maxLen = exact 2704 self.minLen = exact 2705 2706 self.name = _ustr(self) 2707 self.errmsg = "Expected " + self.name 2708 self.mayIndexError = False 2709 self.asKeyword = asKeyword 2710 2711 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2712 if self.bodyCharsOrig == self.initCharsOrig: 2713 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2714 elif len(self.initCharsOrig) == 1: 2715 self.reString = "%s[%s]*" % \ 2716 (re.escape(self.initCharsOrig), 2717 _escapeRegexRangeChars(self.bodyCharsOrig),) 2718 else: 2719 self.reString = "[%s][%s]*" % \ 2720 (_escapeRegexRangeChars(self.initCharsOrig), 2721 _escapeRegexRangeChars(self.bodyCharsOrig),) 2722 if self.asKeyword: 2723 self.reString = r"\b"+self.reString+r"\b" 2724 try: 2725 self.re = re.compile( self.reString ) 2726 except Exception: 2727 self.re = None
2728
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word at C{loc} and return C{(end_location, matched_text)}.

    When a compiled regular expression is available (C{self.re}) it is used
    directly; otherwise the characters are scanned one at a time against
    C{self.initChars} / C{self.bodyChars}.  Raises C{ParseException} when
    no word satisfying the length and keyword constraints starts at C{loc}.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    body = self.bodyChars
    limit = min( begin + self.maxLen, total )
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # true when the character right after the match is itself a body character
    moreBodyFollows = loc < total and instring[loc] in body

    rejected = loc - begin < self.minLen
    if not rejected and self.maxSpecified:
        # an explicit maximum means the word must really end here
        rejected = moreBodyFollows
    if not rejected and self.asKeyword:
        # keywords may not adjoin body characters on either side
        rejected = (begin>0 and instring[begin-1] in body) or moreBodyFollows

    if rejected:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2763
def __str__( self ):
    """
    Return the name supplied by the base class when it has one; otherwise
    build (and cache in C{self.strRepr}) a short C{W:(...)} description from
    the first few characters of the leading and body character strings.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        # base class could not produce a string; build one below
        pass

    if self.strRepr is None:
        # show at most four characters of each character string
        def abbreviate(chars):
            return chars[:4]+"..." if len(chars)>4 else chars

        leading = abbreviate(self.initCharsOrig)
        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % leading
        else:
            self.strRepr = "W:(%s,%s)" % ( leading, abbreviate(self.bodyCharsOrig) )

    return self.strRepr
2785
class Char(Word):
    """
    Shorthand for C{Word(characters, exact=1)}: matches exactly one
    character taken from the given string of characters.
    """
    def __init__(self, charset):
        """Build a one-character C{Word} over C{charset} and compile its character-class regex."""
        super(Char, self).__init__(charset, exact=1)
        # exact=1 keeps Word from building a regex itself, so install one here
        charClass = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
        self.reString = charClass
        self.re = re.compile( charClass )
2796
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
        print(make_html.transformString("h1:main title:"))
        # prints "<h1>main title</h1>"
    """
    # type of a compiled regular expression object, used to recognise pre-compiled patterns
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
        See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression, in which
        case it is used directly and C{flags} is only recorded.

        Parameters:
         - asGroupList - when true, the parse result is C{match.groups()}
         - asMatch - when true, the parse result is the match object itself
           (takes precedence over C{asGroupList})

        Raises C{ValueError} if C{pattern} is neither a string nor a compiled
        regular expression; re-raises C{re.error} (after a warning) if the
        pattern string does not compile.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the pattern source text, not the repr of the compiled object
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.asGroupList = asGroupList
        self.asMatch = asMatch

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Apply the compiled expression at C{loc}.

        Returns C{(end_location, result)} where C{result} is the match object
        (C{asMatch}), the tuple of groups (C{asGroupList}), or a
        C{ParseResults} holding the matched text plus one named result per
        named group.  Raises C{ParseException} if the expression does not
        match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        if self.asMatch:
            ret = result
        elif self.asGroupList:
            ret = result.groups()
        else:
            ret = ParseResults(result.group())
            # expose named groups as named results
            named = result.groupdict()
            for key in named:
                ret[key] = named[key]
        return loc,ret

    def __str__( self ):
        """Return the base-class name if available, else a cached C{Re:(pattern)} description."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        """
        Return Regex with an attached parse action to transform the parsed
        result as if called using C{re.sub(expr, repl, string)}.

        Raises C{SyntaxError} (after a warning) when used with
        C{asGroupList=True}, or with C{asMatch=True} and a callable C{repl}.
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                           SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                           SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # the token is the matched text: re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # escaped-whitespace literals and the characters they are converted to;
    # the literals cannot overlap one another, so replacement order is irrelevant
    _whitespaceEscapes = (
        (r'\t', '\t'),
        (r'\n', '\n'),
        (r'\f', '\f'),
        (r'\r', '\r'),
    )

    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Validate the quote delimiters and build the regular expression that
        recognises a complete quoted string.

        Raises C{SyntaxError} (after a warning) if C{quoteChar} or
        C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # characters that may not appear unescaped inside the quoted body
        escCharInClass = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        classArgs = ( re.escape(self.quoteChar),
                      _escapeRegexRangeChars(self.endQuoteChar[0]),
                      escCharInClass or '' )
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % classArgs
        else:
            # single-line strings additionally stop at line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % classArgs

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the body,
            # as long as the next character breaks the end-quote sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same class as sre_constants.error
            warnings.warn("invalid pattern (%s) passed to QuotedString" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc} and return C{(end_location, text)}.

        With C{unquoteResults} the delimiters are stripped and, in this order,
        escaped whitespace is converted, escape characters are removed and
        escaped quotes are replaced by the end quote string.  Raises
        C{ParseException} if no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    for wslit,wschar in self._whitespaceEscapes:
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class name if available, else a cached description of the quote delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3039
class CharsNotIn(Token):
    """
    Token matching a run of characters that are I{not} in a given set.
    Whitespace is part of the match unless it is listed among the
    disallowed characters (see example).  Defined with a string of all
    disallowed characters and an optional minimum, maximum, and/or exact
    length.  C{min} defaults to 1 (values < 1 are rejected); C{max} and
    C{exact} default to 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and length limits; raises C{ValueError} if C{min} < 1."""
        super(CharsNotIn,self).__init__()
        # whitespace may itself be part of the match, so never skip it
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until a disallowed character, the
        maximum length or the end of input is reached.  Returns
        C{(end_location, matched_text)}; raises C{ParseException} if the first
        character is disallowed or fewer than C{minLen} characters matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the base-class name if available, else a cached C{!W:(...)} description."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # show at most the first four disallowed characters
            shown = self.notChars
            template = "!W:(%s)"
            if len(shown) > 4:
                shown = shown[:4]
                template = "!W:(%s...)"
            self.strRepr = template % shown

        return self.strRepr
3111
class White(Token):
    """
    Matching class for significant whitespace.  Whitespace is normally
    skipped by pyparsing grammars; use this class where a particular
    whitespace structure matters.  Define with a string containing the
    whitespace characters to be matched; default is C{" \\t\\r\\n"}.  Also
    takes optional C{min}, C{max}, and C{exact} arguments, as defined for
    the C{L{Word}} class.
    """
    # display names used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Record the whitespace characters to match and the length limits."""
        super(White,self).__init__()
        self.matchWhite = ws
        # characters we want to match must not be skipped as leading whitespace
        stillSkipped = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( stillSkipped )
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a run of the configured whitespace characters starting at
        C{loc}.  Returns C{(end_location, matched_text)}; raises
        C{ParseException} if the first character does not qualify or the run
        is shorter than C{minLen}.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3161
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below it (column, line and string boundaries, word boundaries)."""
    def __init__( self ):
        """Name the token after its concrete class and flag it as possibly matching empty text."""
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
3169
class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful for
    scraping tabular reports.
    """
    def __init__( self, colno ):
        """Remember the target column number C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Skip ignorable expressions and then whitespace, stopping as soon as
        the target column is reached.  Returns the new location.
        """
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(new_location, skipped_text)} for the text between C{loc}
        and the target column.  Raises C{ParseException} if C{loc} is already
        past that column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3194
class LineStart(_PositionToken):
    r"""
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} when C{loc} is in column 1; otherwise raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3225
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # the newline is what we match, so it must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a newline at C{loc} (returning it as the token), or succeed
        with an empty result exactly at the end of the input.  Anything else
        raises C{ParseException}.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3245
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed at location 0, or at the location C{preParse} reaches when
        started from 0 (i.e. only whitespace and ignorables come before
        C{loc}).  Otherwise raise C{ParseException}.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3260
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed only when no input remains.

        Returns C{(loc+1, [])} exactly at the end of the input and
        C{(loc, [])} when C{loc} is already past it; raises
        C{ParseException} while characters remain.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            # step past the end so a repeated StringEnd does not advance again
            return loc+1, []
        # loc > instrlen: the three comparisons are exhaustive, no further case exists
        return loc, []
3278
class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Return C{(loc, [])} at location 0, or when the previous character is
        not a word character and the current one is; otherwise raise
        C{ParseException}.
        """
        if loc != 0:
            wordChars = self.wordChars
            startsWord = instring[loc-1] not in wordChars and instring[loc] in wordChars
            if not startsWord:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3298
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # the character right at loc is significant, so leading whitespace is not skipped
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Return C{(loc, [])} when C{loc} is at or past the end of the input,
        or when the previous character is a word character and the current
        one is not.  Otherwise raise C{ParseException}.

        At location 0 of a non-empty string there is no previous character,
        so the match fails there.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 is tested explicitly: instring[loc-1] would otherwise wrap
            # around to the last character of the input
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3320
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Normalise C{exprs} into the list C{self.exprs}.

        C{exprs} may be a single string, a generator, an iterable of
        expressions, an iterable made up only of strings (each is wrapped with
        C{ParserElement._literalStringClass}), or a single expression.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ ParserElement._literalStringClass( expr ) for expr in exprs ]
            self.exprs = list(exprs)
        else:
            # not a registered Iterable: try to expand it anyway, else treat it
            # as a single expression
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index/slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Append C{other} to the contained expressions, drop the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original contained expressions are left untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as ignorable on this expression and propagate the
        registered ignore expression to every contained expression.  A
        C{Suppress} that is already registered is not added again.
        Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class name if available, else a cached C{ClassName:(exprs)} description."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline the contained expressions and flatten directly nested
        expressions of the same class.  Returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """
        Validate every contained expression, passing along the trace extended
        by C{self}, then check this expression for recursion.

        C{validateTrace} defaults to an empty trace; the list passed in is
        never modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3433
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: failures of the expressions after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Take the whitespace handling from the first contained expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse every contained expression in sequence and return
        C{(end_location, accumulated_results)}.

        Once an C{_ErrorStop} marker has been passed, any parse failure (or
        C{IndexError}) of a later expression is raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, accumulated = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, tokens = expr._parse( instring, loc, doActions )
            else:
                try:
                    loc, tokens = expr._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if tokens or tokens.haskeys():
                accumulated += tokens
        return loc, accumulated

    def __iadd__(self, other ):
        """Support C{expr += other}: wrap a plain string as a literal and append in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the first one that cannot match empty."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the explicit name if one is set, else a cached C{{a b c}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ".join(parts) + "}"

        return self.strRepr
3508
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative, then re-parse the successful ones from longest
        to shortest match and return the first result that parses.

        If nothing succeeds, the exception that got furthest into the input
        is raised with this expression's error message.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # stable descending sort: ties keep their definition order
            candidates.sort(key=lambda pair: pair[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr = err
                        furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        """Support C{expr ^= other}: wrap a plain string as a literal and append in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the explicit name if one is set, else a cached C{{a ^ b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative, with C{self} added to the trail."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3587
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses at C{loc}.

        If none does, the exception that got furthest into the input is
        raised with this expression's error message.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Support C{expr |= other}: wrap a plain string as a literal and append in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the explicit name if one is set, else a cached C{{a | b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative, with C{self} added to the trail."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3655
class Each(ParseExpression):
    """
    Matches only when every one of the given C{ParseExpression}s is found,
    in whatever order they appear in the input.  Whitespace may separate the
    expressions.  Usually built with the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # the whole Each may match nothing only if every member may
        self.mayReturnEmpty = all(member.mayReturnEmpty for member in self.exprs)
        self.skipWhitespace = True
        # the expression groups are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try the member expressions repeatedly (without running parse actions)
        to discover an order in which they match, then parse for real in that
        order and return the concatenated results.

        Raises C{ParseException} if any required expression never matched.
        """
        if self.initExprGroups:
            wrapped_optionals = [e for e in self.exprs if isinstance(e, Optional)]
            # maps id(inner expr) -> its Optional wrapper, so that the wrapper
            # (not the bare inner expression) is what gets parsed at the end
            self.opt1map = dict((id(opt.expr), opt) for opt in wrapped_optionals)
            self.optionals = [opt.expr for opt in wrapped_optionals] + [
                e for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, Optional)
            ]
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [
                e for e in self.exprs
                if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        match_order = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = (pending_required + pending_optional
                          + self.multioptionals + self.multirequired)
            failures = 0
            for cand in candidates:
                try:
                    scan_loc = cand.tryParse(instring, scan_loc)
                except ParseException:
                    failures += 1
                    continue
                match_order.append(self.opt1map.get(id(cand), cand))
                if cand in pending_required:
                    pending_required.remove(cand)
                elif cand in pending_optional:
                    pending_optional.remove(cand)
            if failures == len(candidates):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        match_order += [
            e for e in self.exprs
            if isinstance(e, Optional) and e.expr in pending_optional
        ]

        # second pass: parse for real, in the discovered order
        collected = []
        for matched_expr in match_order:
            loc, tokens = matched_expr._parse(instring, loc, doActions)
            collected.append(tokens)

        return loc, sum(collected, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on each member, with this element added to the trail."""
        path = parseElementList[:]
        path += [self]
        for member in self.exprs:
            member.checkRecursion(path)
3779
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}, which may be
    C{None} (see the C{is not None} guards throughout).
    """
    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            # promote a bare string to a parser element
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's matching characteristics
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Delegate parsing to the wrapped expression.

        Raises C{ParseException} when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        # report against the real input (previously an empty string was
        # passed, which left the exception without any input context)
        raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """
        Disable whitespace skipping on this element and on a private copy of
        the wrapped expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # guard BEFORE copying: with no wrapped expression there is nothing
        # to copy (the original order raised AttributeError on None)
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as an ignorable expression here and on the wrapped
        expression.  A C{Suppress} that is already registered is not added
        again.  Returns C{self}.
        """
        already_registered = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_registered:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped
        expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """
        Validate the wrapped expression, then check this element for recursion.

        C{validateTrace} is the list of elements visited so far; it is copied,
        never modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        # deliberate best-effort: prefer the base-class string, and fall back
        # to a generated "ClassName:(expr)" form if that raises
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
3856
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead for the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string; it only
    verifies that the given expression matches at the current position.
    The returned token list is always empty, but any results names defined
    in the lookahead expression *will* be returned for access by name.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # parse to verify the match, but report the ORIGINAL location so that
        # no input is consumed
        lookahead = self.expr._parse(instring, loc, doActions=doActions)
        tokens = lookahead[1]
        # empty the token list in place; the results object itself is returned
        del tokens[:]
        return loc, tokens
3885
class PrecededBy(ParseElementEnhance):
    """
    Lookbehind matching of the given parse expression.  C{PrecededBy}
    does not advance the parsing position within the input string, it only
    verifies that the specified parse expression matches prior to the current
    position.  C{PrecededBy} always returns a null token list, but if
    a results name is defined on the given expression, it is returned.

    Parameters:
     - expr - expression that must match prior to the current parse location
     - retreat - (default=C{None}) - (int) maximum number of characters to
       lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or a
    Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        # work on a copy of the expression that does not skip whitespace
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # self.exact is True when the lookbehind distance is derived from the
        # expression itself instead of being a caller-supplied maximum
        self.exact = False
        # basestring (not str) for consistency with the other string checks
        # in this module
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False

    def parseImpl(self, instring, loc=0, doActions=True):
        """
        Check that the expression matches immediately before C{loc}.

        Returns C{(loc, tokens)} with C{loc} unchanged and the token list
        emptied.  Raises a parse exception when no lookbehind match is found.
        """
        if self.exact:
            if loc < self.retreat:
                # not enough input before loc to hold the expression
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[:loc]
            last_error = ParseException(instring, loc, self.errmsg)
            # Try every offset from 1 up to the window size, clipped to the
            # amount of input actually available.  The upper bound must
            # include offset == loc (a match starting at the very beginning
            # of the input); the previous bound, min(loc, retreat + 1),
            # excluded it whenever loc <= retreat.
            for offset in range(1, min(loc, self.retreat) + 1):
                try:
                    _, ret = test_expr._parse(instring_slice, loc - offset)
                except ParseBaseException as pbe:
                    last_error = pbe
                else:
                    break
            else:
                raise last_error
        # return empty list of tokens, but preserve any defined results names
        del ret[:]
        return loc, ret
3958
class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only when the given parse expression does
    I{not} match at the current position.  C{NotAny} does I{not} advance the
    parsing position within the input string, and it does I{not} skip over
    leading whitespace.  It always returns a null token list.  May be
    constructed using the '~' operator.

    Example::
        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace() here, don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
4002
class _MultipleMatch(ParseElementEnhance):
    """
    Common base of C{OneOrMore} and C{ZeroOrMore}: matches the wrapped
    expression one or more times, optionally stopping in front of a
    C{stopOn} sentinel expression.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # not_ender is the negated sentinel, or None when no stopOn was given
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        # bind the bound methods to locals once; they are used inside the loop
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                if has_ignorables:
                    next_loc = skip_ignorables(instring, loc)
                else:
                    next_loc = loc
                loc, more_tokens = parse_item(instring, next_loc, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop
            pass

        return loc, tokens
4040
class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in succession.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr
4075
class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression any number of times, including not at all.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # a failure of the one-or-more parse simply means "zero matches"
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, []
        return outcome

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr
4106
4107 -class _NullToken(object):
4108 - def __bool__(self):
4109 return False
4110 __nonzero__ = __bool__
4111 - def __str__(self):
4112 return ""
4113 4114 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression: it may be present once or
    not at all.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: substitute the default value, if one was supplied
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
4178
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        origin = loc
        end_of_input = len(instring)
        match_target = self.expr._parse
        hits_fail_on = None if self.failOn is None else self.failOn.canParseNext
        skip_ignored = None if self.ignoreExpr is None else self.ignoreExpr.tryParse

        cursor = loc
        while cursor <= end_of_input:
            # NOTE(review): a failOn hit leaves the loop with `break`, which
            # bypasses the `else` clause below, so no exception is raised
            # here and the text scanned so far is returned - confirm this is
            # the intended meaning of "not a match" in the class docstring.
            if hits_fail_on is not None and hits_fail_on(instring, cursor):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        cursor = skip_ignored(instring, cursor)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run while searching
                match_target(instring, cursor, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                cursor += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = cursor
        skipped = ParseResults(instring[origin:loc])

        if self.includeMatch:
            loc, target_tokens = match_target(instring, loc, doActions, callPreParse=False)
            skipped += target_tokens

        return loc, skipped
4293
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """
        Set the expression this C{Forward} stands for and copy its matching
        characteristics onto this element.  Returns C{self}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        return self << other

    def leaveWhitespace(self):
        # only this element is changed; the contained expression is not touched
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the flag is set BEFORE descending into the contained expression, so
        # a grammar that refers back to this Forward does not recurse forever
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """
        Validate the contained expression unless this element is already in
        C{validateTrace}, then check for recursion.

        C{validateTrace} is the list of elements visited so far; it is copied,
        never modified.
        """
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        # The contained expression is deliberately not rendered: the code that
        # did so was stubbed out upstream ("creates awful memory and perf
        # issues") and sat unreachable after this return, so it was removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            # still undefined: return a new Forward that refers to this one,
            # so that a later definition of self is seen through the copy
            ret = Forward()
            ret <<= self
            return ret
4374
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed text C{"..."}."""

    def __str__(self):
        return "..."
4378
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted for signature compatibility but is not
        # forwarded; saveAsList is always reset to False here
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4386
class Combine(TokenConverter):
    """
    Converter that joins all matched tokens into a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        self.adjacent = adjacent
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.callPreparse = True

    def ignore(self, other):
        # when adjacency is required, the ignorable is registered on this
        # element only, via ParserElement.ignore directly
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from a copy of the results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
4430
class Group(TokenConverter):
    """
    Converter that returns the matched tokens wrapped in their own list - useful for
    returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the matched tokens one level deeper
        grouped = [tokenlist]
        return grouped
4451
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is its key; int keys are stored
            # under their stripped string form
            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if entry_len == 1:
                # key only: the value is an empty string
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_as_results = (
                    len(remainder) != 1
                    or (isinstance(remainder, ParseResults) and remainder.haskeys())
                )
                value = remainder if keep_as_results else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
4515
class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        # the match still has to succeed, but none of its tokens are kept
        nothing = []
        return nothing

    def suppress(self):
        # already suppressing, so there is nothing to wrap
        return self
4541
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    After one successful call, every further call raises C{ParseException}
    until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s,l,"")
        # mark as called only after the wrapped action returns, so an action
        # that raises may be attempted again
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
4558
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
    The trace is written to C{sys.stderr}.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are always (s, l, t); an extra
        # leading argument is taken to be the instance of a bound method
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %r)\n" % (label,result) )
        return result

    # present the wrapper under the wrapped action's name when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    element_text = _ustr(expr)
    list_name = element_text+" ["+_ustr(delim)+" "+element_text+"]..."
    if not combine:
        # delimiters are matched but dropped from the results
        separated = expr + ZeroOrMore(Suppress(delim) + expr)
        return separated.setName(list_name)
    # delimiters are kept and everything is joined into one token
    return Combine(expr + ZeroOrMore(delim + expr)).setName(list_name)
4621
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions, i.e. a pattern of the form::
        integer expr expr expr...
    where the leading integer gives the number of C{expr} items that follow.
    The results contain the matched C{expr} tokens as a (grouped) list; the
    leading count token itself is removed.

    If C{intExpr} is given, it should be a pyparsing expression that produces
    an integer value; a copy of it is used, so the caller's expression is not
    modified.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # once the count has been parsed, (re)define the array body to match
        # exactly that many items; returning [] drops the count token
        count = t[0]
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    # Recursively expand nested lists into one flat list; only true list
    # instances are expanded, any other item is kept as-is.
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat += _flatten(item)
    return flat
4661
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that matches a literal 'repeat' of the
    tokens most recently matched by C{expr}.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because the repeat is matched as a
    literal, this will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.

    Note that a parse action is added to C{expr} itself.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several tokens (possibly nested): flatten them and require each
        # one as a literal, in sequence
        flatTokens = _flatten(t.asList())
        rep << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.

    Note that a parse action is added to C{expr} itself.  When the repeated
    match produces different tokens, a C{ParseException} is raised at the
    location of the repeat, naming the expected and the found tokens.
    """
    rep = Forward()
    # the repeater parses with a copy of expr, then validates the tokens
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s,l,t):
        # remember the (flattened) tokens of the most recent match of expr
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report the real input and location rather than an empty
                # exception, so error messages and longest-match selection
                # among alternatives have something to work with
                raise ParseException(s, l,
                        "Expected %s, found %s" % (matchTokens, theseTokens))
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    # Prefix each of the characters \ ^ - ] with _bslash so the string can be
    # placed inside a regex [] set.  The backslash comes first in the loop so
    # that the prefixes added for the later characters are not escaped again.
    escaped = s
    for special in r"\^-]":
        escaped = escaped.replace(special, _bslash + special)
    # newline and tab are written as their two-character escape sequences
    escaped = escaped.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(escaped)
4725
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals.  The alternatives
    are reordered where necessary so that a longer literal is always tested
    before a shorter one that is a prefix of it, regardless of the input
    order, and duplicates are dropped; a C{L{MatchFirst}} (or an equivalent
    C{Regex}) is returned for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If no literals are given, a C{NoMatch} is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()
        def masks(a, b):
            return b.upper().startswith(a.upper())
        parseElementClass = CaselessLiteral
    else:
        def isequal(a, b):
            return a == b
        def masks(a, b):
            return b.startswith(a)
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Walk the list; for the symbol at pos, look at every later symbol:
    #  - an equal one is a duplicate and is removed
    #  - one that the current symbol is a prefix of ("masks") is moved in
    #    front of the current symbol
    # After either change the same position is examined again; pos only
    # advances once a full scan finds nothing to do.
    pos = 0
    while pos < len(symbols)-1:
        cur = symbols[pos]
        for otherPos, other in enumerate(symbols[pos+1:], pos+1):
            if isequal(other, cur):
                del symbols[otherPos]
                break
            if masks(cur, other):
                del symbols[otherPos]
                symbols.insert(pos, other)
                break
        else:
            pos += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # total length equals symbol count: emit a character set
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex( pattern ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4798
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Builds C{Dict(ZeroOrMore(Group(key + value)))}, so the
    C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} wrappers are applied in the proper order.
    The key pattern can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value pattern can include
    named results, so that the C{Dict} results can include named token fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value entry, repeated zero or more times
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
4833
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the token list in place with the single original-text
            # token, removing the two marker names from the results
            startLoc = t.pop('_original_start')
            endLoc = t.pop('_original_end')
            t[:] = [s[startLoc:endLoc]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  The returned expression wraps C{expr}
    and yields only the first token of its results.
    """
    def firstToken(t):
        return t[0]
    return TokenConverter(expr).setParseAction(firstToken)
4877
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    The returned group carries the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLocn = locator("locn_start")
    # the end marker must not skip whitespace, so that it reports the
    # position immediately after the matched expression
    endLocn = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLocn + expr("value") + endLocn)


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for regex-style '[...]' character set strings (parsed by srange)
# a backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## / \X## / \0x## -> the character with that hex code.  The digits are
# taken as everything after the x marker; stripping the prefix with
# lstrip(r'\0x') would treat it as a character set and also eat leading '0'
# digits (so "\x00" became "") and would not remove an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# 'a-z' style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be processed,
    an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # single characters pass through; a grouped [first, last] pair is
        # expanded to every character from first to last inclusive
        if not isinstance(part,ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(c) for c in range(first, last+1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        return ""
4939
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned parse action raises a
    C{ParseException} when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.
    The returned parse action ignores its arguments and returns a one-element
    list containing C{replStr}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def replacer(s,l,t):
        return [replStr]
    return replacer
4963
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings:
    returns the first token with its first and last characters dropped.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4977
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after C{func} (its C{__name__}, else its class name).

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    try:
        # fall back to the class name for callables without a __name__
        fallbackName = func.__class__.__name__
        func_name = getattr(func, '__name__', fallbackName)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # XML tag names are case-sensitive, HTML tag names are not
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    # optional '/' before the closing '>'; the "empty" result is True only
    # when the slash is present
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased, and the value is optional and
        # may be quoted or a bare word containing no '>'
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the tag name in title case with
    # any ':' separators removed
    titledName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+titledName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+titledName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
5056
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
    Returns the opening and closing tag expressions as a 2-tuple.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
5075
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.
    Returns the opening and closing tag expressions as a 2-tuple.

    Example: similar to L{makeHTMLTags}
    """
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
5084
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If positional name-value tuples are given, any keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) pairs take precedence over keyword arguments
    pairs = args[:] if args else attrDict.items()
    attrs = [(name, value) for name, value in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa
withAttribute.ANY_VALUE = object()

def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.  If C{namespace} is
    given, the attribute tested is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    requirement = {classattr : classname}
    return withAttribute(**requirement)

# Namespace holding the two associativity markers that infixNotation compares
# against; each marker is a distinct sentinel object.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as two
    expressions, if numTerms is not 1, 2 or 3, or if rightLeftAssoc is
    neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before building the name, so that a bad operator pair
            # is reported with this message rather than a formatting error
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            # explicit tuple: %-formatting would reject a two-item list
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        # each level matches "operator applied at this level" (guarded by a
        # cheap lookahead) or falls through to the next-higher precedence level
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.  In that case C{opener} and C{closer} must
    both be strings, otherwise a C{ValueError} is raised.  A C{ValueError} is
    also raised if C{opener} and C{closer} are equal.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1
        if singleCharDelims and ignoreExpr is not None:
            # one character at a time, stopping at delimiters, whitespace
            # or the start of an ignorable expression
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookahead, since they
            # cannot be excluded through a character set
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))
    ret = Forward()
    # the nested expression refers to itself for inner levels
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5405
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Returns the grouped block expression, named C{'indented block'}.  As a
    side effect, C{blockStatementExpr} is told to ignore backslash-newline
    line continuations.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # A statement is a peer when it starts in the column on top of the stack.
        # Nothing to check once the end of the input has been reached.
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # Entering a block: the column must be deeper than the current level,
        # and becomes the new top of the indentation stack.
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # Leaving a block: the column must be shallower than the current level
        # and no deeper than the enclosing level; the current level is popped.
        if l >= len(s): return
        curCol = col(l,s)
        # Require two stack entries before reading indentStack[-2]; a stack
        # holding a single level has no enclosing level to return to, so that
        # case is reported as a parse failure instead of an IndexError.
        if not(len(indentStack) >= 2 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# maps entity names to the characters that replaceHTMLEntity substitutes for them
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Reads the C{entity} named result from the parsed tokens and returns the
    character stored for that name in the module's HTML entity map, or
    C{None} when the map has no entry for it.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

# Ready-made comment expressions: these structures are very common and easy
# to get wrong, so they are provided here for direct use in grammars.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|': either a complete /* ... */ comment or a // comment
cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/') | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""

# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    fraction = (
        signed_integer().setParseAction(convertToFloat)
        + '/'
        + signed_integer().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (
        fraction | signed_integer + Optional(Optional('-').suppress() + fraction)
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(
        r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)'
    ).setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    ).setName("short IPv6 address")
    # the abbreviated form is only accepted with fewer than 8 hex groups
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Build a parse action that turns a parsed date string into a Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        A string that does not match C{fmt} is reported as a C{ParseException}
        carrying the message of the underlying C{ValueError}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                moment = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                raise ParseException(instring, loc, str(err))
            return moment.date()
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Build a parse action that turns a parsed datetime string into a Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        A string that does not match C{fmt} is reported as a C{ParseException}
        carrying the message of the underlying C{ValueError}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                moment = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                raise ParseException(instring, loc, str(err))
            return moment
        return cvt_fn

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        tag_remover = pyparsing_common._html_stripper
        return tag_remover.transformString(tokens[0])

    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
            + Optional(White(" \t"))
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional(quotedString.copy() | _commasepitem, default="")
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""

5831 5832 -class _lazyclassproperty(object):
5833 - def __init__(self, fn):
5834 self.fn = fn
5835
5836 - def __get__(self, obj, cls):
5837 if cls is None: 5838 cls = type(obj) 5839 ret = self.fn(cls) 5840 setattr(cls, self.fn.__name__, ret) 5841 return ret
5842

class unicode_set:
    """
    Base class for named sets of Unicode characters.

    Subclasses list their code points in C{_ranges}, a list of tuples whose
    first and last items are the inclusive bounds of a range (a one-item
    tuple therefore denotes a single code point).  The character strings
    below are derived from those ranges when first requested.
    """
    _ranges = []

    @_lazyclassproperty
    def printables(cls):
        """all characters in the set's ranges that are not whitespace"""
        chars = (unichr(code)
                 for bounds in cls._ranges
                 for code in range(bounds[0], bounds[-1] + 1))
        return ''.join(filterfalse(unicode.isspace, chars))

    @_lazyclassproperty
    def alphas(cls):
        """all alphabetic characters in the set's ranges"""
        chars = (unichr(code)
                 for bounds in cls._ranges
                 for code in range(bounds[0], bounds[-1] + 1))
        return ''.join(ch for ch in chars if unicode.isalpha(ch))

    @_lazyclassproperty
    def nums(cls):
        """all digit characters in the set's ranges"""
        chars = (unichr(code)
                 for bounds in cls._ranges
                 for code in range(bounds[0], bounds[-1] + 1))
        return ''.join(ch for ch in chars if unicode.isdigit(ch))

    @_lazyclassproperty
    def alphanums(cls):
        """the set's alphas followed by its nums"""
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
5862

class pyparsing_unicode(unicode_set):
    """
    Namespace of C{unicode_set} subclasses, one per language or script,
    each defined by its code point ranges.
    """
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        _ranges = [
            (0x0020, 0x007e), (0x00a0, 0x00ff),
        ]

    class Greek(unicode_set):
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        _ranges = [(0x0400, 0x04ff)]

    class Chinese(unicode_set):
        _ranges = [(0x4e00, 0x9fff)]

    class Japanese(unicode_set):
        _ranges = [ ]      # sum of Kanji, Hiragana, and Katakana ranges

        class Kanji(unicode_set):
            _ranges = [(0x4E00, 0x9Fbf), ]

        class Hiragana(unicode_set):
            _ranges = [(0x3040, 0x309f), ]

        class Katakana(unicode_set):
            _ranges = [(0x30a0, 0x30ff), ]

    class Korean(unicode_set):
        _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), ]

    class CJK(unicode_set):
        _ranges = [     # sum of Chinese, Japanese, and Korean ranges
        ]

    class Thai(unicode_set):
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]

    class Arabic(unicode_set):
        _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f), ]

    class Hebrew(unicode_set):
        _ranges = [(0x0590, 0x05ff), ]

    class Devanagari(unicode_set):
        _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]

# the composite sets can only be filled in once their component classes exist
pyparsing_unicode.Japanese._ranges = (
    pyparsing_unicode.Japanese.Kanji._ranges
    + pyparsing_unicode.Japanese.Hiragana._ranges
    + pyparsing_unicode.Japanese.Katakana._ranges
)
pyparsing_unicode.CJK._ranges = (
    pyparsing_unicode.Chinese._ranges
    + pyparsing_unicode.Japanese._ranges
    + pyparsing_unicode.Korean._ranges
)

# define ranges in language character sets
if PY_3:
    # (owner, native-language attribute name, character set class)
    _native_aliases = (
        (pyparsing_unicode, "العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, "中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, "кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, "Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, "עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, "日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, "漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, "カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, "ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, "한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, "ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, "देवनागरी", pyparsing_unicode.Devanagari),
    )
    for _owner, _alias, _charset in _native_aliases:
        setattr(_owner, _alias, _charset)
    # keep the module namespace free of the loop temporaries
    del _native_aliases, _owner, _alias, _charset


if __name__ == "__main__":

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)