1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29 =============================================================================
30
31 The pyparsing module is an alternative approach to creating and executing simple grammars,
32 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
33 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
34 provides a library of classes that you use to construct the grammar directly in Python.
35
36 Here is a program to parse "Hello, World!" (or any greeting of the form
37 C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
38 (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
39 L{Literal} expressions)::
40
41 from pyparsing import Word, alphas
42
43 # define grammar of a greeting
44 greet = Word(alphas) + "," + Word(alphas) + "!"
45
46 hello = "Hello, World!"
47 print (hello, "->", greet.parseString(hello))
48
49 The program outputs the following::
50
51 Hello, World! -> ['Hello', ',', 'World', '!']
52
53 The Python representation of the grammar is quite readable, owing to the self-explanatory
54 class names, and the use of '+', '|' and '^' operators.
55
56 The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
57 object with named attributes.
58
59 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
60 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
61 - quoted strings
62 - embedded comments
63
64
65 Getting Started -
66 -----------------
67 Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
68 classes inherit from. Use the docstrings for examples of how to:
69 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
70 - construct character word-group expressions using the L{Word} class
71 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
72 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
73 - associate names with your parsed results using L{ParserElement.setResultsName}
74 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
75 - find more useful common expressions in the L{pyparsing_common} namespace class
76 """
77
78 __version__ = "2.3.0"
79 __versionTime__ = "28 Oct 2018 01:57 UTC"
80 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
81
82 import string
83 from weakref import ref as wkref
84 import copy
85 import sys
86 import warnings
87 import re
88 import sre_constants
89 import collections
90 import pprint
91 import traceback
92 import types
93 from datetime import datetime
94 try:
95
96 from itertools import filterfalse
97 except ImportError:
98 from itertools import ifilterfalse as filterfalse
99
100 try:
101 from _thread import RLock
102 except ImportError:
103 from threading import RLock
104
105 try:
106
107 from collections.abc import Iterable
108 from collections.abc import MutableMapping
109 except ImportError:
110
111 from collections import Iterable
112 from collections import MutableMapping
113
114 try:
115 from collections import OrderedDict as _OrderedDict
116 except ImportError:
117 try:
118 from ordereddict import OrderedDict as _OrderedDict
119 except ImportError:
120 _OrderedDict = None
121
122 try:
123 from types import SimpleNamespace
124 except ImportError:
126
127
128
129
130 __all__ = [
131 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
132 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
133 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
134 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
135 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
136 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
137 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
138 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
139 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
140 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
141 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
142 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
143 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
144 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
145 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
146 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
147 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
148 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
149 ]
150
151 system_version = tuple(sys.version_info)[:3]
152 PY_3 = system_version[0] == 3
153 if PY_3:
154 _MAX_INT = sys.maxsize
155 basestring = str
156 unichr = chr
157 unicode = str
158 _ustr = str
159
160
161 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
162
163 else:
164 _MAX_INT = sys.maxint
165 range = xrange
168 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
169 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
170 then < returns the unicode object | encodes it with the default encoding | ... >.
171 """
172 if isinstance(obj,unicode):
173 return obj
174
175 try:
176
177
178 return str(obj)
179
180 except UnicodeEncodeError:
181
182 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
183 xmlcharref = Regex(r'&#\d+;')
184 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
185 return xmlcharref.transformString(ret)
186
187
188 singleArgBuiltins = []
189 import __builtin__
190 for fname in "sum len sorted reversed list tuple set any all min max".split():
191 try:
192 singleArgBuiltins.append(getattr(__builtin__,fname))
193 except AttributeError:
194 continue
195
196 _generatorType = type((y for y in range(1)))
199 """Escape &, <, >, ", ', etc. in a string of data."""
200
201
202 from_symbols = '&><"\''
203 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
204 for from_,to_ in zip(from_symbols, to_symbols):
205 data = data.replace(from_, to_)
206 return data
207
# Character-set constants used when building character-class expressions.
# Upper-case letters come first in `alphas`, digits come last in `alphanums`.
nums = string.digits
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
alphanums = alphas + nums
hexnums = nums + string.ascii_uppercase[:6] + string.ascii_lowercase[:6]
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
216 """base exception class for all parsing runtime exceptions"""
217
218
219 - def __init__( self, pstr, loc=0, msg=None, elem=None ):
220 self.loc = loc
221 if msg is None:
222 self.msg = pstr
223 self.pstr = ""
224 else:
225 self.msg = msg
226 self.pstr = pstr
227 self.parserElement = elem
228 self.args = (pstr, loc, msg)
229
230 @classmethod
232 """
233 internal factory method to simplify creating one type of ParseException
234 from another - avoids having __init__ signature conflicts among subclasses
235 """
236 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
237
239 """supported attributes by name are:
240 - lineno - returns the line number of the exception text
241 - col - returns the column number of the exception text
242 - line - returns the line containing the exception text
243 """
244 if( aname == "lineno" ):
245 return lineno( self.loc, self.pstr )
246 elif( aname in ("col", "column") ):
247 return col( self.loc, self.pstr )
248 elif( aname == "line" ):
249 return line( self.loc, self.pstr )
250 else:
251 raise AttributeError(aname)
252
254 return "%s (at char %d), (line:%d, col:%d)" % \
255 ( self.msg, self.loc, self.lineno, self.column )
269 return "lineno col line".split() + dir(type(self))
270
272 """
273 Exception thrown when parse expressions don't match class;
274 supported attributes by name are:
275 - lineno - returns the line number of the exception text
276 - col - returns the column number of the exception text
277 - line - returns the line containing the exception text
278
279 Example::
280 try:
281 Word(nums).setName("integer").parseString("ABC")
282 except ParseException as pe:
283 print(pe)
284 print("column: {}".format(pe.col))
285
286 prints::
287 Expected integer (at char 0), (line:1, col:1)
288 column: 1
289 """
290 pass
291
293 """user-throwable exception thrown when inconsistent parse content
294 is found; stops all parsing immediately"""
295 pass
296
298 """just like L{ParseFatalException}, but thrown internally when an
299 L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
300 immediately because an unbacktrackable syntax error has been found"""
301 pass
302
317 """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
318 - def __init__( self, parseElementList ):
319 self.parseElementTrace = parseElementList
320
322 return "RecursiveGrammarException: %s" % self.parseElementTrace
323
330 return repr(self.tup[0])
332 self.tup = (self.tup[0],i)
333
335 """
336 Structured parse results, to provide multiple means of access to the parsed data:
337 - as a list (C{len(results)})
338 - by list index (C{results[0], results[1]}, etc.)
339 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
340
341 Example::
342 integer = Word(nums)
343 date_str = (integer.setResultsName("year") + '/'
344 + integer.setResultsName("month") + '/'
345 + integer.setResultsName("day"))
346 # equivalent form:
347 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
348
349 # parseString returns a ParseResults object
350 result = date_str.parseString("1999/12/31")
351
352 def test(s, fn=repr):
353 print("%s -> %s" % (s, fn(eval(s))))
354 test("list(result)")
355 test("result[0]")
356 test("result['month']")
357 test("result.day")
358 test("'month' in result")
359 test("'minutes' in result")
360 test("result.dump()", str)
361 prints::
362 list(result) -> ['1999', '/', '12', '/', '31']
363 result[0] -> '1999'
364 result['month'] -> '12'
365 result.day -> '31'
366 'month' in result -> True
367 'minutes' in result -> False
368 result.dump() -> ['1999', '/', '12', '/', '31']
369 - day: 31
370 - month: 12
371 - year: 1999
372 """
373 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
374 if isinstance(toklist, cls):
375 return toklist
376 retobj = object.__new__(cls)
377 retobj.__doinit = True
378 return retobj
379
380
381
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """
    Initialize the token list and, when C{name} is given, register the
    tokens under that results name.

    @param toklist: a list (copied), a generator (consumed into a list), or
        any other single value (wrapped in a one-element list)
    @param name: results name; an int name is converted with C{_ustr}
    @param asList: when true, the named value is stored as a nested
        C{ParseResults}; otherwise the first token (or C{toklist} itself if
        it cannot be indexed) is stored
    @param modal: when false, C{name} is recorded in the accumulating-names dict
    @param isinstance: the builtin bound as a default argument
        (presumably for lookup speed - TODO confirm)
    """
    if self.__doinit:
        # first-time setup only; the flag is cleared so that it runs once
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            tokens = toklist[:]
        elif isinstance(toklist, _generatorType):
            tokens = list(toklist)
        else:
            tokens = [toklist]
        self.__toklist = tokens
        self.__tokdict = dict()

    # nothing more to do without a (non-empty) results name
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    # an empty None / '' / [] token value is not registered under the name
    nothing_to_name = (isinstance(toklist, (type(None), basestring, list))
                       and toklist in (None, '', []))
    if nothing_to_name:
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            # toklist could not be indexed; store it whole
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        nested = ParseResults(toklist.__toklist)
    else:
        nested = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(nested, 0)
    self[name].__name = name
420
422 if isinstance( i, (int,slice) ):
423 return self.__toklist[i]
424 else:
425 if i not in self.__accumNames:
426 return self.__tokdict[i][-1][0]
427 else:
428 return ParseResults([ v[0] for v in self.__tokdict[i] ])
429
431 if isinstance(v,_ParseResultsWithOffset):
432 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
433 sub = v[0]
434 elif isinstance(k,(int,slice)):
435 self.__toklist[k] = v
436 sub = v
437 else:
438 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
439 sub = v
440 if isinstance(sub,ParseResults):
441 sub.__parent = wkref(self)
442
444 if isinstance(i,(int,slice)):
445 mylen = len( self.__toklist )
446 del self.__toklist[i]
447
448
449 if isinstance(i, int):
450 if i < 0:
451 i += mylen
452 i = slice(i, i+1)
453
454 removed = list(range(*i.indices(mylen)))
455 removed.reverse()
456
457 for name,occurrences in self.__tokdict.items():
458 for j in removed:
459 for k, (value, position) in enumerate(occurrences):
460 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
461 else:
462 del self.__tokdict[i]
463
465 return k in self.__tokdict
466
467 - def __len__( self ): return len( self.__toklist )
468 - def __bool__(self): return ( not not self.__toklist )
469 __nonzero__ = __bool__
470 - def __iter__( self ): return iter( self.__toklist )
471 - def __reversed__( self ): return iter( self.__toklist[::-1] )
473 if hasattr(self.__tokdict, "iterkeys"):
474 return self.__tokdict.iterkeys()
475 else:
476 return iter(self.__tokdict)
477
479 return (self[k] for k in self._iterkeys())
480
482 return ((k, self[k]) for k in self._iterkeys())
483
484 if PY_3:
485 keys = _iterkeys
486 """Returns an iterator of all named result keys (Python 3.x only)."""
487
488 values = _itervalues
489 """Returns an iterator of all named result values (Python 3.x only)."""
490
491 items = _iteritems
492 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
493
494 else:
495 iterkeys = _iterkeys
496 """Returns an iterator of all named result keys (Python 2.x only)."""
497
498 itervalues = _itervalues
499 """Returns an iterator of all named result values (Python 2.x only)."""
500
501 iteritems = _iteritems
502 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
503
505 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
506 return list(self.iterkeys())
507
509 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
510 return list(self.itervalues())
511
513 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
514 return list(self.iteritems())
515
517 """Since keys() returns an iterator, this method is helpful in bypassing
518 code that looks for the existence of any defined results names."""
519 return bool(self.__tokdict)
520
def pop(self, *args, **kwargs):
    """
    Remove and return an item, using either C{list} or C{dict} semantics.

    With no argument the last token is popped (index C{-1}). An integer
    argument pops the token at that index. Any other argument is looked up
    as a key; if it is absent and a second positional argument or a
    C{default=} keyword was supplied, that default is returned instead,
    as with C{dict.pop()}. Any keyword other than C{default} raises
    C{TypeError}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for kw_name, kw_value in kwargs.items():
        if kw_name != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw_name)
        positional = (positional[0], kw_value)

    key = positional[0]
    # fall back to the default only for a missing non-int key when a default was supplied
    found_or_no_default = (isinstance(key, int) or
                           len(positional) == 1 or
                           key in self)
    if not found_or_no_default:
        return positional[1]

    popped = self[key]
    del self[key]
    return popped
572
def get(self, key, defaultValue=None):
    """
    Look up C{key} and return its value, or C{defaultValue} (C{None} unless
    given) when C{key} is not present.

    Behaves like C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
594
def insert(self, index, insStr):
    """
    Insert C{insStr} into the token list at position C{index}, as
    C{list.insert()} does, and adjust the positions stored with the named
    results.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)

    # move every recorded position that lies after the insertion point up by one
    # NOTE(review): a position equal to `index` is left unchanged - confirm this is intended
    for entries in self.__tokdict.values():
        for slot, (tok, pos) in enumerate(entries):
            shifted = pos + 1 if pos > index else pos
            entries[slot] = _ParseResultsWithOffset(tok, shifted)
614
616 """
617 Add single element to end of ParseResults list of elements.
618
619 Example::
620 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
621
622 # use a parse action to compute the sum of the parsed integers, and add it to the end
623 def append_sum(tokens):
624 tokens.append(sum(map(int, tokens)))
625 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
626 """
627 self.__toklist.append(item)
628
630 """
631 Add sequence of elements to end of ParseResults list of elements.
632
633 Example::
634 patt = OneOrMore(Word(alphas))
635
636 # use a parse action to append the reverse of the matched strings, to make a palindrome
637 def make_palindrome(tokens):
638 tokens.extend(reversed([t[::-1] for t in tokens]))
639 return ''.join(tokens)
640 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
641 """
642 if isinstance(itemseq, ParseResults):
643 self += itemseq
644 else:
645 self.__toklist.extend(itemseq)
646
648 """
649 Clear all elements and results names.
650 """
651 del self.__toklist[:]
652 self.__tokdict.clear()
653
655 try:
656 return self[name]
657 except KeyError:
658 return ""
659
660 if name in self.__tokdict:
661 if name not in self.__accumNames:
662 return self.__tokdict[name][-1][0]
663 else:
664 return ParseResults([ v[0] for v in self.__tokdict[name] ])
665 else:
666 return ""
667
669 ret = self.copy()
670 ret += other
671 return ret
672
674 if other.__tokdict:
675 offset = len(self.__toklist)
676 addoffset = lambda a: offset if a<0 else a+offset
677 otheritems = other.__tokdict.items()
678 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
679 for (k,vlist) in otheritems for v in vlist]
680 for k,v in otherdictitems:
681 self[k] = v
682 if isinstance(v[0],ParseResults):
683 v[0].__parent = wkref(self)
684
685 self.__toklist += other.__toklist
686 self.__accumNames.update( other.__accumNames )
687 return self
688
690 if isinstance(other,int) and other == 0:
691
692 return self.copy()
693 else:
694
695 return other + self
696
698 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
699
701 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
702
704 out = []
705 for item in self.__toklist:
706 if out and sep:
707 out.append(sep)
708 if isinstance( item, ParseResults ):
709 out += item._asStringList()
710 else:
711 out.append( _ustr(item) )
712 return out
713
715 """
716 Returns the parse results as a nested list of matching tokens, all converted to strings.
717
718 Example::
719 patt = OneOrMore(Word(alphas))
720 result = patt.parseString("sldkj lsdkj sldkj")
721 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
722 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
723
724 # Use asList() to create an actual list
725 result_list = result.asList()
726 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
727 """
728 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
729
731 """
732 Returns the named parse results as a nested dictionary.
733
734 Example::
735 integer = Word(nums)
736 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
737
738 result = date_str.parseString('12/31/1999')
739 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
740
741 result_dict = result.asDict()
742 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
743
744 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
745 import json
746 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
747 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
748 """
749 if PY_3:
750 item_fn = self.items
751 else:
752 item_fn = self.iteritems
753
754 def toItem(obj):
755 if isinstance(obj, ParseResults):
756 if obj.haskeys():
757 return obj.asDict()
758 else:
759 return [toItem(v) for v in obj]
760 else:
761 return obj
762
763 return dict((k,toItem(v)) for k,v in item_fn())
764
766 """
767 Returns a new copy of a C{ParseResults} object.
768 """
769 ret = ParseResults( self.__toklist )
770 ret.__tokdict = dict(self.__tokdict.items())
771 ret.__parent = self.__parent
772 ret.__accumNames.update( self.__accumNames )
773 ret.__name = self.__name
774 return ret
775
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Render the parse results as an XML string. Tokens and
    nested results that have a results name are tagged with that name;
    unnamed ones are tagged C{ITEM}, or left out when C{namedItemsOnly} is
    true.

    @param doctag: tag for the enclosing element; defaults to this object's
        own results name, then to C{ITEM}
    @param namedItemsOnly: when true, unnamed tokens are omitted, and an
        untagged result renders as the empty string
    @param indent: prefix written before the enclosing element's tags
    @param formatted: when false, newlines and indentation are suppressed
    """
    # map token position -> results name (for a shared position, the last entry wins)
    tag_at = {}
    for results_name, occurrences in self.__tokdict.items():
        for occurrence in occurrences:
            tag_at[occurrence[1]] = results_name

    nl = "\n"
    nextLevelIndent = indent + " "
    if not formatted:
        # collapse all layout whitespace
        nl = indent = nextLevelIndent = ""

    selfTag = doctag if doctag is not None else (self.__name or None)
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # nested results stay in named-items-only mode only when no doctag was passed
    nested_named_only = namedItemsOnly and doctag is None

    out = [nl, indent, "<", selfTag, ">"]
    for i, res in enumerate(self.__toklist):
        if isinstance(res, ParseResults):
            out.append(res.asXML(tag_at.get(i),
                                 nested_named_only,
                                 nextLevelIndent,
                                 formatted))
            continue

        # plain token: use its results name if it has one
        resTag = tag_at.get(i)
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        out += [nl, nextLevelIndent, "<", resTag, ">",
                _xml_escape(_ustr(res)),
                "</", resTag, ">"]

    out += [nl, indent, "</", selfTag, ">"]
    return "".join(out)
836
838 for k,vlist in self.__tokdict.items():
839 for v,loc in vlist:
840 if sub is v:
841 return k
842 return None
843
845 r"""
846 Returns the results name for this token expression. Useful when several
847 different expressions might match at a particular location.
848
849 Example::
850 integer = Word(nums)
851 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
852 house_number_expr = Suppress('#') + Word(nums, alphanums)
853 user_data = (Group(house_number_expr)("house_number")
854 | Group(ssn_expr)("ssn")
855 | Group(integer)("age"))
856 user_info = OneOrMore(user_data)
857
858 result = user_info.parseString("22 111-22-3333 #221B")
859 for item in result:
860 print(item.getName(), ':', item[0])
861 prints::
862 age : 22
863 ssn : 111-22-3333
864 house_number : 221B
865 """
866 if self.__name:
867 return self.__name
868 elif self.__parent:
869 par = self.__parent()
870 if par:
871 return par.__lookup(self)
872 else:
873 return None
874 elif (len(self) == 1 and
875 len(self.__tokdict) == 1 and
876 next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
877 return next(iter(self.__tokdict.keys()))
878 else:
879 return None
880
def dump(self, indent='', depth=0, full=True):
    """
    Build a diagnostic string listing the contents of a C{ParseResults}:
    the token list first, then (when C{full} is true) one line per results
    name, sorted by name. Without any results names, nested C{ParseResults}
    tokens are listed by index instead.

    C{indent} is prefixed to every line so the text can be embedded in a
    nested display; C{depth} sets the nesting level of the listing.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [indent + _ustr(self.asList())]
    if not full:
        return "".join(pieces)

    if self.haskeys():
        for key, val in sorted((str(k), v) for k, v in self.items()):
            # pieces already holds the token-list line, so a separator is always needed
            pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, (' '*depth), key))
            if not isinstance(val, ParseResults):
                pieces.append(repr(val))
            elif val:
                pieces.append(val.dump(indent, depth+1))
            else:
                pieces.append(_ustr(val))
    elif any(isinstance(tok, ParseResults) for tok in self):
        for i, tok in enumerate(self):
            if isinstance(tok, ParseResults):
                shown = tok.dump(indent, depth+1)
            else:
                shown = _ustr(tok)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' '*(depth)), i, indent, (' '*(depth+1)), shown))

    return "".join(pieces)
925
def pprint(self, *args, **kwargs):
    """
    Pretty-print the results, converted with C{asList()}, using the
    C{pprint} module. Any extra positional or keyword arguments are passed
    straight through to C{pprint.pprint}.
    (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)
948
949
951 return ( self.__toklist,
952 ( self.__tokdict.copy(),
953 self.__parent is not None and self.__parent() or None,
954 self.__accumNames,
955 self.__name ) )
956
958 self.__toklist = state[0]
959 (self.__tokdict,
960 par,
961 inAccumNames,
962 self.__name) = state[1]
963 self.__accumNames = {}
964 self.__accumNames.update(inAccumNames)
965 if par is not None:
966 self.__parent = wkref(par)
967 else:
968 self.__parent = None
969
971 return self.__toklist, self.__name, self.__asList, self.__modal
972
974 return (dir(type(self)) + list(self.keys()))
975
976 MutableMapping.register(ParseResults)
977
def col(loc, strg):
    """Return the column of position C{loc} within C{strg}, where newlines
    separate lines and the first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # a position directly after a newline is the first column of its line
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    # otherwise: distance from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
990
992 """Returns current line number within a string, counting newlines as line separators.
993 The first line is number 1.
994
995 Note: the default parsing behavior is to expand tabs in the input string
996 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
997 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
998 consistent view of the parsed string, the parse location, and line and column
999 positions within the parsed string.
1000 """
1001 return strg.count("\n",0,loc) + 1
1002
def line(loc, strg):
    """Return the text of the line in C{strg} that contains position C{loc},
    without its newline; newlines are the only line separators.
    """
    # rfind gives -1 when there is no earlier newline, so the start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # last line: no newline follows
        return strg[start:]
    return strg[start:end]
1012
1014 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
1015
1017 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
1018
1020 print ("Exception raised:" + _ustr(exc))
1021
1023 """'Do-nothing' debug action, to suppress debugging output during parsing."""
1024 pass
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can always be invoked as C{wrapper(s, loc, toks)}.

    The wrapper first calls C{func} with every argument it was given.  If that
    call raises a C{TypeError} whose traceback points at the call line inside
    C{wrapper} itself, one more leading argument is dropped and the call is
    retried; the offset keeps advancing for as long as it is still
    C{<= maxargs}.  After the first successful call the offset is frozen and
    every later C{TypeError} is re-raised unchanged.

    Parameters:
     - func - the callable to adapt
     - maxargs - upper bound used when deciding whether another leading
       argument may be dropped

    Returns the wrapper (renamed after C{func}), or, when C{func} is found in
    C{singleArgBuiltins}, a lambda that forwards only the tokens argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells that wrapper() can update
    limit = [0]
    foundArity = [False]

    # from Python 3.5 on, normalize the traceback entries down to a list holding
    # one (filename, lineno) pair so both branches can be compared the same way
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # exactly 3.5.0 uses an offset that is one frame deeper than later versions
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # pa_call_line_synth is the (filename, lineno) of the func(...) call inside
    # wrapper, computed as "line of the extract_stack call below" + LINE_DIFF.
    # WARNING: if any line - even a comment or a blank line - is added or removed
    # between the this_line assignment and the func(...) call, LINE_DIFF must be
    # adjusted to match.
    LINE_DIFF = 6
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # once the arity is known, a TypeError can only come from func itself
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # re-raise unless the error was raised by the call line above
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                # arity mismatch: drop one more leading argument and try again
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # give the wrapper the name of the wrapped callable (or of its class)
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1112
1114 """Abstract base level parser element class."""
1115 DEFAULT_WHITE_CHARS = " \n\t\r"
1116 verbose_stacktrace = False
1117
@staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of characters skipped as whitespace.

    The new value is stored in C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
1132
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain strings that are combined with parser elements.

    The class is stored in C{ParserElement._literalStringClass}.

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls
1153
def __init__( self, savelist=False ):
    """Initialise the parsing state shared by every parser element."""
    # results handling
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True

    # actions run on success / failure
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False

    # whitespace and ignorable-expression handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True

    # matching characteristics
    self.mayReturnEmpty = False
    self.mayIndexError = True
    self.streamlined = False
    self.re = None

    # naming, error reporting and debugging
    self.strRepr = None
    self.errmsg = ""
    self.debug = False
    self.debugActions = ( None, None, None )
1176
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action and
    ignore-expression lists.  Handy for attaching different parse actions to the
    same pattern.  If the element still follows the default whitespace characters,
    the copy picks up the current C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy( self )
    # the lists must not be shared between the original and the duplicate
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
1199
def setName( self, name ):
    """
    Give this expression a name; debugging output and exception messages then
    read C{"Expected <name>"}.  Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self,"exception"):
        return self
    # keep an already-attached exception object in step with the new message
    self.exception.msg = self.errmsg
    return self
1213
def setResultsName( self, name, listAllMatches=False ):
    """
    Return a *copy* of this element whose matched tokens are reachable under
    C{name} in the parse results.  A copy is returned so that one basic element
    (an integer, say) can be reused in several places under different names.

    A trailing C{'*'} on C{name} is stripped and turns C{listAllMatches} on.

    The abbreviated form C{expr("name")} does the same as
    C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    collect_all = listAllMatches
    if name.endswith("*"):
        name, collect_all = name[:-1], True
    named.resultsName = name
    named.modalResults = not collect_all
    return named
1241
def setBreak(self,breakFlag = True):
    """Arrange for the Python pdb debugger to be entered just before this element
       is parsed.  Pass C{breakFlag} as True to enable, False to disable.
       Returns C{self}.
    """
    if not breakFlag:
        # undo one level of wrapping, if there is one
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so it can be restored later
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1259
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with C{fns}; they run when the element
    matches successfully.  Each action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    An action that modifies the tokens may return them, and the returned value
    replaces the original tokens; otherwise it need not return anything.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: by default tabs in the input string are expanded before parsing starts.
    See L{I{parseString}<parseString>} for more on parsing strings that contain
    C{<TAB>}s, and for ways to keep the parsed string, the parse location, and the
    line and column positions consistent.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is adapted to the full (s,loc,toks) calling convention
    self.parseAction = [_trim_arity(action) for action in list(fns)]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1297
def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to the ones this expression already has.
    See L{I{setParseAction}<setParseAction>} for the accepted call signatures.

    See examples in L{I{copy}<copy>}.
    """
    adapted = [_trim_arity(action) for action in list(fns)]
    self.parseAction += adapted
    # callDuringTry can be switched on here, never off
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1307
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also checked during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(predicate):
        # A factory gives each parse action its own predicate.  Defining pa()
        # directly in the loop would make every closure share the loop variable,
        # so only the last condition passed in would ever be evaluated.
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        # adapt the arity once, at registration time, rather than on every call
        self.parseAction.append(make_condition(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1334
def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression.
       The fail action C{fn} is a callable taking the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       Its return value is not used.  It may raise C{L{ParseFatalException}}
       to stop parsing immediately.  Returns C{self}."""
    setattr(self, "failAction", fn)
    return self
1347
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every run of ignorable expressions and return the new location.

    The list of ignore expressions is swept repeatedly until a complete sweep
    matches nothing."""
    while True:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    skipped_any = True
            except ParseException:
                pass
        if not skipped_any:
            return loc
1360
def preParse( self, instring, loc ):
    """Return the location at which matching should really start: C{loc} moved
    past any ignorable expressions and then, if whitespace skipping is enabled,
    past any characters found in C{self.whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
1372
1373 - def parseImpl( self, instring, loc, doActions=True ):
1375
1376 - def postParse( self, instring, loc, tokenlist ):
1378
1379
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Match this element against C{instring} at C{loc}, without consulting any cache.

    Steps: optional pre-parse, C{parseImpl}, C{postParse}, packaging of the tokens
    in a C{ParseResults}, then the parse actions.

    Parameters:
     - instring - the string being parsed
     - loc - the location at which to attempt the match
     - doActions - if false, parse actions run only when C{self.callDuringTry} is set
     - callPreParse - if false, C{preParse} is skipped for this call

    Returns a C{(loc, tokens)} pair: the location after the match and the
    C{ParseResults} produced.  An C{IndexError} raised by C{parseImpl} or by a
    parse action is converted to a C{ParseException}; other parse exceptions
    propagate after the debug/fail actions have been notified.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slow path: the start/exception debug actions and the fail action must be notified
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # running off the end of the input is reported as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is only set up when it may be needed
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn( instring, tokensStart, retTokens )
                    except IndexError as parse_action_exc:
                        # set __cause__ by hand (not "raise ... from"), keeping Python 2 syntax
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns new tokens replaces the current results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the exception debug notification
            for fn in self.parseAction:
                try:
                    tokens = fn( instring, tokensStart, retTokens )
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1462
1468
def canParseNext(self, instring, loc):
    """Return True if this element would match C{instring} at C{loc}, False if
    the attempt raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        matched = False
    else:
        matched = True
    return matched
1476
class _UnboundedCache(object):
    """Packrat cache with no size limit.

    The backing dict lives in a closure; C{get}, C{set}, C{clear} and C{__len__}
    are bound onto the instance.  C{get} returns C{self.not_in_cache} for a
    missing key."""
    def __init__(self):
        self.not_in_cache = not_in_cache = object()
        cache = {}

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        bindings = (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len))
        for attr_name, impl in bindings:
            setattr(self, attr_name, types.MethodType(impl, self))
1498
class _FifoCache(object):
    """Packrat cache holding at most C{size} entries, evicting the oldest first.

    An ordered dict is used when C{_OrderedDict} is available; otherwise a plain
    dict paired with a deque of keys in insertion order.  As with the unbounded
    cache, the storage lives in a closure and C{get}, C{set}, C{clear} and
    C{__len__} are bound onto the instance.  C{get} returns C{self.not_in_cache}
    for a missing key.  A C{size} of 0 keeps nothing.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        if _OrderedDict is not None:
            cache = _OrderedDict()

            def set(self, key, value):
                cache[key] = value
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        # nothing left to evict (only possible for a negative size)
                        break

            def clear(self):
                cache.clear()

        else:
            cache = {}
            # The deque is deliberately unbounded.  With a maxlen, append() drops the
            # oldest key silently, the dict entry for it is never removed, and the
            # cache grows without limit.
            key_fifo = collections.deque()

            def set(self, key, value):
                # queue a key once only, so re-setting it cannot cause an early eviction
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)
1556
1557
1558 packrat_cache = {}
1559 packrat_cache_lock = RLock()
1560 packrat_cache_stats = [0, 0]
1561
1562
1563
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end to C{_parseNoCache}.

    Results and parse exceptions are cached under the key
    C{(self, instring, loc, callPreParse, doActions)}; the whole lookup and parse
    runs under C{ParserElement.packrat_cache_lock}.  Hit and miss counts are kept
    in C{ParserElement.packrat_cache_stats}.  Cached results are handed out as
    copies; cached exceptions are re-raised."""
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # store a fresh exception built from the same args, not the raised instance
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
1586
1587 _parse = _parseNoCache
1588
1589 @staticmethod
1593
1594 _packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Turn on "packrat" parsing, which memoizes the parsing logic.
       Grammars often retry the same expression at the same string location;
       with packrat enabled such retries return a cached value instead of
       re-running the parsing/validating code.  Both successful results and
       parsing exceptions are memoized.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs whose parse actions have
       side-effects, which is why packrat parsing is off when pyparsing is first
       imported.  To activate it, call the class method
       C{ParserElement.enablePackrat()}.  If your program uses C{psyco} to
       "compile as you go", you must call C{enablePackrat} before calling
       C{psyco.full()}, otherwise Python will crash.  For best results, call
       C{enablePackrat()} immediately after importing pyparsing.

       Calling it again once packrat is enabled has no effect.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is not None:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    else:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    # from now on every element parses through the memoizing entry point
    ParserElement._parse = ParserElement._parseCache
1629
def parseString( self, instring, parseAll=False ):
    """
    Run this parse expression against the given string.  This is the main
    entry point for client code once the complete expression has been built.

    To require that the entire input string be consumed, set C{parseAll} to
    True (equivalent to ending the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and the grammar uses parse actions that
    use the C{loc} argument to index into the string being parsed, you can keep
    a consistent view of the input string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action with the full C{(s,loc,toks)} signature, and
       referencing the input string through the parse action's C{s} argument
     - explicitly expanding the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        end, results = self._parse( text, 0 )
        if parseAll:
            end = self.preParse( text, end )
            end_of_text = Empty() + StringEnd()
            end_of_text._parse( text, end )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # raising the caught exception by name from this frame (rather than a bare
        # raise) is kept deliberately for the non-verbose case
        raise exc
    return results
1679
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: it yields C{(tokens, start, end)} tuples.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, ahead of the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # pre-parse here, then tell the parse call not to repeat it
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match at this position - retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # re-run preParse at the current loc; jump to the end of the
                        # match if it skipped anything, else advance one character
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not move past loc - step forward so the loop progresses
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # unless verbose stack traces are enabled, raise the caught exception by name from here
            raise exc
1750
1793
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Convenience wrapper around C{L{scanString}} that collects just the tokens of
    each match into a single C{ParseResults}.  The optional C{maxMatches}
    argument stops the search after 'n' matches have been found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # unless verbose stack traces are enabled, raise the caught exception by name from here
        raise exc
1820
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the pieces of C{instring} found between successive matches (and, when
    C{includeSeparators} is set, the first token of each match), followed by
    whatever remains after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end of the previous separator, i.e. where the next piece starts
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1842
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}. A string operand is first
    converted with the current literal string class (L{Literal} by default).
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and the
    result is C{None}.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1862
def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1874
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1886
def __rsub__(self, other ):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1898
def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum below the
    minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1966
1969
def __or__(self, other ):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1981
def __ror__(self, other ):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1993
def __xor__(self, other ):
    """
    Implementation of ^ operator - returns C{L{Or}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2005
def __rxor__(self, other ):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2017
def __and__(self, other ):
    """
    Implementation of & operator - returns C{L{Each}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2029
def __rand__(self, other ):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string is converted with the current literal string class; any other
    non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
2041
def __invert__( self ):
    """
    Implementation of ~ operator - wraps this element in a C{L{NotAny}} and returns it.
    """
    negated = NotAny( self )
    return negated
2047
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    A C{name} ending in C{'*'} is handled by C{setResultsName}, which then
    treats C{listAllMatches} as C{True}.

    With no C{name}, this is the same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2066
def suppress( self ):
    """
    Return this C{ParserElement} wrapped in a C{Suppress}, so that its output is
    left out of the results; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress( self )
    return suppressed
2073
def leaveWhitespace( self ):
    """
    Turn off the skipping of whitespace before this C{ParserElement}'s pattern
    is matched.  Normally this is only used internally by the pyparsing module,
    but some whitespace-sensitive grammars may need it.  Returns C{self}.
    """
    setattr(self, "skipWhitespace", False)
    return self
2082
def setWhitespaceChars( self, chars ):
    """
    Set the whitespace characters skipped by this element, overriding the
    defaults.  Whitespace skipping is switched on, and the element stops
    following the class-wide default characters when copied.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2091
def parseWithTabs( self ):
    """
    Keep C{<TAB>}s in the input string instead of expanding them to spaces
    before parsing (the default behavior).  Must be called before
    C{parseString} when the input grammar contains elements that match
    C{<TAB>} characters.  Returns C{self}.
    """
    setattr(self, "keepTabs", True)
    return self
2100
def ignore( self, other ):
    """
    Register an expression (e.g. a comment) to be skipped over during pattern
    matching; may be called repeatedly to register several comment or other
    ignorable patterns.  A string is wrapped in C{Suppress}; a C{Suppress}
    already registered is not added twice; any other expression is copied and
    wrapped in C{Suppress}.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2123
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using
    the given callables in place of the module's default debug actions.

    Parameters:
     - startAction - replacement for the default start debug action
     - successAction - replacement for the default success debug action
     - exceptionAction - replacement for the default exception debug action

    Any argument that is C{None} (or otherwise false) falls back to the
    corresponding default action. The three actions are stored, in that order,
    as the C{debugActions} tuple and C{debug} is switched on.

    Returns C{self}, so that calls can be chained.
    """
    on_start = startAction if startAction else _defaultStartDebugAction
    on_success = successAction if successAction else _defaultSuccessDebugAction
    on_exception = exceptionAction if exceptionAction else _defaultExceptionDebugAction

    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2133
2135 """
2136 Enable display of debugging messages while doing pattern matching.
2137 Set C{flag} to True to enable, False to disable.
2138
2139 Example::
2140 wd = Word(alphas).setName("alphaword")
2141 integer = Word(nums).setName("numword")
2142 term = wd | integer
2143
2144 # turn on debugging for wd
2145 wd.setDebug()
2146
2147 OneOrMore(term).parseString("abc 123 xyz 890")
2148
2149 prints::
2150 Match alphaword at loc 0(1,1)
2151 Matched alphaword -> ['abc']
2152 Match alphaword at loc 3(1,4)
2153 Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2154 Match alphaword at loc 7(1,8)
2155 Matched alphaword -> ['xyz']
2156 Match alphaword at loc 11(1,12)
2157 Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2158 Match alphaword at loc 15(1,16)
2159 Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2160
2161 The output shown is that produced by the default debug actions - custom debug actions can be
2162 specified using L{setDebugActions}. Prior to attempting
2163 to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2164 is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2165 message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2166 which makes debugging and exception messages easier to understand - for instance, the default
2167 name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2168 """
2169 if flag:
2170 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2171 else:
2172 self.debug = False
2173 return self
2174
2177
2180
2182 self.streamlined = True
2183 self.strRepr = None
2184 return self
2185
2188
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - not used by this implementation; the default is
       C{None} rather than a list literal so that no mutable object is shared
       between calls

    Delegates to C{checkRecursion}, starting from an empty trace list.
    """
    self.checkRecursion([])
2194
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a filename;
       when a filename is given, the file is opened in text mode, read in
       full, and closed before parsing begins
     - parseAll - (default=C{False}) - flag passed through to C{L{parseString}}

    Returns whatever C{parseString} returns for the file's contents.

    A C{ParseBaseException} raised while parsing is propagated unchanged when
    C{ParserElement.verbose_stacktrace} is set; otherwise it is raised again
    explicitly from this method.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no read() method - treat the argument as a filename
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # explicit re-raise of the caught exception object
            raise exc
        raise
2214
2216 if isinstance(other, ParserElement):
2217 return self is other or vars(self) == vars(other)
2218 elif isinstance(other, basestring):
2219 return self.matches(other)
2220 else:
2221 return super(ParserElement,self)==other
2222
2224 return not (self == other)
2225
2227 return hash(id(self))
2228
2230 return self == other
2231
2233 return not (self == other)
2234
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} completes without raising a
    C{ParseBaseException}, else C{False}. Any other exception propagates.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True
2253
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of C{(test string, result)}
    pairs, one per executed test; each result is the value returned by C{parseString}, or the
    exception that was raised while parsing that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself rather than via str.strip, so that
        # any string type accepted by the basestring check is handled
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)

    allResults = []
    comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that follow pending comments, are
        # accumulated and shown with the next real test
        if (comment is not None and comment.matches(t, False)) or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for a newline
            t = t.replace(r'\n', '\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show only the failing line, caret under the column
                out.append(line(pe.loc, t))
                out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
            else:
                out.append(' ' * pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2384
2385
2386 -class Token(ParserElement):
2387 """
2388 Abstract C{ParserElement} subclass, for defining atomic matching patterns.
2389 """
2392
2393
2394 -class Empty(Token):
2395 """
2396 An empty token, will always match.
2397 """
2399 super(Empty,self).__init__()
2400 self.name = "Empty"
2401 self.mayReturnEmpty = True
2402 self.mayIndexError = False
2403
2406 """
2407 A token that will never match.
2408 """
2410 super(NoMatch,self).__init__()
2411 self.name = "NoMatch"
2412 self.mayReturnEmpty = True
2413 self.mayIndexError = False
2414 self.errmsg = "Unmatchable token"
2415
2416 - def parseImpl( self, instring, loc, doActions=True ):
2418
2421 """
2422 Token to exactly match a specified string.
2423
2424 Example::
2425 Literal('blah').parseString('blah') # -> ['blah']
2426 Literal('blah').parseString('blahfooblah') # -> ['blah']
2427 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2428
2429 For case-insensitive matching, use L{CaselessLiteral}.
2430
2431 For keyword matching (force word break before and after the matched string),
2432 use L{Keyword} or L{CaselessKeyword}.
2433 """
2435 super(Literal,self).__init__()
2436 self.match = matchString
2437 self.matchLen = len(matchString)
2438 try:
2439 self.firstMatchChar = matchString[0]
2440 except IndexError:
2441 warnings.warn("null string passed to Literal; use Empty() instead",
2442 SyntaxWarning, stacklevel=2)
2443 self.__class__ = Empty
2444 self.name = '"%s"' % _ustr(self.match)
2445 self.errmsg = "Expected " + self.name
2446 self.mayReturnEmpty = False
2447 self.mayIndexError = False
2448
2449
2450
2451
2452
def parseImpl(self, instring, loc, doActions=True):
    """
    Match this literal's text at C{loc} in C{instring}.

    The first character is compared on its own before falling back to
    C{str.startswith}; single-character literals need no further check.

    Returns a C{(next location, matched string)} tuple, or raises
    C{ParseException} if the text at C{loc} does not match.
    """
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen == 1 or instring.startswith(self.match, loc):
        return loc + self.matchLen, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2458 _L = Literal
2459 ParserElement._literalStringClass = Literal
2462 """
2463 Token to exactly match a specified string as a keyword, that is, it must be
2464 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
2465 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
2466 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
2467 Accepts two optional constructor arguments in addition to the keyword string:
2468 - C{identChars} is a string of characters that would be valid identifier characters,
2469 defaulting to all alphanumerics + "_" and "$"
2470 - C{caseless} allows case-insensitive matching, default is C{False}.
2471
2472 Example::
2473 Keyword("start").parseString("start") # -> ['start']
2474 Keyword("start").parseString("starting") # -> Exception
2475
2476 For case-insensitive matching, use L{CaselessKeyword}.
2477 """
2478 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
2479
def __init__(self, matchString, identChars=None, caseless=False):
    """
    Parameters:
     - matchString - the keyword text to match
     - identChars - (default=C{None}) string of characters treated as identifier
       characters; C{None} selects C{Keyword.DEFAULT_KEYWORD_CHARS}
     - caseless - (default=C{False}) when true, an upper-cased copy of the
       keyword and of C{identChars} is kept for case-insensitive comparison

    An empty C{matchString} triggers a C{SyntaxWarning}.
    """
    super(Keyword, self).__init__()
    if identChars is None:
        identChars = Keyword.DEFAULT_KEYWORD_CHARS

    self.match = matchString
    self.matchLen = len(matchString)
    if self.matchLen:
        self.firstMatchChar = matchString[0]
    else:
        warnings.warn("null string passed to Keyword; use Empty() instead",
                      SyntaxWarning, stacklevel=2)

    self.name = '"%s"' % self.match
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = False
    self.mayIndexError = False

    self.caseless = caseless
    if caseless:
        self.caselessmatch = matchString.upper()
        identChars = identChars.upper()
    self.identChars = set(identChars)
2500
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc}, requiring that it is neither preceded nor
    followed by one of the configured identifier characters.

    In caseless mode the input slice and the neighbouring characters are
    upper-cased before comparison.

    Returns C{(next location, keyword)} on success; raises C{ParseException}
    otherwise.
    """
    matchLen = self.matchLen
    identChars = self.identChars
    end = loc + matchLen

    if self.caseless:
        if instring[loc:end].upper() == self.caselessmatch:
            # trailing boundary: end of input, or a non-identifier character
            if loc >= len(instring) - matchLen or instring[end].upper() not in identChars:
                # leading boundary: start of input, or a non-identifier character
                if loc == 0 or instring[loc - 1].upper() not in identChars:
                    return end, self.match
    elif instring[loc] == self.firstMatchChar:
        if matchLen == 1 or instring.startswith(self.match, loc):
            if loc >= len(instring) - matchLen or instring[end] not in identChars:
                if loc == 0 or instring[loc - 1] not in identChars:
                    return end, self.match

    raise ParseException(instring, loc, self.errmsg, self)
2514
2519
2520 @staticmethod
2525
2527 """
2528 Token to match a specified string, ignoring case of letters.
2529 Note: the matched results will always be in the case of the given
2530 match string, NOT the case of the input text.
2531
2532 Example::
2533 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2534
2535 (Contrast with example for L{CaselessKeyword}.)
2536 """
2538 super(CaselessLiteral,self).__init__( matchString.upper() )
2539
2540 self.returnString = matchString
2541 self.name = "'%s'" % self.returnString
2542 self.errmsg = "Expected " + self.name
2543
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the upper-cased input slice at C{loc} with the stored match text.

    Returns C{(next location, returnString)}, i.e. the text is reported in the
    case given at construction, not the case found in the input. Raises
    C{ParseException} when the slice does not match.
    """
    end = loc + self.matchLen
    if instring[loc:end].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return end, self.returnString
2548
2550 """
2551 Caseless version of L{Keyword}.
2552
2553 Example::
2554 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2555
2556 (Contrast with example for L{CaselessLiteral}.)
2557 """
2558 - def __init__( self, matchString, identChars=None ):
2560
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc} without regard to case.

    The upper-cased input slice must equal the stored upper-cased keyword, and
    the keyword must stand alone: the character after it (if any) and the
    character before it (if any) must not be identifier characters. The
    leading check mirrors the caseless branch of C{Keyword.parseImpl}, so that
    the keyword is not found inside a longer word such as C{"xCMD"}.

    Returns C{(next location, keyword)} on success; raises C{ParseException}
    otherwise.
    """
    end = loc + self.matchLen
    identChars = self.identChars
    if (instring[loc:end].upper() == self.caselessmatch
            # trailing boundary: end of input, or a non-identifier character
            and (loc >= len(instring) - self.matchLen
                 or instring[end].upper() not in identChars)
            # leading boundary: start of input, or a non-identifier character
            and (loc == 0 or instring[loc - 1].upper() not in identChars)):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2566
2568 """
2569 A variation on L{Literal} which matches "close" matches, that is,
2570 strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
2571 - C{match_string} - string to be matched
2572 - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
2573
2574 The results from a successful parse will contain the matched text from the input string and the following named results:
2575 - C{mismatches} - a list of the positions within the match_string where mismatches were found
2576 - C{original} - the original match_string used to compare against the input string
2577
2578 If C{mismatches} is an empty list, then the match was an exact match.
2579
2580 Example::
2581 patt = CloseMatch("ATCATCGAATGGA")
2582 patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
2583 patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
2584
2585 # exact match
2586 patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
2587
2588 # close match allowing up to 2 mismatches
2589 patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
2590 patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
2591 """
def __init__(self, match_string, maxMismatches=1):
    """
    Parameters:
     - match_string - string to be matched; also used as the expression name
     - maxMismatches - (default=C{1}) maximum number of mismatching characters
       still accepted as a match
    """
    super(CloseMatch, self).__init__()
    self.match_string = match_string
    self.maxMismatches = maxMismatches
    self.name = match_string
    self.errmsg = "Expected %r (with up to %d mismatches)" % (match_string, maxMismatches)
    self.mayReturnEmpty = False
    self.mayIndexError = False
2600
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the input at C{loc} character by character with the match string,
    tolerating up to C{maxMismatches} differing positions.

    Returns C{(next location, results)}, where C{results} holds the matched
    input text plus the named results C{original} (the match string) and
    C{mismatches} (list of offsets within the match string that differed).
    Raises C{ParseException} if too little input remains or too many
    characters differ.
    """
    start = loc
    match_string = self.match_string
    maxloc = start + len(match_string)

    if maxloc <= len(instring):
        maxMismatches = self.maxMismatches
        mismatches = []

        for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
            if src != mat:
                mismatches.append(offset)
                if len(mismatches) > maxMismatches:
                    break
        else:
            # Every character was examined, so the match ends at maxloc. This
            # is an absolute position in instring; an offset within the match
            # string would be wrong whenever the match does not start at 0.
            loc = maxloc
            results = ParseResults([instring[start:loc]])
            results['original'] = match_string
            results['mismatches'] = mismatches
            return loc, results

    # loc still equals start here, so the failure is reported at the start
    raise ParseException(instring, loc, self.errmsg, self)
2626
2627
2628 -class Word(Token):
2629 """
2630 Token for matching words composed of allowed character sets.
2631 Defined with string containing all allowed initial characters,
2632 an optional string containing allowed body characters (if omitted,
2633 defaults to the initial character set), and an optional minimum,
2634 maximum, and/or exact length. The default value for C{min} is 1 (a
2635 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2636 are 0, meaning no maximum or exact length restriction. An optional
2637 C{excludeChars} parameter can list characters that might be found in
2638 the input C{bodyChars} string; useful to define a word of all printables
2639 except for one or two characters, for instance.
2640
2641 L{srange} is useful for defining custom character set strings for defining
2642 C{Word} expressions, using range notation from regular expression character sets.
2643
2644 A common mistake is to use C{Word} to match a specific literal string, as in
2645 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2646 I{sets} of matchable characters. This expression would match "Add", "AAA",
2647 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2648 To match an exact literal string, use L{Literal} or L{Keyword}.
2649
2650 pyparsing includes helper strings for building Words:
2651 - L{alphas}
2652 - L{nums}
2653 - L{alphanums}
2654 - L{hexnums}
2655 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2656 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2657 - L{printables} (any non-whitespace character)
2658
2659 Example::
2660 # a word composed of digits
2661 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2662
2663 # a word with a leading capital, and zero or more lowercase
2664 capital_word = Word(alphas.upper(), alphas.lower())
2665
2666 # hostnames are alphanumeric, with leading alpha, and '-'
2667 hostname = Word(alphas, alphanums+'-')
2668
2669 # roman numeral (not a strict parser, accepts invalid mix of characters)
2670 roman = Word("IVXLCDM")
2671
2672 # any string of non-whitespace characters, except for ','
2673 csv_value = Word(printables, excludeChars=",")
2674 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """
    Parameters:
     - initChars - string of characters allowed as the first character
     - bodyChars - (default=C{None}) string of characters allowed after the
       first; when omitted or empty, C{initChars} is used
     - min - (default=C{1}) minimum word length; values below 1 raise C{ValueError}
     - max - (default=C{0}) maximum word length, 0 meaning no limit
     - exact - (default=C{0}) when > 0, overrides both C{min} and C{max}
     - asKeyword - (default=C{False}) require that the word is not adjacent to
       further body characters
     - excludeChars - (default=C{None}) characters removed from C{initChars}
       and C{bodyChars} before use

    When no length limits are given and neither character set contains a
    space, an equivalent regular expression is compiled and stored in
    C{self.re} for use by C{parseImpl}.
    """
    super(Word, self).__init__()

    if excludeChars:
        initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)

    # an omitted or empty body set falls back to the initial character set
    body_source = bodyChars if bodyChars else initChars
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    self.bodyCharsOrig = body_source
    self.bodyChars = set(body_source)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    no_length_limits = (min == 1 and max == 0 and exact == 0)
    if ' ' not in self.initCharsOrig + self.bodyCharsOrig and no_length_limits:
        body_class = _escapeRegexRangeChars(self.bodyCharsOrig)
        if self.bodyCharsOrig == self.initCharsOrig:
            pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            pattern = "%s[%s]*" % (re.escape(self.initCharsOrig), body_class)
        else:
            pattern = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig), body_class)
        if self.asKeyword:
            pattern = r"\b" + pattern + r"\b"
        self.reString = pattern
        try:
            self.re = re.compile(self.reString)
        except Exception:
            # fall back to the character-by-character matcher in parseImpl
            self.re = None
2728
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word at C{loc}: one initial character followed by body characters.

    If a compiled regular expression is available in C{self.re} it is used
    directly. Otherwise characters are scanned one at a time, honouring the
    minimum/maximum lengths and the C{asKeyword} boundary requirement.

    Returns C{(next location, matched word)}; raises C{ParseException} when no
    acceptable word starts at C{loc}.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    bodychars = self.bodyChars
    instrlen = len(instring)
    maxloc = min(start + self.maxLen, instrlen)

    loc += 1
    while loc < maxloc and instring[loc] in bodychars:
        loc += 1

    rejected = (
        # too few characters consumed
        loc - start < self.minLen
        # an explicit maximum was hit while more body characters follow
        or (self.maxSpecified and loc < instrlen and instring[loc] in bodychars)
        # keyword mode: word must not touch body characters on either side
        or (self.asKeyword
            and ((start > 0 and instring[start - 1] in bodychars)
                 or (loc < instrlen and instring[loc] in bodychars)))
    )
    if rejected:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2763
2765 try:
2766 return super(Word,self).__str__()
2767 except Exception:
2768 pass
2769
2770
2771 if self.strRepr is None:
2772
2773 def charsAsStr(s):
2774 if len(s)>4:
2775 return s[:4]+"..."
2776 else:
2777 return s
2778
2779 if ( self.initCharsOrig != self.bodyCharsOrig ):
2780 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
2781 else:
2782 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
2783
2784 return self.strRepr
2785
2786
2787 -class Char(Word):
2788 """
2789 A short-cut class for defining C{Word(characters, exact=1)},
2790 when defining a match of any single character in a string of characters.
2791 """
2793 super(Char, self).__init__(charset, exact=1)
2794 self.reString = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
2795 self.re = re.compile( self.reString )
2796
2797
2798 -class Regex(Token):
2799 r"""
2800 Token for matching strings that match a given regular expression.
2801 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
2802 If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
2803 named parse results.
2804
2805 Example::
2806 realnum = Regex(r"[+-]?\d+\.\d*")
2807 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
2808 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
2809 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
2810
2811 make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
2812 print(make_html.transformString("h1:main title:"))
2813 # prints "<h1>main title</h1>"
2814 """
2815 compiledREtype = type(re.compile("[A-Z]"))
def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    Parameters:
     - pattern - a regular expression string, or an already compiled RE object
     - flags - (default=C{0}) flags for C{re.compile()}; only stored, not
       applied, when C{pattern} is already compiled
     - asGroupList - (default=C{False}) stored for use by C{parseImpl}
     - asMatch - (default=C{False}) stored for use by C{parseImpl}

    Raises C{re.error} (after issuing a C{SyntaxWarning}) if a pattern string
    does not compile, and C{ValueError} if C{pattern} is neither a string nor
    a compiled RE object.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the public name of the exception class raised by
            # re.compile; sre_constants is an internal, deprecated module
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # NOTE(review): str() of a compiled pattern is its printable form
        # (e.g. "re.compile('x')" on Python 3), not the original pattern text
        self.pattern = self.reString = str(pattern)
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
    self.asGroupList = asGroupList
    self.asMatch = asMatch
2851
def parseImpl(self, instring, loc, doActions=True):
    """
    Apply the compiled regular expression at C{loc}.

    The shape of the returned token depends on the constructor flags:
     - C{asMatch} - the C{re} match object itself
     - C{asGroupList} - the tuple returned by the match's C{groups()}
     - otherwise - a C{ParseResults} holding the matched text, with each
       named group of the pattern added as a named result

    Returns C{(next location, token)}; raises C{ParseException} if the
    expression does not match at C{loc}.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    end = found.end()
    named_groups = found.groupdict()

    if self.asMatch:
        return end, found
    if self.asGroupList:
        return end, found.groups()

    ret = ParseResults(found.group())
    for key in named_groups:
        ret[key] = named_groups[key]
    return end, ret
2869
2871 try:
2872 return super(Regex,self).__str__()
2873 except Exception:
2874 pass
2875
2876 if self.strRepr is None:
2877 self.strRepr = "Re:(%s)" % repr(self.pattern)
2878
2879 return self.strRepr
2880
def sub(self, repl):
    """
    Return Regex with an attached parse action to transform the parsed
    result as if called using C{re.sub(expr, repl, string)}.

    Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) when this
    expression was built with C{asGroupList=True}, or with C{asMatch=True}
    and a callable C{repl}.
    """
    if self.asGroupList:
        warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                      SyntaxWarning, stacklevel=2)
        raise SyntaxError()

    if self.asMatch:
        if callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        # tokens[0] is a match object here; expand the template against it
        def pa(tokens):
            return tokens[0].expand(repl)
    else:
        # tokens[0] is the matched text; re-run the substitution on it
        def pa(tokens):
            return self.re.sub(repl, tokens[0])

    return self.addParseAction(pa)
2903
2905 r"""
2906 Token for matching strings that are delimited by quoting characters.
2907
2908 Defined with the following parameters:
2909 - quoteChar - string of one or more characters defining the quote delimiting string
2910 - escChar - character to escape quotes, typically backslash (default=C{None})
2911 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
2912 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
2913 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
2914 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
2915 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
2916
2917 Example::
2918 qs = QuotedString('"')
2919 print(qs.searchString('lsjdf "This is the quote" sldjf'))
2920 complex_qs = QuotedString('{{', endQuoteChar='}}')
2921 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
2922 sql_qs = QuotedString('"', escQuote='""')
2923 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
2924 prints::
2925 [['This is the quote']]
2926 [['This is the "quote"']]
2927 [['This is the quote with "embedded" quotes']]
2928 """
def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression used to recognize the quoted string.

    Parameters are as described in the class documentation. C{quoteChar} and
    C{endQuoteChar} are stripped of surrounding whitespace before use.

    Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) if C{quoteChar}
    or C{endQuoteChar} is empty after stripping, and C{re.error} if the
    generated pattern does not compile.
    """
    super(QuotedString, self).__init__()

    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # Opening quote, then a group whose first alternative is any single
    # character other than the first end-quote character and the escape
    # character (and, unless multiline, CR/LF).
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        opening_template = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        opening_template = r'%s(?:[^%s\n\r%s]'
    escCharClass = _escapeRegexRangeChars(escChar) if escChar is not None else ''
    self.pattern = opening_template % (
        re.escape(self.quoteChar),
        _escapeRegexRangeChars(self.endQuoteChar[0]),
        escCharClass)

    if len(self.endQuoteChar) > 1:
        # also accept a proper prefix of a multi-character end quote when it
        # is followed by a character that does not continue the end quote
        self.pattern += (
            '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                  for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')'
            )
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except re.error:
        # re.error is the public name of the exception class raised by
        # re.compile; sre_constants is an internal, deprecated module
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                      SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2993
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a quoted string at C{loc}.

    The first quote character is tested directly before the compiled regular
    expression is tried. When C{unquoteResults} is set, the quote delimiters
    are removed and, for string results, escaped whitespace (if enabled),
    escaped characters and escaped quotes are converted, in that order.

    Returns C{(next location, text)}; raises C{ParseException} if no quoted
    string starts at C{loc}.
    """
    found = None
    if instring[loc] == self.firstQuoteChar:
        found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = found.end()
    ret = found.group()

    if not self.unquoteResults:
        return loc, ret

    # strip off the quote delimiters
    ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

    if isinstance(ret, basestring):
        # replace escaped whitespace
        if '\\' in ret and self.convertWhitespaceEscapes:
            for wslit, wschar in ((r'\t', '\t'), (r'\n', '\n'), (r'\f', '\f'), (r'\r', '\r')):
                ret = ret.replace(wslit, wschar)

        # replace escaped characters
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

        # replace escaped quotes
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
3028
3030 try:
3031 return super(QuotedString,self).__str__()
3032 except Exception:
3033 pass
3034
3035 if self.strRepr is None:
3036 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
3037
3038 return self.strRepr
3039
3042 """
3043 Token for matching words composed of characters I{not} in a given set (will
3044 include whitespace in matched characters if not listed in the provided exclusion set - see example).
3045 Defined with string containing all disallowed characters, and an optional
3046 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
3047 minimum value < 1 is not valid); the default values for C{max} and C{exact}
3048 are 0, meaning no maximum or exact length restriction.
3049
3050 Example::
3051 # define a comma-separated-value as anything that is not a ','
3052 csv_value = CharsNotIn(',')
3053 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
3054 prints::
3055 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
3056 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """
    Configure the set of excluded characters and the length limits.

    C{notChars} holds every character that ends a match.  C{min} must be
    at least 1; a C{max} of 0 means "no upper limit"; a positive C{exact}
    overrides both C{min} and C{max}.

    Raises C{ValueError} if C{min} is less than 1.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
3080
def parseImpl( self, instring, loc, doActions=True ):
    """
    Consume characters from C{loc} until one found in C{self.notChars}, the
    maximum length, or the end of C{instring} is reached.

    Returns C{(new_loc, matched_text)}.  Raises C{ParseException} if the
    first character is excluded or fewer than C{self.minLen} characters
    were consumed.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
3097
3099 try:
3100 return super(CharsNotIn, self).__str__()
3101 except Exception:
3102 pass
3103
3104 if self.strRepr is None:
3105 if len(self.notChars) > 4:
3106 self.strRepr = "!W:(%s...)" % self.notChars[:4]
3107 else:
3108 self.strRepr = "!W:(%s)" % self.notChars
3109
3110 return self.strRepr
3111
3113 """
3114 Special matching class for matching whitespace. Normally, whitespace is ignored
3115 by pyparsing grammars. This class is included when some whitespace structures
3116 are significant. Define with a string containing the whitespace characters to be
3117 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
3118 as defined for the C{L{Word}} class.
3119 """
3120 whiteStrs = {
3121 " " : "<SPC>",
3122 "\t": "<TAB>",
3123 "\n": "<LF>",
3124 "\r": "<CR>",
3125 "\f": "<FF>",
3126 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Configure which whitespace characters are matched and how many.

    The characters in C{ws} are removed from this element's skippable
    whitespace so that they can be matched as tokens.  A C{max} of 0 means
    "no upper limit"; a positive C{exact} overrides both C{min} and C{max}.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # anything we want to match must not be skipped as leading whitespace
    skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
    self.setWhitespaceChars( skippable )

    self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        self.minLen = self.maxLen = exact
3146
def parseImpl( self, instring, loc, doActions=True ):
    """
    Consume a run of characters from C{self.matchWhite} starting at C{loc}.

    Returns C{(new_loc, matched_text)}.  Raises C{ParseException} if the
    first character is not in C{self.matchWhite} or the run is shorter
    than C{self.minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
3161
3165 super(_PositionToken,self).__init__()
3166 self.name=self.__class__.__name__
3167 self.mayReturnEmpty = True
3168 self.mayIndexError = False
3169
3171 """
3172 Token to advance to a specific column of input text; useful for tabular report scraping.
3173 """
3177
3179 if col(loc,instring) != self.col:
3180 instrlen = len(instring)
3181 if self.ignoreExprs:
3182 loc = self._skipIgnorables( instring, loc )
3183 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
3184 loc += 1
3185 return loc
3186
def parseImpl( self, instring, loc, doActions=True ):
    """
    Advance from C{loc} to the column stored in C{self.col}.

    Returns C{(new_loc, skipped_text)}.  Raises C{ParseException} if the
    current column is already past C{self.col}.
    """
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - current
    return target, instring[ loc:target ]
3194
3197 """
3198 Matches if current position is at the beginning of a line within the parse string
3199
3200 Example::
3201
3202 test = '''\
3203 AAA this line
3204 AAA and this line
3205 AAA but not this one
3206 B AAA and definitely not this one
3207 '''
3208
3209 for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
3210 print(t)
3211
3212 Prints::
3213 ['AAA', ' this line']
3214 ['AAA', ' and this line']
3215
3216 """
3220
def parseImpl( self, instring, loc, doActions=True ):
    """
    Succeed with an empty token list only when C{loc} is in column 1.

    Raises C{ParseException} for any other column.
    """
    if col(loc, instring) != 1:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3225
3227 """
3228 Matches if current position is at the end of a line within the parse string
3229 """
3234
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a newline at C{loc}, or the exact end of C{instring}.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
    equals the length of the input.  Raises C{ParseException} otherwise.
    """
    end = len(instring)
    if loc == end:
        return loc+1, []
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
3245
3247 """
3248 Matches if current position is at the beginning of the parse string
3249 """
3253
def parseImpl( self, instring, loc, doActions=True ):
    """
    Succeed with an empty token list at the start of C{instring}.

    A non-zero C{loc} is accepted only if it equals the position that
    C{self.preParse} reports when started from offset 0.  Raises
    C{ParseException} otherwise.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3260
3262 """
3263 Matches if current position is at the end of the parse string
3264 """
3268
def parseImpl( self, instring, loc, doActions=True ):
    """
    Succeed with an empty token list only at or beyond the end of C{instring}.

    Returns C{(loc+1, [])} when C{loc} equals the input length and
    C{(loc, [])} when C{loc} is already past it.  Raises C{ParseException}
    when any input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc+1, []
    # loc > end: the three comparisons are exhaustive, so no further
    # fallback branch is needed
    return loc, []
3278
3280 """
3281 Matches if the current position is at the beginning of a Word, and
3282 is not preceded by any character in a given set of C{wordChars}
3283 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3284 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
3285 the string being parsed, or at the beginning of a line.
3286 """
3288 super(WordStart,self).__init__()
3289 self.wordChars = set(wordChars)
3290 self.errmsg = "Not at the start of a word"
3291
def parseImpl(self, instring, loc, doActions=True ):
    """
    Succeed with an empty token list when C{loc} begins a word.

    Offset 0 always qualifies.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current character must be; otherwise
    C{ParseException} is raised.
    """
    if loc != 0:
        word_chars = self.wordChars
        prev_is_word = instring[loc-1] in word_chars
        if prev_is_word or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3298
3300 """
3301 Matches if the current position is at the end of a Word, and
3302 is not followed by any character in a given set of C{wordChars}
3303 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3304 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
3305 the string being parsed, or at the end of a line.
3306 """
3308 super(WordEnd,self).__init__()
3309 self.wordChars = set(wordChars)
3310 self.skipWhitespace = False
3311 self.errmsg = "Not at the end of a word"
3312
def parseImpl(self, instring, loc, doActions=True ):
    """
    Succeed with an empty token list when C{loc} ends a word.

    Any C{loc} at or past the end of the input qualifies.  Elsewhere the
    current character must not be in C{self.wordChars} and the previous
    character must be; otherwise C{ParseException} is raised.
    """
    total = len(instring)
    if total > 0 and loc < total:
        word_chars = self.wordChars
        next_is_word = instring[loc] in word_chars
        if next_is_word or instring[loc-1] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3320
3323 """
3324 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
3325 """
def __init__( self, exprs, savelist = False ):
    """
    Store the contained expressions in C{self.exprs}.

    C{exprs} may be a single string, a generator, any iterable, or a single
    non-iterable object.  A lone string, or an iterable made up only of
    strings, is wrapped with C{ParserElement._literalStringClass}.
    """
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ ParserElement._literalStringClass( exprs ) ]
    elif isinstance( exprs, Iterable ):
        items = list(exprs)
        # a sequence consisting solely of strings is converted element-wise
        if all(isinstance(item, basestring) for item in items):
            items = [ ParserElement._literalStringClass(item) for item in items ]
        self.exprs = items
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable at all: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
3345
3347 return self.exprs[i]
3348
3350 self.exprs.append( other )
3351 self.strRepr = None
3352 return self
3353
3355 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
3356 all contained expressions."""
3357 self.skipWhitespace = False
3358 self.exprs = [ e.copy() for e in self.exprs ]
3359 for e in self.exprs:
3360 e.leaveWhitespace()
3361 return self
3362
3364 if isinstance( other, Suppress ):
3365 if other not in self.ignoreExprs:
3366 super( ParseExpression, self).ignore( other )
3367 for e in self.exprs:
3368 e.ignore( self.ignoreExprs[-1] )
3369 else:
3370 super( ParseExpression, self).ignore( other )
3371 for e in self.exprs:
3372 e.ignore( self.ignoreExprs[-1] )
3373 return self
3374
3376 try:
3377 return super(ParseExpression,self).__str__()
3378 except Exception:
3379 pass
3380
3381 if self.strRepr is None:
3382 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
3383 return self.strRepr
3384
3386 super(ParseExpression,self).streamline()
3387
3388 for e in self.exprs:
3389 e.streamline()
3390
3391
3392
3393
3394 if ( len(self.exprs) == 2 ):
3395 other = self.exprs[0]
3396 if ( isinstance( other, self.__class__ ) and
3397 not(other.parseAction) and
3398 other.resultsName is None and
3399 not other.debug ):
3400 self.exprs = other.exprs[:] + [ self.exprs[1] ]
3401 self.strRepr = None
3402 self.mayReturnEmpty |= other.mayReturnEmpty
3403 self.mayIndexError |= other.mayIndexError
3404
3405 other = self.exprs[-1]
3406 if ( isinstance( other, self.__class__ ) and
3407 not(other.parseAction) and
3408 other.resultsName is None and
3409 not other.debug ):
3410 self.exprs = self.exprs[:-1] + other.exprs[:]
3411 self.strRepr = None
3412 self.mayReturnEmpty |= other.mayReturnEmpty
3413 self.mayIndexError |= other.mayIndexError
3414
3415 self.errmsg = "Expected " + _ustr(self)
3416
3417 return self
3418
3422
def validate( self, validateTrace=None ):
    """
    Validate every contained expression, then check this one for recursion.

    C{validateTrace} is the list of elements already visited on the way
    here; it is copied, never modified.  Omitting it starts a fresh trace.
    """
    # a None default avoids sharing one list object between calls
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
3428
3433
3434 -class And(ParseExpression):
3435 """
3436 Requires all given C{ParseExpression}s to be found in the given order.
3437 Expressions may be separated by whitespace.
3438 May be constructed using the C{'+'} operator.
3439 May also be constructed using the C{'-'} operator, which will suppress backtracking.
3440
3441 Example::
3442 integer = Word(nums)
3443 name_expr = OneOrMore(Word(alphas))
3444
3445 expr = And([integer("id"),name_expr("name"),integer("age")])
3446 # more easily written as:
3447 expr = integer("id") + name_expr("name") + integer("age")
3448 """
3449
3455
def __init__( self, exprs, savelist = True ):
    """
    Build a sequence expression from C{exprs}.

    The sequence may return empty only if every contained expression may.
    Whitespace handling is copied from the first contained expression; an
    empty C{exprs} keeps the defaults set by the base class instead of
    failing with an C{IndexError}.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    if self.exprs:
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
3462
def parseImpl( self, instring, loc, doActions=True ):
    """
    Parse each contained expression in order and accumulate the tokens.

    The first expression is parsed with C{callPreParse=False}.  After an
    C{And._ErrorStop} marker has been seen, a failure of any later
    expression is re-raised as C{ParseSyntaxException}.

    Returns C{(new_loc, tokens)}.
    """
    head, tail = self.exprs[0], self.exprs[1:]
    loc, resultlist = head._parse( instring, loc, doActions, callPreParse=False )
    committed = False
    for expr in tail:
        if isinstance(expr, And._ErrorStop):
            committed = True
            continue
        if not committed:
            loc, exprtokens = expr._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = expr._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
        # keep results that carry tokens or named keys
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
3487
3489 if isinstance( other, basestring ):
3490 other = ParserElement._literalStringClass( other )
3491 return self.append( other )
3492
3494 subRecCheckList = parseElementList[:] + [ self ]
3495 for e in self.exprs:
3496 e.checkRecursion( subRecCheckList )
3497 if not e.mayReturnEmpty:
3498 break
3499
3501 if hasattr(self,"name"):
3502 return self.name
3503
3504 if self.strRepr is None:
3505 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
3506
3507 return self.strRepr
3508
3509
3510 -class Or(ParseExpression):
3511 """
3512 Requires that at least one C{ParseExpression} is found.
3513 If two expressions match, the expression that matches the longest string will be used.
3514 May be constructed using the C{'^'} operator.
3515
3516 Example::
3517 # construct Or using '^' operator
3518
3519 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3520 print(number.searchString("123 3.1416 789"))
3521 prints::
3522 [['123'], ['3.1416'], ['789']]
3523 """
def __init__( self, exprs, savelist = False ):
    """
    Build a longest-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there
    are no alternatives at all.
    """
    super(Or,self).__init__(exprs, savelist)
    self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)
3530
def parseImpl( self, instring, loc, doActions=True ):
    """
    Try every alternative and return the result of the longest match.

    Each alternative is first probed with C{tryParse}; the candidates are
    then re-parsed in order of decreasing end location until one succeeds.
    If none does, the exception that got furthest into the input is raised
    with this element's error message.
    """
    furthest = -1
    furthest_exc = None
    candidates = []
    for alternative in self.exprs:
        try:
            end_loc = alternative.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest:
                furthest_exc, furthest = err, err.loc
        except IndexError:
            if len(instring) > furthest:
                furthest_exc = ParseException(instring,len(instring),alternative.errmsg,self)
                furthest = len(instring)
        else:
            candidates.append((end_loc, alternative))

    # longest probe first; ties keep their original order
    for _, alternative in sorted(candidates, key=lambda m: m[0], reverse=True):
        try:
            return alternative._parse( instring, loc, doActions )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest:
                furthest_exc, furthest = err, err.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3567
3568
3570 if isinstance( other, basestring ):
3571 other = ParserElement._literalStringClass( other )
3572 return self.append( other )
3573
3575 if hasattr(self,"name"):
3576 return self.name
3577
3578 if self.strRepr is None:
3579 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
3580
3581 return self.strRepr
3582
3584 subRecCheckList = parseElementList[:] + [ self ]
3585 for e in self.exprs:
3586 e.checkRecursion( subRecCheckList )
3587
3590 """
3591 Requires that at least one C{ParseExpression} is found.
3592 If two expressions match, the first one listed is the one that will match.
3593 May be constructed using the C{'|'} operator.
3594
3595 Example::
3596 # construct MatchFirst using '|' operator
3597
3598 # watch the order of expressions to match
3599 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
3600 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
3601
3602 # put more selective expression first
3603 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
3604 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
3605 """
def __init__( self, exprs, savelist = False ):
    """
    Build a first-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there
    are no alternatives at all.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)
3612
def parseImpl( self, instring, loc, doActions=True ):
    """
    Return the result of the first alternative that parses at C{loc}.

    If every alternative fails, the exception that got furthest into the
    input is raised with this element's error message.  With no
    alternatives at all a C{ParseException} is raised.
    """
    furthest = -1
    furthest_exc = None
    for alternative in self.exprs:
        try:
            return alternative._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthest:
                furthest_exc, furthest = err, err.loc
        except IndexError:
            if len(instring) > furthest:
                furthest_exc = ParseException(instring,len(instring),alternative.errmsg,self)
                furthest = len(instring)

    # only reached when no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3636
3638 if isinstance( other, basestring ):
3639 other = ParserElement._literalStringClass( other )
3640 return self.append( other )
3641
3643 if hasattr(self,"name"):
3644 return self.name
3645
3646 if self.strRepr is None:
3647 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
3648
3649 return self.strRepr
3650
3652 subRecCheckList = parseElementList[:] + [ self ]
3653 for e in self.exprs:
3654 e.checkRecursion( subRecCheckList )
3655
3656
3657 -class Each(ParseExpression):
3658 """
3659 Requires all given C{ParseExpression}s to be found, but in any order.
3660 Expressions may be separated by whitespace.
3661 May be constructed using the C{'&'} operator.
3662
3663 Example::
3664 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
3665 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
3666 integer = Word(nums)
3667 shape_attr = "shape:" + shape_type("shape")
3668 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
3669 color_attr = "color:" + color("color")
3670 size_attr = "size:" + integer("size")
3671
3672 # use Each (using operator '&') to accept attributes in any order
3673 # (shape and posn are required, color and size are optional)
3674 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
3675
3676 shape_spec.runTests('''
3677 shape: SQUARE color: BLACK posn: 100, 120
3678 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3679 color:GREEN size:20 shape:TRIANGLE posn:20,40
3680 '''
3681 )
3682 prints::
3683 shape: SQUARE color: BLACK posn: 100, 120
3684 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
3685 - color: BLACK
3686 - posn: ['100', ',', '120']
3687 - x: 100
3688 - y: 120
3689 - shape: SQUARE
3690
3691
3692 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3693 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
3694 - color: BLUE
3695 - posn: ['50', ',', '80']
3696 - x: 50
3697 - y: 80
3698 - shape: CIRCLE
3699 - size: 50
3700
3701
3702 color: GREEN size: 20 shape: TRIANGLE posn: 20,40
3703 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
3704 - color: GREEN
3705 - posn: ['20', ',', '40']
3706 - x: 20
3707 - y: 40
3708 - shape: TRIANGLE
3709 - size: 20
3710 """
def __init__( self, exprs, savelist = True ):
    """
    Build an any-order group from C{exprs}.

    The group may return empty only if every contained expression may.
    The grouping of expressions into required/optional sets is deferred
    until the first parse (C{initExprGroups}).
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
3716
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match all contained expressions in whatever order they appear.

    A first pass probes the expressions repeatedly with C{tryParse} to find
    the order in which they match; a second pass re-parses them in that
    order and sums the results.

    Returns C{(new_loc, results)}.  Raises C{ParseException} if any
    required expression was never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        exprs = self.exprs
        self.opt1map = dict((id(e.expr),e) for e in exprs if isinstance(e,Optional))
        unwrapped_optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
        empty_capable = [ e for e in exprs if e.mayReturnEmpty and not isinstance(e,Optional) ]
        self.optionals = unwrapped_optionals + empty_capable
        self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.multirequired
        self.initExprGroups = False

    probeLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping until a full pass matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failures = 0
        for e in candidates:
            try:
                probeLoc = e.tryParse( instring, probeLoc )
            except ParseException:
                failures += 1
            else:
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
        if failures == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join(_ustr(e) for e in pendingReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # unmatched Optionals are still parsed, after everything that matched
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    return loc, sum(resultlist, ParseResults([]))
3765
3767 if hasattr(self,"name"):
3768 return self.name
3769
3770 if self.strRepr is None:
3771 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
3772
3773 return self.strRepr
3774
3776 subRecCheckList = parseElementList[:] + [ self ]
3777 for e in self.exprs:
3778 e.checkRecursion( subRecCheckList )
3779
3782 """
3783 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
3784 """
def __init__( self, expr, savelist=False ):
    """
    Wrap a single expression.

    A string C{expr} is converted with C{ParserElement._literalStringClass}
    (via C{Literal} first when that class is not a C{Token} subclass).  When
    C{expr} is not C{None}, its parsing flags, whitespace settings and
    ignore expressions are copied onto this element.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        literal_cls = ParserElement._literalStringClass
        if issubclass(literal_cls, Token):
            expr = literal_cls(expr)
        else:
            expr = literal_cls(Literal(expr))
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
3802
def parseImpl( self, instring, loc, doActions=True ):
    """
    Delegate parsing to the wrapped expression, without pre-parsing.

    Raises C{ParseException} if no expression has been set.
    """
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
3808
3810 self.skipWhitespace = False
3811 self.expr = self.expr.copy()
3812 if self.expr is not None:
3813 self.expr.leaveWhitespace()
3814 return self
3815
3817 if isinstance( other, Suppress ):
3818 if other not in self.ignoreExprs:
3819 super( ParseElementEnhance, self).ignore( other )
3820 if self.expr is not None:
3821 self.expr.ignore( self.ignoreExprs[-1] )
3822 else:
3823 super( ParseElementEnhance, self).ignore( other )
3824 if self.expr is not None:
3825 self.expr.ignore( self.ignoreExprs[-1] )
3826 return self
3827
3833
3835 if self in parseElementList:
3836 raise RecursiveGrammarException( parseElementList+[self] )
3837 subRecCheckList = parseElementList[:] + [ self ]
3838 if self.expr is not None:
3839 self.expr.checkRecursion( subRecCheckList )
3840
def validate( self, validateTrace=None ):
    """
    Validate the wrapped expression, then check this one for recursion.

    C{validateTrace} is the list of elements already visited on the way
    here; it is copied, never modified.  Omitting it starts a fresh trace.
    """
    # a None default avoids sharing one list object between calls
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion( [] )
3846
3848 try:
3849 return super(ParseElementEnhance,self).__str__()
3850 except Exception:
3851 pass
3852
3853 if self.strRepr is None and self.expr is not None:
3854 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
3855 return self.strRepr
3856
3859 """
3860 Lookahead matching of the given parse expression. C{FollowedBy}
3861 does I{not} advance the parsing position within the input string, it only
3862 verifies that the specified parse expression matches at the current
3863 position. C{FollowedBy} always returns a null token list. If any
3864 results names are defined in the lookahead expression, those *will* be
3865 returned for access by name.
3866
3867 Example::
3868 # use FollowedBy to match a label only if it is followed by a ':'
3869 data_word = Word(alphas)
3870 label = data_word + FollowedBy(':')
3871 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3872
3873 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
3874 prints::
3875 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
3876 """
3880
def parseImpl( self, instring, loc, doActions=True ):
    """
    Check that the wrapped expression matches at C{loc} without consuming it.

    The result object of the wrapped parse is emptied in place and returned
    together with the unchanged C{loc}.
    """
    ret = self.expr._parse(instring, loc, doActions=doActions)[1]
    # empty the token list in place, keeping the same result object
    del ret[:]
    return loc, ret
3885
3888 """
3889 Lookbehind matching of the given parse expression. C{PrecededBy}
3890 does not advance the parsing position within the input string, it only
3891 verifies that the specified parse expression matches prior to the current
3892 position. C{PrecededBy} always returns a null token list, but if
3893 a results name is defined on the given expression, it is returned.
3894
3895 Parameters:
3896 - expr - expression that must match prior to the current parse location
3897 - retreat - (default=C{None}) - (int) maximum number of characters to
3898 lookbehind prior to the current parse location
3899
3900 If the lookbehind expression is a string, Literal, Keyword, or a
3901 Word or CharsNotIn with a specified exact or maximum length, then
3902 the retreat parameter is not required. Otherwise, retreat must be
3903 specified to give a maximum number of characters to look back from
3904 the current parse position for a lookbehind match.
3905
3906 Example::
3907
3908 # VB-style variable names with type prefixes
3909 int_var = PrecededBy("#") + pyparsing_common.identifier
3910 str_var = PrecededBy("$") + pyparsing_common.identifier
3911
3912 """
def __init__(self, expr, retreat=None):
    """
    Set up a lookbehind for C{expr}.

    For a plain string, a C{Literal}/C{Keyword}, a length-bounded
    C{Word}/C{CharsNotIn}, or a C{_PositionToken}, the lookbehind distance
    is taken from the expression itself and the match is marked exact.
    Otherwise the caller-supplied C{retreat} is used as the distance.
    """
    super(PrecededBy, self).__init__(expr)
    self.expr = self.expr().leaveWhitespace()
    self.mayReturnEmpty = True
    self.mayIndexError = False

    exact = True
    if isinstance(expr, str):
        retreat = len(expr)
    elif isinstance(expr, (Literal, Keyword)):
        retreat = expr.matchLen
    elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
        retreat = expr.maxLen
    elif isinstance(expr, _PositionToken):
        retreat = 0
    else:
        # length not derivable from the expression: keep caller's retreat
        exact = False

    self.exact = exact
    self.retreat = retreat
    self.errmsg = "not preceded by " + str(expr)
    self.skipWhitespace = False
3934
def parseImpl(self, instring, loc=0, doActions=True):
    """
    Check that the wrapped expression matches immediately before C{loc}.

    In exact mode the expression is parsed once, C{self.retreat} characters
    back.  Otherwise every start offset from 1 up to
    C{min(loc, self.retreat)} characters back is tried, and the match must
    end exactly at C{loc}.

    Returns C{(loc, results)} with the token list emptied in place.
    Raises the last parse failure if no offset matches.
    """
    if self.exact:
        if loc < self.retreat:
            raise ParseException(instring, loc, self.errmsg)
        start = loc - self.retreat
        _, ret = self.expr._parse(instring, start)
    else:
        # anchoring with StringEnd on the truncated input forces the match
        # to finish exactly at loc
        test_expr = self.expr + StringEnd()
        instring_slice = instring[:loc]
        last_expr = ParseException(instring, loc, self.errmsg)
        # the bound is inclusive so that a match starting at the very
        # beginning of the input (offset == loc) is also tried
        for offset in range(1, min(loc, self.retreat) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, loc-offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            raise last_expr

    del ret[:]
    return loc, ret
3958
3959
3960 -class NotAny(ParseElementEnhance):
3961 """
3962 Lookahead to disallow matching with the given parse expression. C{NotAny}
3963 does I{not} advance the parsing position within the input string, it only
3964 verifies that the specified parse expression does I{not} match at the current
3965 position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
3966 always returns a null token list. May be constructed using the '~' operator.
3967
3968 Example::
3969 AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())
3970
3971 # take care not to mistake keywords for identifiers
3972 ident = ~(AND | OR | NOT) + Word(alphas)
3973 boolean_term = Optional(NOT) + ident
3974
3975 # very crude boolean expression - to support parenthesis groups and
3976 # operation hierarchy, use infixNotation
3977 boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)
3978
3979 # integers that are followed by "." are actually floats
3980 integer = Word(nums) + ~Char(".")
3981 """
3983 super(NotAny,self).__init__(expr)
3984
3985 self.skipWhitespace = False
3986 self.mayReturnEmpty = True
3987 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
3988
3989 - def parseImpl( self, instring, loc, doActions=True ):
3993
3995 if hasattr(self,"name"):
3996 return self.name
3997
3998 if self.strRepr is None:
3999 self.strRepr = "~{" + _ustr(self.expr) + "}"
4000
4001 return self.strRepr
4002
def __init__( self, expr, stopOn=None):
    """
    Wrap C{expr} for repeated matching.

    C{stopOn}, when given, is an expression (or a string, converted with
    C{ParserElement._literalStringClass}) whose negation is stored in
    C{self.not_ender}; without it C{self.not_ender} is C{None}.
    """
    super(_MultipleMatch, self).__init__(expr)
    self.saveAsList = True
    ender = stopOn
    if isinstance(ender, basestring):
        ender = ParserElement._literalStringClass(ender)
    if ender is None:
        self.not_ender = None
    else:
        self.not_ender = ~ender
4011
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match the wrapped expression once, then as many more times as possible.

    The first match must succeed, so its failure propagates to the caller.
    Later repetitions stop silently at the first C{ParseException} or
    C{IndexError}.  When C{self.not_ender} is set it is probed before every
    repetition, including the first.

    Returns C{(new_loc, tokens)}.
    """
    parse_expr = self.expr._parse
    skip_ignorables = self._skipIgnorables
    reject_ender = None
    if self.not_ender is not None:
        reject_ender = self.not_ender.tryParse

    # mandatory first match
    if reject_ender is not None:
        reject_ender(instring, loc)
    loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
    try:
        has_ignores = bool(self.ignoreExprs)
        while True:
            if reject_ender is not None:
                reject_ender(instring, loc)
            preloc = skip_ignorables( instring, loc ) if has_ignores else loc
            loc, more = parse_expr( instring, preloc, doActions )
            if more or more.haskeys():
                tokens += more
    except (ParseException,IndexError):
        # no further repetition: keep what has been collected
        pass

    return loc, tokens
4040
4042 """
4043 Repetition of one or more of the given expression.
4044
4045 Parameters:
4046 - expr - expression that must match one or more times
4047 - stopOn - (default=C{None}) - expression for a terminating sentinel
4048 (only required if the sentinel would ordinarily match the repetition
4049 expression)
4050
4051 Example::
4052 data_word = Word(alphas)
4053 label = data_word + FollowedBy(':')
4054 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4055
4056 text = "shape: SQUARE posn: upper left color: BLACK"
4057 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
4058
4059 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
4060 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4061 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
4062
4063 # could also be written as
4064 (attr_expr * (1,)).parseString(text).pprint()
4065 """
4066
4068 if hasattr(self,"name"):
4069 return self.name
4070
4071 if self.strRepr is None:
4072 self.strRepr = "{" + _ustr(self.expr) + "}..."
4073
4074 return self.strRepr
4075
4077 """
4078 Optional repetition of zero or more of the given expression.
4079
4080 Parameters:
4081 - expr - expression that must match zero or more times
4082 - stopOn - (default=C{None}) - expression for a terminating sentinel
4083 (only required if the sentinel would ordinarily match the repetition
4084 expression)
4085
4086 Example: similar to L{OneOrMore}
4087 """
4088 - def __init__( self, expr, stopOn=None):
4091
4092 - def parseImpl( self, instring, loc, doActions=True ):
4097
4099 if hasattr(self,"name"):
4100 return self.name
4101
4102 if self.strRepr is None:
4103 self.strRepr = "[" + _ustr(self.expr) + "]..."
4104
4105 return self.strRepr
4106
4113
4114 _optionalNotMatched = _NullToken()
4116 """
4117 Optional matching of the given expression.
4118
4119 Parameters:
4120 - expr - expression that must match zero or one time
4121 - default (optional) - value to be returned if the optional expression is not found.
4122
4123 Example::
4124 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
4125 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
4126 zip.runTests('''
4127 # traditional ZIP code
4128 12345
4129
4130 # ZIP+4 form
4131 12101-0001
4132
4133 # invalid ZIP
4134 98765-
4135 ''')
4136 prints::
4137 # traditional ZIP code
4138 12345
4139 ['12345']
4140
4141 # ZIP+4 form
4142 12101-0001
4143 ['12101-0001']
4144
4145 # invalid ZIP
4146 98765-
4147 ^
4148 FAIL: Expected end of text (at char 5), (line:1, col:6)
4149 """
4151 super(Optional,self).__init__( expr, savelist=False )
4152 self.saveAsList = self.expr.saveAsList
4153 self.defaultValue = default
4154 self.mayReturnEmpty = True
4155
def parseImpl( self, instring, loc, doActions=True ):
    """
    Attempt the wrapped expression at C{loc} and fall back instead of failing.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the wrapped expression
     - doActions - forwarded unchanged to the wrapped expression's C{_parse}

    Returns a C{(loc, tokens)} pair.  When the wrapped expression matches, this is
    exactly what its C{_parse} returned.  When it raises C{ParseException} or
    C{IndexError}, C{loc} is returned unchanged together with:
     - an empty list, if no default value was configured
     - a one-element list holding the default value, or
     - a C{ParseResults} holding the default value, also stored under the wrapped
       expression's results name when it has one
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        # The sentinel object marks "no default given"; a default of None is a real value.
        if self.defaultValue is _optionalNotMatched:
            return loc, []
        if not self.expr.resultsName:
            return loc, [ self.defaultValue ]
        # Keep the default reachable by name as well as by position.
        tokens = ParseResults([ self.defaultValue ])
        tokens[self.expr.resultsName] = self.defaultValue
    return loc, tokens
4169
4171 if hasattr(self,"name"):
4172 return self.name
4173
4174 if self.strRepr is None:
4175 self.strRepr = "[" + _ustr(self.expr) + "]"
4176
4177 return self.strRepr
4178
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """
        Store the target expression C{other} and the scanning options.

        A string passed as C{failOn} is wrapped with
        C{ParserElement._literalStringClass}; C{ignore} is stored as given.
        """
        super( SkipTo, self ).__init__( other )
        self.includeMatch = include
        self.ignoreExpr = ignore
        if isinstance(failOn, basestring):
            failOn = ParserElement._literalStringClass(failOn)
        self.failOn = failOn
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.saveAsList = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target matches.

        Returns C{(newloc, results)}, where C{results} holds the skipped text and,
        if C{include} was requested, the tokens of the target expression as well.
        Raises C{ParseException} (located at the original C{loc}) when the scan
        runs past the end of C{instring} without the target matching.
        """
        begin = loc
        input_length = len(instring)
        match_target = self.expr._parse
        # Bind the optional helpers once; None means the option was not configured.
        hits_fail_expr = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignorable = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        pos = loc
        while pos <= input_length:
            # NOTE(review): a failOn hit leaves the loop through break, so the else
            # clause below does not raise for it; scanning just stops at this
            # position - confirm this is the intended way failOn rejects a match.
            if hits_fail_expr is not None and hits_fail_expr(instring, pos):
                break

            if skip_ignorable is not None:
                # hop over as many consecutive ignorable matches as there are
                while True:
                    try:
                        pos = skip_ignorable(instring, pos)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are disabled while searching
                match_target(instring, pos, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # target not here, try one character further on
                pos += 1
            else:
                # target found at pos
                break
        else:
            # loop condition went false: no target anywhere in the remaining input
            raise ParseException(instring, loc, self.errmsg, self)

        # everything between the starting point and the stop position was skipped
        skipped = ParseResults(instring[begin:pos])
        loc = pos

        if self.includeMatch:
            # parse the target for real this time, honouring doActions
            loc, target_tokens = match_target(instring, loc, doActions, callPreParse=False)
            skipped += target_tokens

        return loc, skipped
4293
4294 -class Forward(ParseElementEnhance):
4295 """
4296 Forward declaration of an expression to be defined later -
4297 used for recursive grammars, such as algebraic infix notation.
4298 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
4299
4300 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
4301 Specifically, '|' has a lower precedence than '<<', so that::
4302 fwdExpr << a | b | c
4303 will actually be evaluated as::
4304 (fwdExpr << a) | b | c
4305 thereby leaving b and c out as parseable alternatives. It is recommended that you
4306 explicitly group the values inserted into the C{Forward}::
4307 fwdExpr << (a | b | c)
4308 Converting to use the '<<=' operator instead will avoid this problem.
4309
4310 See L{ParseResults.pprint} for an example of a recursive parser created using
4311 C{Forward}.
4312 """
4315
4317 if isinstance( other, basestring ):
4318 other = ParserElement._literalStringClass(other)
4319 self.expr = other
4320 self.strRepr = None
4321 self.mayIndexError = self.expr.mayIndexError
4322 self.mayReturnEmpty = self.expr.mayReturnEmpty
4323 self.setWhitespaceChars( self.expr.whiteChars )
4324 self.skipWhitespace = self.expr.skipWhitespace
4325 self.saveAsList = self.expr.saveAsList
4326 self.ignoreExprs.extend(self.expr.ignoreExprs)
4327 return self
4328
4330 return self << other
4331
4333 self.skipWhitespace = False
4334 return self
4335
4337 if not self.streamlined:
4338 self.streamlined = True
4339 if self.expr is not None:
4340 self.expr.streamline()
4341 return self
4342
def validate( self, validateTrace=[] ):
    """
    Validate the contained expression, then check this element for recursion.

    C{validateTrace} lists the elements already being validated further up the
    call chain.  If this element is already in it, the contained expression is
    not visited again.  The list is copied before being extended, so the shared
    default argument is never modified.
    """
    if self not in validateTrace:
        trace_with_self = validateTrace[:] + [self]
        contained = self.expr
        if contained is not None:
            contained.validate(trace_with_self)
    # always performed, whether or not the contained expression was visited
    self.checkRecursion([])
4349
4351 if hasattr(self,"name"):
4352 return self.name
4353 return self.__class__.__name__ + ": ..."
4354
4355
4356 self._revertClass = self.__class__
4357 self.__class__ = _ForwardNoRecurse
4358 try:
4359 if self.expr is not None:
4360 retString = _ustr(self.expr)
4361 else:
4362 retString = "None"
4363 finally:
4364 self.__class__ = self._revertClass
4365 return self.__class__.__name__ + ": " + retString
4366
4368 if self.expr is not None:
4369 return super(Forward,self).copy()
4370 else:
4371 ret = Forward()
4372 ret <<= self
4373 return ret
4374
4378
4380 """
4381 Abstract subclass of C{ParseExpression}, for converting parsed results.
4382 """
4383 - def __init__( self, expr, savelist=False ):
4386
4388 """
4389 Converter to concatenate all matching tokens to a single string.
4390 By default, the matching patterns must also be contiguous in the input string;
4391 this can be disabled by specifying C{'adjacent=False'} in the constructor.
4392
4393 Example::
4394 real = Word(nums) + '.' + Word(nums)
4395 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
4396 # will also erroneously match the following
4397 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
4398
4399 real = Combine(Word(nums) + '.' + Word(nums))
4400 print(real.parseString('3.1416')) # -> ['3.1416']
4401 # no match when there are internal spaces
4402 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
4403 """
def __init__( self, expr, joinString="", adjacent=True ):
    """
    Wrap C{expr} so that its matched tokens are joined into one string.

    Parameters:
     - expr - the expression whose tokens are combined
     - joinString - (default=C{""}) stored for use when the tokens are joined
     - adjacent - (default=C{True}) when true, C{leaveWhitespace()} is called
       on this element
    """
    super(Combine,self).__init__( expr )

    if adjacent:
        self.leaveWhitespace()
    # Assigned after leaveWhitespace() on purpose: presumably that call clears
    # skipWhitespace (Forward.leaveWhitespace in this file does), and the
    # Combine itself should still skip leading whitespace - TODO confirm.
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
4413
4420
def postParse( self, instring, loc, tokenlist ):
    """
    Collapse the matched tokens into a single string token.

    A copy of C{tokenlist} is emptied of its list items and given one token:
    the strings from C{tokenlist._asStringList(self.joinString)} joined
    together.  The result is wrapped in a list when this element has a results
    name and the combined results report keys via C{haskeys()}; otherwise it is
    returned directly.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([ joined ], modal=self.modalResults)

    if not (self.resultsName and combined.haskeys()):
        return combined
    return [ combined ]
4430
4431 -class Group(TokenConverter):
4432 """
4433 Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
4434
4435 Example::
4436 ident = Word(alphas)
4437 num = Word(nums)
4438 term = ident | num
4439 func = ident + Optional(delimitedList(term))
4440 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
4441
4442 func = ident + Group(Optional(delimitedList(term)))
4443 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
4444 """
4446 super(Group,self).__init__( expr )
4447 self.saveAsList = True
4448
def postParse( self, instring, loc, tokenlist ):
    """
    Nest the matched tokens one level deeper.

    Returns a new one-element list whose only item is C{tokenlist} itself (not
    a copy); C{instring} and C{loc} are not used.
    """
    grouped = [ tokenlist ]
    return grouped
4451
4452 -class Dict(TokenConverter):
4453 """
4454 Converter to return a repetitive expression as a list, but also as a dictionary.
4455 Each element can also be referenced using the first token in the expression as its key.
4456 Useful for tabular report scraping when the first column can be used as a item key.
4457
4458 Example::
4459 data_word = Word(alphas)
4460 label = data_word + FollowedBy(':')
4461 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4462
4463 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4464 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4465
4466 # print attributes as plain groups
4467 print(OneOrMore(attr_expr).parseString(text).dump())
4468
4469 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
4470 result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
4471 print(result.dump())
4472
4473 # access named fields as dict entries, or output as dict
4474 print(result['shape'])
4475 print(result.asDict())
4476 prints::
4477 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
4478
4479 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4480 - color: light blue
4481 - posn: upper left
4482 - shape: SQUARE
4483 - texture: burlap
4484 SQUARE
4485 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
4486 See more examples at L{ParseResults} of accessing fields by results name.
4487 """
4489 super(Dict,self).__init__( expr )
4490 self.saveAsList = True
4491
def postParse( self, instring, loc, tokenlist ):
    """
    Register each sub-token of C{tokenlist} under a key taken from its first item.

    For every non-empty entry, the first item is the key (an C{int} key is
    converted with C{_ustr} and stripped).  The value stored under that key,
    wrapped in C{_ParseResultsWithOffset} together with the entry's index, is:
     - C{""} when the entry holds only the key
     - the second item, when the entry has exactly two items and the second is
       not a C{ParseResults}
     - otherwise a copy of the entry without its key; that copy is unwrapped to
       its single item when it has exactly one item and does not report keys

    Returns C{tokenlist}, wrapped in a list when this element has a results name.
    """
    for position, entry in enumerate(tokenlist):
        if len(entry) == 0:
            continue

        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()

        if len(entry) == 1:
            value = ""
        elif len(entry) == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            single_plain_item = len(remainder) == 1 and not (isinstance(remainder,ParseResults) and remainder.haskeys())
            if single_plain_item:
                value = remainder[0]
            else:
                value = remainder

        tokenlist[key] = _ParseResultsWithOffset(value,position)

    return [ tokenlist ] if self.resultsName else tokenlist
4515
4518 """
4519 Converter for ignoring the results of a parsed expression.
4520
4521 Example::
4522 source = "a, b, c,d"
4523 wd = Word(alphas)
4524 wd_list1 = wd + ZeroOrMore(',' + wd)
4525 print(wd_list1.parseString(source))
4526
4527 # often, delimiters that are useful during parsing are just in the
4528 # way afterward - use Suppress to keep them out of the parsed output
4529 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
4530 print(wd_list2.parseString(source))
4531 prints::
4532 ['a', ',', 'b', ',', 'c', ',', 'd']
4533 ['a', 'b', 'c', 'd']
4534 (See also L{delimitedList}.)
4535 """
4536 - def postParse( self, instring, loc, tokenlist ):
4538
4541
4544 """
4545 Wrapper for parse actions, to ensure they are only called once.
4546 """
4548 self.callable = _trim_arity(methodCall)
4549 self.called = False
4551 if not self.called:
4552 results = self.callable(s,l,t)
4553 self.called = True
4554 return results
4555 raise ParseException(s,l,"")
4558
4560 """
4561 Decorator for debugging parse actions.
4562
4563 When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
4564 When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
4565
4566 Example::
4567 wd = Word(alphas)
4568
4569 @traceParseAction
4570 def remove_duplicate_chars(tokens):
4571 return ''.join(sorted(set(''.join(tokens))))
4572
4573 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
4574 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
4575 prints::
4576 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
4577 <<leaving remove_duplicate_chars (ret: 'dfjkls')
4578 ['dfjkls']
4579 """
4580 f = _trim_arity(f)
4581 def z(*paArgs):
4582 thisFunc = f.__name__
4583 s,l,t = paArgs[-3:]
4584 if len(paArgs)>3:
4585 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
4586 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
4587 try:
4588 ret = f(*paArgs)
4589 except Exception as exc:
4590 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
4591 raise
4592 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
4593 return ret
4594 try:
4595 z.__name__ = f.__name__
4596 except AttributeError:
4597 pass
4598 return z
4599
4600
4601
4602
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    The returned expression is named C{"<expr> [<delim> <expr>]..."}.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    listName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if not combine:
        # delimiters take part in matching but are kept out of the results
        separated = expr + ZeroOrMore( Suppress( delim ) + expr )
        return separated.setName(listName)
    # delimiters are kept, and Combine merges the whole match into one token
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(listName)
4621
4623 """
4624 Helper to define a counted list of expressions.
4625 This helper defines a pattern of the form::
4626 integer expr expr expr...
4627 where the leading integer tells how many expr expressions follow.
4628 The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.
4629
4630 If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
4631
4632 Example::
4633 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
4634
4635 # in this parser, the leading integer value is given in binary,
4636 # '10' indicating that 2 values are in the array
4637 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
4638 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
4639 """
4640 arrayExpr = Forward()
4641 def countFieldParseAction(s,l,t):
4642 n = t[0]
4643 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
4644 return []
4645 if intExpr is None:
4646 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
4647 else:
4648 intExpr = intExpr.copy()
4649 intExpr.setName("arrayLen")
4650 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
4651 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4652
4654 ret = []
4655 for i in L:
4656 if isinstance(i,list):
4657 ret.extend(_flatten(i))
4658 else:
4659 ret.append(i)
4660 return ret
4661
4663 """
4664 Helper to define an expression that is indirectly defined from
4665 the tokens matched in a previous expression, that is, it looks
4666 for a 'repeat' of a previous expression. For example::
4667 first = Word(nums)
4668 second = matchPreviousLiteral(first)
4669 matchExpr = first + ":" + second
4670 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
4671 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
4672 If this is not desired, use C{matchPreviousExpr}.
4673 Do I{not} use with packrat parsing enabled.
4674 """
4675 rep = Forward()
4676 def copyTokenToRepeater(s,l,t):
4677 if t:
4678 if len(t) == 1:
4679 rep << t[0]
4680 else:
4681
4682 tflat = _flatten(t.asList())
4683 rep << And(Literal(tt) for tt in tflat)
4684 else:
4685 rep << Empty()
4686 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4687 rep.setName('(prev) ' + _ustr(expr))
4688 return rep
4689
4691 """
4692 Helper to define an expression that is indirectly defined from
4693 the tokens matched in a previous expression, that is, it looks
4694 for a 'repeat' of a previous expression. For example::
4695 first = Word(nums)
4696 second = matchPreviousExpr(first)
4697 matchExpr = first + ":" + second
4698 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
4699 expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
4700 the expressions are evaluated first, and then compared, so
4701 C{"1"} is compared with C{"10"}.
4702 Do I{not} use with packrat parsing enabled.
4703 """
4704 rep = Forward()
4705 e2 = expr.copy()
4706 rep <<= e2
4707 def copyTokenToRepeater(s,l,t):
4708 matchTokens = _flatten(t.asList())
4709 def mustMatchTheseTokens(s,l,t):
4710 theseTokens = _flatten(t.asList())
4711 if theseTokens != matchTokens:
4712 raise ParseException("",0,"")
4713 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
4714 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4715 rep.setName('(prev) ' + _ustr(expr))
4716 return rep
4717
4719
4720 for c in r"\^-]":
4721 s = s.replace(c,_bslash+c)
4722 s = s.replace("\n",r"\n")
4723 s = s.replace("\t",r"\t")
4724 return _ustr(s)
4725
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} is neither a string nor an iterable, a C{SyntaxWarning} is issued;
    whenever no symbols result, a C{NoMatch} expression is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        literalClass = CaselessLiteral
        def sameSymbol(a, b):
            return a.upper() == b.upper()
        def hides(a, b):
            # True when a is a prefix of b, so trying a first would pre-empt b
            return b.upper().startswith(a.upper())
    else:
        literalClass = Literal
        def sameSymbol(a, b):
            return a == b
        def hides(a, b):
            # True when a is a prefix of b, so trying a first would pre-empt b
            return b.startswith(a)

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates and move any longer symbol ahead of a symbol that is its
    # prefix.  The position only advances once nothing later in the list
    # conflicts with the symbol currently at that position.
    pos = 0
    while pos < len(symbols)-1:
        current = symbols[pos]
        for laterPos, later in enumerate(symbols[pos+1:], pos+1):
            if sameSymbol(later, current):
                del symbols[laterPos]
                break
            if hides(current, later):
                symbols.insert(pos, symbols.pop(laterPos))
                break
        else:
            pos += 1

    if not caseless and useRegex:
        try:
            allSingleChars = len(symbols)==len("".join(symbols))
            if allSingleChars:
                # one-character symbols fit into a single character class
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex( pattern ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort (or caseless / useRegex=False): try the literals one by one
    return MatchFirst(literalClass(sym) for sym in symbols).setName(' | '.join(symbols))
4798
4800 """
4801 Helper to easily and clearly define a dictionary by specifying the respective patterns
4802 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
4803 in the proper order. The key pattern can include delimiting markers or punctuation,
4804 as long as they are suppressed, thereby leaving the significant key text. The value
4805 pattern can include named results, so that the C{Dict} results can include named token
4806 fields.
4807
4808 Example::
4809 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4810 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4811 print(OneOrMore(attr_expr).parseString(text).dump())
4812
4813 attr_label = label
4814 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
4815
4816 # similar to Dict, but simpler call format
4817 result = dictOf(attr_label, attr_value).parseString(text)
4818 print(result.dump())
4819 print(result['shape'])
4820 print(result.shape) # object attribute access works too
4821 print(result.asDict())
4822 prints::
4823 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4824 - color: light blue
4825 - posn: upper left
4826 - shape: SQUARE
4827 - texture: burlap
4828 SQUARE
4829 SQUARE
4830 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
4831 """
4832 return Dict( ZeroOrMore( Group ( key + value ) ) )
4833
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # Two zero-width markers record the parse location on either side of expr.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # presumably keeps the end marker from skipping ahead (e.g. over trailing
    # whitespace) before it records its location - TODO confirm
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        # replace all tokens with the slice of the input between the two markers
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # rewrite the token list in place and remove the two marker entries
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    wrapped.setParseAction(extractText)
    # share expr's ignore list (the same list object, not a copy)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4870
4872 """
4873 Helper to undo pyparsing's default grouping of And expressions, even
4874 if all but one are non-empty.
4875 """
4876 return TokenConverter(expr).setParseAction(lambda t:t[0])
4877
4879 """
4880 Helper to decorate a returned token with its starting and ending locations in the input string.
4881 This helper adds the following results names:
4882 - locn_start = location where matched expression begins
4883 - locn_end = location where matched expression ends
4884 - value = the actual parsed results
4885
4886 Be careful if the input text contains C{<TAB>} characters, you may want to call
4887 C{L{ParserElement.parseWithTabs}}
4888
4889 Example::
4890 wd = Word(alphas)
4891 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
4892 print(match)
4893 prints::
4894 [[0, 'ljsdf', 5]]
4895 [[8, 'lksdjjf', 15]]
4896 [[18, 'lkkjj', 23]]
4897 """
4898 locator = Empty().setParseAction(lambda s,l,t: l)
4899 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
4900
4901
4902
# Ready-made, named instances of the basic positional and empty expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Private grammar for '[...]' character-set strings.
# A backslash followed by one punctuation character stands for that character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Hex escape: take the digits after the x/X marker.  Do not use lstrip(r'\0x') here -
# lstrip removes a set of characters, not a prefix, so it would leave an upper-case 'X'
# in place and also eat the leading zero digits of the value (an all-zero value such as
# "\x00" would become an empty string), making int() raise in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# Octal escape: everything after the backslash is the octal number.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4917 r"""
4918 Helper to easily define string ranges for use in Word construction. Borrows
4919 syntax from regexp '[]' string range definitions::
4920 srange("[0-9]") -> "0123456789"
4921 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
4922 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
4923 The input string must be enclosed in []'s, and the returned string is the expanded
4924 character set joined into a single string.
4925 The values enclosed in the []'s may be:
4926 - a single character
4927 - an escaped character with a leading backslash (such as C{\-} or C{\]})
4928 - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
4929 (C{\0x##} is also supported for backwards compatibility)
4930 - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
4931 - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
4932 - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
4933 """
4934 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
4935 try:
4936 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
4937 except Exception:
4938 return ""
4939
4941 """
4942 Helper method for defining parse actions that require matching at a specific
4943 column in the input text.
4944 """
4945 def verifyCol(strg,locn,toks):
4946 if col(locn,strg) != n:
4947 raise ParseException(strg,locn,"matched token not at column %d" % n)
4948 return verifyCol
4949
4951 """
4952 Helper method for common parse actions that simply return a literal value. Especially
4953 useful when used with C{L{transformString<ParserElement.transformString>}()}.
4954
4955 Example::
4956 num = Word(nums).setParseAction(lambda toks: int(toks[0]))
4957 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
4958 term = na | num
4959
4960 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
4961 """
4962 return lambda s,l,t: [replStr]
4963
4965 """
4966 Helper parse action for removing quotation marks from parsed quoted strings.
4967
4968 Example::
4969 # by default, quotation marks are included in parsed results
4970 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
4971
4972 # use removeQuotes to strip quotation marks from parsed results
4973 quotedString.setParseAction(removeQuotes)
4974 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
4975 """
4976 return t[0][1:-1]
4977
4979 """
4980 Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
4981 args are passed, they are forwarded to the given function as additional arguments after
4982 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
4983 parsed data to an integer using base 16.
4984
4985 Example (compare the last example to the one in L{ParserElement.transformString})::
4986 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
4987 hex_ints.runTests('''
4988 00 11 22 aa FF 0a 0d 1a
4989 ''')
4990
4991 upperword = Word(alphas).setParseAction(tokenMap(str.upper))
4992 OneOrMore(upperword).runTests('''
4993 my kingdom for a horse
4994 ''')
4995
4996 wd = Word(alphas).setParseAction(tokenMap(str.title))
4997 OneOrMore(wd).setParseAction(' '.join).runTests('''
4998 now is the winter of our discontent made glorious summer by this sun of york
4999 ''')
5000 prints::
5001 00 11 22 aa FF 0a 0d 1a
5002 [0, 17, 34, 170, 255, 10, 13, 26]
5003
5004 my kingdom for a horse
5005 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
5006
5007 now is the winter of our discontent made glorious summer by this sun of york
5008 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
5009 """
5010 def pa(s,l,t):
5011 return [func(tokn, *args) for tokn in t]
5012
5013 try:
5014 func_name = getattr(func, '__name__',
5015 getattr(func, '__class__').__name__)
5016 except Exception:
5017 func_name = str(func)
5018 pa.__name__ = func_name
5019
5020 return pa
5021
# Both actions are built with tokenMap, which applies the given function to every
# token; each token is passed through _ustr before its case is changed.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
5056
5075
5084
def withAttribute(*args, **attrDict):
    """
    Build a validating parse action for start tags created with
    C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Attaching the action to a start
    tag restricts matches to tags carrying the required attribute values,
    which avoids false matches on common tags such as C{<TD>} or C{<DIV>}.

    The required attributes can be supplied as:
     - keyword arguments, as in C{(align="right")}, or
     - an explicit dict expanded with C{**}, when an attribute name is also a
       Python reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - positional name-value tuples, as in
       C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
    Attribute names with a namespace prefix cannot be written as plain
    keyword arguments; use the dict or tuple form for those.  When positional
    tuples are given, any keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Returns a parse action taking C{(s, l, tokens)} that raises
    L{ParseException} when a required attribute is absent from C{tokens} or
    has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Positional pairs win over keyword arguments; either way each entry
    # must unpack into exactly (name, value).
    required = [(name, value) for name, value in (args or attrDict.items())]

    def pa(s, l, tokens):
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s, l, "no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                         (name, actual, expected))
    return pa
# Sentinel meaning "attribute must be present, any value is acceptable".
withAttribute.ANY_VALUE = object()
5149
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when filtering start tags on their
    C{class} attribute, which is awkward to express as a keyword argument
    because C{class} is a reserved word in Python.

    Parameters:
     - classname - required value of the class attribute (or
       C{withAttribute.ANY_VALUE} to accept any value)
     - namespace - optional namespace prefix; when given, the attribute
       checked is C{"<namespace>:class"} instead of C{"class"}

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = "%s:class" % namespace
    else:
        attr_name = "class"
    # The attribute name is not a valid keyword, so pass it through a dict.
    return withAttribute(**{attr_name: classname})
5184
# Associativity markers for infixNotation operator definitions; each is a
# unique sentinel object.
opAssoc = SimpleNamespace()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions,
          for the two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2 or 3, if a ternary opExpr
    is not a pair of expressions, or if rightLeftAssoc is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # Lookahead that only tests for a match: it runs no parse actions and
    # contributes no tokens or results names.
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # tuple() lets a level be given as a list; the padding makes the
        # parse action member optional.
        opExpr, arity, rightLeftAssoc, pa = (tuple(operDef) + (None,))[:4]
        if arity not in (1, 2, 3):
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        # Validate a ternary operator pair *before* formatting the term
        # name, so a malformed opExpr reports the intended ValueError.
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # implicit operator: adjacent terms with nothing between them
                    matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            else:
                matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # wrap the operator in Optional so the recursive reference
                # to thisExpr is not a mandatory left-recursive match
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            else:
                matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # Each level matches its own operator form or falls through to the
        # previous level.
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5306
# Backward-compatible alias for code written against the earlier name.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Inside the quotes each regex accepts any
# character other than the quote itself, a newline, or a backslash; a doubled
# quote character; or a backslash escape (including \x followed by hex
# digits).  The closing quote is matched separately and joined via Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# Either quoting style, double-quoted alternative tried first.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# A quoted string with a leading 'u' prefix; uses a copy so the shared
# quotedString expression is not modified.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Build an expression matching nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    When no C{content} expression is given, everything between the delimiters
    is captured as a list of whitespace-delimited values; in that case both
    C{opener} and C{closer} must be strings.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
    omitted and either delimiter is not a string.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        strip_token = lambda t: t[0].strip()
        white = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer) == 1:
            # Single-character delimiters can simply be excluded from the
            # characters that make up a content word.
            excluded = opener + closer + white
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(strip_token)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                            CharsNotIn(excluded, exact=1))
                                  ).setParseAction(strip_token)
        else:
            # Multi-character delimiters need negative lookaheads before
            # each content character.
            if ignoreExpr is None:
                content_char = (~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(white, exact=1))
            else:
                content_char = (~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(white, exact=1))
            content = Combine(OneOrMore(content_char)).setParseAction(strip_token)

    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5405
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Build an expression for a block of statements delimited by indentation,
    such as the block statements of Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most statements
       (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{indentStack} is mutated while parsing (columns are pushed on
    indent and popped on unindent), and C{blockStatementExpr} is modified to
    ignore backslash line continuations.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # At end of input there is nothing left to compare.
        if l >= len(s):
            return
        column = col(l,s)
        current = indentStack[-1]
        if column == current:
            return
        if column > current:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the enclosing block: open a new indentation level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        dedented = indentStack and column < indentStack[-1] and column <= indentStack[-2]
        if not dedented:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        # indented block: push a level, parse peer statements, pop the level
        smExpr = Group( Optional(NL)
                        + INDENT
                        + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) ))
                        + UNDENT )
    else:
        # left-most block: peer statements only, no level change
        smExpr = Group( Optional(NL)
                        + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5519
# Character sets built with srange from 8-bit code point ranges:
# 0xc0-0xff minus 0xd7 and 0xf7 for alphas8bit, and 0xa1-0xbf plus 0xd7 and
# 0xf7 for punc8bit.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Generic opening/closing tag expressions for any tag name made of letters,
# digits, '_' and ':'.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# Maps entity names to replacement characters, paired positionally:
# gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# Matches '&<name>;' for the names above, exposing the name as the
# 'entity' named group.
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # The entity name comes from the 'entity' named group on the tokens;
    # names missing from the map yield None.
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
5529
5530
# Predefined comment expressions for common languages.
# The regex consumes '/*' and everything up to, but not including, the first
# '*/'; the terminator is matched as a separate literal and joined by Combine.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# Non-greedy match so the comment ends at the first '-->'.
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# Everything up to the end of the current line, without skipping leading
# whitespace first.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# A backslash immediately before the newline continues the comment onto the
# next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# Same expression object as cppStyleComment, not a copy.
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One unquoted list item: runs of printable non-comma characters, optionally
# followed by spaces/tabs that do not lead into a comma or end of line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Empty items between commas are returned as "" (the Optional default).
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5559 """
5560 Here are some common low-level expressions that may be useful in jump-starting parser development:
5561 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
5562 - common L{programming identifiers<identifier>}
5563 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
5564 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
5565 - L{UUID<uuid>}
5566 - L{comma-separated list<comma_separated_list>}
5567 Parse actions:
5568 - C{L{convertToInteger}}
5569 - C{L{convertToFloat}}
5570 - C{L{convertToDate}}
5571 - C{L{convertToDatetime}}
5572 - C{L{stripHTMLTags}}
5573 - C{L{upcaseTokens}}
5574 - C{L{downcaseTokens}}
5575
5576 Example::
5577 pyparsing_common.number.runTests('''
5578 # any int or real number, returned as the appropriate type
5579 100
5580 -100
5581 +100
5582 3.14159
5583 6.02e23
5584 1e-12
5585 ''')
5586
5587 pyparsing_common.fnumber.runTests('''
5588 # any int or real number, returned as float
5589 100
5590 -100
5591 +100
5592 3.14159
5593 6.02e23
5594 1e-12
5595 ''')
5596
5597 pyparsing_common.hex_integer.runTests('''
5598 # hex numbers
5599 100
5600 FF
5601 ''')
5602
5603 pyparsing_common.fraction.runTests('''
5604 # fractions
5605 1/2
5606 -3/4
5607 ''')
5608
5609 pyparsing_common.mixed_integer.runTests('''
5610 # mixed fractions
5611 1
5612 1/2
5613 -3/4
5614 1-3/4
5615 ''')
5616
5617 import uuid
5618 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
5619 pyparsing_common.uuid.runTests('''
5620 # uuid
5621 12345678-1234-5678-1234-567812345678
5622 ''')
5623 prints::
5624 # any int or real number, returned as the appropriate type
5625 100
5626 [100]
5627
5628 -100
5629 [-100]
5630
5631 +100
5632 [100]
5633
5634 3.14159
5635 [3.14159]
5636
5637 6.02e23
5638 [6.02e+23]
5639
5640 1e-12
5641 [1e-12]
5642
5643 # any int or real number, returned as float
5644 100
5645 [100.0]
5646
5647 -100
5648 [-100.0]
5649
5650 +100
5651 [100.0]
5652
5653 3.14159
5654 [3.14159]
5655
5656 6.02e23
5657 [6.02e+23]
5658
5659 1e-12
5660 [1e-12]
5661
5662 # hex numbers
5663 100
5664 [256]
5665
5666 FF
5667 [255]
5668
5669 # fractions
5670 1/2
5671 [0.5]
5672
5673 -3/4
5674 [-0.75]
5675
5676 # mixed fractions
5677 1
5678 [1]
5679
5680 1/2
5681 [0.5]
5682
5683 -3/4
5684 [-0.75]
5685
5686 1-3/4
5687 [1.75]
5688
5689 # uuid
5690 12345678-1234-5678-1234-567812345678
5691 [UUID('12345678-1234-5678-1234-567812345678')]
5692 """
5693
    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # No '0x' prefix is expected; the matched hex digits are converted with int(tok, 16).
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # Calling signed_integer() yields copies, so replacing their parse
    # actions here leaves signed_integer itself returning ints.
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # Tokens are [numerator, '/', denominator]; reduce them to the quotient.
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # The separating '-' is suppressed, so the integer and fraction parts are simply summed.
    mixed_integer.addParseAction(sum)

    # Requires digits before the decimal point; digits after it are optional.
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # Requires an exponent or a decimal point, so plain integers do not match.
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # Alternatives are tried in order: sci_real, then real, then signed_integer.
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # Four dot-separated octets, each limited by the regex to 0-255.
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # IPv6 building blocks: a group of 1-4 hex digits, the full 8-group form,
    # the '::' shortened form, and the '::ffff:' + IPv4 mixed form.
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # A shortened address must contain fewer than 8 hex groups in total.
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # Six two-digit hex groups; the backreference \1 forces every delimiter
    # to be the same character as the first one.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
5750
5751 @staticmethod
5753 """
5754 Helper to create a parse action for converting parsed date string to Python datetime.date
5755
5756 Params -
5757 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
5758
5759 Example::
5760 date_expr = pyparsing_common.iso8601_date.copy()
5761 date_expr.setParseAction(pyparsing_common.convertToDate())
5762 print(date_expr.parseString("1999-12-31"))
5763 prints::
5764 [datetime.date(1999, 12, 31)]
5765 """
5766 def cvt_fn(s,l,t):
5767 try:
5768 return datetime.strptime(t[0], fmt).date()
5769 except ValueError as ve:
5770 raise ParseException(s, l, str(ve))
5771 return cvt_fn
5772
5773 @staticmethod
5775 """
5776 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
5777
5778 Params -
5779 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
5780
5781 Example::
5782 dt_expr = pyparsing_common.iso8601_datetime.copy()
5783 dt_expr.setParseAction(pyparsing_common.convertToDatetime())
5784 print(dt_expr.parseString("1999-12-31T23:59:59.999"))
5785 prints::
5786 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
5787 """
5788 def cvt_fn(s,l,t):
5789 try:
5790 return datetime.strptime(t[0], fmt)
5791 except ValueError as ve:
5792 raise ParseException(s, l, str(ve))
5793 return cvt_fn
5794
    # Month and day are optional in the regex, so 'yyyy' and 'yyyy-mm' also
    # match; the parts are exposed as named groups year/month/day.
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    # Named groups: year, month, day, hour, minute, second (with optional
    # fractional digits) and tz ('Z' or a +/-hh:mm / +/-hhmm offset).
    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    # Hex groups of 8-4-4-4-12 digits; the match is returned as a string.
    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
5803
5804 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
5805 @staticmethod
5819
    # One unquoted list item: words of printable non-comma characters, each
    # optionally followed by spaces/tabs, stopping before a comma or end of line.
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                      + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # Empty items between commas are returned as "" (the Optional default).
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # Wrapped in staticmethod so the parse actions are not bound as methods
    # when accessed through the class.
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5830
5835
5837 if cls is None:
5838 cls = type(obj)
5839 ret = self.fn(cls)
5840 setattr(cls, self.fn.__name__, ret)
5841 return ret
5842
5845 _ranges = []
5846
5847 @_lazyclassproperty
5849 return ''.join(filterfalse(unicode.isspace, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
5850
5851 @_lazyclassproperty
5853 return ''.join(filter(unicode.isalpha, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
5854
5855 @_lazyclassproperty
5857 return ''.join(filter(unicode.isdigit, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
5858
5859 @_lazyclassproperty
5862
5865 _ranges = [(32, sys.maxunicode)]
5866
5868 _ranges = [
5869 (0x0020, 0x007e), (0x00a0, 0x00ff),
5870 ]
5871
5872 - class Greek(unicode_set):
5873 _ranges = [
5874 (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
5875 (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
5876 (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
5877 ]
5878
5881
5884
5886 _ranges = [ ]
5887
5888 - class Kanji(unicode_set):
5889 _ranges = [(0x4E00, 0x9Fbf), ]
5890
5892 _ranges = [(0x3040, 0x309f), ]
5893
5895 _ranges = [(0x30a0, 0x30ff), ]
5896
5898 _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), ]
5899
5900 - class CJK(unicode_set):
5903
5904 - class Thai(unicode_set):
5905 _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]
5906
5908 _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f), ]
5909
5911 _ranges = [(0x0590, 0x05ff), ]
5912
5914 _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
5915
5916 pyparsing_unicode.Japanese._ranges = pyparsing_unicode.Japanese.Kanji._ranges + pyparsing_unicode.Japanese.Hiragana._ranges + pyparsing_unicode.Japanese.Katakana._ranges
5917 pyparsing_unicode.CJK._ranges = pyparsing_unicode.Chinese._ranges + pyparsing_unicode.Japanese._ranges + pyparsing_unicode.Korean._ranges
5918
5919
# Under Python 3, also expose the character sets under their native-language
# names (non-ASCII attribute names).
if PY_3:
    for _alias, _charset in (
            ("العربية", pyparsing_unicode.Arabic),
            ("中文", pyparsing_unicode.Chinese),
            ("кириллица", pyparsing_unicode.Cyrillic),
            ("Ελληνικά", pyparsing_unicode.Greek),
            ("עִברִית", pyparsing_unicode.Hebrew),
            ("日本語", pyparsing_unicode.Japanese),
            ("한국어", pyparsing_unicode.Korean),
            ("ไทย", pyparsing_unicode.Thai),
            ("देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(pyparsing_unicode, _alias, _charset)

    # The Japanese sub-scripts are aliased on the Japanese class itself.
    for _alias, _charset in (
            ("漢字", pyparsing_unicode.Japanese.Kanji),
            ("カタカナ", pyparsing_unicode.Japanese.Katakana),
            ("ひらがな", pyparsing_unicode.Japanese.Hiragana),
    ):
        setattr(pyparsing_unicode.Japanese, _alias, _charset)

    # Do not leave the loop variables behind in the module namespace.
    del _alias, _charset
5933
5934
# Self-test: running this module as a script exercises a small SQL SELECT
# grammar and several pyparsing_common expressions through runTests.
if __name__ == "__main__":

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # Dotted names are combined into one token and upper-cased.
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
# '*' as column list and dotted table name
select * from SYS.XYZZY

# caseless match on "SELECT", and casts back to "select"
SELECT * from XYZZY, ABC

# list of column names, and mixed case SELECT keyword
Select AA,BB,CC from Sys.dual

# multiple tables
Select A, B, C from Sys.dual, Table2

# invalid SELECT keyword - should fail
Xelect A, B, C from Sys.dual

# incomplete command - should fail
Select

# invalid column name - should fail
Select ^^^ frox Sys.dual

""")

    pyparsing_common.number.runTests("""
100
-100
+100
3.14159
6.02e23
1e-12
""")

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
100
-100
+100
3.14159
6.02e23
1e-12
""")

    pyparsing_common.hex_integer.runTests("""
100
FF
""")

    # Convert matched UUID strings to uuid.UUID objects before running the test.
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
12345678-1234-5678-1234-567812345678
""")
6005