1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28 =============================================================================
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form
36 C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
37 (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38 L{Literal} expressions)::
39
40 from pyparsing import Word, alphas
41
42 # define grammar of a greeting
43 greet = Word(alphas) + "," + Word(alphas) + "!"
44
45 hello = "Hello, World!"
46 print (hello, "->", greet.parseString(hello))
47
48 The program outputs the following::
49
50 Hello, World! -> ['Hello', ',', 'World', '!']
51
52 The Python representation of the grammar is quite readable, owing to the self-explanatory
53 class names, and the use of '+', '|' and '^' operators.
54
55 The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56 object with named attributes.
57
58 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
60 - quoted strings
61 - embedded comments
62
63
64 Getting Started -
65 -----------------
66 Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
67 classes inherit from. Use the docstrings for examples of how to:
68 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
69 - construct character word-group expressions using the L{Word} class
70 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
71 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
72 - associate names with your parsed results using L{ParserElement.setResultsName}
73 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
74 - find more useful common expressions in the L{pyparsing_common} namespace class
75 """
76
77 __version__ = "2.2.2"
78 __versionTime__ = "29 Sep 2018 15:58 UTC"
79 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
80
81 import string
82 from weakref import ref as wkref
83 import copy
84 import sys
85 import warnings
86 import re
87 import sre_constants
88 import collections
89 import pprint
90 import traceback
91 import types
92 from datetime import datetime
93
94 try:
95 from _thread import RLock
96 except ImportError:
97 from threading import RLock
98
99 try:
100
101 from collections.abc import Iterable
102 from collections.abc import MutableMapping
103 except ImportError:
104
105 from collections import Iterable
106 from collections import MutableMapping
107
108 try:
109 from collections import OrderedDict as _OrderedDict
110 except ImportError:
111 try:
112 from ordereddict import OrderedDict as _OrderedDict
113 except ImportError:
114 _OrderedDict = None
115
116
117
118 __all__ = [
119 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
120 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
121 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
122 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
123 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
124 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
125 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
126 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
127 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
128 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
129 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
130 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
131 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
132 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
133 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
134 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
135 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
136 'CloseMatch', 'tokenMap', 'pyparsing_common',
137 ]
138
139 system_version = tuple(sys.version_info)[:3]
140 PY_3 = system_version[0] == 3
141 if PY_3:
142 _MAX_INT = sys.maxsize
143 basestring = str
144 unichr = chr
145 _ustr = str
146
147
148 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
149
150 else:
151 _MAX_INT = sys.maxint
152 range = xrange
155 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
156 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
157 then < returns the unicode object | encodes it with the default encoding | ... >.
158 """
159 if isinstance(obj,unicode):
160 return obj
161
162 try:
163
164
165 return str(obj)
166
167 except UnicodeEncodeError:
168
169 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
170 xmlcharref = Regex(r'&#\d+;')
171 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
172 return xmlcharref.transformString(ret)
173
174
175 singleArgBuiltins = []
176 import __builtin__
177 for fname in "sum len sorted reversed list tuple set any all min max".split():
178 try:
179 singleArgBuiltins.append(getattr(__builtin__,fname))
180 except AttributeError:
181 continue
182
183 _generatorType = type((y for y in range(1)))
186 """Escape &, <, >, ", ', etc. in a string of data."""
187
188
189 from_symbols = '&><"\''
190 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
191 for from_,to_ in zip(from_symbols, to_symbols):
192 data = data.replace(from_, to_)
193 return data
194
197
# Character-set constants used as building blocks for Word-style expressions.
nums = string.digits
# upper-case letters first, then lower-case (the order is part of the value)
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = "\\"
# every printable ASCII character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
206 """base exception class for all parsing runtime exceptions"""
207
208
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """
    Record where and why a parse failed.

    Parameters:
     - pstr - the string being parsed; when C{msg} is omitted this single
       string is taken to be the message instead
     - loc - character offset associated with the exception (default=0)
     - msg - the error message, or C{None} to use C{pstr} as the message
     - elem - the parser element stored as C{parserElement}
    """
    # With no explicit message the lone string argument *is* the message,
    # and there is no parse text to report against.
    if msg is None:
        message, text = pstr, ""
    else:
        message, text = msg, pstr
    self.loc = loc
    self.msg = message
    self.pstr = text
    self.parserElement = elem
    # args always keeps the arguments exactly as they were passed in
    self.args = (pstr, loc, msg)
219
220 @classmethod
222 """
223 internal factory method to simplify creating one type of ParseException
224 from another - avoids having __init__ signature conflicts among subclasses
225 """
226 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
227
229 """supported attributes by name are:
230 - lineno - returns the line number of the exception text
231 - col - returns the column number of the exception text
232 - line - returns the line containing the exception text
233 """
234 if( aname == "lineno" ):
235 return lineno( self.loc, self.pstr )
236 elif( aname in ("col", "column") ):
237 return col( self.loc, self.pstr )
238 elif( aname == "line" ):
239 return line( self.loc, self.pstr )
240 else:
241 raise AttributeError(aname)
242
244 return "%s (at char %d), (line:%d, col:%d)" % \
245 ( self.msg, self.loc, self.lineno, self.column )
259 return "lineno col line".split() + dir(type(self))
260
262 """
263 Exception thrown when parse expressions don't match class;
264 supported attributes by name are:
265 - lineno - returns the line number of the exception text
266 - col - returns the column number of the exception text
267 - line - returns the line containing the exception text
268
269 Example::
270 try:
271 Word(nums).setName("integer").parseString("ABC")
272 except ParseException as pe:
273 print(pe)
274 print("column: {}".format(pe.col))
275
276 prints::
277 Expected integer (at char 0), (line:1, col:1)
278 column: 1
279 """
280 pass
281
283 """user-throwable exception thrown when inconsistent parse content
284 is found; stops all parsing immediately"""
285 pass
286
288 """just like L{ParseFatalException}, but thrown internally when an
289 L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
290 immediately because an unbacktrackable syntax error has been found"""
291 pass
292
307 """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
def __init__(self, parseElementList):
    """
    Keep the list of parser elements handed in by the caller.

    Parameters:
     - parseElementList - stored unchanged as C{parseElementTrace}
    """
    trace = parseElementList
    self.parseElementTrace = trace
310
312 return "RecursiveGrammarException: %s" % self.parseElementTrace
313
320 return repr(self.tup[0])
322 self.tup = (self.tup[0],i)
323
325 """
326 Structured parse results, to provide multiple means of access to the parsed data:
327 - as a list (C{len(results)})
328 - by list index (C{results[0], results[1]}, etc.)
329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
330
331 Example::
332 integer = Word(nums)
333 date_str = (integer.setResultsName("year") + '/'
334 + integer.setResultsName("month") + '/'
335 + integer.setResultsName("day"))
336 # equivalent form:
337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338
339 # parseString returns a ParseResults object
340 result = date_str.parseString("1999/12/31")
341
342 def test(s, fn=repr):
343 print("%s -> %s" % (s, fn(eval(s))))
344 test("list(result)")
345 test("result[0]")
346 test("result['month']")
347 test("result.day")
348 test("'month' in result")
349 test("'minutes' in result")
350 test("result.dump()", str)
351 prints::
352 list(result) -> ['1999', '/', '12', '/', '31']
353 result[0] -> '1999'
354 result['month'] -> '12'
355 result.day -> '31'
356 'month' in result -> True
357 'minutes' in result -> False
358 result.dump() -> ['1999', '/', '12', '/', '31']
359 - day: 31
360 - month: 12
361 - year: 1999
362 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """
    Allocate a results object, or reuse C{toklist} when it already is one.

    If C{toklist} is an instance of C{cls} it is returned as-is; otherwise a
    new object is created and flagged so that C{__init__} performs its
    one-time setup.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # consulted (and cleared) by __init__
        fresh.__doinit = True
        return fresh
    return toklist
369
370
371
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """
    Build the token list and, when a C{name} is given, file the tokens under
    that results name.

    Parameters:
     - toklist - a list (copied), a generator (consumed into a list), or any
       other single value (wrapped in a one-element list); C{None} means empty
     - name - results name to register the tokens under; ignored when falsy
     - asList - when true the named value is stored as a nested C{ParseResults}
     - modal - when false, C{name} is recorded as an accumulating name
     - isinstance - the builtin, bound as a default argument
    """
    # One-time state setup: only runs while the __doinit flag is still set.
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            tokens = toklist[:]
        elif isinstance(toklist, _generatorType):
            tokens = list(toklist)
        else:
            tokens = [toklist]
        self.__toklist = tokens
        self.__tokdict = dict()

    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    # Nothing to register when the tokens are None, '' or [].
    if isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', []):
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        # prefer the first token; fall back to the whole value if it cannot be indexed
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        named = toklist.copy()
    else:
        named = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(named, 0)
    self[name].__name = name
410
412 if isinstance( i, (int,slice) ):
413 return self.__toklist[i]
414 else:
415 if i not in self.__accumNames:
416 return self.__tokdict[i][-1][0]
417 else:
418 return ParseResults([ v[0] for v in self.__tokdict[i] ])
419
421 if isinstance(v,_ParseResultsWithOffset):
422 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
423 sub = v[0]
424 elif isinstance(k,(int,slice)):
425 self.__toklist[k] = v
426 sub = v
427 else:
428 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
429 sub = v
430 if isinstance(sub,ParseResults):
431 sub.__parent = wkref(self)
432
434 if isinstance(i,(int,slice)):
435 mylen = len( self.__toklist )
436 del self.__toklist[i]
437
438
439 if isinstance(i, int):
440 if i < 0:
441 i += mylen
442 i = slice(i, i+1)
443
444 removed = list(range(*i.indices(mylen)))
445 removed.reverse()
446
447 for name,occurrences in self.__tokdict.items():
448 for j in removed:
449 for k, (value, position) in enumerate(occurrences):
450 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
451 else:
452 del self.__tokdict[i]
453
455 return k in self.__tokdict
456
def __len__(self):
    """Number of tokens in the list form of the results."""
    return len(self.__toklist)

def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)

# Python 2 spelling of the truth-value hook
__nonzero__ = __bool__

def __iter__(self):
    """Iterate over the tokens in order."""
    return iter(self.__toklist)

def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
463 if hasattr(self.__tokdict, "iterkeys"):
464 return self.__tokdict.iterkeys()
465 else:
466 return iter(self.__tokdict)
467
469 return (self[k] for k in self._iterkeys())
470
472 return ((k, self[k]) for k in self._iterkeys())
473
474 if PY_3:
475 keys = _iterkeys
476 """Returns an iterator of all named result keys (Python 3.x only)."""
477
478 values = _itervalues
479 """Returns an iterator of all named result values (Python 3.x only)."""
480
481 items = _iteritems
482 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
483
484 else:
485 iterkeys = _iterkeys
486 """Returns an iterator of all named result keys (Python 2.x only)."""
487
488 itervalues = _itervalues
489 """Returns an iterator of all named result values (Python 2.x only)."""
490
491 iteritems = _iteritems
492 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
493
495 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
496 return list(self.iterkeys())
497
499 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
500 return list(self.itervalues())
501
503 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
504 return list(self.iteritems())
505
507 """Since keys() returns an iterator, this method is helpful in bypassing
508 code that looks for the existence of any defined results names."""
509 return bool(self.__tokdict)
510
def pop(self, *args, **kwargs):
    """
    Removes and returns an item, with either C{list} or C{dict} semantics.

    Called with no argument (default index C{-1}) or with an integer, the
    item is looked up by index. Called with any other key, the item is
    looked up by that key; a second positional argument (or C{default=})
    is returned instead when the key is not present, as in C{dict.pop()}.

    Raises C{TypeError} for any keyword argument other than C{default}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    if not args:
        args = [-1]
    for kw, val in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        args = (args[0], val)

    key = args[0]
    # Only a non-integer key that is absent, with a fallback supplied,
    # short-circuits to the fallback; everything else is a real removal.
    if not isinstance(key, int) and len(args) != 1 and key not in self:
        return args[1]

    removed = self[key]
    del self[key]
    return removed
562
def get(self, key, defaultValue=None):
    """
    Looks up C{key} and returns its value when C{key in self}; otherwise
    returns C{defaultValue} (C{None} unless specified).

    Similar to C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
584
def insert(self, index, insStr):
    """
    Inserts C{insStr} at position C{index} in the list of parsed tokens and
    adjusts the stored offsets of the named results.

    Similar to C{list.insert()}.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)

    # Bump by one every recorded offset that lies strictly after the insertion index.
    # NOTE(review): an offset equal to index is left alone - confirm that is intended.
    for occurrences in self.__tokdict.values():
        occurrences[:] = [
            _ParseResultsWithOffset(value, position + (position > index))
            for value, position in occurrences
        ]
604
606 """
607 Add single element to end of ParseResults list of elements.
608
609 Example::
610 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
611
612 # use a parse action to compute the sum of the parsed integers, and add it to the end
613 def append_sum(tokens):
614 tokens.append(sum(map(int, tokens)))
615 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
616 """
617 self.__toklist.append(item)
618
620 """
621 Add sequence of elements to end of ParseResults list of elements.
622
623 Example::
624 patt = OneOrMore(Word(alphas))
625
626 # use a parse action to append the reverse of the matched strings, to make a palindrome
627 def make_palindrome(tokens):
628 tokens.extend(reversed([t[::-1] for t in tokens]))
629 return ''.join(tokens)
630 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
631 """
632 if isinstance(itemseq, ParseResults):
633 self += itemseq
634 else:
635 self.__toklist.extend(itemseq)
636
638 """
639 Clear all elements and results names.
640 """
641 del self.__toklist[:]
642 self.__tokdict.clear()
643
645 try:
646 return self[name]
647 except KeyError:
648 return ""
649
650 if name in self.__tokdict:
651 if name not in self.__accumNames:
652 return self.__tokdict[name][-1][0]
653 else:
654 return ParseResults([ v[0] for v in self.__tokdict[name] ])
655 else:
656 return ""
657
659 ret = self.copy()
660 ret += other
661 return ret
662
664 if other.__tokdict:
665 offset = len(self.__toklist)
666 addoffset = lambda a: offset if a<0 else a+offset
667 otheritems = other.__tokdict.items()
668 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
669 for (k,vlist) in otheritems for v in vlist]
670 for k,v in otherdictitems:
671 self[k] = v
672 if isinstance(v[0],ParseResults):
673 v[0].__parent = wkref(self)
674
675 self.__toklist += other.__toklist
676 self.__accumNames.update( other.__accumNames )
677 return self
678
680 if isinstance(other,int) and other == 0:
681
682 return self.copy()
683 else:
684
685 return other + self
686
688 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689
691 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692
694 out = []
695 for item in self.__toklist:
696 if out and sep:
697 out.append(sep)
698 if isinstance( item, ParseResults ):
699 out += item._asStringList()
700 else:
701 out.append( _ustr(item) )
702 return out
703
705 """
706 Returns the parse results as a nested list of matching tokens, all converted to strings.
707
708 Example::
709 patt = OneOrMore(Word(alphas))
710 result = patt.parseString("sldkj lsdkj sldkj")
711 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
712 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
713
714 # Use asList() to create an actual list
715 result_list = result.asList()
716 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
717 """
718 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719
721 """
722 Returns the named parse results as a nested dictionary.
723
724 Example::
725 integer = Word(nums)
726 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
727
728 result = date_str.parseString('12/31/1999')
729 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
730
731 result_dict = result.asDict()
732 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
733
734 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
735 import json
736 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
737 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
738 """
739 if PY_3:
740 item_fn = self.items
741 else:
742 item_fn = self.iteritems
743
744 def toItem(obj):
745 if isinstance(obj, ParseResults):
746 if obj.haskeys():
747 return obj.asDict()
748 else:
749 return [toItem(v) for v in obj]
750 else:
751 return obj
752
753 return dict((k,toItem(v)) for k,v in item_fn())
754
756 """
757 Returns a new copy of a C{ParseResults} object.
758 """
759 ret = ParseResults( self.__toklist )
760 ret.__tokdict = self.__tokdict.copy()
761 ret.__parent = self.__parent
762 ret.__accumNames.update( self.__accumNames )
763 ret.__name = self.__name
764 return ret
765
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    Parameters:
     - doctag - tag for this element; falls back to this object's own name, then C{"ITEM"}
     - namedItemsOnly - skip tokens without a results name (returns C{""} if this element itself is unnamed)
     - indent - leading text placed before this element's tags
     - formatted - when false, newlines and indentation are omitted
    """
    # token offset -> results name registered at that offset
    namedItems = {}
    for tagName, vlist in self.__tokdict.items():
        for v in vlist:
            namedItems[v[1]] = tagName

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        # collapse all layout whitespace
        nl = indent = nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [nl, indent, "<", selfTag, ">"]
    childNamedOnly = namedItemsOnly and doctag is None

    for i, res in enumerate(self.__toklist):
        resTag = namedItems.get(i)
        if isinstance(res, ParseResults):
            # nested results render themselves one level deeper
            out.append(res.asXML(resTag, childNamedOnly, nextLevelIndent, formatted))
            continue
        # individual token: decide on a tag, or skip it
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", resTag, ">",
                    xmlBodyText,
                    "</", resTag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)
826
828 for k,vlist in self.__tokdict.items():
829 for v,loc in vlist:
830 if sub is v:
831 return k
832 return None
833
835 r"""
836 Returns the results name for this token expression. Useful when several
837 different expressions might match at a particular location.
838
839 Example::
840 integer = Word(nums)
841 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
842 house_number_expr = Suppress('#') + Word(nums, alphanums)
843 user_data = (Group(house_number_expr)("house_number")
844 | Group(ssn_expr)("ssn")
845 | Group(integer)("age"))
846 user_info = OneOrMore(user_data)
847
848 result = user_info.parseString("22 111-22-3333 #221B")
849 for item in result:
850 print(item.getName(), ':', item[0])
851 prints::
852 age : 22
853 ssn : 111-22-3333
854 house_number : 221B
855 """
856 if self.__name:
857 return self.__name
858 elif self.__parent:
859 par = self.__parent()
860 if par:
861 return par.__lookup(self)
862 else:
863 return None
864 elif (len(self) == 1 and
865 len(self.__tokdict) == 1 and
866 next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
867 return next(iter(self.__tokdict.keys()))
868 else:
869 return None
870
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic method for listing out the contents of a C{ParseResults}.
    C{indent} is prefixed to each emitted line so the text can be embedded
    in a nested display of other data; C{depth} controls the extra padding
    used for nested levels; with C{full} false only the list form is returned.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [indent + _ustr(self.asList())]
    if not full:
        return "".join(pieces)

    pad = ' ' * depth
    if self.haskeys():
        # named results, sorted by the string form of their keys
        for key, val in sorted((str(k), v) for k, v in self.items()):
            pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, pad, key))
            if not isinstance(val, ParseResults):
                pieces.append(repr(val))
            elif val:
                pieces.append(val.dump(indent, depth + 1))
            else:
                pieces.append(_ustr(val))
    elif any(isinstance(sub, ParseResults) for sub in self):
        # no names at this level: list the items by position instead
        childPad = ' ' * (depth + 1)
        for i, sub in enumerate(self):
            if isinstance(sub, ParseResults):
                text = sub.dump(indent, depth + 1)
            else:
                text = _ustr(sub)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, pad, i, indent, childPad, text))

    return "".join(pieces)
915
def pprint(self, *args, **kwargs):
    """
    Pretty-prints the list form of the results (C{self.asList()}) with the
    C{pprint} module. Any positional or keyword arguments are forwarded
    unchanged to C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
938
939
941 return ( self.__toklist,
942 ( self.__tokdict.copy(),
943 self.__parent is not None and self.__parent() or None,
944 self.__accumNames,
945 self.__name ) )
946
948 self.__toklist = state[0]
949 (self.__tokdict,
950 par,
951 inAccumNames,
952 self.__name) = state[1]
953 self.__accumNames = {}
954 self.__accumNames.update(inAccumNames)
955 if par is not None:
956 self.__parent = wkref(par)
957 else:
958 self.__parent = None
959
961 return self.__toklist, self.__name, self.__asList, self.__modal
962
964 return (dir(type(self)) + list(self.keys()))
965
966 MutableMapping.register(ParseResults)
967
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # Distance from the last newline before loc. rfind gives -1 when there is
    # none, and loc-1 when loc sits right after a newline, which yields 1.
    prevNewline = strg.rfind("\n", 0, loc)
    return loc - prevNewline
980
982 """Returns current line number within a string, counting newlines as line separators.
983 The first line is number 1.
984
985 Note: the default parsing behavior is to expand tabs in the input string
986 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
987 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
988 consistent view of the parsed string, the parse location, and line and column
989 positions within the parsed string.
990 """
991 return strg.count("\n",0,loc) + 1
992
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The returned text does not include the newline characters on either side.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    # no newline at or after loc: the line runs to the end of the string
    return strg[start:end] if end >= 0 else strg[start:]
1002
1004 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
1005
1007 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
1008
1010 print ("Exception raised:" + _ustr(exc))
1011
1013 """'Do-nothing' debug action, to suppress debugging output during parsing."""
1014 pass
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038 'decorator to trim function calls to match the arity of the target'
1040 if func in singleArgBuiltins:
1041 return lambda s,l,t: func(t)
1042 limit = [0]
1043 foundArity = [False]
1044
1045
1046 if system_version[:2] >= (3,5):
1047 def extract_stack(limit=0):
1048
1049 offset = -3 if system_version == (3,5,0) else -2
1050 frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
1051 return [frame_summary[:2]]
1052 def extract_tb(tb, limit=0):
1053 frames = traceback.extract_tb(tb, limit=limit)
1054 frame_summary = frames[-1]
1055 return [frame_summary[:2]]
1056 else:
1057 extract_stack = traceback.extract_stack
1058 extract_tb = traceback.extract_tb
1059
1060
1061
1062
1063 LINE_DIFF = 6
1064
1065
1066 this_line = extract_stack(limit=2)[-1]
1067 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
1068
1069 def wrapper(*args):
1070 while 1:
1071 try:
1072 ret = func(*args[limit[0]:])
1073 foundArity[0] = True
1074 return ret
1075 except TypeError:
1076
1077 if foundArity[0]:
1078 raise
1079 else:
1080 try:
1081 tb = sys.exc_info()[-1]
1082 if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
1083 raise
1084 finally:
1085 del tb
1086
1087 if limit[0] <= maxargs:
1088 limit[0] += 1
1089 continue
1090 raise
1091
1092
1093 func_name = "<parse action>"
1094 try:
1095 func_name = getattr(func, '__name__',
1096 getattr(func, '__class__').__name__)
1097 except Exception:
1098 func_name = str(func)
1099 wrapper.__name__ = func_name
1100
1101 return wrapper
1102
1104 """Abstract base level parser element class."""
1105 DEFAULT_WHITE_CHARS = " \n\t\r"
1106 verbose_stacktrace = False
1107
1108 @staticmethod
1110 r"""
1111 Overrides the default whitespace chars
1112
1113 Example::
1114 # default whitespace chars are space, <TAB> and newline
1115 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1116
1117 # change to just treat newline as significant
1118 ParserElement.setDefaultWhitespaceChars(" \t")
1119 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1120 """
1121 ParserElement.DEFAULT_WHITE_CHARS = chars
1122
1123 @staticmethod
1125 """
1126 Set class to be used for inclusion of string literals into a parser.
1127
1128 Example::
1129 # default literal class used is Literal
1130 integer = Word(nums)
1131 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1132
1133 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1134
1135
1136 # change to Suppress
1137 ParserElement.inlineLiteralsUsing(Suppress)
1138 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1139
1140 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1141 """
1142 ParserElement._literalStringClass = cls
1143
1145 self.parseAction = list()
1146 self.failAction = None
1147
1148 self.strRepr = None
1149 self.resultsName = None
1150 self.saveAsList = savelist
1151 self.skipWhitespace = True
1152 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1153 self.copyDefaultWhiteChars = True
1154 self.mayReturnEmpty = False
1155 self.keepTabs = False
1156 self.ignoreExprs = list()
1157 self.debug = False
1158 self.streamlined = False
1159 self.mayIndexError = True
1160 self.errmsg = ""
1161 self.modalResults = True
1162 self.debugActions = ( None, None, None )
1163 self.re = None
1164 self.callPreparse = True
1165 self.callDuringTry = False
1166
1168 """
1169 Make a copy of this C{ParserElement}. Useful for defining different parse actions
1170 for the same parsing pattern, using copies of the original parse element.
1171
1172 Example::
1173 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1174 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1175 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1176
1177 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1178 prints::
1179 [5120, 100, 655360, 268435456]
1180 Equivalent form of C{expr.copy()} is just C{expr()}::
1181 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1182 """
1183 cpy = copy.copy( self )
1184 cpy.parseAction = self.parseAction[:]
1185 cpy.ignoreExprs = self.ignoreExprs[:]
1186 if self.copyDefaultWhiteChars:
1187 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1188 return cpy
1189
1191 """
1192 Define name for this expression, makes debugging and exception messages clearer.
1193
1194 Example::
1195 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1196 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
1197 """
1198 self.name = name
1199 self.errmsg = "Expected " + self.name
1200 if hasattr(self,"exception"):
1201 self.exception.msg = self.errmsg
1202 return self
1203
1205 """
1206 Define name for referencing matching tokens as a nested attribute
1207 of the returned parse results.
1208 NOTE: this returns a *copy* of the original C{ParserElement} object;
1209 this is so that the client can define a basic element, such as an
1210 integer, and reference it in multiple places with different names.
1211
1212 You can also set results names using the abbreviated syntax,
1213 C{expr("name")} in place of C{expr.setResultsName("name")} -
1214 see L{I{__call__}<__call__>}.
1215
1216 Example::
1217 date_str = (integer.setResultsName("year") + '/'
1218 + integer.setResultsName("month") + '/'
1219 + integer.setResultsName("day"))
1220
1221 # equivalent form:
1222 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1223 """
1224 newself = self.copy()
1225 if name.endswith("*"):
1226 name = name[:-1]
1227 listAllMatches=True
1228 newself.resultsName = name
1229 newself.modalResults = not listAllMatches
1230 return newself
1231
1233 """Method to invoke the Python pdb debugger when this element is
1234 about to be parsed. Set C{breakFlag} to True to enable, False to
1235 disable.
1236 """
1237 if breakFlag:
1238 _parseMethod = self._parse
1239 def breaker(instring, loc, doActions=True, callPreParse=True):
1240 import pdb
1241 pdb.set_trace()
1242 return _parseMethod( instring, loc, doActions, callPreParse )
1243 breaker._originalParseMethod = _parseMethod
1244 self._parse = breaker
1245 else:
1246 if hasattr(self._parse,"_originalParseMethod"):
1247 self._parse = self._parse._originalParseMethod
1248 return self
1249
1251 """
1252 Define one or more actions to perform when successfully matching parse element definition.
1253 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1254 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1255 - s = the original string being parsed (see note below)
1256 - loc = the location of the matching substring
1257 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1258 If the functions in fns modify the tokens, they can return them as the return
1259 value from fn, and the modified list of tokens will replace the original.
1260 Otherwise, fn does not need to return any value.
1261
1262 Optional keyword arguments:
1263 - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1264
1265 Note: the default parsing behavior is to expand tabs in the input string
1266 before starting the parsing process. See L{I{parseString}<parseString>} for more information
1267 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1268 consistent view of the parsed string, the parse location, and line and column
1269 positions within the parsed string.
1270
1271 Example::
1272 integer = Word(nums)
1273 date_str = integer + '/' + integer + '/' + integer
1274
1275 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1276
1277 # use parse action to convert to ints at parse time
1278 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1279 date_str = integer + '/' + integer + '/' + integer
1280
1281 # note that integer fields are now ints, not strings
1282 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
1283 """
1284 self.parseAction = list(map(_trim_arity, list(fns)))
1285 self.callDuringTry = kwargs.get("callDuringTry", False)
1286 return self
1287
1289 """
1290 Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1291
1292 See examples in L{I{copy}<copy>}.
1293 """
1294 self.parseAction += list(map(_trim_arity, list(fns)))
1295 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1296 return self
1297
1299 """Add a boolean predicate function to expression's list of parse actions. See
1300 L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
1301 functions passed to C{addCondition} need to return boolean success/fail of the condition.
1302
1303 Optional keyword arguments:
1304 - message = define a custom message to be used in the raised exception
1305 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1306
1307 Example::
1308 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1309 year_int = integer.copy()
1310 year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1311 date_str = year_int + '/' + integer + '/' + integer
1312
1313 result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1314 """
1315 msg = kwargs.get("message", "failed user-defined condition")
1316 exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
1317 for fn in fns:
1318 def pa(s,l,t):
1319 if not bool(_trim_arity(fn)(s,l,t)):
1320 raise exc_type(s,l,msg)
1321 self.parseAction.append(pa)
1322 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1323 return self
1324
1326 """Define action to perform if parsing fails at this expression.
1327 Fail action fn is a callable function that takes the arguments
1328 C{fn(s,loc,expr,err)} where:
1329 - s = string being parsed
1330 - loc = location where expression match was attempted and failed
1331 - expr = the parse expression that failed
1332 - err = the exception thrown
1333 The function returns no value. It may throw C{L{ParseFatalException}}
1334 if it is desired to stop parsing immediately."""
1335 self.failAction = fn
1336 return self
1337
1339 exprsFound = True
1340 while exprsFound:
1341 exprsFound = False
1342 for e in self.ignoreExprs:
1343 try:
1344 while 1:
1345 loc,dummy = e._parse( instring, loc )
1346 exprsFound = True
1347 except ParseException:
1348 pass
1349 return loc
1350
1352 if self.ignoreExprs:
1353 loc = self._skipIgnorables( instring, loc )
1354
1355 if self.skipWhitespace:
1356 wt = self.whiteChars
1357 instrlen = len(instring)
1358 while loc < instrlen and instring[loc] in wt:
1359 loc += 1
1360
1361 return loc
1362
1363 - def parseImpl( self, instring, loc, doActions=True ):
1365
1366 - def postParse( self, instring, loc, tokenlist ):
1368
1369
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Core, uncached parse step for this element at position C{loc} of C{instring}.

    Optionally runs C{preParse}, delegates the actual matching to C{parseImpl},
    passes the result through C{postParse}, wraps the tokens in a
    L{ParseResults} and finally runs the attached parse actions.

    Parameters:
     - instring - the string being parsed
     - loc - location at which the match is attempted
     - doActions - when False, parse actions only run if C{self.callDuringTry} is set
     - callPreParse - when False, C{preParse} is skipped and matching starts at C{loc}

    Returns the tuple C{(loc, retTokens)}: the location returned by C{parseImpl}
    and the (possibly parse-action-replaced) L{ParseResults}.

    Raises whatever C{parseImpl} or a parse action raises. An C{IndexError} from
    C{parseImpl} is converted to a L{ParseException} located at C{len(instring)};
    on the fast (non-debug) path this conversion is only applied when
    C{self.mayIndexError} is set or C{preloc} is at/after the end of the string.
    """
    debugging = ( self.debug )

    # slow path: taken when debugging is on or a fail action must be notified
    if debugging or self.failAction:

        # debugActions[0] is the "start" action (see setDebugActions); it receives
        # the location before any preParse skipping has been applied
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # remember where matching actually started; reported to actions below
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # running off the end of the input is reported as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:

            # debugActions[2] is the "exception" action; it is invoked before failAction
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug/fail callbacks to invoke
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when the element says it may raise one,
        # or when matching would start at or beyond the end of the input
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                # each action sees the results produced by the previous one; a
                # non-None return value replaces the current results
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:

                # report parse-action failures to the "exception" debug action, then propagate
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug reporting wrapper
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:

        # debugActions[1] is the "success" action
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1440
1446
1448 try:
1449 self.tryParse(instring, loc)
1450 except (ParseException, IndexError):
1451 return False
1452 else:
1453 return True
1454
1457 cache = {}
1458 self.not_in_cache = not_in_cache = object()
1459
1460 def get(self, key):
1461 return cache.get(key, not_in_cache)
1462
1463 def set(self, key, value):
1464 cache[key] = value
1465
1466 def clear(self):
1467 cache.clear()
1468
1469 def cache_len(self):
1470 return len(cache)
1471
1472 self.get = types.MethodType(get, self)
1473 self.set = types.MethodType(set, self)
1474 self.clear = types.MethodType(clear, self)
1475 self.__len__ = types.MethodType(cache_len, self)
1476
1477 if _OrderedDict is not None:
1480 self.not_in_cache = not_in_cache = object()
1481
1482 cache = _OrderedDict()
1483
1484 def get(self, key):
1485 return cache.get(key, not_in_cache)
1486
1487 def set(self, key, value):
1488 cache[key] = value
1489 while len(cache) > size:
1490 try:
1491 cache.popitem(False)
1492 except KeyError:
1493 pass
1494
1495 def clear(self):
1496 cache.clear()
1497
1498 def cache_len(self):
1499 return len(cache)
1500
1501 self.get = types.MethodType(get, self)
1502 self.set = types.MethodType(set, self)
1503 self.clear = types.MethodType(clear, self)
1504 self.__len__ = types.MethodType(cache_len, self)
1505
1506 else:
1509 self.not_in_cache = not_in_cache = object()
1510
1511 cache = {}
1512 key_fifo = collections.deque([], size)
1513
1514 def get(self, key):
1515 return cache.get(key, not_in_cache)
1516
1517 def set(self, key, value):
1518 cache[key] = value
1519 while len(key_fifo) > size:
1520 cache.pop(key_fifo.popleft(), None)
1521 key_fifo.append(key)
1522
1523 def clear(self):
1524 cache.clear()
1525 key_fifo.clear()
1526
1527 def cache_len(self):
1528 return len(cache)
1529
1530 self.get = types.MethodType(get, self)
1531 self.set = types.MethodType(set, self)
1532 self.clear = types.MethodType(clear, self)
1533 self.__len__ = types.MethodType(cache_len, self)
1534
1535
1536 packrat_cache = {}
1537 packrat_cache_lock = RLock()
1538 packrat_cache_stats = [0, 0]
1539
1540
1541
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    The cache key is C{(self, instring, loc, callPreParse, doActions)}. Both
    successful results and raised L{ParseBaseException}s are stored; a stored
    exception is re-raised on a later hit. Results are copied on the way into
    and out of the cache so that callers never share a cached
    L{ParseResults} instance. All work is done while holding
    C{ParserElement.packrat_cache_lock}, and C{packrat_cache_stats} is updated
    with the hit/miss counts.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)

        # cache hit: replay the stored outcome
        if value is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(value, Exception):
                raise value
            return (value[0], value[1].copy())

        # cache miss: do the real parse and remember how it turned out
        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            value = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # store a fresh exception built from the same args, then propagate the original
            cache.set(lookup, pe.__class__(*pe.args))
            raise
        cache.set(lookup, (value[0], value[1].copy()))
        return value
1564
1565 _parse = _parseNoCache
1566
1567 @staticmethod
1571
1572 _packratEnabled = False
1573 @staticmethod
1575 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1576 Repeated parse attempts at the same string location (which happens
1577 often in many complex grammars) can immediately return a cached value,
1578 instead of re-executing parsing/validating code. Memoizing is done of
1579 both valid results and parsing exceptions.
1580
1581 Parameters:
1582 - cache_size_limit - (default=C{128}) - if an integer value is provided
1583 will limit the size of the packrat cache; if None is passed, then
1584 the cache size will be unbounded; if 0 is passed, the cache will
1585 be effectively disabled.
1586
1587 This speedup may break existing programs that use parse actions that
1588 have side-effects. For this reason, packrat parsing is disabled when
1589 you first import pyparsing. To activate the packrat feature, your
1590 program must call the class method C{ParserElement.enablePackrat()}. If
1591 your program uses C{psyco} to "compile as you go", you must call
1592 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1593 Python will crash. For best results, call C{enablePackrat()} immediately
1594 after importing pyparsing.
1595
1596 Example::
1597 import pyparsing
1598 pyparsing.ParserElement.enablePackrat()
1599 """
1600 if not ParserElement._packratEnabled:
1601 ParserElement._packratEnabled = True
1602 if cache_size_limit is None:
1603 ParserElement.packrat_cache = ParserElement._UnboundedCache()
1604 else:
1605 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1606 ParserElement._parse = ParserElement._parseCache
1607
1609 """
1610 Execute the parse expression with the given string.
1611 This is the main interface to the client code, once the complete
1612 expression has been built.
1613
1614 If you want the grammar to require that the entire input string be
1615 successfully parsed, then set C{parseAll} to True (equivalent to ending
1616 the grammar with C{L{StringEnd()}}).
1617
1618 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1619 in order to report proper column numbers in parse actions.
1620 If the input string contains tabs and
1621 the grammar uses parse actions that use the C{loc} argument to index into the
1622 string being parsed, you can ensure you have a consistent view of the input
1623 string by:
1624 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1625 (see L{I{parseWithTabs}<parseWithTabs>})
1626 - define your parse action using the full C{(s,loc,toks)} signature, and
1627 reference the input string using the parse action's C{s} argument
1628 - explicitly expand the tabs in your input string before calling
1629 C{parseString}
1630
1631 Example::
1632 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
1633 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
1634 """
1635 ParserElement.resetCache()
1636 if not self.streamlined:
1637 self.streamline()
1638
1639 for e in self.ignoreExprs:
1640 e.streamline()
1641 if not self.keepTabs:
1642 instring = instring.expandtabs()
1643 try:
1644 loc, tokens = self._parse( instring, 0 )
1645 if parseAll:
1646 loc = self.preParse( instring, loc )
1647 se = Empty() + StringEnd()
1648 se._parse( instring, loc )
1649 except ParseBaseException as exc:
1650 if ParserElement.verbose_stacktrace:
1651 raise
1652 else:
1653
1654 raise exc
1655 else:
1656 return tokens
1657
1659 """
1660 Scan the input string for expression matches. Each match will return the
1661 matching tokens, start location, and end location. May be called with optional
1662 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1663 C{overlap} is specified, then overlapping matches will be reported.
1664
1665 Note that the start and end locations are reported relative to the string
1666 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1667 strings with embedded tabs.
1668
1669 Example::
1670 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1671 print(source)
1672 for tokens,start,end in Word(alphas).scanString(source):
1673 print(' '*start + '^'*(end-start))
1674 print(' '*start + tokens[0])
1675
1676 prints::
1677
1678 sldjf123lsdjjkf345sldkjf879lkjsfd987
1679 ^^^^^
1680 sldjf
1681 ^^^^^^^
1682 lsdjjkf
1683 ^^^^^^
1684 sldkjf
1685 ^^^^^^
1686 lkjsfd
1687 """
1688 if not self.streamlined:
1689 self.streamline()
1690 for e in self.ignoreExprs:
1691 e.streamline()
1692
1693 if not self.keepTabs:
1694 instring = _ustr(instring).expandtabs()
1695 instrlen = len(instring)
1696 loc = 0
1697 preparseFn = self.preParse
1698 parseFn = self._parse
1699 ParserElement.resetCache()
1700 matches = 0
1701 try:
1702 while loc <= instrlen and matches < maxMatches:
1703 try:
1704 preloc = preparseFn( instring, loc )
1705 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1706 except ParseException:
1707 loc = preloc+1
1708 else:
1709 if nextLoc > loc:
1710 matches += 1
1711 yield tokens, preloc, nextLoc
1712 if overlap:
1713 nextloc = preparseFn( instring, loc )
1714 if nextloc > loc:
1715 loc = nextLoc
1716 else:
1717 loc += 1
1718 else:
1719 loc = nextLoc
1720 else:
1721 loc = preloc+1
1722 except ParseBaseException as exc:
1723 if ParserElement.verbose_stacktrace:
1724 raise
1725 else:
1726
1727 raise exc
1728
1771
1773 """
1774 Another extension to C{L{scanString}}, simplifying the access to the tokens found
1775 to match the given parse expression. May be called with optional
1776 C{maxMatches} argument, to clip searching after 'n' matches are found.
1777
1778 Example::
1779 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1780 cap_word = Word(alphas.upper(), alphas.lower())
1781
1782 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1783
1784 # the sum() builtin can be used to merge results into a single ParseResults object
1785 print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
1786 prints::
1787 [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
1788 ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
1789 """
1790 try:
1791 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1792 except ParseBaseException as exc:
1793 if ParserElement.verbose_stacktrace:
1794 raise
1795 else:
1796
1797 raise exc
1798
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Note: unless C{keepTabs} has been set on this expression, C{scanString}
    expands tabs in the input and reports match locations relative to the
    expanded text. The pieces yielded here are therefore sliced from that same
    tab-expanded text, so that the reported locations line up with it.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # slice the same text that scanString scans; slicing the unexpanded
    # string would misalign every piece that follows a tab
    if not self.keepTabs:
        instring = instring.expandtabs()
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        # text between the end of the previous separator and the start of this one
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator (possibly the empty string)
    yield instring[last:]
1820
1822 """
1823 Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
1824 converts them to L{Literal}s by default.
1825
1826 Example::
1827 greet = Word(alphas) + "," + Word(alphas) + "!"
1828 hello = "Hello, World!"
1829 print (hello, "->", greet.parseString(hello))
1830 Prints::
1831 Hello, World! -> ['Hello', ',', 'World', '!']
1832 """
1833 if isinstance( other, basestring ):
1834 other = ParserElement._literalStringClass( other )
1835 if not isinstance( other, ParserElement ):
1836 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1837 SyntaxWarning, stacklevel=2)
1838 return None
1839 return And( [ self, other ] )
1840
1842 """
1843 Implementation of + operator when left operand is not a C{L{ParserElement}}
1844 """
1845 if isinstance( other, basestring ):
1846 other = ParserElement._literalStringClass( other )
1847 if not isinstance( other, ParserElement ):
1848 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1849 SyntaxWarning, stacklevel=2)
1850 return None
1851 return other + self
1852
1854 """
1855 Implementation of - operator, returns C{L{And}} with error stop
1856 """
1857 if isinstance( other, basestring ):
1858 other = ParserElement._literalStringClass( other )
1859 if not isinstance( other, ParserElement ):
1860 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1861 SyntaxWarning, stacklevel=2)
1862 return None
1863 return self + And._ErrorStop() + other
1864
1866 """
1867 Implementation of - operator when left operand is not a C{L{ParserElement}}
1868 """
1869 if isinstance( other, basestring ):
1870 other = ParserElement._literalStringClass( other )
1871 if not isinstance( other, ParserElement ):
1872 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1873 SyntaxWarning, stacklevel=2)
1874 return None
1875 return other - self
1876
1878 """
1879 Implementation of * operator, allows use of C{expr * 3} in place of
1880 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1881 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1882 may also include C{None} as in:
1883 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1884 to C{expr*n + L{ZeroOrMore}(expr)}
1885 (read as "at least n instances of C{expr}")
1886 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1887 (read as "0 to n instances of C{expr}")
1888 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1889 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1890
1891 Note that C{expr*(None,n)} does not raise an exception if
1892 more than n exprs exist in the input stream; that is,
1893 C{expr*(None,n)} does not enforce a maximum number of expr
1894 occurrences. If this behavior is desired, then write
1895 C{expr*(None,n) + ~expr}
1896 """
1897 if isinstance(other,int):
1898 minElements, optElements = other,0
1899 elif isinstance(other,tuple):
1900 other = (other + (None, None))[:2]
1901 if other[0] is None:
1902 other = (0, other[1])
1903 if isinstance(other[0],int) and other[1] is None:
1904 if other[0] == 0:
1905 return ZeroOrMore(self)
1906 if other[0] == 1:
1907 return OneOrMore(self)
1908 else:
1909 return self*other[0] + ZeroOrMore(self)
1910 elif isinstance(other[0],int) and isinstance(other[1],int):
1911 minElements, optElements = other
1912 optElements -= minElements
1913 else:
1914 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1915 else:
1916 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1917
1918 if minElements < 0:
1919 raise ValueError("cannot multiply ParserElement by negative value")
1920 if optElements < 0:
1921 raise ValueError("second tuple value must be greater or equal to first tuple value")
1922 if minElements == optElements == 0:
1923 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1924
1925 if (optElements):
1926 def makeOptionalList(n):
1927 if n>1:
1928 return Optional(self + makeOptionalList(n-1))
1929 else:
1930 return Optional(self)
1931 if minElements:
1932 if minElements == 1:
1933 ret = self + makeOptionalList(optElements)
1934 else:
1935 ret = And([self]*minElements) + makeOptionalList(optElements)
1936 else:
1937 ret = makeOptionalList(optElements)
1938 else:
1939 if minElements == 1:
1940 ret = self
1941 else:
1942 ret = And([self]*minElements)
1943 return ret
1944
1947
1949 """
1950 Implementation of | operator - returns C{L{MatchFirst}}
1951 """
1952 if isinstance( other, basestring ):
1953 other = ParserElement._literalStringClass( other )
1954 if not isinstance( other, ParserElement ):
1955 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1956 SyntaxWarning, stacklevel=2)
1957 return None
1958 return MatchFirst( [ self, other ] )
1959
1961 """
1962 Implementation of | operator when left operand is not a C{L{ParserElement}}
1963 """
1964 if isinstance( other, basestring ):
1965 other = ParserElement._literalStringClass( other )
1966 if not isinstance( other, ParserElement ):
1967 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1968 SyntaxWarning, stacklevel=2)
1969 return None
1970 return other | self
1971
1973 """
1974 Implementation of ^ operator - returns C{L{Or}}
1975 """
1976 if isinstance( other, basestring ):
1977 other = ParserElement._literalStringClass( other )
1978 if not isinstance( other, ParserElement ):
1979 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1980 SyntaxWarning, stacklevel=2)
1981 return None
1982 return Or( [ self, other ] )
1983
1985 """
1986 Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1987 """
1988 if isinstance( other, basestring ):
1989 other = ParserElement._literalStringClass( other )
1990 if not isinstance( other, ParserElement ):
1991 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1992 SyntaxWarning, stacklevel=2)
1993 return None
1994 return other ^ self
1995
1997 """
1998 Implementation of & operator - returns C{L{Each}}
1999 """
2000 if isinstance( other, basestring ):
2001 other = ParserElement._literalStringClass( other )
2002 if not isinstance( other, ParserElement ):
2003 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2004 SyntaxWarning, stacklevel=2)
2005 return None
2006 return Each( [ self, other ] )
2007
2009 """
2010 Implementation of & operator when left operand is not a C{L{ParserElement}}
2011 """
2012 if isinstance( other, basestring ):
2013 other = ParserElement._literalStringClass( other )
2014 if not isinstance( other, ParserElement ):
2015 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2016 SyntaxWarning, stacklevel=2)
2017 return None
2018 return other & self
2019
2021 """
2022 Implementation of ~ operator - returns C{L{NotAny}}
2023 """
2024 return NotAny( self )
2025
2027 """
2028 Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
2029
2030 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
2031 passed as C{True}.
2032
2033 If C{name} is omitted, same as calling C{L{copy}}.
2034
2035 Example::
2036 # these are equivalent
2037 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
2038 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
2039 """
2040 if name is not None:
2041 return self.setResultsName(name)
2042 else:
2043 return self.copy()
2044
2046 """
2047 Suppresses the output of this C{ParserElement}; useful to keep punctuation from
2048 cluttering up returned output.
2049 """
2050 return Suppress( self )
2051
2053 """
2054 Disables the skipping of whitespace before matching the characters in the
2055 C{ParserElement}'s defined pattern. This is normally only used internally by
2056 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2057 """
2058 self.skipWhitespace = False
2059 return self
2060
2062 """
2063 Overrides the default whitespace chars
2064 """
2065 self.skipWhitespace = True
2066 self.whiteChars = chars
2067 self.copyDefaultWhiteChars = False
2068 return self
2069
2071 """
2072 Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
2073 Must be called before C{parseString} when the input grammar contains elements that
2074 match C{<TAB>} characters.
2075 """
2076 self.keepTabs = True
2077 return self
2078
2080 """
2081 Define expression to be ignored (e.g., comments) while doing pattern
2082 matching; may be called repeatedly, to define multiple comment or other
2083 ignorable patterns.
2084
2085 Example::
2086 patt = OneOrMore(Word(alphas))
2087 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2088
2089 patt.ignore(cStyleComment)
2090 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2091 """
2092 if isinstance(other, basestring):
2093 other = Suppress(other)
2094
2095 if isinstance( other, Suppress ):
2096 if other not in self.ignoreExprs:
2097 self.ignoreExprs.append(other)
2098 else:
2099 self.ignoreExprs.append( Suppress( other.copy() ) )
2100 return self
2101
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Turn on debugging for this expression and install the three debug callbacks.

    Any callback passed as a false value (e.g. C{None}) is replaced by the
    corresponding module default. The callbacks are stored, in the order
    (start, success, exception), in C{self.debugActions}.

    Returns C{self} so that calls can be chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
2111
2113 """
2114 Enable display of debugging messages while doing pattern matching.
2115 Set C{flag} to True to enable, False to disable.
2116
2117 Example::
2118 wd = Word(alphas).setName("alphaword")
2119 integer = Word(nums).setName("numword")
2120 term = wd | integer
2121
2122 # turn on debugging for wd
2123 wd.setDebug()
2124
2125 OneOrMore(term).parseString("abc 123 xyz 890")
2126
2127 prints::
2128 Match alphaword at loc 0(1,1)
2129 Matched alphaword -> ['abc']
2130 Match alphaword at loc 3(1,4)
2131 Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2132 Match alphaword at loc 7(1,8)
2133 Matched alphaword -> ['xyz']
2134 Match alphaword at loc 11(1,12)
2135 Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2136 Match alphaword at loc 15(1,16)
2137 Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2138
2139 The output shown is that produced by the default debug actions - custom debug actions can be
2140 specified using L{setDebugActions}. Prior to attempting
2141 to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2142 is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2143 message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2144 which makes debugging and exception messages easier to understand - for instance, the default
2145 name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2146 """
2147 if flag:
2148 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2149 else:
2150 self.debug = False
2151 return self
2152
2155
2158
2160 self.streamlined = True
2161 self.strRepr = None
2162 return self
2163
2166
def validate( self, validateTrace=None ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility; this
    implementation does not read it and always starts the recursion check
    from an empty trace. Its default is C{None} rather than a shared
    mutable list.
    """
    self.checkRecursion( [] )
2172
def parseFile( self, file_or_filename, parseAll=False ):
    """
    Parse the complete contents of a file with this expression.

    C{file_or_filename} may be an object with a C{read()} method, or a
    file name. A file name is opened in text mode, read in full, and
    closed before parsing starts.

    Returns whatever C{parseString} returns for the file contents;
    C{parseAll} is passed through to it.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read() attribute - treat the argument as a path
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        parsed = self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise the exception object from this frame instead of
            # propagating it with a bare raise
            raise exc
        raise
    return parsed
2192
2194 if isinstance(other, ParserElement):
2195 return self is other or vars(self) == vars(other)
2196 elif isinstance(other, basestring):
2197 return self.matches(other)
2198 else:
2199 return super(ParserElement,self)==other
2200
2202 return not (self == other)
2203
2205 return hash(id(self))
2206
2208 return self == other
2209
2211 return not (self == other)
2212
def matches(self, testString, parseAll=True):
    """
    Report whether this expression parses C{testString}. Handy for small
    inline checks of sub-expressions while a larger parser is being built.

    Parameters:
     - testString - the text to try; it is converted with C{_ustr} before parsing
     - parseAll - (default=C{True}) - flag passed on to C{L{parseString}}

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2231
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of (test string, result)
    pairs, one per executed test, where result is the value returned by C{parseString} or
    the exception that was raised

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    # a multiline string is split into one stripped test per line
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    # a string comment marker is wrapped in a Literal so it can be matched below
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # collect comment lines (and blank lines that follow collected comments);
        # they are emitted ahead of the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        # blank lines with no pending comments are skipped entirely
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts as a pass only when failures are not expected
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multiline input: show the offending line and put the caret under its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            # a parse failure counts as a pass only when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            # any other exception is reported and scored like a parse failure
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                # blank separator line after each full dump
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2362
2363
2364 -class Token(ParserElement):
2365 """
2366 Abstract C{ParserElement} subclass, for defining atomic matching patterns.
2367 """
2370
2371
2372 -class Empty(Token):
2373 """
2374 An empty token, will always match.
2375 """
2377 super(Empty,self).__init__()
2378 self.name = "Empty"
2379 self.mayReturnEmpty = True
2380 self.mayIndexError = False
2381
2384 """
2385 A token that will never match.
2386 """
2388 super(NoMatch,self).__init__()
2389 self.name = "NoMatch"
2390 self.mayReturnEmpty = True
2391 self.mayIndexError = False
2392 self.errmsg = "Unmatchable token"
2393
2394 - def parseImpl( self, instring, loc, doActions=True ):
2396
2399 """
2400 Token to exactly match a specified string.
2401
2402 Example::
2403 Literal('blah').parseString('blah') # -> ['blah']
2404 Literal('blah').parseString('blahfooblah') # -> ['blah']
2405 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2406
2407 For case-insensitive matching, use L{CaselessLiteral}.
2408
2409 For keyword matching (force word break before and after the matched string),
2410 use L{Keyword} or L{CaselessKeyword}.
2411 """
2413 super(Literal,self).__init__()
2414 self.match = matchString
2415 self.matchLen = len(matchString)
2416 try:
2417 self.firstMatchChar = matchString[0]
2418 except IndexError:
2419 warnings.warn("null string passed to Literal; use Empty() instead",
2420 SyntaxWarning, stacklevel=2)
2421 self.__class__ = Empty
2422 self.name = '"%s"' % _ustr(self.match)
2423 self.errmsg = "Expected " + self.name
2424 self.mayReturnEmpty = False
2425 self.mayIndexError = False
2426
2427
2428
2429
2430
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match the literal text at C{loc}.

    Returns C{(loc + matchLen, match)} when C{instring} holds the literal
    at C{loc}; otherwise raises C{ParseException}.
    """
    # cheap single-character test first; the full comparison only runs
    # for literals longer than one character
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
2436 _L = Literal
2437 ParserElement._literalStringClass = Literal
2440 """
2441 Token to exactly match a specified string as a keyword, that is, it must be
2442 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
2443 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
2444 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
2445 Accepts two optional constructor arguments in addition to the keyword string:
2446 - C{identChars} is a string of characters that would be valid identifier characters,
2447 defaulting to all alphanumerics + "_" and "$"
2448 - C{caseless} allows case-insensitive matching, default is C{False}.
2449
2450 Example::
2451 Keyword("start").parseString("start") # -> ['start']
2452 Keyword("start").parseString("starting") # -> Exception
2453
2454 For case-insensitive matching, use L{CaselessKeyword}.
2455 """
2456 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
2457
def __init__( self, matchString, identChars=None, caseless=False ):
    """
    Set up a keyword matcher for C{matchString}.

    Parameters:
     - matchString - the keyword text
     - identChars - characters treated as identifier characters when checking
          the text around a match; defaults to C{Keyword.DEFAULT_KEYWORD_CHARS}
     - caseless - (default=C{False}) when true, an upper-cased copy of the
          keyword and of C{identChars} is stored for case-insensitive comparison

    An empty C{matchString} only produces a C{SyntaxWarning}.
    """
    super(Keyword,self).__init__()
    if identChars is None:
        identChars = Keyword.DEFAULT_KEYWORD_CHARS

    self.caseless = caseless
    self.mayReturnEmpty = False
    self.mayIndexError = False

    self.match = matchString
    self.matchLen = len(matchString)
    try:
        self.firstMatchChar = matchString[0]
    except IndexError:
        warnings.warn("null string passed to Keyword; use Empty() instead",
                      SyntaxWarning, stacklevel=2)
    self.name = '"%s"' % self.match
    self.errmsg = "Expected " + self.name

    if caseless:
        # comparisons are done on upper-cased text, so fold both the
        # keyword and the identifier characters once, up front
        self.caselessmatch = matchString.upper()
        identChars = identChars.upper()
    self.identChars = set(identChars)
2478
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match the keyword at C{loc}, requiring that neither the character
    after the match nor the character before it is in C{identChars}.

    In caseless mode the input text and the neighbouring characters are
    upper-cased before comparison. Returns C{(end location, match)} or
    raises C{ParseException}.
    """
    end = loc + self.matchLen
    idChars = self.identChars
    if self.caseless:
        if ( instring[loc:end].upper() == self.caselessmatch
                and (end >= len(instring) or instring[end].upper() not in idChars)
                and (loc == 0 or instring[loc-1].upper() not in idChars) ):
            return end, self.match
    elif ( instring[loc] == self.firstMatchChar
            and (self.matchLen == 1 or instring.startswith(self.match, loc))
            and (end >= len(instring) or instring[end] not in idChars)
            and (loc == 0 or instring[loc-1] not in idChars) ):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2492
2497
2498 @staticmethod
2503
2505 """
2506 Token to match a specified string, ignoring case of letters.
2507 Note: the matched results will always be in the case of the given
2508 match string, NOT the case of the input text.
2509
2510 Example::
2511 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2512
2513 (Contrast with example for L{CaselessKeyword}.)
2514 """
2516 super(CaselessLiteral,self).__init__( matchString.upper() )
2517
2518 self.returnString = matchString
2519 self.name = "'%s'" % self.returnString
2520 self.errmsg = "Expected " + self.name
2521
def parseImpl( self, instring, loc, doActions=True ):
    """
    Compare the upper-cased input slice at C{loc} with the stored match text.

    On success returns C{(loc + matchLen, returnString)} - the text as it
    was given to the constructor, not as it appears in the input.
    Otherwise raises C{ParseException}.
    """
    end = loc + self.matchLen
    candidate = instring[loc:end].upper()
    if candidate != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return end, self.returnString
2526
2528 """
2529 Caseless version of L{Keyword}.
2530
2531 Example::
2532 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2533
2534 (Contrast with example for L{CaselessLiteral}.)
2535 """
2536 - def __init__( self, matchString, identChars=None ):
2538
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match the keyword at C{loc} without regard to case.

    The match is accepted only if the character following it (if any) and
    the character preceding it (if any) are not in C{identChars}. The
    check on the preceding character mirrors the caseless branch of
    C{Keyword.parseImpl}; without it the keyword would be accepted in the
    middle of a longer identifier.

    Returns C{(loc + matchLen, match)} or raises C{ParseException}.
    """
    end = loc + self.matchLen
    if ( (instring[ loc:end ].upper() == self.caselessmatch) and
         (end >= len(instring) or instring[end].upper() not in self.identChars) and
         (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2544
2546 """
2547 A variation on L{Literal} which matches "close" matches, that is,
2548 strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
2549 - C{match_string} - string to be matched
2550 - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
2551
2552 The results from a successful parse will contain the matched text from the input string and the following named results:
2553 - C{mismatches} - a list of the positions within the match_string where mismatches were found
2554 - C{original} - the original match_string used to compare against the input string
2555
2556 If C{mismatches} is an empty list, then the match was an exact match.
2557
2558 Example::
2559 patt = CloseMatch("ATCATCGAATGGA")
2560 patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
2561 patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
2562
2563 # exact match
2564 patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
2565
2566 # close match allowing up to 2 mismatches
2567 patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
2568 patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
2569 """
def __init__(self, match_string, maxMismatches=1):
    """
    Parameters:
     - match_string - the text to compare the input against
     - maxMismatches - (default=C{1}) number of differing character
          positions still accepted as a match
    """
    super(CloseMatch,self).__init__()
    self.match_string = match_string
    self.maxMismatches = maxMismatches
    self.name = match_string
    self.mayReturnEmpty = False
    self.mayIndexError = False
    self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
2578
def parseImpl( self, instring, loc, doActions=True ):
    """
    Compare the input at C{loc} with C{match_string} character by
    character, tolerating up to C{maxMismatches} differing positions.

    On success returns C{(end location, results)}, where results holds the
    matched input text plus the named results C{original} (the match
    string) and C{mismatches} (positions within the match string that
    differed). Raises C{ParseException} if too many characters differ or
    if the input is too short to hold the whole match string.
    """
    start = loc
    match_string = self.match_string
    maxloc = start + len(match_string)

    if maxloc <= len(instring):
        mismatches = []
        maxMismatches = self.maxMismatches

        for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
            if src != mat:
                mismatches.append(pos)
                if len(mismatches) > maxMismatches:
                    break
        else:
            # The loop ran to completion, so the whole match string was
            # consumed. The end location must be relative to where the
            # match started, not to the beginning of the input.
            loc = maxloc
            results = ParseResults([instring[start:loc]])
            results['original'] = match_string
            results['mismatches'] = mismatches
            return loc, results

    raise ParseException(instring, loc, self.errmsg, self)
2604
2605
2606 -class Word(Token):
2607 """
2608 Token for matching words composed of allowed character sets.
2609 Defined with string containing all allowed initial characters,
2610 an optional string containing allowed body characters (if omitted,
2611 defaults to the initial character set), and an optional minimum,
2612 maximum, and/or exact length. The default value for C{min} is 1 (a
2613 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2614 are 0, meaning no maximum or exact length restriction. An optional
2615 C{excludeChars} parameter can list characters that might be found in
2616 the input C{bodyChars} string; useful to define a word of all printables
2617 except for one or two characters, for instance.
2618
2619 L{srange} is useful for defining custom character set strings for defining
2620 C{Word} expressions, using range notation from regular expression character sets.
2621
2622 A common mistake is to use C{Word} to match a specific literal string, as in
2623 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2624 I{sets} of matchable characters. This expression would match "Add", "AAA",
2625 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2626 To match an exact literal string, use L{Literal} or L{Keyword}.
2627
2628 pyparsing includes helper strings for building Words:
2629 - L{alphas}
2630 - L{nums}
2631 - L{alphanums}
2632 - L{hexnums}
2633 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2634 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2635 - L{printables} (any non-whitespace character)
2636
2637 Example::
2638 # a word composed of digits
2639 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2640
2641 # a word with a leading capital, and zero or more lowercase
2642 capital_word = Word(alphas.upper(), alphas.lower())
2643
2644 # hostnames are alphanumeric, with leading alpha, and '-'
2645 hostname = Word(alphas, alphanums+'-')
2646
2647 # roman numeral (not a strict parser, accepts invalid mix of characters)
2648 roman = Word("IVXLCDM")
2649
2650 # any string of non-whitespace characters, except for ','
2651 csv_value = Word(printables, excludeChars=",")
2652 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """
    Parameters:
     - initChars - characters allowed as the first character of the word
     - bodyChars - characters allowed after the first; when omitted or
          empty, C{initChars} is used
     - min - minimum word length; values below 1 raise C{ValueError}
     - max - maximum word length; 0 means no maximum
     - exact - exact word length; when greater than 0 it overrides both
          C{min} and C{max}
     - asKeyword - when true, the word must not be adjacent to further body characters
     - excludeChars - characters removed from C{initChars} and C{bodyChars}

    When the length arguments are all at their defaults and neither
    character set contains a space, an equivalent regular expression is
    compiled and stored for use by C{parseImpl}.
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join([ch for ch in initChars if ch not in excludeChars])
        if bodyChars:
            bodyChars = ''.join([ch for ch in bodyChars if ch not in excludeChars])

    # an omitted (or emptied) body set falls back to the leading set
    effectiveBody = bodyChars if bodyChars else initChars
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    self.bodyCharsOrig = effectiveBody
    self.bodyChars = set(effectiveBody)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    defaultLengths = (min==1 and max==0 and exact==0)
    if defaultLengths and ' ' not in self.initCharsOrig+self.bodyCharsOrig:
        if self.bodyCharsOrig == self.initCharsOrig:
            pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            pattern = "%s[%s]*" % \
                      (re.escape(self.initCharsOrig),
                       _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            pattern = "[%s][%s]*" % \
                      (_escapeRegexRangeChars(self.initCharsOrig),
                       _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            pattern = r"\b"+pattern+r"\b"
        self.reString = pattern
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # fall back to the character-by-character matcher in parseImpl
            self.re = None
2706
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word at C{loc}.

    If a compiled regular expression is available it is used directly.
    Otherwise the first character is checked against C{initChars} and
    following characters against C{bodyChars}, up to C{maxLen} characters,
    and the length and keyword constraints are verified afterwards.

    Returns C{(end location, matched text)} or raises C{ParseException}.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    bodychars = self.bodyChars
    instrlen = len(instring)
    limit = min( start + self.maxLen, instrlen )
    loc += 1
    while loc < limit and instring[loc] in bodychars:
        loc += 1

    # a body character immediately after the match
    moreBodyFollows = loc < instrlen and instring[loc] in bodychars
    tooShort = loc - start < self.minLen
    overMax = self.maxSpecified and moreBodyFollows
    notStandalone = self.asKeyword and (
        (start>0 and instring[start-1] in bodychars) or moreBodyFollows)

    if tooShort or overMax or notStandalone:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2741
2743 try:
2744 return super(Word,self).__str__()
2745 except Exception:
2746 pass
2747
2748
2749 if self.strRepr is None:
2750
2751 def charsAsStr(s):
2752 if len(s)>4:
2753 return s[:4]+"..."
2754 else:
2755 return s
2756
2757 if ( self.initCharsOrig != self.bodyCharsOrig ):
2758 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
2759 else:
2760 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
2761
2762 return self.strRepr
2763
2764
2765 -class Regex(Token):
2766 r"""
2767 Token for matching strings that match a given regular expression.
2768 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
2769 If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
2770 named parse results.
2771
2772 Example::
2773 realnum = Regex(r"[+-]?\d+\.\d*")
2774 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
2775 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
2776 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
2777 """
2778 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
    """
    The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object;
    it is then used as given, and C{flags} is only recorded, not applied.

    Parameters:
     - pattern - regular expression source string, or a compiled regular expression
     - flags - flags for C{re.compile()} when C{pattern} is a string
     - asGroupList - when true, C{parseImpl} returns the tuple of matched groups
     - asMatch - when true, C{parseImpl} returns the match object itself

    Raises the C{re} compile error for an invalid pattern string, and
    C{ValueError} if C{pattern} is neither a string nor a compiled
    regular expression.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the regex source text, not the repr of the compiled object,
        # so pattern/reString hold a usable expression and a readable name
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
    self.asGroupList = asGroupList
    self.asMatch = asMatch
2814
def parseImpl( self, instring, loc, doActions=True ):
    """
    Apply the compiled regular expression at C{loc}.

    Returns C{(end location, value)} where value is the match object when
    C{asMatch} is set, the tuple of groups when C{asGroupList} is set, and
    otherwise a C{ParseResults} holding the matched text with every named
    group stored as a named result. Raises C{ParseException} when the
    expression does not match at C{loc}.
    """
    found = self.re.match(instring,loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    end = found.end()
    named = found.groupdict()
    if self.asMatch:
        return end, found
    if self.asGroupList:
        return end, found.groups()

    tokens = ParseResults(found.group())
    for key, value in named.items():
        tokens[key] = value
    return end, tokens
2832
2834 try:
2835 return super(Regex,self).__str__()
2836 except Exception:
2837 pass
2838
2839 if self.strRepr is None:
2840 self.strRepr = "Re:(%s)" % repr(self.pattern)
2841
2842 return self.strRepr
2843
def sub(self, repl):
    """
    Attach a parse action that rewrites the matched text as
    C{re.sub(expr, repl, string)} would, and return the value of
    C{addParseAction}.

    Not available together with C{asGroupList=True}, nor with a callable
    C{repl} when C{asMatch=True}; both cases issue a C{SyntaxWarning} and
    raise C{SyntaxError}.
    """
    if self.asGroupList:
        warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                      SyntaxWarning, stacklevel=2)
        raise SyntaxError()

    if self.asMatch:
        if callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        # the token is a match object: expand the template against it
        def substitute(tokens):
            return tokens[0].expand(repl)
    else:
        # the token is the matched text: run the substitution over it
        def substitute(tokens):
            return self.re.sub(repl, tokens[0])

    return self.addParseAction(substitute)
2866
2868 r"""
2869 Token for matching strings that are delimited by quoting characters.
2870
2871 Defined with the following parameters:
2872 - quoteChar - string of one or more characters defining the quote delimiting string
2873 - escChar - character to escape quotes, typically backslash (default=C{None})
2874 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
2875 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
2876 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
2877 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
2878 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
2879
2880 Example::
2881 qs = QuotedString('"')
2882 print(qs.searchString('lsjdf "This is the quote" sldjf'))
2883 complex_qs = QuotedString('{{', endQuoteChar='}}')
2884 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
2885 sql_qs = QuotedString('"', escQuote='""')
2886 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
2887 prints::
2888 [['This is the quote']]
2889 [['This is the "quote"']]
2890 [['This is the quote with "embedded" quotes']]
2891 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression that recognises the quoted string.

    C{quoteChar} and C{endQuoteChar} are stripped of surrounding
    whitespace; if either is empty afterwards a C{SyntaxWarning} is issued
    and C{SyntaxError} is raised. When C{endQuoteChar} is C{None} the
    opening quote is also used as the closing quote.
    """
    super(QuotedString,self).__init__()

    # remove white space from quote chars - wont work anyway
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # opening quote, then a group of ordinary characters: anything except
    # the first closing-quote character, the escape character and - unless
    # multiline - line breaks
    openQuote = re.escape(self.quoteChar)
    endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
    if escChar is not None:
        escExcluded = _escapeRegexRangeChars(escChar) or ''
    else:
        escExcluded = ''
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        self.pattern = r'%s(?:[^%s%s]' % ( openQuote, endFirst, escExcluded )
    else:
        self.flags = 0
        self.pattern = r'%s(?:[^%s\n\r%s]' % ( openQuote, endFirst, escExcluded )

    if len(self.endQuoteChar) > 1:
        # allow proper prefixes of a multi-character closing quote inside
        # the body, as long as the next character breaks the closing quote
        partials = ["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                 _escapeRegexRangeChars(self.endQuoteChar[i]))
                    for i in range(len(self.endQuoteChar)-1,0,-1)]
        self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                      SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2956
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a quoted string at C{loc}.

    Returns C{(end location, text)}. With C{unquoteResults} the quote
    characters are stripped and, for string results, whitespace escapes
    are converted (if enabled), escape characters are removed and the
    escaped-quote sequence is replaced by the closing quote. Raises
    C{ParseException} when no quoted string starts at C{loc}.
    """
    # cheap first-character test before running the regular expression
    if instring[loc] == self.firstQuoteChar:
        found = self.re.match(instring,loc)
    else:
        found = None
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    end = found.end()
    ret = found.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                for wslit, wschar in ((r'\t', '\t'),
                                      (r'\n', '\n'),
                                      (r'\f', '\f'),
                                      (r'\r', '\r')):
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return end, ret
2991
2993 try:
2994 return super(QuotedString,self).__str__()
2995 except Exception:
2996 pass
2997
2998 if self.strRepr is None:
2999 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
3000
3001 return self.strRepr
3002
3005 """
3006 Token for matching words composed of characters I{not} in a given set (will
3007 include whitespace in matched characters if not listed in the provided exclusion set - see example).
3008 Defined with string containing all disallowed characters, and an optional
3009 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
3010 minimum value < 1 is not valid); the default values for C{max} and C{exact}
3011 are 0, meaning no maximum or exact length restriction.
3012
3013 Example::
3014 # define a comma-separated-value as anything that is not a ','
3015 csv_value = CharsNotIn(',')
3016 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
3017 prints::
3018 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
3019 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """
    Parameters:
     - notChars - the characters that end a match
     - min - minimum match length; values below 1 raise C{ValueError}
     - max - maximum match length; 0 means no maximum
     - exact - exact match length; when greater than 0 it overrides both
          C{min} and C{max}

    Whitespace skipping is switched off for this expression.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
3043
def parseImpl( self, instring, loc, doActions=True ):
    """
    Consume characters from C{loc} up to the first character found in
    C{notChars}, the C{maxLen} limit, or the end of the input.

    Returns C{(end location, matched text)}; raises C{ParseException} if
    the first character is excluded or fewer than C{minLen} characters
    were matched.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    stop = min( start+self.maxLen, len(instring) )
    loc = start + 1
    while loc < stop and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
3060
3062 try:
3063 return super(CharsNotIn, self).__str__()
3064 except Exception:
3065 pass
3066
3067 if self.strRepr is None:
3068 if len(self.notChars) > 4:
3069 self.strRepr = "!W:(%s...)" % self.notChars[:4]
3070 else:
3071 self.strRepr = "!W:(%s)" % self.notChars
3072
3073 return self.strRepr
3074
3076 """
3077 Special matching class for matching whitespace. Normally, whitespace is ignored
3078 by pyparsing grammars. This class is included when some whitespace structures
3079 are significant. Define with a string containing the whitespace characters to be
3080 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
3081 as defined for the C{L{Word}} class.
3082 """
3083 whiteStrs = {
3084 " " : "<SPC>",
3085 "\t": "<TAB>",
3086 "\n": "<LF>",
3087 "\r": "<CR>",
3088 "\f": "<FF>",
3089 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Configure a token that matches a run of the whitespace characters in C{ws}.

    Parameters:
     - ws - the whitespace characters to match
     - min - minimum run length
     - max - maximum run length; 0 (or less) means no upper bound
     - exact - when > 0, overrides both C{min} and C{max}
    """
    super(White, self).__init__()
    self.matchWhite = ws
    # characters being matched must no longer be skipped as leading whitespace
    still_skipped = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars("".join(still_skipped))

    self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
3109
def parseImpl(self, instring, loc, doActions=True):
    """
    Consume a run of characters from C{self.matchWhite} starting at C{loc}.

    Returns a C{(new_loc, matched_text)} tuple.  Raises C{ParseException} when
    the character at C{loc} is not one of the matched characters or the run is
    shorter than C{self.minLen}.
    """
    whites = self.matchWhite
    if instring[loc] not in whites:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan past the maximum length nor the end of the input
    limit = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] in whites:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
3124
3128 super(_PositionToken,self).__init__()
3129 self.name=self.__class__.__name__
3130 self.mayReturnEmpty = True
3131 self.mayIndexError = False
3132
3134 """
3135 Token to advance to a specific column of input text; useful for tabular report scraping.
3136 """
3140
3142 if col(loc,instring) != self.col:
3143 instrlen = len(instring)
3144 if self.ignoreExprs:
3145 loc = self._skipIgnorables( instring, loc )
3146 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
3147 loc += 1
3148 return loc
3149
def parseImpl(self, instring, loc, doActions=True):
    """
    Advance from C{loc} to the column stored in C{self.col}.

    Returns C{(new_loc, text)} where C{text} is the input between C{loc} and
    the new location.  Raises C{ParseException} when the column at C{loc} is
    already past C{self.col}.
    """
    current = col(loc, instring)
    if current > self.col:
        raise ParseException(instring, loc, "Text not in expected column", self)
    # move forward by the number of columns still missing
    target = loc + self.col - current
    return target, instring[loc:target]
3157
3160 """
3161 Matches if current position is at the beginning of a line within the parse string
3162
3163 Example::
3164
3165 test = '''\
3166 AAA this line
3167 AAA and this line
3168 AAA but not this one
3169 B AAA and definitely not this one
3170 '''
3171
3172 for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
3173 print(t)
3174
3175 Prints::
3176 ['AAA', ' this line']
3177 ['AAA', ' and this line']
3178
3179 """
3183
def parseImpl(self, instring, loc, doActions=True):
    """
    Match only when C{loc} is in column 1 of its line.

    Returns C{(loc, [])} without consuming input; raises C{ParseException}
    for any other column.
    """
    if col(loc, instring) != 1:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3188
3190 """
3191 Matches if current position is at the end of a line within the parse string
3192 """
3197
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a newline at C{loc}, or the end of the input.

    Returns C{(loc + 1, "\\n")} when a newline is found and C{(loc + 1, [])}
    when C{loc} is exactly the end of the input.  Raises C{ParseException}
    for any other character, or when C{loc} lies beyond the end of the input.
    """
    end = len(instring)
    if loc < end and instring[loc] == "\n":
        return loc + 1, "\n"
    if loc == end:
        return loc + 1, []
    raise ParseException(instring, loc, self.errmsg, self)
3208
3210 """
3211 Matches if current position is at the beginning of the parse string
3212 """
3216
def parseImpl(self, instring, loc, doActions=True):
    """
    Match only at the beginning of the parse string.

    Succeeds with C{(loc, [])} when C{loc} is 0, or when C{loc} equals the
    position C{self.preParse} reports after starting from 0 (presumably
    meaning only skippable leading text precedes C{loc}).  Raises
    C{ParseException} otherwise.
    """
    if loc == 0 or loc == self.preParse(instring, 0):
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
3223
3225 """
3226 Matches if current position is at the end of the parse string
3227 """
3231
def parseImpl(self, instring, loc, doActions=True):
    """
    Match only at (or beyond) the end of the parse string.

    Returns C{(loc + 1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when C{loc} is already past it.  Raises C{ParseException}
    when any input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc + 1, []
    # loc > end: the three comparisons are exhaustive, so no further branch
    # is needed (the original carried an unreachable trailing else).
    return loc, []
3241
3243 """
3244 Matches if the current position is at the beginning of a Word, and
3245 is not preceded by any character in a given set of C{wordChars}
3246 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3247 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
3248 the string being parsed, or at the beginning of a line.
3249 """
3251 super(WordStart,self).__init__()
3252 self.wordChars = set(wordChars)
3253 self.errmsg = "Not at the start of a word"
3254
def parseImpl(self, instring, loc, doActions=True):
    """
    Match when C{loc} is the start of a word made of C{self.wordChars}.

    Location 0 always matches.  Elsewhere the previous character must not be
    a word character and the character at C{loc} must be one; otherwise
    C{ParseException} is raised.  Returns C{(loc, [])} without consuming input.
    """
    if loc == 0:
        return loc, []
    chars = self.wordChars
    if instring[loc - 1] not in chars and instring[loc] in chars:
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
3261
3263 """
3264 Matches if the current position is at the end of a Word, and
3265 is not followed by any character in a given set of C{wordChars}
3266 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3267 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
3268 the string being parsed, or at the end of a line.
3269 """
3271 super(WordEnd,self).__init__()
3272 self.wordChars = set(wordChars)
3273 self.skipWhitespace = False
3274 self.errmsg = "Not at the end of a word"
3275
def parseImpl(self, instring, loc, doActions=True):
    """
    Match when C{loc} is just past the end of a word made of C{self.wordChars}.

    An empty input, or a C{loc} at or beyond the end of the input, always
    matches.  Elsewhere the character at C{loc} must not be a word character
    and the preceding character must be one; otherwise C{ParseException} is
    raised.  Returns C{(loc, [])} without consuming input.
    """
    total = len(instring)
    if total == 0 or loc >= total:
        return loc, []
    chars = self.wordChars
    # NOTE(review): at loc == 0 the index loc - 1 wraps to the last character.
    if instring[loc] not in chars and instring[loc - 1] in chars:
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
3283
3286 """
3287 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
3288 """
def __init__(self, exprs, savelist=False):
    """
    Store the contained expressions in C{self.exprs}.

    C{exprs} may be a single string (wrapped with the literal string class),
    a generator or other iterable (materialised into a list, with all-string
    sequences wrapped element by element), or a single non-iterable object
    (stored as a one-element list).
    """
    super(ParseExpression, self).__init__(savelist)
    if isinstance(exprs, _generatorType):
        exprs = list(exprs)

    if isinstance(exprs, basestring):
        self.exprs = [ParserElement._literalStringClass(exprs)]
    elif isinstance(exprs, Iterable):
        members = list(exprs)
        # a sequence made only of strings is converted to literal expressions
        if all(isinstance(member, basestring) for member in members):
            wrap = ParserElement._literalStringClass
            members = [wrap(member) for member in members]
        self.exprs = members
    else:
        try:
            self.exprs = list(exprs)
        except TypeError:
            # not iterable at all: treat it as a single expression
            self.exprs = [exprs]
    self.callPreparse = False
3308
3310 return self.exprs[i]
3311
3313 self.exprs.append( other )
3314 self.strRepr = None
3315 return self
3316
3318 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
3319 all contained expressions."""
3320 self.skipWhitespace = False
3321 self.exprs = [ e.copy() for e in self.exprs ]
3322 for e in self.exprs:
3323 e.leaveWhitespace()
3324 return self
3325
3327 if isinstance( other, Suppress ):
3328 if other not in self.ignoreExprs:
3329 super( ParseExpression, self).ignore( other )
3330 for e in self.exprs:
3331 e.ignore( self.ignoreExprs[-1] )
3332 else:
3333 super( ParseExpression, self).ignore( other )
3334 for e in self.exprs:
3335 e.ignore( self.ignoreExprs[-1] )
3336 return self
3337
3339 try:
3340 return super(ParseExpression,self).__str__()
3341 except Exception:
3342 pass
3343
3344 if self.strRepr is None:
3345 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
3346 return self.strRepr
3347
3349 super(ParseExpression,self).streamline()
3350
3351 for e in self.exprs:
3352 e.streamline()
3353
3354
3355
3356
3357 if ( len(self.exprs) == 2 ):
3358 other = self.exprs[0]
3359 if ( isinstance( other, self.__class__ ) and
3360 not(other.parseAction) and
3361 other.resultsName is None and
3362 not other.debug ):
3363 self.exprs = other.exprs[:] + [ self.exprs[1] ]
3364 self.strRepr = None
3365 self.mayReturnEmpty |= other.mayReturnEmpty
3366 self.mayIndexError |= other.mayIndexError
3367
3368 other = self.exprs[-1]
3369 if ( isinstance( other, self.__class__ ) and
3370 not(other.parseAction) and
3371 other.resultsName is None and
3372 not other.debug ):
3373 self.exprs = self.exprs[:-1] + other.exprs[:]
3374 self.strRepr = None
3375 self.mayReturnEmpty |= other.mayReturnEmpty
3376 self.mayIndexError |= other.mayIndexError
3377
3378 self.errmsg = "Expected " + _ustr(self)
3379
3380 return self
3381
3385
def validate(self, validateTrace=None):
    """
    Validate every contained expression, then check this expression for
    recursion.

    Parameters:
     - validateTrace - optional list of the expressions visited so far; it is
       copied, never modified.  Defaults to an empty trace.
    """
    # None replaces the former mutable default ([]) shared between calls
    trace = ([] if validateTrace is None else validateTrace[:]) + [self]
    for e in self.exprs:
        e.validate(trace)
    self.checkRecursion([])
3391
3396
3397 -class And(ParseExpression):
3398 """
3399 Requires all given C{ParseExpression}s to be found in the given order.
3400 Expressions may be separated by whitespace.
3401 May be constructed using the C{'+'} operator.
3402 May also be constructed using the C{'-'} operator, which will suppress backtracking.
3403
3404 Example::
3405 integer = Word(nums)
3406 name_expr = OneOrMore(Word(alphas))
3407
3408 expr = And([integer("id"),name_expr("name"),integer("age")])
3409 # more easily written as:
3410 expr = integer("id") + name_expr("name") + integer("age")
3411 """
3412
3418
def __init__(self, exprs, savelist=True):
    """
    Build a sequence expression from C{exprs}.

    The whitespace settings are copied from the first contained expression.
    An empty expression list is accepted: the inherited whitespace settings
    are then left untouched instead of failing with C{IndexError}.
    """
    super(And, self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    if self.exprs:
        # leading whitespace is handled the way the first element wants it
        lead = self.exprs[0]
        self.setWhitespaceChars(lead.whiteChars)
        self.skipWhitespace = lead.skipWhitespace
    self.callPreparse = True
3425
def parseImpl(self, instring, loc, doActions=True):
    """
    Parse every contained expression in order, accumulating their tokens.

    Once an C{And._ErrorStop} marker has been passed, a failure of any later
    expression is re-raised as C{ParseSyntaxException}.  Returns
    C{(new_loc, tokens)}.
    """
    # The first element is parsed with callPreParse=False (presumably because
    # pre-parsing has already happened for this And as a whole).
    loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
    committed = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            committed = True
            continue
        if not committed:
            loc, exprtokens = expr._parse(instring, loc, doActions)
        else:
            try:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
3450
3452 if isinstance( other, basestring ):
3453 other = ParserElement._literalStringClass( other )
3454 return self.append( other )
3455
3457 subRecCheckList = parseElementList[:] + [ self ]
3458 for e in self.exprs:
3459 e.checkRecursion( subRecCheckList )
3460 if not e.mayReturnEmpty:
3461 break
3462
3464 if hasattr(self,"name"):
3465 return self.name
3466
3467 if self.strRepr is None:
3468 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
3469
3470 return self.strRepr
3471
3472
3473 -class Or(ParseExpression):
3474 """
3475 Requires that at least one C{ParseExpression} is found.
3476 If two expressions match, the expression that matches the longest string will be used.
3477 May be constructed using the C{'^'} operator.
3478
3479 Example::
3480 # construct Or using '^' operator
3481
3482 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3483 print(number.searchString("123 3.1416 789"))
3484 prints::
3485 [['123'], ['3.1416'], ['789']]
3486 """
def __init__(self, exprs, savelist=False):
    """
    Build a longest-match alternation from C{exprs}.

    The expression may return empty when any alternative may, or when there
    are no alternatives at all.
    """
    super(Or, self).__init__(exprs, savelist)
    alternatives = self.exprs
    self.mayReturnEmpty = (
        any(e.mayReturnEmpty for e in alternatives) if alternatives else True
    )
3493
def parseImpl(self, instring, loc, doActions=True):
    """
    Try every alternative at C{loc} and parse with the one matching furthest.

    Each alternative is first probed with C{tryParse}; the successful ones
    are then parsed for real, longest probe first, and the first to succeed
    supplies the result.  If nothing succeeds, the exception that got
    furthest into the input is raised with this expression's message.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []
    for alt in self.exprs:
        try:
            end_loc = alt.tryParse(instring, loc)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            input_len = len(instring)
            if input_len > furthest_loc:
                furthest_exc = ParseException(instring, input_len, alt.errmsg, self)
                furthest_loc = input_len
        else:
            # remember every match so they can be retried longest to shortest
            candidates.append((end_loc, alt))

    # stable sort: alternatives reaching the same location keep their order
    candidates.sort(key=lambda pair: pair[0], reverse=True)
    for _, alt in candidates:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3530
3531
3533 if isinstance( other, basestring ):
3534 other = ParserElement._literalStringClass( other )
3535 return self.append( other )
3536
3538 if hasattr(self,"name"):
3539 return self.name
3540
3541 if self.strRepr is None:
3542 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
3543
3544 return self.strRepr
3545
3547 subRecCheckList = parseElementList[:] + [ self ]
3548 for e in self.exprs:
3549 e.checkRecursion( subRecCheckList )
3550
3553 """
3554 Requires that at least one C{ParseExpression} is found.
3555 If two expressions match, the first one listed is the one that will match.
3556 May be constructed using the C{'|'} operator.
3557
3558 Example::
3559 # construct MatchFirst using '|' operator
3560
3561 # watch the order of expressions to match
3562 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
3563 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
3564
3565 # put more selective expression first
3566 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
3567 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
3568 """
def __init__(self, exprs, savelist=False):
    """
    Build a first-match alternation from C{exprs}.

    The expression may return empty when any alternative may, or when there
    are no alternatives at all.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    alternatives = self.exprs
    self.mayReturnEmpty = (
        any(e.mayReturnEmpty for e in alternatives) if alternatives else True
    )
3575
def parseImpl(self, instring, loc, doActions=True):
    """
    Return the result of the first alternative that parses at C{loc}.

    If no alternative matches, the exception that got furthest into the
    input is raised with this expression's message; with no alternatives at
    all a fresh C{ParseException} is raised.
    """
    furthest_loc = -1
    furthest_exc = None
    for alt in self.exprs:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            input_len = len(instring)
            if input_len > furthest_loc:
                furthest_exc = ParseException(instring, input_len, alt.errmsg, self)
                furthest_loc = input_len

    # reaching this point means no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3599
3601 if isinstance( other, basestring ):
3602 other = ParserElement._literalStringClass( other )
3603 return self.append( other )
3604
3606 if hasattr(self,"name"):
3607 return self.name
3608
3609 if self.strRepr is None:
3610 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
3611
3612 return self.strRepr
3613
3615 subRecCheckList = parseElementList[:] + [ self ]
3616 for e in self.exprs:
3617 e.checkRecursion( subRecCheckList )
3618
3619
3620 -class Each(ParseExpression):
3621 """
3622 Requires all given C{ParseExpression}s to be found, but in any order.
3623 Expressions may be separated by whitespace.
3624 May be constructed using the C{'&'} operator.
3625
3626 Example::
3627 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
3628 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
3629 integer = Word(nums)
3630 shape_attr = "shape:" + shape_type("shape")
3631 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
3632 color_attr = "color:" + color("color")
3633 size_attr = "size:" + integer("size")
3634
3635 # use Each (using operator '&') to accept attributes in any order
3636 # (shape and posn are required, color and size are optional)
3637 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
3638
3639 shape_spec.runTests('''
3640 shape: SQUARE color: BLACK posn: 100, 120
3641 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3642 color:GREEN size:20 shape:TRIANGLE posn:20,40
3643 '''
3644 )
3645 prints::
3646 shape: SQUARE color: BLACK posn: 100, 120
3647 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
3648 - color: BLACK
3649 - posn: ['100', ',', '120']
3650 - x: 100
3651 - y: 120
3652 - shape: SQUARE
3653
3654
3655 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3656 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
3657 - color: BLUE
3658 - posn: ['50', ',', '80']
3659 - x: 50
3660 - y: 80
3661 - shape: CIRCLE
3662 - size: 50
3663
3664
3665 color: GREEN size: 20 shape: TRIANGLE posn: 20,40
3666 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
3667 - color: GREEN
3668 - posn: ['20', ',', '40']
3669 - x: 20
3670 - y: 40
3671 - shape: TRIANGLE
3672 - size: 20
3673 """
def __init__(self, exprs, savelist=True):
    """
    Build an any-order conjunction from C{exprs}.

    The expression groups used while parsing are computed lazily; the
    C{initExprGroups} flag marks them as not yet built.
    """
    super(Each, self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
3679
def parseImpl(self, instring, loc, doActions=True):
    """
    Match all contained expressions in whatever order they occur at C{loc}.

    A first pass probes the expressions with C{tryParse} to discover the
    order in which they match; a second pass parses them for real in that
    order and sums the results.  Raises C{ParseException} when a required
    expression never matched.
    """
    if self.initExprGroups:
        # classify the contained expressions once, on first use
        wrapped = [e for e in self.exprs if isinstance(e, Optional)]
        self.opt1map = dict((id(opt.expr), opt) for opt in wrapped)
        emptiable = [e for e in self.exprs
                     if e.mayReturnEmpty and not isinstance(e, Optional)]
        self.optionals = [opt.expr for opt in wrapped] + emptiable
        self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
        self.required = [e for e in self.exprs
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    probe_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    matchOrder = []

    # keep sweeping until a full sweep matches nothing
    while True:
        attempts = pending_required + pending_optional + self.multioptionals + self.multirequired
        failures = 0
        for e in attempts:
            try:
                probe_loc = e.tryParse(instring, probe_loc)
            except ParseException:
                failures += 1
            else:
                # record the Optional wrapper rather than its inner expression
                matchOrder.append(self.opt1map.get(id(e), e))
                if e in pending_required:
                    pending_required.remove(e)
                elif e in pending_optional:
                    pending_optional.remove(e)
        if failures == len(attempts):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # unmatched Optionals are still parsed, in case they define default values
    matchOrder += [e for e in self.exprs
                   if isinstance(e, Optional) and e.expr in pending_optional]

    resultlist = []
    for e in matchOrder:
        loc, results = e._parse(instring, loc, doActions)
        resultlist.append(results)

    return loc, sum(resultlist, ParseResults([]))
3728
3730 if hasattr(self,"name"):
3731 return self.name
3732
3733 if self.strRepr is None:
3734 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
3735
3736 return self.strRepr
3737
3739 subRecCheckList = parseElementList[:] + [ self ]
3740 for e in self.exprs:
3741 e.checkRecursion( subRecCheckList )
3742
3745 """
3746 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
3747 """
def __init__(self, expr, savelist=False):
    """
    Wrap a single expression.

    A string C{expr} is converted with the configured literal string class.
    When C{expr} is not C{None}, its parsing flags, whitespace settings and
    ignore expressions are copied onto this element.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        literal_cls = ParserElement._literalStringClass
        if issubclass(literal_cls, Token):
            expr = literal_cls(expr)
        else:
            # non-Token literal classes receive an already-built Literal
            expr = literal_cls(Literal(expr))
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars(expr.whiteChars)
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
3765
def parseImpl(self, instring, loc, doActions=True):
    """
    Delegate parsing to the wrapped expression, without pre-parsing.

    Raises C{ParseException} when no expression has been set.
    """
    if self.expr is None:
        raise ParseException("", loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
3771
3773 self.skipWhitespace = False
3774 self.expr = self.expr.copy()
3775 if self.expr is not None:
3776 self.expr.leaveWhitespace()
3777 return self
3778
3780 if isinstance( other, Suppress ):
3781 if other not in self.ignoreExprs:
3782 super( ParseElementEnhance, self).ignore( other )
3783 if self.expr is not None:
3784 self.expr.ignore( self.ignoreExprs[-1] )
3785 else:
3786 super( ParseElementEnhance, self).ignore( other )
3787 if self.expr is not None:
3788 self.expr.ignore( self.ignoreExprs[-1] )
3789 return self
3790
3796
3798 if self in parseElementList:
3799 raise RecursiveGrammarException( parseElementList+[self] )
3800 subRecCheckList = parseElementList[:] + [ self ]
3801 if self.expr is not None:
3802 self.expr.checkRecursion( subRecCheckList )
3803
def validate(self, validateTrace=None):
    """
    Validate the wrapped expression (if any), then check this element for
    recursion.

    Parameters:
     - validateTrace - optional list of the expressions visited so far; it is
       copied, never modified.  Defaults to an empty trace.
    """
    # None replaces the former mutable default ([]) shared between calls
    trace = ([] if validateTrace is None else validateTrace[:]) + [self]
    if self.expr is not None:
        self.expr.validate(trace)
    self.checkRecursion([])
3809
3811 try:
3812 return super(ParseElementEnhance,self).__str__()
3813 except Exception:
3814 pass
3815
3816 if self.strRepr is None and self.expr is not None:
3817 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
3818 return self.strRepr
3819
3822 """
3823 Lookahead matching of the given parse expression. C{FollowedBy}
3824 does I{not} advance the parsing position within the input string, it only
3825 verifies that the specified parse expression matches at the current
3826 position. C{FollowedBy} always returns a null token list.
3827
3828 Example::
3829 # use FollowedBy to match a label only if it is followed by a ':'
3830 data_word = Word(alphas)
3831 label = data_word + FollowedBy(':')
3832 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3833
3834 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
3835 prints::
3836 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
3837 """
3841
def parseImpl(self, instring, loc, doActions=True):
    """
    Check that the wrapped expression matches at C{loc} without consuming it.

    Any exception raised by the wrapped expression's C{tryParse} propagates.
    Returns C{(loc, [])}: the position is unchanged and no tokens are produced.
    """
    lookahead = self.expr.tryParse
    lookahead(instring, loc)
    return loc, []
3845
3846
3847 -class NotAny(ParseElementEnhance):
3848 """
3849 Lookahead to disallow matching with the given parse expression. C{NotAny}
3850 does I{not} advance the parsing position within the input string, it only
3851 verifies that the specified parse expression does I{not} match at the current
3852 position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
3853 always returns a null token list. May be constructed using the '~' operator.
3854
3855 Example::
3856
3857 """
3859 super(NotAny,self).__init__(expr)
3860
3861 self.skipWhitespace = False
3862 self.mayReturnEmpty = True
3863 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
3864
3865 - def parseImpl( self, instring, loc, doActions=True ):
3869
3871 if hasattr(self,"name"):
3872 return self.name
3873
3874 if self.strRepr is None:
3875 self.strRepr = "~{" + _ustr(self.expr) + "}"
3876
3877 return self.strRepr
3878
def __init__(self, expr, stopOn=None):
    """
    Wrap C{expr} for repeated matching.

    Parameters:
     - expr - the expression to repeat
     - stopOn - optional terminating sentinel; a string is converted with the
       configured literal string class.  Its negation is stored in
       C{self.not_ender} (C{None} when no sentinel is given).
    """
    super(_MultipleMatch, self).__init__(expr)
    self.saveAsList = True
    if stopOn is None:
        self.not_ender = None
    else:
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        self.not_ender = ~sentinel
3887
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the wrapped expression once, then as many more times as possible.

    The first match is mandatory, so its failure propagates.  Later failures
    (C{ParseException} or C{IndexError}) simply end the repetition.  When a
    stop sentinel is configured it is checked before every attempt.
    Returns C{(new_loc, tokens)}.
    """
    parse_once = self.expr._parse
    skip_ignorables = self._skipIgnorables
    reject_ender = self.not_ender.tryParse if self.not_ender is not None else None

    # at least one match is required, unless we are sitting on the sentinel
    if reject_ender is not None:
        reject_ender(instring, loc)
    loc, tokens = parse_once(instring, loc, doActions, callPreParse=False)
    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if reject_ender is not None:
                reject_ender(instring, loc)
            next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
            loc, more = parse_once(instring, next_loc, doActions)
            if more or more.haskeys():
                tokens += more
    except (ParseException, IndexError):
        pass

    return loc, tokens
3916
3918 """
3919 Repetition of one or more of the given expression.
3920
3921 Parameters:
3922 - expr - expression that must match one or more times
3923 - stopOn - (default=C{None}) - expression for a terminating sentinel
3924 (only required if the sentinel would ordinarily match the repetition
3925 expression)
3926
3927 Example::
3928 data_word = Word(alphas)
3929 label = data_word + FollowedBy(':')
3930 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
3931
3932 text = "shape: SQUARE posn: upper left color: BLACK"
3933 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
3934
3935 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
3936 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3937 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
3938
3939 # could also be written as
3940 (attr_expr * (1,)).parseString(text).pprint()
3941 """
3942
3944 if hasattr(self,"name"):
3945 return self.name
3946
3947 if self.strRepr is None:
3948 self.strRepr = "{" + _ustr(self.expr) + "}..."
3949
3950 return self.strRepr
3951
3953 """
3954 Optional repetition of zero or more of the given expression.
3955
3956 Parameters:
3957 - expr - expression that must match zero or more times
3958 - stopOn - (default=C{None}) - expression for a terminating sentinel
3959 (only required if the sentinel would ordinarily match the repetition
3960 expression)
3961
3962 Example: similar to L{OneOrMore}
3963 """
3964 - def __init__( self, expr, stopOn=None):
3967
3968 - def parseImpl( self, instring, loc, doActions=True ):
3973
3975 if hasattr(self,"name"):
3976 return self.name
3977
3978 if self.strRepr is None:
3979 self.strRepr = "[" + _ustr(self.expr) + "]..."
3980
3981 return self.strRepr
3982
3989
3990 _optionalNotMatched = _NullToken()
3992 """
3993 Optional matching of the given expression.
3994
3995 Parameters:
     - expr - expression that must match zero or one time
3997 - default (optional) - value to be returned if the optional expression is not found.
3998
3999 Example::
4000 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
4001 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
4002 zip.runTests('''
4003 # traditional ZIP code
4004 12345
4005
4006 # ZIP+4 form
4007 12101-0001
4008
4009 # invalid ZIP
4010 98765-
4011 ''')
4012 prints::
4013 # traditional ZIP code
4014 12345
4015 ['12345']
4016
4017 # ZIP+4 form
4018 12101-0001
4019 ['12101-0001']
4020
4021 # invalid ZIP
4022 98765-
4023 ^
4024 FAIL: Expected end of text (at char 5), (line:1, col:6)
4025 """
4027 super(Optional,self).__init__( expr, savelist=False )
4028 self.saveAsList = self.expr.saveAsList
4029 self.defaultValue = default
4030 self.mayReturnEmpty = True
4031
def parseImpl(self, instring, loc, doActions=True):
    """
    Parse the wrapped expression if it matches; otherwise succeed anyway.

    On failure (C{ParseException} or C{IndexError}) the location is left
    unchanged and the tokens are the configured default value, or an empty
    list when no default was given.  If the wrapped expression has a results
    name, the default is also stored under that name.
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([fallback])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [fallback]
    return loc, tokens
4045
4047 if hasattr(self,"name"):
4048 return self.name
4049
4050 if self.strRepr is None:
4051 self.strRepr = "[" + _ustr(self.expr) + "]"
4052
4053 return self.strRepr
4054
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """
        Store the target expression C{other} and the skipping options.

        A string C{failOn} is converted with the configured literal string
        class; C{ignore} is stored as given.
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} until the target expression matches.

        Returns C{(new_loc, results)} where the results hold the skipped text
        and, when C{includeMatch} is set, the tokens of the target expression.
        Raises C{ParseException} when the input is exhausted without a match,
        or when the C{failOn} expression matches before the target is found.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # The forbidden expression was met before the target: this is
                # a failure.  (A plain ``break`` here would skip the loop's
                # ``else`` clause and wrongly report a successful match.)
                if self_failOn_canParseNext(instring, tmploc):
                    raise ParseException(instring, loc, self.errmsg, self)

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4169
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__(self, other=None):
        # The contained expression is optional here; it is normally
        # supplied afterwards with '<<' or '<<='.
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """
        Install C{other} as the contained expression and copy its matching
        attributes onto this element.  A plain string is converted with
        C{ParserElement._literalStringClass}.  Returns C{self}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # any cached string representation is stale now
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of '<<'; delegates to C{__lshift__}."""
        return self << other

    def leaveWhitespace(self):
        """Turn off whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """
        Streamline the contained expression once.  The C{streamlined} flag is
        set before descending, so a second call on the same element (for
        example via a recursive reference) does nothing.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """
        Validate the contained expression, passing along the trace of
        elements already visited, then run C{checkRecursion}.

        C{validateTrace} defaults to an empty trace; the list passed in is
        never modified (a copy extended with C{self} is handed down).
        """
        trace = [] if validateTrace is None else validateTrace
        if self not in trace:
            tmp = trace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        # Use the assigned name when there is one; otherwise a fixed
        # placeholder that does not expand the contained expression.
        if hasattr(self, "name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  When no expression has been assigned
        yet, the result is a new C{Forward} that wraps this one, so a later
        assignment to the original is seen through the copy.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4250
4254
4256 """
4257 Abstract subclass of C{ParseExpression}, for converting parsed results.
4258 """
4259 - def __init__( self, expr, savelist=False ):
4262
4264 """
4265 Converter to concatenate all matching tokens to a single string.
4266 By default, the matching patterns must also be contiguous in the input string;
4267 this can be disabled by specifying C{'adjacent=False'} in the constructor.
4268
4269 Example::
4270 real = Word(nums) + '.' + Word(nums)
4271 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
4272 # will also erroneously match the following
4273 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
4274
4275 real = Combine(Word(nums) + '.' + Word(nums))
4276 print(real.parseString('3.1416')) # -> ['3.1416']
4277 # no match when there are internal spaces
4278 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
4279 """
def __init__(self, expr, joinString="", adjacent=True):
    """
    Parameters:
     - expr - expression whose tokens are to be concatenated
     - joinString - separator passed to the token join in C{postParse}
     - adjacent - when true, C{leaveWhitespace()} is applied so that the
       matched pieces must be contiguous in the input
    """
    super(Combine, self).__init__(expr)

    if adjacent:
        # switches whitespace skipping off ...
        self.leaveWhitespace()
    # ... after which it is switched back on for the Combine element itself
    self.skipWhitespace = True

    self.adjacent = adjacent
    self.joinString = joinString
    self.callPreparse = True
4289
4296
def postParse(self, instring, loc, tokenlist):
    """
    Replace the matched tokens with a single token: their string forms
    joined with C{self.joinString}.  The result starts as a copy of
    C{tokenlist} with its list items cleared.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([joined], modal=self.modalResults)

    # with a results name and named entries present, return the result nested in a list
    if not (self.resultsName and combined.haskeys()):
        return combined
    return [combined]
4306
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the matched tokens one level deeper
        return [tokenlist]
4327
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Add a named entry to C{tokenlist} for every non-empty element, keyed
        by the element's first token.  Returns C{tokenlist}, nested in a list
        when this element has a results name.
        """
        for i, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey, int):
                # integer keys are stored under their stripped string form
                ikey = _ustr(tok[0]).strip()
            if len(tok) == 1:
                # key only: the value is an empty string
                tokenlist[ikey] = _ParseResultsWithOffset("", i)
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                # key plus one plain value
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i)
            else:
                # everything after the key becomes the value
                dictvalue = tok.copy()
                del dictvalue[0]
                if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i)
                else:
                    # a single remaining item without names is stored unwrapped
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i)

        if self.resultsName:
            return [tokenlist]
        else:
            return tokenlist
4391
4394 """
4395 Converter for ignoring the results of a parsed expression.
4396
4397 Example::
4398 source = "a, b, c,d"
4399 wd = Word(alphas)
4400 wd_list1 = wd + ZeroOrMore(',' + wd)
4401 print(wd_list1.parseString(source))
4402
4403 # often, delimiters that are useful during parsing are just in the
4404 # way afterward - use Suppress to keep them out of the parsed output
4405 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
4406 print(wd_list2.parseString(source))
4407 prints::
4408 ['a', ',', 'b', ',', 'c', ',', 'd']
4409 ['a', 'b', 'c', 'd']
4410 (See also L{delimitedList}.)
4411 """
4412 - def postParse( self, instring, loc, tokenlist ):
4414
4417
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    """
    def __init__(self, methodCall):
        # normalise the wrapped callable with _trim_arity so it can be called as (s, l, t)
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on first use; raise C{ParseException} on any later call."""
        if not self.called:
            results = self.callable(s, l, t)
            # flagged only after the action returned without raising
            self.called = True
            return results
        raise ParseException(s, l, "")
4434
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        # some callables do not expose __name__; tracing still works without it
        pass
    return z
4475
4476
4477
4478
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."

    # delimiters are kept when combining, suppressed otherwise
    separator = delim if combine else Suppress( delim )
    listExpr = expr + ZeroOrMore( separator + expr )
    if combine:
        listExpr = Combine( listExpr )
    return listExpr.setName(dlName)
4497
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, define the array body as exactly n
        # repetitions of expr (or an empty group for a count of zero)
        n = t[0]
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        # returning no tokens suppresses the count itself
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        # work on a copy so the caller's expression does not gain the extra parse action
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4528
4530 ret = []
4531 for i in L:
4532 if isinstance(i,list):
4533 ret.extend(_flatten(i))
4534 else:
4535 ret.append(i)
4536 return ret
4537
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater each time expr matches
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.asList())
                rep << And(Literal(tt) for tt in tflat)
        else:
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4565
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr; the comparison happens in a parse action
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then require the repeater to match the same tokens
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4593
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex character class: backslash-escape C{\\}, C{^}, C{-} and C{]}, and spell newline and tab as C{\\n} and C{\\t}."""
    # the backslash is handled first so the escapes added for later characters are not doubled
    for c in r"\^-]":
        s = s.replace(c,_bslash+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
4601
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()
        def masks(a, b):
            return b.upper().startswith(a.upper())
        parseElementClass = CaselessLiteral
    else:
        def isequal(a, b):
            return a == b
        def masks(a, b):
            return b.startswith(a)
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and move any symbol ahead of a shorter one that would
    # mask it.  After each change the same position is examined again.
    pos = 0
    while pos < len(symbols)-1:
        cur = symbols[pos]
        for otherPos, other in enumerate(symbols[pos+1:], pos+1):
            if isequal(other, cur):
                del symbols[otherPos]
                break
            if masks(cur, other):
                del symbols[otherPos]
                symbols.insert(pos, other)
                break
        else:
            pos += 1

    if useRegex and not caseless:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex( pattern ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4674
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
4709
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose only token is the location at which they matched
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # callPreparse is switched off on the end marker (presumably so nothing
    # is skipped before the end location is recorded)
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def sliceAsString(s,l,t):
        return s[t._original_start:t._original_end]

    def sliceInPlace(s,l,t):
        # replace the token list in place; popping removes the two marker names
        t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    wrapped.setParseAction(sliceAsString if asString else sliceInPlace)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4746
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.
    """
    # the parse action returns the first token in place of the whole token list
    return TokenConverter(expr).setParseAction(lambda t:t[0])
4753
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the location at which it matched
    locator = Empty().setParseAction(lambda s,l,t: l)
    # the end locator is a copy with leaveWhitespace() applied
    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
4776
4777
4778
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Grammar for the bracketed character-set notation accepted by srange().
# a backslash followed by one punctuation character stands for that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## hex escape: convert the digits after the 'x'/'X' marker.
# (Splitting at the marker, rather than stripping leading characters, also
# handles an upper-case 'X' and an all-zero value such as \x00.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## octal escape
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, returned as a group of its two end characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[")
                   + Optional("^").setResultsName("negate")
                   + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                   + "]" )
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    Any error while parsing or expanding C{s} results in an empty string.
    """
    # a parsed range arrives as a two-item ParseResults; single characters pass through unchanged
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        return ""
4815
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{ParseException} unless the match
    location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
4825
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # the returned action ignores its arguments and yields a one-item list
    return lambda s,l,t: [replStr]
4839
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    # drop the first and last character of the first token
    return t[0][1:-1]
4853
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
    args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        return [func(tokn, *args) for tokn in t]

    # name the action after the mapped callable, falling back to its class name
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4897
def _upcaseToken(t):
    return _ustr(t).upper()

def _downcaseToken(t):
    return _ustr(t).lower()

upcaseTokens = tokenMap(_upcaseToken)
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(_downcaseToken)
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4932
4951
4960
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional name/value tuples take precedence over keyword arguments
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    # snapshot the pairs so the returned action can iterate them on every call
    attrs = [(k,v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
5025
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # build the attribute name, with the namespace prefix when one is given
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
5060
# Associativity markers for operator definitions; each is a unique
# sentinel object.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple of two expressions, for the
          two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as a pair of
    expressions, if numTerms is not 1, 2, or 3, or if rightLeftAssoc is
    neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # an operand is either the base expression or a parenthesized full expression
    lastExpr = baseExpr | (lpar + ret + rpar)
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so a malformed ternary
            # definition raises the ValueError below rather than a TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = (FollowedBy(lastExpr + opExpr + lastExpr)
                                 + Group(lastExpr + OneOrMore(opExpr + lastExpr)))
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = (FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                             + Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # wrap the operator in Optional unless the caller already did
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = (FollowedBy(lastExpr + opExpr + thisExpr)
                                 + Group(lastExpr + OneOrMore(opExpr + thisExpr)))
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = (FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its operator form, or falls back to the tighter-binding level
        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5176
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Each Regex matches the opening quote and the
# string body (ordinary characters, a doubled quote, or a backslash escape);
# the closing quote is a separate literal that Combine joins onto the body.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if opener and closer are equal, or if no content
    expression is given and opener/closer are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        stripToken = lambda t: t[0].strip()
        white = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can be excluded directly by CharsNotIn
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener + closer + white, exact=1))
                            ).setParseAction(stripToken)
            else:
                content = (empty.copy() +
                           CharsNotIn(opener + closer + white).setParseAction(stripToken))
        else:
            # multi-character delimiters need negative lookahead before each character
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(white, exact=1))
                            ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(white, exact=1))
                            ).setParseAction(stripToken)
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer))
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.setName('nested %s%s expression' % (opener, closer))
    return ret
5275
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
       the current level; set to False for block of left-most statements
       (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def verifyPeer(instring, loc, toks):
        # statement must start in the column currently on top of the stack
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseFatalException(instring, loc, "illegal nesting")
        raise ParseException(instring, loc, "not a peer entry")

    def verifySubIndent(instring, loc, toks):
        # a deeper column opens a new level and is pushed on the stack
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        indentStack.append(column)

    def verifyUnindent(instring, loc, toks):
        # leaving a block: column must be left of the current level and no
        # deeper than the enclosing one; the current level is then popped
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    newlines = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = (Empty() + Empty().setParseAction(verifySubIndent)).setName('INDENT')
    peerMarker = Empty().setParseAction(verifyPeer).setName('')
    unindentMarker = Empty().setParseAction(verifyUnindent).setName('UNINDENT')
    statements = OneOrMore(peerMarker + Group(blockStatementExpr) + Optional(newlines))
    if indent:
        block = Group(Optional(newlines) + indentMarker + statements + unindentMarker)
    else:
        block = Group(Optional(newlines) + statements)
    # registered only after the block expression has been assembled (original ordering kept)
    blockStatementExpr.ignore(_bslash + LineEnd())
    return block.setName('indented block')
5389
# Character sets expanded by srange from code-point ranges in 0xa1-0xff.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close tag expressions for a tag name made of alphas, alphanums, '_' and ':'.
_anyTagName = Word(alphas, alphanums + "_:").setName('any tag')
anyOpenTag, anyCloseTag = makeHTMLTags(_anyTagName)
del _anyTagName

# Entity name -> replacement character; the names also form the alternation
# of the commonHTMLEntity regex, captured as the named group "entity".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) + ");"
).setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result of the parsed tokens in the entity
    map and returns the matching character, or C{None} for an unknown name.
    """
    return _htmlEntityMap.get(t.entity)
5399
5400
# Predefined comment expressions.  The bare strings following each definition
# are variable docstrings and are kept for the documentation tooling.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One item of a comma-separated list: runs of printable non-comma characters,
# with interior spaces/tabs kept only when not followed by a comma or line end.
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining number will collapse the nested alternatives into a single MatchFirst
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # a short-form address must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action raises C{ParseException} when the parsed
        text does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action raises C{ParseException} when the parsed
        text does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://example.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        # open and close tags are suppressed, leaving only the text between them
        return pyparsing_common._html_stripper.transformString(tokens[0])

    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5700
5701
if __name__ == "__main__":
    # Self-test: a tiny "SELECT ... FROM ..." grammar followed by a few of
    # the pyparsing_common expressions, each exercised through runTests.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5772