Module pyparsing

Source Code for Module pyparsing

   1  # module pyparsing.py
 
   2  #
 
   3  # Copyright (c) 2003-2018  Paul T. McGuire
 
   4  #
 
   5  # Permission is hereby granted, free of charge, to any person obtaining
 
   6  # a copy of this software and associated documentation files (the
 
   7  # "Software"), to deal in the Software without restriction, including
 
   8  # without limitation the rights to use, copy, modify, merge, publish,
 
   9  # distribute, sublicense, and/or sell copies of the Software, and to
 
  10  # permit persons to whom the Software is furnished to do so, subject to
 
  11  # the following conditions:
 
  12  #
 
  13  # The above copyright notice and this permission notice shall be
 
  14  # included in all copies or substantial portions of the Software.
 
  15  #
 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
  23  #
 
  24  
 
  25  __doc__ = \
 
  26  """
 
  27  pyparsing module - Classes and methods to define and execute parsing grammars
 
  28  =============================================================================
 
  29  
 
  30  The pyparsing module is an alternative approach to creating and executing simple grammars,
 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
 
  33  provides a library of classes that you use to construct the grammar directly in Python.
 
  34  
 
  35  Here is a program to parse "Hello, World!" (or any greeting of the form 
 
  36  C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 
 
  37  (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
 
  38  L{Literal} expressions)::
 
  39  
 
  40      from pyparsing import Word, alphas
 
  41  
 
  42      # define grammar of a greeting
 
  43      greet = Word(alphas) + "," + Word(alphas) + "!"
 
  44  
 
  45      hello = "Hello, World!"
 
  46      print (hello, "->", greet.parseString(hello))
 
  47  
 
  48  The program outputs the following::
 
  49  
 
  50      Hello, World! -> ['Hello', ',', 'World', '!']
 
  51  
 
  52  The Python representation of the grammar is quite readable, owing to the self-explanatory
 
  53  class names, and the use of '+', '|' and '^' operators.
 
  54  
 
  55  The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
 
  56  object with named attributes.
 
  57  
 
  58  The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 
  59   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 
  60   - quoted strings
 
  61   - embedded comments
 
  62  
 
  63  
 
  64  Getting Started -
 
  65  -----------------
 
  66  Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
 
  67  classes inherit from. Use the docstrings for examples of how to:
 
  68   - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
 
  69   - construct character word-group expressions using the L{Word} class
 
  70   - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
 
  71   - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
 
  72   - associate names with your parsed results using L{ParserElement.setResultsName}
 
  73   - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
 
  74   - find more useful common expressions in the L{pyparsing_common} namespace class
 
  75  """ 
  76  
 
# Release metadata for this module: version string, the timestamp string
# recorded with it, and the author contact.
__version__ = "2.2.2"
__versionTime__ = "29 Sep 2018 15:58 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  80  
 
  81  import string 
  82  from weakref import ref as wkref 
  83  import copy 
  84  import sys 
  85  import warnings 
  86  import re 
  87  import sre_constants 
  88  import collections 
  89  import pprint 
  90  import traceback 
  91  import types 
  92  from datetime import datetime 
  93  
 
  94  try: 
  95      from _thread import RLock 
  96  except ImportError: 
  97      from threading import RLock 
  98  
 
  99  try: 
 100      # Python 3
 
 101      from collections.abc import Iterable 
 102      from collections.abc import MutableMapping 
 103  except ImportError: 
 104      # Python 2.7
 
 105      from collections import Iterable 
 106      from collections import MutableMapping 
 107  
 
 108  try: 
 109      from collections import OrderedDict as _OrderedDict 
 110  except ImportError: 
 111      try: 
 112          from ordereddict import OrderedDict as _OrderedDict 
 113      except ImportError: 
 114          _OrderedDict = None 
 115  
 
 116  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 
 117  
 
# Public API of the module: the names bound by ``from pyparsing import *``.
# The first rows are classes, the remaining rows are helper functions,
# character-set constants and prebuilt expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
 138  
 
 139  system_version = tuple(sys.version_info)[:3] 
 140  PY_3 = system_version[0] == 3 
 141  if PY_3: 
 142      _MAX_INT = sys.maxsize 
 143      basestring = str 
 144      unichr = chr 
 145      _ustr = str 
 146  
 
 147      # build list of single arg builtins, that can be used as parse actions
 
 148      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 149  
 
 150  else: 
 151      _MAX_INT = sys.maxint 
 152      range = xrange 
153 154 - def _ustr(obj):
155 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 156 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 157 then < returns the unicode object | encodes it with the default encoding | ... >. 158 """ 159 if isinstance(obj,unicode): 160 return obj 161 162 try: 163 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 164 # it won't break any existing code. 165 return str(obj) 166 167 except UnicodeEncodeError: 168 # Else encode it 169 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 170 xmlcharref = Regex(r'&#\d+;') 171 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 172 return xmlcharref.transformString(ret)
173 174 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 175 singleArgBuiltins = [] 176 import __builtin__ 177 for fname in "sum len sorted reversed list tuple set any all min max".split(): 178 try: 179 singleArgBuiltins.append(getattr(__builtin__,fname)) 180 except AttributeError: 181 continue 182 183 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data.

    Returns a copy of ``data`` in which each of the five characters has been
    replaced by its named XML entity reference.
    """
    # The ampersand has to go first: the other replacements introduce new
    # ampersands that must not be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw_char, entity in replacements:
        escaped = escaped.replace(raw_char, entity)
    return escaped
194
class _Constants(object):
    """Empty class that defines no attributes or methods of its own.

    NOTE(review): nothing in this part of the file shows how it is used; the
    name suggests a bare namespace object for holding constants - confirm
    against its use sites.
    """
    pass
# Character-set constants used when building expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable ASCII character except the whitespace ones
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record the parse location, message, input string and element.

        When only ``pstr`` is given it is taken to be the message and the
        stored input string is left empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # only reached for names not found by normal attribute lookup; the
        # helpers lineno/col/line are module-level functions
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        """Return the message followed by character, line and column position."""
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        marked = self.line
        insert_at = self.column - 1
        if markerString:
            marked = marked[:insert_at] + markerString + marked[insert_at:]
        return marked.strip()

    def __dir__(self):
        """List the computed attribute names ahead of the class's own names."""
        computed = "lineno col line".split()
        return computed + dir(type(self))
260
class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression does not match the input;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
281
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
286
class ParseSyntaxException(ParseFatalException):
    """just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
       immediately because an unbacktrackable syntax error has been found"""
292
#~ class ReparseException(ParseBaseException):
#~     """Experimental class - parse actions can raise this exception to cause
#~        pyparsing to reparse the input string:
#~         - with a modified input string, and/or
#~         - with a modified start location
#~        Set the values of the ReparseException in the constructor, and raise the
#~        exception in a parse action to cause pyparsing to use the new string/location.
#~        Setting the values as None causes no change to be made.
#~        """
#~     def __init_( self, newstring, restartLoc ):
#~         self.newParseText = newstring
#~         self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""

    def __init__( self, parseElementList ):
        """Store the trace of parser elements that led to the recursion.

        The trace is also handed to the base class so that ``args`` is
        populated.  The copy/pickle machinery rebuilds an exception by calling
        the class with ``args``; with an empty ``args`` that call would fail
        because this constructor requires one argument.
        """
        super(RecursiveGrammarException, self).__init__(parseElementList)
        self.parseElementTrace = parseElementList

    def __str__( self ):
        """Return the class name followed by the stored element trace."""
        return "RecursiveGrammarException: %s" % self.parseElementTrace
313
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset, stored together as the 2-tuple ``tup``."""

    def __init__(self,p1,p2):
        # p1 is the value, p2 its offset
        self.tup = (p1, p2)

    def __getitem__(self,i):
        """Index into the underlying (value, offset) tuple."""
        return self.tup[i]

    def __repr__(self):
        """Show only the value; the offset is left out."""
        value = self.tup[0]
        return repr(value)

    def setOffset(self,i):
        """Replace the stored offset with ``i``, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
323
324 -class ParseResults(object):
325 """ 326 Structured parse results, to provide multiple means of access to the parsed data: 327 - as a list (C{len(results)}) 328 - by list index (C{results[0], results[1]}, etc.) 329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 330 331 Example:: 332 integer = Word(nums) 333 date_str = (integer.setResultsName("year") + '/' 334 + integer.setResultsName("month") + '/' 335 + integer.setResultsName("day")) 336 # equivalent form: 337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 338 339 # parseString returns a ParseResults object 340 result = date_str.parseString("1999/12/31") 341 342 def test(s, fn=repr): 343 print("%s -> %s" % (s, fn(eval(s)))) 344 test("list(result)") 345 test("result[0]") 346 test("result['month']") 347 test("result.day") 348 test("'month' in result") 349 test("'minutes' in result") 350 test("result.dump()", str) 351 prints:: 352 list(result) -> ['1999', '/', '12', '/', '31'] 353 result[0] -> '1999' 354 result['month'] -> '12' 355 result.day -> '31' 356 'month' in result -> True 357 'minutes' in result -> False 358 result.dump() -> ['1999', '/', '12', '/', '31'] 359 - day: 31 360 - month: 12 361 - year: 1999 362 """
363 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
364 if isinstance(toklist, cls): 365 return toklist 366 retobj = object.__new__(cls) 367 retobj.__doinit = True 368 return retobj
369 370 # Performance tuning: we construct a *lot* of these, so keep this 371 # constructor as small and fast as possible
372 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
373 if self.__doinit: 374 self.__doinit = False 375 self.__name = None 376 self.__parent = None 377 self.__accumNames = {} 378 self.__asList = asList 379 self.__modal = modal 380 if toklist is None: 381 toklist = [] 382 if isinstance(toklist, list): 383 self.__toklist = toklist[:] 384 elif isinstance(toklist, _generatorType): 385 self.__toklist = list(toklist) 386 else: 387 self.__toklist = [toklist] 388 self.__tokdict = dict() 389 390 if name is not None and name: 391 if not modal: 392 self.__accumNames[name] = 0 393 if isinstance(name,int): 394 name = _ustr(name) # will always return a str, but use _ustr for consistency 395 self.__name = name 396 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 397 if isinstance(toklist,basestring): 398 toklist = [ toklist ] 399 if asList: 400 if isinstance(toklist,ParseResults): 401 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 402 else: 403 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 404 self[name].__name = name 405 else: 406 try: 407 self[name] = toklist[0] 408 except (KeyError,TypeError,IndexError): 409 self[name] = toklist
410
411 - def __getitem__( self, i ):
412 if isinstance( i, (int,slice) ): 413 return self.__toklist[i] 414 else: 415 if i not in self.__accumNames: 416 return self.__tokdict[i][-1][0] 417 else: 418 return ParseResults([ v[0] for v in self.__tokdict[i] ])
419
420 - def __setitem__( self, k, v, isinstance=isinstance ):
421 if isinstance(v,_ParseResultsWithOffset): 422 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 423 sub = v[0] 424 elif isinstance(k,(int,slice)): 425 self.__toklist[k] = v 426 sub = v 427 else: 428 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 429 sub = v 430 if isinstance(sub,ParseResults): 431 sub.__parent = wkref(self)
432
433 - def __delitem__( self, i ):
434 if isinstance(i,(int,slice)): 435 mylen = len( self.__toklist ) 436 del self.__toklist[i] 437 438 # convert int to slice 439 if isinstance(i, int): 440 if i < 0: 441 i += mylen 442 i = slice(i, i+1) 443 # get removed indices 444 removed = list(range(*i.indices(mylen))) 445 removed.reverse() 446 # fixup indices in token dictionary 447 for name,occurrences in self.__tokdict.items(): 448 for j in removed: 449 for k, (value, position) in enumerate(occurrences): 450 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 451 else: 452 del self.__tokdict[i]
453
454 - def __contains__( self, k ):
455 return k in self.__tokdict
456
457 - def __len__( self ): return len( self.__toklist )
458 - def __bool__(self): return ( not not self.__toklist )
459 __nonzero__ = __bool__
460 - def __iter__( self ): return iter( self.__toklist )
461 - def __reversed__( self ): return iter( self.__toklist[::-1] )
462 - def _iterkeys( self ):
463 if hasattr(self.__tokdict, "iterkeys"): 464 return self.__tokdict.iterkeys() 465 else: 466 return iter(self.__tokdict)
467
468 - def _itervalues( self ):
469 return (self[k] for k in self._iterkeys())
470
471 - def _iteritems( self ):
472 return ((k, self[k]) for k in self._iterkeys())
473 474 if PY_3: 475 keys = _iterkeys 476 """Returns an iterator of all named result keys (Python 3.x only).""" 477 478 values = _itervalues 479 """Returns an iterator of all named result values (Python 3.x only).""" 480 481 items = _iteritems 482 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 483 484 else: 485 iterkeys = _iterkeys 486 """Returns an iterator of all named result keys (Python 2.x only).""" 487 488 itervalues = _itervalues 489 """Returns an iterator of all named result values (Python 2.x only).""" 490 491 iteritems = _iteritems 492 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 493
494 - def keys( self ):
495 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 496 return list(self.iterkeys())
497
498 - def values( self ):
499 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 500 return list(self.itervalues())
501
502 - def items( self ):
503 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 504 return list(self.iteritems())
505
506 - def haskeys( self ):
507 """Since keys() returns an iterator, this method is helpful in bypassing 508 code that looks for the existence of any defined results names.""" 509 return bool(self.__tokdict)
510
511 - def pop( self, *args, **kwargs):
512 """ 513 Removes and returns item at specified index (default=C{last}). 514 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 515 argument or an integer argument, it will use C{list} semantics 516 and pop tokens from the list of parsed tokens. If passed a 517 non-integer argument (most likely a string), it will use C{dict} 518 semantics and pop the corresponding value from any defined 519 results names. A second default return value argument is 520 supported, just as in C{dict.pop()}. 521 522 Example:: 523 def remove_first(tokens): 524 tokens.pop(0) 525 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 526 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 527 528 label = Word(alphas) 529 patt = label("LABEL") + OneOrMore(Word(nums)) 530 print(patt.parseString("AAB 123 321").dump()) 531 532 # Use pop() in a parse action to remove named result (note that corresponding value is not 533 # removed from list form of results) 534 def remove_LABEL(tokens): 535 tokens.pop("LABEL") 536 return tokens 537 patt.addParseAction(remove_LABEL) 538 print(patt.parseString("AAB 123 321").dump()) 539 prints:: 540 ['AAB', '123', '321'] 541 - LABEL: AAB 542 543 ['AAB', '123', '321'] 544 """ 545 if not args: 546 args = [-1] 547 for k,v in kwargs.items(): 548 if k == 'default': 549 args = (args[0], v) 550 else: 551 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 552 if (isinstance(args[0], int) or 553 len(args) == 1 or 554 args[0] in self): 555 index = args[0] 556 ret = self[index] 557 del self[index] 558 return ret 559 else: 560 defaultvalue = args[1] 561 return defaultvalue
562
563 - def get(self, key, defaultValue=None):
564 """ 565 Returns named result matching the given key, or if there is no 566 such name, then returns the given C{defaultValue} or C{None} if no 567 C{defaultValue} is specified. 568 569 Similar to C{dict.get()}. 570 571 Example:: 572 integer = Word(nums) 573 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 574 575 result = date_str.parseString("1999/12/31") 576 print(result.get("year")) # -> '1999' 577 print(result.get("hour", "not specified")) # -> 'not specified' 578 print(result.get("hour")) # -> None 579 """ 580 if key in self: 581 return self[key] 582 else: 583 return defaultValue
584
585 - def insert( self, index, insStr ):
586 """ 587 Inserts new element at location index in the list of parsed tokens. 588 589 Similar to C{list.insert()}. 590 591 Example:: 592 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 593 594 # use a parse action to insert the parse location in the front of the parsed results 595 def insert_locn(locn, tokens): 596 tokens.insert(0, locn) 597 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 598 """ 599 self.__toklist.insert(index, insStr) 600 # fixup indices in token dictionary 601 for name,occurrences in self.__tokdict.items(): 602 for k, (value, position) in enumerate(occurrences): 603 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
604
605 - def append( self, item ):
606 """ 607 Add single element to end of ParseResults list of elements. 608 609 Example:: 610 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 611 612 # use a parse action to compute the sum of the parsed integers, and add it to the end 613 def append_sum(tokens): 614 tokens.append(sum(map(int, tokens))) 615 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 616 """ 617 self.__toklist.append(item)
618
619 - def extend( self, itemseq ):
620 """ 621 Add sequence of elements to end of ParseResults list of elements. 622 623 Example:: 624 patt = OneOrMore(Word(alphas)) 625 626 # use a parse action to append the reverse of the matched strings, to make a palindrome 627 def make_palindrome(tokens): 628 tokens.extend(reversed([t[::-1] for t in tokens])) 629 return ''.join(tokens) 630 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 631 """ 632 if isinstance(itemseq, ParseResults): 633 self += itemseq 634 else: 635 self.__toklist.extend(itemseq)
636
637 - def clear( self ):
638 """ 639 Clear all elements and results names. 640 """ 641 del self.__toklist[:] 642 self.__tokdict.clear()
643
644 - def __getattr__( self, name ):
645 try: 646 return self[name] 647 except KeyError: 648 return "" 649 650 if name in self.__tokdict: 651 if name not in self.__accumNames: 652 return self.__tokdict[name][-1][0] 653 else: 654 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 655 else: 656 return ""
657
658 - def __add__( self, other ):
659 ret = self.copy() 660 ret += other 661 return ret
662
663 - def __iadd__( self, other ):
664 if other.__tokdict: 665 offset = len(self.__toklist) 666 addoffset = lambda a: offset if a<0 else a+offset 667 otheritems = other.__tokdict.items() 668 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 669 for (k,vlist) in otheritems for v in vlist] 670 for k,v in otherdictitems: 671 self[k] = v 672 if isinstance(v[0],ParseResults): 673 v[0].__parent = wkref(self) 674 675 self.__toklist += other.__toklist 676 self.__accumNames.update( other.__accumNames ) 677 return self
678
679 - def __radd__(self, other):
680 if isinstance(other,int) and other == 0: 681 # useful for merging many ParseResults using sum() builtin 682 return self.copy() 683 else: 684 # this may raise a TypeError - so be it 685 return other + self
686
687 - def __repr__( self ):
688 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689
690 - def __str__( self ):
691 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692
693 - def _asStringList( self, sep='' ):
694 out = [] 695 for item in self.__toklist: 696 if out and sep: 697 out.append(sep) 698 if isinstance( item, ParseResults ): 699 out += item._asStringList() 700 else: 701 out.append( _ustr(item) ) 702 return out
703
704 - def asList( self ):
705 """ 706 Returns the parse results as a nested list of matching tokens, all converted to strings. 707 708 Example:: 709 patt = OneOrMore(Word(alphas)) 710 result = patt.parseString("sldkj lsdkj sldkj") 711 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 712 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 713 714 # Use asList() to create an actual list 715 result_list = result.asList() 716 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 717 """ 718 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719
720 - def asDict( self ):
721 """ 722 Returns the named parse results as a nested dictionary. 723 724 Example:: 725 integer = Word(nums) 726 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 727 728 result = date_str.parseString('12/31/1999') 729 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 730 731 result_dict = result.asDict() 732 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 733 734 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 735 import json 736 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 737 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 738 """ 739 if PY_3: 740 item_fn = self.items 741 else: 742 item_fn = self.iteritems 743 744 def toItem(obj): 745 if isinstance(obj, ParseResults): 746 if obj.haskeys(): 747 return obj.asDict() 748 else: 749 return [toItem(v) for v in obj] 750 else: 751 return obj
752 753 return dict((k,toItem(v)) for k,v in item_fn())
754
755 - def copy( self ):
756 """ 757 Returns a new copy of a C{ParseResults} object. 758 """ 759 ret = ParseResults( self.__toklist ) 760 ret.__tokdict = self.__tokdict.copy() 761 ret.__parent = self.__parent 762 ret.__accumNames.update( self.__accumNames ) 763 ret.__name = self.__name 764 return ret
765
766 - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
767 """ 768 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 769 """ 770 nl = "\n" 771 out = [] 772 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 773 for v in vlist) 774 nextLevelIndent = indent + " " 775 776 # collapse out indents if formatting is not desired 777 if not formatted: 778 indent = "" 779 nextLevelIndent = "" 780 nl = "" 781 782 selfTag = None 783 if doctag is not None: 784 selfTag = doctag 785 else: 786 if self.__name: 787 selfTag = self.__name 788 789 if not selfTag: 790 if namedItemsOnly: 791 return "" 792 else: 793 selfTag = "ITEM" 794 795 out += [ nl, indent, "<", selfTag, ">" ] 796 797 for i,res in enumerate(self.__toklist): 798 if isinstance(res,ParseResults): 799 if i in namedItems: 800 out += [ res.asXML(namedItems[i], 801 namedItemsOnly and doctag is None, 802 nextLevelIndent, 803 formatted)] 804 else: 805 out += [ res.asXML(None, 806 namedItemsOnly and doctag is None, 807 nextLevelIndent, 808 formatted)] 809 else: 810 # individual token, see if there is a name for it 811 resTag = None 812 if i in namedItems: 813 resTag = namedItems[i] 814 if not resTag: 815 if namedItemsOnly: 816 continue 817 else: 818 resTag = "ITEM" 819 xmlBodyText = _xml_escape(_ustr(res)) 820 out += [ nl, nextLevelIndent, "<", resTag, ">", 821 xmlBodyText, 822 "</", resTag, ">" ] 823 824 out += [ nl, indent, "</", selfTag, ">" ] 825 return "".join(out)
826
827 - def __lookup(self,sub):
828 for k,vlist in self.__tokdict.items(): 829 for v,loc in vlist: 830 if sub is v: 831 return k 832 return None
833
834 - def getName(self):
835 r""" 836 Returns the results name for this token expression. Useful when several 837 different expressions might match at a particular location. 838 839 Example:: 840 integer = Word(nums) 841 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 842 house_number_expr = Suppress('#') + Word(nums, alphanums) 843 user_data = (Group(house_number_expr)("house_number") 844 | Group(ssn_expr)("ssn") 845 | Group(integer)("age")) 846 user_info = OneOrMore(user_data) 847 848 result = user_info.parseString("22 111-22-3333 #221B") 849 for item in result: 850 print(item.getName(), ':', item[0]) 851 prints:: 852 age : 22 853 ssn : 111-22-3333 854 house_number : 221B 855 """ 856 if self.__name: 857 return self.__name 858 elif self.__parent: 859 par = self.__parent() 860 if par: 861 return par.__lookup(self) 862 else: 863 return None 864 elif (len(self) == 1 and 865 len(self.__tokdict) == 1 and 866 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 867 return next(iter(self.__tokdict.keys())) 868 else: 869 return None
870
871 - def dump(self, indent='', depth=0, full=True):
872 """ 873 Diagnostic method for listing out the contents of a C{ParseResults}. 874 Accepts an optional C{indent} argument so that this string can be embedded 875 in a nested display of other data. 876 877 Example:: 878 integer = Word(nums) 879 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 880 881 result = date_str.parseString('12/31/1999') 882 print(result.dump()) 883 prints:: 884 ['12', '/', '31', '/', '1999'] 885 - day: 1999 886 - month: 31 887 - year: 12 888 """ 889 out = [] 890 NL = '\n' 891 out.append( indent+_ustr(self.asList()) ) 892 if full: 893 if self.haskeys(): 894 items = sorted((str(k), v) for k,v in self.items()) 895 for k,v in items: 896 if out: 897 out.append(NL) 898 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 899 if isinstance(v,ParseResults): 900 if v: 901 out.append( v.dump(indent,depth+1) ) 902 else: 903 out.append(_ustr(v)) 904 else: 905 out.append(repr(v)) 906 elif any(isinstance(vv,ParseResults) for vv in self): 907 v = self 908 for i,vv in enumerate(v): 909 if isinstance(vv,ParseResults): 910 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 911 else: 912 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 913 914 return "".join(out)
915
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a nested list, using the C{pprint} module.
    Any additional positional or keyword args are passed straight through to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested_list = self.asList()
    pprint.pprint(nested_list, *args, **kwargs)
938 939 # add support for pickle protocol
def __getstate__(self):
    """
    Return the picklable state as C{(toklist, (tokdict copy, parent, accumNames, name))}.

    The parent is stored as the dereferenced object (or C{None}), since the
    instance only holds a weak reference to it.
    """
    parent = None
    if self.__parent is not None:
        # a dead reference or a falsy parent is stored as None
        parent = self.__parent() or None
    extras = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, extras)
946
def __setstate__(self, state):
    """
    Restore the instance from the tuple produced by C{__getstate__}.

    The accumulated-names mapping is copied into a fresh dict, and the
    parent (when present) is re-wrapped in a weak reference.
    """
    toklist, extras = state[0], state[1]
    self.__toklist = toklist
    self.__tokdict, parent, accum_names, self.__name = extras
    self.__accumNames = {}
    self.__accumNames.update(accum_names)
    self.__parent = wkref(parent) if parent is not None else None
959
def __getnewargs__(self):
    """Return the tuple C{(toklist, name, asList, modal)} for the pickle protocol to pass to C{__new__}."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
962
def __dir__(self):
    """Return the attribute names of the type followed by this instance's results names."""
    names = dir(type(self))
    return names + list(self.keys())
# Register ParseResults with MutableMapping so that isinstance/issubclass
# checks against MutableMapping accept it without inheriting from it.
MutableMapping.register(ParseResults)
def col(loc, strg):
    """Return the column of C{loc} within C{strg}, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    text = strg
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        # loc is the first character after a newline
        return 1
    # rfind returns -1 on the first line, which makes the result 1-based
    return loc - text.rfind("\n", 0, loc)
980
def lineno(loc, strg):
    """Return the line number of C{loc} within C{strg}, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
992
def line(loc, strg):
    """Return the line of text containing C{loc} within C{strg}, counting newlines as line separators.
    The returned text does not include the newline itself.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    # no newline at or after loc: the line runs to the end of the string
    return strg[start:] if end < 0 else strg[start:end]
1002
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action: print the expression about to be matched and its location as C{loc(line,col)}."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)
1005
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action: print the expression that matched and the tokens it produced."""
    matched = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched)
1008
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action: print the exception raised while attempting a match."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1011
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and returns C{None}.
    """
    return None
1015 1016 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 1017 #~ 'decorator to trim function calls to match the arity of the target' 1018 #~ def _trim_arity(func, maxargs=3): 1019 #~ if func in singleArgBuiltins: 1020 #~ return lambda s,l,t: func(t) 1021 #~ limit = 0 1022 #~ foundArity = False 1023 #~ def wrapper(*args): 1024 #~ nonlocal limit,foundArity 1025 #~ while 1: 1026 #~ try: 1027 #~ ret = func(*args[limit:]) 1028 #~ foundArity = True 1029 #~ return ret 1030 #~ except TypeError: 1031 #~ if limit == maxargs or foundArity: 1032 #~ raise 1033 #~ limit += 1 1034 #~ continue 1035 #~ return wrapper 1036 1037 # this version is Python 2.x-3.x cross-compatible 1038 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap parse action C{func} so that it can always be invoked as C{wrapper(s, loc, toks)}.

    The wrapper first calls C{func} with all arguments; each time that call
    itself raises C{TypeError} it drops one more leading argument and retries,
    up to C{maxargs+1} dropped arguments. Once a call succeeds the number of
    dropped arguments is kept, and later C{TypeError}s are re-raised unchanged.
    A C{TypeError} whose traceback does not end at the wrapper's own call line
    is treated as coming from inside C{func} and re-raised.

    Callables listed in C{singleArgBuiltins} are always called with just the tokens.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)

    # one-element lists act as mutable cells shared with the closure
    # (this version is Python 2.x-3.x cross-compatible, so no nonlocal)
    skip = [0]
    arity_found = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while True:
            try:
                result = func(*args[skip[0]:])
                arity_found[0] = True
                return result
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if arity_found[0]:
                    raise
                try:
                    tb = sys.exc_info()[-1]
                    # raised somewhere other than our own call line: a genuine error
                    if extract_tb(tb, limit=2)[-1][:2] != pa_call_line_synth:
                        raise
                finally:
                    del tb

                if skip[0] <= maxargs:
                    skip[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1103 -class ParserElement(object):
1104 """Abstract base level parser element class.""" 1105 DEFAULT_WHITE_CHARS = " \n\t\r" 1106 verbose_stacktrace = False 1107 1108 @staticmethod
def setDefaultWhitespaceChars(chars):
    r"""
    Override the default whitespace chars stored on C{ParserElement}.

    Example::
        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
1122 1123 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Set the class used to wrap plain string literals included in a parser.

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    setattr(ParserElement, "_literalStringClass", cls)
1143
def __init__(self, savelist=False):
    """Initialize the parser element's state; C{savelist} is stored as C{saveAsList}."""
    # identification and error reporting
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""
    self.resultsName = None
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)
    self.saveAsList = savelist

    # user callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = (None, None, None)  # custom debug actions

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True  # used to avoid redundant calls to preParse

    # parsing bookkeeping
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
1166
def copy(self):
    """
    Make a copy of this C{ParserElement}.  Useful for defining different parse actions
    for the same parsing pattern, using copies of the original parse element.

    The copy gets its own parse-action and ignore-expression lists, and, when
    C{copyDefaultWhiteChars} is set, picks up the current default whitespace chars.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # shallow copy shares the lists; give the duplicate its own
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
1189
def setName(self, name):
    """
    Define a name for this expression, to make debugging and exception messages clearer.
    Returns C{self} so that calls can be chained.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    if hasattr(self, "exception"):
        # keep an already-built exception object in sync with the new name
        self.exception.msg = message
    return self
1203
def setResultsName(self, name, listAllMatches=False):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named_copy = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not listAllMatches
    return named_copy
1231
def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable. Returns C{self}.
    """
    if not breakFlag:
        # restore the wrapped parse method, if a breakpoint wrapper is installed
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)

    # remember the wrapped method so that setBreak(False) can undo this
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1249
def setParseAction(self, *fns, **kwargs):
    """
    Define one or more actions to perform when successfully matching parse element definition.
    This replaces any parse actions previously set on the element.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1287
def addParseAction(self, *fns, **kwargs):
    """
    Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
    The C{callDuringTry} keyword argument can only turn the flag on here; it is never reset.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction += [_trim_arity(fn) for fn in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1297
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.
    Each function in C{fns} becomes its own parse action, so every condition is checked.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also run during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(predicate):
        # A factory gives each parse action its own binding of the predicate;
        # closing over the loop variable directly would make every action
        # evaluate only the last function passed in.
        predicate = _trim_arity(predicate)  # wrap once, not on every parse-time call

        def pa(s, l, t):
            if not bool(predicate(s, l, t)):
                raise exc_type(s, l, msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction(self, fn):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}."""
    setattr(self, "failAction", fn)
    return self
1337
def _skipIgnorables(self, instring, loc):
    """
    Advance C{loc} past every leading match of the expressions in C{ignoreExprs}.

    Each ignorable is applied repeatedly until it fails, and the whole list is
    retried for as long as any of them matched. Returns the new location.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    progressed = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                pass
    return loc
1350
def preParse(self, instring, loc):
    """
    Return the location at which matching should start: C{loc} advanced past
    any ignorable expressions and then, if C{skipWhitespace} is set, past any
    characters in C{whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc
1362
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning C{loc} unchanged and an empty token list."""
    no_tokens = []
    return loc, no_tokens
1365
def postParse(self, instring, loc, tokenlist):
    """Base implementation: return C{tokenlist} unchanged."""
    result = tokenlist
    return result
1368 1369 #~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Attempt to match this expression at C{loc} and return C{(newloc, ParseResults)}.

    Steps: optional pre-parse (skipping ignorables/whitespace), C{parseImpl},
    C{postParse}, wrapping the tokens in a C{ParseResults}, then running the
    parse actions when C{doActions} or C{callDuringTry} is set. An
    C{IndexError} from C{parseImpl} is converted to a C{ParseException} at
    end-of-string on the guarded paths. Debug and fail actions are notified
    around the match when configured.
    """
    debugging = self.debug  #and doActions )

    if debugging or self.failAction:
        # slower path: report start/failure to the debug and fail hooks
        if self.debugActions[0]:
            self.debugActions[0](instring, loc, self)
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            # parseImpl is trusted not to run off the end of the string here
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                tokens = action(instring, tokensStart, retTokens)
                if tokens is None:
                    # action returned nothing: keep the current results
                    continue
                retTokens = ParseResults(tokens,
                                         self.resultsName,
                                         asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                         modal=self.modalResults)
        except ParseBaseException as err:
            # failures inside parse actions are reported only when debugging
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1440
def tryParse(self, instring, loc):
    """
    Attempt a match at C{loc} without running parse actions and return the
    location following the match. A C{ParseFatalException} is converted to an
    ordinary C{ParseException} at C{loc}.
    """
    try:
        end_loc, _ = self._parse(instring, loc, doActions=False)
        return end_loc
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1446
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, C{False} if it raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1454
class _UnboundedCache(object):
    """Packrat cache with no size limit.

    The backing dict lives in a closure and the accessors are bound onto the
    instance. C{get} returns the C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

class _OrderedFifoCache(object):
    """Size-limited FIFO packrat cache backed by an ordered dict.

    Once more than C{size} keys are stored, the oldest entries are dropped.
    C{get} returns the C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        cache = _OrderedDict()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value
            while len(cache) > size:
                try:
                    cache.popitem(False)
                except KeyError:
                    pass

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

class _DequeFifoCache(object):
    """Size-limited FIFO packrat cache for when no ordered dict is available.

    Insertion order is tracked in a deque alongside a plain dict. Once more
    than C{size} keys are stored, the oldest entries are dropped from both.
    C{get} returns the C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        cache = {}
        # The deque must be unbounded: with a maxlen it would silently discard
        # old keys itself, and their values would never leave the dict.
        key_fifo = collections.deque()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            # record each key once, so a re-set key keeps its original position
            if key not in cache:
                key_fifo.append(key)
            cache[key] = value
            while len(key_fifo) > size:
                cache.pop(key_fifo.popleft(), None)

        def clear(self):
            cache.clear()
            key_fifo.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

# pick the FIFO implementation according to whether an ordered dict is available
_FifoCache = _OrderedFifoCache if _OrderedDict is not None else _DequeFifoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end to C{_parseNoCache}, used when packrat parsing is enabled.

    Results are keyed on C{(self, instring, loc, callPreParse, doActions)}.
    Both successful results and C{ParseBaseException}s are cached; a cached
    exception is re-raised on a hit, and results are copied on the way in and
    out of the cache. The whole lookup runs under C{packrat_cache_lock}, and
    hit/miss counts are kept in C{packrat_cache_stats}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
1564 1565 _parse = _parseNoCache 1566 1567 @staticmethod
def resetCache():
    """Empty the packrat cache and zero its hit/miss counters in place."""
    stats = ParserElement.packrat_cache_stats
    ParserElement.packrat_cache.clear()
    # slice-assign so that the same list object is kept
    stats[:] = [0] * len(stats)
1571 1572 _packratEnabled = False 1573 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the static method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calling this again once packrat parsing is enabled has no effect.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # route all parsing through the memoizing front end
    ParserElement._parse = ParserElement._parseCache
1607
def parseString(self, instring, parseAll=False):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1657
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator yielding C{(tokens, start, end)} tuples.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    preparse = self.preParse
    parse = self._parse
    ParserElement.resetCache()
    loc = 0
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparse(instring, loc)
                match_end, tokens = parse(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the attempted start
                loc = preloc + 1
                continue

            if match_end <= loc:
                # the match did not advance past loc: step forward without reporting
                loc = preloc + 1
                continue

            matches += 1
            yield tokens, preloc, match_end
            if not overlap:
                loc = match_end
            elif preparse(instring, loc) > loc:
                # NOTE(review): when skippable text precedes the match, the scan
                # resumes at the end of the match rather than after the skipped
                # text - confirm this is intended for overlap mode.
                loc = match_end
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1728
def transformString(self, instring):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this sets C{keepTabs} on the expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    copied_upto = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one is kept as-is
            pieces.append(instring[copied_upto:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            copied_upto = end
        pieces.append(instring[copied_upto:])
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1771
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.
    Returns a C{ParseResults} holding the tokens of each match.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1798
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between consecutive matches (and, when C{includeSeparators} is
    true, the first token of each match), followed by whatever text remains after
    the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator; text from here up to the next match is a piece
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # trailing text after the final separator (the whole string if nothing matched)
    yield instring[last:]
1820
def __add__(self, other):
    """
    Implementation of + operator - returns C{L{And}}. A plain string operand is
    first converted using the current literal string class (L{Literal} by default).
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1840
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1852
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1864
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1876
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for negative counts, a maximum smaller
    than the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so that (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the number of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # interpolate the types into the message (they used to be passed as extra exception args)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # builds nested Optional(self + Optional(self + ...)) n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the left operand is not a C{ParserElement};
    simply delegates to C{__mul__} with the same multiplier.
    """
    product = self.__mul__(other)
    return product
1947
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1959
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1971
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1983
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1995
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2007
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    Strings are converted with the current literal string class; any other
    unsupported operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2019
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}.
    """
    negated = NotAny(self)
    return negated
2025
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted, same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2044
def suppress(self):
    """
    Wraps this C{ParserElement} in a C{Suppress} so that its output is suppressed;
    useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed
2051
def leaveWhitespace(self):
    """
    Turns off the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns C{self} so that calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2060
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars for this element: enables whitespace
    skipping, stores C{chars} as the characters to skip, and marks the element as
    no longer following the default whitespace characters.

    Returns C{self} so that calls can be chained.
    """
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
2069
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Returns C{self} so that calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
2078
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in C{Suppress}.  A C{Suppress} expression is
    added only if it is not already registered; any other expression is
    copied and wrapped in C{Suppress} before being added.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2101
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using the
    given callbacks.  Any callback passed as a false value (e.g. C{None}) is
    replaced by the corresponding module default debug action.

    Returns C{self} so that calls can be chained.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2111
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    # enabling installs the three module default debug actions
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2152
def __str__(self):
    """Return this element's C{name} attribute as its string form."""
    label = self.name
    return label
2155
def __repr__(self):
    """Return the same text as the string form, obtained through C{_ustr}."""
    text = _ustr(self)
    return text
2158
def streamline(self):
    """
    Mark this element as streamlined and clear its cached string representation.

    Returns C{self} so that calls can be chained.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2163
def checkRecursion(self, parseElementList):
    """
    Recursion check hook; this base implementation does nothing and returns C{None}.
    """
    return None
2166
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility but is not used by
    this base implementation; it defaults to C{None} rather than a shared
    mutable list.  The check itself is delegated to C{checkRecursion}, started
    with an empty list.
    """
    self.checkRecursion([])
2172
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The argument is first treated as a file object (its C{read()} is called);
    if that raises C{AttributeError} it is treated as a filename instead.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
2192
def __eq__(self, other):
    """
    Equality test.  Against another C{ParserElement}: true if it is the same
    object or both have equal instance attributes.  Against a string: true if
    this expression matches that string (see C{matches}).  Anything else is
    compared through the C{super} proxy.
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other
2200
def __ne__(self, other):
    """Inequality test; the logical negation of C{self == other}."""
    is_equal = (self == other)
    return not is_equal
2203
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
2206
def __req__(self, other):
    """Return the result of C{self == other}."""
    outcome = (self == other)
    return outcome
2209
def __rne__(self, other):
    """Return the logical negation of C{self == other}."""
    is_equal = (self == other)
    return not is_equal
2212
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if parsing succeeds, C{False} if it raises a C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2231
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    (test string, parse result or exception) pairs, one per test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = [str.strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    pending_comments = []
    for t in tests:
        # comment lines (and blank lines following a comment) are held back and
        # printed together with the next real test
        is_comment = comment is not None and comment.matches(t, False)
        if is_comment or (pending_comments and not t):
            pending_comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(pending_comments), t]
        pending_comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            if isinstance(pe, ParseFatalException):
                fatal = "(FATAL)"
            else:
                fatal = ""
            if '\n' not in t:
                out.append(' '*pe.loc + '^' + fatal)
            else:
                # multi-line test: show the offending line and point at the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2362
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass that serves as the base for atomic
    matching patterns.
    """
    def __init__(self):
        # tokens are initialized with savelist disabled
        super(Token, self).__init__(savelist=False)
2370
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2381
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fails at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2396
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string turns this instance into an Empty token
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword (identifier) character.
    Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text, so upper-case the identifier chars too
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        # a keyword matches only if the text matches AND the characters just
        # before and just after it are not identifier characters
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier characters.
        The copy gets its own C{identChars} set rather than the class default,
        so custom C{identChars} passed to the constructor survive copying.
        """
        c = super(Keyword, self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2503
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the inherited match string is stored upper-cased for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2526
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    # parseImpl is deliberately inherited from Keyword: its caseless branch checks
    # the character before the match as well as the one after it, so the keyword is
    # not matched in the middle of a longer identifier. A separate override here
    # previously skipped the leading-character check.
2544
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare C{match_string} character by character against C{instring} starting
        at C{loc}.  Returns C{(end_loc, results)} when the number of mismatching
        positions does not exceed C{maxMismatches}; raises C{ParseException} when
        there are too many mismatches or not enough input left.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a comparison if the input has enough characters left
        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # the loop ran to completion, so the whole span start..maxloc is an
                # acceptable match; the end location must be relative to start,
                # not to the beginning of the input
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2604
2605 2606 -class Word(Token):
2607 """ 2608 Token for matching words composed of allowed character sets. 2609 Defined with string containing all allowed initial characters, 2610 an optional string containing allowed body characters (if omitted, 2611 defaults to the initial character set), and an optional minimum, 2612 maximum, and/or exact length. The default value for C{min} is 1 (a 2613 minimum value < 1 is not valid); the default values for C{max} and C{exact} 2614 are 0, meaning no maximum or exact length restriction. An optional 2615 C{excludeChars} parameter can list characters that might be found in 2616 the input C{bodyChars} string; useful to define a word of all printables 2617 except for one or two characters, for instance. 2618 2619 L{srange} is useful for defining custom character set strings for defining 2620 C{Word} expressions, using range notation from regular expression character sets. 2621 2622 A common mistake is to use C{Word} to match a specific literal string, as in 2623 C{Word("Address")}. Remember that C{Word} uses the string argument to define 2624 I{sets} of matchable characters. This expression would match "Add", "AAA", 2625 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 2626 To match an exact literal string, use L{Literal} or L{Keyword}. 2627 2628 pyparsing includes helper strings for building Words: 2629 - L{alphas} 2630 - L{nums} 2631 - L{alphanums} 2632 - L{hexnums} 2633 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2634 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 
2635 - L{printables} (any non-whitespace character) 2636 2637 Example:: 2638 # a word composed of digits 2639 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2640 2641 # a word with a leading capital, and zero or more lowercase 2642 capital_word = Word(alphas.upper(), alphas.lower()) 2643 2644 # hostnames are alphanumeric, with leading alpha, and '-' 2645 hostname = Word(alphas, alphanums+'-') 2646 2647 # roman numeral (not a strict parser, accepts invalid mix of characters) 2648 roman = Word("IVXLCDM") 2649 2650 # any string of non-whitespace characters, except for ',' 2651 csv_value = Word(printables, excludeChars=",") 2652 """
2653 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
2654 super(Word,self).__init__() 2655 if excludeChars: 2656 initChars = ''.join(c for c in initChars if c not in excludeChars) 2657 if bodyChars: 2658 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2659 self.initCharsOrig = initChars 2660 self.initChars = set(initChars) 2661 if bodyChars : 2662 self.bodyCharsOrig = bodyChars 2663 self.bodyChars = set(bodyChars) 2664 else: 2665 self.bodyCharsOrig = initChars 2666 self.bodyChars = set(initChars) 2667 2668 self.maxSpecified = max > 0 2669 2670 if min < 1: 2671 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2672 2673 self.minLen = min 2674 2675 if max > 0: 2676 self.maxLen = max 2677 else: 2678 self.maxLen = _MAX_INT 2679 2680 if exact > 0: 2681 self.maxLen = exact 2682 self.minLen = exact 2683 2684 self.name = _ustr(self) 2685 self.errmsg = "Expected " + self.name 2686 self.mayIndexError = False 2687 self.asKeyword = asKeyword 2688 2689 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2690 if self.bodyCharsOrig == self.initCharsOrig: 2691 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2692 elif len(self.initCharsOrig) == 1: 2693 self.reString = "%s[%s]*" % \ 2694 (re.escape(self.initCharsOrig), 2695 _escapeRegexRangeChars(self.bodyCharsOrig),) 2696 else: 2697 self.reString = "[%s][%s]*" % \ 2698 (_escapeRegexRangeChars(self.initCharsOrig), 2699 _escapeRegexRangeChars(self.bodyCharsOrig),) 2700 if self.asKeyword: 2701 self.reString = r"\b"+self.reString+r"\b" 2702 try: 2703 self.re = re.compile( self.reString ) 2704 except Exception: 2705 self.re = None
2706
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word at C{loc} and return C{(end_location, matched_text)}.

    Uses the precompiled regex when one is available, otherwise scans the
    input character by character.  Raises C{ParseException} when no word
    satisfying the configured constraints starts at C{loc}.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    loc += 1
    instrlen = len(instring)
    bodychars = self.bodyChars
    # never scan past the maximum length or the end of the input
    maxloc = min(start + self.maxLen, instrlen)
    while loc < maxloc and instring[loc] in bodychars:
        loc += 1

    # too few characters matched
    tooShort = loc - start < self.minLen
    # an explicit maximum was given, but the word keeps going past it
    overrunsMax = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
    # keyword mode: a body character sits directly before or after the match
    touchesBodyChar = self.asKeyword and (
        (start > 0 and instring[start - 1] in bodychars)
        or (loc < instrlen and instring[loc] in bodychars))

    if tooShort or overrunsMax or touchesBodyChar:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2741
def __str__(self):
    """
    Return the name reported by the base class when that works; otherwise
    build (and cache in C{strRepr}) a short C{W:(...)} description in which
    each character set is truncated to its first four characters.
    """
    try:
        return super(Word, self).__str__()
    except Exception:
        pass

    if self.strRepr is None:
        def abbreviate(chars):
            # keep the description short for large character sets
            return chars[:4] + "..." if len(chars) > 4 else chars

        leading = abbreviate(self.initCharsOrig)
        if self.initCharsOrig != self.bodyCharsOrig:
            self.strRepr = "W:(%s,%s)" % (leading, abbreviate(self.bodyCharsOrig))
        else:
            self.strRepr = "W:(%s)" % leading

    return self.strRepr
2763
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled regular expression object, used for isinstance checks
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
        See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, in which case it
        is used directly and C{flags} is only recorded on the instance.

        Parameters:
         - asGroupList - when true, C{parseImpl} returns the tuple of matched groups
         - asMatch - when true, C{parseImpl} returns the C{re} match object itself

        Raises C{ValueError} if C{pattern} is neither a string nor a compiled
        RE object; re-raises the C{re} compilation error for an invalid pattern.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # str(pattern) would give the repr "re.compile(...)"; the source
            # text of the expression is held in the .pattern attribute
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.asGroupList = asGroupList
        self.asMatch = asMatch

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the regex at C{loc}; return C{(end_location, result)}.

        The result is the match object (C{asMatch}), the tuple of groups
        (C{asGroupList}), or a C{ParseResults} holding the matched text with
        one named result per named group.  Raises C{ParseException} when the
        expression does not match at C{loc}.
        """
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        if self.asMatch:
            ret = result
        elif self.asGroupList:
            ret = result.groups()
        else:
            ret = ParseResults(result.group())
            named = result.groupdict()
            for key in named:
                ret[key] = named[key]
        return loc, ret

    def __str__(self):
        """Return the assigned name if available, else a cached C{Re:(...)} description."""
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        """
        Return Regex with an attached parse action to transform the parsed
        result as if called using C{re.sub(expr, repl, string)}.

        Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) when used with
        C{asGroupList=True}, or with C{asMatch=True} and a callable C{repl}.
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # tokens[0] is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # tokens[0] is the matched text: run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Validate the quote delimiters and compile the regex that matches one
        complete quoted string.

        Raises C{SyntaxError} (after a C{SyntaxWarning}) when either delimiter
        is empty once surrounding whitespace is stripped.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # pieces of the negated character class for "ordinary" content
        openQuote = re.escape(self.quoteChar)
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escInClass = (escChar is not None and _escapeRegexRangeChars(escChar) or '')

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (openQuote, endFirst, escInClass)
        else:
            # single-line strings may not contain raw line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, endFirst, escInClass)

        endq = self.endQuoteChar
        if len(endq) > 1:
            # allow proper prefixes of the end quote that are not followed by
            # the character that would extend them, longest prefix first
            partials = ["%s[^%s]" % (re.escape(endq[:i]),
                                     _escapeRegexRangeChars(endq[i]))
                        for i in range(len(endq) - 1, 0, -1)]
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match one quoted string at C{loc}; return C{(end_location, text)}.

        With C{unquoteResults} the delimiters are removed and escapes are
        processed; otherwise the raw matched text is returned.  Raises
        C{ParseException} if no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regex
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        ret = found.group()

        if self.unquoteResults:
            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # NOTE: escaped whitespace is converted first, then escChar
                # sequences are unescaped, then escQuote is collapsed
                if '\\' in ret and self.convertWhitespaceEscapes:
                    for wslit, wschar in ((r'\t', '\t'),
                                          (r'\n', '\n'),
                                          (r'\f', '\f'),
                                          (r'\r', '\r')):
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        """Return the assigned name if available, else a cached description of the delimiters."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3002
class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        """
        Store the excluded characters and the length limits.

        Raises C{ValueError} when C{min} is less than 1.
        """
        super(CharsNotIn, self).__init__()
        # whitespace can be part of the match, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Consume characters that are not in C{notChars}, starting at C{loc}.

        Returns C{(end_location, matched_text)}; raises C{ParseException} if
        the first character is excluded or fewer than C{minLen} characters
        were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        stopAt = min(start + self.maxLen, len(instring))
        while loc < stopAt and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """
        Return the assigned name if available, else a cached C{!W:(...)}
        description showing at most the first four excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
3074
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build a readable expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """
        Store the whitespace characters to match and the length limits.

        Every character of C{ws} is looked up in C{whiteStrs} to build the
        expression name.
        """
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """
        Consume a run of the configured whitespace characters at C{loc}.

        Returns C{(end_location, matched_text)}; raises C{ParseException} if
        the character at C{loc} is not one of them or the run is shorter
        than C{minLen}.
        """
        white = self.matchWhite
        if instring[loc] not in white:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in white:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3124
class _PositionToken(Token):
    """
    Common base for the position-testing tokens defined below it.  Sets the
    expression name to the concrete class name and marks the expression as
    one that may return an empty result.
    """

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3132
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """

    def __init__(self, colno):
        """Remember the target column number C{colno}."""
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """
        Skip ignorable expressions and whitespace until the target column,
        a non-whitespace character, or the end of the input is reached.
        Returns the new location.
        """
        if col(loc, instring) == self.col:
            # already at the requested column
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < instrlen
               and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """
        Advance to the target column, returning the skipped text.

        Raises C{ParseException} when C{loc} is already past the target column.
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        # move forward by the number of columns still missing
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
3157
class LineStart(_PositionToken):
    r"""
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty result when C{loc} is in column 1; otherwise raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3188
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """

    def __init__(self):
        super(LineEnd, self).__init__()
        # the newline is what this token matches, so it must not be skipped
        # as leading whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match a newline character at C{loc} (returning C{"\\n"}) or the end of
        the input (returning an empty list); the returned location is
        C{loc+1} in both cases.  Anything else raises C{ParseException}.
        """
        instrlen = len(instring)
        if loc < instrlen:
            if instring[loc] == "\n":
                return loc + 1, "\n"
        elif loc == instrlen:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
3208
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed with an empty result at location 0, or at the location that
        pre-parsing from 0 arrives at; otherwise raise C{ParseException}.
        """
        # see if entire string up to here is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3223
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed with an empty result when C{loc} is at or beyond the end of
        C{instring}; raise C{ParseException} when input remains.

        Exactly at the end, the returned location is C{loc+1}; beyond the
        end, C{loc} is returned unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        # loc > instrlen: already past the end
        return loc, []
3241
class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed with an empty result at location 0, or when the previous
        character is not a word character and the current one is; otherwise
        raise C{ParseException}.
        """
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        precededByWord = instring[loc - 1] in wordChars
        if precededByWord or instring[loc] not in wordChars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3261
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        # whitespace after a word is what marks its end, so do not skip it
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed with an empty result when C{loc} is at or past the end of the
        input, or when the previous character is a word character and the
        current one is not; otherwise raise C{ParseException}.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no preceding character; instring[loc-1]
            # would wrap around to the LAST character of the string, so that
            # case is rejected explicitly instead of being indexed.
            if (instring[loc] in self.wordChars or
                    loc == 0 or
                    instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3283
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """

    def __init__(self, exprs, savelist = False):
        """
        Normalise C{exprs} into the list C{self.exprs}.

        C{exprs} may be a generator, a single string, an iterable of
        expressions, an iterable consisting only of strings (each is wrapped
        with the current literal string class), or a single non-iterable
        object (stored as a one-element list).
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [ParserElement._literalStringClass(exprs)]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ParserElement._literalStringClass(expr) for expr in exprs]
            self.exprs = exprs
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        """Return the contained expression(s) at index/slice C{i}."""
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions, reset the cached string form, and return C{self}."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so that expressions shared with other grammars are
        # not modified
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as an ignorable expression on this expression and
        on every contained expression; returns C{self}.  A C{Suppress} that
        is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        # propagate the entry exactly as the base class stored it
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the assigned name if available, else a cached C{ClassName:(exprs)} description."""
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """
        Streamline this expression and all contained expressions, flattening
        nested two-element expressions of the same class; returns C{self}.
        """
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(other):
            # a nested expression may only be absorbed if doing so loses nothing
            return (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug)

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if mergeable(other):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if mergeable(other):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base class implementation and return its result."""
        ret = super(ParseExpression, self).setResultsName(name, listAllMatches)
        return ret

    def validate(self, validateTrace=None):
        """
        Validate every contained expression, passing along the trace of
        enclosing expressions extended with C{self}, then run the recursion
        check on this expression.
        """
        # None instead of a mutable default; the trace is copied, never mutated
        trace = list(validateTrace or []) + [self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3396
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: failures in the expressions after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        """
        Build the sequence from C{exprs}.  Whitespace handling is inherited
        from the first expression.  An empty C{exprs} is accepted so that an
        C{And} can be built up incrementally with C{+=}.
        """
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        if self.exprs:
            # guard: indexing exprs[0] on an empty list would raise IndexError
            first = self.exprs[0]
            self.setWhitespaceChars(first.whiteChars)
            self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse every contained expression in order, accumulating the tokens.

        Returns C{(end_location, results)}.  Once an C{_ErrorStop} marker has
        been passed, a failure of a later expression is raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or named values
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append C{other} (strings are wrapped as literals) to this sequence in place."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other) #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """
        Run the recursion check on the contained expressions, stopping after
        the first one that cannot return an empty match.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name if set, else a cached C{{a b c}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
3471
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives the expression is flagged as possibly empty
        self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try every alternative, then parse with the ones that matched, from
        the longest trial match to the shortest, returning the first result.

        If nothing succeeds, the exception that got furthest into the input
        is raised with this expression's message; with no alternatives at
        all a C{ParseException} saying so is raised.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            candidates.sort(key=lambda x: -x[0])
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr = err
                        furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other):
        """Append C{other} (strings are wrapped as literals) as a further alternative, in place."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other) #Or( [ self, other ] )

    def __str__(self):
        """Return the assigned name if set, else a cached C{{a ^ b}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
3550
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives the expression is flagged as possibly empty
        self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return the result of the first alternative that parses at C{loc}.

        If none does, the exception that got furthest into the input is
        raised with this expression's message; with no alternatives at all
        a C{ParseException} saying so is raised.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other):
        """Append C{other} (strings are wrapped as literals) as a further alternative, in place."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other) #MatchFirst( [ self, other ] )

    def __str__(self):
        """Return the assigned name if set, else a cached C{{a | b}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
3618
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        # The Each may match nothing only if every contained expression may.
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional grouping is built lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        # Two phases: (1) repeatedly try the remaining expressions with tryParse to
        # discover an order in which they match, (2) re-parse in that order with
        # _parse to collect the actual results.
        if self.initExprGroups:
            # map id(inner expr) -> enclosing Optional, so a matched inner expression
            # can be swapped back for its Optional wrapper in matchOrder
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # work on copies so the cached groupings survive across calls
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(self.opt1map.get(id(e),e))
                    # a required/optional expression is consumed once it has matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a full pass produced no match at all
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # second phase: parse for real, starting again from the original loc
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        # an explicitly assigned name wins; otherwise build and cache "{a & b & ...}"
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        # pass a new list (caller's list + self) down to each contained expression
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
3742
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr} (which may be
    C{None}), and mirrors that expression's whitespace, ignore and
    result-list settings.
    """
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        # promote a bare string to the configured literal class
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit parsing characteristics from the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy of
        the wrapped expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard before copying: the original copied first and only then tested
        # for None, which raised AttributeError when no expression was set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression here and on the wrapped
        expression.  A C{Suppress} already present in C{self.ignoreExprs} is
        not added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # forward the entry that the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run a recursion check from this element."""
        # the default list is never mutated: it is copied before being extended
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        # Prefer the base-class representation; if that fails, fall back to a
        # cached "ClassName:(expr)" string.
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3819
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead on the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string; it only
    checks that the given expression matches at the current position.
    C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # a lookahead consumes nothing, so it can always "match empty"
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead fails; its returned location is discarded
        self.expr.tryParse(instring, loc)
        unchanged_loc, no_tokens = loc, []
        return unchanged_loc, no_tokens
3845
class NotAny(ParseElementEnhance):
    """
    Negative lookahead: fails if the given parse expression matches at the
    current position.  C{NotAny} does I{not} advance the parsing position
    within the input string, and does I{not} skip over leading whitespace.
    C{NotAny} always returns a null token list.  May be constructed using
    the '~' operator.

    Example::
        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        if not hasattr(self, "name"):
            # build the "~{expr}" form once and cache it
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
3878
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching of a single expression, with
    an optional C{stopOn} sentinel that ends the repetition.  C{parseImpl}
    requires at least one match.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        ender = stopOn
        if isinstance(ender, basestring):
            ender = ParserElement._literalStringClass(ender)
        # store the negated sentinel (~ender), or None when no stopOn was given
        self.not_ender = ~ender if ender is not None else None

    def parseImpl( self, instring, loc, doActions=True ):
        # bind frequently used attributes to locals before entering the loop
        self_expr_parse = self.expr._parse
        self_skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            try_not_ender = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender(instring, loc)
        loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = (not not self.ignoreExprs)
            # keep matching until the sentinel check or the expression raises
            while 1:
                if check_ender:
                    try_not_ender(instring, loc)
                if hasIgnoreExprs:
                    preloc = self_skip_ignorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self_expr_parse( instring, preloc, doActions )
                # accumulate only results that carry tokens or named keys
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # a failed repetition just ends the loop; earlier matches are kept
            pass

        return loc, tokens
3916
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if not hasattr(self, "name"):
            # lazily build and cache the "{expr}..." representation
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name
3951
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # the base class needs at least one match; turn its failure into an
        # empty result at the original location
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, []
        return outcome

    def __str__(self):
        if not hasattr(self, "name"):
            # lazily build and cache the "[expr]..." representation
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
3982
class _NullToken(object):
    """Placeholder object that is always falsy and converts to an empty string."""
    def __str__(self):
        return ""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__


# sentinel used as the C{default} argument of Optional to mean "no default given"
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: return the default (if one was given) without advancing
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if not hasattr(self, "name"):
            # lazily build and cache the "[expr]" representation
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
4054
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position where
          this expression matches

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a string failOn is promoted to the configured literal class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression matches, and return the skipped text (plus the target's
        tokens when C{include} was set).  Raises C{ParseException} if the end
        of the input is reached without a match.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE: this break bypasses the while/else failure clause below,
                # so processing continues with the text skipped so far.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run during the scan
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4169
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression to this placeholder and copy its parsing settings.  Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string form, since the contained expression changed
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form ('<<='); same effect as '<<'."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the flag is set first so re-entrant calls do nothing."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already in the trace, then check recursion."""
        # the default list is never mutated: it is copied before being extended
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        # Returns the assigned name, or a fixed "ClassName: ..." placeholder.
        # The contained expression is deliberately not expanded here; the old
        # expansion code was stubbed out for memory/performance reasons and
        # sat unreachable after this return, so it has been removed.
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  If no expression has been assigned
        yet, return a new C{Forward} that refers back to this one.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4250
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed text "..."."""
    def __str__(self):
        placeholder = "..."
        return placeholder
4254
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not passed on to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4262
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        # when adjacency is required, register the ignorable on this element
        # only, without propagating it to the contained expression
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy so named results are kept, then add the
        # single concatenated string
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
4306
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the matched tokens as a single element
        nested = [tokenlist]
        return nested
4327
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # For each sub-token, use its first element as a key on tokenlist and
        # the remainder as the value, recording the sub-token's index i.
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            # integer keys are converted to their stripped string form
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                # key only: store an empty-string value
                tokenlist[ikey] = _ParseResultsWithOffset("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                # key plus one plain value
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
            else:
                # key plus several values (or a nested ParseResults): drop the key
                # from a copy and store what is left
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

        # wrap in a list when this Dict itself has a results name
        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
4391
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress(self):
        # already a Suppress, so there is nothing further to wrap
        return self
4417
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    Any further call raises C{ParseException} until L{reset} is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        # mark as used only after the wrapped action has returned normally
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        self.called = False
4434
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # a leading extra argument is taken as the owner; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        emit = sys.stderr.write
        emit(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        emit("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    # present the wrapper under the wrapped function's name when possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        # delimiters only guide the parse; keep them out of the results
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        # keep the delimiters and merge everything into a single token
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
4497
def countedArray(expr, intExpr=None):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array body as that many exprs
        n = t[0]
        if n:
            body = Group(And([expr] * n))
        else:
            body = Group(empty)
        arrayExpr << body
        # returning an empty list drops the count token from the results
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is not modified
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """
    Return a new flat list with the items of C{L}, expanding nested
    C{list} instances recursively.  Other containers (tuples, strings)
    are kept as single items.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
4537
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever the earlier expression just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the original expression
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # reject the repeat unless it produced the same flattened tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """
    Return C{s} escaped for use inside a regular expression C{[...]} range:
    backslash, C{^}, C{-} and C{]} get a leading backslash, and literal
    newline and tab characters are replaced by the two-character sequences
    C{\\n} and C{\\t}.
    """
    escaped = s
    # the backslash is handled first, so that the backslashes added for the
    # other characters are not themselves doubled
    for special in ("\\", "^", "-", "]"):
        escaped = escaped.replace(special, _bslash + special)
    escaped = escaped.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
4601
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} is neither a string nor an iterable, a C{SyntaxWarning} is issued;
    if no symbols result, a C{L{NoMatch}} is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol ahead of an earlier symbol that
    # is a prefix of it ("masks" it), so that the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no duplicate or masked symbol found for position i; move on
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only valid when *every* symbol is exactly
            # one character.  Comparing len(symbols) with the total joined
            # length is not sufficient: ["", "ab"] has 2 symbols and 2
            # characters, but must not become "[ab]".
            if all(len(sym) == 1 for sym in symbols):
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4674
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Builds C{Dict(ZeroOrMore(Group(key + value)))}, so the
    C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} wrappers are applied in the proper order.
    The key pattern can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value pattern can include
    named results, so that the C{Dict} results can include named token fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one [key, value] group per entry ...
    entry = Group(key + value)
    # ... repeated zero or more times, and exposed as a dictionary
    entries = ZeroOrMore(entry)
    return Dict(entries)
4709
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    def reportLocation(s, loc, t):
        # the "token" produced by a marker is just the current parse location
        return loc

    startMarker = Empty().setParseAction(reportLocation)
    endMarker = startMarker.copy()
    # pre-parse is switched off for the end marker only -- presumably so that
    # text skipped before a token is not counted as part of the match
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            # replace all tokens with the slice of input that was matched
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # drop the two helper names, keep any other results names, and
            # replace the token list in place with the matched text
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  Wraps C{expr} in a C{TokenConverter}
    whose parse action returns only the first parsed token.
    """
    def firstToken(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.setParseAction(firstToken)
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    def currentLocation(s, l, t):
        return l

    startLocator = Empty().setParseAction(currentLocation)
    # the end locator must not skip whitespace, so it reports the position
    # immediately after the matched expression
    endLocator = startLocator.copy().leaveWhitespace()
    return Group(startLocator("locn_start") + expr("value") + endLocator("locn_end"))
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks for parsing a regex-style '[...]' character set (see srange)

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# '\x##' (or legacy '\0x##') -> character with that hex code.
# The hex digits are taken as everything after the 'x'/'X' marker.  Using
# lstrip(r'\0x') here would be wrong: lstrip removes a *set* of characters, so
# '\x00' would be reduced to '' and '\X41' would keep its 'X', and in both
# cases int(..., 16) would raise.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x', 1)[1],16)))
# '\0##' -> character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# 'a-z' style range, grouped so that it can be told apart from single characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    If the string cannot be parsed or expanded, an empty string is returned
    instead of raising an exception.
    """
    def expandPart(part):
        # single characters come back as plain strings; only ranges are
        # returned as (grouped) ParseResults of [first, last]
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(code) for code in range(first, last + 1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(expandPart(part) for part in body)
    except Exception:
        # deliberate best-effort: any failure yields an empty character set
        return ""
4815
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    @param n: the required column, compared with the value of C{col(locn, strg)}
    @return: a parse action raising C{ParseException} when the match is at
        any other column
    """
    def verifyCol(strg, locn, toks):
        actualCol = col(locn, strg)
        if actualCol == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    The returned parse action ignores its arguments and returns a one-element
    list containing C{replStr}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def replacementAction(s, l, t):
        return [replStr]

    return replacementAction
4839
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.
    Returns the first token with its first and last characters dropped.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4853
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    The returned parse action is given the name of C{func} (or of its class,
    or its C{str()} form if no name can be determined).

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the parse action after the mapped function
    try:
        classFallback = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', classFallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    @param tagStr: tag name as a string, or an expression matching the tag name
        (its C{name} attribute is then used for results names)
    @param xml: if true, the tag name is matched case-sensitively and attribute
        values must be double-quoted; otherwise the tag name is caseless,
        attribute names are lower-cased, and values may be quoted, unquoted or absent
    @return: tuple C{(openTag, closeTag)}; both have a C{tag} attribute and the
        results names C{start<Name>} / C{end<Name>}
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" results name is True for a self-closing tag ('<tag ... />'), else False
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrEntry)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. "ns:div" -> "NsDiv", used as suffix of the start/end results names
    titledName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + titledName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + titledName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4932
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    # HTML flavour: delegate with xml switched off
    return _makeTags(tagStr, xml=False)
4951
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    # XML flavour: delegate with xml switched on
    return _makeTags(tagStr, xml=True)
4960
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If positional name-value tuples are given, keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) pairs take precedence over keyword arguments
    source = args if args else attrDict.items()
    required = [(name, value) for name, value in source]

    def pa(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa

# sentinel meaning "attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    @param classname: required value of the class attribute (or
        C{withAttribute.ANY_VALUE})
    @param namespace: optional namespace prefix; when given, the attribute
        tested is C{"<namespace>:class"} instead of C{"class"}

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    # pass as a dict, since "class" cannot be written as a keyword argument
    return withAttribute(**{classattr : classname})
# Associativity markers used in infixNotation operator definitions; each is
# a unique sentinel object.
opAssoc = _Constants()
setattr(opAssoc, "LEFT", object())
setattr(opAssoc, "RIGHT", object())
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as two expressions,
    if numTerms is not 1, 2 or 3, or if the associativity is not one of the
    C{opAssoc} constants.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate *before* building the term name, so that a bad opExpr
            # is reported with this ValueError and not as a formatting TypeError
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: terms are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this precedence level matches its operator form, or falls through
        # to the next-tighter level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  In each, the Regex matches the opening quote
# and the body (ordinary characters, a doubled quote, or a backslash escape),
# and the closing quote is matched as a separate literal.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")

sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")

quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).setName("quotedString using single or double quotes")

unicodeString = Combine(
    _L('u') + quotedString.copy()
).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.  In that case both delimiters must be strings.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if opener and closer are equal, or if no content
    expression is given and the delimiters are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer)==1

        if singleCharDelims and ignoreExpr is not None:
            # single-character delimiters can simply be excluded character-wise
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookahead at every character
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))

    ret = Forward()
    if ignoreExpr is not None:
        inner = ZeroOrMore( ignoreExpr | ret | content )
    else:
        inner = ZeroOrMore( ret | content )
    ret <<= Group( Suppress(opener) + inner + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5275
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified: a backslash followed by a
    line end is added to its ignorable expressions.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # statement must start in exactly the column on top of the stack
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # a new block must start to the right of the current level; if so,
        # its column becomes the new current level
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # end of block: the next text must be left of the current level and
        # not right of the enclosing level; then the current level is dropped
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # allow backslash line continuations inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# 8-bit (Latin-1 range) alphabetic and punctuation character sets
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# expressions matching any HTML opening / closing tag
anyOpenTag,anyCloseTag = makeHTMLTags(
    Word(alphas,alphanums+"_:").setName('any tag'))

# entity name -> replacement character, for the entities handled by
# commonHTMLEntity / replaceHTMLEntity
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");"
).setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Reads the C{entity} result name from the matched tokens and looks it up
    in the module's C{_htmlEntityMap}; returns the mapped character, or
    C{None} when the name has no entry in the map.
    """
    entity_name = t.entity
    replacement = _htmlEntityMap.get(entity_name)
    return replacement
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The regex consumes the opening "/*" and the comment body (any character other
# than '*', or a '*' that is not followed by '/'); the '*/' literal that is added
# to it then matches the terminator.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# [\s\S] matches any character, newlines included, and *? is non-greedy, so the
# match ends at the first "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# '.' stops at a newline - assuming Regex compiles the pattern without DOTALL
# (TODO confirm against the Regex class, which is defined outside this section).
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" up to the end of the line; the first alternative lets a backslash that is
# immediately followed by a newline continue the comment onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|', so the argument of Combine is
# (<C comment regex> + '*/') | dblSlashComment.  The C comment regex is the same
# pattern text as the one used for cStyleComment above.
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# plain alias: both names refer to the very same expression object
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# item expression used only by the deprecated commaSeparatedList defined right below
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# an item that is neither a quoted string nor a comma item yields the default ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int,16): the extra argument is presumably forwarded to int() as the
    # base - the class docstring example shows "100" -> 256 and "FF" -> 255
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() is called, not referenced, before its parse action is swapped to
    # float - presumably the call yields a copy so that the shared signed_integer above
    # keeps its int conversion (TODO confirm against ParserElement.__call__)
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # t[0] is the numerator and t[-1] the denominator (the '/' literal sits between them)
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # the optional '-' between the integer and the fraction is suppressed, i.e. it is a
    # separator and not a sign: the docstring example "1-3/4" gives 1.75
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # sum() adds the integer part and the fraction; a lone integer or fraction sums to itself
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # accepts either digits followed by an exponent, or digits with a decimal point and
    # an optional exponent; at least one digit is required before the '.' or exponent
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # full form: exactly eight hex groups separated by ':'
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # short form: "::" with up to seven groups allowed on either side
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # reject short forms that spell out eight or more groups in total.  The lambda goes
    # through pyparsing_common. because names bound in a class body are not visible
    # from nested function scopes.
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The backreference \1 forces every separator to be the same character as the first.
    # NOTE(review): the pattern requires six two-digit hex groups (two, then four more),
    # while the description string below shows only five "xx" groups.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
5622 - def convertToDate(fmt="%Y-%m-%d"):
5623 """ 5624 Helper to create a parse action for converting parsed date string to Python datetime.date 5625 5626 Params - 5627 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) 5628 5629 Example:: 5630 date_expr = pyparsing_common.iso8601_date.copy() 5631 date_expr.setParseAction(pyparsing_common.convertToDate()) 5632 print(date_expr.parseString("1999-12-31")) 5633 prints:: 5634 [datetime.date(1999, 12, 31)] 5635 """ 5636 def cvt_fn(s,l,t): 5637 try: 5638 return datetime.strptime(t[0], fmt).date() 5639 except ValueError as ve: 5640 raise ParseException(s, l, str(ve))
5641 return cvt_fn
5642 5643 @staticmethod
5644 - def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
5645 """ 5646 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime 5647 5648 Params - 5649 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) 5650 5651 Example:: 5652 dt_expr = pyparsing_common.iso8601_datetime.copy() 5653 dt_expr.setParseAction(pyparsing_common.convertToDatetime()) 5654 print(dt_expr.parseString("1999-12-31T23:59:59.999")) 5655 prints:: 5656 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] 5657 """ 5658 def cvt_fn(s,l,t): 5659 try: 5660 return datetime.strptime(t[0], fmt) 5661 except ValueError as ve: 5662 raise ParseException(s, l, str(ve))
5663 return cvt_fn 5664 5665 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 5666 "ISO8601 date (C{yyyy-mm-dd})" 5667 5668 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 5669 "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" 5670 5671 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 5672 "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" 5673 5674 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 5675 @staticmethod
5676 - def stripHTMLTags(s, l, tokens):
5677 """ 5678 Parse action to remove HTML tags from web page HTML source 5679 5680 Example:: 5681 # strip HTML links from normal text 5682 text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>' 5683 td,td_end = makeHTMLTags("TD") 5684 table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end 5685 5686 print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' 5687 """ 5688 return pyparsing_common._html_stripper.transformString(tokens[0])
    # one list item: one or more runs of printable, non-comma characters, each optionally
    # followed by spaces/tabs; the two '~' terms guard each run against starting at a
    # comma or at the end of the line
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                      + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # an item that is neither a quoted string nor a comma item yields the default ""
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # wrapped in staticmethod - presumably so the callable is not bound as a method
    # when it is reached through an instance of the class (TODO confirm)
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""


# Demo / smoke test: runs only when the module is executed as a script.
if __name__ == "__main__":

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # NOTE(review): upcaseTokens is looked up at module scope here, not as
    # pyparsing_common.upcaseTokens - it has to be defined earlier in the file,
    # outside the section visible here.
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # the ("name") calls attach result names to the matched parts
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hex numbers
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # NOTE(review): the parse action is set on the shared pyparsing_common.uuid
    # attribute itself (no .copy()), so it presumably stays in effect for the rest of
    # the process - harmless here because this only runs as a script.
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)