Package pyparsing :: Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2011  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
# Release metadata for this copy of the module: version number, build
# timestamp and author contact.
__version__ = "1.5.7"
__versionTime__ = "3 August 2012 05:00"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
# Names exported by "from pyparsing import *": the class names come first,
# followed by the module-level helper functions and predefined expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup',
]
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104  else: 
 105      _MAX_INT = sys.maxint 
 106      range = xrange 
 107      set = lambda s : dict( [(c,0) for c in s] ) 
 108   
109 - def _ustr(obj):
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 112 then < returns the unicode object | encodes it with the default encoding | ... >. 113 """ 114 if isinstance(obj,unicode): 115 return obj 116 117 try: 118 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 119 # it won't break any existing code. 120 return str(obj) 121 122 except UnicodeEncodeError: 123 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 124 # state that "The return value must be a string object". However, does a 125 # unicode object (being a subclass of basestring) count as a "string 126 # object"? 127 # If so, then return a unicode object: 128 return unicode(obj)
129 # Else encode it... but how? There are many choices... :) 130 # Replace unprintables with escape codes? 131 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 132 # Replace unprintables with question marks? 133 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 134 # ... 135 136 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 137 singleArgBuiltins = [] 138 import __builtin__ 139 for fname in "sum len sorted reversed list tuple set any all min max".split(): 140 try: 141 singleArgBuiltins.append(getattr(__builtin__,fname)) 142 except AttributeError: 143 continue 144
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by
       their named XML entities."""
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the later substitutions would themselves be escaped
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in substitutions:
        data = data.replace(raw, entity)
    return data
154
class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""

# character sets used when building grammar expressions
alphas = string.ascii_letters    # lowercase a-z followed by uppercase A-Z
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ ch for ch in string.printable if not ch.isspace() ] )
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record the parsed string, the location, the message and the
           parser element.  When only one string is supplied it is used as
           the message and the parsed string is left empty."""
        if msg is None:
            pstr, msg = "", pstr
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        return [
            "loc", "msg", "pstr", "parserElement",
            "lineno", "col", "line",
            "markInputline", "__str__", "__repr__",
        ]
212
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       the following attributes are available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
221
class ParseFatalException(ParseBaseException):
    """user-throwable exception, to be raised when inconsistent parse
       content is found; stops all parsing immediately"""
226
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""

    def __init__(self, pe):
        """Build this exception from the fields of the exception C{pe}."""
        parsed_text, position = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
            parsed_text, position, message, element)
234 235 #~ class ReparseException(ParseBaseException): 236 #~ """Experimental class - parse actions can raise this exception to cause 237 #~ pyparsing to reparse the input string: 238 #~ - with a modified input string, and/or 239 #~ - with a modified start location 240 #~ Set the values of the ReparseException in the constructor, and raise the 241 #~ exception in a parse action to cause pyparsing to use the new string/location. 242 #~ Setting the values as None causes no change to be made. 243 #~ """ 244 #~ def __init_( self, newstring, restartLoc ): 245 #~ self.newParseText = newstring 246 #~ self.reparseLoc = restartLoc 247
class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""

    def __init__( self, parseElementList ):
        """Keep the sequence of parse elements that was passed in, available
           afterwards as C{parseElementTrace}."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple: a bare tuple on the right-hand side of
        # "%" would be unpacked as several format arguments and raise TypeError
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
255
class _ParseResultsWithOffset(object):
    """Pair of two values stored as the tuple C{tup}; indexing is delegated
       to that tuple, and the second member can be replaced with
       C{setOffset}."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        """Replace the second member of the pair, keeping the first."""
        first = self.tup[0]
        self.tup = (first, i)
265
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # an existing instance is handed back as-is; only a freshly created
        # object is flagged for initialization in __init__
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Set up the token list and, when C{name} is given, register the
           tokens under that results name.

           - toklist - a list of tokens (copied), or a single token
           - name - optional results name for the tokens
           - asList - if true, the named value is stored as a nested C{ParseResults}
           - modal - if false, C{name} is recorded as accumulating all matches
           - isinstance - the builtin, bound as a default argument
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # nested results keep only a weak reference to their container
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Look up C{name} as a results name; a name that was never defined
           yields an empty string rather than raising C{AttributeError}."""
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            return ParseResults([ v[0] for v in self.__tokdict[name] ])
        return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(position):
                # a negative position maps to the start of the appended
                # tokens, including when this list is still empty
                if position < 0:
                    return offset
                return position + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        # 0 + results is what sum() evaluates first, so it yields a copy
        if isinstance(other,int) and other == 0:
            return self.copy()
        # any other left operand is unsupported: let Python raise TypeError
        # rather than quietly producing None
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out.append(_ustr(i))
            else:
                out.append(repr(i))
        return '[' + ', '.join(out) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           C{ParseResults} become nested lists, other tokens are included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map each recorded position to the results name stored for it
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so go through a list
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent weak reference is resolved to the object itself (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        return dir(super(ParseResults,self)) + list(self.keys())
614
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location that sits on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise count from the last newline before loc (rfind gives -1 if none)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
626
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
638
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # the line starts just after the last newline before loc (or at 0)
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:stop]
648
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, the location, and the
       line and column of that location."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
651
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the matched expression followed by its tokens as a list."""
    expr_text = _ustr(expr)
    token_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + token_text)
654
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
657
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
def _trim_arity(func, maxargs=2):
    """decorator to trim function calls to match the arity of the target

       Returns a wrapper that calls C{func} with all the arguments it was
       given and, each time that raises C{TypeError}, retries with one more
       leading argument dropped, up to C{maxargs}+1 dropped arguments.  The
       number of dropped arguments is remembered between calls.  Once a call
       has succeeded, a later C{TypeError} is re-raised unchanged instead of
       triggering further trimming.

       Functions listed in C{singleArgBuiltins} are wrapped so that they
       receive only the third argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as cells the closure can rebind on both
    # Python 2 and Python 3 (assigning to a plain local inside wrapper would
    # need "nonlocal", which Python 2 lacks)
    limit = [0]
    foundArity = [False]
    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                if limit[0] <= maxargs and not foundArity[0]:
                    limit[0] += 1
                    continue
                raise
    return wrapper
695 -class ParserElement(object):
696 """Abstract base level parser element class.""" 697 DEFAULT_WHITE_CHARS = " \n\t\r" 698 verbose_stacktrace = False 699
700 - def setDefaultWhitespaceChars( chars ):
701 """Overrides the default whitespace chars 702 """ 703 ParserElement.DEFAULT_WHITE_CHARS = chars
704 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 705
706 - def inlineLiteralsUsing(cls):
707 """ 708 Set class to be used for inclusion of string literals into a parser. 709 """ 710 ParserElement.literalStringClass = cls
711 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) 712
713 - def __init__( self, savelist=False ):
714 self.parseAction = list() 715 self.failAction = None 716 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 717 self.strRepr = None 718 self.resultsName = None 719 self.saveAsList = savelist 720 self.skipWhitespace = True 721 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 722 self.copyDefaultWhiteChars = True 723 self.mayReturnEmpty = False # used when checking for left-recursion 724 self.keepTabs = False 725 self.ignoreExprs = list() 726 self.debug = False 727 self.streamlined = False 728 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 729 self.errmsg = "" 730 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 731 self.debugActions = ( None, None, None ) #custom debug actions 732 self.re = None 733 self.callPreparse = True # used to avoid redundant calls to preParse 734 self.callDuringTry = False
735
736 - def copy( self ):
737 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 738 for the same parsing pattern, using copies of the original parse element.""" 739 cpy = copy.copy( self ) 740 cpy.parseAction = self.parseAction[:] 741 cpy.ignoreExprs = self.ignoreExprs[:] 742 if self.copyDefaultWhiteChars: 743 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 744 return cpy
745
746 - def setName( self, name ):
747 """Define name for this expression, for use in debugging.""" 748 self.name = name 749 self.errmsg = "Expected " + self.name 750 if hasattr(self,"exception"): 751 self.exception.msg = self.errmsg 752 return self
753
754 - def setResultsName( self, name, listAllMatches=False ):
755 """Define name for referencing matching tokens as a nested attribute 756 of the returned parse results. 757 NOTE: this returns a *copy* of the original C{ParserElement} object; 758 this is so that the client can define a basic element, such as an 759 integer, and reference it in multiple places with different names. 760 761 You can also set results names using the abbreviated syntax, 762 C{expr("name")} in place of C{expr.setResultsName("name")} - 763 see L{I{__call__}<__call__>}. 764 """ 765 newself = self.copy() 766 if name.endswith("*"): 767 name = name[:-1] 768 listAllMatches=True 769 newself.resultsName = name 770 newself.modalResults = not listAllMatches 771 return newself
772
773 - def setBreak(self,breakFlag = True):
774 """Method to invoke the Python pdb debugger when this element is 775 about to be parsed. Set C{breakFlag} to True to enable, False to 776 disable. 777 """ 778 if breakFlag: 779 _parseMethod = self._parse 780 def breaker(instring, loc, doActions=True, callPreParse=True): 781 import pdb 782 pdb.set_trace() 783 return _parseMethod( instring, loc, doActions, callPreParse )
784 breaker._originalParseMethod = _parseMethod 785 self._parse = breaker 786 else: 787 if hasattr(self._parse,"_originalParseMethod"): 788 self._parse = self._parse._originalParseMethod 789 return self
790
791 - def setParseAction( self, *fns, **kwargs ):
792 """Define action to perform when successfully matching parse element definition. 793 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 794 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 795 - s = the original string being parsed (see note below) 796 - loc = the location of the matching substring 797 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 798 If the functions in fns modify the tokens, they can return them as the return 799 value from fn, and the modified list of tokens will replace the original. 800 Otherwise, fn does not need to return any value. 801 802 Note: the default parsing behavior is to expand tabs in the input string 803 before starting the parsing process. See L{I{parseString}<parseString>} for more information 804 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 805 consistent view of the parsed string, the parse location, and line and column 806 positions within the parsed string. 807 """ 808 self.parseAction = list(map(_trim_arity, list(fns))) 809 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 810 return self
811
812 - def addParseAction( self, *fns, **kwargs ):
813 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 814 self.parseAction += list(map(_trim_arity, list(fns))) 815 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 816 return self
817
818 - def setFailAction( self, fn ):
819 """Define action to perform if parsing fails at this expression. 820 Fail acton fn is a callable function that takes the arguments 821 C{fn(s,loc,expr,err)} where: 822 - s = string being parsed 823 - loc = location where expression match was attempted and failed 824 - expr = the parse expression that failed 825 - err = the exception thrown 826 The function returns no value. It may throw C{L{ParseFatalException}} 827 if it is desired to stop parsing immediately.""" 828 self.failAction = fn 829 return self
830
831 - def _skipIgnorables( self, instring, loc ):
832 exprsFound = True 833 while exprsFound: 834 exprsFound = False 835 for e in self.ignoreExprs: 836 try: 837 while 1: 838 loc,dummy = e._parse( instring, loc ) 839 exprsFound = True 840 except ParseException: 841 pass 842 return loc
843
844 - def preParse( self, instring, loc ):
845 if self.ignoreExprs: 846 loc = self._skipIgnorables( instring, loc ) 847 848 if self.skipWhitespace: 849 wt = self.whiteChars 850 instrlen = len(instring) 851 while loc < instrlen and instring[loc] in wt: 852 loc += 1 853 854 return loc
855
856 - def parseImpl( self, instring, loc, doActions=True ):
857 return loc, []
858
859 - def postParse( self, instring, loc, tokenlist ):
860 return tokenlist
    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element against C{instring} at C{loc}, without caching.

           Optionally runs C{preParse}, then C{parseImpl} and C{postParse},
           wraps the tokens in a C{ParseResults}, and applies the parse
           actions.  Returns a tuple of the new location and the results.

           - instring - the string being parsed
           - loc - the location at which to attempt the match
           - doActions - when false, parse actions run only if C{callDuringTry} is set
           - callPreParse - when false, C{preParse} is skipped

           An C{IndexError} from C{parseImpl} is converted to a
           C{ParseException} located at the end of the string.
        """
        debugging = ( self.debug ) #and doActions )

        # slower path, taken only when debugging or a fail action needs to
        # observe the match attempt and its failure
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException:
                #~ print ("Exception raised:", err)
                # fetch the active exception only if some handler wants it
                err = None
                if self.debugActions[2]:
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    if err is None:
                        err = sys.exc_info()[1]
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # the IndexError guard is only set up when mayIndexError is set
            # or loc is already at or past the end of the string
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action returning None leaves the results as they are
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        err = sys.exc_info()[1]
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the results as they are
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
939
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with parse actions disabled and return the
    location following the match.  A C{ParseFatalException} is re-reported
    as an ordinary C{ParseException} at C{loc}.
    """
    try:
        endLoc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endLoc
945 946 # this method gets repeatedly called during backtracking with the same arguments - 947 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around C{_parseNoCache}.

    Results are keyed on the expression, input string, location and both
    flags.  A cached exception is re-raised; a cached success is returned
    with a copy of the stored results.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    cached = ParserElement._exprArgCache.get( lookup )
    if cached is not None:
        if isinstance(cached, Exception):
            raise cached
        return (cached[0], cached[1].copy())

    try:
        value = self._parseNoCache( instring, loc, doActions, callPreParse )
        # store a copy so later changes to the returned results do not alter the cache
        ParserElement._exprArgCache[ lookup ] = (value[0], value[1].copy())
        return value
    except ParseBaseException:
        failure = sys.exc_info()[1]
        ParserElement._exprArgCache[ lookup ] = failure
        raise
# default (non-memoizing) parse entry point; enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared C{ParserElement._exprArgCache} dictionary in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

# set to True by enablePackrat()
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

    Repeated attempts at the same string location can then return a cached
    value instead of re-running the parsing code; both successful results
    and parsing exceptions are cached.

    Because this can break programs whose parse actions have side effects,
    packrat parsing is off when pyparsing is first imported and must be
    enabled explicitly with C{ParserElement.enablePackrat()}.  If the
    program uses C{psyco}, call C{enablePackrat} before C{psyco.full()},
    otherwise Python will crash.  For best results call it immediately
    after importing pyparsing.
    """
    if ParserElement._packratEnabled:
        # already switched on; nothing to do
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString( self, instring, parseAll=False ):
    """Run this parse expression against C{instring} and return the tokens.
    This is the main entry point for client code once the grammar is built.

    Set C{parseAll} to True to require that the entire input be consumed
    (equivalent to ending the grammar with C{L{StringEnd()}}).

    Note: unless C{keepTabs} is set, C{expandtabs()} is called on the input
    so that column numbers are reported correctly.  If the input contains
    tabs and parse actions index into the string using C{loc}, keep a
    consistent view of the input by doing one of:
     - calling C{parseWithTabs} on the grammar before C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - using the full C{(s,loc,toks)} parse action signature and indexing
       into the C{s} argument
     - explicitly expanding the tabs in the input before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignoreExpr in self.ignoreExprs:
        ignoreExpr.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            loc = self.preParse( instring, loc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, loc )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
1040
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Generator that scans C{instring} for matches of this expression,
    yielding C{(tokens, start, end)} for each one.  C{maxMatches} stops the
    scan after that many matches; C{overlap} requests that overlapping
    matches be reported.

    Start and end locations are relative to the string being parsed.  See
    L{I{parseString}<parseString>} for more information on parsing strings
    with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for ignoreExpr in self.ignoreExprs:
        ignoreExpr.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc, tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
                continue

            if nextLoc <= loc:
                # match did not advance past loc; step forward
                loc = preloc + 1
                continue

            matches += 1
            yield tokens, preloc, nextLoc
            if not overlap:
                loc = nextLoc
            elif preparseFn( instring, loc ) > loc:
                # NOTE(review): with overlap set, the scan still jumps to the end
                # of the match whenever preParse skips something at loc - confirm intended
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1091
def transformString( self, instring ):
    """Extension to C{L{scanString}} that rewrites matched text using the
    tokens returned by the grammar's parse actions.  Attach a parse action
    that modifies the returned token list, then call C{transformString()}
    on the target string; unmatched text is copied through unchanged and
    the transformed string is returned."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # text between the previous match and this one
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks, list):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            prevEnd = end
        pieces.append( instring[prevEnd:] )
        nonEmpty = [piece for piece in pieces if piece]
        return "".join([_ustr(item) for item in _flatten(nonEmpty)])
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1125
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}: collect just the tokens of
    every match into a single C{ParseResults}.  C{maxMatches} stops the
    search after that many matches.
    """
    try:
        found = []
        for toks, start, end in self.scanString( instring, maxMatches ):
            found.append( toks )
        return ParseResults( found )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1140
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.  Strings are
    promoted with C{literalStringClass}; any other non-ParserElement
    operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1150
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1160
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1170
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1180
1181 - def __mul__(self,other):
1182 """Implementation of * operator, allows use of C{expr * 3} in place of 1183 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1184 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1185 may also include C{None} as in: 1186 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1187 to C{expr*n + L{ZeroOrMore}(expr)} 1188 (read as "at least n instances of C{expr}") 1189 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1190 (read as "0 to n instances of C{expr}") 1191 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1192 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1193 1194 Note that C{expr*(None,n)} does not raise an exception if 1195 more than n exprs exist in the input stream; that is, 1196 C{expr*(None,n)} does not enforce a maximum number of expr 1197 occurrences. If this behavior is desired, then write 1198 C{expr*(None,n) + ~expr} 1199 1200 """ 1201 if isinstance(other,int): 1202 minElements, optElements = other,0 1203 elif isinstance(other,tuple): 1204 other = (other + (None, None))[:2] 1205 if other[0] is None: 1206 other = (0, other[1]) 1207 if isinstance(other[0],int) and other[1] is None: 1208 if other[0] == 0: 1209 return ZeroOrMore(self) 1210 if other[0] == 1: 1211 return OneOrMore(self) 1212 else: 1213 return self*other[0] + ZeroOrMore(self) 1214 elif isinstance(other[0],int) and isinstance(other[1],int): 1215 minElements, optElements = other 1216 optElements -= minElements 1217 else: 1218 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1219 else: 1220 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1221 1222 if minElements < 0: 1223 raise ValueError("cannot multiply ParserElement by negative value") 1224 if optElements < 0: 1225 raise ValueError("second tuple value must be greater or equal to first tuple value") 1226 if minElements == optElements == 0: 1227 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1228 1229 if (optElements): 1230 def makeOptionalList(n): 1231 if n>1: 1232 return Optional(self + makeOptionalList(n-1)) 1233 else: 1234 return Optional(self)
1235 if minElements: 1236 if minElements == 1: 1237 ret = self + makeOptionalList(optElements) 1238 else: 1239 ret = And([self]*minElements) + makeOptionalList(optElements) 1240 else: 1241 ret = makeOptionalList(optElements) 1242 else: 1243 if minElements == 1: 1244 ret = self 1245 else: 1246 ret = And([self]*minElements) 1247 return ret 1248
1249 - def __rmul__(self, other):
1250 return self.__mul__(other)
1251
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.  Strings are
    promoted with C{literalStringClass}; any other non-ParserElement
    operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1261
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1271
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.  Strings are
    promoted with C{literalStringClass}; any other non-ParserElement
    operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1281
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1291
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.  Strings are
    promoted with C{literalStringClass}; any other non-ParserElement
    operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1301
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.
    Strings are promoted with C{literalStringClass}; any other
    non-ParserElement operand produces a warning and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1311
def __invert__( self ):
    """Implementation of ~ operator - wraps this expression in a C{L{NotAny}}."""
    negated = NotAny( self )
    return negated
1315
1316 - def __call__(self, name):
1317 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 1318 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1319 could be written as:: 1320 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1321 1322 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1323 passed as C{True}. 1324 """ 1325 return self.setResultsName(name)
1326
def suppress( self ):
    """Return this C{ParserElement} wrapped in C{Suppress}, so that its output
    is omitted; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress( self )
    return suppressed
1332
def leaveWhitespace( self ):
    """Turn off skipping of whitespace before matching this C{ParserElement}'s
    pattern, and return C{self}.  Normally only used internally by the
    pyparsing module, but may be needed in some whitespace-sensitive grammars.
    """
    setattr( self, "skipWhitespace", False )
    return self
1340
def setWhitespaceChars( self, chars ):
    """Override the default whitespace chars with C{chars}, re-enable
    whitespace skipping, and return C{self}.
    """
    # stop tracking the default whitespace set now that a custom one is given
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
1348
def parseWithTabs( self ):
    """Override the default behavior of expanding C{<TAB>}s to spaces before
    parsing the input string, and return C{self}.  Must be called before
    C{parseString} when the input grammar contains elements that match
    C{<TAB>} characters."""
    setattr( self, "keepTabs", True )
    return self
1355
def ignore( self, other ):
    """Register an expression to be ignored (e.g., comments) while doing
    pattern matching; may be called repeatedly to define multiple comment
    or other ignorable patterns.  Returns C{self}.
    """
    if not isinstance( other, Suppress ):
        # wrap a copy so the ignored text never shows up in the results
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other.copy() )
    return self
1367
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
    using the given callbacks; any callback passed as a false value is
    replaced by the module's default debug action.  Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1375
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (using the default debug actions),
    False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1384
1385 - def __str__( self ):
1386 return self.name
1387
def __repr__( self ):
    """Return the same text as the string form, converted through C{_ustr}."""
    text = _ustr(self)
    return text
1390
def streamline( self ):
    """Mark this expression as streamlined, clear the cached C{strRepr},
    and return C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1395
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; this implementation does nothing."""
    return None
1398
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.
    This implementation starts C{checkRecursion} with a fresh, empty list;
    C{validateTrace} is not used here."""
    visited = []
    self.checkRecursion( visited )
1402
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    C{parseAll} is passed through to C{parseString}; the parsed result of
    C{parseString} is returned, and any C{ParseBaseException} it raises is
    re-raised from here.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method, so treat the argument as a filename
        f = open(file_or_filename, "r")
        try:
            file_contents = f.read()
        finally:
            # close the handle even if the read fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1420
def getException(self):
    """Build a C{ParseException} carrying this expression's C{errmsg}, with
    an empty string and location 0."""
    exc = ParseException("",0,self.errmsg,self)
    return exc
1423
1424 - def __getattr__(self,aname):
1425 if aname == "myException": 1426 self.myException = ret = self.getException(); 1427 return ret; 1428 else: 1429 raise AttributeError("no such attribute " + aname)
1430
def __eq__(self,other):
    """Equality test.  Another C{ParserElement} is equal when it is the same
    object or has an equal C{__dict__}; a string is "equal" when this
    expression parses it completely (C{parseAll=True}); anything else is
    handed to the C{super} comparison."""
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1442
1443 - def __ne__(self,other):
1444 return not (self == other)
1445
1446 - def __hash__(self):
1447 return hash(id(self))
1448
1449 - def __req__(self,other):
1450 return self == other
1451
1452 - def __rne__(self,other):
1453 return not (self == other)
1454 1455
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the base class, refresh C{errmsg} to
        "Expected <name>", and return what the base class returned."""
        result = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return result
1465 1466
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1474 1475
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's reusable exception, updated
        with the current location and input string."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1490 1491
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string turns this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc + matchLen, match)} when the match string occurs at
        C{loc}; otherwise raise the reusable parse exception."""
        if (instring[loc] != self.firstMatchChar or
            not (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc+self.matchLen, self.match
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # compare in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, requiring that the characters just
        before and just after it are not in C{identChars}.  Returns
        C{(loc + matchLen, match)} or raises the reusable parse exception."""
        identChars = self.identChars
        endLoc = loc + self.matchLen
        if self.caseless:
            if ( instring[ loc:endLoc ].upper() == self.caselessmatch and
                 (loc >= len(instring)-self.matchLen or instring[endLoc].upper() not in identChars) and
                 (loc == 0 or instring[loc-1].upper() not in identChars) ):
                return endLoc, self.match
        elif ( instring[loc] == self.firstMatchChar and
               (self.matchLen==1 or instring.startswith(self.match,loc)) and
               (loc >= len(instring)-self.matchLen or instring[endLoc] not in identChars) and
               (loc == 0 or instring[loc-1] not in identChars) ):
            return endLoc, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then set the copy's C{identChars} to
        C{Keyword.DEFAULT_KEYWORD_CHARS}."""
        # NOTE(review): this discards any custom identChars given to the
        # constructor - confirm that is intended
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1584
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored match string;
        on success return C{(loc + matchLen, returnString)}, otherwise raise
        the reusable parse exception."""
        endLoc = loc + self.matchLen
        if instring[ loc:endLoc ].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return endLoc, self.returnString
1605
class CaselessKeyword(Keyword):
    """C{L{Keyword}} constructed with C{caseless=True}."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Case-insensitive keyword match at C{loc}; only the character
        following the keyword is checked against C{identChars}.  Returns
        C{(loc + matchLen, match)} or raises the reusable parse exception."""
        endLoc = loc + self.matchLen
        if ( instring[ loc:endLoc ].upper() == self.caselessmatch and
             (loc >= len(instring)-self.matchLen or instring[endLoc].upper() not in self.identChars) ):
            return endLoc, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1619
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{exclude} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join([c for c in initChars if c not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in the character sets,
        # precompile an equivalent regular expression for parseImpl to use
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a literal leading character is only valid when there is
                # exactly one initial character; this used to test the length
                # of the body characters, so a multi-character initChars was
                # matched as a literal string
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc} and return C{(endLoc, matchedText)}, using
        the precompiled regular expression when there is one and the
        character-by-character scan otherwise.  Raises the reusable parse
        exception on failure."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # an explicit maximum fails the match if the word continues past it
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form when available, otherwise a
        cached C{W:(...)} summary of the character sets."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # base-class string form unavailable; build the summary below
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1752 1753
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression
        object, which is then used directly.  Raises C{ValueError} for any
        other type; an invalid pattern string produces a warning and the
        compile error is re-raised."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the pattern source text; str() of a compiled pattern
            # gives the object's repr, not the regular expression
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regular expression at C{loc}.  Returns the end location
        and a C{ParseResults} holding the matched text, with each named
        group stored under its group name.  Raises the reusable parse
        exception when there is no match."""
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the base-class string form when available, otherwise a
        cached C{Re:(...)} summary of the pattern."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # base-class string form unavailable; build the summary below
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1819 1820
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} (after a C{SyntaxWarning}) if either quote string is
           empty once surrounding whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # the escape character (if any) is excluded from the plain-body character class
        if escChar is not None:
            escCharInClass = _escapeRegexRangeChars(escChar)
        else:
            escCharInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )

        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote may appear in the body
            # as long as the character following it breaks the end-quote match
            partials = []
            for i in range(len(self.endQuoteChar)-1,0,-1):
                partials.append( "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                              _escapeRegexRangeChars(self.endQuoteChar[i])) )
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'

        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))

        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # character class of the quote characters that get unescaped in the results;
            # use the shared range-escaping helper so that quote characters such as
            # ']' or '\' cannot corrupt the class, and the result is order-independent
            charset = ( _escapeRegexRangeChars(self.quoteChar[0]) +
                        _escapeRegexRangeChars(self.endQuoteChar[0]) )
            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)

        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}.  Returns the end location and the matched
           text; when C{unquoteResults} is set the quotes are stripped, escaped quote
           characters are unescaped and C{escQuote} sequences are replaced by the end quote.
           Raises this element's exception if no quoted string starts at C{loc}."""
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is a regex group reference, not a string escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the inherited string form if available, else a cached description
           naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1935 1936
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Record the excluded characters and the length limits.
           Raises C{ValueError} if C{min} is less than 1."""
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters starting at C{loc} until an excluded character, the
           maximum length, or the end of the string is reached.  Returns the end
           location and the consumed text; raises this element's exception if the
           first character is excluded or fewer than C{minLen} characters matched."""
        excluded = self.notChars
        if instring[loc] in excluded:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc = start + 1
        stop = min( start+self.maxLen, len(instring) )
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the inherited string form if available, else a cached C{!W:(...)}
           summary showing at most the first 4 excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
2006
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # printable labels used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Set up matching of the characters in C{ws}; those characters are removed
           from the set of whitespace this element skips before matching."""
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at C{loc}.
           Returns the end location and the matched whitespace; raises this element's
           exception if the first character does not qualify or the run is shorter
           than C{minLen}."""
        if instring[ loc ] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc = start + 1
        stop = min( start + self.maxLen, len(instring) )
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2062 2063
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        # named after the concrete class
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2070
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and then whitespace until the target column
           (or a non-space character, or the end of the string) is reached."""
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column, together with the
           location of that column.  Raises C{ParseException} if C{loc} is already
           past the target column."""
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]
2093
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Advance C{loc} by one if the inherited pre-parse stops on a newline;
           otherwise return C{loc} unchanged."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list if C{loc} is 0, is where pre-parsing from
           the start of the string ends, or directly follows a newline; otherwise
           raise this element's exception."""
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2117
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a newline at C{loc} (returning it as the token), or succeed with
           an empty token list exactly at the end of the string; otherwise raise
           this element's exception."""
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        # either a non-newline character, or a location past the end of the string
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2142
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at location 0, or at the location that
           pre-parsing from the start of the string leads to (i.e. everything before
           C{loc} is skippable); otherwise raise this element's exception."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2159
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when C{loc} is at or beyond the end of
           C{instring}; raise this element's exception when text remains.

           At exactly the end of the string the returned location is C{loc+1};
           beyond the end it is returned unchanged."""
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []
2182
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at location 0, or where the previous
           character is not a word character and the current one is; otherwise
           raise this element's exception."""
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2204
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at or beyond the end of the string, or
           where the previous character is a word character and the current one is
           not; otherwise raise this element's exception.

           Location 0 of a non-empty string never matches, since no word can end there."""
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 has no preceding character; indexing loc-1 there would wrap
            # around to the LAST character of the string and could match falsely
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2229 2230
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store C{exprs} as a list in C{self.exprs}: a list is kept as-is, a string
           becomes a single C{Literal}, any other iterable is copied into a list, and
           a non-iterable is wrapped in a one-element list."""
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and discard the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original sub-expressions are left untouched
        copies = []
        for e in self.exprs:
            copies.append( e.copy() )
        self.exprs = copies
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this expression and on every contained
           expression.  A C{Suppress} that is already registered is not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the inherited string form if available, else a cached
           C{ClassName:(exprs)} summary."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and all contained expressions, flattening
           directly nested expressions of the same class where that is safe."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def collapsible( candidate ):
            # only merge plain nested expressions of our own class: no parse
            # actions, no results name and no debugging enabled
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if collapsible( first ):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            last = self.exprs[-1]
            if collapsible( last ):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run the recursion check on this one."""
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies too."""
        duplicate = super(ParseExpression,self).copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate
2331
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once C{And.parseImpl} has passed one of these, a failure
           of any later expression is raised as a C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Combine C{exprs} into a sequence.  Whitespace handling is taken from the
           first expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # read from the normalized self.exprs rather than the raw argument, which
        # may be a plain string or a one-shot iterable and so not safely indexable
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in turn, accumulating their tokens.
           Returns the final location and the combined results."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the error stop, failures are promoted to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement C{+=}: append C{other}, converting a plain string to a literal
           using the configured literal class (as the C{^=} and C{|=} operators do)."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the first one
           that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a b c}} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2399 2400
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # may match empty as soon as any single alternative may
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative at C{loc} and re-parse with the one that reached the
           furthest location (the first such alternative wins ties).  If none match,
           raise the exception that got furthest, or a C{ParseException} if there
           were no alternatives producing one."""
        furthestFailLoc = -1
        furthestFail = None
        bestLoc = -1
        bestExpr = None
        for e in self.exprs:
            try:
                endLoc = e.tryParse( instring, loc )
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFail = err
                    furthestFailLoc = err.loc
            except IndexError:
                if len(instring) > furthestFailLoc:
                    furthestFail = ParseException(instring,len(instring),e.errmsg,self)
                    furthestFailLoc = len(instring)
            else:
                if endLoc > bestLoc:
                    bestLoc = endLoc
                    bestExpr = e

        if bestLoc >= 0:
            return bestExpr._parse( instring, loc, doActions )

        if furthestFail is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestFail

    def __ixor__(self, other ):
        """Implement C{^=}: append C{other}, converting a plain string to a literal."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a ^ b}} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
2461 2462
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            # may match empty as soon as any single alternative may
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.
           If none match, raise the exception that got furthest into the input, or
           a C{ParseException} if there were no alternatives producing one."""
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException:
                # fetch via sys.exc_info() (as Or and And do) so the same source
                # is valid under both Python 2 and Python 3
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Implement C{|=}: append C{other}, converting a plain string to a literal."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a | b}} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2520 2521
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # may match empty only if every contained expression may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # expression groups are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse them
           in that order and merge their results.  Raises C{ParseException} if any
           required expression is never matched."""
        if self.initExprGroups:
            # sort the contained expressions into groups, once
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # trial passes: keep sweeping over the candidates until a whole sweep
        # produces no match at all
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failCount = 0
            for e in candidates:
                try:
                    probeLoc = e.tryParse( instring, probeLoc )
                except ParseException:
                    failCount += 1
                else:
                    matchOrder.append(e)
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failCount == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # real pass: parse in the discovered order, this time honoring doActions
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # names already present are combined rather than overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    merged = ParseResults(finalResults[k])
                    merged += ParseResults(r[k])
                    dups[k] = merged
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a & b}} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
2608 2609
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr} (a plain string is converted to a C{Literal}) and inherit its
           parsing attributes.  C{expr} may be C{None}."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression.
           Raises C{ParseException} if no expression is set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            # report against the actual input so the exception carries its context
            raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a copy of the wrapped
           expression (if one is set)."""
        self.skipWhitespace = False
        # only copy once we know there is an expression; copying first would
        # fail on None before the guard was ever reached
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this element and on the wrapped
           expression.  A C{Suppress} that is already registered is not added again."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears in
           C{parseElementList}; otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check on this element."""
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the inherited string form if available, else a cached
           C{ClassName:(expr)} summary (C{None} if no expression is set)."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2680 2681
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at C{loc}; any exception it raises propagates.
           On success the location is returned unchanged with no tokens."""
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2694 2695
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed (location unchanged, no tokens) when the wrapped expression fails
           at C{loc}; raise this element's exception when it matches."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent - that is the success case
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{~{expr}} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2730 2731
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression repeatedly until it fails, skipping ignorable
           expressions between repetitions.  Returns the location after the last
           successful match and the accumulated tokens (an empty list if the very
           first attempt fails)."""
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{[expr]...} summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Apply the results name via the base class and mark the returned element
           as saving its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2769 2770
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression at least once, then as often as possible.

        The first match is mandatory: an exception raised by it propagates
        to the caller.  Later failures (C{ParseException} or C{IndexError})
        just end the repetition.  Returns C{(loc, tokens)}.
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                loc, matched = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry tokens or named results
                if matched or matched.keys():
                    tokens += matched
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return the explicit name if set, else a cached C{{expr}...} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Assign a results name and mark the returned element as save-as-list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2804
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""

    def __nonzero__(self):
        # truth test under Python 2
        return False

    # truth test under Python 3
    __bool__ = __nonzero__

    def __str__(self):
        return ""


# sentinel used as the default for Optional's "default" argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; on failure substitute the default, if any.

        When the wrapped expression raises C{ParseException} or
        C{IndexError}, C{loc} is left unchanged and the tokens are:
         - an empty list when no default was supplied,
         - a C{ParseResults} holding the default, also stored under the
           wrapped expression's results name, when that name is set,
         - otherwise a one-element list holding the default.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        """Return the explicit name if set, else a cached C{[expr]} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
2845 2846
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If C{failOn} is given (an expression, or a string
       that is converted to a C{Literal}) and it matches at a scanned position, the
       skip fails with a C{ParseException} instead of continuing.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal; anything else is stored as given
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc}, one character at a time, until the target matches.

        Returns C{(loc, [skippedText])}; when C{includeMatch} is set and the target
        yields tokens, returns the location after the target and a single
        C{ParseResults} holding the skipped text followed by the target's tokens.
        Raises C{ParseException} if C{failOn} matches first, or the element's
        preallocated exception if the end of the string is passed without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set while a failOn hit is being raised, so the outer handler re-raises
        # it instead of treating it as "target not found here"
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over as many consecutive ignorable matches as are present
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # parse the target again, this time honouring doActions
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # target not found at this position; try the next character
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2911
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install C{other} as the wrapped expression and copy its parsing flags.

        A plain string is first converted with C{ParserElement.literalStringClass}.
        Returns C{None} (not C{self}), so the result of C{fwd << expr} cannot be
        combined further - presumably to expose the precedence pitfall described
        in the class docstring.
        """
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        # drop any cached string representation of the previous contents
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def __ilshift__( self, other ):
        """In-place form of '<<': install C{other} and return C{self}.

        Python rebinds the left-hand name to whatever C{__ilshift__} returns,
        so this must return C{self}; aliasing it to C{__lshift__} (which
        returns C{None}) would leave C{fwd} bound to C{None} after
        C{fwd <<= expr}.
        """
        self.__lshift__( other )
        return self

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; returns C{self}."""
        if not self.streamlined:
            # set the flag before descending so a recursive grammar terminates
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression unless this element is already in the trace."""
        # the default list is never mutated: a fresh list is built for the recursion
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the explicit name if set, else C{"Forward: <contents>"}."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch to a class whose __str__ returns "...", so that a
        # self-referencing grammar does not recurse while being rendered.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element; an undefined C{Forward} yields a new one wrapping it."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
2985
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is always C{"..."}.

    C{Forward.__str__} temporarily assigns this class to an instance's
    C{__class__} while rendering its contents.
    """

    def __str__(self):
        return "..."
2989
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
2995
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance issues a C{DeprecationWarning}
    recommending the C{upcaseTokens} parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list with every token upper-cased."""
        # Call the str method directly: string.upper() did the same thing on
        # Python 2 but does not exist on Python 3.
        return [ tok.upper() for tok in tokenlist ]
3005 3006
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression; returns C{self}.

        With C{adjacent} set, C{ParserElement.ignore} is invoked directly,
        bypassing the intermediate base classes; otherwise the normal
        inherited implementation is used.
        """
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their concatenation, keeping named results."""
        # start from a copy so results names survive, then drop the tokens
        combined = tokenlist.copy()
        del combined[:]
        joinedText = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joinedText], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
3038
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        grouped = [tokenlist]
        return grouped
3047
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add one named entry to C{tokenlist} per non-empty element, keyed by its first token.

        Returns C{[tokenlist]} when this element has a results name, else C{tokenlist}.
        """
        for i,tok in enumerate(tokenlist):
            # empty elements contribute no key
            if len(tok) == 0:
                continue
            ikey = tok[0]
            # integer keys are replaced by their stripped string form
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                # key with no value: store an empty string
                tokenlist[ikey] = _ParseResultsWithOffset("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                # key with a single plain value: store the value itself
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
            else:
                # otherwise the value is everything after the key
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                # keep the remainder as a unit unless it is exactly one item
                # without named results, in which case store that item directly
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
3080 3081
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        """Discard the matched tokens by returning a new empty list."""
        return list()

    def suppress(self):
        """Already suppressed: return this same element."""
        return self
3089 3090
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action the first time; raise C{ParseException} afterwards.

        The instance is marked as called only after the wrapped action
        returns, so an action that raises may be invoked again.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3104
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action, and a "leaving" line afterwards showing either
    the return value or the exception raised (which is then re-raised).
    When more than three arguments are passed, the class name of the first
    one is prefixed to the reported function name.
    """
    # Resolve the display name from the callable as it was passed in, before
    # _trim_arity replaces it, and via __name__ rather than the Python 2-only
    # func_name attribute.  Callables without __name__ are labelled with their
    # class name.
    funcName = getattr(f, "__name__", f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = funcName
    except (AttributeError, TypeError):
        # best effort only: keep the wrapper's own name if it cannot be set
        pass
    return z

#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    # display name of the form:  expr [delim expr]...
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3144
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the leading count;
       otherwise a C{Word(nums)} converted to C{int} is used.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the array expression from the count that was just parsed
        count = t[0]
        if count:
            arrayExpr << Group(And([expr] * count))
        else:
            arrayExpr << Group(empty)
        # the count itself is suppressed from the results
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr)
def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded in order.

    Only C{list} instances are expanded; other items (tuples and strings
    included) are kept as they are.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3173
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several (possibly nested) tokens: flatten and match them in sequence
        literals = [Literal(tok) for tok in _flatten(t.asList())]
        rep << And(literals)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the original expression
    exprCopy = expr.copy()
    rep << exprCopy

    def copyTokenToRepeater(s, l, t):
        # remember what the first expression matched ...
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject a repeat whose tokens differ
            actual = _flatten(t.asList())
            if actual != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regular expression C{[...]} character set.

    Backslash, '^', '-' and ']' are prefixed with C{_bslash}; newline and
    tab characters are replaced by the two-character sequences C{\\n} and
    C{\\t}.  The result is passed through C{_ustr}.
    """
    # the backslash is handled first so the escapes added for the other
    # characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
3233
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
          (any other iterable of strings is also accepted)
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       A non-iterable C{strs} produces a C{SyntaxWarning} and is treated as an
       empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs,(list,tuple)):
        symbols = list(strs)
    else:
        try:
            # generators, sets and other iterables of strings
            symbols = list(strs)
        except TypeError:
            warnings.warn("Invalid argument to oneOf, expected string or list",
                    SyntaxWarning, stacklevel=2)
            # always bind symbols, so the code below cannot hit an unbound name
            symbols = []

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix ahead of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no conflict found for symbols[i]; move on to the next one
            i += 1

    # an empty symbol list cannot form a valid regex, so skip straight to MatchFirst
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character set suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3292
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    # one grouped key/value entry, repeated any number of times
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
3302
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # empty markers whose parse action records the current location
    startMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            # replace the tokens with the raw slice of the input
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # keep the ParseResults: swap its tokens for the raw slice and
            # remove the two helper names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty."""
    # the parse action replaces the results with their first element
    return TokenConverter(expr).setParseAction(lambda t:t[0])

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the '[...]' range syntax (the "body" result is read by srange)
# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# \x## or \0x## (either case of 'x') -> character with that hex code.  The digits
# are taken as everything after the 'x': lstrip(r'\0x') strips a *set* of
# characters, so it also ate leading '0' digits ("\x00" became "") and left an
# upper-case 'X' in place, making int() raise in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# \0## -> character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a parsed (first, last) range into the run of characters it covers;
# anything that is not a ParseResults is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberate best effort: malformed input yields "".  Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return ""
3375
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.

       Returns a parse action that raises C{ParseException} unless the match
       location falls in column C{n}.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.

       The returned parse action ignores its arguments and returns a new
       one-element list holding C{replStr} on every call.
    """
    def _replFunc(*_ignored):
        replacement = [replStr]
        return replacement

    return _replFunc
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
3399
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    # each token is converted with _ustr before upper-casing
    return [_ustr(tok).upper() for tok in t]
3403
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    # each token is converted with _ustr before lower-casing
    return [_ustr(tok).lower() for tok in t]
3407
def keepOriginalText(s, startLoc, t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # replace every token, in place, with the raw input slice that was matched
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3419
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Raises C{ParseFatalException} if no C{_parseNoCache} frame is found on
       the call stack.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # the parser keeps its current position in the local named "loc"
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references as soon as we are done with them
        del fstack
3435
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       C{tagStr} is either a string (turned into a C{Keyword}, caseless unless C{xml}
       is true) or an expression, whose C{name} is then used as the tag name.
       Returns the pair C{(openTag, closeTag)}; both carry the tag name in a C{tag}
       attribute.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute needs a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: values may be quoted (either style) or a bare run of printables
        # other than '>', the value is optional, and attribute names are lower-cased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name with ':' treated as a word
    # break, each word title-cased and the words joined together
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3464
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    # HTML flavour: delegate with xml switched off
    return _makeTags(tagStr, xml=False)
3468
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    # XML flavour: delegate with xml switched on
    return _makeTags(tagStr, xml=True)
3472
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       Positional name-value tuples take priority: keyword arguments are only
       consulted when no positional arguments are given.
    """
    # snapshot the requested (name, value) pairs as a list
    attrs = [(k, v) for k, v in (args or attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))

    return pa
# unique marker meaning "attribute must be present, with any value"
withAttribute.ANY_VALUE = object()

# associativity constants for operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar; a parenthesized occurrence of the whole grammar is
          also accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} for an unsupported number of terms, a ternary operator
       not given as two expressions, or an unrecognized associativity.
    """
    ret = Forward()
    # the innermost level: a base element or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        # Each matchExpr is guarded by a FollowedBy lookahead for one occurrence
        # of the operator pattern before the Group is parsed.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse through thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its operator form, or falls back to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
# Ready-made expressions for quoted string literals.  Inside the quotes a
# character may be: any character other than the quote, a newline, a carriage
# return or a backslash; a doubled quote character; a backslash-x hex escape;
# or a backslash followed by any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")

sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")

# Either quoting style: the double-quoted alternative is tried first, then the
# single-quoted one.  The two adjacent raw literals concatenate into a single
# pattern string.
quotedString = Regex(
    r'(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")'
    r"|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')"
).setName("quotedString using single or double quotes")

# A quoted string immediately preceded by a literal 'u', combined into one token.
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression that matches nested lists enclosed in opening and
       closing delimiters (C{"("} and C{")"} unless specified otherwise).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may be
            a string or a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may be
            a string or a pyparsing expression
        - content - expression for the items inside the nested lists
            (default=None)
        - ignoreExpr - expression whose matches may contain delimiter
            characters that must not count as nesting (default=quotedString)

       When no C{content} expression is given, one is built that captures
       everything between the delimiters as separate whitespace-delimited
       values; this is only possible when both C{opener} and C{closer} are
       strings, otherwise C{ValueError} is raised.  C{ValueError} is also
       raised if C{opener} and C{closer} compare equal.

       Use C{ignoreExpr} for constructs such as quoted strings or comments
       in which opening or closing characters should not be treated as
       nesting delimiters.  Combine several such expressions with an
       C{L{Or}} or C{L{MatchFirst}}.  The default is L{quotedString}; pass
       C{None} if nothing is to be ignored.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        bothStrings = isinstance(opener,basestring) and isinstance(closer,basestring)
        if not bothStrings:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can be excluded directly by CharsNotIn
            excluded = opener + closer + whiteChars
            if ignoreExpr is not None:
                piece = ~ignoreExpr + CharsNotIn(excluded, exact=1)
                content = Combine(OneOrMore(piece)).setParseAction(stripToken)
            else:
                # here the strip action is attached to the CharsNotIn element only
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
        else:
            # longer delimiters need negative lookaheads before each character
            if ignoreExpr is not None:
                piece = (~ignoreExpr +
                         ~Literal(opener) + ~Literal(closer) +
                         CharsNotIn(whiteChars, exact=1))
            else:
                piece = (~Literal(opener) + ~Literal(closer) +
                         CharsNotIn(whiteChars, exact=1))
            content = Combine(OneOrMore(piece)).setParseAction(stripToken)

    # the Forward lets the grouped list refer to itself for arbitrary nesting
    ret = Forward()
    opening = Suppress(opener)
    if ignoreExpr is not None:
        items = ZeroOrMore( ignoreExpr | ret | content )
    else:
        items = ZeroOrMore( ret | content )
    ret << Group( opening + items + Suppress(closer) )
    return ret
3637
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a block of statements delimited by their
       indentation column, such as the block statements of Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement
            that is repeated within the indented block
        - indentStack - list created by the caller to hold the stack of
            indentation columns (multiple statementWithIndentedBlock
            expressions within a single grammar should share a common
            indentStack)
        - indent - boolean indicating whether the block must be indented
            beyond the current level; set to False for a block of left-most
            statements (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # nothing to compare once the end of the input has been reached
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # a deeper column opens a new indentation level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        # must be left of the current level and no deeper than the one below it
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # one or more statements, each starting at the current indentation column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )

    # deliberately done after smExpr has been built, as in the original ordering
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets built from srange() specifications
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
_htmlEntityMap = {
    "gt": '>',
    "lt": '<',
    "amp": '&',
    "nbsp": ' ',
    "quot": '"',
}

def replaceHTMLEntity(t):
    """Parse action returning the character for the entity named by
       C{t.entity}, or C{None} if that name is not in the entity map."""
    return _htmlEntityMap.get(t.entity)

# comment expressions are common and easy to get wrong, so they are provided ready-made
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="" )
).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the results, or print
           the error location if parsing fails."""
        try:
            parsed = simpleSQL.parseString( teststring )
            asList = parsed.asList()
            print (teststring + "->" + str(asList))
            print ("tokens = " + str(parsed))
            print ("tokens.columns = " + str(parsed.columns))
            print ("tokens.tables = " + str(parsed.tables))
            print (parsed.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() is used instead of binding the exception in the except clause
            failure = sys.exc_info()[1]
            print (teststring + "->")
            print (failure.line)
            print (" "*(failure.column-1) + "^")
            print (failure)
        print()

    selectToken = CaselessLiteral( "select" )
    fromToken   = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # sample statements, run in this order
    samples = (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    )
    for sample in samples:
        test( sample )