
Source Code for Module pywbem.yacc

# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Author(s): David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2009, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
#
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."   Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

__version__    = "3.0"
__tabversion__ = "3.0"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.
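
# Example (illustrative sketch, not part of the original module): these
# defaults can be overridden at runtime before any tables are built.  The
# module name "pywbem.yacc" reflects this bundled copy; upstream PLY would
# be imported as "ply.yacc".
#
#     import pywbem.yacc as yacc
#     yacc.yaccdebug  = 0             # don't write parser.out
#     yacc.debug_file = 'mof.out'     # hypothetical alternative file name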

import re, types, sys, os.path

# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self,f):
        self.f = f
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self,name):
        return self
    def __call__(self,*args,**kwargs):
        return self
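
# Example (illustrative sketch): parse() below accepts any object with this
# logging interface in its debug argument, so detailed trace output can be
# redirected to a file instead of the stderr logger that an integer flag
# selects (the file name here is hypothetical):
#
#     log = PlyLogger(open("parser.log", "w"))
#     result = parser.parse(data, debug=log)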

# Exception raised for yacc-related errors
class YaccError(Exception): pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit]+" ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))
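
# Example (illustrative): a short repr is shown verbatim, anything longer is
# reduced to a type/address stub so debug logs stay readable:
#
#     format_stack_entry([1, 2])       ->  "[1, 2]"
#     format_stack_entry("x" * 100)    ->  "<str @ 0x...>"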

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self): return self.type
    def __repr__(self): return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser= None
    def __getitem__(self,n):
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        raise SyntaxError
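
# Example (illustrative, not part of the module): a YaccProduction instance
# is the 'p' argument that PLY passes to user grammar rule functions.
# Indexing reads and writes the .value of the underlying symbols:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]        # p[0] is the LHS; p[1], p[3] are RHS values
#         line = p.lineno(2)        # line number of the PLUS token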

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions
        self.action      = lrtab.lr_action
        self.goto        = lrtab.lr_goto
        self.errorfunc   = errorf

    def errok(self):
        self.errorok     = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
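
    # Example (illustrative): how the three implementations below are
    # selected.  Any truthy debug value enables parsedebug(); an int is
    # replaced by a stderr PlyLogger, while a logger-like object is used
    # directly:
    #
    #     parser.parse(data)                 # parseopt_notrack()
    #     parser.parse(data, tracking=1)     # parseopt()
    #     parser.parse(data, debug=1)        # parsedebug(), logging to stderr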

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]", -t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [], -t)

                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t == None:

                # --! DEBUG
                debug.error('Error : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
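
# Example (illustrative): for a rule 'expr : expr PLUS term', the resulting
# Production instance has roughly these attributes:
#
#     p.name  == 'expr'
#     p.prod  == ('expr', 'PLUS', 'term')
#     p.len   == 3
#     str(p)  == 'expr -> expr PLUS term'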

# This class serves as a minimal standin for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self,str,name,len,func,file,line):
        self.name     = name
        self.len      = len
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.str      = str
    def __str__(self):
        return self.str
    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item. Example, if we are ' expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self,p,n):
        self.name       = p.name
        self.prod       = list(p.prod)
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.prod.insert(n,".")
        self.prod       = tuple(self.prod)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __getitem__(self,index):
        return self.prod[index]

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
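
# Example (illustrative): with PLUS as the only terminal in the rule body,
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], {'PLUS': []})  ->  'PLUS'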

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self,terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self,index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
        assert self.Productions == [None],"Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ['left','right','nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc,level)
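
    # Example (illustrative sketch): this is the internal form of the
    # user-level precedence table (('left','PLUS'), ('left','TIMES')):
    #
    #     g.set_precedence('PLUS',  'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)   # binds tighter than PLUS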

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0
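
    # Example (illustrative sketch): building a toy grammar by hand.  The
    # yacc() entry point normally derives these calls from rule-function
    # docstrings, so direct use like this is for illustration only:
    #
    #     g = Grammar(['NUMBER', 'PLUS'])
    #     g.add_production('expr', ['expr', 'PLUS', 'term'])
    #     g.add_production('expr', ['term'])
    #     g.add_production('term', ['NUMBER'])
    #     g.set_start('expr')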

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self,start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s,[]):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = { }
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        mark_reachable_from( self.Productions[0].prod[0] )

        return [s for s in list(self.Nonterminals)
                  if not reachable[s]]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
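
    # Example (illustrative): a nonterminal that can never derive a string of
    # only terminals is reported.  Given just the rule 'a : a PLUS a' with no
    # base case, infinite_cycles() would include 'a' in its result.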

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p: continue

            for s in p.prod:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    result.append((s,p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s,v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s,v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
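
    # Example (illustrative): if First['expr'] is ['NUMBER'] and 'opt' can
    # derive <empty> with First['opt'] = ['COMMA','<empty>'], then
    #
    #     g._first(('opt', 'expr'))  ->  ['COMMA', 'NUMBER']
    #
    # because the <empty> in 'opt' forces 'expr' to be considered as well.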

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
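
    # Example (illustrative): for the toy grammar expr -> expr PLUS term | term,
    # term -> NUMBER, the fixed point computed above gives roughly
    #
    #     Follow['expr'] == ['$end', 'PLUS']
    #     Follow['term'] == ['$end', 'PLUS']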
1754 1755 1756 # ----------------------------------------------------------------------------- 1757 # build_lritems() 1758 # 1759 # This function walks the list of productions and builds a complete set of the 1760 # LR items. The LR items are stored in two ways: First, they are uniquely 1761 # numbered and placed in the list _lritems. Second, a linked list of LR items 1762 # is built for each production. For example: 1763 # 1764 # E -> E PLUS E 1765 # 1766 # Creates the list 1767 # 1768 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 1769 # ----------------------------------------------------------------------------- 1770
1771 - def build_lritems(self):
1772 for p in self.Productions: 1773 lastlri = p 1774 i = 0 1775 lr_items = [] 1776 while 1: 1777 if i > len(p): 1778 lri = None 1779 else: 1780 lri = LRItem(p,i) 1781 # Precompute the list of productions immediately following 1782 try: 1783 lri.lr_after = self.Prodnames[lri.prod[i+1]] 1784 except (IndexError,KeyError): 1785 lri.lr_after = [] 1786 try: 1787 lri.lr_before = lri.prod[i-1] 1788 except IndexError: 1789 lri.lr_before = None 1790 1791 lastlri.lr_next = lri 1792 if not lri: break 1793 lr_items.append(lri) 1794 lastlri = lri 1795 i += 1 1796 p.lr_items = lr_items
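Continuing the Grammar sketch above: after build_lritems(), each production carries its dotted-item chain (this assumes LRItem renders the dot position when printed, as its string form does elsewhere in PLY):

    g.build_lritems()
    for item in g.Productions[1].lr_items:
        print(item)
    # expr -> . expr PLUS term
    # expr -> expr . PLUS term
    # expr -> expr PLUS . term
    # expr -> expr PLUS term .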
1797 1798 # ----------------------------------------------------------------------------- 1799 # == Class LRTable == 1800 # 1801 # This basic class represents a basic table of LR parsing information. 1802 # Methods for generating the tables are not defined here. They are defined 1803 # in the derived class LRGeneratedTable. 1804 # ----------------------------------------------------------------------------- 1805
1806 -class VersionError(YaccError): pass
1807
1808 -class LRTable(object):
1809 - def __init__(self):
1810 self.lr_action = None 1811 self.lr_goto = None 1812 self.lr_productions = None 1813 self.lr_method = None
1814
1815 - def read_table(self,module):
1816 if isinstance(module,types.ModuleType): 1817 parsetab = module 1818 else: 1819 if sys.version_info[0] < 3: 1820 exec("import %s as parsetab" % module) 1821 else: 1822 env = { } 1823 exec("import %s as parsetab" % module, env, env) 1824 parsetab = env['parsetab'] 1825 1826 if parsetab._tabversion != __tabversion__: 1827 raise VersionError("yacc table file version is out of date") 1828 1829 self.lr_action = parsetab._lr_action 1830 self.lr_goto = parsetab._lr_goto 1831 1832 self.lr_productions = [] 1833 for p in parsetab._lr_productions: 1834 self.lr_productions.append(MiniProduction(*p)) 1835 1836 self.lr_method = parsetab._lr_method 1837 return parsetab._lr_signature
1838 1839 # Bind all production function names to callable objects in pdict
1840 - def bind_callables(self,pdict):
1841 for p in self.lr_productions: 1842 p.bind(pdict)
1843 1844 # ----------------------------------------------------------------------------- 1845 # === LR Generator === 1846 # 1847 # The following classes and functions are used to generate LR parsing tables on 1848 # a grammar. 1849 # ----------------------------------------------------------------------------- 1850 1851 # ----------------------------------------------------------------------------- 1852 # digraph() 1853 # traverse() 1854 # 1855 # The following two functions are used to compute set valued functions 1856 # of the form: 1857 # 1858 # F(x) = F'(x) U U{F(y) | x R y} 1859 # 1860 # This is used to compute the values of Read() sets as well as FOLLOW sets 1861 # in LALR(1) generation. 1862 # 1863 # Inputs: X - An input set 1864 # R - A relation 1865 # FP - Set-valued function 1866 # ------------------------------------------------------------------------------ 1867
1868 -def digraph(X,R,FP):
1869 N = { } 1870 for x in X: 1871 N[x] = 0 1872 stack = [] 1873 F = { } 1874 for x in X: 1875 if N[x] == 0: traverse(x,N,stack,F,X,R,FP) 1876 return F
1877
1878 -def traverse(x,N,stack,F,X,R,FP):
1879 stack.append(x) 1880 d = len(stack) 1881 N[x] = d 1882 F[x] = FP(x) # F(X) <- F'(x) 1883 1884 rel = R(x) # Get y's related to x 1885 for y in rel: 1886 if N[y] == 0: 1887 traverse(y,N,stack,F,X,R,FP) 1888 N[x] = min(N[x],N[y]) 1889 for a in F.get(y,[]): 1890 if a not in F[x]: F[x].append(a) 1891 if N[x] == d: 1892 N[stack[-1]] = MAXINT 1893 F[stack[-1]] = F[x] 1894 element = stack.pop() 1895 while element != x: 1896 N[stack[-1]] = MAXINT 1897 F[stack[-1]] = F[x] 1898 element = stack.pop()
1899
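A minimal standalone call to digraph() above, where X is the node set, R maps a node to its related nodes, and FP supplies each node's base set; the result unions the FP values along R (here a simple chain a R b, b R c):

    X = ['a', 'b', 'c']
    R = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
    FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]
    print(digraph(X, R, FP))
    # {'a': [1, 2, 3], 'b': [2, 3], 'c': [3]}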
1900 -class LALRError(YaccError): pass
1901 1902 # ----------------------------------------------------------------------------- 1903 # == LRGeneratedTable == 1904 # 1905 # This class implements the LR table generation algorithm. There are no 1906 # public methods except for write() 1907 # ----------------------------------------------------------------------------- 1908
1909 -class LRGeneratedTable(LRTable):
1910 - def __init__(self,grammar,method='LALR',log=None):
1911 if method not in ['SLR','LALR']: 1912 raise LALRError("Unsupported method %s" % method) 1913 1914 self.grammar = grammar 1915 self.lr_method = method 1916 1917 # Set up the logger 1918 if not log: 1919 log = NullLogger() 1920 self.log = log 1921 1922 # Internal attributes 1923 self.lr_action = {} # Action table 1924 self.lr_goto = {} # Goto table 1925 self.lr_productions = grammar.Productions # Copy of grammar Production array 1926 self.lr_goto_cache = {} # Cache of computed gotos 1927 self.lr0_cidhash = {} # Cache of closures 1928 1929 self._add_count = 0 # Internal counter used to detect cycles 1930 1931 # Diagnostic information filled in by the table generator 1932 self.sr_conflict = 0 1933 self.rr_conflict = 0 1934 self.conflicts = [] # List of conflicts 1935 1936 self.sr_conflicts = [] 1937 self.rr_conflicts = [] 1938 1939 # Build the tables 1940 self.grammar.build_lritems() 1941 self.grammar.compute_first() 1942 self.grammar.compute_follow() 1943 self.lr_parse_table()
1944 1945 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 1946
1947 - def lr0_closure(self,I):
1948 self._add_count += 1 1949 1950 # Add everything in I to J 1951 J = I[:] 1952 didadd = 1 1953 while didadd: 1954 didadd = 0 1955 for j in J: 1956 for x in j.lr_after: 1957 if getattr(x,"lr0_added",0) == self._add_count: continue 1958 # Add B --> .G to J 1959 J.append(x.lr_next) 1960 x.lr0_added = self._add_count 1961 didadd = 1 1962 1963 return J
1964 1965 # Compute the LR(0) goto function goto(I,X) where I is a set 1966 # of LR(0) items and X is a grammar symbol. This function is written 1967 # in a way that guarantees uniqueness of the generated goto sets 1968 # (i.e. the same goto set will never be returned as two different Python 1969 # objects). With uniqueness, we can later do fast set comparisons using 1970 # id(obj) instead of element-wise comparison. 1971
1972 - def lr0_goto(self,I,x):
1973 # First we look for a previously cached entry 1974 g = self.lr_goto_cache.get((id(I),x),None) 1975 if g: return g 1976 1977 # Now we generate the goto set in a way that guarantees uniqueness 1978 # of the result 1979 1980 s = self.lr_goto_cache.get(x,None) 1981 if not s: 1982 s = { } 1983 self.lr_goto_cache[x] = s 1984 1985 gs = [ ] 1986 for p in I: 1987 n = p.lr_next 1988 if n and n.lr_before == x: 1989 s1 = s.get(id(n),None) 1990 if not s1: 1991 s1 = { } 1992 s[id(n)] = s1 1993 gs.append(n) 1994 s = s1 1995 g = s.get('$end',None) 1996 if not g: 1997 if gs: 1998 g = self.lr0_closure(gs) 1999 s['$end'] = g 2000 else: 2001 s['$end'] = gs 2002 self.lr_goto_cache[(id(I),x)] = g 2003 return g
2004 2005 # Compute the LR(0) sets-of-items function
2006 - def lr0_items(self):
2007 2008 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] 2009 i = 0 2010 for I in C: 2011 self.lr0_cidhash[id(I)] = i 2012 i += 1 2013 2014 # Loop over the items in C and each grammar symbol 2015 i = 0 2016 while i < len(C): 2017 I = C[i] 2018 i += 1 2019 2020 # Collect all of the symbols that could possibly be in the goto(I,X) sets 2021 asyms = { } 2022 for ii in I: 2023 for s in ii.usyms: 2024 asyms[s] = None 2025 2026 for x in asyms: 2027 g = self.lr0_goto(I,x) 2028 if not g: continue 2029 if id(g) in self.lr0_cidhash: continue 2030 self.lr0_cidhash[id(g)] = len(C) 2031 C.append(g) 2032 2033 return C
2034 2035 # ----------------------------------------------------------------------------- 2036 # ==== LALR(1) Parsing ==== 2037 # 2038 # LALR(1) parsing is almost exactly the same as SLR except that instead of 2039 # relying upon Follow() sets when performing reductions, a more selective 2040 # lookahead set that incorporates the state of the LR(0) machine is utilized. 2041 # Thus, we mainly just have to focus on calculating the lookahead sets. 2042 # 2043 # The method used here is due to DeRemer and Pennello (1982). 2044 # 2045 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) 2046 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2047 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2048 # 2049 # Further details can also be found in: 2050 # 2051 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2052 # McGraw-Hill Book Company, (1985). 2053 # 2054 # ----------------------------------------------------------------------------- 2055 2056 # ----------------------------------------------------------------------------- 2057 # compute_nullable_nonterminals() 2058 # 2059 # Creates a dictionary containing all of the non-terminals that might produce 2060 # an empty production. 2061 # ----------------------------------------------------------------------------- 2062
2063 - def compute_nullable_nonterminals(self):
2064 nullable = {} 2065 num_nullable = 0 2066 while 1: 2067 for p in self.grammar.Productions[1:]: 2068 if p.len == 0: 2069 nullable[p.name] = 1 2070 continue 2071 for t in p.prod: 2072 if not t in nullable: break 2073 else: 2074 nullable[p.name] = 1 2075 if len(nullable) == num_nullable: break 2076 num_nullable = len(nullable) 2077 return nullable
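The same fixed point, sketched standalone for the toy grammar A -> a | <empty>, S -> A A; productions here are plain (name, rhs) pairs rather than this module's Production objects:

    prods = [('A', ('a',)), ('A', ()), ('S', ('A', 'A'))]
    nullable = {}
    num_nullable = -1
    while len(nullable) != num_nullable:
        num_nullable = len(nullable)
        for name, rhs in prods:
            if all(sym in nullable for sym in rhs):
                nullable[name] = 1
    print(sorted(nullable))   # ['A', 'S']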
2078 2079 # ----------------------------------------------------------------------------- 2080 # find_nonterminal_transitions(C) 2081 # 2082 # Given a set of LR(0) items, this function finds all of the non-terminal 2083 # transitions. These are transitions in which a dot appears immediately before 2084 # a non-terminal. Returns a list of tuples of the form (state,N) where state 2085 # is the state number and N is the nonterminal symbol. 2086 # 2087 # The input C is the set of LR(0) items. 2088 # ----------------------------------------------------------------------------- 2089
2090 - def find_nonterminal_transitions(self,C):
2091 trans = [] 2092 for state in range(len(C)): 2093 for p in C[state]: 2094 if p.lr_index < p.len - 1: 2095 t = (state,p.prod[p.lr_index+1]) 2096 if t[1] in self.grammar.Nonterminals: 2097 if t not in trans: trans.append(t) 2098 state = state + 1 2099 return trans
2100 2101 # ----------------------------------------------------------------------------- 2102 # dr_relation() 2103 # 2104 # Computes the DR(p,A) relationships for non-terminal transitions. The input 2105 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2106 # 2107 # Returns a list of terminals. 2108 # ----------------------------------------------------------------------------- 2109
2110 - def dr_relation(self,C,trans,nullable):
2111 dr_set = { } 2112 state,N = trans 2113 terms = [] 2114 2115 g = self.lr0_goto(C[state],N) 2116 for p in g: 2117 if p.lr_index < p.len - 1: 2118 a = p.prod[p.lr_index+1] 2119 if a in self.grammar.Terminals: 2120 if a not in terms: terms.append(a) 2121 2122 # This extra bit is to handle the start state 2123 if state == 0 and N == self.grammar.Productions[0].prod[0]: 2124 terms.append('$end') 2125 2126 return terms
2127 2128 # ----------------------------------------------------------------------------- 2129 # reads_relation() 2130 # 2131 # Computes the READS() relation (p,A) READS (t,C). 2132 # ----------------------------------------------------------------------------- 2133
2134 - def reads_relation(self,C, trans, empty):
2135 # Look for empty transitions 2136 rel = [] 2137 state, N = trans 2138 2139 g = self.lr0_goto(C[state],N) 2140 j = self.lr0_cidhash.get(id(g),-1) 2141 for p in g: 2142 if p.lr_index < p.len - 1: 2143 a = p.prod[p.lr_index + 1] 2144 if a in empty: 2145 rel.append((j,a)) 2146 2147 return rel
2148 2149 # ----------------------------------------------------------------------------- 2150 # compute_lookback_includes() 2151 # 2152 # Determines the lookback and includes relations 2153 # 2154 # LOOKBACK: 2155 # 2156 # This relation is determined by running the LR(0) state machine forward. 2157 # For example, starting with a production "N : . A B C", we run it forward 2158 # to obtain "N : A B C ." We then build a relationship between this final 2159 # state and the starting state. These relationships are stored in a dictionary 2160 # lookdict. 2161 # 2162 # INCLUDES: 2163 # 2164 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2165 # 2166 # This relation is used to determine non-terminal transitions that occur 2167 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 2168 # if the following holds: 2169 # 2170 # B -> LAT, where T -> epsilon and p' -L-> p 2171 # 2172 # L is essentially a prefix (which may be empty), T is a suffix that must be 2173 # able to derive an empty string. State p' must lead to state p with the string L. 2174 # 2175 # ----------------------------------------------------------------------------- 2176
2177 - def compute_lookback_includes(self,C,trans,nullable):
2178 2179 lookdict = {} # Dictionary of lookback relations 2180 includedict = {} # Dictionary of include relations 2181 2182 # Make a dictionary of non-terminal transitions 2183 dtrans = {} 2184 for t in trans: 2185 dtrans[t] = 1 2186 2187 # Loop over all transitions and compute lookbacks and includes 2188 for state,N in trans: 2189 lookb = [] 2190 includes = [] 2191 for p in C[state]: 2192 if p.name != N: continue 2193 2194 # Okay, we have a name match. We now follow the production all the way 2195 # through the state machine until we get the . on the right hand side 2196 2197 lr_index = p.lr_index 2198 j = state 2199 while lr_index < p.len - 1: 2200 lr_index = lr_index + 1 2201 t = p.prod[lr_index] 2202 2203 # Check to see if this symbol and state are a non-terminal transition 2204 if (j,t) in dtrans: 2205 # Yes. Okay, there is some chance that this is an includes relation; 2206 # the only way to know for certain is whether the rest of the 2207 # production derives empty 2208 2209 li = lr_index + 1 2210 while li < p.len: 2211 if p.prod[li] in self.grammar.Terminals: break # No, forget it 2212 if not p.prod[li] in nullable: break 2213 li = li + 1 2214 else: 2215 # Appears to be a relation between (j,t) and (state,N) 2216 includes.append((j,t)) 2217 2218 g = self.lr0_goto(C[j],t) # Go to next set 2219 j = self.lr0_cidhash.get(id(g),-1) # Go to next state 2220 2221 # When we get here, j is the final state, now we have to locate the production 2222 for r in C[j]: 2223 if r.name != p.name: continue 2224 if r.len != p.len: continue 2225 i = 0 2226 # This loop is comparing a production ". A B C" with "A B C ." 2227 while i < r.lr_index: 2228 if r.prod[i] != p.prod[i+1]: break 2229 i = i + 1 2230 else: 2231 lookb.append((j,r)) 2232 for i in includes: 2233 if not i in includedict: includedict[i] = [] 2234 includedict[i].append((state,N)) 2235 lookdict[(state,N)] = lookb 2236 2237 return lookdict,includedict
2238 2239 # ----------------------------------------------------------------------------- 2240 # compute_read_sets() 2241 # 2242 # Given a set of LR(0) items, this function computes the read sets. 2243 # 2244 # Inputs: C = Set of LR(0) items 2245 # ntrans = Set of nonterminal transitions 2246 # nullable = Set of nullable non-terminals 2247 # 2248 # Returns a set containing the read sets 2249 # ----------------------------------------------------------------------------- 2250
2251 - def compute_read_sets(self,C, ntrans, nullable):
2252 FP = lambda x: self.dr_relation(C,x,nullable) 2253 R = lambda x: self.reads_relation(C,x,nullable) 2254 F = digraph(ntrans,R,FP) 2255 return F
2256 2257 # ----------------------------------------------------------------------------- 2258 # compute_follow_sets() 2259 # 2260 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 2261 # and an include set, this function computes the follow sets 2262 # 2263 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 2264 # 2265 # Inputs: 2266 # ntrans = Set of nonterminal transitions 2267 # readsets = Readset (previously computed) 2268 # inclsets = Include sets (previously computed) 2269 # 2270 # Returns a set containing the follow sets 2271 # ----------------------------------------------------------------------------- 2272
2273 - def compute_follow_sets(self,ntrans,readsets,inclsets):
2274 FP = lambda x: readsets[x] 2275 R = lambda x: inclsets.get(x,[]) 2276 F = digraph(ntrans,R,FP) 2277 return F
2278 2279 # ----------------------------------------------------------------------------- 2280 # add_lookaheads() 2281 # 2282 # Attaches the lookahead symbols to grammar rules. 2283 # 2284 # Inputs: lookbacks - Set of lookback relations 2285 # followset - Computed follow set 2286 # 2287 # This function directly attaches the lookaheads to productions contained 2288 # in the lookbacks set 2289 # ----------------------------------------------------------------------------- 2290
2291 - def add_lookaheads(self,lookbacks,followset):
2292 for trans,lb in lookbacks.items(): 2293 # Loop over productions in lookback 2294 for state,p in lb: 2295 if not state in p.lookaheads: 2296 p.lookaheads[state] = [] 2297 f = followset.get(trans,[]) 2298 for a in f: 2299 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2300 2301 # ----------------------------------------------------------------------------- 2302 # add_lalr_lookaheads() 2303 # 2304 # This function does all of the work of adding lookahead information for use 2305 # with LALR parsing 2306 # ----------------------------------------------------------------------------- 2307
2308 - def add_lalr_lookaheads(self,C):
2309 # Determine all of the nullable nonterminals 2310 nullable = self.compute_nullable_nonterminals() 2311 2312 # Find all non-terminal transitions 2313 trans = self.find_nonterminal_transitions(C) 2314 2315 # Compute read sets 2316 readsets = self.compute_read_sets(C,trans,nullable) 2317 2318 # Compute lookback/includes relations 2319 lookd, included = self.compute_lookback_includes(C,trans,nullable) 2320 2321 # Compute LALR FOLLOW sets 2322 followsets = self.compute_follow_sets(trans,readsets,included) 2323 2324 # Add all of the lookaheads 2325 self.add_lookaheads(lookd,followsets)
2326 2327 # ----------------------------------------------------------------------------- 2328 # lr_parse_table() 2329 # 2330 # This function constructs the parse tables for SLR or LALR 2331 # -----------------------------------------------------------------------------
2332 - def lr_parse_table(self):
2333 goto = self.lr_goto # Goto array 2334 action = self.lr_action # Action array 2335 log = self.log # Logger for output 2336 2337 actionp = { } # Action production array (temporary) 2338 2339 log.info("Parsing method: %s", self.lr_method) 2340 2341 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 2342 # This determines the number of states 2343 2344 C = self.lr0_items() 2345 2346 if self.lr_method == 'LALR': 2347 self.add_lalr_lookaheads(C) 2348 2349 # Build the parser table, state by state 2350 st = 0 2351 for I in C: 2352 # Loop over each production in I 2353 actlist = [ ] # List of actions 2354 st_action = { } 2355 st_actionp = { } 2356 st_goto = { } 2357 log.info("") 2358 log.info("state %d", st) 2359 log.info("") 2360 for p in I: 2361 log.info(" (%d) %s", p.number, str(p)) 2362 log.info("") 2363 2364 for p in I: 2365 if p.len == p.lr_index + 1: 2366 if p.name == "S'": 2367 # Start symbol. Accept! 2368 st_action["$end"] = 0 2369 st_actionp["$end"] = p 2370 else: 2371 # We are at the end of a production. Reduce! 2372 if self.lr_method == 'LALR': 2373 laheads = p.lookaheads[st] 2374 else: 2375 laheads = self.grammar.Follow[p.name] 2376 for a in laheads: 2377 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) 2378 r = st_action.get(a,None) 2379 if r is not None: 2380 # Whoa. Have a shift/reduce or reduce/reduce conflict 2381 if r > 0: 2382 # Need to decide on shift or reduce here 2383 # By default we favor shifting. Need to add 2384 # some precedence rules here. 2385 sprec,slevel = self.grammar.Productions[st_actionp[a].number].prec 2386 rprec,rlevel = self.grammar.Precedence.get(a,('right',0)) 2387 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 2388 # We really need to reduce here. 2389 st_action[a] = -p.number 2390 st_actionp[a] = p 2391 if not slevel and not rlevel: 2392 log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 2393 self.sr_conflicts.append((st,a,'reduce')) 2394 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2395 st_action[a] = None 2396 else: 2397 # Hmmm. Guess we'll keep the shift 2398 if not rlevel: 2399 log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2400 self.sr_conflicts.append((st,a,'shift')) 2401 elif r < 0: 2402 # Reduce/reduce conflict. In this case, we favor the rule 2403 # that was defined first in the grammar file 2404 oldp = self.grammar.Productions[-r] 2405 pp = self.grammar.Productions[p.number] 2406 if oldp.line > pp.line: 2407 st_action[a] = -p.number 2408 st_actionp[a] = p 2409 chosenp,rejectp = pp,oldp 2410 else: 2411 chosenp,rejectp = oldp,pp 2412 self.rr_conflicts.append((st,chosenp,rejectp)) 2413 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) 2414 else: 2415 raise LALRError("Unknown conflict in state %d" % st) 2416 else: 2417 st_action[a] = -p.number 2418 st_actionp[a] = p 2419 else: 2420 i = p.lr_index 2421 a = p.prod[i+1] # Get symbol right after the "." 2422 if a in self.grammar.Terminals: 2423 g = self.lr0_goto(I,a) 2424 j = self.lr0_cidhash.get(id(g),-1) 2425 if j >= 0: 2426 # We are in a shift state 2427 actlist.append((a,p,"shift and go to state %d" % j)) 2428 r = st_action.get(a,None) 2429 if r is not None: 2430 # Whoa have a shift/reduce or shift/shift conflict 2431 if r > 0: 2432 if r != j: 2433 raise LALRError("Shift/shift conflict in state %d" % st) 2434 elif r < 0: 2435 # Do a precedence check. 2436 # - if precedence of reduce rule is higher, we reduce. 
2437 # - if precedence of reduce is same and left assoc, we reduce. 2438 # - otherwise we shift 2439 rprec,rlevel = self.grammar.Productions[st_actionp[a].number].prec 2440 sprec,slevel = self.grammar.Precedence.get(a,('right',0)) 2441 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 2442 # We decide to shift here... highest precedence to shift 2443 st_action[a] = j 2444 st_actionp[a] = p 2445 if not rlevel: 2446 log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2447 self.sr_conflicts.append((st,a,'shift')) 2448 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2449 st_action[a] = None 2450 else: 2451 # Hmmm. Guess we'll keep the reduce 2452 if not slevel and not rlevel: 2453 log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 2454 self.sr_conflicts.append((st,a,'reduce')) 2455 2456 else: 2457 raise LALRError("Unknown conflict in state %d" % st) 2458 else: 2459 st_action[a] = j 2460 st_actionp[a] = p 2461 2462 # Print the actions associated with each terminal 2463 _actprint = { } 2464 for a,p,m in actlist: 2465 if a in st_action: 2466 if p is st_actionp[a]: 2467 log.info(" %-15s %s",a,m) 2468 _actprint[(a,m)] = 1 2469 log.info("") 2470 # Print the actions that were not used. (debugging) 2471 not_used = 0 2472 for a,p,m in actlist: 2473 if a in st_action: 2474 if p is not st_actionp[a]: 2475 if not (a,m) in _actprint: 2476 log.debug(" ! %-15s [ %s ]",a,m) 2477 not_used = 1 2478 _actprint[(a,m)] = 1 2479 if not_used: 2480 log.debug("") 2481 2482 # Construct the goto table for this state 2483 2484 nkeys = { } 2485 for ii in I: 2486 for s in ii.usyms: 2487 if s in self.grammar.Nonterminals: 2488 nkeys[s] = None 2489 for n in nkeys: 2490 g = self.lr0_goto(I,n) 2491 j = self.lr0_cidhash.get(id(g),-1) 2492 if j >= 0: 2493 st_goto[n] = j 2494 log.info(" %-30s shift and go to state %d",n,j) 2495 2496 action[st] = st_action 2497 actionp[st] = st_actionp 2498 goto[st] = st_goto 2499 st += 1
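A hedged end-to-end sketch tying the steps above together: constructing an LRGeneratedTable on the small Grammar g built earlier runs lr0_items(), the LALR lookahead machinery, and lr_parse_table() from __init__, after which the action and goto tables can be inspected (state numbering is an implementation detail, so the commented rows are only indicative):

    tbl = LRGeneratedTable(g, method='LALR')
    print(tbl.lr_action[0])   # e.g. {'NUMBER': 3} -- shift NUMBER in state 0
    print(tbl.lr_goto[0])     # e.g. {'expr': 1, 'term': 2}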
2500 2501 2502 # ----------------------------------------------------------------------------- 2503 # write() 2504 # 2505 # This function writes the LR parsing tables to a file 2506 # ----------------------------------------------------------------------------- 2507
2508 - def write_table(self,modulename,outputdir='',signature=""):
2509 basemodulename = modulename.split(".")[-1] 2510 filename = os.path.join(outputdir,basemodulename) + ".py" 2511 try: 2512 f = open(filename,"w") 2513 2514 f.write(""" 2515 # %s 2516 # This file is automatically generated. Do not edit. 2517 _tabversion = %r 2518 2519 _lr_method = %r 2520 2521 _lr_signature = %r 2522 """ % (filename, __tabversion__, self.lr_method, signature)) 2523 2524 # Change smaller to 0 to go back to original tables 2525 smaller = 1 2526 2527 # Factor out names to try and make smaller 2528 if smaller: 2529 items = { } 2530 2531 for s,nd in self.lr_action.items(): 2532 for name,v in nd.items(): 2533 i = items.get(name) 2534 if not i: 2535 i = ([],[]) 2536 items[name] = i 2537 i[0].append(s) 2538 i[1].append(v) 2539 2540 f.write("\n_lr_action_items = {") 2541 for k,v in items.items(): 2542 f.write("%r:([" % k) 2543 for i in v[0]: 2544 f.write("%r," % i) 2545 f.write("],[") 2546 for i in v[1]: 2547 f.write("%r," % i) 2548 2549 f.write("]),") 2550 f.write("}\n") 2551 2552 f.write(""" 2553 _lr_action = { } 2554 for _k, _v in _lr_action_items.items(): 2555 for _x,_y in zip(_v[0],_v[1]): 2556 if not _x in _lr_action: _lr_action[_x] = { } 2557 _lr_action[_x][_k] = _y 2558 del _lr_action_items 2559 """) 2560 2561 else: 2562 f.write("\n_lr_action = { "); 2563 for k,v in self.lr_action.items(): 2564 f.write("(%r,%r):%r," % (k[0],k[1],v)) 2565 f.write("}\n"); 2566 2567 if smaller: 2568 # Factor out names to try and make smaller 2569 items = { } 2570 2571 for s,nd in self.lr_goto.items(): 2572 for name,v in nd.items(): 2573 i = items.get(name) 2574 if not i: 2575 i = ([],[]) 2576 items[name] = i 2577 i[0].append(s) 2578 i[1].append(v) 2579 2580 f.write("\n_lr_goto_items = {") 2581 for k,v in items.items(): 2582 f.write("%r:([" % k) 2583 for i in v[0]: 2584 f.write("%r," % i) 2585 f.write("],[") 2586 for i in v[1]: 2587 f.write("%r," % i) 2588 2589 f.write("]),") 2590 f.write("}\n") 2591 2592 f.write(""" 2593 _lr_goto = { } 2594 for _k, _v in _lr_goto_items.items(): 2595 for _x,_y in zip(_v[0],_v[1]): 2596 if not _x in _lr_goto: _lr_goto[_x] = { } 2597 _lr_goto[_x][_k] = _y 2598 del _lr_goto_items 2599 """) 2600 else: 2601 f.write("\n_lr_goto = { "); 2602 for k,v in self.lr_goto.items(): 2603 f.write("(%r,%r):%r," % (k[0],k[1],v)) 2604 f.write("}\n"); 2605 2606 # Write production table 2607 f.write("_lr_productions = [\n") 2608 for p in self.lr_productions: 2609 if p.func: 2610 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) 2611 else: 2612 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) 2613 f.write("]\n") 2614 f.close() 2615 2616 except IOError: 2617 e = sys.exc_info()[1] 2618 sys.stderr.write("Unable to create '%s'\n" % filename) 2619 sys.stderr.write(str(e)+"\n") 2620 return
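Conversely, tables written by write_table() can be reloaded with LRTable.read_table() defined earlier. A hedged sketch, assuming tables were previously written under the default table module name (typically 'parsetab'):

    lr = LRTable()
    signature = lr.read_table('parsetab')   # raises VersionError on stale tables
    print(lr.lr_method, len(lr.lr_productions))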
2621 2622 2623 # ----------------------------------------------------------------------------- 2624 # === INTROSPECTION === 2625 # 2626 # The following functions and classes are used to implement the PLY 2627 # introspection features followed by the yacc() function itself. 2628 # ----------------------------------------------------------------------------- 2629 2630 # ----------------------------------------------------------------------------- 2631 # get_caller_module_dict() 2632 # 2633 # This function returns a dictionary containing all of the symbols defined within 2634 # a caller further down the call stack. This is used to get the environment 2635 # associated with the yacc() call if none was provided. 2636 # ----------------------------------------------------------------------------- 2637
2638 -def get_caller_module_dict(levels):
2639 try: 2640 raise RuntimeError 2641 except RuntimeError: 2642 e,b,t = sys.exc_info() 2643 f = t.tb_frame 2644 while levels > 0: 2645 f = f.f_back 2646 levels -= 1 2647 ldict = f.f_globals.copy() 2648 if f.f_globals != f.f_locals: 2649 ldict.update(f.f_locals) 2650 2651 return ldict
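An illustrative use of get_caller_module_dict(): with levels=1 it returns the immediate caller's globals overlaid with its locals; yacc() below passes levels=2 because it sits one frame further down from the module it wants to inspect:

    def demo():
        local_name = 42                     # hypothetical local
        return get_caller_module_dict(1)

    print('local_name' in demo())           # True: locals overlay the globals copy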
2652 2653 # ----------------------------------------------------------------------------- 2654 # parse_grammar() 2655 # 2656 # This takes a raw grammar rule string and parses it into production data 2657 # -----------------------------------------------------------------------------
2658 -def parse_grammar(doc,file,line):
2659 grammar = [] 2660 # Split the doc string into lines 2661 pstrings = doc.splitlines() 2662 lastp = None 2663 dline = line 2664 for ps in pstrings: 2665 dline += 1 2666 p = ps.split() 2667 if not p: continue 2668 try: 2669 if p[0] == '|': 2670 # This is a continuation of a previous rule 2671 if not lastp: 2672 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) 2673 prodname = lastp 2674 syms = p[1:] 2675 else: 2676 prodname = p[0] 2677 lastp = prodname 2678 syms = p[2:] 2679 assign = p[1] 2680 if assign != ':' and assign != '::=': 2681 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) 2682 2683 grammar.append((file,dline,prodname,syms)) 2684 except SyntaxError: 2685 raise 2686 except Exception: 2687 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) 2688 2689 return grammar
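For example, parse_grammar() above turns a typical rule docstring into (file, line, prodname, syms) tuples, resolving '|' continuations against the previous rule name (the filename and starting line are illustrative):

    doc = '''expression : expression PLUS term
                        | term'''
    for entry in parse_grammar(doc, 'calc.py', 10):
        print(entry)
    # ('calc.py', 11, 'expression', ['expression', 'PLUS', 'term'])
    # ('calc.py', 12, 'expression', ['term'])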
2690 2691 # ----------------------------------------------------------------------------- 2692 # ParserReflect() 2693 # 2694 # This class represents information extracted for building a parser including 2695 # start symbol, error function, tokens, precedence list, action functions, 2696 # etc. 2697 # -----------------------------------------------------------------------------
2698 -class ParserReflect(object):
2699 - def __init__(self,pdict,log=None):
2700 self.pdict = pdict 2701 self.start = None 2702 self.error_func = None 2703 self.tokens = None 2704 self.files = {} 2705 self.grammar = [] 2706 self.error = 0 2707 2708 if log is None: 2709 self.log = PlyLogger(sys.stderr) 2710 else: 2711 self.log = log
2712 2713 # Get all of the basic information
2714 - def get_all(self):
2715 self.get_start() 2716 self.get_error_func() 2717 self.get_tokens() 2718 self.get_precedence() 2719 self.get_pfunctions()
2720 2721 # Validate all of the information
2722 - def validate_all(self):
2723 self.validate_start() 2724 self.validate_error_func() 2725 self.validate_tokens() 2726 self.validate_precedence() 2727 self.validate_pfunctions() 2728 self.validate_files() 2729 return self.error
2730 2731 # Compute a signature over the grammar
2732 - def signature(self):
2733 from binascii import crc32 2734 sig = 0 2735 try: 2736 if self.start: 2737 sig = crc32(self.start.encode('latin-1'),sig) 2738 if self.prec: 2739 sig = crc32("".join(["".join(p) for p in self.prec]).encode('latin-1'),sig) 2740 if self.tokens: 2741 sig = crc32(" ".join(self.tokens).encode('latin-1'),sig) 2742 for f in self.pfuncs: 2743 if f[3]: 2744 sig = crc32(f[3].encode('latin-1'),sig) 2745 except (TypeError,ValueError): 2746 pass 2747 return sig
2748 2749 # ----------------------------------------------------------------------------- 2750 # validate_files() 2751 # 2752 # This method checks to see if there are duplicated p_rulename() functions 2753 # in the parser module file. Without this function, it is really easy for 2754 # users to make mistakes by cutting and pasting code fragments (and it's a real 2755 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 2756 # we just do a little regular expression pattern matching of def statements 2757 # to try and detect duplicates. 2758 # ----------------------------------------------------------------------------- 2759
2760 - def validate_files(self):
2761 # Match def p_funcname( 2762 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 2763 2764 for filename in self.files.keys(): 2765 base,ext = os.path.splitext(filename) 2766 if ext != '.py': return 1 # No idea. Assume it's okay. 2767 2768 try: 2769 f = open(filename) 2770 lines = f.readlines() 2771 f.close() 2772 except IOError: 2773 continue 2774 2775 counthash = { } 2776 for linen,l in enumerate(lines): 2777 linen += 1 2778 m = fre.match(l) 2779 if m: 2780 name = m.group(1) 2781 prev = counthash.get(name) 2782 if not prev: 2783 counthash[name] = linen 2784 else: 2785 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2786 2787 # Get the start symbol
2788 - def get_start(self):
2789 self.start = self.pdict.get('start')
2790 2791 # Validate the start symbol
2792 - def validate_start(self):
2793 if self.start is not None: 2794 if not isinstance(self.start,str): 2795 self.log.error("'start' must be a string")
2796 2797 # Look for error handler
2798 - def get_error_func(self):
2799 self.error_func = self.pdict.get('p_error')
2800 2801 # Validate the error function
2802 - def validate_error_func(self):
2803 if self.error_func: 2804 if isinstance(self.error_func,types.FunctionType): 2805 ismethod = 0 2806 elif isinstance(self.error_func, types.MethodType): 2807 ismethod = 1 2808 else: 2809 self.log.error("'p_error' defined, but is not a function or method") 2810 self.error = 1 2811 return 2812 2813 eline = func_code(self.error_func).co_firstlineno 2814 efile = func_code(self.error_func).co_filename 2815 self.files[efile] = 1 2816 2817 if (func_code(self.error_func).co_argcount != 1+ismethod): 2818 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) 2819 self.error = 1
2820 2821 # Get the tokens map
2822 - def get_tokens(self):
2823 tokens = self.pdict.get("tokens",None) 2824 if not tokens: 2825 self.log.error("No token list is defined") 2826 self.error = 1 2827 return 2828 2829 if not isinstance(tokens,(list, tuple)): 2830 self.log.error("tokens must be a list or tuple") 2831 self.error = 1 2832 return 2833 2834 if not tokens: 2835 self.log.error("tokens is empty") 2836 self.error = 1 2837 return 2838 2839 self.tokens = tokens
2840 2841 # Validate the tokens
2842 - def validate_tokens(self):
2843 # Validate the tokens. 2844 if 'error' in self.tokens: 2845 self.log.error("Illegal token name 'error'. Is a reserved word") 2846 self.error = 1 2847 return 2848 2849 terminals = {} 2850 for n in self.tokens: 2851 if n in terminals: 2852 self.log.warning("Token '%s' multiply defined", n) 2853 terminals[n] = 1
2854 2855 # Get the precedence map (if any)
2856 - def get_precedence(self):
2857 self.prec = self.pdict.get("precedence",None)
2858 2859 # Validate and parse the precedence map
2860 - def validate_precedence(self):
2861 preclist = [] 2862 if self.prec: 2863 if not isinstance(self.prec,(list,tuple)): 2864 self.log.error("precedence must be a list or tuple") 2865 self.error = 1 2866 return 2867 for level,p in enumerate(self.prec): 2868 if not isinstance(p,(list,tuple)): 2869 self.log.error("Bad precedence table") 2870 self.error = 1 2871 return 2872 2873 if len(p) < 2: 2874 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) 2875 self.error = 1 2876 return 2877 assoc = p[0] 2878 if not isinstance(assoc,str): 2879 self.log.error("precedence associativity must be a string") 2880 self.error = 1 2881 return 2882 for term in p[1:]: 2883 if not isinstance(term,str): 2884 self.log.error("precedence items must be strings") 2885 self.error = 1 2886 return 2887 preclist.append((term,assoc,level+1)) 2888 self.preclist = preclist
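The shape this validator accepts is the usual PLY precedence table: a list or tuple of (assoc, term, ..., term) entries ordered from lowest to highest precedence, for example:

    precedence = (
        ('left',  'PLUS',  'MINUS'),
        ('left',  'TIMES', 'DIVIDE'),
        ('right', 'UMINUS'),
    )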
2889 2890 # Get all p_functions from the grammar
2891 - def get_pfunctions(self):
2892 p_functions = [] 2893 for name, item in self.pdict.items(): 2894 if name[:2] != 'p_': continue 2895 if name == 'p_error': continue 2896 if isinstance(item,(types.FunctionType,types.MethodType)): 2897 line = func_code(item).co_firstlineno 2898 file = func_code(item).co_filename 2899 p_functions.append((line,file,name,item.__doc__)) 2900 2901 # Sort all of the actions by line number 2902 p_functions.sort() 2903 self.pfuncs = p_functions
2904 2905 2906 # Validate all of the p_functions
2907 - def validate_pfunctions(self):
2908 grammar = [] 2909 # Check for non-empty symbols 2910 if len(self.pfuncs) == 0: 2911 self.log.error("no rules of the form p_rulename are defined") 2912 self.error = 1 2913 return 2914 2915 for line, file, name, doc in self.pfuncs: 2916 func = self.pdict[name] 2917 if isinstance(func, types.MethodType): 2918 reqargs = 2 2919 else: 2920 reqargs = 1 2921 if func_code(func).co_argcount > reqargs: 2922 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) 2923 self.error = 1 2924 elif func_code(func).co_argcount < reqargs: 2925 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) 2926 self.error = 1 2927 elif not func.__doc__: 2928 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) 2929 else: 2930 try: 2931 parsed_g = parse_grammar(doc,file,line) 2932 for g in parsed_g: 2933 grammar.append((name, g)) 2934 except SyntaxError: 2935 e = sys.exc_info()[1] 2936 self.log.error(str(e)) 2937 self.error = 1 2938 2939 # Looks like a valid grammar rule 2940 # Mark the file in which defined. 2941 self.files[file] = 1 2942 2943 # Secondary validation step that looks for p_ definitions that are not functions 2944 # or functions that look like they might be grammar rules. 2945 2946 for n,v in self.pdict.items(): 2947 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue 2948 if n[0:2] == 't_': continue 2949 if n[0:2] == 'p_' and n != 'p_error': 2950 self.log.warning("'%s' not defined as a function", n) 2951 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or 2952 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): 2953 try: 2954 doc = v.__doc__.split(" ") 2955 if doc[1] == ':': 2956 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", 2957 func_code(v).co_filename, func_code(v).co_firstlineno,n) 2958 except Exception: 2959 pass 2960 2961 self.grammar = grammar
2962 2963 # ----------------------------------------------------------------------------- 2964 # yacc(module) 2965 # 2966 # Build a parser 2967 # ----------------------------------------------------------------------------- 2968
2969 -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 2970 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', 2971 debuglog=None, errorlog = None):
2972 2973 global parse # Reference to the parsing method of the last built parser 2974 2975 if errorlog is None: 2976 errorlog = PlyLogger(sys.stderr) 2977 2978 # Get the module dictionary used for the parser 2979 if module: 2980 _items = [(k,getattr(module,k)) for k in dir(module)] 2981 pdict = dict(_items) 2982 else: 2983 pdict = get_caller_module_dict(2) 2984 2985 # Collect parser information from the dictionary 2986 pinfo = ParserReflect(pdict,log=errorlog) 2987 pinfo.get_all() 2988 2989 if pinfo.error: 2990 raise YaccError("Unable to build parser") 2991 2992 # Check signature against table files (if any) 2993 signature = pinfo.signature() 2994 2995 # Read the tables 2996 try: 2997 lr = LRTable() 2998 read_signature = lr.read_table(tabmodule) 2999 if optimize or (read_signature == signature): 3000 try: 3001 lr.bind_callables(pinfo.pdict) 3002 parser = LRParser(lr,pinfo.error_func) 3003 parse = parser.parse 3004 return parser 3005 except Exception: 3006 e = sys.exc_info()[1] 3007 errorlog.warning("There was a problem loading the table file: %s", repr(e)) 3008 except VersionError: 3009 e = sys.exc_info() 3010 errorlog.warning(str(e)) 3011 except Exception: 3012 pass 3013 3014 if debuglog is None: 3015 if debug: 3016 debuglog = PlyLogger(open(debugfile,"w")) 3017 else: 3018 debuglog = NullLogger() 3019 3020 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 3021 3022 3023 errors = 0 3024 3025 # Validate the parser information 3026 if pinfo.validate_all(): 3027 raise YaccError("Unable to build parser") 3028 3029 if not pinfo.error_func: 3030 errorlog.warning("no p_error() function is defined") 3031 3032 # Create a grammar object 3033 grammar = Grammar(pinfo.tokens) 3034 3035 # Set precedence level for terminals 3036 for term, assoc, level in pinfo.preclist: 3037 try: 3038 grammar.set_precedence(term,assoc,level) 3039 except GrammarError: 3040 e = sys.exc_info()[1] 3041 errorlog.warning("%s",str(e)) 3042 3043 # Add productions to the grammar 3044 for funcname, gram in pinfo.grammar: 3045 file, line, prodname, syms = gram 3046 try: 3047 grammar.add_production(prodname,syms,funcname,file,line) 3048 except GrammarError: 3049 e = sys.exc_info()[1] 3050 errorlog.error("%s",str(e)) 3051 errors = 1 3052 3053 # Set the grammar start symbols 3054 try: 3055 grammar.set_start(pinfo.start) 3056 except GrammarError: 3057 e = sys.exc_info()[1] 3058 errorlog.error(str(e)) 3059 errors = 1 3060 3061 if errors: 3062 raise YaccError("Unable to build parser") 3063 3064 # Verify the grammar structure 3065 undefined_symbols = grammar.undefined_symbols() 3066 for sym, prod in undefined_symbols: 3067 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) 3068 errors = 1 3069 3070 unused_terminals = grammar.unused_terminals() 3071 if unused_terminals: 3072 debuglog.info("") 3073 debuglog.info("Unused terminals:") 3074 debuglog.info("") 3075 for term in unused_terminals: 3076 errorlog.warning("Token '%s' defined, but not used", term) 3077 debuglog.info(" %s", term) 3078 3079 # Print out all productions to the debug log 3080 if debug: 3081 debuglog.info("") 3082 debuglog.info("Grammar") 3083 debuglog.info("") 3084 for n,p in enumerate(grammar.Productions): 3085 debuglog.info("Rule %-5d %s", n+1, p) 3086 3087 # Find unused non-terminals 3088 unused_rules = grammar.unused_rules() 3089 for prod in unused_rules: 3090 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) 3091 3092 if 
len(unused_terminals) == 1: 3093 errorlog.warning("There is 1 unused token") 3094 if len(unused_terminals) > 1: 3095 errorlog.warning("There are %d unused tokens", len(unused_terminals)) 3096 3097 if len(unused_rules) == 1: 3098 errorlog.warning("There is 1 unused rule") 3099 if len(unused_rules) > 1: 3100 errorlog.warning("There are %d unused rules", len(unused_rules)) 3101 3102 if debug: 3103 debuglog.info("") 3104 debuglog.info("Terminals, with rules where they appear") 3105 debuglog.info("") 3106 terms = list(grammar.Terminals) 3107 terms.sort() 3108 for term in terms: 3109 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 3110 3111 debuglog.info("") 3112 debuglog.info("Nonterminals, with rules where they appear") 3113 debuglog.info("") 3114 nonterms = list(grammar.Nonterminals) 3115 nonterms.sort() 3116 for nonterm in nonterms: 3117 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 3118 debuglog.info("") 3119 3120 if check_recursion: 3121 unreachable = grammar.find_unreachable() 3122 for u in unreachable: 3123 errorlog.warning("Symbol '%s' is unreachable",u) 3124 3125 infinite = grammar.infinite_cycles() 3126 for inf in infinite: 3127 errorlog.error("Infinite recursion detected for symbol '%s'", inf) 3128 errors = 1 3129 3130 unused_prec = grammar.unused_precedence() 3131 for term, assoc in unused_prec: 3132 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) 3133 errors = 1 3134 3135 if errors: 3136 raise YaccError("Unable to build parser") 3137 3138 # Run the LRGeneratedTable on the grammar 3139 if debug: 3140 errorlog.debug("Generating %s tables", method) 3141 3142 lr = LRGeneratedTable(grammar,method,debuglog) 3143 3144 num_sr = len(lr.sr_conflicts) 3145 3146 # Report shift/reduce and reduce/reduce conflicts 3147 if num_sr == 1: 3148 errorlog.warning("1 shift/reduce conflict") 3149 elif num_sr > 1: 3150 errorlog.warning("%d shift/reduce conflicts", num_sr) 3151 3152 num_rr = len(lr.rr_conflicts) 3153 if num_rr == 1: 3154 errorlog.warning("1 reduce/reduce conflict") 3155 elif num_rr > 1: 3156 errorlog.warning("%d reduce/reduce conflicts", num_rr) 3157 3158 # Write out conflicts to the output file 3159 if debug and (lr.sr_conflicts or lr.rr_conflicts): 3160 debuglog.warning("") 3161 debuglog.warning("Conflicts:") 3162 debuglog.warning("") 3163 3164 for state, tok, resolution in lr.sr_conflicts: 3165 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) 3166 3167 for state, rule, rejected in lr.rr_conflicts: 3168 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3169 debuglog.warning("rejected rule (%s)", rejected) 3170 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3171 errorlog.warning("rejected rule (%s)", rejected) 3172 3173 # Write the table file if requested 3174 if write_tables: 3175 lr.write_table(tabmodule,outputdir,signature) 3176 3177 # Build the parser 3178 lr.bind_callables(pinfo.pdict) 3179 parser = LRParser(lr,pinfo.error_func) 3180 3181 parse = parser.parse 3182 return parser
3183
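Finally, a minimal hedged sketch of how yacc() is driven in the usual PLY style: grammar rules are p_ functions whose docstrings carry the BNF, and a lexer producing the listed tokens (built elsewhere, e.g. with the companion lex module) is assumed:

    tokens = ('NUMBER', 'PLUS')

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'
        p[0] = p[1] + p[3]

    def p_expr_num(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print("Syntax error at %r" % (p,))

    parser = yacc()                                  # builds (or reloads) the tables
    # result = parser.parse(data, lexer=mylexer)     # 'mylexer' is hypothetical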