280 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			280 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | 
 | ||
|  | from TextTools.TextTools import * | ||
|  | 
 | ||
|  | ##################################################### | ||
|  | # FOLLOWING IS THE BOOTSTRAP PARSER, HAND-CODED! | ||
|  | 
 | ||
# Production names, in slot order: the position of a name in this list is
# the index of its tag table in parsertuplelist.
parsernamelist = [
    'declarationset',       #  0
    'declaration',          #  1
    'implicit_group',       #  2 (no longer used)
    'added_token',          #  3
    'seq_added_token',      #  4
    'fo_added_token',       #  5
    'or_added_token',       #  6
    'and_added_token',      #  7
    'element_token',        #  8
    'group',                #  9
    'negpos_indicator',     # 10
    'occurence_indicator',  # 11
    'unreportedname',       # 12
    'name',                 # 13
    '<ts>',                 # 14
    'literal',              # 15
    'range',                # 16
    'CHARBRACE',            # 17
    'CHARDASH',             # 18
    'CHARRANGE',            # 19
    'CHARNOBRACE',          # 20
    'ESCAPEDCHAR',          # 21
    'SPECIALESCAPEDCHAR',   # 22
    'OCTALESCAPEDCHAR',     # 23
]
|  | 
 | ||
# One slot per entry of parsernamelist.  Each slot starts out holding its own
# index as a placeholder and is overwritten with the real tag table further
# down.  list() is required because the slots are filled by item assignment:
# on Python 3 a bare range() object is immutable and the assignments would
# raise TypeError (on Python 2 this is the same list as before).
parsertuplelist = list(range(24))
|  | 
 | ||
|  | 
 | ||
|  | 
 | ||
# declarationset := declaration+
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[0] = (
    # the first declaration is mandatory
    ('declaration', TableInList, (parsertuplelist, 1)),
    # any further ones: repeat this entry while it matches (0), leave when it fails (1)
    ('declaration', TableInList, (parsertuplelist, 1), 1, 0),
)
# declaration := ts, (unreportedname/name), ts, ':=', ts, element_token, (added tokens)*, ts
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[1] = (
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, SubTable, (
        # '<name>' form first; a match jumps past the plain-name entry
        ('unreportedname', TableInList, (parsertuplelist, 12), 1, 2),
        ('name', TableInList, (parsertuplelist, 13)),
    )),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Word, ':='),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional run of added tokens (inline copy of the added_token table)
    (None, SubTable, (
        # first round: any one alternative must match, then jump to the repeat section
        ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 5),
        ('fo_added_token', TableInList, (parsertuplelist, 5), 1, 4),
        ('or_added_token', TableInList, (parsertuplelist, 6), 1, 3),
        ('and_added_token', TableInList, (parsertuplelist, 7), 1, 2),
        (None, Fail, Here),
        # repeat section: every match jumps back to the first entry of this section
        ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 0),
        ('fo_added_token', TableInList, (parsertuplelist, 5), 1, -1),
        ('or_added_token', TableInList, (parsertuplelist, 6), 1, -2),
        ('and_added_token', TableInList, (parsertuplelist, 7), 1, -3),
    ), 1, 1),
    (None, TableInList, (parsertuplelist, 14)),  # ts
)
# added_token: one or more of seq/fo/or/and added tokens
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[3] = (
    # first round: any one alternative must match, then jump to the repeat section
    ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 5),
    ('fo_added_token', TableInList, (parsertuplelist, 5), 1, 4),
    ('or_added_token', TableInList, (parsertuplelist, 6), 1, 3),
    ('and_added_token', TableInList, (parsertuplelist, 7), 1, 2),
    (None, Fail, Here),
    # repeat section: every match jumps back to the first entry of this section
    ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 0),
    ('fo_added_token', TableInList, (parsertuplelist, 5), 1, -1),
    ('or_added_token', TableInList, (parsertuplelist, 6), 1, -2),
    ('and_added_token', TableInList, (parsertuplelist, 7), 1, -3),
)
# seq_added_token := (ts, ',', ts, element_token)+
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[4] = (
    # mandatory first ", element"
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Is, ','),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional repeats: a failure anywhere jumps past the end of the table,
    # a complete repeat jumps back to the first entry of this section
    (None, TableInList, (parsertuplelist, 14), 4, 1),  # ts
    (None, Is, ',', 3, 1),
    (None, TableInList, (parsertuplelist, 14), 2, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8), 1, -3),
)
# fo_added_token := (ts, '/', ts, element_token)+
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[5] = (
    # mandatory first "/ element"
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Is, '/'),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional repeats: a failure anywhere jumps past the end of the table,
    # a complete repeat jumps back to the first entry of this section
    (None, TableInList, (parsertuplelist, 14), 4, 1),  # ts
    (None, Is, '/', 3, 1),
    (None, TableInList, (parsertuplelist, 14), 2, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8), 1, -3),
)
# or_added_token := (ts, '|', ts, element_token)+
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[6] = (
    # mandatory first "| element"
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Is, '|'),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional repeats: a failure anywhere jumps past the end of the table,
    # a complete repeat jumps back to the first entry of this section
    (None, TableInList, (parsertuplelist, 14), 4, 1),  # ts
    (None, Is, '|', 3, 1),
    (None, TableInList, (parsertuplelist, 14), 2, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8), 1, -3),
)
# and_added_token := (ts, '&', ts, element_token)+
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[7] = (
    # mandatory first "& element"
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Is, '&'),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional repeats: a failure anywhere jumps past the end of the table,
    # a complete repeat jumps back to the first entry of this section
    (None, TableInList, (parsertuplelist, 14), 4, 1),  # ts
    (None, Is, '&', 3, 1),
    (None, TableInList, (parsertuplelist, 14), 2, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8), 1, -3),
)
# element_token := negpos_indicator?, ts, (literal/range/group/name), ts, occurence_indicator?
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[8] = (
    ('negpos_indicator', TableInList, (parsertuplelist, 10), 1, 1),  # optional
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    # alternatives: a match jumps to the ts entry after 'name'; 'name' is the last resort
    ('literal', TableInList, (parsertuplelist, 15), 1, 4),
    ('range', TableInList, (parsertuplelist, 16), 1, 3),
    ('group', TableInList, (parsertuplelist, 9), 1, 2),
    ('name', TableInList, (parsertuplelist, 13)),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    ('occurence_indicator', TableInList, (parsertuplelist, 11), 1, 1),  # optional
)
# group := '(', ts, element_token, (added tokens)*, ts, ')'
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[9] = (
    (None, Is, '('),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    # optional run of added tokens (inline copy of the added_token table)
    (None, SubTable, (
        # first round: any one alternative must match, then jump to the repeat section
        ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 5),
        ('fo_added_token', TableInList, (parsertuplelist, 5), 1, 4),
        ('or_added_token', TableInList, (parsertuplelist, 6), 1, 3),
        ('and_added_token', TableInList, (parsertuplelist, 7), 1, 2),
        (None, Fail, Here),
        # repeat section: every match jumps back to the first entry of this section
        ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 0),
        ('fo_added_token', TableInList, (parsertuplelist, 5), 1, -1),
        ('or_added_token', TableInList, (parsertuplelist, 6), 1, -2),
        ('and_added_token', TableInList, (parsertuplelist, 7), 1, -3),
    ), 1, 1),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    (None, Is, ')'),
)
# negpos_indicator := '+' / '-'
parsertuplelist[10] = (
    # a '+' jumps past the end of the table; otherwise a '-' is required
    (None, Is, "+", 1, 2),
    (None, Is, "-"),
)
# occurence_indicator := '+' / '*' / '?'
parsertuplelist[11] = (
    # '+' or '*' jump past the end of the table; otherwise a '?' is required
    (None, Is, "+", 1, 3),
    (None, Is, "*", 1, 2),
    (None, Is, '?'),
)
# unreportedname := '<', name, '>'
parsertuplelist[12] = (
    (None, Is, '<'),
    # the inner name is tagged via the shared name table (slot 13)
    ('name', TableInList, (parsertuplelist, 13)),
    (None, Is, '>'),
)
# name: one character from alpha + '_', then an optional run from alphanumeric + '_'
parsertuplelist[13] = (
    (None, IsIn, alpha+'_'),
    (None, AllIn, alphanumeric+'_', 1, 1),
)
|  | 
 | ||
# ts: optional whitespace interleaved with '#' comments
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[14] = (
    # optional whitespace run; either outcome moves on to the comment check
    (None, AllIn, ' \011\012\013\014\015', 1, 1),
    # a '#' comment sends control back to the whitespace entry; no comment ends the table
    (None, SubTable, (
        (None, Is, '#'),
        # rest of the line; original author's caveat: problem if a comment ends the file
        (None, AllNotIn, '\n', 1, 1),
    ), 1, -1),
)
|  | # this isn't actually used in the bootstrap parser... | ||
# SPECIALESCAPEDCHAR := [\\abfnrtv] -- a single character, also stored in slot 22
_specialescapedchar = parsertuplelist[22] = (
    ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv'),
)
# OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]? -- also stored in slot 23
_octalescapechar = parsertuplelist[23] = (
    (None, IsIn, '01234567'),     # first digit is required
    (None, IsIn, '01234567', 2),  # no second digit: jump past the end of the table
    (None, IsIn, '01234567', 1),  # third digit is optional
)
# ESCAPEDCHAR := '\\', (SPECIALESCAPEDCHAR / OCTALESCAPEDCHAR) -- also stored in slot 21
_escapedchar = parsertuplelist[21] = (
    (None, Is, '\\'),
    # a special escape jumps past the end of the table; otherwise an octal escape is required
    ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv', 1, 4),
    ('OCTALESCAPEDCHAR', SubTable, _octalescapechar),
)
|  | 	 | ||
# CHARNOBRACE := ESCAPEDCHAR / CHAR -- also stored in slot 20
_charnobrace = parsertuplelist[20] = (
    # an escape sequence jumps past the end of the table; otherwise any char but ']'
    ('ESCAPEDCHAR', Table, _escapedchar, 1, 2),
    ('CHAR', IsNot, ']'),
)
# CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE -- also stored in slot 19
_rangedef = parsertuplelist[19] = (
    ('CHARNOBRACE', Table, _charnobrace),  # lower bound
    (None, Is, '-'),
    ('CHARNOBRACE', Table, _charnobrace),  # upper bound
)
|  | 	 | ||
|  | 	 | ||
# range: '[', optional leading ']' and '-', then character ranges / single characters, ']'
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[16] = (
    (None, Is, '['),
    ('CHARBRACE', Is, ']', 1, 1),   # optional literal ']' right after the opening bracket
    ('CHARDASH', Is, '-', 1, 1),    # optional literal '-'
    # try ranges first, repeating this entry while they match
    ('CHARRANGE', Table, _rangedef, 1, 0),
    # otherwise one single character, then back to the range entry
    (None, SubTable, _charnobrace, 1, -1),
    (None, Is, ']'),
)
|  | 
 | ||
# Single-quoted string: opening quote, body with backslash escapes, closing quote.
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
_sqstr = (
    (None, Is, "'"),
    (None, AllNotIn, "\\'", 1, 1),  # run of characters that are neither backslash nor quote
    (None, Is, "\\", 2, 1),         # no backslash here: jump to the closing-quote entry
    (None, Skip, 1, 1, -2),         # consume the escaped character, back to the run entry
    (None, Is, "'"),                # fails the table if the closing quote is missing
)
# Double-quoted string: opening quote, body with backslash escapes, closing quote.
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
_dblstr = (
    (None, Is, '"'),
    (None, AllNotIn, '\\"', 1, 1),  # run of characters that are neither backslash nor quote
    (None, Is, "\\", 2, 1),         # no backslash here: jump to the closing-quote entry
    (None, Skip, 1, 1, -2),         # consume the escaped character, back to the run entry
    (None, Is, '"'),                # fails the table if the closing quote is missing
)
|  | 
 | ||
|  | 
 | ||
|  | 	       | ||
|  | # literal             :=  ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') | ||
|  | 
 | ||
# literal: a single-quoted form (first four entries) or a double-quoted form (last four)
# (trailing numbers in an entry are the jump offsets: on no-match, on match)
parsertuplelist[15] = (
    # no opening single quote: jump to the double-quoted form
    (None, Is, "'", 4, 1),
    ('CHARNOSNGLQUOTE', AllNotIn, "\\'", 1, 1),   # plain run, optional
    ('ESCAPEDCHAR', Table, _escapedchar, 1, -1),  # an escape loops back to the plain run
    # closing single quote: a match jumps past the end of the table
    (None, Is, "'", 1, 5),
    (None, Is, '"'),
    ('CHARNODBLQUOTE', AllNotIn, '\\"', 1, 1),    # plain run, optional
    ('ESCAPEDCHAR', Table, _escapedchar, 1, -1),  # an escape loops back to the plain run
    (None, Is, '"'),
)
|  | 
 | ||
# Grammar source text, one production per line; parse() uses it as its default
# input.  It is a raw string, so the backslash sequences below (\011, \n, \\)
# reach the parser as literal backslashes.  The text must hold only grammar
# lines: stray markup inside the literal would be parsed as part of the grammar.
declaration = r'''declarationset      :=  declaration+
declaration         :=  ts , (unreportedname/name) ,ts,':=',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts
seq_added_token     :=  (ts,',',ts, element_token)+
fo_added_token      :=  (ts,'/',ts, element_token)+
or_added_token      :=  (ts,'|',ts, element_token)+ # not currently supported
and_added_token     :=  (ts,'&',ts, element_token)+ # not currently supported
element_token       :=  negpos_indicator?, ts, (literal/range/group/name),ts, occurence_indicator?
group               :=  '(',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts, ')'

negpos_indicator    :=  '+'/'-'
occurence_indicator :=  '+'/'*'/'?'
unreportedname      :=  '<', name, '>'
name                :=  [a-zA-Z_],[a-zA-Z0-9_]*
<ts>                :=  ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )*
literal             :=  ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')


range               :=  '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE           :=  ']'
CHARDASH            :=  '-'
CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
CHAR                :=  -[]]
ESCAPEDCHAR         :=  '\\',( SPECIALESCAPEDCHAR / OCTALESCAPEDCHAR )
SPECIALESCAPEDCHAR  :=  [\\abfnrtv]
OCTALESCAPEDCHAR    :=  [0-7],[0-7]?,[0-7]?
CHARNODBLQUOTE      :=  -[\\"]+
CHARNOSNGLQUOTE     :=  -[\\']+
'''
|  | 
 | ||
def parse( instr = declaration, parserelement = 'declarationset' ):
    """Tag *instr* using the hand-coded bootstrap table of one production.

    instr -- text to parse; defaults to this module's `declaration` grammar text.
    parserelement -- production name, looked up in `parsernamelist` to find the
        matching table in `parsertuplelist`; that table becomes the root.

    Returns the result of `tag(instr, table)` unchanged.
    Raises ValueError if `parserelement` is not listed in `parsernamelist`.
    """
    try:
        slot = parsernamelist.index(parserelement)
    except ValueError:
        # list.index() only reports "x not in list"; say which name was
        # rejected and which names are accepted.
        raise ValueError(
            "unknown parser element %r; expected one of: %s"
            % (parserelement, ', '.join(parsernamelist))
        )
    # Wrap the chosen table in a one-entry table so the whole match is
    # reported under the production's own name.
    tbl = (
        (parserelement, Table, parsertuplelist[slot]),
    )
    return tag(instr, tbl)
|  | 
 | ||
if __name__ == '__main__':
    import sys
    import pprint
    # Command-line arguments are passed positionally to parse(): the first
    # (optional) is the text to parse, the second (optional) the production name.
    # Argument unpacking replaces apply(), which no longer exists on Python 3.
    pprint.pprint(parse(*sys.argv[1:]))
|  | 	 | ||
|  | 
 |