晋太元中,武陵人捕鱼为业。缘溪行,忘路之远近。忽逢桃花林,夹岸数百步,中无杂树,芳草鲜美,落英缤纷。渔人甚异之,复前行,欲穷其林。 林尽水源,便得一山,山有小口,仿佛若有光。便舍船,从口入。初极狭,才通人。复行数十步,豁然开朗。土地平旷,屋舍俨然,有良田、美池、桑竹之属。阡陌交通,鸡犬相闻。其中往来种作,男女衣着,悉如外人。黄发垂髫,并怡然自乐。 见渔人,乃大惊,问所从来。具答之。便要还家,设酒杀鸡作食。村中闻有此人,咸来问讯。自云先世避秦时乱,率妻子邑人来此绝境,不复出焉,遂与外人间隔。问今是何世,乃不知有汉,无论魏晋。此人一一为具言所闻,皆叹惋。余人各复延至其家,皆出酒食。停数日,辞去。此中人语云:“不足为外人道也。”(间隔 一作:隔绝) 既出,得其船,便扶向路,处处志之。及郡下,诣太守,说如此。太守即遣人随其往,寻向所志,遂迷,不复得路。 南阳刘子骥,高尚士也,闻之,欣然规往。未果,寻病终。后遂无问津者。
|
Server : Apache System : Linux srv.rainic.com 4.18.0-553.47.1.el8_10.x86_64 #1 SMP Wed Apr 2 05:45:37 EDT 2025 x86_64 User : rainic ( 1014) PHP Version : 7.4.33 Disable Function : exec,passthru,shell_exec,system Directory : /usr/lib/python3.6/site-packages/pycparser/ |
Upload File : |
#------------------------------------------------------------------------------
# pycparser: c_lexer.py
#
# CLexer class: lexer for the C language
#
# Copyright (C) 2008-2015, Eli Bendersky
# License: BSD
#------------------------------------------------------------------------------
import re
import sys
from ply import lex
from ply.lex import TOKEN
class CLexer(object):
""" A lexer for the C language. After building it, set the
input text with input(), and call token() to get new
tokens.
The public attribute filename can be set to an initial
filaneme, but the lexer will update it upon #line
directives.
"""
def __init__(self, error_func, on_lbrace_func, on_rbrace_func,
type_lookup_func):
""" Create a new Lexer.
error_func:
An error function. Will be called with an error
message, line and column as arguments, in case of
an error during lexing.
on_lbrace_func, on_rbrace_func:
Called when an LBRACE or RBRACE is encountered
(likely to push/pop type_lookup_func's scope)
type_lookup_func:
A type lookup function. Given a string, it must
return True IFF this string is a name of a type
that was defined with a typedef earlier.
"""
self.error_func = error_func
self.on_lbrace_func = on_lbrace_func
self.on_rbrace_func = on_rbrace_func
self.type_lookup_func = type_lookup_func
self.filename = ''
# Keeps track of the last token returned from self.token()
self.last_token = None
# Allow either "# line" or "# <num>" to support GCC's
# cpp output
#
self.line_pattern = re.compile('([ \t]*line\W)|([ \t]*\d+)')
self.pragma_pattern = re.compile('[ \t]*pragma\W')
def build(self, **kwargs):
""" Builds the lexer from the specification. Must be
called after the lexer object is created.
This method exists separately, because the PLY
manual warns against calling lex.lex inside
__init__
"""
self.lexer = lex.lex(object=self, **kwargs)
def reset_lineno(self):
""" Resets the internal line number counter of the lexer.
"""
self.lexer.lineno = 1
def input(self, text):
self.lexer.input(text)
def token(self):
self.last_token = self.lexer.token()
return self.last_token
def find_tok_column(self, token):
""" Find the column of the token in its line.
"""
last_cr = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
return token.lexpos - last_cr
######################-- PRIVATE --######################
##
## Internal auxiliary methods
##
def _error(self, msg, token):
location = self._make_tok_location(token)
self.error_func(msg, location[0], location[1])
self.lexer.skip(1)
def _make_tok_location(self, token):
return (token.lineno, self.find_tok_column(token))
##
## Reserved keywords
##
keywords = (
'_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG',
'REGISTER', 'OFFSETOF',
'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
'VOLATILE', 'WHILE',
)
keyword_map = {}
for keyword in keywords:
if keyword == '_BOOL':
keyword_map['_Bool'] = keyword
elif keyword == '_COMPLEX':
keyword_map['_Complex'] = keyword
else:
keyword_map[keyword.lower()] = keyword
##
## All the tokens recognized by the lexer
##
tokens = keywords + (
# Identifiers
'ID',
# Type identifiers (identifiers previously defined as
# types with typedef)
'TYPEID',
# constants
'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN',
'FLOAT_CONST', 'HEX_FLOAT_CONST',
'CHAR_CONST',
'WCHAR_CONST',
# String literals
'STRING_LITERAL',
'WSTRING_LITERAL',
# Operators
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
'OREQUAL',
# Increment/decrement
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Conditional operator (?)
'CONDOP',
# Delimeters
'LPAREN', 'RPAREN', # ( )
'LBRACKET', 'RBRACKET', # [ ]
'LBRACE', 'RBRACE', # { }
'COMMA', 'PERIOD', # . ,
'SEMI', 'COLON', # ; :
# Ellipsis (...)
'ELLIPSIS',
# pre-processor
'PPHASH', # '#'
)
##
## Regexes for use in tokens
##
##
# valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
hex_prefix = '0[xX]'
hex_digits = '[0-9a-fA-F]+'
bin_prefix = '0[bB]'
bin_digits = '[01]+'
# integer constants (K&R2: A.2.5.1)
integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
octal_constant = '0[0-7]*'+integer_suffix_opt
hex_constant = hex_prefix+hex_digits+integer_suffix_opt
bin_constant = bin_prefix+bin_digits+integer_suffix_opt
bad_octal_constant = '0[0-7]*[89]'
# character constants (K&R2: A.2.5.2)
# Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
# directives with Windows paths as filenames (..\..\dir\file)
# For the same reason, decimal_escape allows all digit sequences. We want to
# parse all correct code, even if it means to sometimes parse incorrect
# code.
#
simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
decimal_escape = r"""(\d+)"""
hex_escape = r"""(x[0-9a-fA-F]+)"""
bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
char_const = "'"+cconst_char+"'"
wchar_const = 'L'+char_const
unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
# string literals (K&R2: A.2.6)
string_char = r"""([^"\\\n]|"""+escape_sequence+')'
string_literal = '"'+string_char+'*"'
wstring_literal = 'L'+string_literal
bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
# floating constants (K&R2: A.2.5.3)
exponent_part = r"""([eE][-+]?[0-9]+)"""
fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
hex_fractional_constant = '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'
##
## Lexer states: used for preprocessor \n-terminated directives
##
states = (
# ppline: preprocessor line directives
#
('ppline', 'exclusive'),
# pppragma: pragma
#
('pppragma', 'exclusive'),
)
def t_PPHASH(self, t):
r'[ \t]*\#'
if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
t.lexer.begin('ppline')
self.pp_line = self.pp_filename = None
elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
t.lexer.begin('pppragma')
else:
t.type = 'PPHASH'
return t
##
## Rules for the ppline state
##
@TOKEN(string_literal)
def t_ppline_FILENAME(self, t):
if self.pp_line is None:
self._error('filename before line number in #line', t)
else:
self.pp_filename = t.value.lstrip('"').rstrip('"')
@TOKEN(decimal_constant)
def t_ppline_LINE_NUMBER(self, t):
if self.pp_line is None:
self.pp_line = t.value
else:
# Ignore: GCC's cpp sometimes inserts a numeric flag
# after the file name
pass
def t_ppline_NEWLINE(self, t):
r'\n'
if self.pp_line is None:
self._error('line number missing in #line', t)
else:
self.lexer.lineno = int(self.pp_line)
if self.pp_filename is not None:
self.filename = self.pp_filename
t.lexer.begin('INITIAL')
def t_ppline_PPLINE(self, t):
r'line'
pass
t_ppline_ignore = ' \t'
def t_ppline_error(self, t):
self._error('invalid #line directive', t)
##
## Rules for the pppragma state
##
def t_pppragma_NEWLINE(self, t):
r'\n'
t.lexer.lineno += 1
t.lexer.begin('INITIAL')
def t_pppragma_PPPRAGMA(self, t):
r'pragma'
pass
t_pppragma_ignore = ' \t<>.-{}();=+-*/$%@&^~!?:,0123456789'
@TOKEN(string_literal)
def t_pppragma_STR(self, t): pass
@TOKEN(identifier)
def t_pppragma_ID(self, t): pass
def t_pppragma_error(self, t):
self._error('invalid #pragma directive', t)
##
## Rules for the normal state
##
t_ignore = ' \t'
# Newlines
def t_NEWLINE(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimeters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Scope delimiters
# To see why on_lbrace_func is needed, consider:
# typedef char TT;
# void foo(int TT) { TT = 10; }
# TT x = 5;
# Outside the function, TT is a typedef, but inside (starting and ending
# with the braces) it's a parameter. The trouble begins with yacc's
# lookahead token. If we open a new scope in brace_open, then TT has
# already been read and incorrectly interpreted as TYPEID. So, we need
# to open and close scopes from within the lexer.
# Similar for the TT immediately outside the end of the function.
#
@TOKEN(r'\{')
def t_LBRACE(self, t):
self.on_lbrace_func()
return t
@TOKEN(r'\}')
def t_RBRACE(self, t):
self.on_rbrace_func()
return t
t_STRING_LITERAL = string_literal
# The following floating and integer constants are defined as
# functions to impose a strict order (otherwise, decimal
# is placed before the others because its regex is longer,
# and this is bad)
#
@TOKEN(floating_constant)
def t_FLOAT_CONST(self, t):
return t
@TOKEN(hex_floating_constant)
def t_HEX_FLOAT_CONST(self, t):
return t
@TOKEN(hex_constant)
def t_INT_CONST_HEX(self, t):
return t
@TOKEN(bin_constant)
def t_INT_CONST_BIN(self, t):
return t
@TOKEN(bad_octal_constant)
def t_BAD_CONST_OCT(self, t):
msg = "Invalid octal constant"
self._error(msg, t)
@TOKEN(octal_constant)
def t_INT_CONST_OCT(self, t):
return t
@TOKEN(decimal_constant)
def t_INT_CONST_DEC(self, t):
return t
# Must come before bad_char_const, to prevent it from
# catching valid char constants as invalid
#
@TOKEN(char_const)
def t_CHAR_CONST(self, t):
return t
@TOKEN(wchar_const)
def t_WCHAR_CONST(self, t):
return t
@TOKEN(unmatched_quote)
def t_UNMATCHED_QUOTE(self, t):
msg = "Unmatched '"
self._error(msg, t)
@TOKEN(bad_char_const)
def t_BAD_CHAR_CONST(self, t):
msg = "Invalid char constant %s" % t.value
self._error(msg, t)
@TOKEN(wstring_literal)
def t_WSTRING_LITERAL(self, t):
return t
# unmatched string literals are caught by the preprocessor
@TOKEN(bad_string_literal)
def t_BAD_STRING_LITERAL(self, t):
msg = "String contains invalid escape code"
self._error(msg, t)
@TOKEN(identifier)
def t_ID(self, t):
t.type = self.keyword_map.get(t.value, "ID")
if t.type == 'ID' and self.type_lookup_func(t.value):
t.type = "TYPEID"
return t
def t_error(self, t):
msg = 'Illegal character %s' % repr(t.value[0])
self._error(msg, t)