First :D

commit 37588421f3
100 changed files with 22673 additions and 0 deletions
plugins/pycparser/pycparser/__init__.py (new file, 75 lines)
@@ -0,0 +1,75 @@
#-----------------------------------------------------------------
# pycparser: __init__.py
#
# This package file exports some convenience functions for
# interacting with pycparser
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------

__all__ = ['c_lexer', 'c_parser', 'c_ast']
__version__ = '1.05'

from subprocess import Popen, PIPE
from types import ListType

from c_parser import CParser


def parse_file( filename, use_cpp=False,
                cpp_path='cpp', cpp_args=''):
    """ Parse a C file using pycparser.

        filename:
            Name of the file you want to parse.

        use_cpp:
            Set to True if you want to execute the C pre-processor
            on the file prior to parsing it.

        cpp_path:
            If use_cpp is True, this is the path to 'cpp' on your
            system. If no path is provided, it attempts to just
            execute 'cpp', so it must be in your PATH.

        cpp_args:
            If use_cpp is True, set this to the command line
            arguments strings to cpp. Be careful with quotes -
            it's best to pass a raw string (r'') here.
            For example:
            r'-I../utils/fake_libc_include'
            If several arguments are required, pass a list of
            strings.

        When successful, an AST is returned. ParseError can be
        thrown if the file doesn't parse successfully.

        Errors from cpp will be printed out.
    """
    if use_cpp:
        path_list = [cpp_path]
        if isinstance(cpp_args, ListType):
            path_list += cpp_args
        elif cpp_args != '':
            path_list += [cpp_args]
        path_list += [filename]

        # Note the use of universal_newlines to treat all newlines
        # as \n for Python's purpose
        #
        pipe = Popen(   path_list,
                        stdout=PIPE,
                        universal_newlines=True)
        text = pipe.communicate()[0]
    else:
        text = open(filename).read()

    parser = CParser()
    return parser.parse(text, filename)


if __name__ == "__main__":
    pass
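A minimal usage sketch for parse_file as defined above, in the same Python 2 style as the rest of the commit. The file name and include path are invented for illustration, it assumes 'cpp' is on the PATH, and show() is assumed to be available on the c_ast nodes as in released pycparser versions:

    # Sketch only: 'examples/hello.c' and the -I path are made-up inputs.
    from pycparser import parse_file

    ast = parse_file('examples/hello.c', use_cpp=True,
                     cpp_path='cpp',
                     cpp_args=r'-I../utils/fake_libc_include')

    # Dump the parsed tree of c_ast nodes.
    ast.show()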
plugins/pycparser/pycparser/c_ast.py (new file, 1163 lines)
Diff suppressed because it is too large.
plugins/pycparser/pycparser/c_lexer.py (new file, 443 lines)
@@ -0,0 +1,443 @@
#-----------------------------------------------------------------
# pycparser: clex.py
#
# CLexer class: lexer for the C language
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------

import re
import sys

import ply.lex
from ply.lex import TOKEN


class CLexer(object):
    """ A lexer for the C language. After building it, set the
        input text with input(), and call token() to get new
        tokens.

        The public attribute filename can be set to an initial
        filename, but the lexer will update it upon #line
        directives.
    """
    def __init__(self, error_func, type_lookup_func):
        """ Create a new Lexer.

            error_func:
                An error function. Will be called with an error
                message, line and column as arguments, in case of
                an error during lexing.

            type_lookup_func:
                A type lookup function. Given a string, it must
                return True IFF this string is a name of a type
                that was defined with a typedef earlier.
        """
        self.error_func = error_func
        self.type_lookup_func = type_lookup_func
        self.filename = ''

        # Allow either "# line" or "# <num>" to support GCC's
        # cpp output
        #
        self.line_pattern = re.compile(r'([ \t]*line\W)|([ \t]*\d+)')

    def build(self, **kwargs):
        """ Builds the lexer from the specification. Must be
            called after the lexer object is created.

            This method exists separately, because the PLY
            manual warns against calling lex.lex inside
            __init__
        """
        self.lexer = ply.lex.lex(object=self, **kwargs)

    def reset_lineno(self):
        """ Resets the internal line number counter of the lexer.
        """
        self.lexer.lineno = 1

    def input(self, text):
        self.lexer.input(text)

    def token(self):
        g = self.lexer.token()
        return g

    ######################--   PRIVATE   --######################

    ##
    ## Internal auxiliary methods
    ##
    def _error(self, msg, token):
        location = self._make_tok_location(token)
        self.error_func(msg, location[0], location[1])
        self.lexer.skip(1)

    def _find_tok_column(self, token):
        i = token.lexpos
        while i > 0:
            if self.lexer.lexdata[i] == '\n': break
            i -= 1
        return (token.lexpos - i) + 1

    def _make_tok_location(self, token):
        return (token.lineno, self._find_tok_column(token))

    ##
    ## Reserved keywords
    ##
    keywords = (
        'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE',
        'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
        'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
        'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
        'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
        'VOLATILE', 'WHILE',
    )

    keyword_map = {}
    for r in keywords:
        keyword_map[r.lower()] = r

    ##
    ## All the tokens recognized by the lexer
    ##
    tokens = keywords + (
        # Identifiers
        'ID',

        # Type identifiers (identifiers previously defined as
        # types with typedef)
        'TYPEID',

        # constants
        'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
        'FLOAT_CONST',
        'CHAR_CONST',
        'WCHAR_CONST',

        # String literals
        'STRING_LITERAL',
        'WSTRING_LITERAL',

        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
        'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
        'LOR', 'LAND', 'LNOT',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        # Assignment
        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
        'PLUSEQUAL', 'MINUSEQUAL',
        'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
        'OREQUAL',

        # Increment/decrement
        'PLUSPLUS', 'MINUSMINUS',

        # Structure dereference (->)
        'ARROW',

        # Conditional operator (?)
        'CONDOP',

        # Delimiters
        'LPAREN', 'RPAREN',         # ( )
        'LBRACKET', 'RBRACKET',     # [ ]
        'LBRACE', 'RBRACE',         # { }
        'COMMA', 'PERIOD',          # . ,
        'SEMI', 'COLON',            # ; :

        # Ellipsis (...)
        'ELLIPSIS',

        # pre-processor
        'PPHASH',       # '#'
    )

    ##
    ## Regexes for use in tokens
    ##

    # valid C identifiers (K&R2: A.2.3)
    identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

    # integer constants (K&R2: A.2.5.1)
    integer_suffix_opt = r'(([uU][lL])|([lL][uU])|[uU]|[lL])?'
    decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
    octal_constant = '0[0-7]*'+integer_suffix_opt
    hex_constant = '0[xX][0-9a-fA-F]+'+integer_suffix_opt

    bad_octal_constant = '0[0-7]*[89]'

    # character constants (K&R2: A.2.5.2)
    # Note: a-zA-Z are allowed as escape chars to support #line
    # directives with Windows paths as filenames (\dir\file...)
    #
    simple_escape = r"""([a-zA-Z\\?'"])"""
    octal_escape = r"""([0-7]{1,3})"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    bad_escape = r"""([\\][^a-zA-Z\\?'"x0-7])"""

    escape_sequence = r"""(\\("""+simple_escape+'|'+octal_escape+'|'+hex_escape+'))'
    cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
    char_const = "'"+cconst_char+"'"
    wchar_const = 'L'+char_const
    unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
    bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""

    # string literals (K&R2: A.2.6)
    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
    string_literal = '"'+string_char+'*"'
    wstring_literal = 'L'+string_literal
    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

    # floating constants (K&R2: A.2.5.3)
    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'

    ##
    ## Lexer states
    ##
    states = (
        # ppline: preprocessor line directives
        #
        ('ppline', 'exclusive'),
    )

    def t_PPHASH(self, t):
        r'[ \t]*\#'
        m = self.line_pattern.match(
            t.lexer.lexdata, pos=t.lexer.lexpos)

        if m:
            t.lexer.begin('ppline')
            self.pp_line = self.pp_filename = None
            #~ print "ppline starts on line %s" % t.lexer.lineno
        else:
            t.type = 'PPHASH'
            return t

    ##
    ## Rules for the ppline state
    ##
    @TOKEN(string_literal)
    def t_ppline_FILENAME(self, t):
        if self.pp_line is None:
            self._error('filename before line number in #line', t)
        else:
            self.pp_filename = t.value.lstrip('"').rstrip('"')
            #~ print "PP got filename: ", self.pp_filename

    @TOKEN(decimal_constant)
    def t_ppline_LINE_NUMBER(self, t):
        if self.pp_line is None:
            self.pp_line = t.value
        else:
            # Ignore: GCC's cpp sometimes inserts a numeric flag
            # after the file name
            pass

    def t_ppline_NEWLINE(self, t):
        r'\n'

        if self.pp_line is None:
            self._error('line number missing in #line', t)
        else:
            self.lexer.lineno = int(self.pp_line)

            if self.pp_filename is not None:
                self.filename = self.pp_filename

        t.lexer.begin('INITIAL')

    def t_ppline_PPLINE(self, t):
        r'line'
        pass

    t_ppline_ignore = ' \t'

    def t_ppline_error(self, t):
        msg = 'invalid #line directive'
        self._error(msg, t)

    ##
    ## Rules for the normal state
    ##
    t_ignore = ' \t'

    # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    # Operators
    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_MOD = r'%'
    t_OR = r'\|'
    t_AND = r'&'
    t_NOT = r'~'
    t_XOR = r'\^'
    t_LSHIFT = r'<<'
    t_RSHIFT = r'>>'
    t_LOR = r'\|\|'
    t_LAND = r'&&'
    t_LNOT = r'!'
    t_LT = r'<'
    t_GT = r'>'
    t_LE = r'<='
    t_GE = r'>='
    t_EQ = r'=='
    t_NE = r'!='

    # Assignment operators
    t_EQUALS = r'='
    t_TIMESEQUAL = r'\*='
    t_DIVEQUAL = r'/='
    t_MODEQUAL = r'%='
    t_PLUSEQUAL = r'\+='
    t_MINUSEQUAL = r'-='
    t_LSHIFTEQUAL = r'<<='
    t_RSHIFTEQUAL = r'>>='
    t_ANDEQUAL = r'&='
    t_OREQUAL = r'\|='
    t_XOREQUAL = r'\^='

    # Increment/decrement
    t_PLUSPLUS = r'\+\+'
    t_MINUSMINUS = r'--'

    # ->
    t_ARROW = r'->'

    # ?
    t_CONDOP = r'\?'

    # Delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'\{'
    t_RBRACE = r'\}'
    t_COMMA = r','
    t_PERIOD = r'\.'
    t_SEMI = r';'
    t_COLON = r':'
    t_ELLIPSIS = r'\.\.\.'

    t_STRING_LITERAL = string_literal

    # The following floating and integer constants are defined as
    # functions to impose a strict order (otherwise, decimal
    # is placed before the others because its regex is longer,
    # and this is bad)
    #
    @TOKEN(floating_constant)
    def t_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_constant)
    def t_INT_CONST_HEX(self, t):
        return t

    @TOKEN(bad_octal_constant)
    def t_BAD_CONST_OCT(self, t):
        msg = "Invalid octal constant"
        self._error(msg, t)

    @TOKEN(octal_constant)
    def t_INT_CONST_OCT(self, t):
        return t

    @TOKEN(decimal_constant)
    def t_INT_CONST_DEC(self, t):
        return t

    # Must come before bad_char_const, to prevent it from
    # catching valid char constants as invalid
    #
    @TOKEN(char_const)
    def t_CHAR_CONST(self, t):
        return t

    @TOKEN(wchar_const)
    def t_WCHAR_CONST(self, t):
        return t

    @TOKEN(unmatched_quote)
    def t_UNMATCHED_QUOTE(self, t):
        msg = "Unmatched '"
        self._error(msg, t)

    @TOKEN(bad_char_const)
    def t_BAD_CHAR_CONST(self, t):
        msg = "Invalid char constant %s" % t.value
        self._error(msg, t)

    @TOKEN(wstring_literal)
    def t_WSTRING_LITERAL(self, t):
        return t

    # unmatched string literals are caught by the preprocessor

    @TOKEN(bad_string_literal)
    def t_BAD_STRING_LITERAL(self, t):
        msg = "String contains invalid escape code"
        self._error(msg, t)

    @TOKEN(identifier)
    def t_ID(self, t):
        t.type = self.keyword_map.get(t.value, "ID")

        if t.type == 'ID' and self.type_lookup_func(t.value):
            t.type = "TYPEID"

        return t

    def t_error(self, t):
        msg = 'Illegal character %s' % repr(t.value[0])
        self._error(msg, t)


if __name__ == "__main__":
    filename = '../zp.c'
    text = open(filename).read()

    #~ text = '"'+r"""ka \p ka"""+'"'
    text = r"""
    546
    #line 66 "kwas\df.h"
    id 4
    # 5
    dsf
    """

    def errfoo(msg, a, b):
        print msg
        sys.exit()

    def typelookup(namd):
        return False

    clex = CLexer(errfoo, typelookup)
    clex.build()
    clex.input(text)

    while 1:
        tok = clex.token()
        if not tok: break

        #~ print type(tok)
        print "-", tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos
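A small sketch (not part of the commit) of how the type_lookup_func hook resolves the typedef-name ambiguity described in the constructor docstring above; the typedef set, callback names, and input string are invented for illustration:

    from c_lexer import CLexer

    typedef_names = set(['uint32'])     # names a parser would register on typedefs

    def lex_error(msg, line, column):
        print 'lex error at %s:%s - %s' % (line, column, msg)

    clex = CLexer(lex_error, lambda name: name in typedef_names)
    clex.build()
    clex.input('uint32 x; foo y;')

    tok = clex.token()
    while tok:
        print tok.type, tok.value       # 'uint32' lexes as TYPEID, 'foo' as ID
        tok = clex.token()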
plugins/pycparser/pycparser/c_parser.py (new file, 1259 lines)
Diff suppressed because it is too large.
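The suppressed c_parser.py provides the CParser class that parse_file() in __init__.py drives. A minimal sketch of using it directly on an in-memory string, inferred only from that call site (the C snippet and filename label are illustrative):

    from c_parser import CParser

    parser = CParser()
    ast = parser.parse('int x = 1;', '<string>')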
plugins/pycparser/pycparser/ply/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)

__all__ = ['lex','yacc']
plugins/pycparser/pycparser/ply/lex.py (new file, 1021 lines)
Diff suppressed because it is too large.

plugins/pycparser/pycparser/ply/yacc.py (new file, 3165 lines)
Diff suppressed because it is too large.
plugins/pycparser/pycparser/plyparser.py (new file, 67 lines)
@@ -0,0 +1,67 @@
#-----------------------------------------------------------------
# plyparser.py
#
# PLYParser class and other utilities for simplifying programming
# parsers with PLY
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------


class Coord(object):
    """ Coordinates of a syntactic element. Consists of:
            - File name
            - Line number
            - (optional) column number, for the Lexer
    """
    def __init__(self, file, line, column=None):
        self.file = file
        self.line = line
        self.column = column

    def __str__(self):
        str = "%s:%s" % (self.file, self.line)
        if self.column: str += ":%s" % self.column
        return str


class ParseError(Exception): pass


class PLYParser(object):
    def _create_opt_rule(self, rulename):
        """ Given a rule name, creates an optional ply.yacc rule
            for it. The name of the optional rule is
            <rulename>_opt
        """
        optname = rulename + '_opt'

        def optrule(self, p):
            p[0] = p[1]

        optrule.__doc__ = '%s : empty\n| %s' % (optname, rulename)
        optrule.__name__ = 'p_%s' % optname
        setattr(self.__class__, optrule.__name__, optrule)

    def _coord(self, lineno, column=None):
        return Coord(
            file=self.clex.filename,
            line=lineno,
            column=column)

    def _parse_error(self, msg, coord):
        raise ParseError("%s: %s" % (coord, msg))


if __name__ == '__main__':
    pp = PLYParser()
    pp._create_opt_rule('java')

    ar = [4, 6]
    pp.p_java_opt(ar)

    print ar
    print pp.p_java_opt.__doc__

    print dir(pp)
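A short sketch (an assumption, not from the commit) of how a grammar class would typically call _create_opt_rule; the subclass and rule name are hypothetical:

    class MyParser(PLYParser):
        def __init__(self):
            # Adds a p_declaration_list_opt method whose docstring is the
            # grammar fragment:
            #   declaration_list_opt : empty
            #                        | declaration_list
            self._create_opt_rule('declaration_list')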