aboutsummaryrefslogtreecommitdiffstats
path: root/tools/syz-headerparser
diff options
context:
space:
mode:
authorZubin Mithra <zsm@google.com>2017-08-23 14:01:57 +0100
committerDmitry Vyukov <dvyukov@google.com>2017-08-25 17:23:52 +0200
commitc3631fc789181c23aa51396f0ff66cd488e4b4f7 (patch)
tree7ffbc3082b6f8cac56de418bd51ee54d98eb3df7 /tools/syz-headerparser
parent3f1aca4826c84d52da6047a8a5c9325727525c8d (diff)
tools: add headerparser as a tool to assist in writing system call descriptions
The tool can be found inside tools/syz-headerparser. Details on how to use headerparser can be found inside docs/headerparser_usage.md.
Diffstat (limited to 'tools/syz-headerparser')
-rw-r--r--tools/syz-headerparser/headerlib/__init__.py0
-rw-r--r--tools/syz-headerparser/headerlib/container.py229
-rw-r--r--tools/syz-headerparser/headerlib/header_preprocessor.py142
-rw-r--r--tools/syz-headerparser/headerlib/struct_walker.py250
-rw-r--r--tools/syz-headerparser/headerparser.py61
-rw-r--r--tools/syz-headerparser/test_headers/th_a.h22
-rw-r--r--tools/syz-headerparser/test_headers/th_b.h27
7 files changed, 731 insertions, 0 deletions
diff --git a/tools/syz-headerparser/headerlib/__init__.py b/tools/syz-headerparser/headerlib/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/syz-headerparser/headerlib/__init__.py
diff --git a/tools/syz-headerparser/headerlib/container.py b/tools/syz-headerparser/headerlib/container.py
new file mode 100644
index 000000000..cc46a206c
--- /dev/null
+++ b/tools/syz-headerparser/headerlib/container.py
@@ -0,0 +1,229 @@
+# Copyright 2017 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+'''
+This module contains container classes for holding struct, struct fields, and a global
+namespace for struct objects obtained from multiple header files.
+'''
+
+import logging
+
+from headerlib.struct_walker import StructWalker
+
+
class StructRepr(object):
    '''
    Container for a single struct type.

    `struct_name` is the name printed in the generated description. `fr_list` is the
    ordered list of field objects of the struct; each entry must expose `field_type`
    (the native C type as a string) and `field_identifier` (the field name).
    '''

    # Native C type -> candidate syzkaller types. Where several syzkaller types may map
    # to one native type the alternatives are separated by '|'.
    _SYZKALLER_TYPES = {
        'size_t': 'len|fileoff|intN',
        'ssize_t': 'len|intN',
        'unsigned int': 'len|fileoff|int32',
        'int': 'len|fileoff|flags|int32',
        'long': 'len|fileoff|flags|intN',
        'unsigned long': 'len|fileoff|flags|intN',
        'unsigned long long': 'len|fileoff|intN',
        'char*': 'ptr[in|out, string]|ptr[in, filename]',
        'char**': 'ptr[in, [ptr[in|out, string]]]',
        'void*': 'ptr[in|out, string]|ptr[in|out, array]',
        'void (*)()': 'vma',
        'uint64_t': 'len|int64',
        'int64_t': 'len|int64',
        'uint32_t': 'len|int32',
        'int32_t': 'len|int32',
        'uint16_t': 'len|int16',
        'int16_t': 'len|int16',
        'uint8_t': 'len|int8',
        'int8_t': 'len|int8',
    }

    def __init__(self, struct_name, fr_list, loglvl=logging.INFO):
        self.struct_name = struct_name
        self.fr_list = fr_list
        self.global_hierarchy = {}
        self._setuplogging(loglvl)

    def __str__(self):
        return self._output_syzkaller_fmt()

    def _setuplogging(self, loglvl):
        '''Attach a stream handler to the class-named logger and set its level.'''
        self.logger = logging.getLogger(self.__class__.__name__)
        # logging.getLogger() returns the same logger object for every instance of this
        # class; installing a handler per instance would print each message once per
        # StructRepr ever created, so only install one when none is present.
        if not self.logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(logging.Formatter('DEBUG:%(name)s:%(message)s'))
            self.logger.addHandler(sh)
        self.logger.setLevel(loglvl)

    def _output_syzkaller_fmt(self):
        '''Render the struct as "<name> {", one line per field, "}".'''
        header = '%s {' % (self.struct_name)
        # Drop the trailing newline of the body; join() supplies the separators.
        body = self.get_syzkaller_field_body()[:-1]
        footer = '}'
        return '\n'.join([header, body, footer])

    @classmethod
    def _get_syzkaller_type(cls, native_type):
        '''
        Map a native C type string to its candidate syzkaller type(s).

        Types not covered by any rule (e.g. typedef names) are returned unchanged.
        '''
        if '[' in native_type and ']' in native_type:
            return 'array'

        if 'struct ' in native_type:
            # A pointer to a struct cannot be narrowed further automatically.
            if '*' in native_type:
                return 'ptr|buffer|array'
            return native_type.split(' ')[-1]

        if 'enum ' in native_type:
            return native_type.split(' ')[-1]

        return cls._SYZKALLER_TYPES.get(native_type, native_type)

    def get_syzkaller_field_body(self):
        '''
        Return the metadata description of every field in syzkaller format, one
        newline-terminated line per field, e.g. "len intptr #(size_t)".

        In cases where more than one syzkaller type maps to a native type, the possible
        syzkaller types are separated by '|'. The native type is appended as a trailing
        "#(...)" annotation. Returns an empty string for a struct without fields.
        '''
        rows = [(field.field_identifier,
                 self._get_syzkaller_type(field.field_type),
                 field.field_type)
                for field in self.fr_list]

        # max() of an empty sequence raises ValueError; an empty struct has no body.
        if not rows:
            return ''

        # Columns are padded to the widest entry plus five spaces of separation.
        col1_width = max(len(row[0]) for row in rows) + 5
        col2_width = max(len(row[1]) for row in rows) + 5

        return ''.join(
            ' '*10 + '%s%s#(%s)\n' % (row[0].ljust(col1_width), row[1].ljust(col2_width), row[2])
            for row in rows)

    def get_fields(self):
        '''
        Get a list of all fields in this struct.
        '''
        return self.fr_list

    def set_global_hierarchy(self, global_hierarchy):
        '''
        Set a reference to the global hierarchy of structs. This is useful when unrolling
        structs.
        '''
        self.global_hierarchy = global_hierarchy
+
+
class FieldRepr(object):
    '''
    Container for one item of a struct.

    `field_type` is the type of the item and `field_identifier` its name/label.
    `field_extra` holds item specific metadata and starts out as None. When the type
    refers to another struct whose items are known, `field_extra` is pointed at that
    struct's StructRepr instance; this supports struct unrolling when an instance of
    "struct B" is an item inside "struct A".
    '''

    def __init__(self, field_type, field_identifier):
        self._field_extra = None
        self._field_identifier = field_identifier
        self._field_type = field_type

    def _stored(slot, doc):
        # Build a read/write property backed by the private attribute `slot`.
        def _read(self):
            return getattr(self, slot)

        def _write(self, value):
            setattr(self, slot, value)

        return property(_read, _write, doc=doc)

    field_type = _stored('_field_type', 'Retrieve the field type.')
    field_identifier = _stored('_field_identifier', 'Retrieve the field identifier.')
    field_extra = _stored('_field_extra', 'Retrieve any field specific metadata object.')

    # The factory is only needed while the class body executes.
    del _stored
+
+
class GlobalHierarchy(dict):
    '''
    Global container for structs and their items across a list of header files.

    Each struct is stored keyed by "struct <name>" and represented by an instance of
    `StructRepr`.

    `filenames` is the list of header files to parse; when it is non-empty the headers
    are parsed immediately. `include_lines` is text passed through to the struct walker.
    `output_fmt` is accepted for interface compatibility and is not used.
    '''

    def __init__(self, filenames, loglvl=logging.INFO,
                 include_lines='', output_fmt=''):
        super(GlobalHierarchy, self).__init__()
        # Keep the caller's list object when one is given; fall back to a fresh list so
        # that add_header_file() also works after constructing with filenames=None.
        self.filenames = filenames if filenames is not None else []
        self.include_lines = include_lines
        self.loglvl = loglvl
        self._setuplogging()
        if self.filenames:
            self.load_header_files()

    def __str__(self):
        return self._output_syzkaller_fmt()

    def _setuplogging(self):
        '''Attach a stream handler to the class-named logger and set its level.'''
        self.logger = logging.getLogger(self.__class__.__name__)
        # The logger is shared between instances; add the handler only once so messages
        # are not printed once per instance created.
        if not self.logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(logging.Formatter('DEBUG:%(name)s:%(message)s'))
            self.logger.addHandler(sh)
        self.logger.setLevel(self.loglvl)

    @staticmethod
    def _get_struct_name(struct_type):
        '''Return the last whitespace-separated word of `struct_type`.'''
        return struct_type.split()[-1]

    def _output_syzkaller_fmt(self):
        return ''

    def add_header_file(self, filename):
        '''Add a header file to the list of headers we are about to parse.'''
        self.filenames.append(filename)

    def load_header_files(self):
        '''
        Parse the list of header files and generate StructRepr instances to represent each
        struct object. Maintain a global view of all structs.
        '''
        self.logger.debug('load_header_files : %s', str(self.filenames))
        struct_walker = StructWalker(filenames=self.filenames, include_lines=self.include_lines,
                                     loglvl=self.loglvl)
        local_hierarchy = struct_walker.generate_local_hierarchy()

        for struct_name, items in local_hierarchy.items():
            # Each item is a (type, identifier) pair.
            fr_list = [FieldRepr(item[0], item[1]) for item in items]
            sr = StructRepr(struct_name, fr_list, loglvl=self.loglvl)
            sr.set_global_hierarchy(self)
            self["struct %s" % (struct_name)] = sr

        for sr in self.values():
            for field in sr.get_fields():
                # If the item is a struct object, we link it against an
                # instance of its corresponding `sr`. Only an exact "struct <name>"
                # type matches a key; pointer and array types do not.
                if field.field_type in self:
                    field.field_extra = self[field.field_type]

    def get_metadata_structs(self):
        '''
        Generate metadata structs for all structs that this global namespace knows about,
        ordered by key and separated by newlines.
        '''
        return "\n".join(str(self[struct_name]) for struct_name in sorted(self)).strip()
diff --git a/tools/syz-headerparser/headerlib/header_preprocessor.py b/tools/syz-headerparser/headerlib/header_preprocessor.py
new file mode 100644
index 000000000..429d9688e
--- /dev/null
+++ b/tools/syz-headerparser/headerlib/header_preprocessor.py
@@ -0,0 +1,142 @@
+# Copyright 2017 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+'''
+This module provides classes which implement header file preprocessing.
+'''
+
+import logging
+import ntpath
+import os
+import subprocess
+import tempfile
+import traceback
+
+import pycparser
+
# Skeleton of the temporary C translation unit that is run through `gcc -E` before
# pycparser sees it. `%(include_lines)s` receives caller-supplied text and
# `%(header_file_includes)s` receives one `#include "..."` line per header under test.
# _GNU_SOURCE is defined before the first #include because feature test macros only
# take effect when defined ahead of every header inclusion.
template = '''
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <stdbool.h>

// ------ MAKE PYCPARSER HAPPY ------
#define __attribute__(...)
#define __inline inline
#define __restrict
#define __extension__
// #define __sighandler_t int
#define __user

#define __asm__(...)
#define __volatile__(...)
#define __signed__ signed
#define __int128_t unsigned long long // Hacky
#define __alignof__(...) 0

#define INIT // regex
typedef unsigned int size_t;
// ------ MAKE PYCPARSER HAPPY ------

#include <stdint.h>
%(include_lines)s
%(header_file_includes)s
'''
+
+
class HeaderFilePreprocessorException(Exception):
    '''Raised when preprocessing or parsing of the header files fails.'''
+
+
class HeaderFilePreprocessor(object):
    '''
    Given C header filenames, perform pre-processing and return an
    ast that can be used for further processing.

    Construction copies the headers into a fresh temporary directory, writes a wrapper
    source file that includes them, and runs it through `gcc -E -P`. `get_ast()` then
    parses the preprocessed output with pycparser.

    NOTE: the temporary directory is not removed by this class.

    Usage :

    >>> import tempfile
    >>> t = tempfile.NamedTemporaryFile(mode='w')
    >>> contents = """
    ... struct ARRAY_OF_POINTERS_CONTAINER {
    ...     unsigned int *ptr[10];
    ...     int **n;
    ... };
    ...
    ... struct ARRAY_CONTAINER {
    ...     int g[10];
    ...     int h[20][30];
    ... };
    ...
    ... struct REGULAR_STRUCT {
    ...     int x;
    ...     char *y;
    ...     void *ptr;
    ... };
    ...
    ... struct STRUCT_WITH_STRUCT_PTR {
    ...     struct REGULAR_STRUCT *struct_ptr;
    ...     int z;
    ... };
    ... """
    >>> _ = t.write(contents); t.flush()
    >>> h = HeaderFilePreprocessor([t.name])
    >>> ast = h.get_ast()
    >>> print(type(ast))
    <class 'pycparser.c_ast.FileAST'>
    '''

    def __init__(self, filenames, include_lines='', loglvl=logging.INFO):
        self.filenames = filenames
        self.include_lines = include_lines
        self._setuplogging(loglvl)
        self._mktempfiles()
        self._copyfiles()
        self._gcc_preprocess()

    def execute(self, cmd, stdout=None):
        '''
        Run `cmd` and wait for it to finish.

        `cmd` may be an argument list (run directly, no shell involved) or, for
        backward compatibility, a single string (run through the shell). `stdout`
        is an optional file object that receives the command's standard output.

        Returns the exit status. A non-zero status is logged as a warning rather than
        raised, since partially preprocessed output may still be usable.
        Raises HeaderFilePreprocessorException if the command cannot be started or
        waited for.
        '''
        self.logger.debug('HeaderFilePreprocessor.execute: %s', cmd)
        use_shell = not isinstance(cmd, (list, tuple))
        try:
            status = subprocess.Popen(cmd, shell=use_shell, stdout=stdout).wait()
        except OSError as exception:
            raise HeaderFilePreprocessorException(exception)
        if status != 0:
            self.logger.warning('HeaderFilePreprocessor.execute: %s exited with status %s',
                                cmd, status)
        return status

    def _setuplogging(self, loglvl):
        '''Attach a stream handler to the class-named logger and set its level.'''
        self.logger = logging.getLogger(self.__class__.__name__)
        # The logger is shared between instances; add the handler only once so messages
        # are not printed once per instance created.
        if not self.logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(logging.Formatter('DEBUG:%(name)s:%(message)s'))
            self.logger.addHandler(sh)
        self.logger.setLevel(loglvl)

    def _copyfiles(self):
        '''
        Copy every header into the temporary directory.

        The copy is done in-process instead of via a shell `cp` command so that
        filenames containing spaces or shell metacharacters are handled correctly.
        Raises HeaderFilePreprocessorException if a header cannot be copied.
        '''
        import shutil

        for name in self.filenames:
            # Use the same basename rule as the generated #include lines.
            target = os.path.join(self.tempdir, ntpath.basename(name))
            try:
                shutil.copy(name, target)
            except (IOError, OSError) as exception:
                raise HeaderFilePreprocessorException(exception)

    def _mktempfiles(self):
        '''Create the temporary directory and write the wrapper source file into it.'''
        self.tempdir = tempfile.mkdtemp()
        self.temp_sourcefile = os.path.join(self.tempdir, 'source.c')
        self.temp_objectfile = os.path.join(self.tempdir, 'source.o')
        self.logger.debug('HeaderFilePreprocessor._mktempfiles: sourcefile=%s objectfile=%s',
                          self.temp_sourcefile, self.temp_objectfile)

        # Headers are included by basename because they are copied next to source.c.
        header_file_includes = ''.join('#include "%s"\n' % ntpath.basename(name)
                                       for name in self.filenames)
        source = template % {'include_lines': self.include_lines,
                             'header_file_includes': header_file_includes}

        with open(self.temp_sourcefile, 'w') as handle:
            handle.write(source)

    def _gcc_preprocess(self):
        '''Preprocess the wrapper source with gcc; output goes to `temp_objectfile`.'''
        with open(self.temp_objectfile, 'w') as preprocessed:
            self.execute(['gcc', '-I.', '-E', '-P', '-c', self.temp_sourcefile],
                         stdout=preprocessed)

    def _get_ast(self):
        return pycparser.parse_file(self.temp_objectfile)

    def get_ast(self):
        '''
        Parse the preprocessed output and return the pycparser ast.

        Raises HeaderFilePreprocessorException on a pycparser parse error.
        '''
        try:
            return self._get_ast()
        except pycparser.plyparser.ParseError as e:
            raise HeaderFilePreprocessorException(e)
diff --git a/tools/syz-headerparser/headerlib/struct_walker.py b/tools/syz-headerparser/headerlib/struct_walker.py
new file mode 100644
index 000000000..94cc89895
--- /dev/null
+++ b/tools/syz-headerparser/headerlib/struct_walker.py
@@ -0,0 +1,250 @@
+# Copyright 2017 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+'''
+This module provides classes which implement AST traversal in order to extract
+items belonging to a struct.
+'''
+
+import collections
+import logging
+
+from pycparser import c_ast
+from header_preprocessor import HeaderFilePreprocessor
+
+
class StructWalkerException(Exception):
    '''Raised by StructWalker.'''
+
+
class StructWalker(c_ast.NodeVisitor):
    '''
    Given an ast obtained by parsing a header file, return a hierarchy
    dictionary. The ast is expected to be of type pycparser.c_ast.FileAST.

    The hierarchy maps each named struct to a list of (type, identifier) tuples.
    Members of anonymous structs/unions are flattened into the enclosing struct with
    identifiers of the form "member.field".

    Usage :

    >>> import tempfile
    >>> t = tempfile.NamedTemporaryFile(mode='w')
    >>> contents = """
    ... #define STRUCT_SIZE 1337
    ... struct ARRAY_OF_POINTERS_CONTAINER {
    ...     unsigned int *ptr[10];
    ...     int **n;
    ... };
    ... struct ARRAY_CONTAINER {
    ...     int g[10];
    ...     int h[20][30];
    ... };
    ... struct REGULAR_STRUCT {
    ...     int x;
    ...     char *y;
    ...     void *ptr;
    ... };
    ... struct STRUCT_WITH_STRUCT_PTR {
    ...     struct REGULAR_STRUCT *struct_ptr;
    ...     int z;
    ... };
    ... struct STRUCT_WITH_STRUCT_INST {
    ...     struct REGULAR_STRUCT regular_struct_inst;
    ...     int a;
    ... };
    ... struct STRUCT_WITH_STRUCT_ARRAY {
    ...     struct REGULAR_STRUCT regular_struct_array[100];
    ...     int b;
    ... };
    ... struct STRUCT_WITH_ANONYMOUS_STRUCT {
    ...     struct {
    ...         int g;
    ...         int h;
    ...         int i;
    ...     } anonymous_struct;
    ... };
    ... struct STRUCT_WITH_ANONYMOUS_UNION {
    ...     union {
    ...         int t;
    ...         char r[100];
    ...     } anonymous_union;
    ... };
    ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO {
    ...     struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE];
    ... };
    ... struct STRUCT_WITH_2D_ARRAY_INST {
    ...     struct REGULAR_STRUCT regular_struct_array_2D[10][10];
    ... };
    ... struct NESTED_ANONYMOUS_STRUCT {
    ...     struct {
    ...         int x;
    ...         struct {
    ...             int y;
    ...             int z;
    ...         } level_2;
    ...     } level_1;
    ... };
    ... """
    >>> _ = t.write(contents); t.flush()
    >>> struct_walker = StructWalker(filenames=[t.name])
    >>> local_hierarchy = struct_walker.generate_local_hierarchy()
    >>> for k in sorted(local_hierarchy):
    ...     print(k)
    ...     print(local_hierarchy[k])
    ARRAY_CONTAINER
    [('int[10]', 'g'), ('int[20][30]', 'h')]
    ARRAY_OF_POINTERS_CONTAINER
    [('unsigned int*[10]', 'ptr'), ('int**', 'n')]
    NESTED_ANONYMOUS_STRUCT
    [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')]
    REGULAR_STRUCT
    [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')]
    STRUCT_WITH_2D_ARRAY_INST
    [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')]
    STRUCT_WITH_ANONYMOUS_STRUCT
    [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')]
    STRUCT_WITH_ANONYMOUS_UNION
    [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')]
    STRUCT_WITH_STRUCT_ARRAY
    [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')]
    STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO
    [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')]
    STRUCT_WITH_STRUCT_INST
    [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')]
    STRUCT_WITH_STRUCT_PTR
    [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')]
    '''

    def __init__(self, ast=None, filenames=None, include_lines='', loglvl=logging.INFO):
        '''
        Either `ast` (an already parsed FileAST) or `filenames` (headers to preprocess
        and parse) must be given; `ast` takes precedence when both are supplied.

        Raises StructWalkerException when neither is given.
        '''
        super(StructWalker, self).__init__()
        self.ast = ast
        # A None default avoids sharing one mutable list between all instances.
        self.filenames = filenames if filenames is not None else []

        if not self.filenames and not ast:
            raise StructWalkerException('Specify either "filenames" or "ast" to create '
                                        'StructWalker object')

        if not self.ast:
            self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines,
                                              loglvl=loglvl).get_ast()

        self.include_lines = include_lines
        self.local_structs_hierarchy = {}
        self._setuplogging(loglvl)

    def _setuplogging(self, loglvl):
        '''Attach a stream handler to the class-named logger and set its level.'''
        self.logger = logging.getLogger(self.__class__.__name__)
        # The logger is shared between instances; add the handler only once so messages
        # are not printed once per instance created.
        if not self.logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(logging.Formatter('DEBUG:%(name)s:%(message)s'))
            self.logger.addHandler(sh)
        self.logger.setLevel(loglvl)

    @staticmethod
    def _array_dim(dim):
        '''
        Convert the `dim` node of an ArrayDecl into a printable array size.

        Returns '' when no size is given (e.g. `int x[];`) and an int for integer
        literals, including hex/octal notation and u/l suffixes.
        Raises StructWalkerException for any size that is not an integer literal.
        '''
        if dim is None:
            return ''

        literal = getattr(dim, 'value', None)
        if literal is None:
            raise StructWalkerException('Array size is not an integer constant')

        digits = literal.rstrip('uUlL')
        try:
            # Base 0 understands decimal and 0x-prefixed hex literals.
            return int(digits, 0)
        except ValueError:
            pass
        try:
            # C style octal ("010") is rejected by base 0 on Python 3.
            return int(digits, 8)
        except ValueError:
            raise StructWalkerException('Unsupported array size %r' % (literal,))

    def _format_item(self, processed_item):
        '''
        Turn the attributes collected for one declaration into a single-element list
        holding a (type, identifier) tuple. Pointer stars are appended before array
        sizes; function pointers carry no stars.
        '''
        self.logger.debug('_format_item : %s', processed_item)

        fmt_type = ' '.join(processed_item['type'])

        if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item:
            fmt_type += '*' * processed_item['is_ptr']

        if 'is_array' in processed_item and 'array_size' in processed_item:
            fmt_type += ''.join('[%s]' % (dim,) for dim in processed_item['array_size'])

        return [(fmt_type, processed_item['identifier'])]

    def _recursive_process_item(self, item_ast, processed_item, parent):
        '''
        Walk down the declaration chain of one struct member, accumulating pointer,
        array and identifier information in `processed_item`.

        Returns a list of (type, identifier) tuples: one entry for an ordinary member,
        several for an anonymous struct/union whose members are flattened.
        Raises StructWalkerException for declarations that cannot be represented.
        '''
        self.logger.debug('--- _recursive_process_item : %s', type(item_ast))

        if isinstance(item_ast, c_ast.Decl):
            processed_item['identifier'] = item_ast.name
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        if isinstance(item_ast, c_ast.TypeDecl):
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        if isinstance(item_ast, c_ast.IdentifierType):
            if not item_ast.names:
                raise StructWalkerException('Encountered a declaration without a type name')
            processed_item['type'] = item_ast.names
            return self._format_item(processed_item)

        if isinstance(item_ast, (c_ast.Struct, c_ast.Union)):
            if item_ast.name:
                # NOTE: named unions are labelled "struct" here as well.
                processed_item['type'] = ['struct %s' % (item_ast.name)]
                return self._format_item(processed_item)

            desc = self._traverse_ast(item_ast, toplevel=False)
            if desc is None:
                raise StructWalkerException('Encountered anonymous_struct/anonymous_union '
                                            'with no members')
            try:
                # `parent` is the TypeDecl carrying the member name; a member declared
                # without a name has no `declname` to prefix with.
                prefix = parent.declname
            except AttributeError:
                self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name')
                raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name')

            return [(entry[0], '%s.%s' % (prefix, entry[1])) for entry in desc[1]]

        if isinstance(item_ast, c_ast.PtrDecl):
            processed_item['is_ptr'] = processed_item.get('is_ptr', 0) + 1
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        if isinstance(item_ast, c_ast.ArrayDecl):
            processed_item['is_array'] = True
            processed_item.setdefault('array_size', []).append(self._array_dim(item_ast.dim))
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        if isinstance(item_ast, c_ast.Enum):
            processed_item['type'] = ['enum %s' % (item_ast.name)]
            return self._format_item(processed_item)

        if isinstance(item_ast, c_ast.FuncDecl):
            processed_item['is_fnptr'] = True
            processed_item['type'] = ['void (*)()']
            return self._format_item(processed_item)

        # Falling through would hand None to list.extend() in _traverse_ast.
        raise StructWalkerException('Unsupported declaration node: %s'
                                    % (type(item_ast).__name__,))

    def _traverse_ast(self, node, toplevel=True):
        '''
        Collect the members of the struct/union `node`.

        Returns (node.name, items_list), or None when the node has no children or is
        an anonymous top-level struct.
        '''
        # Argument structs are used as types, hence anonymous top-level
        # structs are ignored.
        if toplevel and not node.name:
            return None

        children = node.children()
        if not children:
            return None

        self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord)
        items_list = []
        for _, child in children:
            items_list.extend(self._recursive_process_item(child, {}, None))

        self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list)))
        return (node.name, items_list)

    def visit_Struct(self, node, *a):
        '''
        NodeVisitor hook: record the members of each struct the first time it is seen.
        Structs that cannot be processed are logged and skipped.
        '''
        if node.name in self.local_structs_hierarchy:
            self.logger.info('Encountered %s again. Ignoring.', repr(node.name))
            return

        try:
            desc = self._traverse_ast(node)
        except StructWalkerException as e:
            self.logger.info('-- Exception raised by StructWalkerException in %s,'
                             'inspect manually.',
                             repr(node.name))
            self.logger.info(str(e))
            return

        if not desc:
            return

        struct_name, struct_items = desc
        self.local_structs_hierarchy[struct_name] = struct_items

    def generate_local_hierarchy(self):
        '''Visit the whole ast and return the {struct name: items} dictionary.'''
        self.visit(self.ast)
        return self.local_structs_hierarchy
diff --git a/tools/syz-headerparser/headerparser.py b/tools/syz-headerparser/headerparser.py
new file mode 100644
index 000000000..e9f64f2f6
--- /dev/null
+++ b/tools/syz-headerparser/headerparser.py
@@ -0,0 +1,61 @@
+# Copyright 2017 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+'''
+This script takes as input a list of header files and generates metadata
+files to make syzkaller device descriptions.
+'''
+
+import argparse
+import logging
+import sys
+import traceback
+
+from headerlib.header_preprocessor import HeaderFilePreprocessorException
+from headerlib.container import GlobalHierarchy
+
+
def main():
    """
    Command-line entry point: parse the given headers and print struct metadata.

    Example:
        python headerparser.py --filenames=A.h,B.h

    Exits with status -1 if the headers cannot be preprocessed or parsed.
    """

    parser = argparse.ArgumentParser(description='Parse header files to output fuzzer '
                                                 'struct metadata.')
    parser.add_argument('--filenames',
                        help='comma-separated header filenames',
                        dest='filenames',
                        required=True)
    parser.add_argument('--debug',
                        help='print debug-information at every level of parsing',
                        action='store_true')
    parser.add_argument('--include',
                        help='include the specified file as the first line of the processed header files',
                        required=False,
                        const='',
                        nargs='?')

    args = parser.parse_args()

    loglvl = logging.DEBUG if args.debug else logging.INFO

    include_lines = ''
    if args.include:
        with open(args.include, 'r') as include_file:
            include_lines = include_file.read()

    # Ignore blank entries such as the one produced by a trailing comma.
    filenames = [name.strip() for name in args.filenames.split(',') if name.strip()]

    try:
        gh = GlobalHierarchy(filenames=filenames,
                             loglvl=loglvl, include_lines=include_lines)
    except HeaderFilePreprocessorException:
        # Report only the final traceback line (exception type and message).
        excdata = traceback.format_exc().splitlines()
        logging.error(excdata[-1])
        sys.exit(-1)

    # The parenthesised single-argument form behaves the same on Python 2 and 3.
    print(gh.get_metadata_structs())
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/syz-headerparser/test_headers/th_a.h b/tools/syz-headerparser/test_headers/th_a.h
new file mode 100644
index 000000000..f2afc8752
--- /dev/null
+++ b/tools/syz-headerparser/test_headers/th_a.h
@@ -0,0 +1,22 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#ifndef __TEST_HEADER_A
+#define __TEST_HEADER_A
+
+#define RANDOM_MACRO_1 1
+#define RANDOM_MACRO_2 2
+
/*
 * Test struct with a struct pointer, a const char pointer, an unsigned int,
 * two bools and a member of a type that this header does not declare.
 */
struct A {
	struct B *B_item;	/* struct B is not declared in this header */
	const char *char_ptr;
	unsigned int an_unsigned_int;
	/*
	 * Some comments
	 */
	bool a_bool;		/* this header does not itself include <stdbool.h> */
	bool another_bool;
	some_type var;		/* some_type is not declared here; presumably supplied externally - TODO confirm */
};
+
+#endif /* __TEST_HEADER_A */
diff --git a/tools/syz-headerparser/test_headers/th_b.h b/tools/syz-headerparser/test_headers/th_b.h
new file mode 100644
index 000000000..4b32e6041
--- /dev/null
+++ b/tools/syz-headerparser/test_headers/th_b.h
@@ -0,0 +1,27 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#ifndef _TEST_HEADER_B
+#define _TEST_HEADER_B
+
+#include <linux/types.h> /* header comment */
+
/* Test enum whose two enumerators are distinct single-bit values. */
enum random_enum {
	ONE = 1<<0,
	TWO = 1<<1,
};
+
/* Test struct holding two unsigned long members. */
struct B {
	unsigned long B1;
	unsigned long B2;
};
+
/* Test struct with an int followed by a member of an untagged union type. */
struct struct_containing_union {
	int something;
	/* The union type has no tag; the member itself is named a_union. */
	union {
		char *a_char;
		struct B *B_ptr;
	} a_union;
};
+
+#endif /* _TEST_HEADER_B */