first commit

This commit is contained in:
2025-08-07 13:15:31 +01:00
commit d903893b4c
21854 changed files with 4461308 additions and 0 deletions

View File

@ -0,0 +1,124 @@
import glob
import os
import os.path
# XXX need tests:
# * walk_tree()
# * glob_tree()
# * iter_files_by_suffix()
# File suffixes treated as C source files by this tool.
C_SOURCE_SUFFIXES = ('.c', '.h')
def _walk_tree(root, *,
_walk=os.walk,
):
# A wrapper around os.walk that resolves the filenames.
for parent, _, names in _walk(root):
for name in names:
yield os.path.join(parent, name)
def walk_tree(root, *,
              suffix=None,
              walk=_walk_tree,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.
    """
    if suffix and not isinstance(suffix, str):
        raise ValueError('suffix must be a string')
    for filename in walk(root):
        if not suffix or filename.endswith(suffix):
            yield filename
def glob_tree(root, *,
              suffix=None,
              _glob=glob.iglob,
              _escape=glob.escape,
              _join=os.path.join,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.
    """
    suffix = suffix or ''
    if not isinstance(suffix, str):
        raise ValueError('suffix must be a string')
    # Direct children first, then everything nested below.
    for pattern in (f'*{suffix}', f'**/*{suffix}'):
        yield from _glob(_join(_escape(root), pattern))
def iter_files(root, suffix=None, relparent=None, *,
               get_files=None,
               _glob=glob_tree,
               _walk=walk_tree,
               ):
    """Yield each file in the tree under the given directory name.

    If "root" is a non-string iterable then do the same for each of
    those trees.

    If "suffix" is provided then only files with that suffix will
    be included.  A non-string iterable of suffixes is also accepted
    (filtering then happens here rather than in the walker).

    if "relparent" is provided then it is used to resolve each
    filename as a relative path.
    """
    if get_files is None:
        get_files = os.walk
    if not isinstance(root, str):
        # Multiple roots: recurse once per tree.
        roots = root
        for root in roots:
            yield from iter_files(root, suffix, relparent,
                                  get_files=get_files,
                                  _glob=_glob, _walk=_walk)
        return

    # Use the right "walk" function.
    if get_files in (glob.glob, glob.iglob, glob_tree):
        get_files = _glob
    else:
        # Wrap plain walkers (e.g. os.walk) so suffix filtering and
        # name resolution happen via walk_tree() in one place.
        _files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
        get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))

    # Handle a single suffix.
    if suffix and not isinstance(suffix, str):
        # Multiple suffixes: walk unfiltered and filter below.
        filenames = get_files(root)
        suffix = tuple(suffix)
    else:
        filenames = get_files(root, suffix=suffix)
        suffix = None

    for filename in filenames:
        if suffix and not isinstance(suffix, str):  # multiple suffixes
            if not filename.endswith(suffix):
                continue
        if relparent:
            filename = os.path.relpath(filename, relparent)
        yield filename
def iter_files_by_suffix(root, suffixes, relparent=None, *,
                         walk=walk_tree,
                         _iter_files=iter_files,
                         ):
    """Yield each file in the tree that has the given suffixes.

    Unlike iter_files(), the results are in the original suffix order.
    (Note: the "walk" parameter is accepted but not used here.)
    """
    if isinstance(suffixes, str):
        suffixes = [suffixes]
    # XXX Ignore repeated suffixes?
    for one_suffix in suffixes:
        yield from _iter_files(root, one_suffix, relparent)

View File

@ -0,0 +1,138 @@
from collections import namedtuple
import re
from .util import classonly, _NTBase
# XXX need tests:
# * ID.match()
# Placeholder used where a filename/funcname is not known.
UNKNOWN = '???'

# A valid identifier: a single letter, an underscore-prefixed name
# (which must contain at least one letter), or letter-then-word-chars.
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration."""

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Coerce a string, 1-tuple, or 2-tuple into an ID."""
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        # Each field is coerced to a non-empty string or None.
        self = super().__new__(
            cls,
            filename=str(filename) if filename else None,
            funcname=str(funcname) if funcname else None,
            name=str(name) if name else None,
        )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.name:
            raise TypeError('missing name')
        else:
            if not NAME_RE.match(self.name):
                raise ValueError(
                    f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Fixed: this message previously said "name", which made
                # failures here indistinguishable from bad "name" values.
                raise ValueError(
                    f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        # Locals are exactly the IDs declared inside a function.
        return self.funcname is not None

    def match(self, other, *,
              match_files=(lambda f1, f2: f1 == f2),
              ):
        """Return True if the two match.

        At least one of the two must be completely valid (no UNKNOWN
        anywhere).  Otherwise False is returned.  The remaining one
        *may* have UNKNOWN for both funcname and filename.  It must
        have a valid name though.

        The caller is responsible for knowing which of the two is valid
        (and which to use if both are valid).
        """
        # First check the name.
        if self.name is None:
            return False
        if other.name != self.name:
            return False

        # Then check the filename.
        if self.filename is None:
            return False
        if other.filename is None:
            return False
        if self.filename == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            elif self.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif other.filename == UNKNOWN:
            # "self" must be the valid one.
            if self.funcname == UNKNOWN:
                return False
            elif other.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif not match_files(self.filename, other.filename):
            return False

        # Finally, check the funcname.
        if self.funcname == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            else:
                return other.funcname is not None
        elif other.funcname == UNKNOWN:
            # "self" must be the valid one.
            if self.funcname == UNKNOWN:
                return False
            else:
                return self.funcname is not None
        elif self.funcname == other.funcname:
            # Both are valid.
            return True

        return False

View File

@ -0,0 +1,11 @@
def basic(variables, *,
          _print=print):
    """Print each row simply."""
    for variable in variables:
        if variable.funcname:
            prefix = f'{variable.filename}:{variable.funcname}():{variable.name}'
        else:
            prefix = f'{variable.filename}:{variable.name}'
        # Left-align the location so the types line up in a column.
        _print(f'{prefix:<64} {variable.vartype}')

View File

@ -0,0 +1,243 @@
import csv
import subprocess
# Sentinel distinguishing "argument not passed" from None.
_NOT_SET = object()
def run_cmd(argv, **kwargs):
    """Run the command and return its captured stdout (as text).

    A non-zero exit raises subprocess.CalledProcessError (check=True).
    """
    completed = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
        **kwargs
    )
    return completed.stdout
def read_tsv(infile, header, *,
             _open=open,
             _get_reader=csv.reader,
             ):
    """Yield each row of the given TSV (tab-separated) file."""
    if isinstance(infile, str):
        # Got a filename: open it and recurse with the file object.
        with _open(infile, newline='') as opened:
            yield from read_tsv(opened, header,
                                _open=_open,
                                _get_reader=_get_reader,
                                )
        return
    lines = iter(infile)

    # The first line must match the expected header exactly.
    try:
        actual = next(lines).strip()
    except StopIteration:
        actual = ''
    if actual != header:
        raise ValueError(f'bad header {actual!r}')

    for row in _get_reader(lines, delimiter='\t'):
        yield tuple(field.strip() for field in row)
def write_tsv(outfile, header, rows, *,
              _open=open,
              _get_writer=csv.writer,
              ):
    """Write each of the rows to the given TSV (tab-separated) file."""
    if isinstance(outfile, str):
        # Got a filename: open it and recurse with the file object.
        with _open(outfile, 'w', newline='') as opened:
            return write_tsv(opened, header, rows,
                             _open=_open,
                             _get_writer=_get_writer,
                             )
    if isinstance(header, str):
        header = header.split('\t')
    writer = _get_writer(outfile, delimiter='\t')
    writer.writerow(header)
    for row in rows:
        # None is serialized as an empty cell.
        writer.writerow('' if value is None else str(value)
                        for value in row)
class Slot:
    """A descriptor that provides a slot.

    This is useful for types that can't have slots via __slots__,
    e.g. tuple subclasses.

    Values are cached per-instance, keyed by id(), so the host class
    itself never stores the attribute.
    """

    __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')

    def __init__(self, initial=_NOT_SET, *,
                 default=_NOT_SET,
                 readonly=False,
                 ):
        self.initial = initial
        self.default = default
        self.readonly = readonly
        # The instance cache is not inherently tied to the normal
        # lifetime of the instances.  So must do something in order to
        # avoid keeping the instances alive by holding a reference here.
        # Ideally we would use weakref.WeakValueDictionary to do this.
        # However, most builtin types do not support weakrefs.  So
        # instead we monkey-patch __del__ on the attached class to clear
        # the instance.
        self.instances = {}
        self.name = None

    def __set_name__(self, cls, name):
        # A Slot instance may only be attached once.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name
        # Track every slot name on the class so __del__ can clear them.
        try:
            slotnames = cls.__slot_names__
        except AttributeError:
            slotnames = cls.__slot_names__ = []
        slotnames.append(name)
        self._ensure___del__(cls, slotnames)

    def __get__(self, obj, cls):
        if obj is None:  # called on the class
            return self
        try:
            value = self.instances[id(obj)]
        except KeyError:
            # First access: fall back to the initial value (which is
            # then cached) or the default.
            if self.initial is _NOT_SET:
                value = self.default
            else:
                value = self.initial
            self.instances[id(obj)] = value
        if value is _NOT_SET:
            raise AttributeError(self.name)
        # XXX Optionally make a copy?
        return value

    def __set__(self, obj, value):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # XXX Optionally coerce?
        self.instances[id(obj)] = value

    def __delete__(self, obj):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # Deleting resets to the default rather than removing the entry.
        self.instances[id(obj)] = self.default  # XXX refleak?

    def _ensure___del__(self, cls, slotnames):  # See the comment in __init__().
        # Wrap any pre-existing __del__ (exactly once, via the _slotted
        # marker) so finalizing an instance clears its cache entries.
        try:
            old___del__ = cls.__del__
        except AttributeError:
            old___del__ = (lambda s: None)
        else:
            if getattr(old___del__, '_slotted', False):
                return

        def __del__(_self):
            for name in slotnames:
                delattr(_self, name)
            old___del__(_self)
        __del__._slotted = True
        cls.__del__ = __del__

    def set(self, obj, value):
        """Update the cached value for an object.

        This works even if the descriptor is read-only.  This is
        particularly useful when initializing the object (e.g. in
        its __new__ or __init__).
        """
        self.instances[id(obj)] = value
class classonly:
    """A non-data descriptor that makes a value only visible on the class.

    This is like the "classmethod" builtin, but does not show up on
    instances of the class.  It may be used as a decorator.
    """

    def __init__(self, value):
        self.value = value
        # Reuse classmethod's binding machinery for class access.
        self.getter = classmethod(value).__get__
        self.name = None

    def __set_name__(self, cls, name):
        # A classonly instance may only be attached once.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        # Instance access is deliberately blocked.
        if obj is not None:
            raise AttributeError(self.name)
        # called on the class
        return self.getter(None, cls)
class _NTBase:
    """Shared base behavior for the namedtuple-derived types here."""

    __slots__ = ()

    @classonly
    def from_raw(cls, raw):
        """Coerce "raw" (None/instance/str/mapping/iterable) to an instance."""
        if not raw:
            return None
        elif isinstance(raw, cls):
            return raw
        elif isinstance(raw, str):
            return cls.from_string(raw)
        else:
            if hasattr(raw, 'items'):
                return cls(**raw)
            try:
                args = tuple(raw)
            except TypeError:
                pass
            else:
                return cls(*args)
        raise NotImplementedError

    @classonly
    def from_string(cls, value):
        """Return a new instance based on the given string."""
        raise NotImplementedError

    @classmethod
    def _make(cls, iterable):  # The default _make() is not subclass-friendly.
        return cls.__new__(cls, *iterable)

    # XXX Always validate?
    #def __init__(self, *args, **kwargs):
    #    self.validate()

    # XXX The default __repr__() is not subclass-friendly (where the name changes).
    #def __repr__(self):
    #    _, _, sig = super().__repr__().partition('(')
    #    return f'{self.__class__.__name__}({sig}'

    # To make sorting work with None:
    def __lt__(self, other):
        # Tuples containing None sort before any fully-populated tuple;
        # only fall back when tuple comparison itself fails.
        try:
            return super().__lt__(other)
        except TypeError:
            if None in self:
                return True
            elif None in other:
                return False
            else:
                raise

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        return

    # XXX Always validate?
    #def _replace(self, **kwargs):
    #    obj = super()._replace(**kwargs)
    #    obj.validate()
    #    return obj

View File

@ -0,0 +1,339 @@
import re
import shlex
import subprocess
from ..common.info import UNKNOWN
from . import source
# A C identifier.  (Fixed: the old pattern used "[a-zA-z]", which also
# matched "[", "^", "`", etc., and its alternation order meant multi-char
# names only ever matched their first letter in prefix matches.)
IDENTIFIER = r'(?:_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# NOTE: this must be an f-string so {IDENTIFIER} is interpolated; as a
# plain raw string the typedef/struct branches matched the literal text
# "{IDENTIFIER}" and were dead.  The former empty alternative after the
# integer-types group (") |") was dropped: it let this whole spec match
# zero-width, which made the *_START_RE regexes match every line.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          )
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''
# NOTE(review): the bare {IDENTIFIER} branch precedes the struct/union
# branch, so "struct spam" matches just "struct" — confirm intended.
POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''
#STRUCT = r'''(?:
#        (?:struct|(struct\s+%s))\s*[{]
#        [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#UNION = r'''(?:
#        (?:union|(union\s+%s))\s*[{]
#        [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
#        ({VAR_TYPE_SPEC}) |
#        ({STRUCT}) |
#        ({UNION})
#        )'''

FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
#GLOBAL_VAR_START = rf'''(?:
#        (?:
#          (?:
#            extern |
#            static
#           )\s+
#         )?
#        (?:
#           {TYPE_QUAL}
#           (?:\s+{TYPE_QUAL})?
#         )?\s+
#        {VAR_TYPE_SPEC}
#        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
def iter_global_declarations(lines):
    """Yield (decl, body) for each global declaration in the given lines.

    For function definitions the header is reduced to one line and
    the body is provided as-is.  For other compound declarations (e.g.
    struct) the entire declaration is reduced to one line and "body"
    is None.  Likewise for simple declarations (e.g. variables).

    Declarations inside function bodies are ignored, though their text
    is provided in the function body.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not GLOBAL_DECL_START_RE.match(line):
            continue
        # We only need functions here, since we only need locals for now.
        if line.endswith(';'):
            # A simple declaration (no body) — skip it.
            continue
        if line.endswith('{') and '(' not in line:
            # A non-function compound (e.g. struct/union) — skip it.
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        decl = line
        while '{' not in line:  # assume no inline structs, etc.
            try:
                line = next(lines)
            except StopIteration:
                return
            decl += ' ' + line
        body, end = _extract_block(lines)
        if end is None:
            # Ran out of lines before the closing brace — bail out.
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
def iter_local_statements(lines):
    """Yield (lines, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is reduced
    to a single line.  For compound statements, "blocks" is a pair of
    (header, body) for each block in the statement.  The headers are
    reduced to a single line each, but the bodies are provided as-is.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not LOCAL_STMT_START_RE.match(line):
            continue
        stmt = line
        blocks = None
        if not line.endswith(';'):
            # XXX Support compound & multiline simple statements.
            #blocks = []
            continue
        yield (stmt, blocks)
def _extract_block(lines):
end = None
depth = 1
body = []
for line in lines:
depth += line.count('{') - line.count('}')
if depth == 0:
end = line
break
body.append(line)
return '\n'.join(body), end
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition."""
    # Everything before the body, minus the trailing open brace, is
    # the (possibly multi-line) header.
    header, _, trailer = stmt.partition(body)
    assert trailer.strip() == '}'
    assert header.strip().endswith('{')
    header, _, _ = header.rpartition('{')
    signature = ' '.join(header.strip().splitlines())
    # The name is the last word before the argument list.
    _, _, name = signature.split('(')[0].strip().rpartition(' ')
    assert name
    return name, signature
#TYPE_SPEC = rf'''(?:
# )'''
#VAR_DECLARATOR = rf'''(?:
# )'''
#VAR_DECL = rf'''(?:
# {TYPE_SPEC}+
# {VAR_DECLARATOR}
# \s*
# )'''
#VAR_DECLARATION = rf'''(?:
# {VAR_DECL}
# (?: = [^=] [^;]* )?
# ;
# )'''
#
#
#def parse_variable(decl, *, inFunc=False):
# """Return [(name, storage, vartype)] for the given variable declaration."""
# ...
def _parse_var(stmt):
    """Return (name, vartype) for the given variable declaration."""
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    # The name is whatever follows the type spec, up to any initializer.
    name = stmt[len(vartype):].partition('=')[0].strip()
    if name.startswith('('):
        # A parenthesized declarator, e.g. "(*spam)(...)" — presumably
        # a function pointer; fold the inner part back into the type.
        name, _, after = name[1:].partition(')')
        assert after
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Any '*' tokens belong to the type, not the name.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'
    vartype = vartype.strip()
    # Collapse runs of spaces introduced by the munging above.
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')
    return name, vartype
def extract_storage(decl, *, infunc=None):
    """Return the storage class for the given declaration.

    The default storage is "implicit" (or "local" if infunc is True).
    Note that only the storage is returned (not a (storage, vartype)
    pair, despite the commented-out alternatives).
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
        #return 'static', decl.partition(' ')[2].strip()
    elif decl.startswith('extern '):
        return 'extern'
        #return 'extern', decl.partition(' ')[2].strip()
    elif re.match(r'.*\b(static|extern)\b', decl):
        # Fixed: the pattern must be a raw string; previously "\b" was
        # the backspace character, so this branch could never match.
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    Not implemented yet.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
def iter_variables(filename, *,
                   preprocessed=False,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=_parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file."""
    if preprocessed:
        # Parsing preprocessor output is not supported yet.
        raise NotImplementedError
    lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(lines):
        # At the file top-level we only have to worry about vars & funcs.
        if not body:
            # A simple global declaration (funcname is None).
            name, vartype = _parse_var(stmt)
            if name:
                yield (None, name, vartype)
        else:
            # A function definition — also report its locals.
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    # Walk the function body and any nested blocks (breadth-first via
    # the "compound" queue), yielding (name, vartype) per declaration.
    compound = [lines]
    while compound:
        body = compound.pop(0)
        bodylines = body.splitlines()
        for stmt, blocks in _iter_statements(bodylines):
            if not blocks:
                # A simple statement, possibly a declaration.
                name, vartype = _parse_var(stmt)
                if name:
                    yield (name, vartype)
            else:
                # A compound statement: scan each block header for
                # declarations and queue the bodies for later passes.
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                compound.extend(bodies)
def iter_all(filename, *,
             preprocessed=False,
             ):
    """Yield a Declaration for each one found.

    If there are duplicates, due to preprocessor conditionals, then
    they are checked to make sure they are the same.
    """
    # XXX For the moment we cheat: only variables are reported and no
    # duplicate checking happens yet.
    for funcname, varname, vartype in iter_variables(filename,
                                                     preprocessed=preprocessed):
        yield 'variable', funcname, varname, vartype

View File

@ -0,0 +1,107 @@
from ..common.info import UNKNOWN, ID
from . import declarations
# XXX need tests:
# * variables
# * variable
# * variable_from_id
def _iter_vars(filenames, preprocessed, *,
               handle_id=None,
               _iter_decls=declarations.iter_all,
               ):
    # Emit (varid, decl) for every variable declaration found in the
    # given files, skipping non-variable declarations.
    if handle_id is None:
        handle_id = ID
    for filename in filenames or ():
        decls = _iter_decls(filename,
                            preprocessed=preprocessed,
                            )
        for kind, funcname, name, decl in decls:
            if kind != 'variable':
                continue
            yield handle_id(filename, funcname, name), decl
# XXX Add a "handle_var" arg like we did for get_resolver()?
# XXX Add a "handle_var" arg like we did for get_resolver()?

def variables(*filenames,
              perfilecache=None,
              preprocessed=False,
              known=None,  # for types
              handle_id=None,
              _iter_vars=_iter_vars,
              ):
    """Yield (varid, decl) for each variable found in the given files.

    If "preprocessed" is provided (and not False/None) then it is used
    to decide which tool to use to parse the source code after it runs
    through the C preprocessor.  Otherwise the raw source is parsed
    directly.
    """
    if len(filenames) == 1 and not isinstance(filenames[0], str):
        # A single non-string argument is an iterable of filenames.
        # (Fixed: the old check "not (filenames[0], str)" built a tuple,
        # which is always truthy, so the unpacking never happened.)
        filenames, = filenames
    if perfilecache is None:
        yield from _iter_vars(filenames, preprocessed)
    else:
        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
        raise NotImplementedError
def variable(name, filenames, *,
             local=False,
             perfilecache=None,
             preprocessed=False,
             handle_id=None,
             _iter_vars=variables,
             ):
    """Return (varid, decl) for the first found variable that matches.

    If "local" is True then the first matching local variable in the
    file will always be returned.  To avoid that, pass perfilecache and
    pop each variable from the cache after using it.
    """
    found = _iter_vars(filenames,
                       perfilecache=perfilecache,
                       preprocessed=preprocessed,
                       )
    for varid, decl in found:
        if varid.name != name:
            continue
        if local:
            # Only locals (those with a funcname) qualify.
            if varid.funcname:
                if varid.funcname == UNKNOWN:
                    raise NotImplementedError
                return varid, decl
        elif not varid.funcname:
            return varid, decl
    return None, None  # No matching variable was found.
def variable_from_id(id, filenames, *,
                     perfilecache=None,
                     preprocessed=False,
                     handle_id=None,
                     _get_var=variable,
                     ):
    """Return (varid, decl) for the first found variable that matches."""
    local = False
    if isinstance(id, str):
        varname = id
    else:
        # Only locals flagged with UNKNOWN funcname are supported here.
        if id.funcname == UNKNOWN:
            local = True
        elif id.funcname:
            raise NotImplementedError
        varname = id.name
        # A known filename narrows the search to just that file.
        if id.filename and id.filename != UNKNOWN:
            filenames = [id.filename]
    return _get_var(varname, filenames,
                    local=local,
                    perfilecache=perfilecache,
                    preprocessed=preprocessed,
                    handle_id=handle_id,
                    )

View File

@ -0,0 +1,179 @@
import re
from ..common.info import UNKNOWN, ID
from .preprocessor import _iter_clean_lines
# Sentinel distinguishing "argument not passed" from None.
_NOT_SET = object()
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's lines as a list.

    Each line will have trailing whitespace removed (including newline).
    If a cache is given then it is used.
    """
    if cache is not None:
        try:
            return cache[filename]
        except KeyError:
            pass

    with _open(filename) as srcfile:
        # Preprocessor directives are dropped entirely.
        srclines = [line.rstrip()
                    for _, line in _iter_lines(srcfile)
                    if not line.startswith('#')]

    if cache is not None:
        cache[filename] = srclines
    return srclines
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line."""
    # XXX possible false negatives...
    decl, sep, _ = srcline.partition('=')
    if not sep:
        # No initializer, so the line must end the declaration itself.
        if not srcline.endswith(';'):
            return None, None
        decl = decl.strip(';')
    decl = decl.strip()
    # The name is the last identifier, ignoring any array suffix.
    m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if m is None:
        return None, None
    return m.group(1), decl
def parse_variable(srcline, funcname=None):
    """Return (varid, decl) for the variable declared on the line (or None)."""
    line = srcline.strip()

    # XXX Handle more than just static variables.
    if not line.startswith('static '):
        return None, None
    if '(' in line and '[' not in line:
        # a function
        return None, None
    return parse_variable_declaration(line)
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   _default_parse_variable=parse_variable,
                   ):
    """Yield (varid, decl) for each variable in the given source file."""
    if parse_variable is None:
        parse_variable = _default_parse_variable
    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            # Inside a function body: skip lines until the closing
            # brace at the header's indentation.
            if line == indent + '}':
                funcname = None
            continue
        else:
            # "(" on the previous line plus "{" on this one presumably
            # marks a function definition header — TODO confirm against
            # the formatting this expects.
            if '(' in prev and line == indent + '{':
                if not prev.startswith('__attribute__'):
                    funcname = prev.split('(')[0].split()[-1]
                prev = ''
                continue
            indent = line[:-len(line.lstrip())]
            prev = line
        info = parse_variable(line, funcname)
        if isinstance(info, list):
            # The parser may report several variables for one line.
            for name, _funcname, decl in info:
                yield ID(filename, _funcname, name), decl
            continue
        name, decl = info
        if name is None:
            continue
        yield ID(filename, funcname, name), decl
def _match_varid(variable, name, funcname, ignored=None):
    """Return True if the varid matches the name/funcname pair."""
    if ignored and variable in ignored:
        return False
    if variable.name != name:
        return False
    if funcname == UNKNOWN:
        # Any local will do, as long as it *is* a local.
        return bool(variable.funcname)
    return variable.funcname == funcname
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return the matching variable.

    Return None if the variable is not found.
    """
    candidates = _iter_variables(filename,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
    for varid, decl in candidates:
        if _match_varid(varid, name, funcname, ignored):
            return varid, decl
    return None
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield (varid, decl) for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting variable per given ID.
    """
    if srccache is _NOT_SET:
        srccache = {}
    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield varid, UNKNOWN
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # Fixed: this called "_find_varid", which is not defined
            # anywhere (NameError); the injected helper is the
            # "_find_symbol" parameter.  Also, find_variable() returns
            # None when nothing matches, so unpacking its result
            # directly would raise TypeError.
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            if found:
                varid, decl = found
                yield varid, decl
                used.add(varid)
                break
        else:
            yield varid, UNKNOWN

View File

@ -0,0 +1,511 @@
from collections import namedtuple
import shlex
import os
import re
from ..common import util, info
CONTINUATION = '\\' + os.linesep
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
def _coerce_str(value):
if not value:
return ''
return str(value).strip()
#############################
# directives

DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
# Note: "__DATE __" (with a stray space) was normalized to "__DATE__";
# re.VERBOSE ignored the space, but the token now matches KINDS below.
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
def parse_directive(line):
    """Return the appropriate directive for the given line."""
    text = line.strip()
    # Normalize to a single leading "#" with no space after it.
    if text.startswith('#'):
        text = '#' + text[1:].lstrip()
    # Collapse runs of spaces so the directive regexes stay simple.
    while '  ' in text:
        text = text.replace('  ', ' ')
    return _parse_directive(text)
def _parse_directive(line):
    # Parse a normalized directive line into one of the directive
    # classes below, raising ValueError for unsupported syntax.
    m = DEFINE_RE.match(line)
    if m:
        name, args, text = m.groups()
        if args:
            args = [a.strip() for a in args.split(',')]
            return Macro(name, args, text)
        else:
            return Constant(name, text)

    m = DIRECTIVE_RE.match(line)
    if not m:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = m.groups()
    if not text:
        # Only "else" and "endif" may omit the text.
        if kind not in ('else', 'endif'):
            raise ValueError(f'missing text in directive {line!r}')
    elif kind in ('else', 'endif', 'define'):
        raise ValueError(f'unexpected text in directive {line!r}')

    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
class PreprocessorDirective(util._NTBase):
    """The base class for directives."""

    __slots__ = ()

    # Every directive kind the parser understands.
    KINDS = frozenset([
        'include',
        'pragma',
        'error', 'warning',
        'define', 'undef',
        'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
        '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
        ])

    @property
    def text(self):
        # Everything after the kind, joined into one string (or None).
        return ' '.join(v for v in self[1:] if v and v.strip()) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in self.KINDS:
            raise ValueError

        # text can be anything, including None.
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        # The kind is always "define" for object-like macros.
        return super().__new__(
            cls,
            'define',
            name=_coerce_str(name) or None,
            value=_coerce_str(value) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [part.strip() for part in args.split(',')]
        if args:
            args = tuple(_coerce_str(arg) or None for arg in args)
        return super().__new__(
            cls,
            kind='define',
            name=_coerce_str(name) or None,
            args=args if args else (),
            body=_coerce_str(body) or None,
        )

    @property
    def text(self):
        # Reconstruct the "NAME(a, b) body" form of the directive.
        arglist = ', '.join(self.args)
        if self.body:
            return f'{self.name}({arglist}) {self.body}'
        else:
            return f'{self.name}({arglist})'

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            elif not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    # The conditional kinds that carry a condition expression.
    KINDS = frozenset([
        'if',
        'ifdef',
        'ifndef',
        'elseif',
        ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        # Normalize "ifdef"/"ifndef" arguments to equivalent
        # "defined(...)" expressions; other kinds keep the raw text.
        #return Condition.from_raw(raw, _kind=kind)
        condition = _coerce_str(raw)
        if not condition:
            return None

        if kind == 'ifdef':
            condition = f'defined({condition})'
        elif kind == 'ifndef':
            condition = f'! defined({condition})'

        return condition

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        self = super().__new__(
            cls,
            kind=kind or None,
            condition=cls._condition_from_raw(condition, kind),
            )
        return self

    @property
    def text(self):
        # Recover the original argument from the normalized condition
        # (the slice offsets match the wrappers added above).
        if self.kind == 'ifdef':
            return self.condition[8:-1]  # strip "defined("
        elif self.kind == 'ifndef':
            return self.condition[10:-1]  # strip "! defined("
        else:
            return self.condition
        #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values either follow the bracket style (<stdio>)
    or use double quotes ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        return super().__new__(
            cls,
            kind='include',
            file=_coerce_str(file) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.file:
            raise TypeError('missing file')
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfCondition classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        return super().__new__(
            cls,
            kind=_coerce_str(kind) or None,
            text=_coerce_str(text) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # "else"/"endif" take no text; every other kind requires it.
        if self.kind in ('else', 'endif'):
            if self.text:
                raise ValueError('unexpected text in directive')
        elif not self.text:
            raise TypeError('missing text')
#############################
# iterating lines
def _recompute_conditions(directive, ifstack):
if directive.kind in ('if', 'ifdef', 'ifndef'):
ifstack.append(
([], directive.condition))
elif directive.kind == 'elseif':
if ifstack:
negated, active = ifstack.pop()
if active:
negated.append(active)
else:
negated = []
ifstack.append(
(negated, directive.condition))
elif directive.kind == 'else':
if ifstack:
negated, active = ifstack.pop()
if active:
negated.append(active)
ifstack.append(
(negated, None))
elif directive.kind == 'endif':
if ifstack:
ifstack.pop()
conditions = []
for negated, active in ifstack:
for condition in negated:
conditions.append(f'! ({condition})')
if active:
conditions.append(active)
return tuple(conditions)
def _iter_clean_lines(lines):
    """Yield (lno, line) for each logical line in the given lines.

    Line continuations are merged into one line and comments are
    replaced with a single space (per the C99 spec).  In both cases
    "lno" is the number of the last physical line involved.
    """
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                # NOTE(review): a trailing continuation at EOF is
                # yielded with the marker still attached -- confirm
                # that is acceptable for callers.
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line
        # Deal with comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A "//" comment runs to end-of-line, trumping any
                # "/*" that follows it.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                break
            line += ' '  # per the C99 spec
            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')
        yield lno, line
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        # Normalize the directive text: drop whitespace after the "#"
        # and collapse runs of spaces to a single space.
        directive = '#' + stripped[1:].lstrip()
        # Collapse two spaces at a time; replacing a single space with
        # itself (as before) was a no-op, so that loop never terminated
        # once any space was present.
        while '  ' in directive:
            directive = directive.replace('  ', ' ')
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        # Update the active conditions for *subsequent* lines only.
        if directive.kind in ('else', 'endif'):
            conditions = _recompute_conditions(directive, ifstack)
        elif isinstance(directive, IfDirective):
            conditions = _recompute_conditions(directive, ifstack)
#############################
# running (platform-specific?)
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Return the preprocessor output for the file, via "gcc -E"."""
    argv = _get_argv()
    argv += ['-E', filename]
    return _run(argv)
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv (CC + PY_CORE_CFLAGS) from the Makefile.

    The values are extracted by running make with an auxiliary makefile
    containing a generic "print-%" rule that echoes $(VAR).

    Note: this relies on a Makefile in the current directory and on
    /usr/bin/make being available.
    """
    import os
    import tempfile
    # Use a unique temporary file rather than a fixed "/tmp/print.mk",
    # to avoid clobbering (or being clobbered by) other users/processes.
    fd, printmk = tempfile.mkstemp(suffix='.mk')
    os.close(fd)
    try:
        with _open(printmk, 'w') as tmpfile:
            # A generic rule that just echoes the value of $(VAR).
            tmpfile.write('print-%:\n')
            tmpfile.write('\t@echo $($*)\n')
        argv = ['/usr/bin/make',
                '-f', 'Makefile',
                '-f', printmk,
                'print-CC',
                'print-PY_CORE_CFLAGS',
                ]
        output = _run(argv)
    finally:
        os.remove(printmk)
    gcc, cflags = output.strip().splitlines()
    argv = shlex.split(gcc.strip())
    cflags = shlex.split(cflags.strip())
    return argv + cflags
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    output = _gcc(filename)
    return output

View File

@ -0,0 +1,34 @@
from . import preprocessor
def iter_clean_lines(lines):
    """Yield each meaningful line with comments and blanks removed.

    C-style block comments (including multi-line ones) and "//" line
    comments are stripped out.  Code on the same line before an opening
    "/*" or after a closing "*/" is preserved (previously it was
    silently dropped).  Blank lines and leading/trailing whitespace are
    skipped.

    Note: only the first "/*" comment on a line is handled.
    """
    incomment = False
    for line in lines:
        if incomment:
            # Look for the end of the multi-line comment.
            _, sep, line = line.partition('*/')
            if not sep:
                continue
            incomment = False
            # Fall through to handle any code after the "*/".
        # Deal with comments.
        line, _, _ = line.partition('//')
        before, sep, remainder = line.partition('/*')
        if sep:
            _, sep, after = remainder.partition('*/')
            if sep:
                # The comment is entirely on this line.
                line = before + ' ' + after
            else:
                # The comment continues onto later lines; keep the
                # code that precedes it.
                incomment = True
                line = before
        # Ignore blank lines and leading/trailing whitespace.
        line = line.strip()
        if not line:
            continue
        yield line
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Yield each line of the file after preprocessing it."""
    text = preprocess(filename)
    return iter(text.splitlines())

View File

@ -0,0 +1,117 @@
import os.path
import shutil
from c_analyzer.common import util, info
from .info import Symbol
# XXX need tests:
# * iter_symbols
# Map the single-letter symbol type reported by "nm" to a Symbol.KIND.
# Only lower-case codes appear here; upper-case (external) codes are
# lower-cased before lookup and the case handled separately.
NM_KINDS = {
    'b': Symbol.KIND.VARIABLE,  # uninitialized
    'd': Symbol.KIND.VARIABLE,  # initialized
    #'g': Symbol.KIND.VARIABLE,  # uninitialized
    #'s': Symbol.KIND.VARIABLE,  # initialized
    't': Symbol.KIND.FUNCTION,
}
SPECIAL_SYMBOLS = {
# binary format (e.g. ELF)
'__bss_start',
'__data_start',
'__dso_handle',
'_DYNAMIC',
'_edata',
'_end',
'__environ@@GLIBC_2.2.5',
'_GLOBAL_OFFSET_TABLE_',
'__JCR_END__',
'__JCR_LIST__',
'__TMC_END__',
}
def _is_special_symbol(name):
if name in SPECIAL_SYMBOLS:
return True
if '@@GLIBC' in name:
return True
return False
def iter_symbols(binfile, *,
                 nm=None,
                 handle_id=None,
                 _which=shutil.which,
                 _run=util.run_cmd,
                 ):
    """Yield a Symbol for each relevant entry reported by the "nm" command.

    Only variables are yielded; functions and special binary-format
    symbols are filtered out.  "handle_id" takes (filename, funcname,
    name) and returns the symbol's ID; it defaults to info.ID.
    """
    if nm is None:
        nm = _which('nm')
    if not nm:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError
    if handle_id is None:
        handle_id = info.ID

    argv = [nm,
            '--line-numbers',
            binfile,
            ]
    # (The previous try/except around this call only re-raised: its
    # "nm is None" branch was unreachable since nm is validated above.)
    output = _run(argv)

    for line in output.splitlines():
        (name, kind, external, filename, funcname,
         ) = _parse_nm_line(line)
        if kind != Symbol.KIND.VARIABLE:
            continue
        elif _is_special_symbol(name):
            continue
        yield Symbol(
            id=handle_id(filename, funcname, name),
            kind=kind,
            external=external,
        )
def _parse_nm_line(line):
    """Parse one line of "nm --line-numbers" output.

    Return (name, kind, external, filename, funcname).  "filename" is
    info.UNKNOWN when nm reported no location.  "funcname" is
    info.UNKNOWN for function-local statics and None otherwise.
    """
    # Expected format: ADDRESS KIND NAME[\tFILE:LNO]
    _, _, rest = line.partition(' ')  # Drop the address.
    kindcode, _, rest = rest.strip().partition(' ')
    rest = rest.strip()

    # Upper-case kind codes mark externally visible symbols.
    external = kindcode.isupper()
    kind = NM_KINDS.get(kindcode.lower(), Symbol.KIND.OTHER)

    name, _, location = rest.partition('\t')
    name = name.strip()
    if location:
        filename = os.path.relpath(location.partition(':')[0])
    else:
        filename = info.UNKNOWN

    name, islocal = _parse_nm_name(name, kind)
    funcname = info.UNKNOWN if islocal else None
    return name, kind, external, filename, funcname
def _parse_nm_name(name, kind):
    """Normalize a symbol name and report whether it is function-local.

    Return (name, islocal), where islocal is None when the question
    does not apply (non-variables and special symbols).
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None
    # Function-local statics are emitted as "name.N" (N a number).
    base, sep, suffix = name.partition('.')
    if not sep:
        return name, False
    if not suffix.isdigit():
        raise Exception(f'got bogus name {name}')
    return base, True

View File

@ -0,0 +1,175 @@
import os
import os.path
import shutil
from ..common import files
from ..common.info import UNKNOWN, ID
from ..parser import find as p_find
from . import _nm
from .info import Symbol
# XXX need tests:
# * get_resolver()
# * get_resolver_from_dirs()
# * symbol()
# * symbols()
# * variables()
def _resolve_known(symbol, knownvars):
for varid in knownvars:
if symbol.match(varid):
break
else:
return None
return knownvars.pop(varid)
def get_resolver(filenames=None, known=None, *,
                 handle_var,
                 check_filename=None,
                 perfilecache=None,
                 preprocessed=False,
                 _from_source=p_find.variable_from_id,
                 ):
    """Return a "resolver" func for the given known vars/types and filenames.

    "handle_var" is a callable that takes (ID, decl) and returns a
    Variable.  Variable.from_id is a suitable callable.

    The returned func takes a single Symbol and returns a corresponding
    Variable.  If the symbol was located then the variable will be
    valid, populated with the corresponding information.  Otherwise None
    is returned.
    """
    knownvars = (known or {}).get('variables')
    if knownvars:
        # Copy so that matched entries can be popped (by
        # _resolve_known) without mutating the caller's data.
        knownvars = dict(knownvars)  # a copy
        if filenames:
            if check_filename is None:
                # Materialize the iterable so membership tests work.
                filenames = list(filenames)
                def check_filename(filename):
                    return filename in filenames
            def resolve(symbol):
                # Try the known vars first, then fall back to parsing
                # the source files.
                # XXX Check "found" instead?
                if not check_filename(symbol.filename):
                    return None
                found = _resolve_known(symbol, knownvars)
                if found is None:
                    #return None
                    varid, decl = _from_source(symbol, filenames,
                                               perfilecache=perfilecache,
                                               preprocessed=preprocessed,
                                               )
                    found = handle_var(varid, decl)
                return found
        else:
            # Only the known vars are available to match against.
            def resolve(symbol):
                return _resolve_known(symbol, knownvars)
    elif filenames:
        # No known vars: always resolve from the source files.
        def resolve(symbol):
            varid, decl = _from_source(symbol, filenames,
                                       perfilecache=perfilecache,
                                       preprocessed=preprocessed,
                                       )
            return handle_var(varid, decl)
    else:
        # Nothing to resolve against.
        def resolve(symbol):
            return None
    return resolve
def get_resolver_from_dirs(dirnames, known=None, *,
                           handle_var,
                           suffixes=('.c',),
                           perfilecache=None,
                           preprocessed=False,
                           _iter_files=files.iter_files_by_suffix,
                           _get_resolver=get_resolver,
                           ):
    """Return a "resolver" func for the given known vars/types and dirs.

    "dirnames" should be absolute paths.  If not then they will be
    resolved relative to CWD.

    See get_resolver().
    """
    # Normalize each dirname to end with the path separator so a plain
    # prefix test cannot accidentally match a sibling directory.
    dirnames = [dirname if dirname.endswith(os.path.sep)
                else dirname + os.path.sep
                for dirname in dirnames]
    filenames = _iter_files(dirnames, suffixes)

    def check_filename(filename):
        return any(filename.startswith(dirname)
                   for dirname in dirnames)

    return _get_resolver(filenames, known,
                         handle_var=handle_var,
                         check_filename=check_filename,
                         perfilecache=perfilecache,
                         preprocessed=preprocessed,
                         )
def symbol(symbol, filenames, known=None, *,
           perfilecache=None,
           preprocessed=False,
           handle_id=None,
           _get_resolver=get_resolver,
           ):
    """Return a Variable for the one matching the given symbol.

    "symbol" can be one of several objects:

    * Symbol - use the contained info
    * name (str) - look for a global variable with that name
    * (filename, name) - look for named global in file
    * (filename, funcname, name) - look for named local in file

    A name is always required.  If the filename is None, "", or
    "UNKNOWN" then all files will be searched.  If the funcname is
    "" or "UNKNOWN" then only local variables will be searched for.
    """
    # NOTE(review): this call does not match get_resolver()'s
    # signature: get_resolver() takes (filenames, known) -- the two
    # appear swapped here -- and it has no "handle_id" parameter (it
    # requires keyword-only "handle_var").  As written this raises
    # TypeError when called; confirm the intended wiring.
    resolve = _get_resolver(known, filenames,
                            handle_id=handle_id,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    return resolve(symbol)
def _get_platform_tool():
    """Return a callable (binfile, handle_id) yielding Symbols.

    Picks a platform-appropriate symbols tool.  Raises
    NotImplementedError when no supported tool is available.
    """
    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    nm = shutil.which('nm')
    if not nm:
        raise NotImplementedError
    return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
def symbols(binfile, *,
            handle_id=None,
            _file_exists=os.path.exists,
            _get_platform_tool=_get_platform_tool,
            ):
    """Yield a Symbol for each one found in the binary."""
    if not _file_exists(binfile):
        raise Exception('executable missing (need to build it first?)')

    # Delegate to whichever symbols tool this platform supports.
    iter_symbols = _get_platform_tool()
    yield from iter_symbols(binfile, handle_id)
def variables(binfile, *,
              resolve,
              handle_id=None,
              _iter_symbols=symbols,
              ):
    """Yield (Variable, Symbol) for each variable symbol in the binary.

    The Variable is None when "resolve" could not locate the symbol.
    """
    for sym in _iter_symbols(binfile, handle_id=handle_id):
        if sym.kind != Symbol.KIND.VARIABLE:
            continue
        yield resolve(sym) or None, sym

View File

@ -0,0 +1,51 @@
from collections import namedtuple
from c_analyzer.common.info import ID
from c_analyzer.common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
    """Info for a single compilation symbol.

    Fields:
      id - the symbol's ID (see info.ID)
      kind - one of the Symbol.KIND values
      external - whether the symbol is externally visible
    """

    __slots__ = ()

    class KIND:
        # The supported symbol kinds.
        VARIABLE = 'variable'
        FUNCTION = 'function'
        OTHER = 'other'

    @classonly
    def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
        """Return a new symbol based on the given name."""
        id = ID(filename, None, name)
        return cls(id, kind, external)

    def __new__(cls, id, kind=KIND.VARIABLE, external=None):
        self = super().__new__(
            cls,
            # Normalize each field; empty values become None.
            id=ID.from_raw(id),
            kind=str(kind) if kind else None,
            external=bool(external) if external is not None else None,
        )
        return self

    def __hash__(self):
        # Hash only by ID, matching the equality semantics callers use.
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate unknown attributes (e.g. filename, funcname, name)
        # to the symbol's ID.
        return getattr(self.id, name)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.id:
            raise TypeError('missing id')
        else:
            self.id.validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in vars(self.KIND).values():
            raise ValueError(f'unsupported kind {self.kind}')

        if self.external is None:
            raise TypeError('missing external')

View File

@ -0,0 +1,75 @@
from ..common import files
from ..common.info import UNKNOWN
from ..parser import (
find as p_find,
)
from ..symbols import (
info as s_info,
find as s_find,
)
from .info import Variable
# XXX need tests:
# * vars_from_source
def _remove_cached(cache, var):
if not cache:
return
try:
cached = cache[var.filename]
cached.remove(var)
except (KeyError, IndexError):
pass
def vars_from_binary(binfile, *,
                     known=None,
                     filenames=None,
                     handle_id=None,
                     check_filename=None,
                     handle_var=Variable.from_id,
                     _iter_vars=s_find.variables,
                     _get_symbol_resolver=s_find.get_resolver,
                     ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    Unresolved symbols yield a Variable with UNKNOWN storage/vartype.
    """
    perfilecache = {}
    resolve = _get_symbol_resolver(filenames, known,
                                   handle_var=handle_var,
                                   check_filename=check_filename,
                                   perfilecache=perfilecache,
                                   )
    for var, symbol in _iter_vars(binfile,
                                  resolve=resolve,
                                  handle_id=handle_id,
                                  ):
        if var is None:
            var = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield var
        # The cached parse results for this var are no longer needed.
        _remove_cached(perfilecache, var)
def vars_from_source(filenames, *,
                     preprocessed=None,
                     known=None,
                     handle_id=None,
                     handle_var=Variable.from_id,
                     iter_vars=p_find.variables,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    """
    perfilecache = {}
    for varid, decl in iter_vars(filenames or (),
                                 perfilecache=perfilecache,
                                 preprocessed=preprocessed,
                                 known=known,
                                 handle_id=handle_id,
                                 ):
        var = handle_var(varid, decl)
        yield var
        # The cached parse results for this var are no longer needed.
        _remove_cached(perfilecache, var)

View File

@ -0,0 +1,93 @@
from collections import namedtuple
from ..common.info import ID, UNKNOWN
from ..common.util import classonly, _NTBase
def normalize_vartype(vartype):
    """Return the canonical form for a variable type (or func signature).

    None passes through; the empty string is allowed through (for
    semantic reasons) rather than being normalized to None.
    """
    # XXX finish!
    # XXX Return (modifiers, type, pointer)?
    if vartype is None:
        return None
    return str(vartype)
# XXX Variable.vartype -> decl (Declaration).
class Variable(_NTBase,
               namedtuple('Variable', 'id storage vartype')):
    """Information about a single variable declaration.

    Fields:
      id - the variable's ID (see common.info.ID)
      storage - one of the STORAGE values
      vartype - the normalized type (or func signature) text
    """

    __slots__ = ()

    # The allowed values for the "storage" field.
    STORAGE = (
        'static',
        'extern',
        'implicit',
        'local',
    )

    @classonly
    def from_parts(cls, filename, funcname, name, decl, storage=None):
        """Return a Variable built from the parts of its ID plus the decl."""
        varid = ID(filename, funcname, name)
        if storage is None:
            self = cls.from_id(varid, decl)
        else:
            self = cls(varid, storage, decl)
        return self

    @classonly
    def from_id(cls, varid, decl):
        """Return a Variable with storage inferred from the declaration."""
        # Imported here to avoid an import cycle with the parser.
        from ..parser.declarations import extract_storage
        storage = extract_storage(decl, infunc=varid.funcname)
        return cls(varid, storage, decl)

    def __new__(cls, id, storage, vartype):
        self = super().__new__(
            cls,
            id=ID.from_raw(id),
            # Empty values are normalized to None.
            storage=str(storage) if storage else None,
            vartype=normalize_vartype(vartype) if vartype else None,
        )
        return self

    def __hash__(self):
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate unknown attributes (e.g. filename, funcname, name)
        # to the variable's ID.
        return getattr(self.id, name)

    def _validate_id(self):
        # Check the ID and the parts of it this class requires.
        if not self.id:
            raise TypeError('missing id')

        if not self.filename or self.filename == UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')

        if self.funcname and self.funcname == UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')

        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        self._validate_id()

        if self.storage is None or self.storage == UNKNOWN:
            raise TypeError('missing storage')
        elif self.storage not in self.STORAGE:
            # Fixed: "{self.storage:r}" was an invalid format spec
            # ("Unknown format code 'r'"); "!r" is the repr conversion.
            raise ValueError(f'unsupported storage {self.storage!r}')

        if self.vartype is None or self.vartype == UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        return self.storage != 'local'

    @property
    def isconst(self):
        return 'const' in self.vartype.split()

View File

@ -0,0 +1,91 @@
import csv
from ..common.info import ID, UNKNOWN
from ..common.util import read_tsv
from .info import Variable
# XXX need tests:
# * read_file()
# * look_up_variable()
# The schema of each row in the known-declarations TSV data file.
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
def read_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Yield (kind, id, decl) for each row in the data file.

    The caller is responsible for validating each row.
    """
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        # A "-" (or empty) funcname marks a global variable.
        if not funcname or funcname == '-':
            funcname = None
        yield kind, ID(filename, funcname, name), declaration
def from_file(infile, *,
              handle_var=Variable.from_id,
              _read_file=read_file,
              ):
    """Return the info for known declarations in the given file.

    The result maps a category (currently only "variables") to a
    mapping of ID to the corresponding (validated) value.

    Raises ValueError for rows with an unsupported kind.
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for kind, id, decl in _read_file(infile):
        if kind == 'variable':
            values = known['variables']
            value = handle_var(id, decl)
        else:
            # Fixed: the original f-string referenced an undefined
            # name ("row"), so this raised NameError instead.
            raise ValueError(
                    f'unsupported kind {kind!r} in row {(kind, id, decl)}')
        value.validate()
        values[id] = value
    return known
def look_up_variable(varid, knownvars, *,
                     match_files=(lambda f1, f2: f1 == f2),
                     ):
    """Return the known Variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    If no match is found then None is returned.
    """
    if not knownvars:
        return None

    if varid.funcname == UNKNOWN:
        # Only function-local variables can match.  Use a loop
        # variable distinct from "varid" so the comparisons actually
        # compare each known ID against the requested one (the
        # original code shadowed "varid", making every check
        # trivially true).
        if not varid.filename or varid.filename == UNKNOWN:
            # Match purely by name, across all files.
            for knownid in knownvars:
                if not knownid.funcname:
                    continue
                if knownid.name == varid.name:
                    return knownvars[knownid]
            return None
        else:
            # Match by name within the requested file.
            for knownid in knownvars:
                if not knownid.funcname:
                    continue
                if not match_files(knownid.filename, varid.filename):
                    continue
                if knownid.name == varid.name:
                    return knownvars[knownid]
            return None
    elif not varid.filename or varid.filename == UNKNOWN:
        raise NotImplementedError
    else:
        # NOTE(review): this presumes the ID exposes an "id"
        # attribute -- confirm; knownvars.get(varid) may be the
        # intent.
        return knownvars.get(varid.id)