first commit
This commit is contained in:
@ -0,0 +1,124 @@
|
||||
import glob
|
||||
import os
|
||||
import os.path
|
||||
|
||||
# XXX need tests:
|
||||
# * walk_tree()
|
||||
# * glob_tree()
|
||||
# * iter_files_by_suffix()
|
||||
|
||||
|
||||
C_SOURCE_SUFFIXES = ('.c', '.h')
|
||||
|
||||
|
||||
def _walk_tree(root, *,
|
||||
_walk=os.walk,
|
||||
):
|
||||
# A wrapper around os.walk that resolves the filenames.
|
||||
for parent, _, names in _walk(root):
|
||||
for name in names:
|
||||
yield os.path.join(parent, name)
|
||||
|
||||
|
||||
def walk_tree(root, *,
|
||||
suffix=None,
|
||||
walk=_walk_tree,
|
||||
):
|
||||
"""Yield each file in the tree under the given directory name.
|
||||
|
||||
If "suffix" is provided then only files with that suffix will
|
||||
be included.
|
||||
"""
|
||||
if suffix and not isinstance(suffix, str):
|
||||
raise ValueError('suffix must be a string')
|
||||
|
||||
for filename in walk(root):
|
||||
if suffix and not filename.endswith(suffix):
|
||||
continue
|
||||
yield filename
|
||||
|
||||
|
||||
def glob_tree(root, *,
|
||||
suffix=None,
|
||||
_glob=glob.iglob,
|
||||
_escape=glob.escape,
|
||||
_join=os.path.join,
|
||||
):
|
||||
"""Yield each file in the tree under the given directory name.
|
||||
|
||||
If "suffix" is provided then only files with that suffix will
|
||||
be included.
|
||||
"""
|
||||
suffix = suffix or ''
|
||||
if not isinstance(suffix, str):
|
||||
raise ValueError('suffix must be a string')
|
||||
|
||||
for filename in _glob(_join(_escape(root), f'*{suffix}')):
|
||||
yield filename
|
||||
for filename in _glob(_join(_escape(root), f'**/*{suffix}')):
|
||||
yield filename
|
||||
|
||||
|
||||
def iter_files(root, suffix=None, relparent=None, *,
|
||||
get_files=None,
|
||||
_glob=glob_tree,
|
||||
_walk=walk_tree,
|
||||
):
|
||||
"""Yield each file in the tree under the given directory name.
|
||||
|
||||
If "root" is a non-string iterable then do the same for each of
|
||||
those trees.
|
||||
|
||||
If "suffix" is provided then only files with that suffix will
|
||||
be included.
|
||||
|
||||
if "relparent" is provided then it is used to resolve each
|
||||
filename as a relative path.
|
||||
"""
|
||||
if get_files is None:
|
||||
get_files = os.walk
|
||||
if not isinstance(root, str):
|
||||
roots = root
|
||||
for root in roots:
|
||||
yield from iter_files(root, suffix, relparent,
|
||||
get_files=get_files,
|
||||
_glob=_glob, _walk=_walk)
|
||||
return
|
||||
|
||||
# Use the right "walk" function.
|
||||
if get_files in (glob.glob, glob.iglob, glob_tree):
|
||||
get_files = _glob
|
||||
else:
|
||||
_files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
|
||||
get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))
|
||||
|
||||
# Handle a single suffix.
|
||||
if suffix and not isinstance(suffix, str):
|
||||
filenames = get_files(root)
|
||||
suffix = tuple(suffix)
|
||||
else:
|
||||
filenames = get_files(root, suffix=suffix)
|
||||
suffix = None
|
||||
|
||||
for filename in filenames:
|
||||
if suffix and not isinstance(suffix, str): # multiple suffixes
|
||||
if not filename.endswith(suffix):
|
||||
continue
|
||||
if relparent:
|
||||
filename = os.path.relpath(filename, relparent)
|
||||
yield filename
|
||||
|
||||
|
||||
def iter_files_by_suffix(root, suffixes, relparent=None, *,
|
||||
walk=walk_tree,
|
||||
_iter_files=iter_files,
|
||||
):
|
||||
"""Yield each file in the tree that has the given suffixes.
|
||||
|
||||
Unlike iter_files(), the results are in the original suffix order.
|
||||
"""
|
||||
if isinstance(suffixes, str):
|
||||
suffixes = [suffixes]
|
||||
# XXX Ignore repeated suffixes?
|
||||
for suffix in suffixes:
|
||||
yield from _iter_files(root, suffix, relparent)
|
||||
@ -0,0 +1,138 @@
|
||||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
from .util import classonly, _NTBase
|
||||
|
||||
# XXX need tests:
|
||||
# * ID.match()
|
||||
|
||||
|
||||
UNKNOWN = '???'
|
||||
|
||||
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
|
||||
|
||||
|
||||
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
|
||||
"""A unique ID for a single symbol or declaration."""
|
||||
|
||||
__slots__ = ()
|
||||
# XXX Add optional conditions (tuple of strings) field.
|
||||
#conditions = Slot()
|
||||
|
||||
@classonly
|
||||
def from_raw(cls, raw):
|
||||
if not raw:
|
||||
return None
|
||||
if isinstance(raw, str):
|
||||
return cls(None, None, raw)
|
||||
try:
|
||||
name, = raw
|
||||
filename = None
|
||||
except ValueError:
|
||||
try:
|
||||
filename, name = raw
|
||||
except ValueError:
|
||||
return super().from_raw(raw)
|
||||
return cls(filename, None, name)
|
||||
|
||||
def __new__(cls, filename, funcname, name):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
filename=str(filename) if filename else None,
|
||||
funcname=str(funcname) if funcname else None,
|
||||
name=str(name) if name else None,
|
||||
)
|
||||
#cls.conditions.set(self, tuple(str(s) if s else None
|
||||
# for s in conditions or ()))
|
||||
return self
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
if not self.name:
|
||||
raise TypeError('missing name')
|
||||
else:
|
||||
if not NAME_RE.match(self.name):
|
||||
raise ValueError(
|
||||
f'name must be an identifier, got {self.name!r}')
|
||||
|
||||
# Symbols from a binary might not have filename/funcname info.
|
||||
|
||||
if self.funcname:
|
||||
if not self.filename:
|
||||
raise TypeError('missing filename')
|
||||
if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
|
||||
raise ValueError(
|
||||
f'name must be an identifier, got {self.funcname!r}')
|
||||
|
||||
# XXX Require the filename (at least UNKONWN)?
|
||||
# XXX Check the filename?
|
||||
|
||||
@property
|
||||
def islocal(self):
|
||||
return self.funcname is not None
|
||||
|
||||
def match(self, other, *,
|
||||
match_files=(lambda f1, f2: f1 == f2),
|
||||
):
|
||||
"""Return True if the two match.
|
||||
|
||||
At least one of the two must be completely valid (no UNKNOWN
|
||||
anywhere). Otherwise False is returned. The remaining one
|
||||
*may* have UNKNOWN for both funcname and filename. It must
|
||||
have a valid name though.
|
||||
|
||||
The caller is responsible for knowing which of the two is valid
|
||||
(and which to use if both are valid).
|
||||
"""
|
||||
# First check the name.
|
||||
if self.name is None:
|
||||
return False
|
||||
if other.name != self.name:
|
||||
return False
|
||||
|
||||
# Then check the filename.
|
||||
if self.filename is None:
|
||||
return False
|
||||
if other.filename is None:
|
||||
return False
|
||||
if self.filename == UNKNOWN:
|
||||
# "other" must be the valid one.
|
||||
if other.funcname == UNKNOWN:
|
||||
return False
|
||||
elif self.funcname != UNKNOWN:
|
||||
# XXX Try matching funcname even though we don't
|
||||
# know the filename?
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return True
|
||||
elif other.filename == UNKNOWN:
|
||||
# "self" must be the valid one.
|
||||
if self.funcname == UNKNOWN:
|
||||
return False
|
||||
elif other.funcname != UNKNOWN:
|
||||
# XXX Try matching funcname even though we don't
|
||||
# know the filename?
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return True
|
||||
elif not match_files(self.filename, other.filename):
|
||||
return False
|
||||
|
||||
# Finally, check the funcname.
|
||||
if self.funcname == UNKNOWN:
|
||||
# "other" must be the valid one.
|
||||
if other.funcname == UNKNOWN:
|
||||
return False
|
||||
else:
|
||||
return other.funcname is not None
|
||||
elif other.funcname == UNKNOWN:
|
||||
# "self" must be the valid one.
|
||||
if self.funcname == UNKNOWN:
|
||||
return False
|
||||
else:
|
||||
return self.funcname is not None
|
||||
elif self.funcname == other.funcname:
|
||||
# Both are valid.
|
||||
return True
|
||||
|
||||
return False
|
||||
@ -0,0 +1,11 @@
|
||||
|
||||
def basic(variables, *,
|
||||
_print=print):
|
||||
"""Print each row simply."""
|
||||
for var in variables:
|
||||
if var.funcname:
|
||||
line = f'{var.filename}:{var.funcname}():{var.name}'
|
||||
else:
|
||||
line = f'{var.filename}:{var.name}'
|
||||
line = f'{line:<64} {var.vartype}'
|
||||
_print(line)
|
||||
@ -0,0 +1,243 @@
|
||||
import csv
|
||||
import subprocess
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def run_cmd(argv, **kwargs):
|
||||
proc = subprocess.run(
|
||||
argv,
|
||||
#capture_output=True,
|
||||
#stderr=subprocess.STDOUT,
|
||||
stdout=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True,
|
||||
**kwargs
|
||||
)
|
||||
return proc.stdout
|
||||
|
||||
|
||||
def read_tsv(infile, header, *,
|
||||
_open=open,
|
||||
_get_reader=csv.reader,
|
||||
):
|
||||
"""Yield each row of the given TSV (tab-separated) file."""
|
||||
if isinstance(infile, str):
|
||||
with _open(infile, newline='') as infile:
|
||||
yield from read_tsv(infile, header,
|
||||
_open=_open,
|
||||
_get_reader=_get_reader,
|
||||
)
|
||||
return
|
||||
lines = iter(infile)
|
||||
|
||||
# Validate the header.
|
||||
try:
|
||||
actualheader = next(lines).strip()
|
||||
except StopIteration:
|
||||
actualheader = ''
|
||||
if actualheader != header:
|
||||
raise ValueError(f'bad header {actualheader!r}')
|
||||
|
||||
for row in _get_reader(lines, delimiter='\t'):
|
||||
yield tuple(v.strip() for v in row)
|
||||
|
||||
|
||||
def write_tsv(outfile, header, rows, *,
|
||||
_open=open,
|
||||
_get_writer=csv.writer,
|
||||
):
|
||||
"""Write each of the rows to the given TSV (tab-separated) file."""
|
||||
if isinstance(outfile, str):
|
||||
with _open(outfile, 'w', newline='') as outfile:
|
||||
return write_tsv(outfile, header, rows,
|
||||
_open=_open,
|
||||
_get_writer=_get_writer,
|
||||
)
|
||||
|
||||
if isinstance(header, str):
|
||||
header = header.split('\t')
|
||||
writer = _get_writer(outfile, delimiter='\t')
|
||||
writer.writerow(header)
|
||||
for row in rows:
|
||||
writer.writerow('' if v is None else str(v)
|
||||
for v in row)
|
||||
|
||||
|
||||
class Slot:
|
||||
"""A descriptor that provides a slot.
|
||||
|
||||
This is useful for types that can't have slots via __slots__,
|
||||
e.g. tuple subclasses.
|
||||
"""
|
||||
|
||||
__slots__ = ('initial', 'default', 'readonly', 'instances', 'name')
|
||||
|
||||
def __init__(self, initial=_NOT_SET, *,
|
||||
default=_NOT_SET,
|
||||
readonly=False,
|
||||
):
|
||||
self.initial = initial
|
||||
self.default = default
|
||||
self.readonly = readonly
|
||||
|
||||
# The instance cache is not inherently tied to the normal
|
||||
# lifetime of the instances. So must do something in order to
|
||||
# avoid keeping the instances alive by holding a reference here.
|
||||
# Ideally we would use weakref.WeakValueDictionary to do this.
|
||||
# However, most builtin types do not support weakrefs. So
|
||||
# instead we monkey-patch __del__ on the attached class to clear
|
||||
# the instance.
|
||||
self.instances = {}
|
||||
self.name = None
|
||||
|
||||
def __set_name__(self, cls, name):
|
||||
if self.name is not None:
|
||||
raise TypeError('already used')
|
||||
self.name = name
|
||||
try:
|
||||
slotnames = cls.__slot_names__
|
||||
except AttributeError:
|
||||
slotnames = cls.__slot_names__ = []
|
||||
slotnames.append(name)
|
||||
self._ensure___del__(cls, slotnames)
|
||||
|
||||
def __get__(self, obj, cls):
|
||||
if obj is None: # called on the class
|
||||
return self
|
||||
try:
|
||||
value = self.instances[id(obj)]
|
||||
except KeyError:
|
||||
if self.initial is _NOT_SET:
|
||||
value = self.default
|
||||
else:
|
||||
value = self.initial
|
||||
self.instances[id(obj)] = value
|
||||
if value is _NOT_SET:
|
||||
raise AttributeError(self.name)
|
||||
# XXX Optionally make a copy?
|
||||
return value
|
||||
|
||||
def __set__(self, obj, value):
|
||||
if self.readonly:
|
||||
raise AttributeError(f'{self.name} is readonly')
|
||||
# XXX Optionally coerce?
|
||||
self.instances[id(obj)] = value
|
||||
|
||||
def __delete__(self, obj):
|
||||
if self.readonly:
|
||||
raise AttributeError(f'{self.name} is readonly')
|
||||
self.instances[id(obj)] = self.default # XXX refleak?
|
||||
|
||||
def _ensure___del__(self, cls, slotnames): # See the comment in __init__().
|
||||
try:
|
||||
old___del__ = cls.__del__
|
||||
except AttributeError:
|
||||
old___del__ = (lambda s: None)
|
||||
else:
|
||||
if getattr(old___del__, '_slotted', False):
|
||||
return
|
||||
|
||||
def __del__(_self):
|
||||
for name in slotnames:
|
||||
delattr(_self, name)
|
||||
old___del__(_self)
|
||||
__del__._slotted = True
|
||||
cls.__del__ = __del__
|
||||
|
||||
def set(self, obj, value):
|
||||
"""Update the cached value for an object.
|
||||
|
||||
This works even if the descriptor is read-only. This is
|
||||
particularly useful when initializing the object (e.g. in
|
||||
its __new__ or __init__).
|
||||
"""
|
||||
self.instances[id(obj)] = value
|
||||
|
||||
|
||||
class classonly:
|
||||
"""A non-data descriptor that makes a value only visible on the class.
|
||||
|
||||
This is like the "classmethod" builtin, but does not show up on
|
||||
instances of the class. It may be used as a decorator.
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
self.getter = classmethod(value).__get__
|
||||
self.name = None
|
||||
|
||||
def __set_name__(self, cls, name):
|
||||
if self.name is not None:
|
||||
raise TypeError('already used')
|
||||
self.name = name
|
||||
|
||||
def __get__(self, obj, cls):
|
||||
if obj is not None:
|
||||
raise AttributeError(self.name)
|
||||
# called on the class
|
||||
return self.getter(None, cls)
|
||||
|
||||
|
||||
class _NTBase:
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
@classonly
|
||||
def from_raw(cls, raw):
|
||||
if not raw:
|
||||
return None
|
||||
elif isinstance(raw, cls):
|
||||
return raw
|
||||
elif isinstance(raw, str):
|
||||
return cls.from_string(raw)
|
||||
else:
|
||||
if hasattr(raw, 'items'):
|
||||
return cls(**raw)
|
||||
try:
|
||||
args = tuple(raw)
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
return cls(*args)
|
||||
raise NotImplementedError
|
||||
|
||||
@classonly
|
||||
def from_string(cls, value):
|
||||
"""Return a new instance based on the given string."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def _make(cls, iterable): # The default _make() is not subclass-friendly.
|
||||
return cls.__new__(cls, *iterable)
|
||||
|
||||
# XXX Always validate?
|
||||
#def __init__(self, *args, **kwargs):
|
||||
# self.validate()
|
||||
|
||||
# XXX The default __repr__() is not subclass-friendly (where the name changes).
|
||||
#def __repr__(self):
|
||||
# _, _, sig = super().__repr__().partition('(')
|
||||
# return f'{self.__class__.__name__}({sig}'
|
||||
|
||||
# To make sorting work with None:
|
||||
def __lt__(self, other):
|
||||
try:
|
||||
return super().__lt__(other)
|
||||
except TypeError:
|
||||
if None in self:
|
||||
return True
|
||||
elif None in other:
|
||||
return False
|
||||
else:
|
||||
raise
|
||||
|
||||
def validate(self):
|
||||
return
|
||||
|
||||
# XXX Always validate?
|
||||
#def _replace(self, **kwargs):
|
||||
# obj = super()._replace(**kwargs)
|
||||
# obj.validate()
|
||||
# return obj
|
||||
@ -0,0 +1,339 @@
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from ..common.info import UNKNOWN
|
||||
|
||||
from . import source
|
||||
|
||||
|
||||
IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)'
|
||||
|
||||
TYPE_QUAL = r'(?:const|volatile)'
|
||||
|
||||
VAR_TYPE_SPEC = r'''(?:
|
||||
void |
|
||||
(?:
|
||||
(?:(?:un)?signed\s+)?
|
||||
(?:
|
||||
char |
|
||||
short |
|
||||
int |
|
||||
long |
|
||||
long\s+int |
|
||||
long\s+long
|
||||
) |
|
||||
) |
|
||||
float |
|
||||
double |
|
||||
{IDENTIFIER} |
|
||||
(?:struct|union)\s+{IDENTIFIER}
|
||||
)'''
|
||||
|
||||
POINTER = rf'''(?:
|
||||
(?:\s+const)?\s*[*]
|
||||
)'''
|
||||
|
||||
#STRUCT = r'''(?:
|
||||
# (?:struct|(struct\s+%s))\s*[{]
|
||||
# [^}]*
|
||||
# [}]
|
||||
# )''' % (IDENTIFIER)
|
||||
#UNION = r'''(?:
|
||||
# (?:union|(union\s+%s))\s*[{]
|
||||
# [^}]*
|
||||
# [}]
|
||||
# )''' % (IDENTIFIER)
|
||||
#DECL_SPEC = rf'''(?:
|
||||
# ({VAR_TYPE_SPEC}) |
|
||||
# ({STRUCT}) |
|
||||
# ({UNION})
|
||||
# )'''
|
||||
|
||||
FUNC_START = rf'''(?:
|
||||
(?:
|
||||
(?:
|
||||
extern |
|
||||
static |
|
||||
static\s+inline
|
||||
)\s+
|
||||
)?
|
||||
#(?:const\s+)?
|
||||
{VAR_TYPE_SPEC}
|
||||
)'''
|
||||
#GLOBAL_VAR_START = rf'''(?:
|
||||
# (?:
|
||||
# (?:
|
||||
# extern |
|
||||
# static
|
||||
# )\s+
|
||||
# )?
|
||||
# (?:
|
||||
# {TYPE_QUAL}
|
||||
# (?:\s+{TYPE_QUAL})?
|
||||
# )?\s+
|
||||
# {VAR_TYPE_SPEC}
|
||||
# )'''
|
||||
GLOBAL_DECL_START_RE = re.compile(rf'''
|
||||
^
|
||||
(?:
|
||||
({FUNC_START})
|
||||
)
|
||||
''', re.VERBOSE)
|
||||
|
||||
LOCAL_VAR_START = rf'''(?:
|
||||
(?:
|
||||
(?:
|
||||
register |
|
||||
static
|
||||
)\s+
|
||||
)?
|
||||
(?:
|
||||
(?:
|
||||
{TYPE_QUAL}
|
||||
(?:\s+{TYPE_QUAL})?
|
||||
)\s+
|
||||
)?
|
||||
{VAR_TYPE_SPEC}
|
||||
{POINTER}?
|
||||
)'''
|
||||
LOCAL_STMT_START_RE = re.compile(rf'''
|
||||
^
|
||||
(?:
|
||||
({LOCAL_VAR_START})
|
||||
)
|
||||
''', re.VERBOSE)
|
||||
|
||||
|
||||
def iter_global_declarations(lines):
|
||||
"""Yield (decl, body) for each global declaration in the given lines.
|
||||
|
||||
For function definitions the header is reduced to one line and
|
||||
the body is provided as-is. For other compound declarations (e.g.
|
||||
struct) the entire declaration is reduced to one line and "body"
|
||||
is None. Likewise for simple declarations (e.g. variables).
|
||||
|
||||
Declarations inside function bodies are ignored, though their text
|
||||
is provided in the function body.
|
||||
"""
|
||||
# XXX Bail out upon bogus syntax.
|
||||
lines = source.iter_clean_lines(lines)
|
||||
for line in lines:
|
||||
if not GLOBAL_DECL_START_RE.match(line):
|
||||
continue
|
||||
# We only need functions here, since we only need locals for now.
|
||||
if line.endswith(';'):
|
||||
continue
|
||||
if line.endswith('{') and '(' not in line:
|
||||
continue
|
||||
|
||||
# Capture the function.
|
||||
# (assume no func is a one-liner)
|
||||
decl = line
|
||||
while '{' not in line: # assume no inline structs, etc.
|
||||
try:
|
||||
line = next(lines)
|
||||
except StopIteration:
|
||||
return
|
||||
decl += ' ' + line
|
||||
|
||||
body, end = _extract_block(lines)
|
||||
if end is None:
|
||||
return
|
||||
assert end == '}'
|
||||
yield (f'{decl}\n{body}\n{end}', body)
|
||||
|
||||
|
||||
def iter_local_statements(lines):
|
||||
"""Yield (lines, blocks) for each statement in the given lines.
|
||||
|
||||
For simple statements, "blocks" is None and the statement is reduced
|
||||
to a single line. For compound statements, "blocks" is a pair of
|
||||
(header, body) for each block in the statement. The headers are
|
||||
reduced to a single line each, but the bpdies are provided as-is.
|
||||
"""
|
||||
# XXX Bail out upon bogus syntax.
|
||||
lines = source.iter_clean_lines(lines)
|
||||
for line in lines:
|
||||
if not LOCAL_STMT_START_RE.match(line):
|
||||
continue
|
||||
|
||||
stmt = line
|
||||
blocks = None
|
||||
if not line.endswith(';'):
|
||||
# XXX Support compound & multiline simple statements.
|
||||
#blocks = []
|
||||
continue
|
||||
|
||||
yield (stmt, blocks)
|
||||
|
||||
|
||||
def _extract_block(lines):
|
||||
end = None
|
||||
depth = 1
|
||||
body = []
|
||||
for line in lines:
|
||||
depth += line.count('{') - line.count('}')
|
||||
if depth == 0:
|
||||
end = line
|
||||
break
|
||||
body.append(line)
|
||||
return '\n'.join(body), end
|
||||
|
||||
|
||||
def parse_func(stmt, body):
|
||||
"""Return (name, signature) for the given function definition."""
|
||||
header, _, end = stmt.partition(body)
|
||||
assert end.strip() == '}'
|
||||
assert header.strip().endswith('{')
|
||||
header, _, _= header.rpartition('{')
|
||||
|
||||
signature = ' '.join(header.strip().splitlines())
|
||||
|
||||
_, _, name = signature.split('(')[0].strip().rpartition(' ')
|
||||
assert name
|
||||
|
||||
return name, signature
|
||||
|
||||
|
||||
#TYPE_SPEC = rf'''(?:
|
||||
# )'''
|
||||
#VAR_DECLARATOR = rf'''(?:
|
||||
# )'''
|
||||
#VAR_DECL = rf'''(?:
|
||||
# {TYPE_SPEC}+
|
||||
# {VAR_DECLARATOR}
|
||||
# \s*
|
||||
# )'''
|
||||
#VAR_DECLARATION = rf'''(?:
|
||||
# {VAR_DECL}
|
||||
# (?: = [^=] [^;]* )?
|
||||
# ;
|
||||
# )'''
|
||||
#
|
||||
#
|
||||
#def parse_variable(decl, *, inFunc=False):
|
||||
# """Return [(name, storage, vartype)] for the given variable declaration."""
|
||||
# ...
|
||||
|
||||
|
||||
def _parse_var(stmt):
|
||||
"""Return (name, vartype) for the given variable declaration."""
|
||||
stmt = stmt.rstrip(';')
|
||||
m = LOCAL_STMT_START_RE.match(stmt)
|
||||
assert m
|
||||
vartype = m.group(0)
|
||||
name = stmt[len(vartype):].partition('=')[0].strip()
|
||||
|
||||
if name.startswith('('):
|
||||
name, _, after = name[1:].partition(')')
|
||||
assert after
|
||||
name = name.replace('*', '* ')
|
||||
inside, _, name = name.strip().rpartition(' ')
|
||||
vartype = f'{vartype} ({inside.strip()}){after}'
|
||||
else:
|
||||
name = name.replace('*', '* ')
|
||||
before, _, name = name.rpartition(' ')
|
||||
vartype = f'{vartype} {before}'
|
||||
|
||||
vartype = vartype.strip()
|
||||
while ' ' in vartype:
|
||||
vartype = vartype.replace(' ', ' ')
|
||||
|
||||
return name, vartype
|
||||
|
||||
|
||||
def extract_storage(decl, *, infunc=None):
|
||||
"""Return (storage, vartype) based on the given declaration.
|
||||
|
||||
The default storage is "implicit" (or "local" if infunc is True).
|
||||
"""
|
||||
if decl == UNKNOWN:
|
||||
return decl
|
||||
if decl.startswith('static '):
|
||||
return 'static'
|
||||
#return 'static', decl.partition(' ')[2].strip()
|
||||
elif decl.startswith('extern '):
|
||||
return 'extern'
|
||||
#return 'extern', decl.partition(' ')[2].strip()
|
||||
elif re.match('.*\b(static|extern)\b', decl):
|
||||
raise NotImplementedError
|
||||
elif infunc:
|
||||
return 'local'
|
||||
else:
|
||||
return 'implicit'
|
||||
|
||||
|
||||
def parse_compound(stmt, blocks):
|
||||
"""Return (headers, bodies) for the given compound statement."""
|
||||
# XXX Identify declarations inside compound statements
|
||||
# (if/switch/for/while).
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def iter_variables(filename, *,
|
||||
preprocessed=False,
|
||||
_iter_source_lines=source.iter_lines,
|
||||
_iter_global=iter_global_declarations,
|
||||
_iter_local=iter_local_statements,
|
||||
_parse_func=parse_func,
|
||||
_parse_var=_parse_var,
|
||||
_parse_compound=parse_compound,
|
||||
):
|
||||
"""Yield (funcname, name, vartype) for every variable in the given file."""
|
||||
if preprocessed:
|
||||
raise NotImplementedError
|
||||
lines = _iter_source_lines(filename)
|
||||
for stmt, body in _iter_global(lines):
|
||||
# At the file top-level we only have to worry about vars & funcs.
|
||||
if not body:
|
||||
name, vartype = _parse_var(stmt)
|
||||
if name:
|
||||
yield (None, name, vartype)
|
||||
else:
|
||||
funcname, _ = _parse_func(stmt, body)
|
||||
localvars = _iter_locals(body,
|
||||
_iter_statements=_iter_local,
|
||||
_parse_var=_parse_var,
|
||||
_parse_compound=_parse_compound,
|
||||
)
|
||||
for name, vartype in localvars:
|
||||
yield (funcname, name, vartype)
|
||||
|
||||
|
||||
def _iter_locals(lines, *,
|
||||
_iter_statements=iter_local_statements,
|
||||
_parse_var=_parse_var,
|
||||
_parse_compound=parse_compound,
|
||||
):
|
||||
compound = [lines]
|
||||
while compound:
|
||||
body = compound.pop(0)
|
||||
bodylines = body.splitlines()
|
||||
for stmt, blocks in _iter_statements(bodylines):
|
||||
if not blocks:
|
||||
name, vartype = _parse_var(stmt)
|
||||
if name:
|
||||
yield (name, vartype)
|
||||
else:
|
||||
headers, bodies = _parse_compound(stmt, blocks)
|
||||
for header in headers:
|
||||
for line in header:
|
||||
name, vartype = _parse_var(line)
|
||||
if name:
|
||||
yield (name, vartype)
|
||||
compound.extend(bodies)
|
||||
|
||||
|
||||
def iter_all(filename, *,
|
||||
preprocessed=False,
|
||||
):
|
||||
"""Yield a Declaration for each one found.
|
||||
|
||||
If there are duplicates, due to preprocessor conditionals, then
|
||||
they are checked to make sure they are the same.
|
||||
"""
|
||||
# XXX For the moment we cheat.
|
||||
for funcname, name, decl in iter_variables(filename,
|
||||
preprocessed=preprocessed):
|
||||
yield 'variable', funcname, name, decl
|
||||
@ -0,0 +1,107 @@
|
||||
from ..common.info import UNKNOWN, ID
|
||||
|
||||
from . import declarations
|
||||
|
||||
# XXX need tests:
|
||||
# * variables
|
||||
# * variable
|
||||
# * variable_from_id
|
||||
|
||||
|
||||
def _iter_vars(filenames, preprocessed, *,
|
||||
handle_id=None,
|
||||
_iter_decls=declarations.iter_all,
|
||||
):
|
||||
if handle_id is None:
|
||||
handle_id = ID
|
||||
|
||||
for filename in filenames or ():
|
||||
for kind, funcname, name, decl in _iter_decls(filename,
|
||||
preprocessed=preprocessed,
|
||||
):
|
||||
if kind != 'variable':
|
||||
continue
|
||||
varid = handle_id(filename, funcname, name)
|
||||
yield varid, decl
|
||||
|
||||
|
||||
# XXX Add a "handle_var" arg like we did for get_resolver()?
|
||||
|
||||
def variables(*filenames,
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
known=None, # for types
|
||||
handle_id=None,
|
||||
_iter_vars=_iter_vars,
|
||||
):
|
||||
"""Yield (varid, decl) for each variable found in the given files.
|
||||
|
||||
If "preprocessed" is provided (and not False/None) then it is used
|
||||
to decide which tool to use to parse the source code after it runs
|
||||
through the C preprocessor. Otherwise the raw
|
||||
"""
|
||||
if len(filenames) == 1 and not (filenames[0], str):
|
||||
filenames, = filenames
|
||||
|
||||
if perfilecache is None:
|
||||
yield from _iter_vars(filenames, preprocessed)
|
||||
else:
|
||||
# XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def variable(name, filenames, *,
|
||||
local=False,
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
handle_id=None,
|
||||
_iter_vars=variables,
|
||||
):
|
||||
"""Return (varid, decl) for the first found variable that matches.
|
||||
|
||||
If "local" is True then the first matching local variable in the
|
||||
file will always be returned. To avoid that, pass perfilecache and
|
||||
pop each variable from the cache after using it.
|
||||
"""
|
||||
for varid, decl in _iter_vars(filenames,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
):
|
||||
if varid.name != name:
|
||||
continue
|
||||
if local:
|
||||
if varid.funcname:
|
||||
if varid.funcname == UNKNOWN:
|
||||
raise NotImplementedError
|
||||
return varid, decl
|
||||
elif not varid.funcname:
|
||||
return varid, decl
|
||||
else:
|
||||
return None, None # No matching variable was found.
|
||||
|
||||
|
||||
def variable_from_id(id, filenames, *,
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
handle_id=None,
|
||||
_get_var=variable,
|
||||
):
|
||||
"""Return (varid, decl) for the first found variable that matches."""
|
||||
local = False
|
||||
if isinstance(id, str):
|
||||
name = id
|
||||
else:
|
||||
if id.funcname == UNKNOWN:
|
||||
local = True
|
||||
elif id.funcname:
|
||||
raise NotImplementedError
|
||||
|
||||
name = id.name
|
||||
if id.filename and id.filename != UNKNOWN:
|
||||
filenames = [id.filename]
|
||||
return _get_var(name, filenames,
|
||||
local=local,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
handle_id=handle_id,
|
||||
)
|
||||
@ -0,0 +1,179 @@
|
||||
import re
|
||||
|
||||
from ..common.info import UNKNOWN, ID
|
||||
|
||||
from .preprocessor import _iter_clean_lines
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def get_srclines(filename, *,
|
||||
cache=None,
|
||||
_open=open,
|
||||
_iter_lines=_iter_clean_lines,
|
||||
):
|
||||
"""Return the file's lines as a list.
|
||||
|
||||
Each line will have trailing whitespace removed (including newline).
|
||||
|
||||
If a cache is given the it is used.
|
||||
"""
|
||||
if cache is not None:
|
||||
try:
|
||||
return cache[filename]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
with _open(filename) as srcfile:
|
||||
srclines = [line
|
||||
for _, line in _iter_lines(srcfile)
|
||||
if not line.startswith('#')]
|
||||
for i, line in enumerate(srclines):
|
||||
srclines[i] = line.rstrip()
|
||||
|
||||
if cache is not None:
|
||||
cache[filename] = srclines
|
||||
return srclines
|
||||
|
||||
|
||||
def parse_variable_declaration(srcline):
|
||||
"""Return (name, decl) for the given declaration line."""
|
||||
# XXX possible false negatives...
|
||||
decl, sep, _ = srcline.partition('=')
|
||||
if not sep:
|
||||
if not srcline.endswith(';'):
|
||||
return None, None
|
||||
decl = decl.strip(';')
|
||||
decl = decl.strip()
|
||||
m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
|
||||
if not m:
|
||||
return None, None
|
||||
name = m.group(1)
|
||||
return name, decl
|
||||
|
||||
|
||||
def parse_variable(srcline, funcname=None):
|
||||
"""Return (varid, decl) for the variable declared on the line (or None)."""
|
||||
line = srcline.strip()
|
||||
|
||||
# XXX Handle more than just static variables.
|
||||
if line.startswith('static '):
|
||||
if '(' in line and '[' not in line:
|
||||
# a function
|
||||
return None, None
|
||||
return parse_variable_declaration(line)
|
||||
else:
|
||||
return None, None
|
||||
|
||||
|
||||
def iter_variables(filename, *,
|
||||
srccache=None,
|
||||
parse_variable=None,
|
||||
_get_srclines=get_srclines,
|
||||
_default_parse_variable=parse_variable,
|
||||
):
|
||||
"""Yield (varid, decl) for each variable in the given source file."""
|
||||
if parse_variable is None:
|
||||
parse_variable = _default_parse_variable
|
||||
|
||||
indent = ''
|
||||
prev = ''
|
||||
funcname = None
|
||||
for line in _get_srclines(filename, cache=srccache):
|
||||
# remember current funcname
|
||||
if funcname:
|
||||
if line == indent + '}':
|
||||
funcname = None
|
||||
continue
|
||||
else:
|
||||
if '(' in prev and line == indent + '{':
|
||||
if not prev.startswith('__attribute__'):
|
||||
funcname = prev.split('(')[0].split()[-1]
|
||||
prev = ''
|
||||
continue
|
||||
indent = line[:-len(line.lstrip())]
|
||||
prev = line
|
||||
|
||||
info = parse_variable(line, funcname)
|
||||
if isinstance(info, list):
|
||||
for name, _funcname, decl in info:
|
||||
yield ID(filename, _funcname, name), decl
|
||||
continue
|
||||
name, decl = info
|
||||
|
||||
if name is None:
|
||||
continue
|
||||
yield ID(filename, funcname, name), decl
|
||||
|
||||
|
||||
def _match_varid(variable, name, funcname, ignored=None):
|
||||
if ignored and variable in ignored:
|
||||
return False
|
||||
|
||||
if variable.name != name:
|
||||
return False
|
||||
|
||||
if funcname == UNKNOWN:
|
||||
if not variable.funcname:
|
||||
return False
|
||||
elif variable.funcname != funcname:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def find_variable(filename, funcname, name, *,
|
||||
ignored=None,
|
||||
srccache=None, # {filename: lines}
|
||||
parse_variable=None,
|
||||
_iter_variables=iter_variables,
|
||||
):
|
||||
"""Return the matching variable.
|
||||
|
||||
Return None if the variable is not found.
|
||||
"""
|
||||
for varid, decl in _iter_variables(filename,
|
||||
srccache=srccache,
|
||||
parse_variable=parse_variable,
|
||||
):
|
||||
if _match_varid(varid, name, funcname, ignored):
|
||||
return varid, decl
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def find_variables(varids, filenames=None, *,
|
||||
srccache=_NOT_SET,
|
||||
parse_variable=None,
|
||||
_find_symbol=find_variable,
|
||||
):
|
||||
"""Yield (varid, decl) for each ID.
|
||||
|
||||
If the variable is not found then its decl will be UNKNOWN. That
|
||||
way there will be one resulting variable per given ID.
|
||||
"""
|
||||
if srccache is _NOT_SET:
|
||||
srccache = {}
|
||||
|
||||
used = set()
|
||||
for varid in varids:
|
||||
if varid.filename and varid.filename != UNKNOWN:
|
||||
srcfiles = [varid.filename]
|
||||
else:
|
||||
if not filenames:
|
||||
yield varid, UNKNOWN
|
||||
continue
|
||||
srcfiles = filenames
|
||||
for filename in srcfiles:
|
||||
varid, decl = _find_varid(filename, varid.funcname, varid.name,
|
||||
ignored=used,
|
||||
srccache=srccache,
|
||||
parse_variable=parse_variable,
|
||||
)
|
||||
if varid:
|
||||
yield varid, decl
|
||||
used.add(varid)
|
||||
break
|
||||
else:
|
||||
yield varid, UNKNOWN
|
||||
@ -0,0 +1,511 @@
|
||||
from collections import namedtuple
|
||||
import shlex
|
||||
import os
|
||||
import re
|
||||
|
||||
from ..common import util, info
|
||||
|
||||
|
||||
CONTINUATION = '\\' + os.linesep
|
||||
|
||||
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
|
||||
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
|
||||
|
||||
|
||||
def _coerce_str(value):
|
||||
if not value:
|
||||
return ''
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
#############################
|
||||
# directives
|
||||
|
||||
DIRECTIVE_START = r'''
|
||||
(?:
|
||||
^ \s*
|
||||
[#] \s*
|
||||
)'''
|
||||
DIRECTIVE_TEXT = r'''
|
||||
(?:
|
||||
(?: \s+ ( .*\S ) )?
|
||||
\s* $
|
||||
)'''
|
||||
DIRECTIVE = rf'''
|
||||
(?:
|
||||
{DIRECTIVE_START}
|
||||
(
|
||||
include |
|
||||
error | warning |
|
||||
pragma |
|
||||
define | undef |
|
||||
if | ifdef | ifndef | elseif | else | endif |
|
||||
__FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__
|
||||
)
|
||||
{DIRECTIVE_TEXT}
|
||||
)'''
|
||||
# (?:
|
||||
# [^\\\n] |
|
||||
# \\ [^\n] |
|
||||
# \\ \n
|
||||
# )+
|
||||
# ) \n
|
||||
# )'''
|
||||
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)
|
||||
|
||||
DEFINE = rf'''
|
||||
(?:
|
||||
{DIRECTIVE_START} define \s+
|
||||
(?:
|
||||
( \w*[a-zA-Z]\w* )
|
||||
(?: \s* [(] ([^)]*) [)] )?
|
||||
)
|
||||
{DIRECTIVE_TEXT}
|
||||
)'''
|
||||
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
|
||||
|
||||
|
||||
def parse_directive(line):
|
||||
"""Return the appropriate directive for the given line."""
|
||||
line = line.strip()
|
||||
if line.startswith('#'):
|
||||
line = line[1:].lstrip()
|
||||
line = '#' + line
|
||||
directive = line
|
||||
#directive = '#' + line
|
||||
while ' ' in directive:
|
||||
directive = directive.replace(' ', ' ')
|
||||
return _parse_directive(directive)
|
||||
|
||||
|
||||
def _parse_directive(line):
|
||||
m = DEFINE_RE.match(line)
|
||||
if m:
|
||||
name, args, text = m.groups()
|
||||
if args:
|
||||
args = [a.strip() for a in args.split(',')]
|
||||
return Macro(name, args, text)
|
||||
else:
|
||||
return Constant(name, text)
|
||||
|
||||
m = DIRECTIVE_RE.match(line)
|
||||
if not m:
|
||||
raise ValueError(f'unsupported directive {line!r}')
|
||||
kind, text = m.groups()
|
||||
if not text:
|
||||
if kind not in ('else', 'endif'):
|
||||
raise ValueError(f'missing text in directive {line!r}')
|
||||
elif kind in ('else', 'endif', 'define'):
|
||||
raise ValueError(f'unexpected text in directive {line!r}')
|
||||
if kind == 'include':
|
||||
directive = Include(text)
|
||||
elif kind in IfDirective.KINDS:
|
||||
directive = IfDirective(kind, text)
|
||||
else:
|
||||
directive = OtherDirective(kind, text)
|
||||
directive.validate()
|
||||
return directive
|
||||
|
||||
|
||||
class PreprocessorDirective(util._NTBase):
|
||||
"""The base class for directives."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
KINDS = frozenset([
|
||||
'include',
|
||||
'pragma',
|
||||
'error', 'warning',
|
||||
'define', 'undef',
|
||||
'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
|
||||
'__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
|
||||
])
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return ' '.join(v for v in self[1:] if v and v.strip()) or None
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if not self.kind:
|
||||
raise TypeError('missing kind')
|
||||
elif self.kind not in self.KINDS:
|
||||
raise ValueError
|
||||
|
||||
# text can be anything, including None.
|
||||
|
||||
|
||||
class Constant(PreprocessorDirective,
|
||||
namedtuple('Constant', 'kind name value')):
|
||||
"""A single "constant" directive ("define")."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, name, value=None):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
'define',
|
||||
name=_coerce_str(name) or None,
|
||||
value=_coerce_str(value) or None,
|
||||
)
|
||||
return self
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if not self.name:
|
||||
raise TypeError('missing name')
|
||||
elif not IDENTIFIER_RE.match(self.name):
|
||||
raise ValueError(f'name must be identifier, got {self.name!r}')
|
||||
|
||||
# value can be anything, including None
|
||||
|
||||
|
||||
class Macro(PreprocessorDirective,
|
||||
namedtuple('Macro', 'kind name args body')):
|
||||
"""A single "macro" directive ("define")."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, name, args, body=None):
|
||||
# "args" must be a string or an iterable of strings (or "empty").
|
||||
if isinstance(args, str):
|
||||
args = [v.strip() for v in args.split(',')]
|
||||
if args:
|
||||
args = tuple(_coerce_str(a) or None for a in args)
|
||||
self = super().__new__(
|
||||
cls,
|
||||
kind='define',
|
||||
name=_coerce_str(name) or None,
|
||||
args=args if args else (),
|
||||
body=_coerce_str(body) or None,
|
||||
)
|
||||
return self
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
if self.body:
|
||||
return f'{self.name}({", ".join(self.args)}) {self.body}'
|
||||
else:
|
||||
return f'{self.name}({", ".join(self.args)})'
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if not self.name:
|
||||
raise TypeError('missing name')
|
||||
elif not IDENTIFIER_RE.match(self.name):
|
||||
raise ValueError(f'name must be identifier, got {self.name!r}')
|
||||
|
||||
for arg in self.args:
|
||||
if not arg:
|
||||
raise ValueError(f'missing arg in {self.args}')
|
||||
elif not IDENTIFIER_RE.match(arg):
|
||||
raise ValueError(f'arg must be identifier, got {arg!r}')
|
||||
|
||||
# body can be anything, including None
|
||||
|
||||
|
||||
class IfDirective(PreprocessorDirective,
|
||||
namedtuple('IfDirective', 'kind condition')):
|
||||
"""A single conditional directive (e.g. "if", "ifdef").
|
||||
|
||||
This only includes directives that actually provide conditions. The
|
||||
related directives "else" and "endif" are covered by OtherDirective
|
||||
instead.
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
KINDS = frozenset([
|
||||
'if',
|
||||
'ifdef',
|
||||
'ifndef',
|
||||
'elseif',
|
||||
])
|
||||
|
||||
@classmethod
|
||||
def _condition_from_raw(cls, raw, kind):
|
||||
#return Condition.from_raw(raw, _kind=kind)
|
||||
condition = _coerce_str(raw)
|
||||
if not condition:
|
||||
return None
|
||||
|
||||
if kind == 'ifdef':
|
||||
condition = f'defined({condition})'
|
||||
elif kind == 'ifndef':
|
||||
condition = f'! defined({condition})'
|
||||
|
||||
return condition
|
||||
|
||||
def __new__(cls, kind, condition):
|
||||
kind = _coerce_str(kind)
|
||||
self = super().__new__(
|
||||
cls,
|
||||
kind=kind or None,
|
||||
condition=cls._condition_from_raw(condition, kind),
|
||||
)
|
||||
return self
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
if self.kind == 'ifdef':
|
||||
return self.condition[8:-1] # strip "defined("
|
||||
elif self.kind == 'ifndef':
|
||||
return self.condition[10:-1] # strip "! defined("
|
||||
else:
|
||||
return self.condition
|
||||
#return str(self.condition)
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if not self.condition:
|
||||
raise TypeError('missing condition')
|
||||
#else:
|
||||
# for cond in self.condition:
|
||||
# if not cond:
|
||||
# raise ValueError(f'missing condition in {self.condition}')
|
||||
# cond.validate()
|
||||
# if self.kind in ('ifdef', 'ifndef'):
|
||||
# if len(self.condition) != 1:
|
||||
# raise ValueError('too many condition')
|
||||
# if self.kind == 'ifdef':
|
||||
# if not self.condition[0].startswith('defined '):
|
||||
# raise ValueError('bad condition')
|
||||
# else:
|
||||
# if not self.condition[0].startswith('! defined '):
|
||||
# raise ValueError('bad condition')
|
||||
|
||||
|
||||
class Include(PreprocessorDirective,
|
||||
namedtuple('Include', 'kind file')):
|
||||
"""A single "include" directive.
|
||||
|
||||
Supported "file" values are either follow the bracket style
|
||||
(<stdio>) or double quotes ("spam.h").
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, file):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
kind='include',
|
||||
file=_coerce_str(file) or None,
|
||||
)
|
||||
return self
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if not self.file:
|
||||
raise TypeError('missing file')
|
||||
|
||||
|
||||
class OtherDirective(PreprocessorDirective,
|
||||
namedtuple('OtherDirective', 'kind text')):
|
||||
"""A single directive not covered by another class.
|
||||
|
||||
This includes the "else", "endif", and "undef" directives, which are
|
||||
otherwise inherently related to the directives covered by the
|
||||
Constant, Macro, and IfCondition classes.
|
||||
|
||||
Note that all directives must have a text value, except for "else"
|
||||
and "endif" (which must have no text).
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS
|
||||
|
||||
def __new__(cls, kind, text):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
kind=_coerce_str(kind) or None,
|
||||
text=_coerce_str(text) or None,
|
||||
)
|
||||
return self
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
super().validate()
|
||||
|
||||
if self.text:
|
||||
if self.kind in ('else', 'endif'):
|
||||
raise ValueError('unexpected text in directive')
|
||||
elif self.kind not in ('else', 'endif'):
|
||||
raise TypeError('missing text')
|
||||
|
||||
|
||||
#############################
|
||||
# iterating lines
|
||||
|
||||
def _recompute_conditions(directive, ifstack):
|
||||
if directive.kind in ('if', 'ifdef', 'ifndef'):
|
||||
ifstack.append(
|
||||
([], directive.condition))
|
||||
elif directive.kind == 'elseif':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
else:
|
||||
negated = []
|
||||
ifstack.append(
|
||||
(negated, directive.condition))
|
||||
elif directive.kind == 'else':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
ifstack.append(
|
||||
(negated, None))
|
||||
elif directive.kind == 'endif':
|
||||
if ifstack:
|
||||
ifstack.pop()
|
||||
|
||||
conditions = []
|
||||
for negated, active in ifstack:
|
||||
for condition in negated:
|
||||
conditions.append(f'! ({condition})')
|
||||
if active:
|
||||
conditions.append(active)
|
||||
return tuple(conditions)
|
||||
|
||||
|
||||
def _iter_clean_lines(lines):
|
||||
lines = iter(enumerate(lines, 1))
|
||||
for lno, line in lines:
|
||||
# Handle line continuations.
|
||||
while line.endswith(CONTINUATION):
|
||||
try:
|
||||
lno, _line = next(lines)
|
||||
except StopIteration:
|
||||
break
|
||||
line = line[:-len(CONTINUATION)] + ' ' + _line
|
||||
|
||||
# Deal with comments.
|
||||
after = line
|
||||
line = ''
|
||||
while True:
|
||||
# Look for a comment.
|
||||
before, begin, remainder = after.partition('/*')
|
||||
if '//' in before:
|
||||
before, _, _ = before.partition('//')
|
||||
line += before + ' ' # per the C99 spec
|
||||
break
|
||||
line += before
|
||||
if not begin:
|
||||
break
|
||||
line += ' ' # per the C99 spec
|
||||
|
||||
# Go until we find the end of the comment.
|
||||
_, end, after = remainder.partition('*/')
|
||||
while not end:
|
||||
try:
|
||||
lno, remainder = next(lines)
|
||||
except StopIteration:
|
||||
raise Exception('unterminated comment')
|
||||
_, end, after = remainder.partition('*/')
|
||||
|
||||
yield lno, line
|
||||
|
||||
|
||||
def iter_lines(lines, *,
|
||||
_iter_clean_lines=_iter_clean_lines,
|
||||
_parse_directive=_parse_directive,
|
||||
_recompute_conditions=_recompute_conditions,
|
||||
):
|
||||
"""Yield (lno, line, directive, active conditions) for each given line.
|
||||
|
||||
This is effectively a subset of the operations taking place in
|
||||
translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
|
||||
section 5.1.1.2. Line continuations are removed and comments
|
||||
replaced with a single space. (In both cases "lno" will be the last
|
||||
line involved.) Otherwise each line is returned as-is.
|
||||
|
||||
"lno" is the (1-indexed) line number for the line.
|
||||
|
||||
"directive" will be a PreprocessorDirective or None, depending on
|
||||
whether or not there is a directive on the line.
|
||||
|
||||
"active conditions" is the set of preprocessor conditions (e.g.
|
||||
"defined()") under which the current line of code will be included
|
||||
in compilation. That set is derived from every conditional
|
||||
directive block (e.g. "if defined()", "ifdef", "else") containing
|
||||
that line. That includes nested directives. Note that the
|
||||
current line does not affect the active conditions for iteself.
|
||||
It only impacts subsequent lines. That applies to directives
|
||||
that close blocks (e.g. "endif") just as much as conditional
|
||||
directvies. Also note that "else" and "elseif" directives
|
||||
update the active conditions (for later lines), rather than
|
||||
adding to them.
|
||||
"""
|
||||
ifstack = []
|
||||
conditions = ()
|
||||
for lno, line in _iter_clean_lines(lines):
|
||||
stripped = line.strip()
|
||||
if not stripped.startswith('#'):
|
||||
yield lno, line, None, conditions
|
||||
continue
|
||||
|
||||
directive = '#' + stripped[1:].lstrip()
|
||||
while ' ' in directive:
|
||||
directive = directive.replace(' ', ' ')
|
||||
directive = _parse_directive(directive)
|
||||
yield lno, line, directive, conditions
|
||||
|
||||
if directive.kind in ('else', 'endif'):
|
||||
conditions = _recompute_conditions(directive, ifstack)
|
||||
elif isinstance(directive, IfDirective):
|
||||
conditions = _recompute_conditions(directive, ifstack)
|
||||
|
||||
|
||||
#############################
|
||||
# running (platform-specific?)
|
||||
|
||||
def _gcc(filename, *,
|
||||
_get_argv=(lambda: _get_gcc_argv()),
|
||||
_run=util.run_cmd,
|
||||
):
|
||||
argv = _get_argv()
|
||||
argv.extend([
|
||||
'-E', filename,
|
||||
])
|
||||
output = _run(argv)
|
||||
return output
|
||||
|
||||
|
||||
def _get_gcc_argv(*,
|
||||
_open=open,
|
||||
_run=util.run_cmd,
|
||||
):
|
||||
with _open('/tmp/print.mk', 'w') as tmpfile:
|
||||
tmpfile.write('print-%:\n')
|
||||
#tmpfile.write('\t@echo $* = $($*)\n')
|
||||
tmpfile.write('\t@echo $($*)\n')
|
||||
argv = ['/usr/bin/make',
|
||||
'-f', 'Makefile',
|
||||
'-f', '/tmp/print.mk',
|
||||
'print-CC',
|
||||
'print-PY_CORE_CFLAGS',
|
||||
]
|
||||
output = _run(argv)
|
||||
gcc, cflags = output.strip().splitlines()
|
||||
argv = shlex.split(gcc.strip())
|
||||
cflags = shlex.split(cflags.strip())
|
||||
return argv + cflags
|
||||
|
||||
|
||||
def run(filename, *,
|
||||
_gcc=_gcc,
|
||||
):
|
||||
"""Return the text of the given file after running the preprocessor."""
|
||||
return _gcc(filename)
|
||||
@ -0,0 +1,34 @@
|
||||
from . import preprocessor
|
||||
|
||||
|
||||
def iter_clean_lines(lines):
|
||||
incomment = False
|
||||
for line in lines:
|
||||
# Deal with comments.
|
||||
if incomment:
|
||||
_, sep, line = line.partition('*/')
|
||||
if sep:
|
||||
incomment = False
|
||||
continue
|
||||
line, _, _ = line.partition('//')
|
||||
line, sep, remainder = line.partition('/*')
|
||||
if sep:
|
||||
_, sep, after = remainder.partition('*/')
|
||||
if not sep:
|
||||
incomment = True
|
||||
continue
|
||||
line += ' ' + after
|
||||
|
||||
# Ignore blank lines and leading/trailing whitespace.
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
yield line
|
||||
|
||||
|
||||
def iter_lines(filename, *,
|
||||
preprocess=preprocessor.run,
|
||||
):
|
||||
content = preprocess(filename)
|
||||
return iter(content.splitlines())
|
||||
@ -0,0 +1,117 @@
|
||||
import os.path
|
||||
import shutil
|
||||
|
||||
from c_analyzer.common import util, info
|
||||
|
||||
from .info import Symbol
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * iter_symbols
|
||||
|
||||
NM_KINDS = {
|
||||
'b': Symbol.KIND.VARIABLE, # uninitialized
|
||||
'd': Symbol.KIND.VARIABLE, # initialized
|
||||
#'g': Symbol.KIND.VARIABLE, # uninitialized
|
||||
#'s': Symbol.KIND.VARIABLE, # initialized
|
||||
't': Symbol.KIND.FUNCTION,
|
||||
}
|
||||
|
||||
SPECIAL_SYMBOLS = {
|
||||
# binary format (e.g. ELF)
|
||||
'__bss_start',
|
||||
'__data_start',
|
||||
'__dso_handle',
|
||||
'_DYNAMIC',
|
||||
'_edata',
|
||||
'_end',
|
||||
'__environ@@GLIBC_2.2.5',
|
||||
'_GLOBAL_OFFSET_TABLE_',
|
||||
'__JCR_END__',
|
||||
'__JCR_LIST__',
|
||||
'__TMC_END__',
|
||||
}
|
||||
|
||||
|
||||
def _is_special_symbol(name):
|
||||
if name in SPECIAL_SYMBOLS:
|
||||
return True
|
||||
if '@@GLIBC' in name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def iter_symbols(binfile, *,
|
||||
nm=None,
|
||||
handle_id=None,
|
||||
_which=shutil.which,
|
||||
_run=util.run_cmd,
|
||||
):
|
||||
"""Yield a Symbol for each relevant entry reported by the "nm" command."""
|
||||
if nm is None:
|
||||
nm = _which('nm')
|
||||
if not nm:
|
||||
raise NotImplementedError
|
||||
if handle_id is None:
|
||||
handle_id = info.ID
|
||||
|
||||
argv = [nm,
|
||||
'--line-numbers',
|
||||
binfile,
|
||||
]
|
||||
try:
|
||||
output = _run(argv)
|
||||
except Exception:
|
||||
if nm is None:
|
||||
# XXX Use dumpbin.exe /SYMBOLS on Windows.
|
||||
raise NotImplementedError
|
||||
raise
|
||||
for line in output.splitlines():
|
||||
(name, kind, external, filename, funcname,
|
||||
) = _parse_nm_line(line)
|
||||
if kind != Symbol.KIND.VARIABLE:
|
||||
continue
|
||||
elif _is_special_symbol(name):
|
||||
continue
|
||||
yield Symbol(
|
||||
id=handle_id(filename, funcname, name),
|
||||
kind=kind,
|
||||
external=external,
|
||||
)
|
||||
|
||||
|
||||
def _parse_nm_line(line):
|
||||
_origline = line
|
||||
_, _, line = line.partition(' ') # strip off the address
|
||||
line = line.strip()
|
||||
|
||||
kind, _, line = line.partition(' ')
|
||||
line = line.strip()
|
||||
external = kind.isupper()
|
||||
kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)
|
||||
|
||||
name, _, filename = line.partition('\t')
|
||||
name = name.strip()
|
||||
if filename:
|
||||
filename = os.path.relpath(filename.partition(':')[0])
|
||||
else:
|
||||
filename = info.UNKNOWN
|
||||
|
||||
name, islocal = _parse_nm_name(name, kind)
|
||||
funcname = info.UNKNOWN if islocal else None
|
||||
return name, kind, external, filename, funcname
|
||||
|
||||
|
||||
def _parse_nm_name(name, kind):
|
||||
if kind != Symbol.KIND.VARIABLE:
|
||||
return name, None
|
||||
if _is_special_symbol(name):
|
||||
return name, None
|
||||
|
||||
actual, sep, digits = name.partition('.')
|
||||
if not sep:
|
||||
return name, False
|
||||
|
||||
if not digits.isdigit():
|
||||
raise Exception(f'got bogus name {name}')
|
||||
return actual, True
|
||||
@ -0,0 +1,175 @@
|
||||
import os
|
||||
import os.path
|
||||
import shutil
|
||||
|
||||
from ..common import files
|
||||
from ..common.info import UNKNOWN, ID
|
||||
from ..parser import find as p_find
|
||||
|
||||
from . import _nm
|
||||
from .info import Symbol
|
||||
|
||||
# XXX need tests:
|
||||
# * get_resolver()
|
||||
# * get_resolver_from_dirs()
|
||||
# * symbol()
|
||||
# * symbols()
|
||||
# * variables()
|
||||
|
||||
|
||||
def _resolve_known(symbol, knownvars):
|
||||
for varid in knownvars:
|
||||
if symbol.match(varid):
|
||||
break
|
||||
else:
|
||||
return None
|
||||
return knownvars.pop(varid)
|
||||
|
||||
|
||||
def get_resolver(filenames=None, known=None, *,
|
||||
handle_var,
|
||||
check_filename=None,
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
_from_source=p_find.variable_from_id,
|
||||
):
|
||||
"""Return a "resolver" func for the given known vars/types and filenames.
|
||||
|
||||
"handle_var" is a callable that takes (ID, decl) and returns a
|
||||
Variable. Variable.from_id is a suitable callable.
|
||||
|
||||
The returned func takes a single Symbol and returns a corresponding
|
||||
Variable. If the symbol was located then the variable will be
|
||||
valid, populated with the corresponding information. Otherwise None
|
||||
is returned.
|
||||
"""
|
||||
knownvars = (known or {}).get('variables')
|
||||
if knownvars:
|
||||
knownvars = dict(knownvars) # a copy
|
||||
if filenames:
|
||||
if check_filename is None:
|
||||
filenames = list(filenames)
|
||||
def check_filename(filename):
|
||||
return filename in filenames
|
||||
def resolve(symbol):
|
||||
# XXX Check "found" instead?
|
||||
if not check_filename(symbol.filename):
|
||||
return None
|
||||
found = _resolve_known(symbol, knownvars)
|
||||
if found is None:
|
||||
#return None
|
||||
varid, decl = _from_source(symbol, filenames,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
)
|
||||
found = handle_var(varid, decl)
|
||||
return found
|
||||
else:
|
||||
def resolve(symbol):
|
||||
return _resolve_known(symbol, knownvars)
|
||||
elif filenames:
|
||||
def resolve(symbol):
|
||||
varid, decl = _from_source(symbol, filenames,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
)
|
||||
return handle_var(varid, decl)
|
||||
else:
|
||||
def resolve(symbol):
|
||||
return None
|
||||
return resolve
|
||||
|
||||
|
||||
def get_resolver_from_dirs(dirnames, known=None, *,
|
||||
handle_var,
|
||||
suffixes=('.c',),
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
_iter_files=files.iter_files_by_suffix,
|
||||
_get_resolver=get_resolver,
|
||||
):
|
||||
"""Return a "resolver" func for the given known vars/types and filenames.
|
||||
|
||||
"dirnames" should be absolute paths. If not then they will be
|
||||
resolved relative to CWD.
|
||||
|
||||
See get_resolver().
|
||||
"""
|
||||
dirnames = [d if d.endswith(os.path.sep) else d + os.path.sep
|
||||
for d in dirnames]
|
||||
filenames = _iter_files(dirnames, suffixes)
|
||||
def check_filename(filename):
|
||||
for dirname in dirnames:
|
||||
if filename.startswith(dirname):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return _get_resolver(filenames, known,
|
||||
handle_var=handle_var,
|
||||
check_filename=check_filename,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
)
|
||||
|
||||
|
||||
def symbol(symbol, filenames, known=None, *,
|
||||
perfilecache=None,
|
||||
preprocessed=False,
|
||||
handle_id=None,
|
||||
_get_resolver=get_resolver,
|
||||
):
|
||||
"""Return a Variable for the one matching the given symbol.
|
||||
|
||||
"symbol" can be one of several objects:
|
||||
|
||||
* Symbol - use the contained info
|
||||
* name (str) - look for a global variable with that name
|
||||
* (filename, name) - look for named global in file
|
||||
* (filename, funcname, name) - look for named local in file
|
||||
|
||||
A name is always required. If the filename is None, "", or
|
||||
"UNKNOWN" then all files will be searched. If the funcname is
|
||||
"" or "UNKNOWN" then only local variables will be searched for.
|
||||
"""
|
||||
resolve = _get_resolver(known, filenames,
|
||||
handle_id=handle_id,
|
||||
perfilecache=perfilecache,
|
||||
preprocessed=preprocessed,
|
||||
)
|
||||
return resolve(symbol)
|
||||
|
||||
|
||||
def _get_platform_tool():
|
||||
if os.name == 'nt':
|
||||
# XXX Support this.
|
||||
raise NotImplementedError
|
||||
elif nm := shutil.which('nm'):
|
||||
return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def symbols(binfile, *,
|
||||
handle_id=None,
|
||||
_file_exists=os.path.exists,
|
||||
_get_platform_tool=_get_platform_tool,
|
||||
):
|
||||
"""Yield a Symbol for each one found in the binary."""
|
||||
if not _file_exists(binfile):
|
||||
raise Exception('executable missing (need to build it first?)')
|
||||
|
||||
_iter_symbols = _get_platform_tool()
|
||||
yield from _iter_symbols(binfile, handle_id)
|
||||
|
||||
|
||||
def variables(binfile, *,
|
||||
resolve,
|
||||
handle_id=None,
|
||||
_iter_symbols=symbols,
|
||||
):
|
||||
"""Yield (Variable, Symbol) for each found symbol."""
|
||||
for symbol in _iter_symbols(binfile, handle_id=handle_id):
|
||||
if symbol.kind != Symbol.KIND.VARIABLE:
|
||||
continue
|
||||
var = resolve(symbol) or None
|
||||
yield var, symbol
|
||||
@ -0,0 +1,51 @@
|
||||
from collections import namedtuple
|
||||
|
||||
from c_analyzer.common.info import ID
|
||||
from c_analyzer.common.util import classonly, _NTBase
|
||||
|
||||
|
||||
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
|
||||
"""Info for a single compilation symbol."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
class KIND:
|
||||
VARIABLE = 'variable'
|
||||
FUNCTION = 'function'
|
||||
OTHER = 'other'
|
||||
|
||||
@classonly
|
||||
def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
|
||||
"""Return a new symbol based on the given name."""
|
||||
id = ID(filename, None, name)
|
||||
return cls(id, kind, external)
|
||||
|
||||
def __new__(cls, id, kind=KIND.VARIABLE, external=None):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
id=ID.from_raw(id),
|
||||
kind=str(kind) if kind else None,
|
||||
external=bool(external) if external is not None else None,
|
||||
)
|
||||
return self
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.id)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self.id, name)
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
if not self.id:
|
||||
raise TypeError('missing id')
|
||||
else:
|
||||
self.id.validate()
|
||||
|
||||
if not self.kind:
|
||||
raise TypeError('missing kind')
|
||||
elif self.kind not in vars(self.KIND).values():
|
||||
raise ValueError(f'unsupported kind {self.kind}')
|
||||
|
||||
if self.external is None:
|
||||
raise TypeError('missing external')
|
||||
@ -0,0 +1,75 @@
|
||||
from ..common import files
|
||||
from ..common.info import UNKNOWN
|
||||
from ..parser import (
|
||||
find as p_find,
|
||||
)
|
||||
from ..symbols import (
|
||||
info as s_info,
|
||||
find as s_find,
|
||||
)
|
||||
from .info import Variable
|
||||
|
||||
# XXX need tests:
|
||||
# * vars_from_source
|
||||
|
||||
|
||||
def _remove_cached(cache, var):
|
||||
if not cache:
|
||||
return
|
||||
try:
|
||||
cached = cache[var.filename]
|
||||
cached.remove(var)
|
||||
except (KeyError, IndexError):
|
||||
pass
|
||||
|
||||
|
||||
def vars_from_binary(binfile, *,
|
||||
known=None,
|
||||
filenames=None,
|
||||
handle_id=None,
|
||||
check_filename=None,
|
||||
handle_var=Variable.from_id,
|
||||
_iter_vars=s_find.variables,
|
||||
_get_symbol_resolver=s_find.get_resolver,
|
||||
):
|
||||
"""Yield a Variable for each found Symbol.
|
||||
|
||||
Details are filled in from the given "known" variables and types.
|
||||
"""
|
||||
cache = {}
|
||||
resolve = _get_symbol_resolver(filenames, known,
|
||||
handle_var=handle_var,
|
||||
check_filename=check_filename,
|
||||
perfilecache=cache,
|
||||
)
|
||||
for var, symbol in _iter_vars(binfile,
|
||||
resolve=resolve,
|
||||
handle_id=handle_id,
|
||||
):
|
||||
if var is None:
|
||||
var = Variable(symbol.id, UNKNOWN, UNKNOWN)
|
||||
yield var
|
||||
_remove_cached(cache, var)
|
||||
|
||||
|
||||
def vars_from_source(filenames, *,
|
||||
preprocessed=None,
|
||||
known=None,
|
||||
handle_id=None,
|
||||
handle_var=Variable.from_id,
|
||||
iter_vars=p_find.variables,
|
||||
):
|
||||
"""Yield a Variable for each declaration in the raw source code.
|
||||
|
||||
Details are filled in from the given "known" variables and types.
|
||||
"""
|
||||
cache = {}
|
||||
for varid, decl in iter_vars(filenames or (),
|
||||
perfilecache=cache,
|
||||
preprocessed=preprocessed,
|
||||
known=known,
|
||||
handle_id=handle_id,
|
||||
):
|
||||
var = handle_var(varid, decl)
|
||||
yield var
|
||||
_remove_cached(cache, var)
|
||||
@ -0,0 +1,93 @@
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.info import ID, UNKNOWN
|
||||
from ..common.util import classonly, _NTBase
|
||||
|
||||
|
||||
def normalize_vartype(vartype):
|
||||
"""Return the canonical form for a variable type (or func signature)."""
|
||||
# We allow empty strring through for semantic reasons.
|
||||
if vartype is None:
|
||||
return None
|
||||
|
||||
# XXX finish!
|
||||
# XXX Return (modifiers, type, pointer)?
|
||||
return str(vartype)
|
||||
|
||||
|
||||
# XXX Variable.vartype -> decl (Declaration).
|
||||
|
||||
class Variable(_NTBase,
|
||||
namedtuple('Variable', 'id storage vartype')):
|
||||
"""Information about a single variable declaration."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
STORAGE = (
|
||||
'static',
|
||||
'extern',
|
||||
'implicit',
|
||||
'local',
|
||||
)
|
||||
|
||||
@classonly
|
||||
def from_parts(cls, filename, funcname, name, decl, storage=None):
|
||||
varid = ID(filename, funcname, name)
|
||||
if storage is None:
|
||||
self = cls.from_id(varid, decl)
|
||||
else:
|
||||
self = cls(varid, storage, decl)
|
||||
return self
|
||||
|
||||
@classonly
|
||||
def from_id(cls, varid, decl):
|
||||
from ..parser.declarations import extract_storage
|
||||
storage = extract_storage(decl, infunc=varid.funcname)
|
||||
return cls(varid, storage, decl)
|
||||
|
||||
def __new__(cls, id, storage, vartype):
|
||||
self = super().__new__(
|
||||
cls,
|
||||
id=ID.from_raw(id),
|
||||
storage=str(storage) if storage else None,
|
||||
vartype=normalize_vartype(vartype) if vartype else None,
|
||||
)
|
||||
return self
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.id)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self.id, name)
|
||||
|
||||
def _validate_id(self):
|
||||
if not self.id:
|
||||
raise TypeError('missing id')
|
||||
|
||||
if not self.filename or self.filename == UNKNOWN:
|
||||
raise TypeError(f'id missing filename ({self.id})')
|
||||
|
||||
if self.funcname and self.funcname == UNKNOWN:
|
||||
raise TypeError(f'id missing funcname ({self.id})')
|
||||
|
||||
self.id.validate()
|
||||
|
||||
def validate(self):
|
||||
"""Fail if the object is invalid (i.e. init with bad data)."""
|
||||
self._validate_id()
|
||||
|
||||
if self.storage is None or self.storage == UNKNOWN:
|
||||
raise TypeError('missing storage')
|
||||
elif self.storage not in self.STORAGE:
|
||||
raise ValueError(f'unsupported storage {self.storage:r}')
|
||||
|
||||
if self.vartype is None or self.vartype == UNKNOWN:
|
||||
raise TypeError('missing vartype')
|
||||
|
||||
@property
|
||||
def isglobal(self):
|
||||
return self.storage != 'local'
|
||||
|
||||
@property
|
||||
def isconst(self):
|
||||
return 'const' in self.vartype.split()
|
||||
@ -0,0 +1,91 @@
|
||||
import csv
|
||||
|
||||
from ..common.info import ID, UNKNOWN
|
||||
from ..common.util import read_tsv
|
||||
from .info import Variable
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * read_file()
|
||||
# * look_up_variable()
|
||||
|
||||
|
||||
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
|
||||
HEADER = '\t'.join(COLUMNS)
|
||||
|
||||
|
||||
def read_file(infile, *,
|
||||
_read_tsv=read_tsv,
|
||||
):
|
||||
"""Yield (kind, id, decl) for each row in the data file.
|
||||
|
||||
The caller is responsible for validating each row.
|
||||
"""
|
||||
for row in _read_tsv(infile, HEADER):
|
||||
filename, funcname, name, kind, declaration = row
|
||||
if not funcname or funcname == '-':
|
||||
funcname = None
|
||||
id = ID(filename, funcname, name)
|
||||
yield kind, id, declaration
|
||||
|
||||
|
||||
def from_file(infile, *,
|
||||
handle_var=Variable.from_id,
|
||||
_read_file=read_file,
|
||||
):
|
||||
"""Return the info for known declarations in the given file."""
|
||||
known = {
|
||||
'variables': {},
|
||||
#'types': {},
|
||||
#'constants': {},
|
||||
#'macros': {},
|
||||
}
|
||||
for kind, id, decl in _read_file(infile):
|
||||
if kind == 'variable':
|
||||
values = known['variables']
|
||||
value = handle_var(id, decl)
|
||||
else:
|
||||
raise ValueError(f'unsupported kind in row {row}')
|
||||
value.validate()
|
||||
values[id] = value
|
||||
return known
|
||||
|
||||
|
||||
def look_up_variable(varid, knownvars, *,
|
||||
match_files=(lambda f1, f2: f1 == f2),
|
||||
):
|
||||
"""Return the known Variable matching the given ID.
|
||||
|
||||
"knownvars" is a mapping of ID to Variable.
|
||||
|
||||
"match_files" is used to verify if two filenames point to
|
||||
the same file.
|
||||
|
||||
If no match is found then None is returned.
|
||||
"""
|
||||
if not knownvars:
|
||||
return None
|
||||
|
||||
if varid.funcname == UNKNOWN:
|
||||
if not varid.filename or varid.filename == UNKNOWN:
|
||||
for varid in knownvars:
|
||||
if not varid.funcname:
|
||||
continue
|
||||
if varid.name == varid.name:
|
||||
return knownvars[varid]
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
for varid in knownvars:
|
||||
if not varid.funcname:
|
||||
continue
|
||||
if not match_files(varid.filename, varid.filename):
|
||||
continue
|
||||
if varid.name == varid.name:
|
||||
return knownvars[varid]
|
||||
else:
|
||||
return None
|
||||
elif not varid.filename or varid.filename == UNKNOWN:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return knownvars.get(varid.id)
|
||||
Reference in New Issue
Block a user