first commit

This commit is contained in:
2025-08-07 13:15:31 +01:00
commit d903893b4c
21854 changed files with 4461308 additions and 0 deletions

View File

@ -0,0 +1,329 @@
# The code here consists of hacks for pre-populating the known.tsv file.
from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files
# "Plain old types": C type prefixes (each with a trailing space) that mark
# a line as a candidate global declaration even without "static".
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
# Also accept the const-qualified form of every prefix above.
POTS = POTS + tuple(f'const {prefix}' for prefix in POTS)

# Type-name prefixes treated as struct-typed globals when the line ends
# with an initializer (see _parse_global()).
STRUCTS = (
    'PyTypeObject',
    'PyObject',
    'PyMethodDef',
    'PyModuleDef',
    'grammar',
)
def _parse_global(line, funcname=None):
line = line.strip()
if line.startswith('static '):
if '(' in line and '[' not in line and ' = ' not in line:
return None, None
name, decl = parse_variable_declaration(line)
elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
name, decl = parse_variable_declaration(line)
elif line.startswith('_Py_static_string('):
decl = line.strip(';').strip()
name = line.split('(')[1].split(',')[0].strip()
elif line.startswith('_Py_IDENTIFIER('):
decl = line.strip(';').strip()
name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
elif funcname:
return None, None
# global-only
elif line.startswith('PyAPI_DATA('): # only in .h files
name, decl = parse_variable_declaration(line)
elif line.startswith('extern '): # only in .h files
name, decl = parse_variable_declaration(line)
elif line.startswith('PyDoc_VAR('):
decl = line.strip(';').strip()
name = line.split('(')[1].split(')')[0].strip()
elif line.startswith(POTS): # implied static
if '(' in line and '[' not in line and ' = ' not in line:
return None, None
name, decl = parse_variable_declaration(line)
elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static
name, decl = parse_variable_declaration(line)
elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static
name, decl = parse_variable_declaration(line)
elif line.startswith('struct '):
if not line.endswith(' = {'):
return None, None
if not line.partition(' ')[2].startswith(STRUCTS):
return None, None
# implied static
name, decl = parse_variable_declaration(line)
# file-specific
elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
# Objects/typeobject.c
funcname = line.split('(')[1].split(',')[0]
return [
('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
]
elif line.startswith('WRAP_METHOD('):
# Objects/weakrefobject.c
funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
return [
('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
]
else:
return None, None
return name, decl
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the cached variable for (filename, funcname, name).

    Args:
        varcache: dict mapping a filename to ``{variable ID: variable}``.
            A file is parsed (with _parse_global) and added the first time
            it is requested.
        filename: the source file to look in.
        funcname: enclosing function name; when equal to UNKNOWN the first
            cached variable with a matching name is taken, whatever its
            function.
        name: the variable name.

    Returns:
        The matching variable (now removed from the cache) or None.
    """
    # Look for the file, parsing it on first use.
    try:
        per_file = varcache[filename]
    except KeyError:
        per_file = varcache[filename] = {}
        parsed_vars = _iter_variables(filename, parse_variable=_parse_global)
        for parsed in parsed_vars:
            parsed._isglobal = True
            per_file[parsed.id] = parsed
        # Report what was found in the newly parsed file.
        for key in per_file:
            print(' ', key)

    # Look for the variable.
    if funcname != UNKNOWN:
        return per_file.pop((filename, funcname, name), None)

    match = next((key for key in per_file if key.name == name), None)
    if match is None:
        return None
    return per_file.pop(match)
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Find (and remove from the cache) the parsed variable matching varid.

    Args:
        varid: object with ``filename``, ``funcname`` and ``name``.
        varcache: per-file variable cache, as used by _pop_cached().
        allfilenames: every candidate source filename.

    Returns:
        The matching variable, or None if nothing matched.
    """
    file_is_known = bool(varid.filename) and varid.filename != UNKNOWN

    # Search only the named file when there is one, else every file.
    candidates = [varid.filename] if file_is_known else allfilenames
    for candidate in candidates:
        found = _pop_cached(varcache, candidate, varid.funcname, varid.name)
        if found is not None:
            return found

    # Fallback: a global attributed to a specific file is retried against
    # the header files.
    if file_is_known and varid.funcname is None:
        headers = (fn for fn in allfilenames if fn.endswith('.h'))
        for header in headers:
            found = _pop_cached(varcache, header, None, varid.name)
            if found is not None:
                return found
    return None
# Hard-coded declarations for globals, keyed by variable name.  Each value
# is either a full C declaration or a declaration prefix; _known() appends
# the variable name when the value does not already contain it.
# (Presumably these are declared across several source lines, which the
# line-based parser cannot handle -- TODO confirm.)
MULTILINE = {
    # Python/Python-ast.c
    # NOTE(review): these eight omit "static " unlike the other
    # *_singleton entries below -- confirm whether that is intentional.
    'Load_singleton': 'PyObject *',
    'Store_singleton': 'PyObject *',
    'Del_singleton': 'PyObject *',
    'AugLoad_singleton': 'PyObject *',
    'AugStore_singleton': 'PyObject *',
    'Param_singleton': 'PyObject *',
    'And_singleton': 'PyObject *',
    'Or_singleton': 'PyObject *',
    'Add_singleton': 'static PyObject *',
    'Sub_singleton': 'static PyObject *',
    'Mult_singleton': 'static PyObject *',
    'MatMult_singleton': 'static PyObject *',
    'Div_singleton': 'static PyObject *',
    'Mod_singleton': 'static PyObject *',
    'Pow_singleton': 'static PyObject *',
    'LShift_singleton': 'static PyObject *',
    'RShift_singleton': 'static PyObject *',
    'BitOr_singleton': 'static PyObject *',
    'BitXor_singleton': 'static PyObject *',
    'BitAnd_singleton': 'static PyObject *',
    'FloorDiv_singleton': 'static PyObject *',
    'Invert_singleton': 'static PyObject *',
    'Not_singleton': 'static PyObject *',
    'UAdd_singleton': 'static PyObject *',
    'USub_singleton': 'static PyObject *',
    'Eq_singleton': 'static PyObject *',
    'NotEq_singleton': 'static PyObject *',
    'Lt_singleton': 'static PyObject *',
    'LtE_singleton': 'static PyObject *',
    'Gt_singleton': 'static PyObject *',
    'GtE_singleton': 'static PyObject *',
    'Is_singleton': 'static PyObject *',
    'IsNot_singleton': 'static PyObject *',
    'In_singleton': 'static PyObject *',
    'NotIn_singleton': 'static PyObject *',
    # Python/symtable.c
    'top': 'static identifier ',
    'lambda': 'static identifier ',
    'genexpr': 'static identifier ',
    'listcomp': 'static identifier ',
    'setcomp': 'static identifier ',
    'dictcomp': 'static identifier ',
    '__class__': 'static identifier ',
    # Python/compile.c
    '__doc__': 'static PyObject *',
    '__annotations__': 'static PyObject *',
    # Objects/floatobject.c
    'double_format': 'static float_format_type ',
    'float_format': 'static float_format_type ',
    'detected_double_format': 'static float_format_type ',
    'detected_float_format': 'static float_format_type ',
    # Parser/listnode.c
    'level': 'static int ',
    'atbol': 'static int ',
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}
# Hard-coded declarations for globals whose type is an anonymous struct
# written out in the declaration itself, keyed by variable name.  Values
# ending in a space are prefixes; _known() appends the variable name when
# the value does not already contain it.
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
    # Modules/faulthandler.c
    'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
    'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
    # Modules/signalmodule.c
    'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
    'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
}
# Hard-coded declarations for globals that are function pointers, keyed by
# variable name.  Every value here is a complete declaration that already
# contains the name, so _known() uses it unchanged.
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
}
# Hard-coded declarations for a few more globals, keyed by variable name.
# Values ending in a space are prefixes to which _known() appends the name.
# (Presumably "implied" refers to storage that is not spelled out in the
# source -- TODO confirm.)
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}
# Every hard-coded global declaration, merged into one lookup table keyed
# by variable name.  On a duplicate key the later table wins.
GLOBALS = {
    **MULTILINE,
    **INLINE,
    **FUNC,
    **IMPLIED,
}
# Hard-coded function-local static variables, keyed by variable name.
# Each value is (filename, funcname, declaration); _known() uses it to
# build the variable's ID when the symbol's own file and function are
# both UNKNOWN.
LOCALS = {
    'buildinfo': ('Modules/getbuildinfo.c',
                  'Py_GetBuildInfo',
                  'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    'methods': ('Python/codecs.c',
                '_PyCodecRegistry_Init',
                'static struct { char *name; PyMethodDef def; } methods[]'),
}
def _known(symbol):
    """Build a Variable for a symbol from the hard-coded tables.

    Args:
        symbol: object with ``name``, ``filename``, ``funcname`` and ``id``.

    Returns:
        ``Variable(varid, 'static', decl)`` where decl always contains the
        symbol's name.

    Raises:
        KeyError: the symbol is not covered by LOCALS, GLOBALS or the
            name-based fallbacks.
    """
    name = symbol.name

    if symbol.funcname:
        # Locals are only looked up when nothing at all is known about
        # where the symbol lives.
        if not (symbol.funcname == UNKNOWN and symbol.filename == UNKNOWN):
            raise KeyError(name)
        filename, funcname, decl = LOCALS[name]
        varid = ID(filename, funcname, name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(name)
    else:
        varid = symbol.id
        if name in GLOBALS:
            decl = GLOBALS[name]
        elif name.endswith('_methods'):
            decl = 'static PyMethodDef '
        elif (symbol.filename == 'Objects/exceptions.c'
              and name.startswith(('PyExc_', '_PyExc_'))):
            decl = 'static PyTypeObject '
        else:
            raise KeyError(name)

    # Table values may be bare prefixes; complete them with the name.
    if name not in decl:
        decl += name
    return Variable(varid, 'static', decl)
def known_row(varid, decl):
    """Return the known.tsv row for one variable.

    Args:
        varid: object with ``filename``, ``funcname`` and ``name``.
        decl: the variable's declaration text.

    Returns:
        A 5-tuple ``(filename, funcname, name, 'variable', decl)``; a false
        funcname is written as ``'-'``.
    """
    filename = varid.filename
    scope = varid.funcname or '-'
    return (filename, scope, varid.name, 'variable', decl)
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one known.tsv row per symbol.

    Each symbol is resolved from the hard-coded tables first, then by
    searching the parsed source files; a symbol that cannot be resolved is
    emitted with an UNKNOWN declaration.

    Args:
        symbols: iterable of symbols to resolve.
        cached: must be true; the uncached path is not implemented.

    Raises:
        NotImplementedError: ``cached`` is false.  Because this is a
            generator, the error surfaces on first iteration.
    """
    filenames = list(_get_filenames())
    cache = {}

    if not cached:
        raise NotImplementedError  # XXX incorporate KNOWN
        # Unreachable for now: sketch of the uncached path, kept until the
        # XXX above is resolved.
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)

    for symbol in symbols:
        try:
            resolved = _known(symbol)
        except KeyError:
            # Not hard-coded: fall back to the parsed sources.
            resolved = _find_match(symbol, cache, filenames)
            if resolved is None:
                resolved = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(resolved.id, resolved.vartype)
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the known-variables TSV for the given symbols.

    Args:
        symbols: iterable of symbols, passed to the row generator.
        filename: output path; when false, ``KNOWN_FILE + '.new'`` is used.
    """
    target = filename or KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
if __name__ == '__main__':
    # Script entry point: read the symbols from the Python binary and write
    # the default output file.
    from c_symbols import binary

    symbols = binary.iter_symbols(binary.PYTHON, find_local_symbol=None)
    generate(symbols)