first commit

This commit is contained in:
2025-08-07 13:15:31 +01:00
commit d903893b4c
21854 changed files with 4461308 additions and 0 deletions

View File

@ -0,0 +1,72 @@
#######################################
# C Globals and CPython Runtime State.
CPython's C code makes extensive use of global variables (whether static
globals or static locals). Each such variable falls into one of several
categories:
* strictly const data
* used exclusively in main or in the REPL
* process-global state (e.g. managing process-level resources
like signals and file descriptors)
* Python "global" runtime state
* per-interpreter runtime state
The last one can be a problem as soon as anyone creates a second
interpreter (AKA "subinterpreter") in a process. It is definitely a
problem under subinterpreters if they are no longer sharing the GIL,
since the GIL protects us from a lot of race conditions. Keep in mind
that ultimately *all* objects (PyObject) should be treated as
per-interpreter state. This includes "static types", freelists,
_PyIdentifier, and singletons. Take that in for a second. It has
significant implications on where we use static variables!
Be aware that module-global state (stored in C statics) is a kind of
per-interpreter state. There have been efforts across many years, and
still going, to provide extension module authors mechanisms to store
that state safely (see PEPs 3121, 489, etc.).
(Note that there has been discussion around support for running multiple
Python runtimes in the same process. That would end up with the same
problems, relative to static variables, that subinterpreters have.)
Historically we have been bad at keeping per-interpreter state out of
static variables, mostly because until recently subinterpreters were
not widely used nor even factored in to solutions. However, the
feature is growing in popularity and use in the community.
Mandate: "Eliminate use of static variables for per-interpreter state."
The "c-statics.py" script in this directory, along with its accompanying
data files, is part of the effort to resolve existing problems with
our use of static variables and to prevent future problems.
#-------------------------
## statics for actually-global state (and runtime state consolidation)
In general, holding any kind of state in static variables
increases maintenance burden and increases the complexity of code (e.g.
we use TSS to identify the active thread state). So it is a good idea
to avoid using statics for state even if for the "global" runtime or
for process-global state.
Relative to maintenance burden, one problem is where the runtime
state is spread throughout the codebase in dozens of individual
globals. Unlike the other globals, the runtime state represents a set
of values that are constantly shifting in a complex way. When they are
spread out it's harder to get a clear picture of what the runtime
involves. Furthermore, when they are spread out it complicates efforts
that change the runtime.
Consequently, the globals for Python's runtime state have been
consolidated under a single top-level _PyRuntime global. No new globals
should be added for runtime state. Instead, they should be added to
_PyRuntimeState or one of its sub-structs. The tools in this directory
are run as part of the test suite to ensure that no new globals have
been added. The script can be run manually as well:
./python Lib/test/test_c_statics/c-statics.py check
If it reports any globals then they should be resolved. If the globals
are runtime state then they should be folded into _PyRuntimeState.
Otherwise they should be marked as ignored.

View File

@ -0,0 +1,29 @@
import os.path
import sys
# The directory that contains this package (two dirname() steps up from
# this file), as an absolute path.
TOOL_ROOT = os.path.abspath(
    os.path.dirname(os.path.dirname(__file__)))
# Data files live directly in the tool directory.
DATA_DIR = TOOL_ROOT
# Two more levels up from the tool directory.
REPO_ROOT = os.path.dirname(os.path.dirname(TOOL_ROOT))

INCLUDE_DIRS = [os.path.join(REPO_ROOT, 'Include')]
SOURCE_DIRS = [
    os.path.join(REPO_ROOT, dirname)
    for dirname in ('Python', 'Parser', 'Objects', 'Modules')
]

#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable

# Clean up the namespace.
del sys, os

View File

@ -0,0 +1,212 @@
import argparse
import re
import sys
from c_analyzer.common import show
from c_analyzer.common.info import UNKNOWN
from . import SOURCE_DIRS
from .find import supported_vars
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from .supported import IGNORED_FILE
def _check_results(unknown, knownvars, used):
    """Report mismatches between the known variables and those found.

    "unknown" is a set of found variables that could not be resolved,
    "knownvars" is an iterable of known variable IDs, and "used" is
    the set of known IDs that were matched.  Diagnostics are printed.
    An Exception is raised if any known ID went unused or if any
    unknown variable remains once the special cases are dropped.
    """
    def _unused_globals_named(variable):
        # Known globals that were never used and share the variable's
        # name (and its filename, when the variable has a real one).
        anyfile = not variable.filename or variable.filename == UNKNOWN
        return [
            varid
            for varid in knownvars
            if varid not in used
            and varid.funcname is None
            and varid.name == variable.name
            and (anyfile or variable.filename == varid.filename)
        ]

    badknown = set()
    # Iterate over a sorted copy since entries may be removed below.
    for variable in sorted(unknown):
        if variable.funcname != UNKNOWN:
            #raise Exception(msg)
            print(f'could not find global symbol {variable.id}')
            continue
        matches = _unused_globals_named(variable)
        if matches:
            badknown.update(matches)
        elif variable.name in ('completed', 'id'):
            # XXX Figure out where these variables are.
            unknown.remove(variable)
        else:
            #raise Exception(msg)
            print(f'could not find local symbol {variable.id}')

    if badknown:
        print('---')
        print(f'{len(badknown)} globals in known.tsv, but may actually be local:')
        for varid in sorted(badknown):
            print(f'{varid.filename:30} {varid.name}')

    # XXX Figure out where the "id" variables are.
    leftover = set(knownvars) - used
    unused = sorted(varid for varid in leftover if varid.name != 'id')
    if unused:
        print('---')
        print(f'did not use {len(unused)} known vars:')
        for varid in unused:
            print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}')
        raise Exception('not all known symbols used')

    if unknown:
        print('---')
        raise Exception('could not find all symbols')
# XXX Move this check to its own command.
def cmd_check_cache(cmd, *,
                    known=KNOWN_FILE,
                    ignored=IGNORED_FILE,
                    _known_from_file=known_from_file,
                    _find=supported_vars,
                    ):
    """Check the "known" data against the variables that are found.

    The known data is loaded from the given file.  Each found variable
    reported with a support status of None is collected as unknown;
    the ID of every other variable is recorded as used.  The outcome
    is then handed to _check_results().
    """
    known = _known_from_file(known)

    used = set()
    unknown = set()
    for var, supported in _find(known=known, ignored=ignored):
        if supported is not None:
            used.add(var.id)
        else:
            unknown.add(var)

    _check_results(unknown, known['variables'], used)
def cmd_check(cmd, *,
              known=KNOWN_FILE,
              ignored=IGNORED_FILE,
              _find=supported_vars,
              _show=show.basic,
              _print=print,
              ):
    """
    Fail if there are unsupported global variables.

    In the failure case, the list of unsupported variables
    is printed out and the process exits with status 1.
    """
    # Anything not reported as supported counts (including None).
    unsupported = [
        var
        for var, supported in _find(known=known, ignored=ignored)
        if not supported
    ]
    if unsupported:
        _print('ERROR: found unsupported global variables')
        _print()
        _show(sorted(unsupported))
        _print(f' ({len(unsupported)} total)')
        sys.exit(1)
    #_print('okay')
def cmd_show(cmd, *,
             known=KNOWN_FILE,
             ignored=IGNORED_FILE,
             skip_objects=False,
             _find=supported_vars,
             _show=show.basic,
             _print=print,
             ):
    """
    Print out the list of found global variables.

    The variables are split into a "supported" section and an
    "unsupported" section, each followed by its count.  Variables
    whose support status is None are left out of both.
    """
    groups = {True: [], False: []}
    results = _find(known=known,
                    ignored=ignored,
                    skip_objects=skip_objects,
                    )
    for found, supported in results:
        if supported is None:
            continue
        groups[bool(supported)].append(found)
    allsupported = groups[True]
    allunsupported = groups[False]

    _print('supported:')
    _print('----------')
    _show(sorted(allsupported))
    _print(f' ({len(allsupported)} total)')
    _print()
    _print('unsupported:')
    _print('------------')
    _show(sorted(allunsupported))
    _print(f' ({len(allunsupported)} total)')
#############################
# the script
# Maps each sub-command name to the function that implements it.
COMMANDS = dict(
    check=cmd_check,
    show=cmd_show,
)

# NOTE: the value taken from sys.argv is immediately replaced by the
# hard-coded program name on the next line.
PROG = sys.argv[0]
PROG = 'c-globals.py'
def parse_args(prog=PROG, argv=None, *, _fail=None):
    """Parse the command line and return (cmd, kwargs for the command).

    "prog" is the program name shown in usage messages.  "argv" is the
    list of arguments to parse; when it is None, sys.argv[1:] is read
    at call time (rather than being frozen when the module is
    imported).  "_fail" is called with a message if no command was
    given; by default that is the parser's error(), which exits.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Options shared by every sub-command.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--ignored', metavar='FILE',
                        default=IGNORED_FILE,
                        help='path to file that lists ignored vars')
    common.add_argument('--known', metavar='FILE',
                        default=KNOWN_FILE,
                        help='path to file that lists known types')
    #common.add_argument('dirs', metavar='DIR', nargs='*',
    #                    default=SOURCE_DIRS,
    #                    help='a directory to check')

    parser = argparse.ArgumentParser(
        prog=prog,
    )
    subs = parser.add_subparsers(dest='cmd')

    subs.add_parser('check', parents=[common])

    # Named so it does not shadow the imported "show" module.
    show_parser = subs.add_parser('show', parents=[common])
    show_parser.add_argument('--skip-objects', action='store_true')

    if _fail is None:
        def _fail(msg):
            parser.error(msg)

    # Now parse the args.
    args = parser.parse_args(argv)
    ns = vars(args)

    cmd = ns.pop('cmd')
    if not cmd:
        _fail('missing command')
    return cmd, ns
def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS):
    """Run the named command, passing it the given keyword arguments.

    ValueError is raised if "cmd" is not one of the known commands.
    """
    try:
        cmdfunc = _COMMANDS[cmd]
    except KeyError:
        msg = f'unsupported cmd {cmd!r}' if cmd else 'missing cmd'
        raise ValueError(msg)

    kwargs = cmdkwargs or {}
    cmdfunc(cmd, **kwargs)
if __name__ == '__main__':
    # Script entry point: parse the command line, then dispatch.
    cmd, cmdkwargs = parse_args()
    main(cmd, cmdkwargs)

View File

@ -0,0 +1,329 @@
# The code here consists of hacks for pre-populating the known.tsv file.
from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files
# Type prefixes (each also with a leading "const ") that mark a line as
# an implicitly static declaration.
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS += tuple('const ' + v for v in POTS)
# Struct type names whose definitions are likewise treated as statics.
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')


def _parse_global(line, funcname=None):
    """Return the variable declared on the given line of C source.

    For most recognized declarations the result is (name, decl).  For
    the file-specific macros (SLOT1BINFULL, SLOT1BIN, WRAP_METHOD) it
    is instead a list of (name, funcname, decl) tuples, one for each
    static the macro introduces.  (None, None) is returned for a line
    that is not recognized as a variable declaration.

    If "funcname" is truthy then only the forms that can be parsed
    inside a function are considered.
    """
    line = line.strip()
    if line.startswith('static '):
        # Parens without "[" or " = " indicate a function, not a variable.
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
        name, decl = parse_variable_declaration(line)
    elif line.startswith('_Py_static_string('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(',')[0].strip()
    elif line.startswith('_Py_IDENTIFIER('):
        decl = line.strip(';').strip()
        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
    elif funcname:
        return None, None

    # global-only
    elif line.startswith('PyAPI_DATA('):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('extern '):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('PyDoc_VAR('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(')')[0].strip()
    elif line.startswith(POTS):  # implied static
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith('struct '):
        if not line.endswith(' = {'):
            return None, None
        if not line.partition(' ')[2].startswith(STRUCTS):
            return None, None
        # implied static
        name, decl = parse_variable_declaration(line)

    # file-specific
    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
        # Objects/typeobject.c
        funcname = line.split('(')[1].split(',')[0]
        return [
            ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
            # Each static gets its own declaration (rop_id previously
            # repeated the one for op_id).
            ('rop_id', funcname, '_Py_static_string(rop_id, ROPSTR)'),
        ]
    elif line.startswith('WRAP_METHOD('):
        # Objects/weakrefobject.c
        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
        return [
            ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
        ]

    else:
        return None, None
    return name, decl
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the matching variable from the per-file cache.

    "varcache" maps filename to a dict of the variables found in that
    file, keyed by variable ID.  A file's entry is filled in (and its
    IDs printed) the first time the file is requested.  None is
    returned if the file has no matching variable.
    """
    # Look for the file.
    try:
        filevars = varcache[filename]
    except KeyError:
        filevars = varcache[filename] = {}
        found = _iter_variables(filename, parse_variable=_parse_global)
        for variable in found:
            variable._isglobal = True
            filevars[variable.id] = variable
        for var in filevars:
            print(' ', var)

    # Look for the variable.
    if funcname != UNKNOWN:
        return filevars.pop((filename, funcname, name), None)
    # The function is not known, so match on the name alone.
    for varid in filevars:
        if varid.name == name:
            return filevars.pop(varid)
    return None
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the cached variable that matches the given ID, if any.

    Only the ID's own file is searched when it has a real filename;
    otherwise every file in "allfilenames" is.  If that fails for a
    global (funcname is None) with a real filename, the ".h" files
    among "allfilenames" are searched by name as a fallback.  None is
    returned when nothing matches.
    """
    hasfile = bool(varid.filename) and varid.filename != UNKNOWN
    filenames = [varid.filename] if hasfile else allfilenames

    for filename in filenames:
        variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
        if variable is not None:
            return variable

    if hasfile and varid.funcname is None:
        for filename in allfilenames:
            if filename.endswith('.h'):
                variable = _pop_cached(varcache, filename, None, varid.name)
                if variable is not None:
                    return variable
    return None
# Hard-coded declarations, keyed by variable name.  Each value is the
# declaration text; _known() appends the variable name to it when the
# name is not already part of the text.
# NOTE(review): presumably these are declarations that span several
# source lines and so defeat the line-based parser -- confirm.
MULTILINE = {
# Python/Python-ast.c
'Load_singleton': 'PyObject *',
'Store_singleton': 'PyObject *',
'Del_singleton': 'PyObject *',
'AugLoad_singleton': 'PyObject *',
'AugStore_singleton': 'PyObject *',
'Param_singleton': 'PyObject *',
'And_singleton': 'PyObject *',
'Or_singleton': 'PyObject *',
'Add_singleton': 'static PyObject *',
'Sub_singleton': 'static PyObject *',
'Mult_singleton': 'static PyObject *',
'MatMult_singleton': 'static PyObject *',
'Div_singleton': 'static PyObject *',
'Mod_singleton': 'static PyObject *',
'Pow_singleton': 'static PyObject *',
'LShift_singleton': 'static PyObject *',
'RShift_singleton': 'static PyObject *',
'BitOr_singleton': 'static PyObject *',
'BitXor_singleton': 'static PyObject *',
'BitAnd_singleton': 'static PyObject *',
'FloorDiv_singleton': 'static PyObject *',
'Invert_singleton': 'static PyObject *',
'Not_singleton': 'static PyObject *',
'UAdd_singleton': 'static PyObject *',
'USub_singleton': 'static PyObject *',
'Eq_singleton': 'static PyObject *',
'NotEq_singleton': 'static PyObject *',
'Lt_singleton': 'static PyObject *',
'LtE_singleton': 'static PyObject *',
'Gt_singleton': 'static PyObject *',
'GtE_singleton': 'static PyObject *',
'Is_singleton': 'static PyObject *',
'IsNot_singleton': 'static PyObject *',
'In_singleton': 'static PyObject *',
'NotIn_singleton': 'static PyObject *',
# Python/symtable.c
'top': 'static identifier ',
'lambda': 'static identifier ',
'genexpr': 'static identifier ',
'listcomp': 'static identifier ',
'setcomp': 'static identifier ',
'dictcomp': 'static identifier ',
'__class__': 'static identifier ',
# Python/compile.c
'__doc__': 'static PyObject *',
'__annotations__': 'static PyObject *',
# Objects/floatobject.c
'double_format': 'static float_format_type ',
'float_format': 'static float_format_type ',
'detected_double_format': 'static float_format_type ',
'detected_float_format': 'static float_format_type ',
# Parser/listnode.c
'level': 'static int ',
'atbol': 'static int ',
# Python/dtoa.c
'private_mem': 'static double private_mem[PRIVATE_mem]',
'pmem_next': 'static double *',
# Modules/_weakref.c
'weakref_functions': 'static PyMethodDef ',
}
# Declarations whose type is an anonymous struct written out inline.
INLINE = {
# Modules/_tracemalloc.c
'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
# Modules/faulthandler.c
'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
# Modules/signalmodule.c
'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
# Python/dynload_shlib.c
'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
# Objects/obmalloc.c
'_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
# Python/bootstrap_hash.c
'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
}
# Function-pointer variables (the name is embedded in the declaration).
FUNC = {
# Objects/object.c
'_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
# Parser/myreadline.c
'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
# Python/pylifecycle.c
'_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
# Parser/myreadline.c
'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
}
# NOTE(review): presumably variables whose declaration is implied by
# other source text rather than spelled out directly -- confirm.
IMPLIED = {
# Objects/boolobject.c
'_Py_FalseStruct': 'static struct _longobject ',
'_Py_TrueStruct': 'static struct _longobject ',
# Modules/config.c
'_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}
# All of the hard-coded globals above, merged into a single lookup table.
# Later tables win if a name appears in more than one of them.
GLOBALS = {}
GLOBALS.update(MULTILINE)
GLOBALS.update(INLINE)
GLOBALS.update(FUNC)
GLOBALS.update(IMPLIED)
# Hard-coded function-local statics, keyed by variable name.  Each value
# is (filename, funcname, declaration).
LOCALS = {
'buildinfo': ('Modules/getbuildinfo.c',
'Py_GetBuildInfo',
'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
'methods': ('Python/codecs.c',
'_PyCodecRegistry_Init',
'static struct { char *name; PyMethodDef def; } methods[]'),
}
def _known(symbol):
    """Return a Variable built from the hard-coded tables.

    A symbol with a function name is looked up in LOCALS, but only if
    both its function name and its filename are UNKNOWN.  A symbol
    without one must have a real filename and is looked up in GLOBALS,
    with name-based fallbacks for "*_methods" variables and for the
    exception types in Objects/exceptions.c.

    KeyError is raised if the symbol is not covered.
    """
    name = symbol.name
    if symbol.funcname:
        if not (symbol.funcname == UNKNOWN and symbol.filename == UNKNOWN):
            raise KeyError(name)
        filename, funcname, decl = LOCALS[name]
        varid = ID(filename, funcname, name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(name)
    else:
        varid = symbol.id
        try:
            decl = GLOBALS[name]
        except KeyError:
            isexc = (symbol.filename == 'Objects/exceptions.c' and
                     name.startswith(('PyExc_', '_PyExc_')))
            if name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif isexc:
                decl = 'static PyTypeObject '
            else:
                raise

    # The tables usually hold only the type part of the declaration.
    if name not in decl:
        decl += name
    return Variable(varid, 'static', decl)
def known_row(varid, decl):
    """Return the row (a 5-tuple) describing the given variable.

    The columns are filename, funcname ("-" if there is none), name,
    the literal kind "variable", and the declaration.
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield a row for each of the given symbols.

    Each symbol is resolved through the hard-coded tables first and
    then through the per-file cache.  A symbol that cannot be resolved
    still gets a row, with UNKNOWN as its declaration.

    Only "cached=True" is implemented; anything else results in
    NotImplementedError.
    """
    filenames = list(_get_filenames())
    cache = {}
    if not cached:
        raise NotImplementedError  # XXX incorporate KNOWN
        # Everything below in this branch is unreachable for now.
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype== UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)

    for symbol in symbols:
        try:
            found = _known(symbol)
        except KeyError:
            found = _find_match(symbol, cache, filenames)
            if found is None:
                found = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(found.id, found.vartype)
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows generated for the given symbols to a TSV file.

    If no filename is given then the known-variables file name plus
    ".new" is used.
    """
    outfile = filename or KNOWN_FILE + '.new'
    _write_tsv(outfile, KNOWN_HEADER, _generate_rows(symbols))
if __name__ == '__main__':
    # Regenerate the known-variables file from the symbols found in
    # the Python binary.
    from c_symbols import binary

    symbols = binary.iter_symbols(binary.PYTHON, find_local_symbol=None)
    generate(symbols)

View File

@ -0,0 +1,29 @@
from c_analyzer.common.files import (
C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
)
from . import SOURCE_DIRS, REPO_ROOT
# XXX need tests:
# * iter_files()
def iter_files(*,
               walk=walk_tree,
               _files=iter_files_by_suffix,
               ):
    """Yield each C source file found under the source directories.

    The files are the ones "_files" produces for SOURCE_DIRS with the
    C source suffixes, relative to REPO_ROOT.  Any filename that
    starts with "Include/cpython/" (using the platform's separator)
    is skipped.
    """
    # This module does not import "os" at the top level, so the name
    # must be brought into scope here (it was previously unbound).
    import os.path

    # The trailing '' makes the prefix end with a path separator.
    excludedtrees = (
        os.path.join('Include', 'cpython', ''),
    )

    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        if filename.startswith(excludedtrees):
            continue
        yield filename

View File

@ -0,0 +1,101 @@
import os.path
from c_analyzer.common import files
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import find as _common
from . import SOURCE_DIRS, PYTHON, REPO_ROOT
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from .supported import (
ignored_from_file, IGNORED_FILE, is_supported, _is_object,
)
# XXX need tests:
# * vars_from_binary()
# * vars_from_source()
# * supported_vars()
def _handle_id(filename, funcname, name, *,
               _relpath=os.path.relpath,
               ):
    """Return the ID for a variable, with its filename made relative.

    The filename is converted to a path relative to REPO_ROOT.
    """
    relfile = _relpath(filename, REPO_ROOT)
    return ID(relfile, funcname, name)
def vars_from_binary(*,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_binary,
                     ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    If "known" is a string then it is treated as the name of the file
    from which to load them.
    """
    if isinstance(known, str):
        known = _known_from_file(known)

    # The ".c" files are still requested, but the result is not used.
    _iter_files(SOURCE_DIRS, ('.c',))
    # XXX For now we only use known variables (no source lookup).
    yield from _iter_vars(
        PYTHON,
        known=known,
        filenames=None,
        handle_id=_handle_id,
        check_filename=(lambda n: True),
    )
def vars_from_source(*,
                     preprocessed=None,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_source,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    If "known" is a string then it is treated as the name of the file
    from which to load them.  The ".c" files under SOURCE_DIRS are the
    ones examined.
    """
    if isinstance(known, str):
        known = _known_from_file(known)

    sources = _iter_files(SOURCE_DIRS, ('.c',))
    yield from _iter_vars(
        sources,
        preprocessed=preprocessed,
        known=known,
        handle_id=_handle_id,
    )
def supported_vars(*,
                   known=KNOWN_FILE,
                   ignored=IGNORED_FILE,
                   skip_objects=False,
                   _known_from_file=known_from_file,
                   _ignored_from_file=ignored_from_file,
                   _iter_vars=vars_from_binary,
                   _is_supported=is_supported,
                   ):
    """Yield (var, is supported) for each found global variable.

    "known" and "ignored" may each be given as a filename (a string),
    in which case the data is loaded from that file.

    Non-global variables are skipped.  A variable with an unknown type
    is yielded with None in place of the support status.  If
    "skip_objects" is true then variables whose type is an object
    type are skipped as well.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    if isinstance(ignored, str):
        ignored = _ignored_from_file(ignored)

    for var in _iter_vars(known=known):
        if not var.isglobal:
            continue
        elif var.vartype == UNKNOWN:
            yield var, None
        # XXX Support proper filters instead.
        # (This used to reference an undefined name, "found".)
        elif skip_objects and _is_object(var.vartype):
            continue
        else:
            yield var, _is_supported(var, ignored, known)

View File

@ -0,0 +1,66 @@
import csv
import os.path
from c_analyzer.parser.declarations import extract_storage
from c_analyzer.variables import known as _common
from c_analyzer.variables.info import Variable
from . import DATA_DIR
# XXX need tests:
# * from_file()
# * look_up_variable()
# Path of the "known.tsv" file inside the package's data directory; it
# is the default input for from_file().
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
def _get_storage(decl, infunc):
    """Return the storage class for the given declaration text.

    Declarations made through the known CPython macros are classified
    directly; everything else is passed on to extract_storage().
    """
    static_macros = (
        'Py_LOCAL(', 'Py_LOCAL_INLINE(',
        '_Py_IDENTIFIER(', '_Py_static_string(',
        'PyDoc_VAR(',
        'SLOT1BINFULL(', 'SLOT1BIN(',
        'WRAP_METHOD(',
    )
    if decl.startswith(static_macros):
        return 'static'
    if decl.startswith('PyAPI_DATA('):
        # public extern
        return 'extern'
    # Fall back to the normal handler.
    return extract_storage(decl, infunc=infunc)
def _handle_var(varid, decl):
    """Return the Variable for the given ID and declaration.

    The storage class is worked out from the declaration, with the
    ID's function name passed along as the "in function" indicator.
    """
#    if varid.name == 'id' and decl == UNKNOWN:
#        # None of these are variables.
#        decl = 'int id';
    return Variable(varid, _get_storage(decl, varid.funcname), decl)
def from_file(infile=DATA_FILE, *,
              _from_file=_common.from_file,
              _handle_var=_handle_var,
              ):
    """Return the info for known declarations in the given file.

    The work is delegated to the common loader, with "_handle_var"
    supplied as the handler for variables.
    """
    known = _from_file(infile, handle_var=_handle_var)
    return known
def look_up_variable(varid, knownvars, *,
                     _lookup=_common.look_up_variable,
                     ):
    """Return the known variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    The lookup itself is delegated to "_lookup".
    NOTE(review): the earlier docstring said None is returned when
    there is no match; that depends on "_lookup" -- confirm.
    """
    match = _lookup(varid, knownvars)
    return match

View File

@ -0,0 +1,398 @@
import os.path
import re
from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv
from . import DATA_DIR
# XXX need tests:
# * generate / script
# Path of the "ignored.tsv" file inside the package's data directory.
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
# Column names of the ignored-variables file, and the tab-separated
# header line built from them.
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
# XXX Move these to ignored.tsv.
# Maps a variable name to the reason it is tolerated.  _is_ignored()
# consults this only for variables that have no function name.
IGNORED = {
# global
'PyImport_FrozenModules': 'process-global',
'M___hello__': 'process-global',
'inittab_copy': 'process-global',
'PyHash_Func': 'process-global',
'_Py_HashSecret_Initialized': 'process-global',
'_TARGET_LOCALES': 'process-global',
# startup (only changed before/during)
'_PyRuntime': 'runtime startup',
'runtime_initialized': 'runtime startup',
'static_arg_parsers': 'runtime startup',
'orig_argv': 'runtime startup',
'opt_ptr': 'runtime startup',
'_preinit_warnoptions': 'runtime startup',
'_Py_StandardStreamEncoding': 'runtime startup',
'Py_FileSystemDefaultEncoding': 'runtime startup',
'_Py_StandardStreamErrors': 'runtime startup',
'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
'Py_BytesWarningFlag': 'runtime startup',
'Py_DebugFlag': 'runtime startup',
'Py_DontWriteBytecodeFlag': 'runtime startup',
'Py_FrozenFlag': 'runtime startup',
'Py_HashRandomizationFlag': 'runtime startup',
'Py_IgnoreEnvironmentFlag': 'runtime startup',
'Py_InspectFlag': 'runtime startup',
'Py_InteractiveFlag': 'runtime startup',
'Py_IsolatedFlag': 'runtime startup',
'Py_NoSiteFlag': 'runtime startup',
'Py_NoUserSiteDirectory': 'runtime startup',
'Py_OptimizeFlag': 'runtime startup',
'Py_QuietFlag': 'runtime startup',
'Py_UTF8Mode': 'runtime startup',
'Py_UnbufferedStdioFlag': 'runtime startup',
'Py_VerboseFlag': 'runtime startup',
'_Py_path_config': 'runtime startup',
'_PyOS_optarg': 'runtime startup',
'_PyOS_opterr': 'runtime startup',
'_PyOS_optind': 'runtime startup',
'_Py_HashSecret': 'runtime startup',
# REPL
'_PyOS_ReadlineLock': 'repl',
'_PyOS_ReadlineTState': 'repl',
# effectively const
'tracemalloc_empty_traceback': 'const',
'_empty_bitmap_node': 'const',
'posix_constants_pathconf': 'const',
'posix_constants_confstr': 'const',
'posix_constants_sysconf': 'const',
'_PySys_ImplCacheTag': 'const',
'_PySys_ImplName': 'const',
'PyImport_Inittab': 'const',
'_PyImport_DynLoadFiletab': 'const',
'_PyParser_Grammar': 'const',
'Py_hexdigits': 'const',
'_PyImport_Inittab': 'const',
'_PyByteArray_empty_string': 'const',
'_PyLong_DigitValue': 'const',
'_Py_SwappedOp': 'const',
'PyStructSequence_UnnamedField': 'const',
# signals are main-thread only
'faulthandler_handlers': 'signals are main-thread only',
'user_signals': 'signals are main-thread only',
'wakeup': 'signals are main-thread only',
# hacks
'_PySet_Dummy': 'only used as a placeholder',
}
# The reason reported by _is_ignored() for variables where a race is
# considered harmless.
BENIGN = 'races here are benign and unlikely'
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "ignored", if provided, is a mapping with a "variables" entry and
    optionally a "types" entry; each is handed to the corresponding
    check.  With no "ignored" data, None is passed to both checks.
    "known" is accepted but not used here.

    The variable is supported if it is explicitly ignored or if its
    type is one that is okay.
    """
    ignoredvars = ignored and ignored.get('variables')
    if _ignored(variable, ignoredvars):
        return True

    # Guarded like the lookup above so the default (ignored=None) no
    # longer fails with AttributeError.
    ignoredtypes = ignored and ignored.get('types')
    if _vartype_okay(variable.vartype, ignoredtypes):
        return True

    return False
def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    The explicitly ignored variables (by ID) are checked first, then
    the table of ignored names (only for variables outside functions),
    and finally a set of file-specific special cases.
    """
    if ignoredvars:
        reason = ignoredvars.get(variable.id)
        if reason:
            return reason

    if variable.funcname is None:
        reason = _IGNORED.get(variable.name)
        if reason:
            return reason

    filename = variable.filename

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    elif filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    elif filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other (filename -> variable name -> reason)
    dtoa = 'dtoa is thread-safe?'  # guarded by lock?
    byfile = {
        'Python/dtoa.c': {
            'p5s': dtoa,
            'freelist': dtoa,
            'private_mem': dtoa,
            'pmem_next': dtoa,
        },
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        'Python/thread.c': {
            'initialized': 'thread-safe',
            'thread_debug': 'thread-safe',
        },
        # Races are benign here, as well as unlikely.
        'Python/getversion.c': {
            'version': BENIGN,
        },
        'Python/fileutils.c': {
            'force_ascii': BENIGN,
            'ioctl_works': BENIGN,
            '_Py_open_cloexec_works': BENIGN,
        },
        'Python/codecs.c': {
            'ucnhash_CAPI': BENIGN,
        },
        'Python/bootstrap_hash.c': {
            'getrandom_works': BENIGN,
        },
        'Objects/unicodeobject.c': {
            'ucnhash_CAPI': BENIGN,
            # *mostly* benign
            'bloom_linebreak': BENIGN,
        },
        # The static is used for pre-allocation.
        'Modules/getbuildinfo.c': {
            'buildinfo': BENIGN,
        },
        'Modules/posixmodule.c': {
            'ticks_per_second': BENIGN,
            'dup3_works': BENIGN,
        },
        'Modules/timemodule.c': {
            'ticks_per_second': BENIGN,
        },
        'Objects/longobject.c': {
            'log_base_BASE': BENIGN,
            'convwidth_base': BENIGN,
            'convmultmax_base': BENIGN,
        },
    }
    names = byfile.get(filename)
    if names is not None:
        return names.get(variable.name)
    return None
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the given type is acceptable for a global.

    None is returned if the type is an object type or if it does not
    match any of the accepted categories.  "ignoredtypes" is not used.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    effectively_const = (
        # components for TypeObject definitions
        'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
        'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
        'PyBufferProcs', 'PyAsyncMethods',
        'slotdef', 'newfunc',
        # structseq
        'PyStructSequence_Desc', 'PyStructSequence_Field',
        # other definitions
        'PyModuleDef',
    )
    if any(name in vartype for name in effectively_const):
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype or 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None
# Pattern used by _is_object() (via match(), i.e. anchored at the start
# of the type text) to recognize declarations of Python objects.  It is
# compiled in verbose mode, so the whitespace and "#" comments inside
# the pattern are not part of what is matched.  There are three
# alternatives: "static identifier", any text containing one of the
# listed object type names, and the _Py_IDENTIFIER()/_Py_static_string()
# macros.
PYOBJECT_RE = re.compile(r'''
^
(
# must start with "static "
static \s+
(
identifier
)
\b
) |
(
# may start with "static "
( static \s+ )?
(
.*
(
PyObject |
PyTypeObject |
_? Py \w+ Object |
_PyArg_Parser |
_Py_Identifier |
traceback_t |
PyAsyncGenASend |
_PyAsyncGenWrappedValue |
PyContext |
method_cache_entry
)
\b
) |
(
(
_Py_IDENTIFIER |
_Py_static_string
)
[(]
)
)
''', re.VERBOSE)
def _is_object(vartype):
    """Return True if the given type text declares a Python object.

    Anything mentioning PyDictKeysObject is never treated as an
    object.  Otherwise the type is an object if it matches PYOBJECT_RE
    or names one of the bool singleton structs.
    """
    if 'PyDictKeysObject' in vartype:
        return False
    if PYOBJECT_RE.match(vartype):
        return True
    # XXX Add more?
    #for part in vartype.split():
    #    # XXX const is automatic True?
    #    if part == 'PyObject' or part.startswith('PyObject['):
    #        return True
    return vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct'))
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored variables listed in the given TSV file.

    The result is a dict with a single "variables" entry, which maps
    each variable's ID to the reason it is ignored.  A funcname column
    that is empty or "-" means the variable is not in a function.

    ValueError is raised for a row whose kind is not "variable".
    """
    variables = {}
    ignored = {
        'variables': variables,
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        variables[varid] = reason
    return ignored
##################################
# generate
def _get_row(varid, reason):
    """Return the row (a 5-tuple) for an ignored variable.

    The columns are filename, funcname ("-" if there is none), name,
    the literal kind "variable", and the reason as a string.
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', str(reason))
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a row for each of the variables that has a reason.

    The reason comes from the ignored-variable check or, failing that,
    from the type check.  Variables with no reason are skipped.  Each
    reported variable is printed, as is the final count.
    """
    count = 0
    for variable in variables:
        reason = (
            _is_ignored(variable,
                        ignored and ignored.get('variables'),
                        )
            or _vartype_okay(variable.vartype,
                             ignored and ignored.get('types'))
        )
        if reason:
            print(' ', variable, repr(reason))
            yield _as_row(variable.id, reason)
            count += 1
    print(f'total: {count}')
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows for the given variables to a TSV file.

    If no filename is given then the ignored-variables file name plus
    ".new" is used.
    """
    outfile = filename or IGNORED_FILE + '.new'
    _write_tsv(outfile, IGNORED_HEADER, _generate_rows(variables))
if __name__ == '__main__':
    # Regenerate the ignored-variables file from the known variables.
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    # XXX This is wrong!
    from . import find

    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    # NOTE(review): confirm that the find module actually provides
    # globals_from_binary().
    variables = find.globals_from_binary(
        knownvars=knownvars,
        dirnames=SOURCE_DIRS,
    )

    _generate_ignored_file(variables)