399 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			399 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os.path
 | |
| import re
 | |
| 
 | |
| from c_analyzer.common.info import ID
 | |
| from c_analyzer.common.util import read_tsv, write_tsv
 | |
| 
 | |
| from . import DATA_DIR
 | |
| 
 | |
| # XXX need tests:
 | |
| # * generate / script
 | |
| 
 | |
| 
 | |
# Path of the "ignored.tsv" data file inside DATA_DIR.
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

# Column names of ignored.tsv, in order; the header row is these names
# joined with tabs and is passed to the TSV reader/writer in this module.
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
 | |
| 
 | |
# XXX Move these to ignored.tsv.
# Maps the name of a global variable to the reason it is ignored.
# The entries are grouped by reason; dict order follows the grouping.
IGNORED = {
    # global
    **dict.fromkeys(
        (
            'PyImport_FrozenModules',
            'M___hello__',
            'inittab_copy',
            'PyHash_Func',
            '_Py_HashSecret_Initialized',
            '_TARGET_LOCALES',
        ),
        'process-global',
    ),

    # startup (only changed before/during)
    **dict.fromkeys(
        (
            '_PyRuntime',
            'runtime_initialized',
            'static_arg_parsers',
            'orig_argv',
            'opt_ptr',
            '_preinit_warnoptions',
            '_Py_StandardStreamEncoding',
            'Py_FileSystemDefaultEncoding',
            '_Py_StandardStreamErrors',
            'Py_FileSystemDefaultEncodeErrors',
            'Py_BytesWarningFlag',
            'Py_DebugFlag',
            'Py_DontWriteBytecodeFlag',
            'Py_FrozenFlag',
            'Py_HashRandomizationFlag',
            'Py_IgnoreEnvironmentFlag',
            'Py_InspectFlag',
            'Py_InteractiveFlag',
            'Py_IsolatedFlag',
            'Py_NoSiteFlag',
            'Py_NoUserSiteDirectory',
            'Py_OptimizeFlag',
            'Py_QuietFlag',
            'Py_UTF8Mode',
            'Py_UnbufferedStdioFlag',
            'Py_VerboseFlag',
            '_Py_path_config',
            '_PyOS_optarg',
            '_PyOS_opterr',
            '_PyOS_optind',
            '_Py_HashSecret',
        ),
        'runtime startup',
    ),

    # REPL
    **dict.fromkeys(
        (
            '_PyOS_ReadlineLock',
            '_PyOS_ReadlineTState',
        ),
        'repl',
    ),

    # effectively const
    **dict.fromkeys(
        (
            'tracemalloc_empty_traceback',
            '_empty_bitmap_node',
            'posix_constants_pathconf',
            'posix_constants_confstr',
            'posix_constants_sysconf',
            '_PySys_ImplCacheTag',
            '_PySys_ImplName',
            'PyImport_Inittab',
            '_PyImport_DynLoadFiletab',
            '_PyParser_Grammar',
            'Py_hexdigits',
            '_PyImport_Inittab',
            '_PyByteArray_empty_string',
            '_PyLong_DigitValue',
            '_Py_SwappedOp',
            'PyStructSequence_UnnamedField',
        ),
        'const',
    ),

    # signals are main-thread only
    **dict.fromkeys(
        (
            'faulthandler_handlers',
            'user_signals',
            'wakeup',
        ),
        'signals are main-thread only',
    ),

    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
}

# Reason string shared by variables whose data races are considered harmless.
BENIGN = 'races here are benign and unlikely'
 | |
| 
 | |
| 
 | |
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide a "vartype" attribute (plus whatever the
    "_ignored" callable reads from it).

    "ignored" is an optional mapping; its "variables" entry is handed to
    the ignore check and its "types" entry to the type check.  It may be
    None (the default) or empty, in which case None is passed for the
    "types" entry.

    "known" is currently unused.

    "_ignored" and "_vartype_okay" are injection points for testing;
    the defaults look up the module-level helpers lazily at call time.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True

    # "ignored" defaults to None, so it must not be dereferenced
    # unconditionally here.
    ignoredtypes = ignored.get('types') if ignored else None
    if _vartype_okay(variable.vartype, ignoredtypes):
        return True

    return False
 | |
| 
 | |
| 
 | |
def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    The checks run in this order: the caller-provided "ignoredvars"
    mapping (keyed by variable ID), the "_IGNORED" table (keyed by name,
    only for variables with no funcname), and finally the per-file
    special cases below.
    """
    if ignoredvars:
        reason = ignoredvars.get(variable.id)
        if reason:
            return reason

    if variable.funcname is None:
        reason = _IGNORED.get(variable.name)
        if reason:
            return reason

    filename = variable.filename
    name = variable.name

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    elif filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    elif filename == 'Python/Python-ast.c':
        # These should be const.
        if name.endswith('_field') or name.endswith('_attribute'):
            return 'compiler'

    # other
    elif filename == 'Python/dtoa.c':
        # guarded by lock?
        if name in ('p5s', 'freelist', 'private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    elif filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if name in ('initialized', 'thread_debug'):
            return 'thread-safe'

    # Races are benign here, as well as unlikely.
    benign_by_file = {
        'Python/getversion.c': ('version',),
        'Python/fileutils.c': (
            'force_ascii',
            'ioctl_works',
            '_Py_open_cloexec_works',
        ),
        'Python/codecs.c': ('ucnhash_CAPI',),
        'Python/bootstrap_hash.c': ('getrandom_works',),
        'Objects/unicodeobject.c': (
            'ucnhash_CAPI',
            # *mostly* benign
            'bloom_linebreak',
        ),
        'Modules/getbuildinfo.c': (
            # The static is used for pre-allocation.
            'buildinfo',
        ),
        'Modules/posixmodule.c': ('ticks_per_second', 'dup3_works'),
        'Modules/timemodule.c': ('ticks_per_second',),
        'Objects/longobject.c': (
            'log_base_BASE',
            'convwidth_base',
            'convmultmax_base',
        ),
    }
    if name in benign_by_file.get(filename, ()):
        return BENIGN

    return None
 | |
| 
 | |
| 
 | |
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the type is acceptable for a global, else None.

    "vartype" is the variable's type as a string.  Types recognized by
    _is_object() are never accepted.  "ignoredtypes" is currently unused.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    const_markers = (
        # components for TypeObject definitions
        'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
        'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
        'PyBufferProcs', 'PyAsyncMethods',
        'slotdef', 'newfunc',
        # structseq
        'PyStructSequence_Desc', 'PyStructSequence_Field',
        # other definitions
        'PyModuleDef',
    )
    if any(marker in vartype for marker in const_markers):
        return 'const'

    # thread-safe
    threadsafe_markers = ('_Py_atomic_int', 'pthread_condattr_t')
    if any(marker in vartype for marker in threadsafe_markers):
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # Not accepted (the checks are disabled):
    #  * global: PyMemAllocatorEx
    #  * others: PyThread_type_lock

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    return None
 | |
| 
 | |
| 
 | |
| PYOBJECT_RE = re.compile(r'''
 | |
|         ^
 | |
|         (
 | |
|             # must start with "static "
 | |
|             static \s+
 | |
|             (
 | |
|                 identifier
 | |
|             )
 | |
|             \b
 | |
|         ) |
 | |
|         (
 | |
|             # may start with "static "
 | |
|             ( static \s+ )?
 | |
|             (
 | |
|                 .*
 | |
|                 (
 | |
|                     PyObject |
 | |
|                     PyTypeObject |
 | |
|                     _? Py \w+ Object |
 | |
|                     _PyArg_Parser |
 | |
|                     _Py_Identifier |
 | |
|                     traceback_t |
 | |
|                     PyAsyncGenASend |
 | |
|                     _PyAsyncGenWrappedValue |
 | |
|                     PyContext |
 | |
|                     method_cache_entry
 | |
|                 )
 | |
|                 \b
 | |
|             ) |
 | |
|             (
 | |
|                 (
 | |
|                     _Py_IDENTIFIER |
 | |
|                     _Py_static_string
 | |
|                 )
 | |
|                 [(]
 | |
|             )
 | |
|         )
 | |
|         ''', re.VERBOSE)
 | |
| 
 | |
| 
 | |
| def _is_object(vartype):
 | |
|     if 'PyDictKeysObject' in vartype:
 | |
|         return False
 | |
|     if PYOBJECT_RE.match(vartype):
 | |
|         return True
 | |
|     if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
 | |
|         return True
 | |
| 
 | |
|     # XXX Add more?
 | |
| 
 | |
|     #for part in vartype.split():
 | |
|     #    # XXX const is automatic True?
 | |
|     #    if part == 'PyObject' or part.startswith('PyObject['):
 | |
|     #        return True
 | |
|     return False
 | |
| 
 | |
| 
 | |
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored variables listed in the given TSV file.

    The result is a dict with a single "variables" entry, which maps
    the ID of each ignored variable to the reason it is ignored.

    Each row must have the columns named in IGNORED_HEADER.  An empty
    funcname or "-" means the variable has no funcname.  ValueError is
    raised for any row whose kind is not "variable".
    """
    variables = {}
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            # Other kinds (types, constants, macros) are not supported.
            raise ValueError(f'unsupported kind in row {row}')
        variables[varid] = reason
    return {
        'variables': variables,
        }
 | |
| 
 | |
| 
 | |
| ##################################
 | |
| # generate
 | |
| 
 | |
| def _get_row(varid, reason):
 | |
|     return (
 | |
|             varid.filename,
 | |
|             varid.funcname or '-',
 | |
|             varid.name,
 | |
|             'variable',
 | |
|             str(reason),
 | |
|             )
 | |
| 
 | |
| 
 | |
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a TSV row for each variable that has a reason to be ignored.

    The reason comes from the ignore check or, failing that, from the
    type check; variables with no reason are skipped.  Each reported
    variable is also printed, and the total is printed once the
    generator is exhausted.
    """
    ignoredvars = ignored and ignored.get('variables')
    reported = 0
    for variable in variables:
        # The type check is only consulted when the ignore check
        # produced nothing.
        why = (_is_ignored(variable, ignoredvars)
               or _vartype_okay(variable.vartype,
                                ignored and ignored.get('types')))
        if why:
            print(' ', variable, repr(why))
            yield _as_row(variable.id, why)
            reported += 1
    print(f'total: {reported}')
 | |
| 
 | |
| 
 | |
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write a TSV file of the ignored variables among the given ones.

    If no filename is given then IGNORED_FILE with a ".new" suffix
    is used.
    """
    target = filename if filename else IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
 | |
| 
 | |
| 
 | |
# Script entry point: regenerate the ignored-variables TSV file from
# the globals found in the binary.
if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
        )
    # XXX This is wrong!
    # NOTE(review): presumably because a relative import fails when the
    # module is executed directly as a script -- confirm.
    from . import find
    # Load the already-known variables; tolerate a falsy result.
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    # No filename is passed, so the function's default output path is used.
    _generate_ignored_file(variables)
 |