62 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			62 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """This script generates a Python codec module from a Windows Code Page.
 | |
| 
 | |
| It uses the function MultiByteToWideChar to generate a decoding table.
 | |
| """
 | |
| 
 | |
| import ctypes
 | |
| from ctypes import wintypes
 | |
| from gencodec import codegen
 | |
| import unicodedata
 | |
| 
 | |
def genwinmap(codepage):
    """Build a decoding map for a single-byte Windows code page.

    Each byte value 0..255 is decoded individually with the Win32
    function MultiByteToWideChar (reached through ``ctypes.windll``, so
    this only works on Windows).

    Args:
        codepage: Integer Windows code page identifier (e.g. 1252).

    Returns:
        A dict mapping every byte value ``0..255`` to a tuple
        ``(code_point, name)``, where ``name`` is the Unicode character
        name, ``'CONTROL CHARACTER'`` for unnamed characters at byte
        values 0-31 and 127, or ``''`` when no name is available.

    Raises:
        ValueError: If MultiByteToWideChar does not convert a byte to
            exactly one UTF-16 code unit (typically because the code
            page is invalid or not single-byte).
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Pre-seed a fallback name for the byte values 0-31 and 127; it is
    # used below when unicodedata has no name for the decoded character.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        # Explicit checks instead of ``assert``: assertions are removed
        # under ``python -O`` and a failed conversion would otherwise go
        # unnoticed and produce a bogus table.
        if ret != 1:
            raise ValueError("invalid code page")
        if buf[1] != '\x00':
            raise ValueError(
                "unexpected data after the character decoded from byte "
                "0x%02x" % i)

        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # No Unicode name: reuse the pre-seeded label for this byte
            # value if there is one, otherwise leave the name empty.
            name = enc2uni.get(i, (i, ''))[1]

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
 | |
| 
 | |
def genwincodec(codepage):
    """Print the source of a codec module for a Windows code page.

    The decoding map returned by ``genwinmap(codepage)`` is handed to
    ``codegen`` under the encoding name ``cp<codepage>``.  Everything
    up to and including the first docstring-end marker in the generated
    source is then replaced by a header that records the encoding name,
    the output of ``platform.win32_ver()`` and the command line used.
    The final text is written to standard output.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Our own module docstring, ending with the same marker that the
    # generated source uses to terminate its docstring.
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)

    # Keep only what follows the generated docstring.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
 | |
| 
 | |
if __name__ == '__main__':
    import sys

    # The first command-line argument is the numeric code page.
    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)
 |