2 + 2 = 5: Monkey-patching CPython with ctypes to conform to Party doctrine

2 + 2 = 5
Monkey-patching CPython with ctypes to
conform to Party doctrine

Prior art and reference
• forbiddenfruit
• https://github.com/clarete/forbiddenfruit
• python-doublescript
• https://github.com/fdintino/python-doublescript

Test-driven development
class TwoPlusTwoTestCase(TestCase): 
 
def test_two_plus_two(self): 
with two_plus_two_equals(5): 
self.assertEqual(2 + 2, 5)

Naive approach
old_int_add = int.__add__ 
 
def int_add(a, b): 
if a == b == 2: 
return 5 
else: 
return old_int_add(a, b) 
 
int.__add__ = int_add
# TypeError: can't set attributes of built-in/extension type 'int'
int.__dict__['__add__'] = int_add
# TypeError: 'dictproxy' object does not support item assignment

ctypes crash course
from ctypes import (
pythonapi, Structure, c_char_p, CFUNCTYPE)

ctypes.pythonapi
>>> from ctypes import pythonapi, c_char_p 
 
>>> pythonapi.Py_GetVersion.restype = c_char_p 
>>> pythonapi.Py_GetVersion() 
 
2.7.13 (default, Feb 23 2017, 08:50:00)
[GCC 4.2.1 Compatible Apple LLVM 8.0.0 (clang-800.0.42.1)]

ctypes.py_object
from ctypes import pythonapi, py_object 
 
PyNumber_Absolute = pythonapi.PyNumber_Absolute 
PyNumber_Absolute.argtypes = [py_object] 
PyNumber_Absolute.restype = py_object 
 
PyNumber_Absolute(-3) # 3

ctypes.Structure
class PyObject(Structure): 
_fields_ = [ 
('ob_refcnt', Py_ssize_t), 
('ob_type', py_object), 
]

_CData.from_address(), POINTER
class PyObject(Structure): 
_fields_ = [ 
('ob_refcnt', Py_ssize_t), 
('ob_type', py_object)] 
 
py_object_p = ctypes.POINTER(py_object) # We will use this later
foo = "foo" 
 
pyobj = PyObject.from_address(id(foo)) 
 
print(pyobj.ob_refcnt) # 7 
print(sys.getrefcount(foo)) # 8

Overriding int.__add__
>>> print type(int.__dict__)
<type 'dictproxy'>
# Python 3
>>> print(type(int.__dict__))
<class 'mappingproxy'>

typedef struct { 
PyObject_HEAD 
PyObject *dict; 
} proxyobject;

class DictProxy(PyObject): 
_fields_ = [ 
('dict', ctypes.POINTER(PyObject)), 
]

def mutable_class_dict(cls): 
dp = DictProxy.from_address(id(cls.__dict__)) 
temp = {} 
pythonapi.PyDict_SetItem( 
py_object(temp), 
py_object(None), 
dp.dict) 
return temp[None]

old_int_add = int.__add__

def int_add(a, b):
    if a == b == 2:
        return 5
    else:
        return old_int_add(a, b)

int_dict = mutable_class_dict(int)
int_dict['__add__'] = int_add

>>> 2 + 2 
4
 
>>> (2).__add__(2) 
5

Why doesn’t overriding __add__ suffice?
PyObject * 
PyNumber_Add(PyObject *v, PyObject *w) 
{ 
PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); 
if (result == Py_NotImplemented) { /* ... */ } 
return result; 
}

/* object.h */
typedef struct _typeobject {
PyObject_VAR_HEAD 
const char *tp_name; /* For printing, in format "<module>.<name>" */ 
Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ 
 
/* Methods to implement standard operations */ 
destructor tp_dealloc; 
printfunc tp_print; 
getattrfunc tp_getattr; 
/* ... */ 
 
/* Method suites for standard classes */
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;

/* ... */
} PyTypeObject;

/* object.h */
typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
typedef struct { 
binaryfunc nb_add; 
binaryfunc nb_subtract; 
binaryfunc nb_multiply; 
/* ... */ 
} PyNumberMethods;
• binaryfunc: a pointer to a function that takes two PyObject pointers as
arguments and returns a pointer to a PyObject
• Use ctypes.CFUNCTYPE(return_type, *arg_types)
binaryfunc = ctypes.CFUNCTYPE(py_object_p, py_object_p, py_object_p)

/* object.h */
typedef struct { 
/* ... */ 
} PyNumberMethods;
• Use ctypes.Structure to represent the PyNumberMethods struct:
class PyNumberMethods(Structure): 
_fields_ = [ 
('nb_add', binaryfunc), 
('nb_subtract', binaryfunc), 
('nb_multiply', binaryfunc), 
# ... 
]
typedef struct { 
/* ... */ 
} PyNumberMethods;

class PyTypeObject(PyObject): 
_fields_ = [ 
('ob_size', Py_ssize_t), 
('tp_name', c_char_p), 
('tp_basicsize', Py_ssize_t), 
('tp_itemsize', Py_ssize_t), 
('...', c_void_p * 6), # skip 6 functions, like tp_repr, for brevity 
('tp_as_number', POINTER(PyNumberMethods)),
# ... 
] 
PyInt_Type = PyTypeObject.from_address(id(int))
>>> PyInt_Type.tp_as_number.contents.nb_add(2, 2) 
4

def get_pointer_addr(cdata): 
tmp_pointer = ctypes.cast(ctypes.byref(cdata), POINTER(c_void_p)) 
return tmp_pointer.contents.value 
@contextlib.contextmanager
def two_plus_two_equals(new_sum):
    old_nb_add_addr = get_pointer_addr(
        PyInt_Type.tp_as_number.contents.nb_add)
    old_nb_add = binaryfunc(old_nb_add_addr)

    def int_add(a, b):
        if a == b == 2:
            return new_sum
        else:
            return old_nb_add(a, b)

    nb_add = binaryfunc(int_add)
    PyInt_Type.tp_as_number.contents.nb_add = nb_add
    yield
    PyInt_Type.tp_as_number.contents.nb_add = old_nb_add

>>> with two_plus_two_equals(5): 
... print(2 + 2) 
4
>>> with two_plus_two_equals(5): 
... print(eval("2 + 2")) 
5
"2 + 2"

Using the dis module to see what’s going on
import dis
two = 2 
 
def add_two_plus_two(): 
return two + two
>>> dis.dis(add_two_plus_two)
2 0 LOAD_GLOBAL 0 (two)
3 LOAD_GLOBAL 0 (two)
6 BINARY_ADD
7 RETURN_VALUE

The BINARY_ADD instruction opcode
/* ceval.c: PyEval_EvalFrameEx */ 
TARGET_NOARG(BINARY_ADD) { 
w = POP(); 
v = TOP(); 
if (PyInt_CheckExact(v) && PyInt_CheckExact(w)) { 
/* INLINE: int + int */ 
register long a, b, i; 
a = PyInt_AS_LONG(v); 
b = PyInt_AS_LONG(w); 
i = (long)((unsigned long)a + b); 
x = PyInt_FromLong(i); 
} 
/* ... */ 
}
• When both PyInt_CheckExact(v) and PyInt_CheckExact(w) are true, BINARY_ADD computes the sum inline instead of going through nb_add.

class int2(int): 
def __add__(self, other): 
if self == other == 2: 
return 5 
else: 
return int.__add__(self, other) 
Solution: change (2).__class__ to something other than int
>>> (2).__class__ = int2
TypeError: __class__ assignment: only for heap types

def set_type(obj, new_type): 
old_type = obj.__class__ 
 
new_c_typeobj = PyTypeObject.from_address(id(new_type)) 
if new_c_typeobj.tp_flags & Py_TPFLAGS.HEAPTYPE: 
Py_INCREF(new_type) 
 
c_obj = PyObject.from_address(id(obj)) 
c_obj.ob_type = new_type 
 
old_c_typeobj = PyTypeObject.from_address(id(old_type)) 
if old_c_typeobj.tp_flags & Py_TPFLAGS.HEAPTYPE: 
Py_DECREF(old_type) 
 
@contextlib.contextmanager 
def override_type(obj, new_type): 
old_type = obj.__class__ 
set_type(obj, new_type) 
yield 
set_type(obj, old_type)

>>> with override_type(2, int2):
...     print(eval("2 + 2"))
5
>>> two = 2
>>> with override_type(2, int2):
...     print(two + two)
5
>>> with override_type(2, int2):
...     print(2 + 2)
4

Final obstacle: peephole optimization
• When we disassembled the bytecode earlier, we used a
variable two rather than a literal 2:
import dis
two = 2

def add_two_plus_two():
    return two + two

>>> dis.dis(add_two_plus_two)
2 0 LOAD_GLOBAL 0 (two)
3 LOAD_GLOBAL 0 (two)
6 BINARY_ADD
7 RETURN_VALUE

Final obstacle
• What happens if we use a literal 2?
import dis

def add_two_plus_two():
    return 2 + 2

>>> dis.dis(add_two_plus_two)
4 0 LOAD_CONST 2 (4)
3 RETURN_VALUE
?!

What’s going on?
• peephole optimization: an optimization technique in compilers
where certain recognized instructions are replaced with
shorter or faster versions.
• In CPython, performed by the C function PyCode_Optimize
• It does not occur inside an eval, which is why eval("2 + 2") works.

Disabling with a trampoline function
PyCode_Optimize (before patching)
007d0850 PUSH R15
007d0852 PUSH R14
007d0854 MOV R14, RSI
007d0857 PUSH R13
...
NoOp_PyCode_Optimize
00ccc010 MOV R11, 0x...beee
00ccc01a MOV R10, 0x...2010
00ccc024 CLC
00ccc025 JMP R11
...
PyCode_Optimize (after patching)
007d0850 JMP 0xbb7000
007d0856 NOP
007d0857 PUSH R13
...

Success!
class TwoPlusTwoTestCase(TestCase): 
 
def test_two_plus_two(self): 
with two_plus_two_equals(5): 
self.assertEqual(2 + 2, 5)
$ python runtests.py
.
----------------------------------------------------------------------
Ran 1 test in 0.187s
OK

github.com/fdintino/python-doublescript
@frankiedintino
frankie@theatlantic.com

2 + 2 = 5: Monkey-patching CPython with ctypes to conform to Party doctrine

Recommended

Recommended

More Related Content

What's hot

What's hot (20)

Similar to 2 + 2 = 5: Monkey-patching CPython with ctypes to conform to Party doctrine

Similar to 2 + 2 = 5: Monkey-patching CPython with ctypes to conform to Party doctrine (20)

Recently uploaded

Recently uploaded (20)

2 + 2 = 5: Monkey-patching CPython with ctypes to conform to Party doctrine