PyPy v7.3.11 release

much

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_add(x, 10)
    if a < 15:
        if x < 6:
            return 0
        return 1
    return 2

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_add(x, 10)
    if a < 15:
        return 0
    return 2

from z3 import BitVec, Implies, prove
x = BitVec('x', 64)
a = x + 10
cond1 = a < 15
cond2 = x < 6
prove(Implies(cond1, cond2))

counterexample
[x = 9223372036854775803]
def add_to_solver(self, ops, state):
    for op in ops:
        if op.type != 'v': # is it an operation with a result
            res = self.newvar(op)
        else: # or does it return void
            res = None

       # ...

        # convert arguments
        if op.numargs() == 1:
            arg0 = self.convertarg(op, 0)
        elif op.numargs() == 2:
            arg0 = self.convertarg(op, 0)
            arg1 = self.convertarg(op, 1)

        # compute results
        if opname == "int_add":
            expr = arg0 + arg1
        elif opname == "int_sub":
            expr = arg0 - arg1
        elif opname == "int_mul":
            expr = arg0 * arg1
        elif opname == "int_and":
            expr = arg0 & arg1
        elif opname == "int_or":
            expr = arg0 | arg1
        elif opname == "int_xor":
            expr = arg0 ^ arg1

        # ...  more operations, some shown below

        self.solver.add(res == expr)

def newvar(self, box, repr=None):
    # ... some logic around making the string representation
    # somewhat nicer omitted
    result = z3.BitVec(repr, LONG_BIT)
    self.box_to_z3[box] = result
    return result

def convert(self, box):
    if isinstance(box, ConstInt):
        return z3.BitVecVal(box.getint(), LONG_BIT)
    return self.box_to_z3[box]

def convertarg(self, box, arg):
    return self.convert(box.getarg(arg))

def cond(self, z3expr):
    return z3.If(z3expr, TRUEBV, FALSEBV)


def add_to_solver(self, ops, state):
        # ... start as above

        # more cases
        elif opname == "int_eq":
            expr = self.cond(arg0 == arg1)
        elif opname == "int_ne":
            expr = self.cond(arg0 != arg1)
        elif opname == "int_lt":
            expr = self.cond(arg0 < arg1)
        elif opname == "int_le":
            expr = self.cond(arg0 <= arg1)
        elif opname == "int_gt":
            expr = self.cond(arg0 > arg1)
        elif opname == "int_ge":
            expr = self.cond(arg0 >= arg1)
        elif opname == "int_is_true":
            expr = self.cond(arg0 != FALSEBV)
        elif opname == "uint_lt":
            expr = self.cond(z3.ULT(arg0, arg1))
        elif opname == "uint_le":
            expr = self.cond(z3.ULE(arg0, arg1))
        elif opname == "uint_gt":
            expr = self.cond(z3.UGT(arg0, arg1))
        elif opname == "uint_ge":
            expr = self.cond(z3.UGE(arg0, arg1))
        elif opname == "int_is_zero":
            expr = self.cond(arg0 == FALSEBV)

        # ... rest as above

def guard_to_condition(self, guard, state):
    opname = guard.getopname()
    if opname == "guard_true":
        return self.convertarg(guard, 0) == TRUEBV
    elif opname == "guard_false":
        return self.convertarg(guard, 0) == FALSEBV
    elif opname == "guard_value":
        return self.convertarg(guard, 0) == self.convertarg(guard, 1)

    # ... some more exist, shown below

def add_to_solver(self, ops, state):

        # ... more cases

        elif opname == "int_add_ovf":
            expr = arg0 + arg1
            m = z3.SignExt(LONG_BIT, arg0) + z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)
        elif opname == "int_sub_ovf":
            expr = arg0 - arg1
            m = z3.SignExt(LONG_BIT, arg0) - z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)
        elif opname == "int_mul_ovf":
            expr = arg0 * arg1
            m = z3.SignExt(LONG_BIT, arg0) * z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)

        # ...

def guard_to_condition(self, guard, state):

    # ... more cases

    elif opname == "guard_no_overflow":
        assert state.no_ovf is not None
        return state.no_ovf
    elif opname == "guard_overflow":
        assert state.no_ovf is not None
        return z3.Not(state.no_ovf)

    # ... more cases

# input                       # optimized
[i0]                          [i0]
i1 = int_add(i0, 10)          i1 = int_add(i0, 10)
i2 = int_lt(i1, 15)           i2 = int_lt(i1, 15)
guard_true(i2)                guard_true(i2)
i3 = int_lt(i0, 6)            jump(0)
guard_true(i3)
jump(0)

i1unoptimized == input_i0 + 10
i2unoptimized == If(i1unoptimized < 15, 1, 0)
i1optimized == input_i0 + 10
i2optimized == If(i1optimized < 15, 1, 0)

i1unoptimized == input_i0 + 10
i2unoptimized == If(i1unoptimized < 15, 1, 0)
i1optimized == input_i0 + 10
i2optimized == If(i1optimized < 15, 1, 0)
i1optimized == 1
i2optimized == 1

...
i3unoptimized == If(input_i0 < 6, 1, 0)

input_i0 = 9223372036854775800
i1unoptimized = 9223372036854775810
i2unoptimized = 0
i1optimized = 9223372036854775810
i2optimized = 1
i3unoptimized = 0

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_sub(0, x)
    if a < 0:
        if x > 0:
            return 0
        return 1
    return 2

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_sub(0, x)
    if a < 0:
        return 0
    return 2

[i0, i1, i2, i3, i4, i5] # example values: 9, 11, -8, -95, 46, 57
i6 = int_add_ovf(i3, i0) # -86
guard_no_overflow()
i7 = int_sub(i2, -35/ci) # 27
i8 = uint_ge(i3, i5) # 1
guard_true(i8)
i9 = int_lt(i7, i8) # 0
i10 = int_mul_ovf(34/ci, i7) # 918
guard_no_overflow()
i11 = int_and(i10, 63/ci) # 22
i12 = int_rshift(i3, i11) # -1
i13 = int_is_zero(i7) # 0
i14 = int_is_true(i13) # 0
guard_false(i13)
i15 = int_lt(i8, i4) # 1
i16 = int_and(i6, i0) # 8
i17 = uint_ge(i6, -6/ci) # 0
finish()
[a, b]
c = int_add(a, b)
r = int_sub_ovf(c, b)
guard_no_ovf()
finish(r)

[a, b]
finish(a)

[a, b]
c = int_add_ovf(a, b)
guard_no_ovf()
r = int_sub(c, b)
finish(r)

[a, b]
c = int_add_ovf(a, b)
guard_no_ovf()
finish(a)

  $ conda create -c conda-forge -n my-pypy-env pypy python=3.8
  $ conda activate my-pypy-env

  $ conda config --set channel_priority strict

import pytest
from typing import Optional, Any


class Value:
    def find(self):
        raise NotImplementedError("abstract")

    def _set_forwarded(self, value):
        raise NotImplementedError("abstract")


class Operation(Value):
    def __init__(
        self, name: str, args: list[Value]
    ):
        self.name = name
        self.args = args
        self.forwarded = None
        self.info = None

    def __repr__(self):
        return (
            f"Operation({self.name}, "
            f"{self.args}, {self.forwarded}, "
            f"{self.info})"
        )

    def find(self) -> Value:
        op = self
        while isinstance(op, Operation):
            next = op.forwarded
            if next is None:
                return op
            op = next
        return op

    def arg(self, index):
        return self.args[index].find()

    def make_equal_to(self, value: Value):
        self.find()._set_forwarded(value)

    def _set_forwarded(self, value: Value):
        self.forwarded = value


class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

    def find(self):
        return self

    def _set_forwarded(self, value: Value):
        assert (
            isinstance(value, Constant)
            and value.value == self.value
        )

class Block(list):
    def opbuilder(opname):
        def wraparg(arg):
            if not isinstance(arg, Value):
                arg = Constant(arg)
            return arg
        def build(self, *args):
            # construct an Operation, wrap the
            # arguments in Constants if necessary
            op = Operation(opname,
                [wraparg(arg) for arg in args])
            # add it to self, the basic block
            self.append(op)
            return op
        return build

    # a bunch of operations we support
    add = opbuilder("add")
    mul = opbuilder("mul")
    getarg = opbuilder("getarg")
    dummy = opbuilder("dummy")
    lshift = opbuilder("lshift")
    # some new one for this post
    alloc = opbuilder("alloc")
    load = opbuilder("load")
    store = opbuilder("store")
    print = opbuilder("print")

def bb_to_str(bb: Block, varprefix: str = "var"):
    def arg_to_str(arg: Value):
        if isinstance(arg, Constant):
            return str(arg.value)
        else:
            return varnames[arg]

    varnames = {}
    res = []
    for index, op in enumerate(bb):
        var = f"{varprefix}{index}"
        varnames[op] = var
        arguments = ", ".join(
            arg_to_str(op.arg(i))
                for i in range(len(op.args))
        )
        strop = f"{var} = {op.name}({arguments})"
        res.append(strop)
    return "\n".join(res)

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
def test_interpret():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    assert interpret(bb, 17) == 17

class Object:
    def __init__(self):
        self.contents: dict[int, Any] = {}

    def store(self, idx : int, value : Any):
        self.contents[idx] = value

    def load(self, idx : int):
        return self.contents[idx]

def get_num(op, index=1):
    assert isinstance(op.arg(index), Constant)
    return op.arg(index).value

def interpret(bb : Block, *args : tuple[Any]):
    def argval(op, i):
        arg = op.arg(i)
        if isinstance(arg, Constant):
            return arg.value
        else:
            assert isinstance(arg, Operation)
            return arg.info

    for index, op in enumerate(bb):
        if op.name == "getarg":
            res = args[get_num(op, 0)]
        elif op.name == "alloc":
            res = Object()
        elif op.name == "load":
            fieldnum = get_num(op)
            res = argval(op, 0).load(fieldnum)
        elif op.name == "store":
            obj = argval(op, 0)
            fieldnum = get_num(op)
            fieldvalue = argval(op, 2)
            obj.store(fieldnum, fieldvalue)
            # no result, only side effect
            continue
        elif op.name == "print":
            res = argval(op, 0)
            print(res)
            return res
        else:
            raise NotImplementedError(
                f"{op.name} not supported")
        op.info = res

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
var0 = getarg(0)
print(var0)
def test_remove_unused_allocation():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    # the virtual object looks like this:
    #  obj
    # ┌──────────┐
    # │ 0: var0  │
    # └──────────┘
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

class VirtualObject:
    def __init__(self):
        self.contents: dict[int, Value] = {}

    def store(self, idx, value):
        self.contents[idx] = value

    def load(self, idx):
        return self.contents[idx]

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            field = get_num(op)
            info.store(field, op.arg(2))
            continue
        opt_bb.append(op)
    return opt_bb

def test_remove_two_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    sto1 = bb.store(obj0, 0, var0)
    obj1 = bb.alloc()
    sto2 = bb.store(obj1, 0, obj0)
    var1 = bb.load(obj1, 0)
    var2 = bb.load(var1, 0)
    bb.print(var2)
    # the virtual objects look like this:
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: var0 │
    #   └─────────┘
    # therefore
    # var1 is the same as obj0
    # var2 is the same as var0
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

def test_materialize():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    #  obj is virtual, without any fields
    # ┌───────┐
    # │ empty │
    # └───────┘
    # then we store a reference to obj into
    # field 0 of var0. Since var0 is not virtual,
    # obj escapes, so we have to put it back
    # into the optimized basic block
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)"""
    # so far, fails like this:
    # the line:
    # info.store(field, op.arg(2))
    # produces an AttributeError because info
    # is None

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    assert isinstance(info, VirtualObject)
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_dont_materialize_twice():
    # obj is again an empty virtual object,
    # and we store it into var0 *twice*.
    # this should only materialize it once
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto0 = bb.store(var0, 0, obj)
    sto1 = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)
optvar3 = store(optvar0, 0, optvar1)"""
    # fails so far: the operations that we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar2 = store(optvar0, 0, optvar1)
    # optvar3 = alloc()
    # optvar4 = store(optvar0, 0, optvar3)
    # ie the object is materialized twice,
    # which is incorrect

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_non_virtuals():
    # in this example we store a non-virtual var1
    # into another non-virtual var0
    # this should just lead to no optimization at
    # all
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    sto = bb.store(var0, 0, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = store(optvar0, 0, optvar1)"""

def test_materialization_constants():
    # in this example we store the constant 17
    # into the non-virtual var0
    # again, this will not be optimized
    bb = Block()
    var0 = bb.getarg(0)
    sto = bb.store(var0, 0, 17)
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails so far, triggering
    # the assert:
    # assert not isinstance(value, Constant)
    # in materialize
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = store(optvar0, 0, 17)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_fields():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    obj = bb.alloc()
    contents0 = bb.store(obj, 0, 8)
    contents1 = bb.store(obj, 1, var1)
    sto = bb.store(var0, 0, obj)

    # the virtual obj looks like this
    #  obj
    # ┌──────┬──────────┐
    # │ 0: 8 │ 1: var1  │
    # └──────┴──────────┘
    # then it needs to be materialized
    # this is the first example where a virtual
    # object that we want to materialize has any
    # content and is not just an empty object
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = alloc()
optvar3 = store(optvar2, 0, 8)
optvar4 = store(optvar2, 1, optvar1)
optvar5 = store(optvar0, 0, optvar2)"""
    # fails so far! the operations we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = getarg(1)
    # optvar2 = alloc()
    # optvar3 = store(optvar0, 0, optvar2)
    # which is wrong, because the store operations
    # into optvar1 got lost

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in info.contents.items():
        # re-create store operation
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_chained_objects():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    obj1 = bb.alloc()
    contents = bb.store(obj0, 0, obj1)
    const = bb.store(obj1, 0, 1337)
    sto = bb.store(var0, 0, obj0)
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: 1337 │
    #   └─────────┘
    # now obj0 escapes
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = alloc()
optvar3 = store(optvar2, 0, 1337)
optvar4 = store(optvar1, 0, optvar2)
optvar5 = store(optvar0, 0, optvar1)"""
    # fails in an annoying way! the resulting
    # basic block is not in proper SSA form
    # so printing it fails. The optimized
    # block would look like this:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar3 = store(optvar1, 0, optvar2)
    # optvar4 = store(optvar0, 0, optvar1)
    # where optvar2 is an ``alloc`` Operation
    # that is not itself in the output block

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_object_graph_cycles():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, var1)
    var3 = bb.store(var0, 1, var1)
    #   ┌────────┐
    #   ▼        │
    #  obj0      │
    # ┌──────┐   │
    # │ 0: ╷ │   │
    # └────┼─┘   │
    #      │     │
    #      └─────┘
    # obj0 points to itself, and then it is
    # escaped
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails with an
    # InfiniteRecursionError
    # materialize calls itself, infinitely

    # what we want is instead this output:
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar1, 0, optvar1)
optvar3 = store(optvar0, 1, optvar1)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # only materialize once
    value.info = None
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def test_load_non_virtual():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.load(var0, 0)
    bb.print(var1)
    # the next line fails in the line
    # op.make_equal_to(info.load(field))
    # because info is None
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = load(optvar0, 0)
optvar2 = print(optvar1)"""

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
            # otherwise not virtual, use the
            # general path below
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_materialize_on_other_ops():
    # materialize not just on store
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = print(optvar1)"""
    # again, the resulting basic block is not in
    # valid SSA form

# materialize is unchanged
def materialize(opt_bb, value: Value) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if not info:
        # Already materialized
        return
    assert value.name == "alloc"
    opt_bb.append(value)
    value.info = None
    for idx, val in sorted(info.contents.items()):
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
        # materialize all the arguments of
        # operations that are put into the
        # output basic block
        for arg in op.args:
            materialize(opt_bb, arg.find())
        opt_bb.append(op)
    return opt_bb

def test_sink_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, 123)
    var3 = bb.store(var1, 1, 456)
    var4 = bb.load(var1, 0)
    var5 = bb.load(var1, 1)
    var6 = bb.add(var4, var5)
    var7 = bb.store(var1, 0, var6)
    var8 = bb.store(var0, 1, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(123, 456)
optvar2 = alloc()
optvar3 = store(optvar2, 0, optvar1)
optvar4 = store(optvar2, 1, 456)
optvar5 = store(optvar0, 1, optvar2)"""

var1 = add(b, 17)
var2 = mul(a, var1)
var3 = add(b, 17)
var4 = add(var2, var3)
import pytest
from typing import Optional, Any

class Value:
    pass

class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

class Operation(Value):
    def __init__(self, name: str, args: list[Value]):
        self.name = name
        self.args = args

    def __repr__(self):
        return f"Operation({self.name}, {self.args})"

    def arg(self, index: int):
        return self.args[index]

def test_construct_example():
    # first we need something to represent
    # "a" and "b". In our limited view, we don't
    # know where they come from, so we will define
    # them with a pseudo-operation called "getarg"
    # which takes a number n as an argument and
    # returns the n-th input argument. The proper
    # SSA way to do this would be phi-nodes.

    a = Operation("getarg", [Constant(0)])
    b = Operation("getarg", [Constant(1)])
    # var1 = add(b, 17)
    var1 = Operation("add", [b, Constant(17)])
    # var2 = mul(a, var1)
    var2 = Operation("mul", [a, var1])
    # var3 = add(b, 17)
    var3 = Operation("add", [b, Constant(17)])
    # var4 = add(var2, var3)
    var4 = Operation("add", [var2, var3])

    sequence = [a, b, var1, var2, var3, var4]
    # nothing to test really, it shouldn't crash

class Block(list):
    def opbuilder(opname):
        def wraparg(arg):
            if not isinstance(arg, Value):
                arg = Constant(arg)
            return arg
        def build(self, *args):
            # construct an Operation, wrap the
            # arguments in Constants if necessary
            op = Operation(opname,
                [wraparg(arg) for arg in args])
            # add it to self, the basic block
            self.append(op)
            return op
        return build

    # a bunch of operations we support
    add = opbuilder("add")
    mul = opbuilder("mul")
    getarg = opbuilder("getarg")
    dummy = opbuilder("dummy")
    lshift = opbuilder("lshift")

def test_convencience_block_construction():
    bb = Block()
    # a again with getarg, the following line
    # defines the Operation instance and
    # immediately adds it to the basic block bb
    a = bb.getarg(0)
    assert len(bb) == 1
    assert bb[0].name == "getarg"

    # it's a Constant
    assert bb[0].args[0].value == 0

    # b with getarg
    b = bb.getarg(1)
    # var1 = add(b, 17)
    var1 = bb.add(b, 17)
    # var2 = mul(a, var1)
    var2 = bb.mul(a, var1)
    # var3 = add(b, 17)
    var3 = bb.add(b, 17)
    # var4 = add(var2, var3)
    var4 = bb.add(var2, var3)
    assert len(bb) == 6

[Operation('getarg', [Constant(0)]),
 Operation('getarg', [Constant(1)]),
 Operation('add',
           [Operation('getarg',
                      [Constant(1)]),
                 Constant(17)]),
 Operation('mul',
           [Operation('getarg',
                      [Constant(0)]),
                 Operation('add',
                           [Operation('getarg',
                                      [Constant(1)]),
                            Constant(17)])]),
 Operation('add',
           [Operation('getarg',
                      [Constant(1)]),
            Constant(17)]),
 Operation('add',
           [Operation('mul',
                       [Operation('getarg',
                                  [Constant(0)]),
                             Operation('add',
                                       [Operation('getarg',
                                                  [Constant(1)]),
                                        Constant(17)])]),
                 Operation('add',
                           [Operation('getarg',
                                           [Constant(1)]),
                                 Constant(17)])])]

def bb_to_str(bb: Block, varprefix: str = "var"):
    # the implementation is not too important,
    # look at the test below to see what the
    # result looks like

    def arg_to_str(arg: Value):
        if isinstance(arg, Constant):
            return str(arg.value)
        else:
            # the key must exist, otherwise it's
            # not a valid SSA basic block:
            # the variable must be defined before
            # its first use
            return varnames[arg]

    varnames = {}
    res = []
    for index, op in enumerate(bb):
        # give the operation a name used while
        # printing:
        var = f"{varprefix}{index}"
        varnames[op] = var
        arguments = ", ".join(
            arg_to_str(op.arg(i))
                for i in range(len(op.args))
        )
        strop = f"{var} = {op.name}({arguments})"
        res.append(strop)
    return "\n".join(res)

def test_basicblock_to_str():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, var0)

    assert bb_to_str(bb) == """\
var0 = getarg(0)
var1 = add(5, 4)
var2 = add(var1, var0)"""

    # with a different prefix for the invented
    # variable names:
    assert bb_to_str(bb, "x") == """\
x0 = getarg(0)
x1 = add(5, 4)
x2 = add(x1, x0)"""

    # and our running example:
    bb = Block()
    a = bb.getarg(0)
    b = bb.getarg(1)
    var1 = bb.add(b, 17)
    var2 = bb.mul(a, var1)
    var3 = bb.add(b, 17)
    var4 = bb.add(var2, var3)

    assert bb_to_str(bb, "v") == """\
v0 = getarg(0)
v1 = getarg(1)
v2 = add(v1, 17)
v3 = mul(v0, v2)
v4 = add(v1, 17)
v5 = add(v3, v4)"""
    # Note the re-numbering of the variables! We
    # don't attach names to Operations at all, so
    # the printing will just number them in
    # sequence, can sometimes be a source of
    # confusion.

v0 = getarg(0)
v1 = getarg(1)
v2 = add(v1, 17)
v3 = mul(v0, v2)
v4 = add(v1, 17)
v5 = add(v3, v4)
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar1, 17)
optvar3 = mul(optvar0, optvar2)
optvar4 = add(optvar3, optvar2)
class Value:
    def find(self):
        raise NotImplementedError("abstract")
    def _set_forwarded(self, value):
        raise NotImplementedError("abstract")


class Operation(Value):
    def __init__(self, name: str, args: list[Value]):
        self.name = name
        self.args = args
        self.forwarded = None

    def __repr__(self):
        return (
            f"Operation({self.name},"
            f"{self.args}, {self.forwarded})"
        )

    def find(self) -> Value:
        # returns the "representative" value of
        # self, in the union-find sense
        op = self
        while isinstance(op, Operation):
            # could do path compression here too
            # but not essential
            next = op.forwarded
            if next is None:
                return op
            op = next
        return op

    def arg(self, index):
        # change to above: return the
        # representative of argument 'index'
        return self.args[index].find()

    def make_equal_to(self, value: Value):
        # this is "union" in the union-find sense,
        # but the direction is important! The
        # representative of the union of Operations
        # must be either a Constant or an operation
        # that we know for sure is not optimized
        # away.

        self.find()._set_forwarded(value)

    def _set_forwarded(self, value: Value):
        self.forwarded = value


class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

    def find(self):
        return self

    def _set_forwarded(self, value: Value):
        # if we found out that an Operation is
        # equal to a constant, it's a compiler bug
        # to find out that it's equal to another
        # constant
        assert isinstance(value, Constant) and \
            value.value == self.value

def test_union_find():
    # construct three operation, and unify them
    # step by step
    bb = Block()
    a1 = bb.dummy(1)
    a2 = bb.dummy(2)
    a3 = bb.dummy(3)

    # at the beginning, every op is its own
    # representative, that means every
    # operation is in a singleton set
    # {a1} {a2} {a3}
    assert a1.find() is a1
    assert a2.find() is a2
    assert a3.find() is a3

    # now we unify a2 and a1, then the sets are
    # {a1, a2} {a3}
    a2.make_equal_to(a1)
    # they both return a1 as the representative
    assert a1.find() is a1
    assert a2.find() is a1
    # a3 is still different
    assert a3.find() is a3

    # now they are all in the same set {a1, a2, a3}
    a3.make_equal_to(a2)
    assert a1.find() is a1
    assert a2.find() is a1
    assert a3.find() is a1

    # now they are still all the same, and we
    # also learned that they are the same as the
    # constant 6
    # the single remaining set then is
    # {6, a1, a2, a3}
    c = Constant(6)
    a2.make_equal_to(c)
    assert a1.find() is c
    assert a2.find() is c
    assert a3.find() is c

    # union with the same constant again is fine
    a2.make_equal_to(c)

def constfold_buggy(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        # basic idea: go over the list and do
        # constant folding of add where possible
        if op.name == "add":
            arg0 = op.args[0]
            arg1 = op.args[1]
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                # can constant-fold! that means we
                # learned a new equality, namely
                # that op is equal to a specific
                # constant
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                # don't need to have the operation
                # in the optimized basic block
                continue
        # otherwise the operation is not
        # constant-foldable and we put into the
        # output list
        opt_bb.append(op)
    return opt_bb


def test_constfold_simple():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, var0)

    opt_bb = constfold_buggy(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(9, optvar0)"""

@pytest.mark.xfail
def test_constfold_buggy_limitation():
    # this test fails! it shows the problem with
    # the above simple constfold_buggy pass

    bb = Block()
    var0 = bb.getarg(0)
    # this is folded
    var1 = bb.add(5, 4)
    # we want this folded too, but it doesn't work
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)

    opt_bb = constfold_buggy(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

optvar0 = getarg(0)
optvar1 = add(9, 10)
optvar2 = add(optvar1, optvar0)
def constfold(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        # basic idea: go over the list and do
        # constant folding of add where possible
        if op.name == "add":
            # >>> changed
            arg0 = op.arg(0) # uses .find()
            arg1 = op.arg(1) # uses .find()
            # <<< end changes
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                # can constant-fold! that means we
                # learned a new equality, namely
                # that op is equal to a specific
                # constant
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                # don't need to have the operation
                # in the optimized basic block
                continue
        # otherwise the operation is not
        # constant-foldable and we put into the
        # output list
        opt_bb.append(op)
    return opt_bb


def test_constfold_two_ops():
    # now it works!
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)
    opt_bb = constfold(bb)

    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

def cse(bb: Block) -> Block:
    # structure is the same, loop over the input,
    # add some but not all operations to the
    # output

    opt_bb = Block()

    for op in bb:
        # only do CSE for add here, but it
        # generalizes
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)
            # Check whether we have emitted the
            # same operation already
            prev_op = find_prev_add_op(
                arg0, arg1, opt_bb)
            if prev_op is not None:
                # if yes, we can optimize op away
                # and replace it with the earlier
                # result, which is an Operation
                # that was already emitted to
                # opt_bb
                op.make_equal_to(prev_op)
                continue
        opt_bb.append(op)
    return opt_bb


def eq_value(val0, val1):
    if isinstance(val0, Constant) and \
            isinstance(val1, Constant):
        # constants compare by their value
        return val0.value == val1.value
    # everything else by identity
    return val0 is val1


def find_prev_add_op(arg0: Value, arg1: Value,
        opt_bb: Block) -> Optional[Operation]:
    # Really naive and quadratic implementation.
    # What we do is walk over the already emitted
    # operations and see whether we emitted an add
    # with the current arguments already. A real
    # implementation might use a hashmap of some
    # kind, or at least only look at a limited
    # window of instructions.
    for opt_op in opt_bb:
        if opt_op.name != "add":
            continue
        # It's important to call arg here,
        # for the same reason why we
        # needed it in constfold: we need to
        # make sure .find() is called
        if eq_value(arg0, opt_op.arg(0)) and \
                eq_value(arg1, opt_op.arg(1)):
            return opt_op
    return None


def test_cse():
    bb = Block()
    a = bb.getarg(0)
    b = bb.getarg(1)
    var1 = bb.add(b, 17)
    var2 = bb.mul(a, var1)
    var3 = bb.add(b, 17)
    var4 = bb.add(var2, var3)

    opt_bb = cse(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar1, 17)
optvar3 = mul(optvar0, optvar2)
optvar4 = add(optvar3, optvar2)"""

def strength_reduce(bb: Block) -> Block:
    opt_bb = Block()
    for op in bb:
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)
            if arg0 is arg1:
                # x + x turns into x << 1
                newop = opt_bb.lshift(arg0, 1)
                op.make_equal_to(newop)
                continue
        opt_bb.append(op)
    return opt_bb

def test_strength_reduce():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(var0, var0)

    opt_bb = strength_reduce(bb)

    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = lshift(optvar0, 1)"""

def optimize(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)

            # constant folding
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                continue

            # cse
            prev_op = find_prev_add_op(
                arg0, arg1, opt_bb)
            if prev_op is not None:
                op.make_equal_to(prev_op)
                continue

            # strength reduce:
            # x + x turns into x << 1
            if arg0 is arg1:
                newop = opt_bb.lshift(arg0, 1)
                op.make_equal_to(newop)
                continue

            # and while we are at it, let's do some
            # arithmetic simplification:
            # a + 0 => a
            if eq_value(arg0, Constant(0)):
                op.make_equal_to(arg1)
                continue
            if eq_value(arg1, Constant(0)):
                op.make_equal_to(arg0)
                continue
        opt_bb.append(op)
    return opt_bb


def test_single_pass():
    bb = Block()
    # constant folding
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

    # cse + strength reduction
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    var2 = bb.add(var0, var1)
    var3 = bb.add(var0, var1) # the same as var3
    var4 = bb.add(var2, 2)
    var5 = bb.add(var3, 2) # the same as var4
    var6 = bb.add(var4, var5)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar0, optvar1)
optvar3 = add(optvar2, 2)
optvar4 = lshift(optvar3, 1)"""

    # removing + 0
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(16, -16)
    var2 = bb.add(var0, var1)
    var3 = bb.add(0, var2)
    var4 = bb.add(var2, var3)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = lshift(optvar0, 1)"""

class TestW_IntObject:
    ...

    def test_hash(self):
        w_x = W_IntObject(42)
        w_result = w_x.descr_hash(self.space)
        assert isinstance(w_result, W_IntObject)
        assert w_result.intval == 42

def test_hash():
    assert hash(42) == 42

def test_load_attr(self):
    src = '''
        class A(object):
            pass
        a = A()
        a.x = 1
        def main(n):
            i = 0
            while i < n:
                i = i + a.x
            return i
    '''
    log = self.run(src, [1000])
    assert log.result == 1000
    loop, = log.loops_by_filename(self.filepath)
    assert loop.match("""
        i9 = int_lt(i5, i6)
        guard_true(i9, descr=...)
        guard_not_invalidated(descr=...)
        i10 = int_add(i5, 1)
        --TICK--
        jump(..., descr=...)
    """)

PyPy v7.3.11: release of python 2.7, 3.8, and 3.9

What is PyPy?

What else is new?

Background: Python Integers in the PyPy JIT

Background: Bounds Analysis in PyPy's JIT

Motivation: A JIT Miscompilation

How could we have avoided this bug?

Z3 Proof of Concept

SMT Checking of the JIT Optimizer

Finding the Bug, Again

Second bug

Generating Random Traces

Bugs Found

Future Work and Conclusion

Acknowledgements

PyPy v7.3.10: release of python 2.7, 3.8, and 3.9

What is PyPy?

What else is new?

Greatest Hits

Personal Favourites

Interpreter

Version 1: Naive Attempt

Version 2: Re-Materializing Allocations

Version 3: Don't Materialize Twice

Version 4: Materialization of Constants

Version 5: Materializing Fields

Version 6: Recursive Materialization

Version 7: Dealing with Object Cycles

Version 8: Loading from non-virtual objects

Version 9 (Final): Materialize on Other Operations

Conclusion

Footnotes

Topics and goals

What is a sprint?

Location

Exact times

Registration

Implementing the Intermediate Representation

Storing Equivalences between Operations Using a Union-Find Data Structure

Constant Folding

Common Subexpression Elimination

Strength Reduction

Putting Things Together

Conclusion

Some Further Pointers

How is PyPy Tested?

Background

PyPy Testing History

Interpreter-Level Tests

Application-Level Tests

The CPython Test Suite

Testing for Performance Regressions

Conclusion

Footnotes

The PyPy blogposts

Recent Posts

Archives

Tags