From 3694b9e82116051e48b484d9d803c209197370d3 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Sun, 22 May 2022 17:49:22 +0200
Subject: [PATCH 1/7] corrected split optimisation

---
 pystencils/astnodes.py                   |  2 +-
 pystencils/backends/cbackend.py          |  3 +-
 pystencils/transformations.py            |  9 ++--
 pystencils_tests/test_transformations.py | 63 ++++++++++++++++++++++--
 4 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index ef0bcc6d7..f3ed2711c 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -448,7 +448,7 @@ class LoopOverCoordinate(Node):
     def new_loop_with_different_body(self, new_body):
         result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
                                     self.step, self.is_block_loop)
-        result.prefix_lines = [l for l in self.prefix_lines]
+        result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
         return result
 
     def subs(self, subs_dict):
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 486bd1260..499a80498 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -444,7 +444,8 @@ class CustomSympyPrinter(CCodePrinter):
     def _print_Pow(self, expr):
         """Don't use std::pow function, for small integer exponents, write as multiplication"""
         if isinstance(expr.exp, sp.Integer) and (-8 < expr.exp < 8):
-            raise NotImplementedError("This pow should be simplified already?")
+            raise ValueError(f"This expression: {expr} contains a pow function that should be simplified already with "
+                             f"a sequence of multiplications")
         return super(CustomSympyPrinter, self)._print_Pow(expr)
 
     # TODO don't print ones in sp.Mul
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index 43beefd25..06005cbf7 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -13,6 +13,7 @@ from pystencils.typing import (CastFunc, PointerType, StructType, TypedSymbol, g
                                ReinterpretCastFunc, get_next_parent_of_type, parents_of_type)
 from pystencils.field import Field, FieldType
 from pystencils.typing import FieldPointerSymbol
+from pystencils.sympyextensions import fast_subs
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.slicing import normalize_slice
 from pystencils.integer_functions import int_div
@@ -650,11 +651,11 @@ def split_inner_loop(ast_node: ast.Node, symbol_groups):
                        and which no symbol in a symbol group depends on, are not updated!
     """
     all_loops = ast_node.atoms(ast.LoopOverCoordinate)
-    inner_loop = [l for l in all_loops if l.is_innermost_loop]
+    inner_loop = [loop for loop in all_loops if loop.is_innermost_loop]
     assert len(inner_loop) == 1, "Error in AST: multiple innermost loops. Was split transformation already called?"
     inner_loop = inner_loop[0]
     assert type(inner_loop.body) is ast.Block
-    outer_loop = [l for l in all_loops if l.is_outermost_loop]
+    outer_loop = [loop for loop in all_loops if loop.is_outermost_loop]
     assert len(outer_loop) == 1, "Error in AST, multiple outermost loops."
     outer_loop = outer_loop[0]
 
@@ -688,8 +689,8 @@ def split_inner_loop(ast_node: ast.Node, symbol_groups):
         assignment_group = []
         for assignment in inner_loop.body.args:
             if assignment.lhs in symbols_resolved:
-                new_rhs = assignment.rhs.subs(
-                    symbols_with_temporary_array.items())
+                # use fast_subs here because it checks if multiplications should be evaluated or not
+                new_rhs = fast_subs(assignment.rhs, symbols_with_temporary_array)
                 if not isinstance(assignment.lhs, Field.Access) and assignment.lhs in symbol_group:
                     assert type(assignment.lhs) is TypedSymbol
                     new_ts = TypedSymbol(assignment.lhs.name, PointerType(assignment.lhs.dtype))
diff --git a/pystencils_tests/test_transformations.py b/pystencils_tests/test_transformations.py
index 3ede70a85..d6e6888b5 100644
--- a/pystencils_tests/test_transformations.py
+++ b/pystencils_tests/test_transformations.py
@@ -1,5 +1,7 @@
+import sympy as sp
+
 import pystencils as ps
-from pystencils import TypedSymbol
+from pystencils import fields, TypedSymbol
 from pystencils.astnodes import LoopOverCoordinate, SympyAssignment
 from pystencils.typing import create_type
 from pystencils.transformations import filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy
@@ -10,8 +12,8 @@ def test_loop_information():
     update_rule = ps.Assignment(g[0, 0], f[0, 0])
 
     ast = ps.create_kernel(update_rule)
-    inner_loops = [l for l in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
-                   if l.is_innermost_loop]
+    inner_loops = [loop for loop in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
+                   if loop.is_innermost_loop]
 
     loop_order = []
     for i in get_loop_hierarchy(inner_loops[0].args[0]):
@@ -23,3 +25,58 @@ def test_loop_information():
 
     assert loop_symbols == [TypedSymbol("ctr_1", create_type("int"), nonnegative=True),
                             TypedSymbol("ctr_0", create_type("int"), nonnegative=True)]
+
+
+def test_split_optimisation():
+    """Kernel creation must honour the 'split_groups' simplification hint.
+
+    Builds a nine-direction 2D update from ``src`` to ``dst`` and counts the ``for`` loops in the generated code.
+    """
+    src, dst = fields("src(9), dst(9): [2D]", layout='fzyx')
+
+    stencil = ((0, 0), (0, 1), (0, -1), (-1, 0), (1, 0), (-1, 1), (1, 1), (-1, -1), (1, -1))
+    # one weight per stencil entry: 1 x 4/9, 4 x 1/9, 4 x 1/36 (same order as ``stencil``)
+    w = [sp.Rational(4, 9)] + [sp.Rational(1, 9)] * 4 + [sp.Rational(1, 36)] * 4
+    cs = sp.Rational(1, 3)
+
+    rho = sp.symbols("rho")
+    velo = sp.symbols("u_:2")
+
+    density = 0
+    velocity_x = 0
+    velocity_y = 0
+    for d in stencil:
+        density += src[d]
+        velocity_x += d[0] * src[d]
+        velocity_y += d[1] * src[d]
+
+    subexpressions = [ps.Assignment(rho, density),
+                      ps.Assignment(velo[0], velocity_x),
+                      ps.Assignment(velo[1], velocity_y)]
+
+    u_2 = velo[0] * velo[0] + velo[1] * velo[1]  # independent of the direction, so built only once
+    main_assignments = []
+    for i, d in enumerate(stencil):
+        u_d = velo[0] * d[0] + velo[1] * d[1]
+        expr = w[i] * rho * (1 + u_d / cs + u_d ** 2 / (2 * cs ** 2) - u_2 / (2 * cs))
+        main_assignments.append(ps.Assignment(dst.center_vector[i], expr))
+
+    ac = ps.AssignmentCollection(main_assignments=main_assignments, subexpressions=subexpressions)
+
+    # five split groups: the shared subexpressions travel with dst[0], the other outputs are paired up
+    simplification_hint = {'density': rho,
+                           'velocity': (velo[0], velo[1]),
+                           'split_groups': [[rho, velo[0], velo[1], dst.center_vector[0]],
+                                            [dst.center_vector[1], dst.center_vector[2]],
+                                            [dst.center_vector[3], dst.center_vector[4]],
+                                            [dst.center_vector[5], dst.center_vector[6]],
+                                            [dst.center_vector[7], dst.center_vector[8]]]}
+
+    ac.simplification_hints = simplification_hint
+    ast = ps.create_kernel(ac)
+
+    code = ps.get_code_str(ast)
+    # after the split optimisation the two for loops of the 2D kernel are expected to have become 6
+    assert code.count("for") == 6
+
+    print(code)
-- 
GitLab


From 49150419560cc096574414bef3af9dc465654f53 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 11:41:12 +0200
Subject: [PATCH 2/7] Added non evaluation

---
 pystencils/cpu/vectorization.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index b3236a3c5..78b6aac0f 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -3,13 +3,13 @@ from typing import Container, Union
 
 import numpy as np
 import sympy as sp
+from sympy import evaluate
 from sympy.logic.boolalg import BooleanFunction, BooleanAtom
 
 import pystencils.astnodes as ast
 from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
 from pystencils.typing import (BasicType, PointerType, TypedSymbol, VectorType, CastFunc, collate_types,
                                get_type_of_expression, VectorMemoryAccess)
-from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
 from pystencils.functions import DivFunc
 from pystencils.field import Field
 from pystencils.integer_functions import modulo_ceil, modulo_floor
@@ -133,7 +133,6 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
     vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
                                                 strided, keep_loop_stop, assume_sufficient_line_padding,
                                                 default_float_type)
-    # is in vectorize_inner_loops_and_adapt_load_stores.. insert_vector_casts(kernel_ast, default_float_type)
 
 
 def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontemporal_fields,
@@ -143,8 +142,8 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
     vector_width = ast_node.instruction_set['width']
 
     all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
-    inner_loops = [n for n in all_loops if n.is_innermost_loop]
-    zero_loop_counters = {l.loop_counter_symbol: 0 for l in all_loops}
+    inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
+    zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
 
     for loop_node in inner_loops:
         loop_range = loop_node.stop - loop_node.start
@@ -158,7 +157,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             loop_node.stop = new_stop
         else:
             cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
-            loop_nodes = [l for l in cut_loop(loop_node, [cutting_point]).args if isinstance(l, ast.LoopOverCoordinate)]
+            with evaluate(False):
+                loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
+                              if isinstance(loop, ast.LoopOverCoordinate)]
             assert len(loop_nodes) in (0, 1, 2)  # 2 for main and tail loop, 1 if loop range divisible by vector width
             if len(loop_nodes) == 0:
                 continue
@@ -179,8 +180,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
                     successful = False
                     break
                 typed_symbol = base.label
-                assert type(typed_symbol.dtype) is PointerType, \
-                    f"Type of access is {typed_symbol.dtype}, {indexed}"
+                assert type(typed_symbol.dtype) is PointerType, f"Type of access is {typed_symbol.dtype}, {indexed}"
 
                 vec_type = VectorType(typed_symbol.dtype.base_type, vector_width)
                 use_aligned_access = aligned_access and assume_aligned
@@ -202,7 +202,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             continue
 
         loop_node.step = vector_width
-        loop_node.subs(substitutions)
+        loop_node.fast_subs(substitutions)
         vector_int_width = ast_node.instruction_set['intwidth']
         arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
         arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
@@ -254,8 +254,7 @@ def mask_conditionals(loop_body):
 def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
     """Inserts necessary casts from scalar values to vector values."""
 
-    handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all, DivFunc,
-                         sp.UnevaluatedExpr, sp.Abs)
+    handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.UnevaluatedExpr, sp.Abs)
 
     def visit_expr(expr, default_type='double'):  # TODO Vectorization Revamp: get rid of default_type
         if isinstance(expr, VectorMemoryAccess):
@@ -286,7 +285,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
                 if dtype is not int:
                     if dtype is np.float32:
                         default_type = 'float'
-                    expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
+                    expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:], evaluate=False)
             new_args = [visit_expr(a, default_type) for a in expr.args]
             arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
             if not any(type(t) is VectorType for t in arg_types):
@@ -296,7 +295,10 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
                 casted_args = [
                     CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
                     for a, t in zip(new_args, arg_types)]
-                return expr.func(*casted_args)
+                if isinstance(expr.func, (sp.Mul, sp.Add)):
+                    return expr.func(*casted_args, evaluate=False)
+                else:
+                    return expr.func(*casted_args)
         elif expr.func is sp.Pow:
             new_arg = visit_expr(expr.args[0], default_type)
             return expr.func(new_arg, expr.args[1])
-- 
GitLab


From 1fcf409f479c6476ee22c71b97fc38c73132f70e Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 11:46:53 +0200
Subject: [PATCH 3/7] Removed fast approximations

---
 pystencils_tests/test_vectorization.py | 31 --------------------------
 1 file changed, 31 deletions(-)

diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 19f266b12..f526341ec 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -8,7 +8,6 @@ import sympy as sp
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
 from pystencils.cpu.vectorization import vectorize
-from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
 from pystencils.enums import Target
 from pystencils.transformations import replace_inner_stride_with_one
 
@@ -19,7 +18,6 @@ else:
     instruction_set = None
 
 
-
 # TODO: Skip tests if no instruction set is available and check all codes if they are really vectorised !
 def test_vector_type_propagation(instruction_set=instruction_set):
     a, b, c, d, e = sp.symbols("a b c d e")
@@ -276,35 +274,6 @@ def test_vectorised_pow(instruction_set=instruction_set):
     ast.compile()
 
 
-def test_vectorised_fast_approximations(instruction_set=instruction_set):
-    # fast_approximations are a gpu thing
-    arr = np.zeros((24, 24))
-    f, g = ps.fields(f=arr, g=arr)
-
-    expr = sp.sqrt(f[0, 0] + f[1, 0])
-    assignment = ps.Assignment(g[0, 0], insert_fast_sqrts(expr))
-    ast = ps.create_kernel(assignment)
-    vectorize(ast, instruction_set=instruction_set)
-
-    with pytest.raises(Exception):
-        ast.compile()
-
-    expr = f[0, 0] / f[1, 0]
-    assignment = ps.Assignment(g[0, 0], insert_fast_divisions(expr))
-    ast = ps.create_kernel(assignment)
-    vectorize(ast, instruction_set=instruction_set)
-
-    with pytest.raises(Exception):
-        ast.compile()
-
-    assignment = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
-    ast = ps.create_kernel(insert_fast_sqrts(assignment))
-    vectorize(ast, instruction_set=instruction_set)
-
-    with pytest.raises(Exception):
-        ast.compile()
-
-
 def test_issue40(*_):
     """https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40"""
     opt = {'instruction_set': "avx512", 'assume_aligned': False,
-- 
GitLab


From 4da1ce82b245b328f921aec43f04b4729a74b99a Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 11:55:51 +0200
Subject: [PATCH 4/7] Try removing evaluate(False) around cut_loop

---
 pystencils/cpu/vectorization.py                 | 5 ++---
 pystencils_tests/test_vectorization_specific.py | 3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 78b6aac0f..63552bbc7 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -157,9 +157,8 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             loop_node.stop = new_stop
         else:
             cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
-            with evaluate(False):
-                loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
-                              if isinstance(loop, ast.LoopOverCoordinate)]
+            loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
+                          if isinstance(loop, ast.LoopOverCoordinate)]
             assert len(loop_nodes) in (0, 1, 2)  # 2 for main and tail loop, 1 if loop range divisible by vector width
             if len(loop_nodes) == 0:
                 continue
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
index 367250dda..49152a420 100644
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -120,7 +120,8 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
     update_rule = ps.Assignment(dst[0, 0], src[0, 0])
     opt = {'instruction_set': instruction_set, 'assume_aligned': True,
            'nontemporal': True, 'assume_inner_stride_one': True}
-    config = pystencils.config.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, ghost_layers=gl_kernel)
+    config = pystencils.config.CreateKernelConfig(target=dh.default_target,
+                                                  cpu_vectorize_info=opt, ghost_layers=gl_kernel)
     ast = ps.create_kernel(update_rule, config=config)
     kernel = ast.compile()
     if gl_kernel != gl_field:
-- 
GitLab


From 10b2337304df76b020b666748af6e8bd149a5eff Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 12:05:33 +0200
Subject: [PATCH 5/7] Testing: re-add evaluate(False) around cut_loop, revert other non-evaluation changes

---
 pystencils/cpu/vectorization.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 63552bbc7..1daf1beee 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -157,8 +157,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             loop_node.stop = new_stop
         else:
             cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
-            loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
-                          if isinstance(loop, ast.LoopOverCoordinate)]
+            with evaluate(False):
+                loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
+                              if isinstance(loop, ast.LoopOverCoordinate)]
             assert len(loop_nodes) in (0, 1, 2)  # 2 for main and tail loop, 1 if loop range divisible by vector width
             if len(loop_nodes) == 0:
                 continue
@@ -201,7 +202,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             continue
 
         loop_node.step = vector_width
-        loop_node.fast_subs(substitutions)
+        loop_node.subs(substitutions)
         vector_int_width = ast_node.instruction_set['intwidth']
         arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
         arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
@@ -284,7 +285,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
                 if dtype is not int:
                     if dtype is np.float32:
                         default_type = 'float'
-                    expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:], evaluate=False)
+                    expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
             new_args = [visit_expr(a, default_type) for a in expr.args]
             arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
             if not any(type(t) is VectorType for t in arg_types):
@@ -294,10 +295,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
                 casted_args = [
                     CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
                     for a, t in zip(new_args, arg_types)]
-                if isinstance(expr.func, (sp.Mul, sp.Add)):
-                    return expr.func(*casted_args, evaluate=False)
-                else:
-                    return expr.func(*casted_args)
+                return expr.func(*casted_args)
         elif expr.func is sp.Pow:
             new_arg = visit_expr(expr.args[0], default_type)
             return expr.func(new_arg, expr.args[1])
-- 
GitLab


From 3e5566949e510289c50d4bbf939fbd76584cbff7 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 14:03:10 +0200
Subject: [PATCH 6/7] Testing: guard scalar fallback in vectorized _print_Pow/_print_Mul, drop evaluate(False)

---
 pystencils/backends/cbackend.py | 10 ++++++++--
 pystencils/cpu/vectorization.py |  5 ++---
 pystencils/transformations.py   |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 499a80498..de9cb0d31 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -768,7 +768,10 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
         return processed
 
     def _print_Pow(self, expr):
-        result = self._scalarFallback('_print_Pow', expr)
+        try:
+            result = self._scalarFallback('_print_Pow', expr)
+        except ValueError:
+            result = None
         if result:
             return result
 
@@ -800,7 +803,10 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
         # noinspection PyProtectedMember
         from sympy.core.mul import _keep_coeff
 
-        result = self._scalarFallback('_print_Mul', expr)
+        if not inside_add:
+            result = self._scalarFallback('_print_Mul', expr)
+        else:
+            result = None
         if result:
             return result
 
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 1daf1beee..d733822ca 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -157,9 +157,8 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
             loop_node.stop = new_stop
         else:
             cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
-            with evaluate(False):
-                loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
-                              if isinstance(loop, ast.LoopOverCoordinate)]
+            loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
+                          if isinstance(loop, ast.LoopOverCoordinate)]
             assert len(loop_nodes) in (0, 1, 2)  # 2 for main and tail loop, 1 if loop range divisible by vector width
             if len(loop_nodes) == 0:
                 continue
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index 06005cbf7..cce06c2c8 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -2,7 +2,7 @@ import hashlib
 import pickle
 import warnings
 from collections import OrderedDict
-from copy import deepcopy
+from copy import deepcopy, copy
 from types import MappingProxyType
 
 import sympy as sp
-- 
GitLab


From 5e0075341d83c66547f9d79206109c6a0fd33673 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 23 May 2022 14:07:11 +0200
Subject: [PATCH 7/7] Fix flake8

---
 pystencils/cpu/vectorization.py | 1 -
 pystencils/transformations.py   | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index d733822ca..3141eb400 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -3,7 +3,6 @@ from typing import Container, Union
 
 import numpy as np
 import sympy as sp
-from sympy import evaluate
 from sympy.logic.boolalg import BooleanFunction, BooleanAtom
 
 import pystencils.astnodes as ast
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index cce06c2c8..06005cbf7 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -2,7 +2,7 @@ import hashlib
 import pickle
 import warnings
 from collections import OrderedDict
-from copy import deepcopy, copy
+from copy import deepcopy
 from types import MappingProxyType
 
 import sympy as sp
-- 
GitLab