From b1c5e21fb5dc9eb10f18abbf66153dc17ee9c5b5 Mon Sep 17 00:00:00 2001
From: Michael Kuron <mkuron@icp.uni-stuttgart.de>
Date: Thu, 6 Apr 2023 08:27:06 +0200
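Subject: [PATCH] Remove support for non-power-of-2 SVE vector widths

SVE detection now reports the length returned by PR_SVE_GET_VL and each
successive halving while the value exceeds 128 bits, replacing the
previous rounding down to a power of two. The cachelineSize and
cachelineZero entries in the ARM instruction set are no longer
conditional on the instruction set name or bit width, and the arm64v9
CI job additionally tests sve128.
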
---
 .gitlab-ci.yml                               |  2 +-
 pystencils/backends/arm_instruction_sets.py  |  6 ++----
 pystencils/backends/simd_instruction_sets.py | 14 +++++---------
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e50894bd6..f333e761d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -175,7 +175,7 @@ arm64v9:
   extends: .multiarch_template
   image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
   variables:
-    PYSTENCILS_SIMD: "sve256,sve512,sve"
+    PYSTENCILS_SIMD: "sve128,sve256,sve512,sve"
   before_script:
     - *multiarch_before_script
     - sed -i s/march=native/march=armv8-a+sve/g ~/.config/pystencils/config.json
diff --git a/pystencils/backends/arm_instruction_sets.py b/pystencils/backends/arm_instruction_sets.py
index 73ea7eb44..9aa8f6c0a 100644
--- a/pystencils/backends/arm_instruction_sets.py
+++ b/pystencils/backends/arm_instruction_sets.py
@@ -151,9 +151,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
         result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
         result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
 
-    if instruction_set == 'sve' or bitwidth & (bitwidth - 1) == 0:
-        # only power-of-2 vector sizes will evenly divide a cacheline
-        result['cachelineSize'] = 'cachelineSize()'
-        result['cachelineZero'] = 'cachelineZero((void*) {0})'
+    result['cachelineSize'] = 'cachelineSize()'
+    result['cachelineZero'] = 'cachelineZero((void*) {0})'
 
     return result
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index cdb2ee5cf..7d0d028c0 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -1,4 +1,3 @@
-import math
 import os
 import platform
 from ctypes import CDLL
@@ -86,15 +85,12 @@ def get_supported_instruction_sets():
     if flags.issuperset(required_sve_flags):
         if platform.system() == 'Linux':
             libc = CDLL('libc.so.6')
-            native_length = 8 * libc.prctl(51, 0, 0, 0, 0)  # PR_SVE_GET_VL
-            if native_length < 0:
+            length = 8 * libc.prctl(51, 0, 0, 0, 0)  # PR_SVE_GET_VL
+            if length < 0:
                 raise OSError("SVE length query failed")
-            pwr2_length = int(2**math.floor(math.log2(native_length)))
-            if pwr2_length % 256 == 0:
-                result.append(f"sve{pwr2_length//2}")
-            if native_length != pwr2_length:
-                result.append(f"sve{pwr2_length}")
-            result.append(f"sve{native_length}")
+            while length > 128:
+                result.append(f"sve{length}")
+                length //= 2
         result.append("sve")
     return result
 
-- 
GitLab