From b1c5e21fb5dc9eb10f18abbf66153dc17ee9c5b5 Mon Sep 17 00:00:00 2001 From: Michael Kuron <mkuron@icp.uni-stuttgart.de> Date: Thu, 6 Apr 2023 08:27:06 +0200 Subject: [PATCH] Remove support for non-power-of-2 SVE vector widths --- .gitlab-ci.yml | 2 +- pystencils/backends/arm_instruction_sets.py | 6 ++---- pystencils/backends/simd_instruction_sets.py | 14 +++++--------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e50894bd6..f333e761d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -175,7 +175,7 @@ arm64v9: extends: .multiarch_template image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64 variables: - PYSTENCILS_SIMD: "sve256,sve512,sve" + PYSTENCILS_SIMD: "sve128,sve256,sve512,sve" before_script: - *multiarch_before_script - sed -i s/march=native/march=armv8-a+sve/g ~/.config/pystencils/config.json diff --git a/pystencils/backends/arm_instruction_sets.py b/pystencils/backends/arm_instruction_sets.py index 73ea7eb44..9aa8f6c0a 100644 --- a/pystencils/backends/arm_instruction_sets.py +++ b/pystencils/backends/arm_instruction_sets.py @@ -151,9 +151,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0' result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff' - if instruction_set == 'sve' or bitwidth & (bitwidth - 1) == 0: - # only power-of-2 vector sizes will evenly divide a cacheline - result['cachelineSize'] = 'cachelineSize()' - result['cachelineZero'] = 'cachelineZero((void*) {0})' + result['cachelineSize'] = 'cachelineSize()' + result['cachelineZero'] = 'cachelineZero((void*) {0})' return result diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py index cdb2ee5cf..7d0d028c0 100644 --- a/pystencils/backends/simd_instruction_sets.py +++ b/pystencils/backends/simd_instruction_sets.py @@ -1,4 +1,3 @@ -import math import os import platform from ctypes import CDLL @@ -86,15 +85,12 @@ def get_supported_instruction_sets(): if flags.issuperset(required_sve_flags): if platform.system() == 'Linux': libc = CDLL('libc.so.6') - native_length = 8 * libc.prctl(51, 0, 0, 0, 0) # PR_SVE_GET_VL - if native_length < 0: + length = 8 * libc.prctl(51, 0, 0, 0, 0) # PR_SVE_GET_VL + if length < 0: raise OSError("SVE length query failed") - pwr2_length = int(2**math.floor(math.log2(native_length))) - if pwr2_length % 256 == 0: - result.append(f"sve{pwr2_length//2}") - if native_length != pwr2_length: - result.append(f"sve{pwr2_length}") - result.append(f"sve{native_length}") + while length > 128: + result.append(f"sve{length}") + length //= 2 result.append("sve") return result -- GitLab