Commit e3f6d8da authored by Michael Kuron

Merge branch 'PinnedMem' into 'master'

Implement Pinned GPU memory

See merge request !331
parents 9d897d24 08ed41d1
Pipeline #53811 passed with stages in 19 minutes and 46 seconds
@@ -127,8 +127,12 @@ class SerialDataHandling(DataHandling):
         # cpu_arr is always created - since there is no create_gpu_array_with_layout()
         byte_offset = ghost_layers * np.dtype(dtype).itemsize
-        cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
-                                                 byte_offset=byte_offset, **kwargs)
+        if gpu:
+            cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple,
+                                                            dtype=dtype)
+        else:
+            cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
+                                                     byte_offset=byte_offset, **kwargs)
         if alignment and gpu:
             raise NotImplementedError("Alignment for GPU fields not supported")
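
Note: the hunk above routes GPU-backed fields through page-locked (pinned) host memory. Below is a minimal, self-contained sketch of the idea, assuming only that CuPy and cupyx are installed; the shape and values are made up and this is not the pystencils code path itself. cupyx.empty_pinned returns an ordinary NumPy array whose memory is page-locked, so host/device copies avoid an extra staging buffer.

import numpy as np
import cupy as cp
import cupyx as cpx

shape = (32, 32, 19)  # hypothetical field shape (domain plus values per cell)

# Page-locked host buffer: behaves like a normal numpy array, but transfers
# to and from the device do not need an intermediate pageable-memory copy.
cpu_arr = cpx.empty_pinned(shape, dtype=np.float64, order='C')
cpu_arr[...] = 1.0

gpu_arr = cp.empty(shape, dtype=np.float64)
gpu_arr.set(cpu_arr)          # host -> device upload
cpu_arr[...] = gpu_arr.get()  # device -> host download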
...
 try:
     import cupy as cp
+    import cupyx as cpx
 except ImportError:
     cp = None
+    cpx = None

 import numpy as np
 import pystencils
@@ -25,27 +27,75 @@ class GPUArrayHandler:
     @staticmethod
     def to_gpu(numpy_array):
-        with cp.cuda.Device(pystencils.GPU_DEVICE):
+        swaps = _get_index_swaps(numpy_array)
+        if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
+            with cp.cuda.Device(pystencils.GPU_DEVICE):
+                gpu_array = cp.asarray(numpy_array.base)
+            for a, b in reversed(swaps):
+                gpu_array = gpu_array.swapaxes(a, b)
+            return gpu_array
+        else:
             return cp.asarray(numpy_array)

     @staticmethod
     def upload(array, numpy_array):
-        with cp.cuda.Device(pystencils.GPU_DEVICE):
-            array.set(numpy_array)
+        if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
+            with cp.cuda.Device(pystencils.GPU_DEVICE):
+                array.base.set(numpy_array.base)
+        else:
+            with cp.cuda.Device(pystencils.GPU_DEVICE):
+                array.set(numpy_array)

     @staticmethod
     def download(array, numpy_array):
-        with cp.cuda.Device(pystencils.GPU_DEVICE):
-            numpy_array[:] = array.get()
+        if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
+            with cp.cuda.Device(pystencils.GPU_DEVICE):
+                numpy_array.base[:] = array.base.get()
+        else:
+            with cp.cuda.Device(pystencils.GPU_DEVICE):
+                numpy_array[:] = array.get()

     @staticmethod
     def randn(shape, dtype=np.float64):
         with cp.cuda.Device(pystencils.GPU_DEVICE):
             return cp.random.randn(*shape, dtype=dtype)

+    @staticmethod
+    def pinned_numpy_array(layout, shape, dtype):
+        assert set(layout) == set(range(len(shape))), "Wrong layout descriptor"
+        cur_layout = list(range(len(shape)))
+        swaps = []
+        for i in range(len(layout)):
+            if cur_layout[i] != layout[i]:
+                index_to_swap_with = cur_layout.index(layout[i])
+                swaps.append((i, index_to_swap_with))
+                cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i]
+        assert tuple(cur_layout) == tuple(layout)
+
+        shape = list(shape)
+        for a, b in swaps:
+            shape[a], shape[b] = shape[b], shape[a]
+
+        res = cpx.empty_pinned(tuple(shape), order='c', dtype=dtype)
+
+        for a, b in reversed(swaps):
+            res = res.swapaxes(a, b)
+        return res
+
     from_numpy = to_gpu


 class GPUNotAvailableHandler:
     def __getattribute__(self, name):
         raise NotImplementedError("Unable to utilise cupy! Please make sure cupy works correctly in your setup!")
+
+
+def _get_index_swaps(array):
+    swaps = []
+    if array.base is not None and isinstance(array.base, np.ndarray):
+        for stride in array.base.strides:
+            index_base = array.base.strides.index(stride)
+            index_view = array.strides.index(stride)
+            if index_base != index_view and (index_view, index_base) not in swaps:
+                swaps.append((index_base, index_view))
+    return swaps
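
Note: to make the axis-swap bookkeeping in pinned_numpy_array and _get_index_swaps easier to follow, here is an illustrative re-implementation that replaces cpx.empty_pinned with np.empty so it runs without a GPU. The helper name swapped_empty and the example layout are made up; the logic mirrors the diff above: allocate the permuted shape in C order, then swap the axes back so the strides realize the requested layout, leaving a view whose .base records the underlying allocation.

import numpy as np

def swapped_empty(layout, shape, dtype=np.float64):
    # Same swap computation as pinned_numpy_array (illustrative copy).
    cur = list(range(len(shape)))
    swaps = []
    for i, target in enumerate(layout):
        if cur[i] != target:
            j = cur.index(target)
            swaps.append((i, j))
            cur[i], cur[j] = cur[j], cur[i]
    assert tuple(cur) == tuple(layout)

    permuted = list(shape)
    for a, b in swaps:
        permuted[a], permuted[b] = permuted[b], permuted[a]

    res = np.empty(tuple(permuted), order='C', dtype=dtype)  # stand-in for cpx.empty_pinned
    for a, b in reversed(swaps):
        res = res.swapaxes(a, b)
    return res

arr = swapped_empty(layout=(2, 1, 0), shape=(4, 5, 6))
print(arr.shape)             # (4, 5, 6): the requested shape is restored
print(arr.strides)           # (8, 32, 160): axis 0 is fastest, matching layout (2, 1, 0)
print(arr.base is not None)  # True: arr is a swapaxes view; _get_index_swaps recovers
                             # the swaps by comparing arr.strides with arr.base.strides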
@@ -251,6 +251,20 @@ def test_add_arrays():
     assert y == dh.fields['y']


+@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)])
+@pytest.mark.parametrize('layout', ['zyxf', 'fzyx'])
+def test_add_arrays_with_layout(shape, layout):
+    pytest.importorskip('cupy')
+
+    dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU)
+    f1 = dh.add_array("f1", values_per_cell=19)
+    dh.fill(f1.name, 1.0)
+
+    assert dh.cpu_arrays[f1.name].shape == dh.gpu_arrays[f1.name].shape
+    assert dh.cpu_arrays[f1.name].strides == dh.gpu_arrays[f1.name].strides
+    assert dh.cpu_arrays[f1.name].dtype == dh.gpu_arrays[f1.name].dtype
+
+
 def test_get_kwarg():
     domain_shape = (10, 10)
     field_description = 'src, dst'
@@ -373,3 +387,18 @@ def test_array_handler():
     random_array = array_handler.randn(size)

+    cpu_array = np.empty((20, 40), dtype=np.float64)
+    gpu_array = array_handler.to_gpu(cpu_array)
+    assert cpu_array.base is None
+    assert gpu_array.base is None
+    assert gpu_array.strides == cpu_array.strides
+
+    cpu_array2 = np.empty((20, 40), dtype=np.float64)
+    cpu_array2 = cpu_array2.swapaxes(0, 1)
+    gpu_array2 = array_handler.to_gpu(cpu_array2)
+    assert cpu_array2.base is not None
+    assert gpu_array2.base is not None
+    assert gpu_array2.strides == cpu_array2.strides
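
Note: the new assertions rely on NumPy's base/view distinction: a freshly allocated array owns its memory (base is None), while a swapaxes result is a view whose base is the owning array. A short sketch of that behaviour (plain NumPy, no GPU required):

import numpy as np

owner = np.empty((20, 40), dtype=np.float64)
print(owner.base is None)        # True: owner holds its own memory

view = owner.swapaxes(0, 1)      # transposed view, no data copied
print(view.base is owner)        # True: the view references owner's buffer
print(view.shape, view.strides)  # (40, 20) (8, 320): swapped relative to owner

# This is what to_gpu/upload/download check: for a view, to_gpu copies the
# contiguous base and replays the axis swaps on the GPU side, while
# upload/download transfer the bases directly, so copies stay contiguous.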