diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py new file mode 100644 index 00000000..f7f99e33 --- /dev/null +++ b/sparse/mlir_backend/_common.py @@ -0,0 +1,14 @@ +import abc +import functools + +from mlir import ir + + +class MlirType(abc.ABC): + @classmethod + @abc.abstractmethod + def get_mlir_type(cls) -> ir.Type: ... + + +def fn_cache(f, maxsize: int | None = None): + return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py index fb26e4f6..502938a7 100644 --- a/sparse/mlir_backend/_constructors.py +++ b/sparse/mlir_backend/_constructors.py @@ -1,5 +1,7 @@ import ctypes import ctypes.util +import functools +import weakref import mlir.execution_engine import mlir.passmanager @@ -9,9 +11,26 @@ import numpy as np import scipy.sparse as sps -from ._core import DEBUG, MLIR_C_RUNNER_UTILS, SCRIPT_PATH, ctx -from ._dtypes import DType, Float64, Index -from ._memref import MemrefF64_1D, MemrefIdx_1D +from ._common import fn_cache +from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx +from ._dtypes import DType, Index, asdtype +from ._memref import make_memref_ctype, ranked_memref_from_np + + +def _hold_self_ref_in_ret(fn): + @functools.wraps(fn) + def wrapped(self, *a, **kw): + ptr = ctypes.py_object(self) + ctypes.pythonapi.Py_IncRef(ptr) + ret = fn(self, *a, **kw) + + def finalizer(ptr): + ctypes.pythonapi.Py_DecRef(ptr) + + weakref.finalize(ret, finalizer, ptr) + return ret + + return wrapped class Tensor: @@ -26,21 +45,21 @@ def __init__(self, obj, module, tensor_type, disassemble_fn, values_dtype, index def __del__(self): self.module.invoke("free_tensor", ctypes.pointer(self.obj)) + @_hold_self_ref_in_ret def to_scipy_sparse(self): """ Returns scipy.sparse or ndarray """ - return self.disassemble_fn(self.module, self.obj) + return self.disassemble_fn(self.module, self.obj, self.values_dtype) class DenseFormat: - modules = {} - + @fn_cache def get_module(shape: tuple[int], values_dtype: DType, index_dtype: DType): with ir.Location.unknown(ctx): module = ir.Module.create() - values_dtype = values_dtype.get() - index_dtype = index_dtype.get() + values_dtype = values_dtype.get_mlir_type() + index_dtype = index_dtype.get_mlir_type() index_width = getattr(index_dtype, "width", 0) levels = (sparse_tensor.LevelType.dense, sparse_tensor.LevelType.dense) ordering = ir.AffineMap.get_permutation([0, 1]) @@ -78,18 +97,19 @@ def free_tensor(tensor_shaped): disassemble.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() free_tensor.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() if DEBUG: - (SCRIPT_PATH / "dense_module.mlir").write_text(str(module)) + (CWD / "dense_module.mlir").write_text(str(module)) pm = mlir.passmanager.PassManager.parse("builtin.module(sparsifier{create-sparse-deallocs=1})") pm.run(module.operation) if DEBUG: - (SCRIPT_PATH / "dense_module_opt.mlir").write_text(str(module)) + (CWD / "dense_module_opt.mlir").write_text(str(module)) module = mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) return (module, dense_shaped) @classmethod def assemble(cls, module, arr: np.ndarray) -> ctypes.c_void_p: - data = MemrefF64_1D.from_numpy(arr.flatten()) + assert arr.ndim == 2 + data = ranked_memref_from_np(arr.flatten()) out = ctypes.c_void_p() module.invoke( "assemble", @@ -99,10 +119,10 @@ def assemble(cls, module, arr: np.ndarray) -> ctypes.c_void_p: return out @classmethod - def disassemble(cls, module: ir.Module, ptr: ctypes.c_void_p) -> np.ndarray: + def disassemble(cls, module: ir.Module, ptr: ctypes.c_void_p, dtype: type[DType]) -> np.ndarray: class Dense(ctypes.Structure): _fields_ = [ - ("data", MemrefF64_1D), + ("data", make_memref_ctype(dtype, 1)), ("data_len", np.ctypeslib.c_intp), ("shape_x", np.ctypeslib.c_intp), ("shape_y", np.ctypeslib.c_intp), @@ -110,7 +130,7 @@ class Dense(ctypes.Structure): def to_np(self) -> np.ndarray: data = self.data.to_numpy()[: self.data_len] - return data.copy().reshape((self.shape_x, self.shape_y)) + return data.reshape((self.shape_x, self.shape_y)) arr = Dense() module.invoke( @@ -122,18 +142,17 @@ def to_np(self) -> np.ndarray: class COOFormat: - modules = {} # TODO: implement + ... class CSRFormat: - modules = {} - - def get_module(shape: tuple[int], values_dtype: DType, index_dtype: DType): + @fn_cache + def get_module(shape: tuple[int], values_dtype: type[DType], index_dtype: type[DType]): with ir.Location.unknown(ctx): module = ir.Module.create() - values_dtype = values_dtype.get() - index_dtype = index_dtype.get() + values_dtype = values_dtype.get_mlir_type() + index_dtype = index_dtype.get_mlir_type() index_width = getattr(index_dtype, "width", 0) levels = (sparse_tensor.LevelType.dense, sparse_tensor.LevelType.compressed) ordering = ir.AffineMap.get_permutation([0, 1]) @@ -175,11 +194,11 @@ def free_tensor(tensor_shaped): disassemble.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() free_tensor.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() if DEBUG: - (SCRIPT_PATH / "scr_module.mlir").write_text(str(module)) + (CWD / "csr_module.mlir").write_text(str(module)) pm = mlir.passmanager.PassManager.parse("builtin.module(sparsifier{create-sparse-deallocs=1})") pm.run(module.operation) if DEBUG: - (SCRIPT_PATH / "csr_module_opt.mlir").write_text(str(module)) + (CWD / "csr_module_opt.mlir").write_text(str(module)) module = mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) return (module, csr_shaped) @@ -189,20 +208,20 @@ def assemble(cls, module: ir.Module, arr: sps.csr_array) -> ctypes.c_void_p: out = ctypes.c_void_p() module.invoke( "assemble", - ctypes.pointer(ctypes.pointer(MemrefIdx_1D.from_numpy(arr.indptr))), - ctypes.pointer(ctypes.pointer(MemrefIdx_1D.from_numpy(arr.indices))), - ctypes.pointer(ctypes.pointer(MemrefF64_1D.from_numpy(arr.data))), + ctypes.pointer(ctypes.pointer(ranked_memref_from_np(arr.indptr))), + ctypes.pointer(ctypes.pointer(ranked_memref_from_np(arr.indices))), + ctypes.pointer(ctypes.pointer(ranked_memref_from_np(arr.data))), ctypes.pointer(out), ) return out @classmethod - def disassemble(cls, module: ir.Module, ptr: ctypes.c_void_p) -> sps.csr_array: + def disassemble(cls, module: ir.Module, ptr: ctypes.c_void_p, dtype: type[DType]) -> sps.csr_array: class Csr(ctypes.Structure): _fields_ = [ - ("data", MemrefF64_1D), - ("pos", MemrefIdx_1D), - ("crd", MemrefIdx_1D), + ("data", make_memref_ctype(dtype, 1)), + ("pos", make_memref_ctype(Index, 1)), + ("crd", make_memref_ctype(Index, 1)), ("data_len", np.ctypeslib.c_intp), ("pos_len", np.ctypeslib.c_intp), ("crd_len", np.ctypeslib.c_intp), @@ -214,7 +233,7 @@ def to_sps(self) -> sps.csr_array: pos = self.pos.to_numpy()[: self.pos_len] crd = self.crd.to_numpy()[: self.crd_len] data = self.data.to_numpy()[: self.data_len] - return sps.csr_array((data.copy(), crd.copy(), pos.copy()), shape=(self.shape_x, self.shape_y)) + return sps.csr_array((data, crd, pos), shape=(self.shape_x, self.shape_y)) arr = Csr() module.invoke( @@ -235,23 +254,21 @@ def _is_numpy_obj(x) -> bool: def asarray(obj) -> Tensor: # TODO: discover obj's dtype - values_dtype = Float64 - index_dtype = Index + values_dtype = asdtype(obj.dtype) # TODO: support other scipy formats if _is_scipy_sparse_obj(obj): format_class = CSRFormat + # This can be int32 or int64 + index_dtype = asdtype(obj.indptr.dtype) elif _is_numpy_obj(obj): format_class = DenseFormat + index_dtype = Index else: raise Exception(f"{type(obj)} not supported.") # TODO: support proper caching - if hash(obj.shape) in format_class.modules: - module, tensor_type = format_class.modules[hash(obj.shape)] - else: - module, tensor_type = format_class.get_module(obj.shape, values_dtype, index_dtype) - format_class.modules[hash(obj.shape)] = module, tensor_type + module, tensor_type = format_class.get_module(obj.shape, values_dtype, index_dtype) assembled_obj = format_class.assemble(module, obj) return Tensor(assembled_obj, module, tensor_type, format_class.disassemble, values_dtype, index_dtype) diff --git a/sparse/mlir_backend/_core.py b/sparse/mlir_backend/_core.py index 9af10fb3..330a492f 100644 --- a/sparse/mlir_backend/_core.py +++ b/sparse/mlir_backend/_core.py @@ -5,7 +5,7 @@ from mlir.ir import Context DEBUG = bool(int(os.environ.get("DEBUG", "0"))) -SCRIPT_PATH = pathlib.Path(__file__).parent +CWD = pathlib.Path(".") MLIR_C_RUNNER_UTILS = ctypes.util.find_library("mlir_c_runner_utils") libc = ctypes.CDLL(ctypes.util.find_library("c")) if os.name != "nt" else ctypes.cdll.msvcrt diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py index 72e6bac5..5ee5a461 100644 --- a/sparse/mlir_backend/_dtypes.py +++ b/sparse/mlir_backend/_dtypes.py @@ -1,71 +1,119 @@ +import inspect +import math +import sys +import typing + from mlir import ir import numpy as np +from ._common import MlirType + + +def _get_pointer_width() -> int: + return round(math.log2(sys.maxsize + 1.0)) + 1 + + +_PTR_WIDTH = _get_pointer_width() + + +def _make_int_classes(namespace: dict[str, object], bit_widths: typing.Iterable[int]) -> None: + for bw in bit_widths: + + class SignedBW(SignedIntegerDType): + np_dtype = getattr(np, f"int{bw}") + bit_width = bw + + @classmethod + def get_mlir_type(cls): + return ir.IntegerType.get_signless(cls.bit_width) + + SignedBW.__name__ = f"Int{bw}" + SignedBW.__module__ = __name__ + + class UnsignedBW(UnsignedIntegerDType): + np_dtype = getattr(np, f"uint{bw}") + bit_width = bw + + @classmethod + def get_mlir_type(cls): + return ir.IntegerType.get_signless(cls.bit_width) + + UnsignedBW.__name__ = f"UInt{bw}" + UnsignedBW.__module__ = __name__ + + namespace[SignedBW.__name__] = SignedBW + namespace[UnsignedBW.__name__] = UnsignedBW -class DType: - pass +class DType(MlirType): + np_dtype: np.dtype + bit_width: int -class Float64(DType): + +class FloatingDType(DType): ... + + +class Float64(FloatingDType): np_dtype = np.float64 + bit_width = 64 @classmethod - def get(cls): + def get_mlir_type(cls): return ir.F64Type.get() -class Float32(DType): +class Float32(FloatingDType): np_dtype = np.float32 + bit_width = 32 @classmethod - def get(cls): + def get_mlir_type(cls): return ir.F32Type.get() -class Int64(DType): - np_dtype = np.int64 +class Float16(FloatingDType): + np_dtype = np.float16 + bit_width = 16 @classmethod - def get(cls): - return ir.IntegerType.get_signed(64) + def get_mlir_type(cls): + return ir.F16Type.get() -class UInt64(DType): - np_dtype = np.uint64 +class IntegerDType(DType): ... - @classmethod - def get(cls): - return ir.IntegerType.get_unsigned(64) +class UnsignedIntegerDType(IntegerDType): ... -class Int32(DType): - np_dtype = np.int32 - - @classmethod - def get(cls): - return ir.IntegerType.get_signed(32) +class SignedIntegerDType(IntegerDType): ... -class UInt32(DType): - np_dtype = np.uint32 - @classmethod - def get(cls): - return ir.IntegerType.get_unsigned(32) +_make_int_classes(locals(), [8, 16, 32, 64]) class Index(DType): np_dtype = np.intp @classmethod - def get(cls): + def get_mlir_type(cls): return ir.IndexType.get() -class SignlessInt64(DType): - np_dtype = np.int64 +IntP: type[SignedIntegerDType] = locals()[f"Int{_PTR_WIDTH}"] +UIntP: type[UnsignedIntegerDType] = locals()[f"UInt{_PTR_WIDTH}"] - @classmethod - def get(cls): - return ir.IntegerType.get_signless(64) + +def isdtype(dt, /) -> bool: + return isinstance(dt, type) and issubclass(dt, DType) and not inspect.isabstract(dt) + + +NUMPY_DTYPE_MAP = {np.dtype(dt.np_dtype): dt for dt in locals().values() if isdtype(dt)} + + +def asdtype(dt, /) -> type[DType]: + if isdtype(dt): + return dt + + return NUMPY_DTYPE_MAP[np.dtype(dt)] diff --git a/sparse/mlir_backend/_memref.py b/sparse/mlir_backend/_memref.py index fc38f82a..387f3481 100644 --- a/sparse/mlir_backend/_memref.py +++ b/sparse/mlir_backend/_memref.py @@ -2,8 +2,17 @@ import numpy as np +from ._common import fn_cache +from ._dtypes import DType, asdtype -def make_memref_ctype(dtype: np.dtype, rank: int) -> type[ctypes.Structure]: + +def make_memref_ctype(dtype: type[DType], rank: int) -> type[ctypes.Structure]: + dtype = np.dtype(asdtype(dtype).np_dtype) + return _make_memref_ctype(dtype, rank) + + +@fn_cache +def _make_memref_ctype(dtype: np.dtype, rank: int) -> type[ctypes.Structure]: ctype = np.ctypeslib.as_ctypes_type(dtype) ptr_t = ctypes.POINTER(ctype) @@ -50,10 +59,6 @@ def __hash__(self) -> int: return MemrefType -MemrefF64_1D = make_memref_ctype(np.float64, 1) -MemrefF32_1D = make_memref_ctype(np.float32, 1) -MemrefInt64_1D = make_memref_ctype(np.int64, 1) -MemrefInt32_1D = make_memref_ctype(np.int32, 1) -MemrefUInt64_1D = make_memref_ctype(np.uint64, 1) -MemrefUInt32_1D = make_memref_ctype(np.uint32, 1) -MemrefIdx_1D = make_memref_ctype(np.intp, 1) +def ranked_memref_from_np(arr: np.ndarray) -> ctypes.Structure: + memref_type = _make_memref_ctype(arr.dtype, arr.ndim) + return memref_type.from_numpy(arr) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index e2d3aa07..6d04dc60 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -6,13 +6,16 @@ from mlir.dialects import arith, func, linalg, sparse_tensor, tensor from ._constructors import Tensor -from ._core import DEBUG, MLIR_C_RUNNER_UTILS, SCRIPT_PATH, ctx +from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx +from ._dtypes import DType, FloatingDType -def get_add_module(a_tensor_type, b_tensor_type, out_tensor_type, dtype): +def get_add_module(a_tensor_type, b_tensor_type, out_tensor_type, dtype: type[DType]): with ir.Location.unknown(ctx): module = ir.Module.create() - dtype = dtype.get() + # TODO: add support for complex dialect/dtypes + arith_op = arith.AddFOp if issubclass(dtype, FloatingDType) else arith.AddIOp + dtype = dtype.get_mlir_type() ordering = ir.AffineMap.get_permutation([0, 1]) with ir.InsertionPoint(module.body): @@ -34,7 +37,7 @@ def add(a, b): overlap = res.regions[0].blocks.append(dtype, dtype) with ir.InsertionPoint(overlap): arg0, arg1 = overlap.arguments - overlap_res = arith.AddFOp(arg0, arg1) + overlap_res = arith_op(arg0, arg1) sparse_tensor.YieldOp(result=overlap_res) left_region = res.regions[1].blocks.append(dtype) with ir.InsertionPoint(left_region): @@ -49,12 +52,12 @@ def add(a, b): add.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() if DEBUG: - (SCRIPT_PATH / "add_module.mlir").write_text(str(module)) + (CWD / "add_module.mlir").write_text(str(module)) pm = mlir.passmanager.PassManager.parse("builtin.module(sparsifier{create-sparse-deallocs=1})") pm.run(module.operation) if DEBUG: - (SCRIPT_PATH / "add_module_opt.mlir").write_text(str(module)) + (CWD / "add_module_opt.mlir").write_text(str(module)) return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) diff --git a/sparse/mlir_backend/tests/conftest.py b/sparse/mlir_backend/tests/conftest.py index f411f45b..abe9b4e9 100644 --- a/sparse/mlir_backend/tests/conftest.py +++ b/sparse/mlir_backend/tests/conftest.py @@ -4,5 +4,5 @@ @pytest.fixture(scope="module") -def rng(): +def rng() -> np.random.Generator: return np.random.default_rng(42) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 32a865af..3b4cf11b 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -1,3 +1,5 @@ +import typing + import sparse import pytest @@ -8,17 +10,74 @@ if sparse._BACKEND != sparse._BackendType.MLIR: pytest.skip("skipping MLIR tests", allow_module_level=True) +parametrize_dtypes = pytest.mark.parametrize( + "dtype", + [ + np.int8, + np.uint8, + np.int16, + np.uint16, + np.int32, + np.uint32, + np.int64, + np.uint64, + np.float32, + np.float64, + ], +) + def assert_csr_equal(expected: sps.csr_array, actual: sps.csr_array) -> None: np.testing.assert_array_equal(expected.todense(), actual.todense()) + # Broken due to https://github.com/scipy/scipy/issues/21442 + # desired.sort_indices() + # desired.sum_duplicates() + # desired.prune() + + # actual.sort_indices() + # actual.sum_duplicates() + # actual.prune() + + # np.testing.assert_array_equal(desired.todense(), actual.todense()) + + # np.testing.assert_array_equal(desired.indptr, actual.indptr) + # np.testing.assert_array_equal(desired.indices, actual.indices) + # np.testing.assert_array_equal(desired.data, actual.data) + + +def generate_sampler(dtype: np.dtype, rng: np.random.Generator) -> typing.Callable[[tuple[int, ...]], np.ndarray]: + dtype = np.dtype(dtype) + if np.issubdtype(dtype, np.signedinteger): + + def sampler_signed(size: tuple[int, ...]): + return rng.integers(-10, 10, dtype=dtype, endpoint=True, size=size) + + return sampler_signed + if np.issubdtype(dtype, np.unsignedinteger): -def test_constructors(rng): + def sampler_unsigned(size: tuple[int, ...]): + return rng.integers(0, 10, dtype=dtype, endpoint=True, size=size) + + return sampler_unsigned + + if np.issubdtype(dtype, np.floating): + + def sampler_real_floating(size: tuple[int, ...]): + return -10 + 20 * rng.random(dtype=dtype, size=size) + + return sampler_real_floating + + raise NotImplementedError(f"{dtype=} not yet supported.") + + +@parametrize_dtypes +def test_constructors(rng, dtype): SHAPE = (10, 5) DENSITY = 0.5 - - a = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=np.float64, random_state=rng) - c = np.arange(50, dtype=np.float64).reshape((10, 5)) + sampler = generate_sampler(dtype, rng) + a = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler) + c = np.arange(50, dtype=dtype).reshape((10, 5)) a_tensor = sparse.asarray(a) c_tensor = sparse.asarray(c) @@ -27,16 +86,18 @@ def test_constructors(rng): assert_csr_equal(a, a_retured) c_returned = c_tensor.to_scipy_sparse() - np.testing.assert_array_equal(c_returned, c) + np.testing.assert_equal(c, c_returned) -def test_add(rng): +@parametrize_dtypes +def test_add(rng, dtype): SHAPE = (10, 5) DENSITY = 0.5 + sampler = generate_sampler(dtype, rng) - a = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=np.float64, random_state=rng) - b = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=np.float64, random_state=rng) - c = np.arange(50, dtype=np.float64).reshape((10, 5)) + a = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler) + b = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler) + c = np.arange(50, dtype=dtype).reshape((10, 5)) a_tensor = sparse.asarray(a) b_tensor = sparse.asarray(b)