From 33ee71b0c6f13224f3031cd8b42921c748ce9ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 3 Mar 2025 11:37:42 -0500 Subject: [PATCH 01/15] Test numexpr against pytest-run-parallel on 3.13t --- .gitignore | 1 + numexpr/interpreter.cpp | 6 ++++-- numexpr/necompiler.py | 37 +++++++++++++++++++++++------------ numexpr/tests/test_numexpr.py | 10 +++++++++- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 928bf15..7bf6f98 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ artifact/ numexpr.egg-info/ *.pyc *.swp +*.so *~ doc/_build site.cfg diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index edebd71..32f6c37 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -556,7 +556,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma size_t si = 0; size_t min_len = min(needle_len, haystack_len); - while (*haystack && *needle && si < min_len) + while (si < min_len && *haystack && *needle) { ok &= *haystack++ == *needle++; si++; @@ -573,7 +573,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma } /* calc haystack length */ - while (*haystack && si < haystack_len) { + while (si < haystack_len && *haystack) { haystack++; si++; } @@ -652,6 +652,7 @@ int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, /* Then finish off the rest */ if (block_size > 0) do { + block_size = *size_ptr; #define REDUCTION_INNER_LOOP #define BLOCK_SIZE block_size #include "interp_body.cpp" @@ -698,6 +699,7 @@ vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps, /* Then finish off the rest */ if (block_size > 0) do { + block_size = *size_ptr; #define BLOCK_SIZE block_size #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 98aee4c..296c41b 100644 --- a/numexpr/necompiler.py +++ 
b/numexpr/necompiler.py @@ -774,9 +774,12 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): # Dictionaries for caching variable names and compiled expressions -_names_cache = CacheDict(256) -_numexpr_cache = CacheDict(256) -_numexpr_last = ContextDict() +# _names_cache = CacheDict(256) +_names_cache = threading.local() +# _numexpr_cache = CacheDict(256) +_numexpr_cache = threading.local() +# _numexpr_last = ContextDict() +_numexpr_last = threading.local() evaluate_lock = threading.Lock() def validate(ex: str, @@ -853,6 +856,14 @@ def validate(ex: str, """ global _numexpr_last + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l = ContextDict() + + if not hasattr(_names_cache, 'c'): + _names_cache.c = CacheDict(256) + + if not hasattr(_numexpr_cache, 'c'): + _numexpr_cache.c = CacheDict(256) try: @@ -868,9 +879,9 @@ def validate(ex: str, # Get the names for this expression context = getContext(kwargs) expr_key = (ex, tuple(sorted(context.items()))) - if expr_key not in _names_cache: - _names_cache[expr_key] = getExprNames(ex, context, sanitize=sanitize) - names, ex_uses_vml = _names_cache[expr_key] + if expr_key not in _names_cache.c: + _names_cache.c[expr_key] = getExprNames(ex, context, sanitize=sanitize) + names, ex_uses_vml = _names_cache.c[expr_key] arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth) # Create a signature @@ -880,12 +891,12 @@ def validate(ex: str, # Look up numexpr if possible. 
numexpr_key = expr_key + (tuple(signature),) try: - compiled_ex = _numexpr_cache[numexpr_key] + compiled_ex = _numexpr_cache.c[numexpr_key] except KeyError: - compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) + compiled_ex = _numexpr_cache.c[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) kwargs = {'out': out, 'order': order, 'casting': casting, 'ex_uses_vml': ex_uses_vml} - _numexpr_last.set(ex=compiled_ex, argnames=names, kwargs=kwargs) + _numexpr_last.l.set(ex=compiled_ex, argnames=names, kwargs=kwargs) except Exception as e: return e return None @@ -987,13 +998,15 @@ def re_evaluate(local_dict: Optional[Dict] = None, not set this value. """ global _numexpr_last + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l = ContextDict() try: - compiled_ex = _numexpr_last['ex'] + compiled_ex = _numexpr_last.l['ex'] except KeyError: raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") - argnames = _numexpr_last['argnames'] + argnames = _numexpr_last.l['argnames'] args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) - kwargs = _numexpr_last['kwargs'] + kwargs = _numexpr_last.l['kwargs'] with evaluate_lock: return compiled_ex(*args, **kwargs) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 62210b4..3970bab 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -16,6 +16,7 @@ import warnings from contextlib import contextmanager import subprocess +import pytest import numpy as np from numpy import ( @@ -318,6 +319,7 @@ def test_refcount(self): evaluate('1') assert sys.getrefcount(a) == 2 + @pytest.mark.thread_unsafe def test_locals_clears_globals(self): # Check for issue #313, whereby clearing f_locals also clear f_globals # if in the top-frame. 
This cannot be done inside `unittest` as it is always @@ -341,6 +343,7 @@ def test_locals_clears_globals(self): +@pytest.mark.thread_unsafe class test_numexpr2(test_numexpr): """Testing with 2 threads""" nthreads = 2 @@ -512,6 +515,7 @@ def test_illegal_value(self): else: self.fail() + @pytest.mark.thread_unsafe def test_sanitize(self): with _environment('NUMEXPR_SANITIZE', '1'): # Forbid dunder @@ -590,7 +594,7 @@ def test_sanitize(self): x = np.array(['a', 'b'], dtype=bytes) evaluate("x == 'b:'") - + @pytest.mark.thread_unsafe def test_no_sanitize(self): try: # Errors on compile() after eval() evaluate('import os;', sanitize=False) @@ -677,6 +681,7 @@ def test_ex_uses_vml(self): if 'sparc' not in platform.machine(): # Execution order set here so as to not use too many threads # during the rest of the execution. See #33 for details. + @pytest.mark.thread_unsafe def test_changing_nthreads_00_inc(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 @@ -685,6 +690,7 @@ def test_changing_nthreads_00_inc(self): c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) + @pytest.mark.thread_unsafe def test_changing_nthreads_01_dec(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 @@ -1123,6 +1129,7 @@ def _environment(key, value): del os.environ[key] # Test cases for the threading configuration +@pytest.mark.thread_unsafe class test_threading_config(TestCase): def test_max_threads_unset(self): # Has to be done in a subprocess as `importlib.reload` doesn't let us @@ -1306,6 +1313,7 @@ def _worker(qout=None): # Case test for subprocesses (via multiprocessing module) class test_subprocess(TestCase): + @pytest.mark.thread_unsafe def test_multiprocess(self): try: import multiprocessing as mp From 8680084f04fc5c4b8a5fc015d9f0cf8174c62cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 26 Feb 2025 11:13:08 -0500 Subject: [PATCH 02/15] Mock pytest in case is not 
available --- numexpr/tests/test_numexpr.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 3970bab..98ae459 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -16,7 +16,6 @@ import warnings from contextlib import contextmanager import subprocess -import pytest import numpy as np from numpy import ( @@ -37,6 +36,13 @@ from numexpr.utils import detect_number_of_cores import unittest +from unittest.mock import MagicMock + +try: + import pytest + pytest_available = True +except ImportError: + pytest_available = False TestCase = unittest.TestCase @@ -45,6 +51,15 @@ MAX_THREADS = 16 +if not pytest_available: + def identity(f): + return f + + pytest = MagicMock() + pytest.mark = MagicMock() + pytest.mark.thread_unsafe = identity + + class test_numexpr(TestCase): """Testing with 1 thread""" nthreads = 1 From 8af34da07df516ae8935d7e893bc8199b799eb51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 26 Feb 2025 14:39:01 -0500 Subject: [PATCH 03/15] Build free-threaded wheels --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 42e6a9d..8b3142f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,12 +24,13 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" + CIBW_FREE_THREADED_SUPPORT: true strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{10,11,12,13}-*"] - p_ver: ["3.10-3.13"] + cibw_build: ["cp3{10,11,12,13,13t}-*"] + p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest arch: aarch64 From 706cb9d40715f6684d47fde8f60979e17762528b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 28 Feb 
2025 16:33:45 -0500 Subject: [PATCH 04/15] Use CIBW_ENABLE --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8b3142f..e3edadd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,7 +24,7 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_FREE_THREADED_SUPPORT: true + CIBW_ENABLE: true strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] From 318145546c9564ecd0c8289662cebef1957a1d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 28 Feb 2025 17:26:12 -0500 Subject: [PATCH 05/15] Use pytest for testing --- .github/workflows/build.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3edadd..31c3a96 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,9 +6,15 @@ permissions: contents: read env: - CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy + CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest + CIBW_ENVIRONMENT: > + IS_FREETHREADED=$(python -c "import sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") + PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") + PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") + CIBW_BEFORE_BUILD_LINUX: > + pip install setuptools oldest-supported-numpy pytest $PYTEST_RUN_PARALLEL CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: python -c "import sys, numexpr; sys.exit(0 if numexpr.test().wasSuccessful() else 1)" + CIBW_TEST_COMMAND: pytest $PARALLEL_THREADS numexpr/tests CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. 
# Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. From 61076a277e704e6a43ecadaf4c311e2ad816fc74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 3 Mar 2025 11:19:05 -0500 Subject: [PATCH 06/15] Update env variable value --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31c3a96..6fc4ee4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,7 @@ permissions: env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest - CIBW_ENVIRONMENT: > + CIBW_ENVIRONMENT_LINUX: > IS_FREETHREADED=$(python -c "import sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") @@ -30,7 +30,7 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_ENABLE: true + CIBW_ENABLE: cpython-freethreading strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] From 1d15ad415311f0c0676c09628e1cbe93c58ab650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Tue, 4 Mar 2025 18:00:21 -0500 Subject: [PATCH 07/15] Move free-threaded builds to an independent job --- .github/workflows/build.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6fc4ee4..11f6afd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,14 +7,9 @@ permissions: env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest - CIBW_ENVIRONMENT_LINUX: > - IS_FREETHREADED=$(python -c "import 
sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") - PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") - PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") - CIBW_BEFORE_BUILD_LINUX: > - pip install setuptools oldest-supported-numpy pytest $PYTEST_RUN_PARALLEL + CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: pytest $PARALLEL_THREADS numexpr/tests + CIBW_TEST_COMMAND: pytest numexpr/tests CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. @@ -35,7 +30,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{10,11,12,13,13t}-*"] + cibw_build: ["cp3{10,11,12,13}-*", "cp313t-*"] p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest @@ -60,6 +55,13 @@ jobs: if: ${{ matrix.arch == 'aarch64' }} name: Set up QEMU + - name: Setup free-threading variables + shell: bash -l {0} + run: | + echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" + echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" + echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 numexpr/tests" >> "$GITHUB_ENV" + - name: Build wheels run: | python -m cibuildwheel --output-dir wheelhouse From 40f04d21c0d76b50847c9dd01ed2281fec1b9ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 12:30:49 -0500 Subject: [PATCH 08/15] Set free-threading variables only under free-threaded conditions --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 11f6afd..f6a68b4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ env: jobs: build_wheels: - name: Build wheels 
on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.p_ver }} + name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.cibw_build }} runs-on: ${{ matrix.os }} permissions: contents: write @@ -27,6 +27,7 @@ jobs: CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading strategy: + fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] @@ -56,6 +57,7 @@ jobs: name: Set up QEMU - name: Setup free-threading variables + if: ${{ endsWith(matrix.cibw_build, 't-*') }} shell: bash -l {0} run: | echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" From 0fb95ec22cd1b070bd46b428cb404a4534d1f5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 12:32:07 -0500 Subject: [PATCH 09/15] Execute pytest with --pyargs --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f6a68b4..0761a58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: pytest numexpr/tests + CIBW_TEST_COMMAND: pytest --pyargs numexpr CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
@@ -62,7 +62,7 @@ jobs: run: | echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" - echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 numexpr/tests" >> "$GITHUB_ENV" + echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - name: Build wheels run: | From e75d15f718f861d914618fb7abd505591a22d1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 13:00:12 -0500 Subject: [PATCH 10/15] Add section in README regarding free-threading --- README.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.rst b/README.rst index 9033d51..264fd2b 100644 --- a/README.rst +++ b/README.rst @@ -159,6 +159,24 @@ Usage array([ True, False, False], dtype=bool) +Free-threading support +---------------------- +Starting with CPython 3.13, there is a new free-threaded build that disables the +Global Interpreter Lock (GIL) altogether, thus improving performance +under multi-threaded conditions on a single interpreter, as opposed to having to use +multiprocessing. + +Whilst numexpr has been demonstrated to work under free-threaded +CPython, care needs to be taken when combining numexpr's native parallel +implementation with Python threads in order to prevent oversubscription. +We recommend either using the main CPython interpreter thread to spawn multiple C threads +using the parallel numexpr API, or spawning multiple CPython threads that do not use +the parallel API. 
+ +For more information about free-threaded CPython, we recommend visiting the following +`community Wiki <https://py-free-threading.github.io/>`_ + + Documentation ------------- From 8fc5991d8ed28da5be4dd763ac48b7f26f719b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Thu, 6 Mar 2025 13:57:20 -0500 Subject: [PATCH 11/15] Address duplicate artifact uploads --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0761a58..ffb05e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -77,6 +77,7 @@ jobs: - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/* + name: numexpr-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_build }} - name: Upload to GitHub Release uses: softprops/action-gh-release@v1 From d412bd675f2c9e1c6c84793a1f4da5713a3e7a1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Thu, 6 Mar 2025 15:43:46 -0500 Subject: [PATCH 12/15] Use an independent label to signal freethreaded artifacts --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ffb05e2..a01a78a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,6 +63,7 @@ jobs: echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" + echo "ARTIFACT_LABEL=freethreaded" >> "$GITHUB_ENV" - name: Build wheels run: | @@ -77,7 +78,7 @@ jobs: - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/* - name: numexpr-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_build }} + name: numexpr-${{ matrix.os }}-${{ matrix.arch }}-${{ env.ARTIFACT_LABEL || 'standard' }} - name: Upload to GitHub 
Release uses: 
softprops/action-gh-release@v1 From cf0aba60ed3873bd1a0d0239fcf595908bc22c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 7 Mar 2025 12:22:39 -0500 Subject: [PATCH 13/15] Mark numexpr interpreter as free-threaded safe --- numexpr/module.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 66b5b77..442bcd0 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -380,7 +380,7 @@ Py_set_num_threads(PyObject *self, PyObject *args) } static PyObject* -Py_get_num_threads(PyObject *self, PyObject *args) +Py_get_num_threads(PyObject *self, PyObject *args) { int n_thread; n_thread = gs.nthreads; @@ -477,6 +477,10 @@ PyInit_interpreter(void) { if (m == NULL) INITERROR; + #ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); + #endif + Py_INCREF(&NumExprType); PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType); From cc2842dda9d812475bf2de0c02371682333156f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 12 Mar 2025 13:49:40 -0500 Subject: [PATCH 14/15] Ensure single thread write to gs.init_sentinels_done --- numexpr/module.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 442bcd0..0a012e2 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -51,7 +51,9 @@ void *th_worker(void *tidptr) while (1) { /* Sentinels have to be initialised yet */ - gs.init_sentinels_done = 0; + if(tid == 0) { + gs.init_sentinels_done = 0; + } /* Meeting point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); From 7be1ca9a3b1acead7dfd5e608061dfa741d7d76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 17 Mar 2025 11:43:31 -0500 Subject: [PATCH 15/15] Address review comments --- numexpr/module.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) 
diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 0a012e2..a42042b 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -47,12 +47,13 @@ void *th_worker(void *tidptr) char **errmsg; // For output buffering if needed vector out_buffer; + int init_sentinels_done = 0; while (1) { /* Sentinels have to be initialised yet */ - if(tid == 0) { - gs.init_sentinels_done = 0; + if (tid == 0) { + init_sentinels_done = 0; } /* Meeting point for all threads (wait for initialization) */