Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test numexpr against pytest-run-parallel on 3.13t #504

Closed
wants to merge 15 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -6,30 +6,33 @@ permissions:
contents: read

env:
CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy
CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest
CIBW_BEFORE_TEST: pip install pytest
CIBW_BUILD_VERBOSITY: 1
CIBW_TEST_COMMAND: python -c "import sys, numexpr; sys.exit(0 if numexpr.test().wasSuccessful() else 1)"
CIBW_TEST_COMMAND: pytest --pyargs numexpr
CIBW_TEST_SKIP: "*macosx*arm64*"
# Building for musllinux and aarch64 takes way too much time.
# Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it.
CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*"

jobs:
build_wheels:
name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.p_ver }}
name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.cibw_build }}
runs-on: ${{ matrix.os }}
permissions:
contents: write
env:
CIBW_BUILD: ${{ matrix.cibw_build }}
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
CIBW_ARCHS_MACOS: "x86_64 arm64"
CIBW_ENABLE: cpython-freethreading
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
arch: [x86_64, aarch64]
cibw_build: ["cp3{10,11,12,13}-*"]
p_ver: ["3.10-3.13"]
cibw_build: ["cp3{10,11,12,13}-*", "cp313t-*"]
p_ver: ["3.10-3.13+3.13t"]
exclude:
- os: windows-latest
arch: aarch64
@@ -53,6 +56,15 @@ jobs:
if: ${{ matrix.arch == 'aarch64' }}
name: Set up QEMU

- name: Setup free-threading variables
if: ${{ endsWith(matrix.cibw_build, 't-*') }}
shell: bash -l {0}
run: |
echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV"
echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV"
echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV"
echo "ARTIFACT_LABEL=freethreaded" >> "$GITHUB_ENV"

- name: Build wheels
run: |
python -m cibuildwheel --output-dir wheelhouse
@@ -66,6 +78,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
path: ./wheelhouse/*
name: numexpr-${{ matrix.os }}-${{ matrix.arch }}-${{ env.ARTIFACT_LABEL || 'standard' }}

- name: Upload to GitHub Release
uses: softprops/action-gh-release@v1
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ artifact/
numexpr.egg-info/
*.pyc
*.swp
*.so
*~
doc/_build
site.cfg
18 changes: 18 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -159,6 +159,24 @@ Usage
array([ True, False, False], dtype=bool)


Free-threading support
----------------------
Starting on CPython 3.13 onwards there is a new distribution that disables the
Global Interpreter Lock (GIL) altogether, thus increasing the performance yields
under multi-threaded conditions on a single interpreter, as opposed to having to use
multiprocessing.

Whilst numexpr has been demonstrated to work under free-threaded
CPython, considerations need to be taken when using numexpr native parallel
implementation vs using Python threads directly in order to prevent oversubscription,
we recommend either using the main CPython interpreter thread to spawn multiple C threads
using the parallel numexpr API, or spawning multiple CPython threads that do not use
the parallel API.

For more information about free-threaded CPython, we recommend visiting the following
`community Wiki <https://py-free-threading.github.io/>`


Documentation
-------------

6 changes: 4 additions & 2 deletions numexpr/interpreter.cpp
Original file line number Diff line number Diff line change
@@ -556,7 +556,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma

size_t si = 0;
size_t min_len = min(needle_len, haystack_len);
while (*haystack && *needle && si < min_len)
while (si < min_len && *haystack && *needle)
{
ok &= *haystack++ == *needle++;
si++;
@@ -573,7 +573,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma
}

/* calc haystack length */
while (*haystack && si < haystack_len) {
while (si < haystack_len && *haystack) {
haystack++;
si++;
}
@@ -652,6 +652,7 @@ int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps,

/* Then finish off the rest */
if (block_size > 0) do {
block_size = *size_ptr;
#define REDUCTION_INNER_LOOP
#define BLOCK_SIZE block_size
#include "interp_body.cpp"
@@ -698,6 +699,7 @@ vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps,

/* Then finish off the rest */
if (block_size > 0) do {
block_size = *size_ptr;
#define BLOCK_SIZE block_size
#define NO_OUTPUT_BUFFERING // Because it's a reduction
#include "interp_body.cpp"
11 changes: 9 additions & 2 deletions numexpr/module.cpp
Original file line number Diff line number Diff line change
@@ -47,11 +47,14 @@ void *th_worker(void *tidptr)
char **errmsg;
// For output buffering if needed
vector<char> out_buffer;
int init_sentinels_done = 0;

while (1) {

/* Sentinels have to be initialised yet */
gs.init_sentinels_done = 0;
if (tid == 0) {
init_sentinels_done = 0;
}

/* Meeting point for all threads (wait for initialization) */
pthread_mutex_lock(&gs.count_threads_mutex);
@@ -380,7 +383,7 @@ Py_set_num_threads(PyObject *self, PyObject *args)
}

static PyObject*
Py_get_num_threads(PyObject *self, PyObject *args)
Py_get_num_threads(PyObject *self, PyObject *args)
{
int n_thread;
n_thread = gs.nthreads;
@@ -477,6 +480,10 @@ PyInit_interpreter(void) {
if (m == NULL)
INITERROR;

#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif

Py_INCREF(&NumExprType);
PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType);

37 changes: 25 additions & 12 deletions numexpr/necompiler.py
Original file line number Diff line number Diff line change
@@ -774,9 +774,12 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):


# Dictionaries for caching variable names and compiled expressions
_names_cache = CacheDict(256)
_numexpr_cache = CacheDict(256)
_numexpr_last = ContextDict()
# _names_cache = CacheDict(256)
_names_cache = threading.local()
# _numexpr_cache = CacheDict(256)
_numexpr_cache = threading.local()
# _numexpr_last = ContextDict()
_numexpr_last = threading.local()
evaluate_lock = threading.Lock()

def validate(ex: str,
@@ -853,6 +856,14 @@ def validate(ex: str,

"""
global _numexpr_last
if not hasattr(_numexpr_last, 'l'):
_numexpr_last.l = ContextDict()

if not hasattr(_names_cache, 'c'):
_names_cache.c = CacheDict(256)

if not hasattr(_numexpr_cache, 'c'):
_numexpr_cache.c = CacheDict(256)

try:

@@ -868,9 +879,9 @@ def validate(ex: str,
# Get the names for this expression
context = getContext(kwargs)
expr_key = (ex, tuple(sorted(context.items())))
if expr_key not in _names_cache:
_names_cache[expr_key] = getExprNames(ex, context, sanitize=sanitize)
names, ex_uses_vml = _names_cache[expr_key]
if expr_key not in _names_cache.c:
_names_cache.c[expr_key] = getExprNames(ex, context, sanitize=sanitize)
names, ex_uses_vml = _names_cache.c[expr_key]
arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth)

# Create a signature
@@ -880,12 +891,12 @@ def validate(ex: str,
# Look up numexpr if possible.
numexpr_key = expr_key + (tuple(signature),)
try:
compiled_ex = _numexpr_cache[numexpr_key]
compiled_ex = _numexpr_cache.c[numexpr_key]
except KeyError:
compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context)
compiled_ex = _numexpr_cache.c[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context)
kwargs = {'out': out, 'order': order, 'casting': casting,
'ex_uses_vml': ex_uses_vml}
_numexpr_last.set(ex=compiled_ex, argnames=names, kwargs=kwargs)
_numexpr_last.l.set(ex=compiled_ex, argnames=names, kwargs=kwargs)
except Exception as e:
return e
return None
@@ -987,13 +998,15 @@ def re_evaluate(local_dict: Optional[Dict] = None,
not set this value.
"""
global _numexpr_last
if not hasattr(_numexpr_last, 'l'):
_numexpr_last.l = ContextDict()

try:
compiled_ex = _numexpr_last['ex']
compiled_ex = _numexpr_last.l['ex']
except KeyError:
raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`")
argnames = _numexpr_last['argnames']
argnames = _numexpr_last.l['argnames']
args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth)
kwargs = _numexpr_last['kwargs']
kwargs = _numexpr_last.l['kwargs']
with evaluate_lock:
return compiled_ex(*args, **kwargs)
25 changes: 24 additions & 1 deletion numexpr/tests/test_numexpr.py
Original file line number Diff line number Diff line change
@@ -36,6 +36,13 @@
from numexpr.utils import detect_number_of_cores

import unittest
from unittest.mock import MagicMock

try:
import pytest
pytest_available = True
except ImportError:
pytest_available = False

TestCase = unittest.TestCase

@@ -44,6 +51,15 @@
MAX_THREADS = 16


if not pytest_available:
def identity(f):
return f

pytest = MagicMock()
pytest.mark = MagicMock()
pytest.mark.thread_unsafe = identity

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be better to register thread_unsafe also if pytest is installed but pytest-run-parallel isn't. That way the test suite can be run with pytest also without pytest-run-parallel



class test_numexpr(TestCase):
"""Testing with 1 thread"""
nthreads = 1
@@ -318,6 +334,7 @@ def test_refcount(self):
evaluate('1')
assert sys.getrefcount(a) == 2

@pytest.mark.thread_unsafe
def test_locals_clears_globals(self):
# Check for issue #313, whereby clearing f_locals also clear f_globals
# if in the top-frame. This cannot be done inside `unittest` as it is always
@@ -341,6 +358,7 @@ def test_locals_clears_globals(self):



@pytest.mark.thread_unsafe
class test_numexpr2(test_numexpr):
"""Testing with 2 threads"""
nthreads = 2
@@ -512,6 +530,7 @@ def test_illegal_value(self):
else:
self.fail()

@pytest.mark.thread_unsafe
def test_sanitize(self):
with _environment('NUMEXPR_SANITIZE', '1'):
# Forbid dunder
@@ -590,7 +609,7 @@ def test_sanitize(self):
x = np.array(['a', 'b'], dtype=bytes)
evaluate("x == 'b:'")


@pytest.mark.thread_unsafe
def test_no_sanitize(self):
try: # Errors on compile() after eval()
evaluate('import os;', sanitize=False)
@@ -677,6 +696,7 @@ def test_ex_uses_vml(self):
if 'sparc' not in platform.machine():
# Execution order set here so as to not use too many threads
# during the rest of the execution. See #33 for details.
@pytest.mark.thread_unsafe
def test_changing_nthreads_00_inc(self):
a = linspace(-1, 1, 1000000)
b = ((.25 * a + .75) * a - 1.5) * a - 2
@@ -685,6 +705,7 @@ def test_changing_nthreads_00_inc(self):
c = evaluate("((.25*a + .75)*a - 1.5)*a - 2")
assert_array_almost_equal(b, c)

@pytest.mark.thread_unsafe
def test_changing_nthreads_01_dec(self):
a = linspace(-1, 1, 1000000)
b = ((.25 * a + .75) * a - 1.5) * a - 2
@@ -1123,6 +1144,7 @@ def _environment(key, value):
del os.environ[key]

# Test cases for the threading configuration
@pytest.mark.thread_unsafe
class test_threading_config(TestCase):
def test_max_threads_unset(self):
# Has to be done in a subprocess as `importlib.reload` doesn't let us
@@ -1306,6 +1328,7 @@ def _worker(qout=None):

# Case test for subprocesses (via multiprocessing module)
class test_subprocess(TestCase):
@pytest.mark.thread_unsafe
def test_multiprocess(self):
try:
import multiprocessing as mp