Skip to content

Commit ac1e7ec

Browse files
authored
GH-45358: [C++][Python] Add MemoryPool method to print statistics (apache#45359)
### Rationale for this change

Add a MemoryPool method to print allocator-specific statistics to stderr, to help diagnose perceived memory consumption issues.

Also add missing Python bindings for `MemoryPool::total_bytes_allocated` and `MemoryPool::num_allocations`.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

* GitHub Issue: apache#45358

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent 14e45a2 commit ac1e7ec

File tree

7 files changed

+93
-0
lines changed

7 files changed

+93
-0
lines changed

cpp/src/arrow/memory_pool.cc

+28
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ class DebugAllocator {
266266
}
267267
}
268268

269+
static void PrintStats() { WrappedAllocator::PrintStats(); }  // Pure pass-through: the wrapped allocator prints its own statistics.
270+
269271
private:
270272
static Result<int64_t> RawSize(int64_t size) {
271273
if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(size, kOverhead, &size))) {
@@ -378,6 +380,12 @@ class SystemAllocator {
378380
// The return value of malloc_trim is not an error but to inform
379381
// you if memory was actually released or not, which we do not care about here
380382
ARROW_UNUSED(malloc_trim(0));
383+
#endif
384+
}
385+
386+
static void PrintStats() {
387+
#ifdef __GLIBC__
388+
malloc_stats();
381389
#endif
382390
}
383391
};
@@ -430,6 +438,8 @@ class MimallocAllocator {
430438
mi_free(ptr);
431439
}
432440
}
441+
442+
static void PrintStats() { mi_stats_print_out(nullptr, nullptr); }  // Null output callback and argument: mimalloc uses its default output (stderr per the mimalloc docs).
433443
};
434444

435445
#endif // defined(ARROW_MIMALLOC)
@@ -512,6 +522,8 @@ class BaseMemoryPoolImpl : public MemoryPool {
512522

513523
void ReleaseUnused() override { Allocator::ReleaseUnused(); }
514524

525+
void PrintStats() override { Allocator::PrintStats(); }  // Forwards to the Allocator backend's PrintStats(); the output format is whatever that backend emits.
526+
515527
int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
516528

517529
int64_t max_memory() const override { return stats_.max_memory(); }
@@ -724,6 +736,10 @@ void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size, int64_t alignment) {
724736
std::cout << "Free: size = " << size << ", alignment = " << alignment << std::endl;
725737
}
726738

739+
void LoggingMemoryPool::ReleaseUnused() { pool_->ReleaseUnused(); }  // Forwards to the wrapped pool; unlike Free() and bytes_allocated(), nothing is written to std::cout here.
740+
741+
void LoggingMemoryPool::PrintStats() { pool_->PrintStats(); }  // Forwards to the wrapped pool; this wrapper adds no std::cout logging of its own for this call.
742+
727743
int64_t LoggingMemoryPool::bytes_allocated() const {
728744
int64_t nb_bytes = pool_->bytes_allocated();
729745
std::cout << "bytes_allocated: " << nb_bytes << std::endl;
@@ -775,6 +791,14 @@ class ProxyMemoryPool::ProxyMemoryPoolImpl {
775791
stats_.DidFreeBytes(size);
776792
}
777793

794+
void ReleaseUnused() { pool_->ReleaseUnused(); }  // Forwards the request to the underlying pool (pool_).
795+
796+
void PrintStats() {
797+
// XXX these are the allocation stats for the underlying allocator, not
798+
// the subset allocated through the ProxyMemoryPool
799+
pool_->PrintStats();
800+
}
801+
778802
int64_t bytes_allocated() const { return stats_.bytes_allocated(); }
779803

780804
int64_t max_memory() const { return stats_.max_memory(); }
@@ -809,6 +833,10 @@ void ProxyMemoryPool::Free(uint8_t* buffer, int64_t size, int64_t alignment) {
809833
return impl_->Free(buffer, size, alignment);
810834
}
811835

836+
void ProxyMemoryPool::ReleaseUnused() { impl_->ReleaseUnused(); }  // Forwards to the implementation object (impl_).
837+
838+
void ProxyMemoryPool::PrintStats() { impl_->PrintStats(); }  // Forwards to the implementation object (impl_), which in turn prints the underlying pool's statistics.
839+
812840
int64_t ProxyMemoryPool::bytes_allocated() const { return impl_->bytes_allocated(); }
813841

814842
int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }

cpp/src/arrow/memory_pool.h

+10
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ class ARROW_EXPORT MemoryPool {
151151
/// unable to fulfill the request due to fragmentation.
152152
virtual void ReleaseUnused() {}
153153

154+
/// Print statistics
155+
///
156+
/// Print allocation statistics on stderr. The output format is
157+
/// implementation-specific. Not all memory pools implement this method.
158+
virtual void PrintStats() {}
159+
154160
/// The number of bytes that were allocated and not yet free'd through
155161
/// this allocator.
156162
virtual int64_t bytes_allocated() const = 0;
@@ -187,6 +193,8 @@ class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
187193
Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
188194
uint8_t** ptr) override;
189195
void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
196+
void ReleaseUnused() override;
197+
void PrintStats() override;
190198

191199
int64_t bytes_allocated() const override;
192200

@@ -219,6 +227,8 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
219227
Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
220228
uint8_t** ptr) override;
221229
void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
230+
void ReleaseUnused() override;
231+
void PrintStats() override;
222232

223233
int64_t bytes_allocated() const override;
224234

cpp/src/arrow/memory_pool_internal.h

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class JemallocAllocator {
4444
uint8_t** ptr);
4545
static void DeallocateAligned(uint8_t* ptr, int64_t size, int64_t alignment);
4646
static void ReleaseUnused();
47+
static void PrintStats();
4748
};
4849

4950
#endif // defined(ARROW_JEMALLOC)

cpp/src/arrow/memory_pool_jemalloc.cc

+4
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ void JemallocAllocator::ReleaseUnused() {
131131
mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
132132
}
133133

134+
void JemallocAllocator::PrintStats() {
135+
malloc_stats_print(nullptr, nullptr, /*opts=*/"");
136+
}
137+
134138
} // namespace internal
135139

136140
} // namespace memory_pool

python/pyarrow/includes/libarrow.pxd

+3
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,11 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
323323
cdef cppclass CMemoryPool" arrow::MemoryPool":
324324
int64_t bytes_allocated()
325325
int64_t max_memory()
326+
int64_t total_bytes_allocated()
327+
int64_t num_allocations()
326328
c_string backend_name()
327329
void ReleaseUnused()
330+
void PrintStats()
328331

329332
cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
330333
CLoggingMemoryPool(CMemoryPool*)

python/pyarrow/memory.pxi

+25
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ cdef class MemoryPool(_Weakrefable):
5858
"""
5959
return self.pool.bytes_allocated()
6060

61+
def total_bytes_allocated(self):
62+
"""
63+
Return the total number of bytes that have been allocated from this
64+
memory pool.
65+
"""
66+
return self.pool.total_bytes_allocated()
67+
6168
def max_memory(self):
6269
"""
6370
Return the peak memory allocation in this memory pool.
@@ -69,6 +76,23 @@ cdef class MemoryPool(_Weakrefable):
6976
ret = self.pool.max_memory()
7077
return ret if ret >= 0 else None
7178

79+
def num_allocations(self):
80+
"""
81+
Return the number of allocations or reallocations that were made
82+
using this memory pool.
83+
"""
84+
return self.pool.num_allocations()
85+
86+
def print_stats(self):
87+
"""
88+
Print statistics about this memory pool.
89+
90+
The output format is implementation-specific. Not all memory pools
91+
implement this method.
92+
"""
93+
with nogil:
94+
self.pool.PrintStats()
95+
7296
@property
7397
def backend_name(self):
7498
"""
@@ -83,6 +107,7 @@ cdef class MemoryPool(_Weakrefable):
83107
f"bytes_allocated={self.bytes_allocated()} "
84108
f"max_memory={self.max_memory()}>")
85109

110+
86111
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
87112
if memory_pool is None:
88113
return c_get_memory_pool()

python/pyarrow/tests/test_memory.py

+22
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,17 @@ def check_allocated_bytes(pool):
6767
"""
6868
allocated_before = pool.bytes_allocated()
6969
max_mem_before = pool.max_memory()
70+
num_allocations_before = pool.num_allocations()
7071
with allocate_bytes(pool, 512):
7172
assert pool.bytes_allocated() == allocated_before + 512
7273
new_max_memory = pool.max_memory()
7374
assert pool.max_memory() >= max_mem_before
75+
num_allocations_after = pool.num_allocations()
76+
assert num_allocations_after > num_allocations_before
77+
assert num_allocations_after < num_allocations_before + 5
7478
assert pool.bytes_allocated() == allocated_before
7579
assert pool.max_memory() == new_max_memory
80+
assert pool.num_allocations() == num_allocations_after
7681

7782

7883
def test_default_allocated_bytes():
@@ -271,3 +276,20 @@ def test_debug_memory_pool_unknown(pool_factory):
271276
"Valid values are 'abort', 'trap', 'warn', 'none'."
272277
)
273278
check_debug_memory_pool_disabled(pool_factory, env_value, msg)
279+
280+
281+
@pytest.mark.parametrize('pool_factory', supported_factories())
282+
def test_print_stats(pool_factory):
283+
code = f"""if 1:
284+
import pyarrow as pa
285+
286+
pool = pa.{pool_factory.__name__}()
287+
buf = pa.allocate_buffer(64, memory_pool=pool)
288+
pool.print_stats()
289+
"""
290+
res = subprocess.run([sys.executable, "-c", code], check=True,
291+
universal_newlines=True, stdout=subprocess.PIPE,
292+
stderr=subprocess.PIPE)
293+
if sys.platform == "linux":
294+
# On Linux at least, all memory pools should emit statistics
295+
assert res.stderr.strip() != ""

0 commit comments

Comments
 (0)