From 36653033e905419d3ce90bee047e1631097b30cf Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Fri, 14 Mar 2025 14:07:06 +0200 Subject: [PATCH 1/3] Document deprecated process runtime metrics --- docs/integrations/system-metrics.md | 16 +++++++++---- .../_internal/integrations/system_metrics.py | 22 ++++++++++++++---- .../otel_integrations/test_system_metrics.py | 23 +++++++++++++------ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/docs/integrations/system-metrics.md b/docs/integrations/system-metrics.md index 860d26377..41ec7fa84 100644 --- a/docs/integrations/system-metrics.md +++ b/docs/integrations/system-metrics.md @@ -35,8 +35,8 @@ logfire.instrument_system_metrics({ }) ``` -1. `process.runtime.cpu.utilization` will lead to exporting a metric that is actually named `process.runtime.cpython.cpu.utilization` or a similar name depending on the Python implementation used. The `None` value means that there are no fields to configure for this metric. The value of this metric is `[psutil.Process().cpu_percent()](https://psutil.readthedocs.io/en/latest/#psutil.Process.cpu_percent) / 100`, i.e. the fraction of CPU time used by this process, where 1 means using 100% of a single CPU core. The value can be greater than 1 if the process uses multiple cores. -2. The `None` value means that there are no fields to configure for this metric. The value of this metric is `[psutil.cpu_percent()](https://psutil.readthedocs.io/en/latest/#psutil.cpu_percent) / 100`, i.e. the fraction of CPU time used by the whole system, where 1 means using 100% of all CPU cores. +1. `process.runtime.cpu.utilization` will lead to exporting a metric that is actually named `process.runtime.cpython.cpu.utilization` or a similar name depending on the Python implementation used. The `None` value means that there are no fields to configure for this metric. 
The value of this metric is [`psutil.Process().cpu_percent()`](https://psutil.readthedocs.io/en/latest/#psutil.Process.cpu_percent)`/100`, i.e. the fraction of CPU time used by this process, where 1 means using 100% of a single CPU core. The value can be greater than 1 if the process uses multiple cores. In the next major release, the default will instead emit `'process.cpu.utilization'`, which is the same metric but with a simpler name. +2. The `None` value means that there are no fields to configure for this metric. The value of this metric is [`psutil.cpu_percent()`](https://psutil.readthedocs.io/en/latest/#psutil.cpu_percent)`/100`, i.e. the fraction of CPU time used by the whole system, where 1 means using 100% of all CPU cores. 3. The value here is a list of 'modes' of memory. The full list can be seen in the [`psutil` documentation](https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory). `available` is "the memory that can be given instantly to processes without the system going into swap. This is calculated by summing different memory metrics that vary depending on the platform. It is supposed to be used to monitor actual memory usage in a cross platform fashion." The value of the metric is a number between 0 and 1, and subtracting the value from 1 gives the fraction of memory used. 4. This is the fraction of available swap used. The value is a number between 0 and 1. @@ -69,13 +69,21 @@ logfire.instrument_system_metrics({ 'system.network.errors': ['transmit', 'receive'], 'system.network.io': ['transmit', 'receive'], 'system.thread_count': None, + 'process.context_switches': ['involuntary', 'voluntary'], + 'process.runtime.gc_count': None, + 'process.open_file_descriptor.count': None, + 'process.cpu.time': ['user', 'system'], + 'process.cpu.utilization': ['user', 'system'], + 'process.memory.usage': None, + 'process.memory.virtual': None, + 'process.thread.count': None, + # These are deprecated and equivalent to some of the above. 
+ # base='full' will stop including them in the next major release. 'process.runtime.memory': ['rss', 'vms'], 'process.runtime.cpu.time': ['user', 'system'], - 'process.runtime.gc_count': None, 'process.runtime.thread_count': None, 'process.runtime.cpu.utilization': None, 'process.runtime.context_switches': ['involuntary', 'voluntary'], - 'process.open_file_descriptor.count': None, }) ``` diff --git a/logfire/_internal/integrations/system_metrics.py b/logfire/_internal/integrations/system_metrics.py index af652c6c9..ccd1e3161 100644 --- a/logfire/_internal/integrations/system_metrics.py +++ b/logfire/_internal/integrations/system_metrics.py @@ -43,13 +43,20 @@ 'system.network.io', 'system.network.connections', 'system.thread_count', + 'process.open_file_descriptor.count', + 'process.context_switches', + 'process.cpu.time', + 'process.cpu.utilization', + 'process.memory.usage', + 'process.memory.virtual', + 'process.thread.count', + 'process.runtime.gc_count', + # ##### These are deprecated: 'process.runtime.memory', 'process.runtime.cpu.time', - 'process.runtime.gc_count', 'process.runtime.thread_count', 'process.runtime.cpu.utilization', 'process.runtime.context_switches', - 'process.open_file_descriptor.count', ] ] = Literal[ # type: ignore # but pyright doesn't like it 'system.cpu.simple_utilization', @@ -68,13 +75,20 @@ 'system.network.io', 'system.network.connections', 'system.thread_count', + 'process.open_file_descriptor.count', + 'process.context_switches', + 'process.cpu.time', + 'process.cpu.utilization', + 'process.memory.usage', + 'process.memory.virtual', + 'process.thread.count', + 'process.runtime.gc_count', + # ##### These are deprecated: 'process.runtime.memory', 'process.runtime.cpu.time', - 'process.runtime.gc_count', 'process.runtime.thread_count', 'process.runtime.cpu.utilization', 'process.runtime.context_switches', - 'process.open_file_descriptor.count', ] Config = Dict[MetricName, Optional[Iterable[str]]] diff --git 
a/tests/otel_integrations/test_system_metrics.py b/tests/otel_integrations/test_system_metrics.py index 03682b331..f7a8a6e17 100644 --- a/tests/otel_integrations/test_system_metrics.py +++ b/tests/otel_integrations/test_system_metrics.py @@ -36,12 +36,15 @@ def test_default_system_metrics_collection(metrics_reader: InMemoryMetricReader) ) -# TODO FIX THIS -@pytest.mark.xfail def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> None: logfire.instrument_system_metrics(base='full') assert get_collected_metric_names(metrics_reader) == snapshot( [ + 'process.context_switches', + 'process.cpu.time', + 'process.cpu.utilization', + 'process.memory.usage', + 'process.memory.virtual', 'process.open_file_descriptor.count', 'process.runtime.cpython.context_switches', 'process.runtime.cpython.cpu.utilization', @@ -49,6 +52,7 @@ def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> 'process.runtime.cpython.gc_count', 'process.runtime.cpython.memory', 'process.runtime.cpython.thread_count', + 'process.thread.count', 'system.cpu.simple_utilization', 'system.cpu.time', 'system.cpu.utilization', @@ -82,8 +86,6 @@ def test_basic_base(): }, 'Docs need to be updated if this test fails' -# TODO FIX THIS -@pytest.mark.xfail def test_full_base(): config = get_base_config('full') config.pop('system.network.connections', None) @@ -137,13 +139,20 @@ def test_full_base(): 'system.network.errors': ['transmit', 'receive'], 'system.network.io': ['transmit', 'receive'], 'system.thread_count': None, + 'process.runtime.gc_count': None, + 'process.open_file_descriptor.count': None, + 'process.memory.usage': None, + 'process.memory.virtual': None, + 'process.cpu.time': ['user', 'system'], + 'process.cpu.utilization': ['user', 'system'], + 'process.thread.count': None, + 'process.context_switches': ['involuntary', 'voluntary'], + # These are deprecated: 'process.runtime.memory': ['rss', 'vms'], 'process.runtime.cpu.time': ['user', 'system'], - 
'process.runtime.gc_count': None, - 'process.runtime.thread_count': None, 'process.runtime.cpu.utilization': None, + 'process.runtime.thread_count': None, 'process.runtime.context_switches': ['involuntary', 'voluntary'], - 'process.open_file_descriptor.count': None, }, 'Docs and the MetricName type need to be updated if this test fails' From c1b82a273328c68e8e83db63828d6d7788cb9dd8 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Fri, 14 Mar 2025 14:47:41 +0200 Subject: [PATCH 2/3] Stop implicitly emitting deprecated process runtime metrics --- docs/integrations/system-metrics.md | 11 ++------ .../_internal/integrations/system_metrics.py | 28 +++++++++---------- .../otel_integrations/test_system_metrics.py | 15 ++-------- 3 files changed, 17 insertions(+), 37 deletions(-) diff --git a/docs/integrations/system-metrics.md b/docs/integrations/system-metrics.md index 41ec7fa84..881421dbd 100644 --- a/docs/integrations/system-metrics.md +++ b/docs/integrations/system-metrics.md @@ -28,14 +28,14 @@ By default, `instrument_system_metrics` collects only the metrics it needs to di ```py logfire.instrument_system_metrics({ - 'process.runtime.cpu.utilization': None, # (1)! + 'process.cpu.utilization': None, # (1)! 'system.cpu.simple_utilization': None, # (2)! 'system.memory.utilization': ['available'], # (3)! 'system.swap.utilization': ['used'], # (4)! }) ``` -1. `process.runtime.cpu.utilization` will lead to exporting a metric that is actually named `process.runtime.cpython.cpu.utilization` or a similar name depending on the Python implementation used. The `None` value means that there are no fields to configure for this metric. The value of this metric is [`psutil.Process().cpu_percent()`](https://psutil.readthedocs.io/en/latest/#psutil.Process.cpu_percent)`/100`, i.e. the fraction of CPU time used by this process, where 1 means using 100% of a single CPU core. The value can be greater than 1 if the process uses multiple cores. 
In the next major release, the default will instead emit `'process.cpu.utilization'`, which is the same metric but with a simpler name. +1. The `None` value means that there are no fields to configure for this metric. The value of this metric is [`psutil.Process().cpu_percent()`](https://psutil.readthedocs.io/en/latest/#psutil.Process.cpu_percent)`/100`, i.e. the fraction of CPU time used by this process, where 1 means using 100% of a single CPU core. The value can be greater than 1 if the process uses multiple cores. 2. The `None` value means that there are no fields to configure for this metric. The value of this metric is [`psutil.cpu_percent()`](https://psutil.readthedocs.io/en/latest/#psutil.cpu_percent)`/100`, i.e. the fraction of CPU time used by the whole system, where 1 means using 100% of all CPU cores. 3. The value here is a list of 'modes' of memory. The full list can be seen in the [`psutil` documentation](https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory). `available` is "the memory that can be given instantly to processes without the system going into swap. This is calculated by summing different memory metrics that vary depending on the platform. It is supposed to be used to monitor actual memory usage in a cross platform fashion." The value of the metric is a number between 0 and 1, and subtracting the value from 1 gives the fraction of memory used. 4. This is the fraction of available swap used. The value is a number between 0 and 1. @@ -77,13 +77,6 @@ logfire.instrument_system_metrics({ 'process.memory.usage': None, 'process.memory.virtual': None, 'process.thread.count': None, - # These are deprecated and equivalent to some of the above. - # base='full' will stop including them in the next major release. 
- 'process.runtime.memory': ['rss', 'vms'], - 'process.runtime.cpu.time': ['user', 'system'], - 'process.runtime.thread_count': None, - 'process.runtime.cpu.utilization': None, - 'process.runtime.context_switches': ['involuntary', 'voluntary'], }) ``` diff --git a/logfire/_internal/integrations/system_metrics.py b/logfire/_internal/integrations/system_metrics.py index ccd1e3161..1beea4f43 100644 --- a/logfire/_internal/integrations/system_metrics.py +++ b/logfire/_internal/integrations/system_metrics.py @@ -51,12 +51,6 @@ 'process.memory.virtual', 'process.thread.count', 'process.runtime.gc_count', - # ##### These are deprecated: - 'process.runtime.memory', - 'process.runtime.cpu.time', - 'process.runtime.thread_count', - 'process.runtime.cpu.utilization', - 'process.runtime.context_switches', ] ] = Literal[ # type: ignore # but pyright doesn't like it 'system.cpu.simple_utilization', @@ -83,12 +77,6 @@ 'process.memory.virtual', 'process.thread.count', 'process.runtime.gc_count', - # ##### These are deprecated: - 'process.runtime.memory', - 'process.runtime.cpu.time', - 'process.runtime.thread_count', - 'process.runtime.cpu.utilization', - 'process.runtime.context_switches', ] Config = Dict[MetricName, Optional[Iterable[str]]] @@ -121,8 +109,17 @@ # upstream pr: https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2008 FULL_CONFIG.pop('system.network.connections', None) +for _deprecated in [ + 'process.runtime.memory', + 'process.runtime.cpu.time', + 'process.runtime.thread_count', + 'process.runtime.cpu.utilization', + 'process.runtime.context_switches', +]: + FULL_CONFIG.pop(_deprecated, None) # type: ignore + BASIC_CONFIG: Config = { - 'process.runtime.cpu.utilization': None, + 'process.cpu.utilization': None, 'system.cpu.simple_utilization': None, # The actually used memory ratio can be calculated as `1 - available`. 
'system.memory.utilization': ['available'], @@ -149,10 +146,11 @@ def instrument_system_metrics(logfire_instance: Logfire, config: Config | None = if 'system.cpu.simple_utilization' in config: measure_simple_cpu_utilization(logfire_instance) - if 'process.runtime.cpu.utilization' in config: + if 'process.runtime.cpu.utilization' in config: # type: ignore # Override OTEL here, see comment in measure_process_runtime_cpu_utilization.<locals>.callback. + # (The name is also deprecated by OTEL, but that's not really important) measure_process_runtime_cpu_utilization(logfire_instance) - del config['process.runtime.cpu.utilization'] + del config['process.runtime.cpu.utilization'] # type: ignore instrumentor = SystemMetricsInstrumentor(config=config) # type: ignore instrumentor.instrument(meter_provider=logfire_instance.config.get_meter_provider()) diff --git a/tests/otel_integrations/test_system_metrics.py b/tests/otel_integrations/test_system_metrics.py index f7a8a6e17..e76d59f52 100644 --- a/tests/otel_integrations/test_system_metrics.py +++ b/tests/otel_integrations/test_system_metrics.py @@ -28,7 +28,7 @@ def test_default_system_metrics_collection(metrics_reader: InMemoryMetricReader) logfire.instrument_system_metrics() assert get_collected_metric_names(metrics_reader) == snapshot( [ - 'process.runtime.cpython.cpu.utilization', + 'process.cpu.utilization', 'system.cpu.simple_utilization', 'system.memory.utilization', 'system.swap.utilization', @@ -46,12 +46,7 @@ def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> 'process.memory.usage', 'process.memory.virtual', 'process.open_file_descriptor.count', - 'process.runtime.cpython.context_switches', - 'process.runtime.cpython.cpu.utilization', - 'process.runtime.cpython.cpu_time', 'process.runtime.cpython.gc_count', - 'process.runtime.cpython.memory', - 'process.runtime.cpython.thread_count', 'process.thread.count', 'system.cpu.simple_utilization', 'system.cpu.time', @@ -79,7 +74,7 @@ def
test_custom_system_metrics_collection(metrics_reader: InMemoryMetricReader) def test_basic_base(): assert get_base_config('basic') == { - 'process.runtime.cpu.utilization': None, + 'process.cpu.utilization': None, 'system.cpu.simple_utilization': None, 'system.memory.utilization': ['available'], 'system.swap.utilization': ['used'], @@ -147,12 +142,6 @@ def test_full_base(): 'process.cpu.utilization': ['user', 'system'], 'process.thread.count': None, 'process.context_switches': ['involuntary', 'voluntary'], - # These are deprecated: - 'process.runtime.memory': ['rss', 'vms'], - 'process.runtime.cpu.time': ['user', 'system'], - 'process.runtime.cpu.utilization': None, - 'process.runtime.thread_count': None, - 'process.runtime.context_switches': ['involuntary', 'voluntary'], }, 'Docs and the MetricName type need to be updated if this test fails' From 65b4a4394ad40b886a51d7fcab41023de12947d8 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Fri, 14 Mar 2025 14:56:47 +0200 Subject: [PATCH 3/3] test measure_process_runtime_cpu_utilization --- tests/otel_integrations/test_system_metrics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/otel_integrations/test_system_metrics.py b/tests/otel_integrations/test_system_metrics.py index e76d59f52..4bedfd433 100644 --- a/tests/otel_integrations/test_system_metrics.py +++ b/tests/otel_integrations/test_system_metrics.py @@ -68,8 +68,10 @@ def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> def test_custom_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> None: - logfire.instrument_system_metrics({'system.memory.utilization': ['available']}, base=None) - assert get_collected_metric_names(metrics_reader) == ['system.memory.utilization'] + # This metric is now deprecated by OTEL, but there isn't a strong reason to stop allowing it when requested, + # and I also want to test measure_process_runtime_cpu_utilization. 
+ logfire.instrument_system_metrics({'process.runtime.cpu.utilization': None}, base=None) # type: ignore + assert get_collected_metric_names(metrics_reader) == ['process.runtime.cpython.cpu.utilization'] def test_basic_base():