Skip to content

Commit dd858ea

Browse files
committed
chore: update otel example
1 parent 1278a80 commit dd858ea

8 files changed

+164
-22
lines changed

example/otel/README.md

+16
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,25 @@ UPTRACE_DSN=http://project2_secret_token@localhost:14317/2 go run client.go
4040
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
4141
```
4242

43+
![Redis trace](./image/redis-trace.png)
44+
4345
You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
4446
spans, logs, and metrics.
4547

48+
## Redis monitoring
49+
50+
You can also [monitor Redis performance](https://uptrace.dev/opentelemetry/redis-monitoring.html)
51+
metrics by installing OpenTelemetry Collector.
52+
53+
[OpenTelemetry Collector](https://uptrace.dev/opentelemetry/collector.html) is an agent that pulls
54+
telemetry data from systems you want to monitor and sends it to APM tools using the OpenTelemetry
55+
protocol (OTLP).
56+
57+
When telemetry data reaches Uptrace, it automatically generates a Redis dashboard from a pre-defined
58+
template.
59+
60+
![Redis dashboard](./image/metrics.png)
61+
4662
## Links
4763

4864
- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)

example/otel/config/alertmanager.yml

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# See https://prometheus.io/docs/alerting/latest/configuration/ for details.
2+
3+
global:
4+
# The smarthost and SMTP sender used for mail notifications.
5+
smtp_smarthost: 'mailhog:1025'
6+
smtp_from: '[email protected]'
7+
smtp_require_tls: false
8+
9+
receivers:
10+
- name: 'team-X'
11+
email_configs:
12+
13+
send_resolved: true
14+
15+
# The root route on which each incoming alert enters.
16+
route:
17+
# The labels by which incoming alerts are grouped together. For example,
18+
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
19+
# be batched into a single group.
20+
group_by: ['alertname', 'cluster', 'service']
21+
22+
# When a new group of alerts is created by an incoming alert, wait at
23+
# least 'group_wait' to send the initial notification.
24+
# This ensures that multiple alerts for the same group that start
25+
# firing shortly after one another are batched together on the first
26+
# notification.
27+
group_wait: 30s
28+
29+
# When the first notification was sent, wait 'group_interval' to send a batch
30+
# of new alerts that started firing for that group.
31+
group_interval: 5m
32+
33+
# If an alert has successfully been sent, wait 'repeat_interval' to
34+
# resend it.
35+
repeat_interval: 3h
36+
37+
# A default receiver
38+
receiver: team-X
39+
40+
# All the above attributes are inherited by all child routes and can
41+
# be overwritten on each.
42+
43+
# The child route trees.
44+
routes:
45+
# This route matches error alerts created from spans or logs.
46+
- matchers:
47+
- alert_kind="error"
48+
group_interval: 24h
49+
receiver: team-X
50+
51+
# The directory from which notification templates are read.
52+
templates:
53+
- '/etc/alertmanager/template/*.tmpl'
File renamed without changes.
File renamed without changes.

example/otel/docker-compose.yml

+21-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ services:
1818
- '9000:9000'
1919

2020
uptrace:
21-
image: 'uptrace/uptrace:1.1.0'
21+
image: 'uptrace/uptrace:1.2.0'
2222
#image: 'uptrace/uptrace-dev:latest'
2323
restart: on-failure
2424
volumes:
@@ -36,19 +36,34 @@ services:
3636
otel-collector:
3737
image: otel/opentelemetry-collector-contrib:0.58.0
3838
restart: on-failure
39-
user: '0:0' # required for logs
4039
volumes:
41-
- ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml
42-
- /var/lib/docker/containers:/var/lib/docker/containers:ro
43-
- /var/log:/var/log:ro
40+
- ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml
4441
ports:
4542
- '4317:4317'
4643
- '4318:4318'
4744

4845
vector:
4946
image: timberio/vector:0.24.X-alpine
5047
volumes:
51-
- ./vector.toml:/etc/vector/vector.toml:ro
48+
- ./config/vector.toml:/etc/vector/vector.toml:ro
49+
50+
alertmanager:
51+
image: prom/alertmanager:v0.24.0
52+
restart: on-failure
53+
volumes:
54+
- ./config/alertmanager.yml:/etc/alertmanager/config.yml
55+
- alertmanager_data:/alertmanager
56+
ports:
57+
- 9093:9093
58+
command:
59+
- '--config.file=/etc/alertmanager/config.yml'
60+
- '--storage.path=/alertmanager'
61+
62+
mailhog:
63+
image: mailhog/mailhog:v1.0.1
64+
restart: on-failure
65+
ports:
66+
- '8025:8025'
5267

5368
redis-server:
5469
image: redis

example/otel/image/metrics.png

31.6 KB
Loading

example/otel/image/redis-trace.png

21 KB
Loading

example/otel/uptrace.yml

+74-16
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@
1313
## foo: $$FOO_BAR
1414
##
1515

16+
##
17+
## ClickHouse database credentials.
18+
##
19+
ch:
20+
# Connection string for ClickHouse database. For example:
21+
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
22+
#
23+
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
24+
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
25+
1626
##
1727
## A list of pre-configured projects. Each project is fully isolated.
1828
##
@@ -26,6 +36,10 @@ projects:
2636
- service.name
2737
- host.name
2838
- deployment.environment
39+
# Group spans by deployment.environment attribute.
40+
group_by_env: false
41+
# Group funcs spans by service.name attribute.
42+
group_funcs_by_service: false
2943

3044
# Other projects can be used to monitor your applications.
3145
# To monitor micro-services or multiple related services, use a single project.
@@ -36,6 +50,49 @@ projects:
3650
- service.name
3751
- host.name
3852
- deployment.environment
53+
# Group spans by deployment.environment attribute.
54+
group_by_env: false
55+
# Group funcs spans by service.name attribute.
56+
group_funcs_by_service: false
57+
58+
##
59+
## Create metrics from spans and events.
60+
##
61+
metrics_from_spans:
62+
- name: uptrace.tracing.spans_duration
63+
description: Spans duration (excluding events)
64+
instrument: histogram
65+
unit: microseconds
66+
value: span.duration / 1000
67+
attrs:
68+
- span.system as system
69+
- service.name as service
70+
- host.name as host
71+
- span.status_code as status
72+
where: not span.is_event
73+
74+
- name: uptrace.tracing.spans
75+
description: Spans count (excluding events)
76+
instrument: counter
77+
unit: 1
78+
value: span.count
79+
attrs:
80+
- span.system as system
81+
- service.name as service
82+
- host.name as host
83+
- span.status_code as status
84+
where: not span.is_event
85+
86+
- name: uptrace.tracing.events
87+
description: Events count (excluding spans)
88+
instrument: counter
89+
unit: 1
90+
value: span.count
91+
attrs:
92+
- span.system as system
93+
- service.name as service
94+
- host.name as host
95+
where: span.is_event
3996

4097
##
4198
## To require authentication, uncomment the following section.
@@ -78,16 +135,6 @@ auth:
78135
# # Defaults to 'preferred_username'.
79136
# claim: preferred_username
80137

81-
##
82-
## ClickHouse database credentials.
83-
##
84-
ch:
85-
# Connection string for ClickHouse database. For example:
86-
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
87-
#
88-
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
89-
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
90-
91138
##
92139
## Alerting rules for monitoring metrics.
93140
##
@@ -102,8 +149,8 @@ alerting:
102149
- $net_errors > 0 group by host.name
103150
# for the last 5 minutes
104151
for: 5m
105-
# in the project id=1
106-
projects: [1]
152+
annotations:
153+
summary: '{{ $labels.host_name }} has high number of net errors: {{ $values.net_errors }}'
107154

108155
- name: Filesystem usage >= 90%
109156
metrics:
@@ -114,15 +161,26 @@ alerting:
114161
- where device !~ "loop"
115162
- $fs_usage{state="used"} / $fs_usage >= 0.9
116163
for: 5m
117-
projects: [1]
164+
annotations:
165+
summary: '{{ $labels.host_name }} has high FS usage: {{ $values.fs_usage }}'
118166

119167
- name: Uptrace is dropping spans
120168
metrics:
121169
- uptrace.projects.spans as $spans
122170
query:
123171
- $spans{type=dropped} > 0
124172
for: 1m
125-
projects: [1]
173+
annotations:
174+
summary: 'Uptrace has dropped {{ $values.spans }} spans'
175+
176+
- name: Always firing (for fun and testing)
177+
metrics:
178+
- process.runtime.go.goroutines as $goroutines
179+
query:
180+
- $goroutines >= 0 group by host.name
181+
for: 1m
182+
annotations:
183+
summary: '{{ $labels.host_name }} has high number of goroutines: {{ $values.goroutines }}'
126184

127185
# Create alerts from error logs and span events.
128186
create_alerts_from_spans:
@@ -139,8 +197,8 @@ alerting:
139197
##
140198
alertmanager_client:
141199
# AlertManager API endpoints that Uptrace uses to manage alerts.
142-
# urls:
143-
# - 'http://alertmanager:9093/api/v2/alerts'
200+
urls:
201+
- 'http://alertmanager:9093/api/v2/alerts'
144202

145203
##
146204
## Various options to tweak ClickHouse schema.

0 commit comments

Comments
 (0)