mirror of
https://github.com/dragonflydb/dragonfly
synced 2024-11-21 23:19:53 +00:00
feat: expose fiber responsiveness metrics (#2125)
Should allow tracking cases where Dragonfly is not responsive to I/O due to big CPU tasks. Also, update the local Grafana dashboard. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
b9781c4903
commit
c7db025a48
2
helio
2
helio
@ -1 +1 @@
|
|||||||
Subproject commit fe7ec28642c1b699bdc8839296f354d797ee0365
|
Subproject commit 1fea6effc72919649c815afb04e9c7829b0240ab
|
@ -850,7 +850,7 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||||||
AppendMetricValue("role", 1, {"role"}, {m.is_master ? "master" : "replica"}, &resp->body());
|
AppendMetricValue("role", 1, {"role"}, {m.is_master ? "master" : "replica"}, &resp->body());
|
||||||
AppendMetricWithoutLabels("master", "1 if master 0 if replica", m.is_master ? 1 : 0,
|
AppendMetricWithoutLabels("master", "1 if master 0 if replica", m.is_master ? 1 : 0,
|
||||||
MetricType::GAUGE, &resp->body());
|
MetricType::GAUGE, &resp->body());
|
||||||
AppendMetricWithoutLabels("uptime_in_seconds", "", m.uptime, MetricType::GAUGE, &resp->body());
|
AppendMetricWithoutLabels("uptime_in_seconds", "", m.uptime, MetricType::COUNTER, &resp->body());
|
||||||
|
|
||||||
// Clients metrics
|
// Clients metrics
|
||||||
AppendMetricWithoutLabels("connected_clients", "", m.conn_stats.num_conns, MetricType::GAUGE,
|
AppendMetricWithoutLabels("connected_clients", "", m.conn_stats.num_conns, MetricType::GAUGE,
|
||||||
@ -923,7 +923,7 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||||||
&command_metrics);
|
&command_metrics);
|
||||||
for (const auto& [name, stat] : m.cmd_stats_map) {
|
for (const auto& [name, stat] : m.cmd_stats_map) {
|
||||||
const auto calls = stat.first;
|
const auto calls = stat.first;
|
||||||
const auto duration_seconds = stat.second * 0.001;
|
const double duration_seconds = stat.second * 0.001;
|
||||||
AppendMetricValue("commands_total", calls, {"cmd"}, {name}, &command_metrics);
|
AppendMetricValue("commands_total", calls, {"cmd"}, {name}, &command_metrics);
|
||||||
AppendMetricValue("commands_duration_seconds_total", duration_seconds, {"cmd"}, {name},
|
AppendMetricValue("commands_duration_seconds_total", duration_seconds, {"cmd"}, {name},
|
||||||
&command_metrics);
|
&command_metrics);
|
||||||
@ -944,6 +944,18 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||||||
absl::StrAppend(&resp->body(), replication_lag_metrics);
|
absl::StrAppend(&resp->body(), replication_lag_metrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AppendMetricWithoutLabels("fiber_switch_total", "", m.fiber_switch_cnt, MetricType::COUNTER,
|
||||||
|
&resp->body());
|
||||||
|
double delay_seconds = m.fiber_switch_delay_ns * 1e-9;
|
||||||
|
AppendMetricWithoutLabels("fiber_switch_delay_seconds_total", "", delay_seconds,
|
||||||
|
MetricType::COUNTER, &resp->body());
|
||||||
|
|
||||||
|
AppendMetricWithoutLabels("fiber_longrun_total", "", m.fiber_longrun_cnt, MetricType::COUNTER,
|
||||||
|
&resp->body());
|
||||||
|
double longrun_seconds = m.fiber_longrun_ns * 1e-9;
|
||||||
|
AppendMetricWithoutLabels("fiber_longrun_seconds_total", "", longrun_seconds, MetricType::COUNTER,
|
||||||
|
&resp->body());
|
||||||
|
|
||||||
absl::StrAppend(&resp->body(), db_key_metrics);
|
absl::StrAppend(&resp->body(), db_key_metrics);
|
||||||
absl::StrAppend(&resp->body(), db_key_expire_metrics);
|
absl::StrAppend(&resp->body(), db_key_expire_metrics);
|
||||||
}
|
}
|
||||||
@ -1402,6 +1414,11 @@ Metrics ServerFamily::GetMetrics() const {
|
|||||||
|
|
||||||
lock_guard lk(mu);
|
lock_guard lk(mu);
|
||||||
|
|
||||||
|
result.fiber_switch_cnt += fb2::FiberSwitchEpoch();
|
||||||
|
result.fiber_switch_delay_ns += fb2::FiberSwitchDelay();
|
||||||
|
result.fiber_longrun_cnt += fb2::FiberLongRunCnt();
|
||||||
|
result.fiber_longrun_ns += fb2::FiberLongRunSum();
|
||||||
|
|
||||||
result.coordinator_stats += ss->stats;
|
result.coordinator_stats += ss->stats;
|
||||||
result.conn_stats += ss->connection_stats;
|
result.conn_stats += ss->connection_stats;
|
||||||
|
|
||||||
|
@ -86,6 +86,12 @@ struct Metrics {
|
|||||||
size_t small_string_bytes = 0;
|
size_t small_string_bytes = 0;
|
||||||
uint32_t traverse_ttl_per_sec = 0;
|
uint32_t traverse_ttl_per_sec = 0;
|
||||||
uint32_t delete_ttl_per_sec = 0;
|
uint32_t delete_ttl_per_sec = 0;
|
||||||
|
uint64_t fiber_switch_cnt = 0;
|
||||||
|
uint64_t fiber_switch_delay_ns = 0;
|
||||||
|
|
||||||
|
// Statistics about fibers running for a long time (more than 1ms).
|
||||||
|
uint64_t fiber_longrun_cnt = 0;
|
||||||
|
uint64_t fiber_longrun_ns = 0;
|
||||||
|
|
||||||
std::map<std::string, std::pair<uint64_t, uint64_t>> cmd_stats_map; // command call frequencies
|
std::map<std::string, std::pair<uint64_t, uint64_t>> cmd_stats_map; // command call frequencies
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@
|
|||||||
},
|
},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
@ -191,7 +191,7 @@
|
|||||||
},
|
},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
@ -282,7 +282,7 @@
|
|||||||
"showThresholdLabels": false,
|
"showThresholdLabels": false,
|
||||||
"showThresholdMarkers": true
|
"showThresholdMarkers": true
|
||||||
},
|
},
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
@ -350,7 +350,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -456,7 +456,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -574,7 +574,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -690,7 +690,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -791,7 +791,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -912,7 +912,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -1033,7 +1033,7 @@
|
|||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "9.3.6",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
@ -1110,7 +1110,6 @@
|
|||||||
"mode": "palette-classic"
|
"mode": "palette-classic"
|
||||||
},
|
},
|
||||||
"custom": {
|
"custom": {
|
||||||
"axisBorderShow": false,
|
|
||||||
"axisCenteredZero": false,
|
"axisCenteredZero": false,
|
||||||
"axisColorMode": "text",
|
"axisColorMode": "text",
|
||||||
"axisLabel": "",
|
"axisLabel": "",
|
||||||
@ -1124,7 +1123,6 @@
|
|||||||
"tooltip": false,
|
"tooltip": false,
|
||||||
"viz": false
|
"viz": false
|
||||||
},
|
},
|
||||||
"insertNulls": false,
|
|
||||||
"lineInterpolation": "linear",
|
"lineInterpolation": "linear",
|
||||||
"lineWidth": 1,
|
"lineWidth": 1,
|
||||||
"pointSize": 5,
|
"pointSize": 5,
|
||||||
@ -1155,7 +1153,7 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"unit": "µs"
|
"unit": "s"
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -1187,7 +1185,107 @@
|
|||||||
"disableTextWrap": false,
|
"disableTextWrap": false,
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"expr":
|
"expr":
|
||||||
"rate(dragonfly_fiber_switch_delay_seconds_total[$__rate_interval])*1000000/rate(dragonfly_fiber_switch_total[$__rate_interval])",
|
"rate(dragonfly_fiber_switch_delay_seconds_total[$__rate_interval])/rate(dragonfly_fiber_switch_total[$__rate_interval])",
|
||||||
|
"fullMetaSearch": false,
|
||||||
|
"includeNullMetadata": false,
|
||||||
|
"instant": false,
|
||||||
|
"legendFormat": "__auto",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A",
|
||||||
|
"useBackend": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "FiberSwitchDelay",
|
||||||
|
"transformations": [],
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 30
|
||||||
|
},
|
||||||
|
"id": 20,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "PBFA97CFB590B2093"
|
||||||
|
},
|
||||||
|
"disableTextWrap": false,
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr":
|
||||||
|
"rate(dragonfly_fiber_longrun_seconds_total[$__rate_interval])/rate(dragonfly_fiber_longrun_total[$__rate_interval])",
|
||||||
"fullMetaSearch": false,
|
"fullMetaSearch": false,
|
||||||
"includeNullMetadata": false,
|
"includeNullMetadata": false,
|
||||||
"instant": false,
|
"instant": false,
|
||||||
@ -1203,7 +1301,8 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"refresh": "",
|
"refresh": "",
|
||||||
"schemaVersion": 38,
|
"schemaVersion": 37,
|
||||||
|
"style": "dark",
|
||||||
"tags": [
|
"tags": [
|
||||||
"prometheus",
|
"prometheus",
|
||||||
"dragonfly"
|
"dragonfly"
|
||||||
|
@ -29,14 +29,14 @@ scrape_configs:
|
|||||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
|
||||||
- job_name: dragonfly
|
- job_name: dragonfly
|
||||||
scrape_interval: 5s
|
scrape_interval: 1s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['host.docker.internal:6379']
|
- targets: ['host.docker.internal:6379']
|
||||||
|
|
||||||
- job_name: 'prometheus'
|
- job_name: 'prometheus'
|
||||||
|
|
||||||
# Override the global default and scrape targets from this job every 5 seconds.
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
scrape_interval: 5s
|
scrape_interval: 1s
|
||||||
|
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['localhost:9090']
|
- targets: ['localhost:9090']
|
||||||
@ -45,7 +45,7 @@ scrape_configs:
|
|||||||
- job_name: 'node-exporter'
|
- job_name: 'node-exporter'
|
||||||
|
|
||||||
# Override the global default and scrape targets from this job every 5 seconds.
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
scrape_interval: 5s
|
scrape_interval: 1s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['node-exporter:9100']
|
- targets: ['node-exporter:9100']
|
||||||
labels:
|
labels:
|
||||||
|
Loading…
Reference in New Issue
Block a user