diff --git a/src/homework/05-virtualization/5.5/ansible/.gitignore b/src/homework/05-virtualization/5.5/ansible/.gitignore new file mode 100644 index 0000000..75fe095 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/.gitignore @@ -0,0 +1 @@ +inventory \ No newline at end of file diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/Dockerfile b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/Dockerfile new file mode 100644 index 0000000..972598d --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/Dockerfile @@ -0,0 +1,7 @@ +FROM prom/alertmanager:v0.15.3 + +COPY conf /etc/alertmanager/ + +ENTRYPOINT [ "/etc/alertmanager/docker-entrypoint.sh" ] +CMD [ "--config.file=/etc/alertmanager/alertmanager.yml", \ + "--storage.path=/alertmanager" ] diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/alertmanager.yml b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/alertmanager.yml new file mode 100644 index 0000000..4859ab9 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/alertmanager.yml @@ -0,0 +1,11 @@ +route: + receiver: 'slack' + +receivers: + - name: 'slack' + slack_configs: + - send_resolved: true + text: "{{ .CommonAnnotations.description }}" + #username: # + #channel: # + #api_url: # diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/docker-entrypoint.sh b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/docker-entrypoint.sh new file mode 100755 index 0000000..41e9d96 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/alertmanager/conf/docker-entrypoint.sh @@ -0,0 +1,12 @@ +#!/bin/sh -e + +cat /etc/alertmanager/alertmanager.yml |\ + sed "s@#api_url: #@api_url: '$SLACK_URL'@g" |\ + sed "s@#channel: #@channel: '#$SLACK_CHANNEL'@g" |\ + sed "s@#username: #@username: '$SLACK_USER'@g" > 
/tmp/alertmanager.yml + +mv /tmp/alertmanager.yml /etc/alertmanager/alertmanager.yml + +set -- /bin/alertmanager "$@" + +exec "$@" diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/caddy/Caddyfile b/src/homework/05-virtualization/5.5/ansible/monitoring/caddy/Caddyfile new file mode 100644 index 0000000..e1b46ec --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/caddy/Caddyfile @@ -0,0 +1,40 @@ +:9090 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / prometheus:9090 { + transparent + } + + errors stderr + tls off +} + +:9093 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / alertmanager:9093 { + transparent + } + + errors stderr + tls off +} + +:9094 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / unsee:8080 { + transparent + } + + errors stderr + tls off +} + +:3000 { + proxy / grafana:3000 { + transparent + websocket + } + + errors stderr + tls off +} + diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/docker-compose.yml b/src/homework/05-virtualization/5.5/ansible/monitoring/docker-compose.yml new file mode 100644 index 0000000..11c7905 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/docker-compose.yml @@ -0,0 +1,206 @@ +version: "3.3" + +networks: + net: + driver: overlay + attachable: true + +volumes: + prometheus: {} + grafana: {} + alertmanager: {} + +configs: + caddy_config: + file: ./caddy/Caddyfile + dockerd_config: + file: ./dockerd-exporter/Caddyfile + node_rules: + file: ./prometheus/rules/swarm_node.rules.yml + task_rules: + file: ./prometheus/rules/swarm_task.rules.yml + +services: + dockerd-exporter: + image: stefanprodan/caddy + networks: + - net + environment: + - DOCKER_GWBRIDGE_IP=172.18.0.1 + configs: + - source: dockerd_config + target: /etc/caddy/Caddyfile + deploy: + mode: global + resources: + limits: + memory: 128M + reservations: + memory: 64M + + cadvisor: + image: google/cadvisor + networks: + - net + command: -logtostderr 
-docker_only + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /:/rootfs:ro + - /var/run:/var/run + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + deploy: + mode: global + resources: + limits: + memory: 128M + reservations: + memory: 64M + + grafana: + image: stefanprodan/swarmprom-grafana:5.3.4 + networks: + - net + environment: + - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} + - GF_USERS_ALLOW_SIGN_UP=false + #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost} + #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false} + #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com} + #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana} + #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25} + #- GF_SMTP_USER=${GF_SMTP_USER} + #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} + volumes: + - grafana:/var/lib/grafana + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + + alertmanager: + image: stefanprodan/swarmprom-alertmanager:v0.14.0 + networks: + - net + environment: + - SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN} + - SLACK_CHANNEL=${SLACK_CHANNEL:-general} + - SLACK_USER=${SLACK_USER:-alertmanager} + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + volumes: + - alertmanager:/alertmanager + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + + unsee: + image: cloudflare/unsee:v0.8.0 + networks: + - net + environment: + - "ALERTMANAGER_URIS=default:http://alertmanager:9093" + deploy: + mode: replicated + replicas: 1 + + node-exporter: + image: stefanprodan/swarmprom-node-exporter:v0.16.0 + networks: + - net + environment: + - NODE_ID={{.Node.ID}} + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - 
/etc/hostname:/etc/nodename + command: + - '--path.sysfs=/host/sys' + - '--path.procfs=/host/proc' + - '--collector.textfile.directory=/etc/node-exporter/' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + - '--no-collector.ipvs' + deploy: + mode: global + resources: + limits: + memory: 128M + reservations: + memory: 64M + + prometheus: + image: stefanprodan/swarmprom-prometheus:v2.5.0 + networks: + - net + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' + volumes: + - prometheus:/prometheus + configs: + - source: node_rules + target: /etc/prometheus/swarm_node.rules.yml + - source: task_rules + target: /etc/prometheus/swarm_task.rules.yml + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 2048M + reservations: + memory: 128M + + caddy: + image: stefanprodan/caddy + ports: + - "3000:3000" + - "9090:9090" + - "9093:9093" + - "9094:9094" + networks: + - net + environment: + - ADMIN_USER=${ADMIN_USER:-admin} + - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} + configs: + - source: caddy_config + target: /etc/caddy/Caddyfile + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000"] + interval: 5s + timeout: 1s + retries: 5 diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/dockerd-exporter/Caddyfile b/src/homework/05-virtualization/5.5/ansible/monitoring/dockerd-exporter/Caddyfile new file mode 100644 index 0000000..5036d47 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/dockerd-exporter/Caddyfile @@ -0,0 +1,8 @@ +:9323 { + proxy / {$DOCKER_GWBRIDGE_IP}:9323 { + transparent + } + + errors stderr + tls off +} diff --git 
a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/Dockerfile b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/Dockerfile new file mode 100644 index 0000000..1e6d7df --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/Dockerfile @@ -0,0 +1,10 @@ +FROM grafana/grafana:5.3.4 +# https://hub.docker.com/r/grafana/grafana/tags/ + +COPY datasources /etc/grafana/provisioning/datasources/ +COPY swarmprom_dashboards.yml /etc/grafana/provisioning/dashboards/ +COPY dashboards /etc/grafana/dashboards/ + +ENV GF_SECURITY_ADMIN_PASSWORD=admin \ + GF_SECURITY_ADMIN_USER=admin \ + GF_PATHS_PROVISIONING=/etc/grafana/provisioning/ diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-nodes-dash.json b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-nodes-dash.json new file mode 100644 index 0000000..ef5cf84 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-nodes-dash.json @@ -0,0 +1,2064 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Docker Swarm nodes metrics", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1547535746076, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 1, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + 
"mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "topk(1, sum((node_time_seconds - node_boot_time_seconds) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": 
"rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "hideTimeOverride": true, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_cpu_seconds_total{mode=\"idle\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "CPUs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + 
"valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "10,25,100", + "timeFrom": "1m", + "timeShift": null, + "title": "CPU Idle", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + 
"hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load5 * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load5 {{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System Load by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - 
(avg(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 11 + }, + "hideTimeOverride": true, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 3, + "y": 11 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, 
+ { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 7, + "y": 11 + }, + "hideTimeOverride": true, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total swap memory used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 10, + "y": 11 + }, + "id": 
23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_SwapTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "5,10,100", + "title": "Used swap memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 14, + "y": 11 + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": 
"null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "5,10,100", + "title": "Swap used / total RAM memory ratio", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 17, + "y": 11 + }, + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/rootfs\"} * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 11 + }, + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", 
+ "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_Slab_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(node_memory_Cached * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cached {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Swap memory usage by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_read_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(irate(node_disk_written_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Written {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_reads_completed_total[$interval]) * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(irate(node_disk_writes_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + 
"legendFormat": "{{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU IO Wait by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 3, + "gridPos": { + "h": 7, + "w": 18, + "x": 0, + "y": 43 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Running Containers by Service", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 43 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total Containers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 50 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, 
+ "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IN {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "OUT {{node_name}}", + "metric": "", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Network Traffic by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 57 + }, + "hideTimeOverride": true, + "id": 20, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + 
"styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_meta) by (node_id, node_name, instance)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "timeFrom": "1s", + "title": "Cluster members", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "swarmprom" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Swarm Node", + "multi": false, + "name": "node_id", + "options": [], + "query": "node_meta", + "refresh": 1, + "regex": "/node_id=\"([^\"]+)\"/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", + "tags": [ + "ofdocker", + "ofmon" + ], + "tagsQuery": "label_values(node_meta, node_name)", + "type": "query", + "useTags": true + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "30s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + 
"selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Docker Swarm Nodes", + "uid": "BPlb-Sgik", + "version": 3 +} diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-prometheus-dash.json b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-prometheus-dash.json new file mode 100644 index 0000000..00d881a --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-prometheus-dash.json @@ -0,0 +1,1213 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://docs.grafana.org/" + }, + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "panels": [ + { + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#CCA300" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "samples", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Samples Appended", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": 
false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, max(scrape_duration_seconds) by (job))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(process_resident_memory_bytes{job=\"prometheus\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "p8s process resident memory", + "refId": "D", + "step": 20 + }, + { + "expr": "process_virtual_memory_bytes{job=\"prometheus\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "virtual memory", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Profile", + "tooltip": { + "shared": true, 
+ "sort": 2, + "value_type": "individual" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_active_appenders{job=\"prometheus\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "active_appenders", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "expr": "sum(process_open_fds{job=\"prometheus\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "open_fds", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Active Appenders", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, 
+ "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_blocks_loaded{job=\"prometheus\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "blocks", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blocks Loaded", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 5 + }, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_chunks{job=\"prometheus\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "chunks", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head Chunks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "duration-p99", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_gc_duration_seconds{job=\"prometheus\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "duration-p99", + "refId": "A", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_head_gc_duration_seconds_count{job=\"prometheus\"}[5m])", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "collections", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head Block GC Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "duration-p99", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_bucket{job=\"prometheus\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "duration-{{p99}}", + "refId": "A", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "compactions", + "refId": "B", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_failed_total{job=\"prometheus\"}[5m])", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "failed", + "refId": "C", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_triggered_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "triggered", + "refId": "D", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compaction Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_tsdb_reloads_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reloads", + "refId": "A", + "step": 20 + }, + { + "expr": "rate(prometheus_tsdb_reloads_failures_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "failures", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reload Count", + 
"tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_engine_query_duration_seconds{job=\"prometheus\", quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{slice}}_p99", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query Durations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"Prometheus", + "decimals": null, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 35, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_rule_group_duration_seconds{job=\"prometheus\"}) by (quantile)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Group Eval Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + 
"steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_rule_group_iterations_missed_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "missed", + "refId": "B", + "step": 10 + }, + { + "expr": "rate(prometheus_rule_group_iterations_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "iterations", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Group Eval Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "revision": "1.0", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Prometheus 2.0 Stats", + "uid": "mGFfYSRiz", + "version": 1 +} diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-services-dash.json b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-services-dash.json new file mode 100644 index 0000000..06f8834 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/dashboards/swarmprom-services-dash.json @@ -0,0 +1,1690 
@@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Docker Swarm stacks and services metrics", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1635094255540, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_node_id =~\"$node_id\"}) by (container_label_com_docker_swarm_node_id))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": 
false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "hideTimeOverride": true, + "id": 21, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_stack_namespace=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_stack_namespace))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Stacks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "hideTimeOverride": true, 
+ "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_service_name=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_swarm_service_name))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Services", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "hideTimeOverride": true, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + 
"text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Containers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 5, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Service Tasks", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 32, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(engine_daemon_health_checks_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "checks", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(increase(engine_daemon_health_checks_failed_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "failed", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Health Checks", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 11 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[1m])) by (container_label_com_docker_swarm_service_name) * 100 ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage by Service", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"datasource": null, + "decimals": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 11 + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "10,25,100", + "timeFrom": "1m", + "timeShift": null, + "title": "CPU Idle", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval])) by (name)) * 100 ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage by Container (top 10)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 25 + }, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) 
", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(container_memory_cache{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cached {{container_label_com_docker_swarm_service_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 25 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 
0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, avg_over_time(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Container (top 10)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "metric": "", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 53 + }, + "id": 31, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Received", + "refId": "A", + "step": 4 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmited", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 53 + }, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_fs_reads_total[$interval]) )", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads", + "refId": "A", + "step": 4 + }, + { + "expr": 
"sum(irate(container_fs_writes_total[$interval])) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes ", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 53 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * 
on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "swarmprom" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Swarm Node", + "multi": false, + "name": "node_id", + "options": [], + "query": "node_meta", + "refresh": 2, + "regex": "/node_id=\"([^\"]+)\"/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", + "tags": [ + "node01.netology.yc", + "node05.netology.yc", + "node04.netology.yc", + "node03.netology.yc", + "node06.netology.yc", + "node02.netology.yc" + ], + "tagsQuery": "label_values(node_meta, node_name)", + "type": "query", + "useTags": true + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "30s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + 
"selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Docker Swarm Services", + "uid": "zr_baSRmk", + "version": 1 +} diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/datasources/prometheus.yaml b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/datasources/prometheus.yaml new file mode 100644 index 0000000..31a8607 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/datasources/prometheus.yaml @@ -0,0 +1,13 @@ +apiVersion: 1 + +deleteDatasources: + - name: Prometheus + +datasources: +- name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + version: 1 + editable: true diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/swarmprom_dashboards.yml b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/swarmprom_dashboards.yml new file mode 100644 index 0000000..ee18e35 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/grafana/swarmprom_dashboards.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/dashboards diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/Dockerfile b/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/Dockerfile new file mode 100644 index 0000000..dd3b4b6 --- /dev/null +++ 
b/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/Dockerfile @@ -0,0 +1,10 @@ +FROM prom/node-exporter:v0.16.0 + +ENV NODE_ID=none + +USER root + +COPY conf /etc/node-exporter/ + +ENTRYPOINT [ "/etc/node-exporter/docker-entrypoint.sh" ] +CMD [ "/bin/node_exporter" ] diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/conf/docker-entrypoint.sh b/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/conf/docker-entrypoint.sh new file mode 100755 index 0000000..db422f4 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/node-exporter/conf/docker-entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e + +NODE_NAME=$(cat /etc/nodename) +echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom + +set -- /bin/node_exporter "$@" + +exec "$@" diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/Dockerfile b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/Dockerfile new file mode 100644 index 0000000..cb31a08 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/Dockerfile @@ -0,0 +1,10 @@ +FROM prom/prometheus:v2.5.0 +# https://hub.docker.com/r/prom/prometheus/tags/ + +ENV WEAVE_TOKEN=none + +COPY conf /etc/prometheus/ + +ENTRYPOINT [ "/etc/prometheus/docker-entrypoint.sh" ] +CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ + "--storage.tsdb.path=/prometheus" ] diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/conf/docker-entrypoint.sh b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/conf/docker-entrypoint.sh new file mode 100755 index 0000000..3acd9f3 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/conf/docker-entrypoint.sh @@ -0,0 +1,48 @@ +#!/bin/sh -e + +cat /etc/prometheus/prometheus.yml > /tmp/prometheus.yml +cat 
/etc/prometheus/weave-cortex.yml | \ + sed "s@#password: #@password: '$WEAVE_TOKEN'@g" > /tmp/weave-cortex.yml + +#JOBS=mongo-exporter:9111 redis-exporter:9112 + +if [ ${JOBS+x} ]; then + +for job in $JOBS +do +echo "adding job $job" + +SERVICE=$(echo "$job" | cut -d":" -f1) +PORT=$(echo "$job" | cut -d":" -f2) + +cat >>/tmp/prometheus.yml <>/tmp/weave-cortex.yml <# + +global: + scrape_interval: 15s + evaluation_interval: 15s + + external_labels: + monitor: 'promswarm' + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'dockerd-exporter' + dns_sd_configs: + - names: + - 'tasks.dockerd-exporter' + type: 'A' + port: 9323 + + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 + + - job_name: 'node-exporter' + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_node.rules.yml b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_node.rules.yml new file mode 100644 index 0000000..5b0eaaf --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_node.rules.yml @@ -0,0 +1,44 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_node.rules.yml + rules: + - alert: node_cpu_usage + expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m]) * ON(instance) GROUP_LEFT(node_name) + node_meta * 100) BY (node_name)) > 50 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize + $value}}%. 
+ summary: CPU alert for Swarm node '{{ $labels.node_name }}' + - alert: node_memory_usage + expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) + * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize + $value}}%. + summary: Memory alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_usage + expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"}) + * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"}) * ON(instance) GROUP_LEFT(node_name) + node_meta > 85 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize + $value}}%. + summary: Disk alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_fill_rate_6h + expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h], 6 * 3600) * ON(instance) + GROUP_LEFT(node_name) node_meta < 0 + for: 1h + labels: + severity: critical + annotations: + description: Swarm node {{ $labels.node_name }} disk is going to fill up in + 6h. 
+ summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' diff --git a/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_task.rules.yml b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_task.rules.yml new file mode 100644 index 0000000..db9aa7e --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/monitoring/prometheus/rules/swarm_task.rules.yml @@ -0,0 +1,24 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_task.rules.yml + rules: + - alert: task_high_cpu_usage_50 + expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) + * 100 > 50 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize + $value}}%.' + summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' + - alert: task_high_memory_usage_1g + expr: sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) > 1e+09 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize + $value}}.' 
+ summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' diff --git a/src/homework/05-virtualization/5.5/ansible/roles/configure-hosts-file/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/configure-hosts-file/tasks/main.yml new file mode 100644 index 0000000..27083ef --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/configure-hosts-file/tasks/main.yml @@ -0,0 +1,9 @@ +--- + - name: Configure Hosts File + lineinfile: > + path=/etc/hosts + regexp='.*{{ item }}$' + line="{{ hostvars[item].ansible_default_ipv4.address }} {{item}}" + state=present + when: hostvars[item].ansible_default_ipv4.address is defined + with_items: "{{ groups['nodes'] }}" diff --git a/src/homework/05-virtualization/5.5/ansible/roles/docker-installation/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/docker-installation/tasks/main.yml new file mode 100644 index 0000000..f879877 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/docker-installation/tasks/main.yml @@ -0,0 +1,19 @@ +--- + - name: Add docker repository + command: yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo + + - name: Installing docker package + yum: > + pkg={{ item }} + state=present + update_cache=yes + with_items: + - docker-ce + - docker-ce-cli + - containerd.io + + - name: Enable docker daemon + systemd: + name: docker + state: started + enabled: yes diff --git a/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-manager/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-manager/tasks/main.yml new file mode 100644 index 0000000..a632d8f --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-manager/tasks/main.yml @@ -0,0 +1,4 @@ +--- + - name: Add Managers to the Swarm + shell: "docker swarm join --token {{ 
hostvars['node01.netology.yc']['manager_token']['stdout'] }} {{ hostvars['node01.netology.yc']['ansible_default_ipv4']['address'] }}:2377" + tags: swarm diff --git a/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-worker/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-worker/tasks/main.yml new file mode 100644 index 0000000..9c0c141 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-add-worker/tasks/main.yml @@ -0,0 +1,4 @@ +--- + - name: Add Workers to the Swarm + shell: "docker swarm join --token {{ hostvars['node01.netology.yc']['worker_token']['stdout'] }} {{ hostvars['node01.netology.yc']['ansible_default_ipv4']['address'] }}:2377" + tags: swarm diff --git a/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-init/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-init/tasks/main.yml new file mode 100644 index 0000000..958e3bf --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-init/tasks/main.yml @@ -0,0 +1,14 @@ +--- + - name: Initialize Docker Swarm + shell: "docker swarm init --advertise-addr={{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }}:2377" + tags: swarm + + - name: Get the Manager join-token + shell: docker swarm join-token --quiet manager + register: manager_token + tags: swarm + + - name: Get the worker join-token + shell: docker swarm join-token --quiet worker + register: worker_token + tags: swarm diff --git a/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-leave/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-leave/tasks/main.yml new file mode 100644 index 0000000..7f86816 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/docker-swarm-leave/tasks/main.yml @@ -0,0 +1,4 @@ +--- + - name: Nodes Leaving the Swarm + shell: docker swarm leave -f + ignore_errors: true diff --git 
a/src/homework/05-virtualization/5.5/ansible/roles/install-tools/tasks/main.yml b/src/homework/05-virtualization/5.5/ansible/roles/install-tools/tasks/main.yml new file mode 100644 index 0000000..3ae9aa7 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/roles/install-tools/tasks/main.yml @@ -0,0 +1,14 @@ +--- + - name: Installing tools + yum: > + pkg={{ item }} + state=present + update_cache=yes + with_items: + - ntp + - python + - tcpdump + - wget + - openssl + - curl + - git diff --git a/src/homework/05-virtualization/5.5/ansible/swarm-deploy-cluster.yml b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-cluster.yml new file mode 100644 index 0000000..b97f105 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-cluster.yml @@ -0,0 +1,41 @@ +--- + - name: Install of Requrements Tools + hosts: nodes + become: yes + become_user: root + remote_user: centos + roles: + - install-tools + - configure-hosts-file + + - name: Install Docker Engine + hosts: nodes + become: yes + become_user: root + remote_user: centos + roles: + - docker-installation + + - name: Initialize Docker Swarm Cluster + hosts: active + become: yes + become_user: root + remote_user: centos + roles: + - docker-swarm-init + + - name: Add Managers Swarm Cluster + hosts: standby + become: yes + become_user: root + remote_user: centos + roles: + - docker-swarm-add-manager + + - name: Add Workers to the Swarm Cluster + hosts: workers + become: yes + become_user: root + remote_user: centos + roles: + - docker-swarm-add-worker diff --git a/src/homework/05-virtualization/5.5/ansible/swarm-deploy-stack.yml b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-stack.yml new file mode 100644 index 0000000..eaea666 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-stack.yml @@ -0,0 +1,17 @@ +--- + - hosts: nodes + become: yes + become_user: root + remote_user: centos + + tasks: + - name: Check Current Leader + shell: docker node ls | grep {{ 
ansible_hostname }} + register: docker_info + changed_when: false + + - name: Run deploy, if node is leader + shell: docker stack deploy --compose-file /opt/monitoring/docker-compose.yml swarm_monitoring + when: + - "'Leader' in docker_info.stdout" + - "'Active' in docker_info.stdout" diff --git a/src/homework/05-virtualization/5.5/ansible/swarm-deploy-sync.yml b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-sync.yml new file mode 100644 index 0000000..d85b5d6 --- /dev/null +++ b/src/homework/05-virtualization/5.5/ansible/swarm-deploy-sync.yml @@ -0,0 +1,15 @@ +--- + - hosts: nodes + become: yes + become_user: root + remote_user: centos + + tasks: + - name: Synchronization + copy: + src: monitoring/ + dest: "/opt/monitoring/" + owner: root + group: root + mode: 0644 + become: true diff --git a/src/homework/05-virtualization/5.5/readme.md b/src/homework/05-virtualization/5.5/readme.md index 24f3236..3c7babf 100644 --- a/src/homework/05-virtualization/5.5/readme.md +++ b/src/homework/05-virtualization/5.5/readme.md @@ -29,4 +29,82 @@ > - Что такое Overlay Network? `Overlay Network` - это особый тип docker-сети, который позволяет связать контейнеры, запущенные на разных нодах. -То есть, данная сеть позволяет направить трафик на определённый контейнер на определённой ноде только по имени контейнера. \ No newline at end of file +То есть, данная сеть позволяет направить трафик на определённый контейнер на определённой ноде только по имени контейнера. + +### Задача 2 + +> Создать ваш первый Docker Swarm кластер в Яндекс.Облаке +> +> Для получения зачета, вам необходимо предоставить скриншот из терминала (консоли), с выводом команды: +> +> ```shell +> docker node ls +> ``` + +Для начала необходимо выполнить шаги создания образа ОС в облаке, следуя инструкции из домашней работы [5.4](/src/homework/05-virtualization/5.4/readme.md#Задача 1). + +После того как образ будет создан: +1. 
Скопировать секреты для `terraform` из [variables.tf.example](./terraform/variables.tf.example) в `variables.tf` +2. Затем нужно изменить поля в конфигурации. +3. Инициализировать конфигурацию: `terraform init` (не работает без vpn, при получении данных отдаётся 403 статус код) +4. Просмотреть конфигурацию `terraform plan` +5. Применить конфигурацию к облаку `terraform apply -auto-approve` +6. Подключиться по ssh к машине, чей ip-адрес будет выведен в строке с переменной `external_ip_address_node01` и выполнить необходимую команду: + ```shell + ssh centos@51.250.64.218 + sudo docker node ls + ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION + ttj5yee26pppcezlys0g0pzum * node01.netology.yc Ready Active Leader 20.10.16 + a717bja2genbm7c6prdxailfy node02.netology.yc Ready Active Reachable 20.10.16 + qijk98huwd1y1omhsphc28rjr node03.netology.yc Ready Active Reachable 20.10.16 + pbmbmjeawqf7sst6yia40llwp node04.netology.yc Ready Active 20.10.16 + y6g2mtvdcitnmyzxwnipklk82 node05.netology.yc Ready Active 20.10.16 + s7f1f34ef238lvd7qltftb6jt node06.netology.yc Ready Active 20.10.16 + ``` + +### Задача 3 + +> Создать ваш первый, готовый к боевой эксплуатации кластер мониторинга, состоящий из стека микросервисов. +> +> Для получения зачета, вам необходимо предоставить скриншот из терминала (консоли), с выводом команды: +> +> ```shell +> docker service ls +> ``` + +Стек сервисов был развёрнут в рамках [второго задания](#задача-2) при запуске `terraform`.
+ +```shell +sudo docker service ls +ID NAME MODE REPLICAS IMAGE PORTS +4iot28xmyl3w swarm_monitoring_alertmanager replicated 1/1 stefanprodan/swarmprom-alertmanager:v0.14.0 +5akoz6vjp9a3 swarm_monitoring_caddy replicated 1/1 stefanprodan/caddy:latest *:3000->3000/tcp, *:9090->9090/tcp, *:9093-9094->9093-9094/tcp +mf0c8h4vyuue swarm_monitoring_cadvisor global 6/6 google/cadvisor:latest +vbgaltbn2t17 swarm_monitoring_dockerd-exporter global 6/6 stefanprodan/caddy:latest +ihmlsx3bmxs0 swarm_monitoring_grafana replicated 1/1 stefanprodan/swarmprom-grafana:5.3.4 +uju9p0ws4vwm swarm_monitoring_node-exporter global 6/6 stefanprodan/swarmprom-node-exporter:v0.16.0 +8ipjzv0vax7m swarm_monitoring_prometheus replicated 1/1 stefanprodan/swarmprom-prometheus:v2.5.0 +96xidxmifhco swarm_monitoring_unsee replicated 1/1 cloudflare/unsee:v0.8.0 +``` + +Для того, чтобы зайти на web-панель `grafana`, необходимо: +1. Выяснить, на какой ноде был развёрнут данный сервис + ```shell + sudo docker service ps swarm_monitoring_grafana + ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS + t97g9zhyggja swarm_monitoring_grafana.1 stefanprodan/swarmprom-grafana:5.3.4 node02.netology.yc Running Running 7 minutes ago + ``` +2. Выяснить внешний ip-адрес искомой ноды. Для этого можно посмотреть в [`ansible/inventory`](./ansible/inventory) файл. + Либо выполнить команду: + ```shell + sudo docker inspect node02.netology.yc --format '{{ .Status.Addr }}' + 192.168.101.12 + ``` +3. Зайти по полученному адресу на порт `:3000`: `http://192.168.101.12:3000`. + +### Clean up + +Удаление всей инфраструктуры: + +1. Удаление ВМ, сетей: `terraform destroy -auto-approve` +2. 
Удаление образа ОС: `yc compute image delete --id {{ image_id }}` \ No newline at end of file diff --git a/src/homework/05-virtualization/5.5/terraform/.gitignore b/src/homework/05-virtualization/5.5/terraform/.gitignore new file mode 100644 index 0000000..e64c0a2 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/.gitignore @@ -0,0 +1,38 @@ +variables.tf + +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* +.terraform.lock.hcl + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +# +*.tfvars + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc \ No newline at end of file diff --git a/src/homework/05-virtualization/5.5/terraform/ansible.cfg b/src/homework/05-virtualization/5.5/terraform/ansible.cfg new file mode 100644 index 0000000..a166c2d --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/ansible.cfg @@ -0,0 +1,6 @@ +[defaults] +inventory=./inventory +deprecation_warnings=False +command_warnings=False +ansible_port=22 +host_key_checking = False diff --git a/src/homework/05-virtualization/5.5/terraform/ansible.tf b/src/homework/05-virtualization/5.5/terraform/ansible.tf new file mode 100644 index 0000000..f6b64cb --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/ansible.tf @@ -0,0 +1,39 @@
+resource "null_resource" "wait" { + provisioner "local-exec" { + command = "sleep 100" + } + + depends_on = [ + local_file.inventory + ] +} + +resource "null_resource" "cluster" { + provisioner "local-exec" { + command = "ANSIBLE_FORCE_COLOR=1 ansible-playbook -i ../ansible/inventory ../ansible/swarm-deploy-cluster.yml" + } + + depends_on = [ + null_resource.wait + ] +} + +resource "null_resource" "sync" { + provisioner "local-exec" { + command = "ANSIBLE_FORCE_COLOR=1 ansible-playbook -i ../ansible/inventory ../ansible/swarm-deploy-sync.yml" + } + + depends_on = [ + null_resource.cluster + ] +} + +resource "null_resource" "monitoring" { + provisioner "local-exec" { + command = "ANSIBLE_FORCE_COLOR=1 ansible-playbook -i ../ansible/inventory ../ansible/swarm-deploy-stack.yml --limit=managers" + } + + depends_on = [ + null_resource.sync + ] +} diff --git a/src/homework/05-virtualization/5.5/terraform/inventory.tf b/src/homework/05-virtualization/5.5/terraform/inventory.tf new file mode 100644 index 0000000..89902e3 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/inventory.tf @@ -0,0 +1,36 @@ +resource "local_file" "inventory" { + content = <<-DOC + # Ansible inventory containing variable values from Terraform. + # Generated by Terraform. 
+ + [nodes:children] + managers + workers + + [managers:children] + active + standby + + [active] + node01.netology.yc ansible_host=${yandex_compute_instance.node01.network_interface.0.nat_ip_address} + + [standby] + node02.netology.yc ansible_host=${yandex_compute_instance.node02.network_interface.0.nat_ip_address} + node03.netology.yc ansible_host=${yandex_compute_instance.node03.network_interface.0.nat_ip_address} + + [workers] + node04.netology.yc ansible_host=${yandex_compute_instance.node04.network_interface.0.nat_ip_address} + node05.netology.yc ansible_host=${yandex_compute_instance.node05.network_interface.0.nat_ip_address} + node06.netology.yc ansible_host=${yandex_compute_instance.node06.network_interface.0.nat_ip_address} + DOC + filename = "../ansible/inventory" + + depends_on = [ + yandex_compute_instance.node01, + yandex_compute_instance.node02, + yandex_compute_instance.node03, + yandex_compute_instance.node04, + yandex_compute_instance.node05, + yandex_compute_instance.node06 + ] +} diff --git a/src/homework/05-virtualization/5.5/terraform/network.tf b/src/homework/05-virtualization/5.5/terraform/network.tf new file mode 100644 index 0000000..fccb397 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/network.tf @@ -0,0 +1,11 @@ +# Network +resource "yandex_vpc_network" "default" { + name = "net" +} + +resource "yandex_vpc_subnet" "default" { + name = "subnet" + zone = "ru-central1-a" + network_id = "${yandex_vpc_network.default.id}" + v4_cidr_blocks = ["192.168.101.0/24"] +} diff --git a/src/homework/05-virtualization/5.5/terraform/node01.tf b/src/homework/05-virtualization/5.5/terraform/node01.tf new file mode 100644 index 0000000..fbbdc13 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node01.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node01" { + name = "node01" + zone = "ru-central1-a" + hostname = "node01.netology.yc" + allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + 
boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node01" + type = "network-nvme" + size = "10" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.11" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git a/src/homework/05-virtualization/5.5/terraform/node02.tf b/src/homework/05-virtualization/5.5/terraform/node02.tf new file mode 100644 index 0000000..6221ad4 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node02.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node02" { + name = "node02" + zone = "ru-central1-a" + hostname = "node02.netology.yc" + allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node02" + type = "network-nvme" + size = "10" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.12" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git a/src/homework/05-virtualization/5.5/terraform/node03.tf b/src/homework/05-virtualization/5.5/terraform/node03.tf new file mode 100644 index 0000000..59d03f0 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node03.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node03" { + name = "node03" + zone = "ru-central1-a" + hostname = "node03.netology.yc" + allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node03" + type = "network-nvme" + size = "10" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.13" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git 
a/src/homework/05-virtualization/5.5/terraform/node04.tf b/src/homework/05-virtualization/5.5/terraform/node04.tf new file mode 100644 index 0000000..82621af --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node04.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node04" { + name = "node04" + zone = "ru-central1-a" + hostname = "node04.netology.yc" + allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node04" + type = "network-nvme" + size = "40" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.14" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git a/src/homework/05-virtualization/5.5/terraform/node05.tf b/src/homework/05-virtualization/5.5/terraform/node05.tf new file mode 100644 index 0000000..ff0f8cf --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node05.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node05" { + name = "node05" + zone = "ru-central1-a" + hostname = "node05.netology.yc" + allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node05" + type = "network-nvme" + size = "40" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.15" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git a/src/homework/05-virtualization/5.5/terraform/node06.tf b/src/homework/05-virtualization/5.5/terraform/node06.tf new file mode 100644 index 0000000..d54a2f2 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/node06.tf @@ -0,0 +1,30 @@ +resource "yandex_compute_instance" "node06" { + name = "node06" + zone = "ru-central1-a" + hostname = "node06.netology.yc" + 
allow_stopping_for_update = true + + resources { + cores = 4 + memory = 8 + } + + boot_disk { + initialize_params { + image_id = "${var.centos-7-base}" + name = "root-node06" + type = "network-nvme" + size = "40" + } + } + + network_interface { + subnet_id = "${yandex_vpc_subnet.default.id}" + nat = true + ip_address = "192.168.101.16" + } + + metadata = { + ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}" + } +} diff --git a/src/homework/05-virtualization/5.5/terraform/output.tf b/src/homework/05-virtualization/5.5/terraform/output.tf new file mode 100644 index 0000000..760dd95 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/output.tf @@ -0,0 +1,47 @@ +output "internal_ip_address_node01" { + value = "${yandex_compute_instance.node01.network_interface.0.ip_address}" +} + +output "external_ip_address_node01" { + value = "${yandex_compute_instance.node01.network_interface.0.nat_ip_address}" +} + +output "internal_ip_address_node02" { + value = "${yandex_compute_instance.node02.network_interface.0.ip_address}" +} + +output "external_ip_address_node02" { + value = "${yandex_compute_instance.node02.network_interface.0.nat_ip_address}" +} + +output "internal_ip_address_node03" { + value = "${yandex_compute_instance.node03.network_interface.0.ip_address}" +} + +output "external_ip_address_node03" { + value = "${yandex_compute_instance.node03.network_interface.0.nat_ip_address}" +} + +output "internal_ip_address_node04" { + value = "${yandex_compute_instance.node04.network_interface.0.ip_address}" +} + +output "external_ip_address_node04" { + value = "${yandex_compute_instance.node04.network_interface.0.nat_ip_address}" +} + +output "internal_ip_address_node05" { + value = "${yandex_compute_instance.node05.network_interface.0.ip_address}" +} + +output "external_ip_address_node05" { + value = "${yandex_compute_instance.node05.network_interface.0.nat_ip_address}" +} + +output "internal_ip_address_node06" { + value = 
"${yandex_compute_instance.node06.network_interface.0.ip_address}" +} + +output "external_ip_address_node06" { + value = "${yandex_compute_instance.node06.network_interface.0.nat_ip_address}" +} diff --git a/src/homework/05-virtualization/5.5/terraform/provider.tf b/src/homework/05-virtualization/5.5/terraform/provider.tf new file mode 100644 index 0000000..6a6c7b0 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/provider.tf @@ -0,0 +1,14 @@ +# Provider +terraform { + required_providers { + yandex = { + source = "yandex-cloud/yandex" + } + } +} + +provider "yandex" { + token = var.yandex_cloud_token + cloud_id = var.yandex_cloud_id + folder_id = var.yandex_folder_id +} diff --git a/src/homework/05-virtualization/5.5/terraform/variables.tf.example b/src/homework/05-virtualization/5.5/terraform/variables.tf.example new file mode 100644 index 0000000..6d20b21 --- /dev/null +++ b/src/homework/05-virtualization/5.5/terraform/variables.tf.example @@ -0,0 +1,22 @@ +# Заменить на ID своего облака +# https://console.cloud.yandex.ru/cloud?section=overview +variable "yandex_cloud_id" { + default = "b1gu1gt5nqi6lqgu3t7s" +} + +# Заменить на Folder своего облака +# https://console.cloud.yandex.ru/cloud?section=overview +variable "yandex_folder_id" { + default = "b1gaec42k169jqpo02f7" +} + +# OAuth токен, используемый утилитой yc. Применялся на этапе с packer. +variable "yandex_cloud_token" { + default = "" +} + +# Заменить на ID своего образа +# ID можно узнать с помощью команды yc compute image list +variable "centos-7-base" { + default = "fd8ft6norj68lo29qlpi" +}