add homework 5.4

2022-05-11 10:55:12 +07:00
parent dd9731b3d5
commit c464d8ccbe
29 changed files with 5741 additions and 0 deletions

View File: ansible/.gitignore

@@ -0,0 +1 @@
inventory

View File: ansible/ansible.cfg

@@ -0,0 +1,5 @@
[defaults]
inventory=./inventory
deprecation_warnings=False
command_warnings=False
remote_port=22

View File: ansible/inventory.example

@@ -0,0 +1,5 @@
[nodes:children]
manager

[manager]
node01.netology.cloud ansible_host=

View File: ansible/provision.yml

@@ -0,0 +1,63 @@
---
- hosts: nodes
  become: yes
  become_user: root
  remote_user: centos
  tasks:
    - name: Create directory for ssh-keys
      file:
        state: directory
        mode: '0700'
        dest: /root/.ssh/
    - name: Add rsa key to /root/.ssh/authorized_keys
      copy:
        src: ~/.ssh/id_rsa.pub
        dest: /root/.ssh/authorized_keys
        owner: root
        mode: '0600'
      ignore_errors: yes
    - name: Check DNS
      command: host -t A google.com
    - name: Install tools
      yum:
        name:
          - git
          - curl
        state: present
        update_cache: yes
    - name: Add docker repository
      command: yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
    - name: Install docker packages
      yum:
        name:
          - docker-ce
          - docker-ce-cli
          - containerd.io
        state: present
        update_cache: yes
    - name: Enable and start docker daemon
      systemd:
        name: docker
        state: started
        enabled: yes
    - name: Install docker-compose
      shell: curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/bin/docker-compose && chmod +x /usr/bin/docker-compose
      args:
        creates: /usr/bin/docker-compose
    - name: Copy the monitoring stack to the host
      copy:
        src: stack/
        dest: /opt/stack/
        owner: root
        group: root
        mode: '0644'
      become: true
    - name: Pull all images in compose
      command: docker-compose -f /opt/stack/docker-compose.yaml pull
    - name: Up all services in compose
      command: docker-compose -f /opt/stack/docker-compose.yaml up -d

View File: ansible/stack/.env

@@ -0,0 +1,2 @@
ADMIN_USER=admin
ADMIN_PASSWORD=admin

View File: ansible/stack/alertmanager/config.yml

@@ -0,0 +1,11 @@
route:
  receiver: 'slack'

receivers:
  - name: 'slack'
    slack_configs:
      - send_resolved: true
        text: "{{ .CommonAnnotations.description }}"
        username: 'Prometheus'
        channel: '#<channel-name>'
        api_url: 'https://hooks.slack.com/services/<webhook-id>'

View File: ansible/stack/caddy/Caddyfile

@@ -0,0 +1,39 @@
:9090 {
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}
    proxy / prometheus:9090 {
        transparent
    }
    errors stderr
    tls off
}

:9093 {
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}
    proxy / alertmanager:9093 {
        transparent
    }
    errors stderr
    tls off
}

:9091 {
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}
    proxy / pushgateway:9091 {
        transparent
    }
    errors stderr
    tls off
}

:3000 {
    proxy / grafana:3000 {
        transparent
        websocket
    }
    errors stderr
    tls off
}

View File: ansible/stack/docker-compose.yaml

@@ -0,0 +1,121 @@
version: '2.1'

networks:
  monitoring:
    driver: bridge

volumes:
  prometheus_data: {}
  grafana_data: {}

services:
  prometheus:
    image: prom/prometheus:v2.17.1
    container_name: prometheus
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=15d'
      - '--web.enable-lifecycle'
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  alertmanager:
    image: prom/alertmanager:v0.20.0
    container_name: alertmanager
    volumes:
      - ./alertmanager:/etc/alertmanager
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  nodeexporter:
    image: prom/node-exporter:v0.18.1
    container_name: nodeexporter
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  cadvisor:
    image: gcr.io/google-containers/cadvisor:v0.34.0
    container_name: cadvisor
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /cgroup:/cgroup:ro
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  grafana:
    image: grafana/grafana:7.4.2
    container_name: grafana
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  pushgateway:
    image: prom/pushgateway:v1.2.0
    container_name: pushgateway
    restart: unless-stopped
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

  caddy:
    image: stefanprodan/caddy
    container_name: caddy
    ports:
      - "0.0.0.0:3000:3000"
      - "0.0.0.0:9090:9090"
      - "0.0.0.0:9093:9093"
      - "0.0.0.0:9091:9091"
    volumes:
      - ./caddy:/etc/caddy
    environment:
      - ADMIN_USER=${ADMIN_USER:-admin}
      - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
    restart: always
    networks:
      - monitoring
    labels:
      org.label-schema.group: "monitoring"

View File

@@ -0,0 +1,36 @@
version: '2.1'

services:
  nodeexporter:
    image: prom/node-exporter:v0.18.1
    container_name: nodeexporter
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped
    network_mode: host
    labels:
      org.label-schema.group: "monitoring"

  cadvisor:
    image: gcr.io/google-containers/cadvisor:v0.34.0
    container_name: cadvisor
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /cgroup:/cgroup:ro
    restart: unless-stopped
    network_mode: host
    labels:
      org.label-schema.group: "monitoring"

View File

@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: 'Prometheus'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    editable: true
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards

View File

@@ -0,0 +1,11 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    orgId: 1
    url: http://prometheus:9090
    basicAuth: false
    isDefault: true
    editable: true

View File: ansible/stack/prometheus/alert.rules

@@ -0,0 +1,69 @@
groups:
  - name: targets
    rules:
      - alert: monitor_service_down
        expr: up == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "Monitor service non-operational"
          description: "Service {{ $labels.instance }} is down."

  - name: host
    rules:
      - alert: high_cpu_load
        expr: node_load1 > 1.5
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server under high load"
          description: "Docker host is under high load, the avg load 1m is at {{ $value }}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
      - alert: high_memory_load
        expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / sum(node_memory_MemTotal_bytes) * 100 > 85
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server memory is almost full"
          description: "Docker host memory usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
      - alert: high_storage_load
        expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server storage is almost full"
          description: "Docker host storage usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

  - name: containers
    rules:
      #- alert: jenkins_down
      #  expr: absent(container_memory_usage_bytes{name="jenkins"})
      #  for: 30s
      #  labels:
      #    severity: critical
      #  annotations:
      #    summary: "Jenkins down"
      #    description: "Jenkins container is down for more than 30 seconds."
      #- alert: jenkins_high_cpu
      #  expr: sum(rate(container_cpu_usage_seconds_total{name="jenkins"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
      #  for: 30s
      #  labels:
      #    severity: warning
      #  annotations:
      #    summary: "Jenkins high CPU usage"
      #    description: "Jenkins CPU usage is {{ humanize $value }}%."
      #- alert: jenkins_high_memory
      #  expr: sum(container_memory_usage_bytes{name="jenkins"}) > 1200000000
      #  for: 30s
      #  labels:
      #    severity: warning
      #  annotations:
      #    summary: "Jenkins high memory usage"
      #    description: "Jenkins memory consumption is at {{ humanize $value }}."

View File: ansible/stack/prometheus/prometheus.yml

@@ -0,0 +1,55 @@
global:
  scrape_interval: 15s
  evaluation_interval: 15s

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'prometheus'

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
  - "alert.rules"

# Scrape configurations, one per monitored endpoint.
scrape_configs:
  - job_name: 'nodeexporter'
    scrape_interval: 5s
    static_configs:
      - targets: ['nodeexporter:9100']

  - job_name: 'cadvisor'
    scrape_interval: 5s
    static_configs:
      - targets: ['cadvisor:8080']

  - job_name: 'prometheus'
    scrape_interval: 10s
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'pushgateway'
    scrape_interval: 10s
    honor_labels: true
    static_configs:
      - targets: ['pushgateway:9091']

  - job_name: 'alertmanager'
    scrape_interval: 10s
    honor_labels: true
    static_configs:
      - targets: ['alertmanager:9093']

  - job_name: 'grafana'
    scrape_interval: 10s
    honor_labels: true
    static_configs:
      - targets: ['grafana:3000']

alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - 'alertmanager:9093'

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File: packer/.gitignore

@@ -0,0 +1 @@
centos-7-base.json

View File: packer/centos-7-base.example.json

@@ -0,0 +1,27 @@
{
  "builders": [
    {
      "disk_type": "network-nvme",
      "folder_id": "",
      "image_description": "built by packer",
      "image_family": "centos",
      "image_name": "centos-7-base",
      "source_image_family": "centos-7",
      "ssh_username": "centos",
      "subnet_id": "",
      "token": "",
      "type": "yandex",
      "use_ipv4_nat": true,
      "zone": "ru-central1-a"
    }
  ],
  "provisioners": [
    {
      "inline": [
        "sudo yum -y update",
        "sudo yum -y install bridge-utils bind-utils iptables curl net-tools tcpdump rsync telnet openssh-server"
      ],
      "type": "shell"
    }
  ]
}

View File: README.md

@@ -0,0 +1,77 @@
A solution for the [homework assignment](https://github.com/netology-code/virt-homeworks/blob/virt-11/05-virt-04-docker-compose/README.md)
on the topic "5.4. Orchestrating a group of Docker containers using Docker Compose as an example".

## Q/A

### Task 1

> Build your own operating system image with Packer.
>
> To get credit, you must provide:
> - A screenshot of the page, as on the slide from the presentation (slide 37).

A step-by-step guide to working with Yandex Cloud:

1. Install `yc`: `curl -sSL https://storage.yandexcloud.net/yandexcloud-yc/install.sh | bash`
2. Initialize the configuration: `yc init`
3. Check that everything works:
   ```shell
   yc compute image list
   +----+------+--------+-------------+--------+
   | ID | NAME | FAMILY | PRODUCT IDS | STATUS |
   +----+------+--------+-------------+--------+
   +----+------+--------+-------------+--------+
   ```
4. Create a network: `yc vpc network create --name net`
5. Create a subnet: `yc vpc subnet create --name my-subnet-a --zone ru-central1-a --range 10.1.2.0/24 --network-name net --description "test subnet for test net"`
6. Copy the example packer configuration [centos-7-base.example.json](./packer/centos-7-base.example.json) to `centos-7-base.json`.
7. Fill in the missing fields in the configuration.
8. Validate the configuration:
   ```shell
   packer validate packer/centos-7-base.json
   The configuration is valid.
   ```
9. Build the image: `packer build packer/centos-7-base.json` (see the verification sketch after this list).
10. Delete the subnet: `yc vpc subnet delete --name my-subnet-a`
11. Delete the network: `yc vpc network delete --name net`
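If the build succeeds, the new image shows up in the image list. An illustrative check (the ID shown here is the one deleted later in the clean-up step; your values will differ):

```shell
yc compute image list
+----------------------+---------------+--------+-------------+--------+
|          ID          |     NAME      | FAMILY | PRODUCT IDS | STATUS |
+----------------------+---------------+--------+-------------+--------+
| fd8oponkic4t99ecuk8k | centos-7-base | centos |             | READY  |
+----------------------+---------------+--------+-------------+--------+
```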
**Result**:

![cloud-images.png](./assets/cloud-images.png)

### Task 2

> Create your first virtual machine in Yandex Cloud.
>
> To get credit, you must provide:
> - A screenshot of the properties page of the created VM

1. Copy the secrets for `terraform` from [variables.tf.example](./terraform/variables.tf.example) to `variables.tf`.
2. Adjust the fields in the configuration.
3. Initialize the configuration: `terraform init` (does not work without a VPN: data requests return a 403 status code).
4. Review the execution plan: `terraform plan`
5. Apply the configuration to the cloud: `terraform apply -auto-approve` (see the sketch after this list).
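After `apply`, the Terraform outputs declared for the instance can be used to fetch the address that goes into `ansible_host=` in the Ansible inventory (a minimal sketch using the output names from this commit):

```shell
# Prints the external (NAT) address of node01
terraform output external_ip_address_node01_yandex_cloud
```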
**Result**:

![cloud-vm.png](./assets/cloud-vm.png)

### Task 3

> Create your first production-ready monitoring component consisting of a stack of microservices.
>
> To get credit, you must provide:
> - A screenshot of the running Grafana web interface with current metrics

1. Copy the inventory file for `ansible` from [inventory.example](./ansible/inventory.example) to `inventory`.
2. Run `ansible`: `cd ansible && ansible-playbook provision.yml` (a smoke-test sketch follows this list).
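Before opening Grafana, the stack endpoints can be smoke-tested from the workstation. A sketch, assuming the default `admin`/`admin` credentials from the stack's `.env` and the VM's external address exported as `NODE_IP` (both are assumptions of this sketch):

```shell
# Grafana is proxied by Caddy on :3000 without basic auth (prints the HTTP status code)
curl -s -o /dev/null -w '%{http_code}\n' "http://$NODE_IP:3000"

# Prometheus and Alertmanager sit behind Caddy basic auth
curl -s -u admin:admin "http://$NODE_IP:9090/-/healthy"
curl -s -u admin:admin "http://$NODE_IP:9093/-/healthy"
```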
**Result**:

![grafana.png](./assets/grafana.png)

### Clean up

Deleting the entire infrastructure:

1. Delete the VM and networks: `terraform destroy -auto-approve`
2. Delete the OS image: `yc compute image delete --id fd8oponkic4t99ecuk8k` (a verification sketch follows this list).
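To make sure nothing is left behind, the remaining resources can be listed with `yc`; both lists should come back empty (a sketch):

```shell
yc compute image list
yc vpc network list
```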

View File: terraform/.gitignore

@@ -0,0 +1,37 @@
variables.tf
# Local .terraform directories
**/.terraform/*
# .tfstate files
*.tfstate
*.tfstate.*
# Crash log files
crash.log
crash.*.log
# Exclude all .tfvars files, which are likely to contain sensitive data, such as
# passwords, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
#
*.tfvars
# Ignore override files as they are usually used to override resources locally and so
# are not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json
# Include override files you do wish to add to version control using negated pattern
#
# !example_override.tf
# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
# example: *tfplan*
# Ignore CLI configuration files
.terraformrc
terraform.rc

View File: terraform/.terraform.lock.hcl

@@ -0,0 +1,22 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.

provider "registry.terraform.io/yandex-cloud/yandex" {
  version = "0.74.0"
  hashes = [
    "h1:WE0V59Nb+oj8gN02X7Xt5ZuP+Z+dP3lLaQgChj+8t1g=",
    "zh:01914a42590934918a312324fcf8b0b342da113da76c13bc00d40b9d3c0a78d9",
    "zh:0ae93ec70084677f0026c44513c99252dde3be31435c4d1ef5259c8ab5bde225",
    "zh:59acf5f27d378069d7332549c1645e03de2a2ff9208e02e1546491d276031e23",
    "zh:6662ab75109138675de0060957ce259c96c141e87617bc211dd80f1213d69419",
    "zh:86143792b6395e582b2363ac052675e51741bb9b09dcdabc3f5512f501d49fe5",
    "zh:883a06e44b64764459c1d0b37f24b52134a9fb95d7332f95b2b3c2271b76a958",
    "zh:96ca7255602e1f38b42515533bac2e77313163638ef6e68c08a7772ab2515ed6",
    "zh:9bad5d9a023aa238f34db6a05c1ea67f19f2c27fe640be76ec77d850e8cbecf6",
    "zh:aebf8480d0cccbca57a085ccabb5af23d0e35a8d6e54b1bef15ae6432cfdf229",
    "zh:c7114896af26237cd01395c10f81a670752cc103d6ce602e88f81f205987e617",
    "zh:c84819a708453cc321746eba5fc4bab972e3735607b6533b3d9bab79c3f0d196",
    "zh:ee82069747c38737e88f01007de0a1180770c14de26c13b79c9cc340204237fc",
    "zh:f53439d40f328b0e4800d8ed00f18bc39b2b03ac3d776b0c7b497722d7f7f0b1",
  ]
}

View File

@@ -0,0 +1,11 @@
# Network
resource "yandex_vpc_network" "default" {
  name = "net"
}

resource "yandex_vpc_subnet" "default" {
  name           = "subnet"
  zone           = "ru-central1-a"
  network_id     = yandex_vpc_network.default.id
  v4_cidr_blocks = ["192.168.101.0/24"]
}

View File

@@ -0,0 +1,29 @@
resource "yandex_compute_instance" "node01" {
name = "node01"
zone = "ru-central1-a"
hostname = "node01.netology.cloud"
allow_stopping_for_update = true
resources {
cores = 8
memory = 8
}
boot_disk {
initialize_params {
image_id = "${var.centos-7-base}"
name = "root-node01"
type = "network-nvme"
size = "50"
}
}
network_interface {
subnet_id = "${yandex_vpc_subnet.default.id}"
nat = true
}
metadata = {
ssh-keys = "centos:${file("~/.ssh/id_rsa.pub")}"
}
}

View File

@@ -0,0 +1,7 @@
output "internal_ip_address_node01_yandex_cloud" {
value = "${yandex_compute_instance.node01.network_interface.0.ip_address}"
}
output "external_ip_address_node01_yandex_cloud" {
value = "${yandex_compute_instance.node01.network_interface.0.nat_ip_address}"
}

View File

@@ -0,0 +1,14 @@
# Provider
terraform {
  required_providers {
    yandex = {
      source = "yandex-cloud/yandex"
    }
  }
}

provider "yandex" {
  token     = var.yandex_cloud_token
  cloud_id  = var.yandex_cloud_id
  folder_id = var.yandex_folder_id
}

View File: terraform/variables.tf.example

@@ -0,0 +1,22 @@
# Replace with the ID of your cloud
# https://console.cloud.yandex.ru/cloud?section=overview
variable "yandex_cloud_id" {
  default = "b1gu1gt5nqi6lqgu3t7s"
}

# Replace with the folder ID of your cloud
# https://console.cloud.yandex.ru/cloud?section=overview
variable "yandex_folder_id" {
  default = "b1gaec42k169jqpo02f7"
}

# The OAuth token used by the yc utility. It was already used at the packer stage.
variable "yandex_cloud_token" {
  default = ""
}

# Replace with the ID of your image
# The ID can be found with the command: yc compute image list
variable "centos-7-base" {
  default = "fd8ft6norj68lo29qlpi"
}