Déploiement de la pile ELK avec Docker Compose et instrumentation APM

Architecture de la solution d'observabilité

Cet article détaille la mise en place d'une infrastructure complète de collecte et d'analyse de logs basée sur les composants Elastic. La pile comprend Elasticsearch, Kibana, Filebeat, Metricbeat, Logstash, ainsi que Fleet Server et le serveur APM pour l'observabilité applicative.

Fichier d'environnement principal

Le fichier .env centralise les paramètres de configuration sensibles et les variables d'infrastructure. Les mots de passe doivent comporter au moins six caractères.

# Project identifier
PROJECT_LABEL=elastic-stack

# Password of the built-in "elastic" superuser
AUTH_PWD=Ch@ngeMe2024!

# Password assigned to the "kibana_system" user that Kibana connects with
KB_SYSTEM_PWD=Kib@na2024!

# Target stack version (see https://www.elastic.co/downloads/past-releases)
ELASTIC_VERSION=8.15.1

# Elasticsearch cluster name
ES_CLUSTER_LABEL=docker-es-cluster

# License type
LICENSE_TYPE=basic

# Host ports published by the Compose services
ELASTICSEARCH_HTTP=9200
KIBANA_UI=5601
FLEET_ENDPOINT=8220
APM_ENDPOINT=8200

# APM authentication token
APM_AUTH_TOKEN=your-secret-token-here

# Encryption key for saved objects, security and reporting in Kibana.
# Kibana rejects keys shorter than 32 characters, so the placeholder is
# deliberately long; replace it with a random value (e.g. `openssl rand -hex 32`).
XPACK_CIPHER_KEY=replace-with-32-plus-random-characters

# Memory limits (Docker size suffix: g = gigabytes); ES_HEAP and KB_HEAP are
# used as container mem_limit values in docker-compose.yml
ES_HEAP=3g
KB_HEAP=1g
LS_HEAP=1g

Orchestration via Docker Compose

Le fichier docker-compose.yml définit l'ensemble des services, volumes persistants et réseau interne. Les scripts d'initialisation vérifient la présence des variables d'environnement et génèrent automatiquement les certificats TLS si nécessaire.

# Named volumes that keep state across container restarts.
volumes:
  # TLS material (CA and per-instance certificates) written by cert-initializer
  # and mounted by the Elastic services.
  ca-materials:
    driver: local
  # Elasticsearch data directory.
  es-persistent-data:
    driver: local
  # Kibana data directory.
  kb-persistent-data:
    driver: local
  # Metricbeat data directory.
  mb-persistent-data:
    driver: local
  # Filebeat data directory.
  fb-persistent-data:
    driver: local
  # Logstash data directory.
  ls-persistent-data:
    driver: local
  # Elastic Agent / Fleet Server home directory.
  agent-persistent-data:
    driver: local

# No service declares a `networks:` key, so every container joins the project's
# default network. Configuring that default network (instead of declaring a
# separate, never-attached one) is what actually places all services on
# "obs-network".
networks:
  default:
    name: obs-network
    external: false

services:
  # Bootstrap container: validates the required passwords, creates the CA and
  # the per-instance certificates in the shared volume, then waits for
  # Elasticsearch and sets the kibana_system password.
  cert-initializer:
    image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION}
    volumes:
      - ca-materials:/usr/share/elasticsearch/config/certs
    user: "0"
    # The script is wrapped in single quotes, so it must not contain any
    # apostrophe. Certificates are generated for es-primary, kb-node and
    # fleet-controller; the latter is required because the fleet-controller
    # service points FLEET_SERVER_CERT / FLEET_SERVER_CERT_KEY at
    # /certs/fleet-controller/. Generation is skipped when certs.zip already
    # exists in the volume.
    command: >
      bash -c '
        if [ x${AUTH_PWD} == x ]; then
          echo "Définir AUTH_PWD dans le fichier .env";
          exit 1;
        fi;
        if [ x${KB_SYSTEM_PWD} == x ]; then
          echo "Définir KB_SYSTEM_PWD dans le fichier .env";
          exit 1;
        fi;
        if [ ! -f config/certs/ca.zip ]; then
          echo "Génération de l autorité de certification";
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;
        fi;
        if [ ! -f config/certs/certs.zip ]; then
          echo "Génération des certificats";
          echo -ne \
          "instances:\n"\
          "  - name: es-primary\n"\
          "    dns:\n"\
          "      - es-primary\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          "  - name: kb-node\n"\
          "    dns:\n"\
          "      - kb-node\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          "  - name: fleet-controller\n"\
          "    dns:\n"\
          "      - fleet-controller\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          > config/certs/instances.yml;
          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
          unzip config/certs/certs.zip -d config/certs;
        fi;
        echo "Application des permissions";
        chown -R root:root config/certs;
        find . -type d -exec chmod 750 \{\} \;;
        find . -type f -exec chmod 640 \{\} \;;
        echo "Attente de la disponibilité d Elasticsearch";
        until curl -s --cacert config/certs/ca/ca.crt https://es-primary:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
        echo "Configuration du mot de passe kibana_system";
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${AUTH_PWD}" -H "Content-Type: application/json" https://es-primary:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KB_SYSTEM_PWD}\"}" | grep -q "^{}"; do sleep 10; done;
        echo "Initialisation terminée";
      '
    # Reported healthy as soon as the Elasticsearch certificate exists, which
    # unblocks es-primary while the script keeps waiting for the cluster.
    healthcheck:
      test: ["CMD-SHELL", "[ -f config/certs/es-primary/es-primary.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120

  # Single-node Elasticsearch with TLS on both the HTTP and transport layers.
  # Starts only after cert-initializer has produced the certificates.
  es-primary:
    image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION}
    depends_on:
      cert-initializer:
        condition: service_healthy
    labels:
      co.elastic.logs/module: elasticsearch
    ports:
      - "${ELASTICSEARCH_HTTP}:9200"
    volumes:
      - "ca-materials:/usr/share/elasticsearch/config/certs"
      - "es-persistent-data:/usr/share/elasticsearch/data"
    environment:
      - node.name=es-primary
      - cluster.name=${ES_CLUSTER_LABEL}
      - discovery.type=single-node
      - ELASTIC_PASSWORD=${AUTH_PWD}
      - bootstrap.memory_lock=true
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es-primary/es-primary.key
      - xpack.security.http.ssl.certificate=certs/es-primary/es-primary.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es-primary/es-primary.key
      - xpack.security.transport.ssl.certificate=certs/es-primary/es-primary.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=${LICENSE_TYPE}
    mem_limit: ${ES_HEAP}
    # Unlimited memlock accompanies bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Healthy once the HTTPS endpoint answers an anonymous request with the
    # "missing authentication credentials" error.
    healthcheck:
      test:
        - CMD-SHELL
        - >-
          curl -s --cacert config/certs/ca/ca.crt https://localhost:9200
          | grep -q 'missing authentication credentials'
      interval: 10s
      timeout: 10s
      retries: 120

  # Kibana served over HTTPS; connects to Elasticsearch as kibana_system and
  # loads the bind-mounted kibana.yml.
  kb-node:
    image: docker.elastic.co/kibana/kibana:${ELASTIC_VERSION}
    depends_on:
      es-primary:
        condition: service_healthy
    labels:
      co.elastic.logs/module: kibana
    ports:
      - "${KIBANA_UI}:5601"
    volumes:
      - "ca-materials:/usr/share/kibana/config/certs"
      - "kb-persistent-data:/usr/share/kibana/data"
      - "./kibana.yml:/usr/share/kibana/config/kibana.yml:ro"
    environment:
      - SERVERNAME=kb-node
      - ELASTICSEARCH_HOSTS=https://es-primary:9200
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KB_SYSTEM_PWD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
      # The same key is reused for the three encryption settings.
      - XPACK_SECURITY_ENCRYPTIONKEY=${XPACK_CIPHER_KEY}
      - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${XPACK_CIPHER_KEY}
      - XPACK_REPORTING_ENCRYPTIONKEY=${XPACK_CIPHER_KEY}
      - XPACK_REPORTING_KIBANASERVER_HOSTNAME=localhost
      - SERVER_SSL_ENABLED=true
      - SERVER_SSL_CERTIFICATE=config/certs/kb-node/kb-node.crt
      - SERVER_SSL_KEY=config/certs/kb-node/kb-node.key
      - SERVER_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
      - ELASTIC_APM_SECRET_TOKEN=${APM_AUTH_TOKEN}
    mem_limit: ${KB_HEAP}
    # Healthy once the HTTPS root URL answers with a 302 redirect.
    healthcheck:
      test:
        - CMD-SHELL
        - >-
          curl -I -s --cacert config/certs/ca/ca.crt https://localhost:5601
          | grep -q 'HTTP/1.1 302 Found'
      interval: 10s
      timeout: 10s
      retries: 120

  # Metricbeat: runs as root with read-only access to the Docker socket and
  # the host filesystem; its configuration comes from ./metricbeat.yml.
  mb-collector:
    image: docker.elastic.co/beats/metricbeat:${ELASTIC_VERSION}
    depends_on:
      es-primary:
        condition: service_healthy
      kb-node:
        condition: service_healthy
    user: root
    volumes:
      - "ca-materials:/usr/share/metricbeat/certs"
      - "mb-persistent-data:/usr/share/metricbeat/data"
      - "./metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
      - "/proc:/hostfs/proc:ro"
      - "/:/hostfs:ro"
    # Values referenced as ${...} from metricbeat.yml.
    environment:
      - ES_USER=elastic
      - ES_PWD=${AUTH_PWD}
      - ES_HOSTS=https://es-primary:9200
      - KB_HOSTS=https://kb-node:5601
      - LS_HOSTS=http://ls-ingest:9600
      - ROOT_CA=certs/ca/ca.crt
      - NODE_CERT=certs/es-primary/es-primary.crt
      - NODE_KEY=certs/es-primary/es-primary.key
      - KB_CERT=certs/kb-node/kb-node.crt
      - KB_KEY=certs/kb-node/kb-node.key
    # Single argument passed to the image entrypoint.
    command: ["-strict.perms=false"]

  # Filebeat: runs as root, reads container logs and the Docker socket
  # read-only, and picks up files dropped in ./fb_ingest_data/.
  fb-collector:
    image: docker.elastic.co/beats/filebeat:${ELASTIC_VERSION}
    depends_on:
      es-primary:
        condition: service_healthy
    user: root
    volumes:
      - "ca-materials:/usr/share/filebeat/certs"
      - "fb-persistent-data:/usr/share/filebeat/data"
      - "./fb_ingest_data/:/usr/share/filebeat/ingest_data/"
      - "./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro"
      - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
    # Values referenced as ${...} from filebeat.yml.
    environment:
      - ES_USER=elastic
      - ES_PWD=${AUTH_PWD}
      - ES_HOSTS=https://es-primary:9200
      - KB_HOSTS=https://kb-node:5601
      - LS_HOSTS=http://ls-ingest:9600
      - ROOT_CA=certs/ca/ca.crt
    # Single argument passed to the image entrypoint.
    command: ["-strict.perms=false"]

  # Logstash: runs the bind-mounted pipeline.conf over the files placed in
  # ./ls_ingest_data/.
  ls-ingest:
    image: docker.elastic.co/logstash/logstash:${ELASTIC_VERSION}
    depends_on:
      es-primary:
        condition: service_healthy
      kb-node:
        condition: service_healthy
    labels:
      co.elastic.logs/module: logstash
    user: root
    volumes:
      - "ca-materials:/usr/share/logstash/certs"
      - "ls-persistent-data:/usr/share/logstash/data"
      - "./ls_ingest_data/:/usr/share/logstash/ingest_data/"
      - "./pipeline.conf:/usr/share/logstash/pipeline/pipeline.conf:ro"
    # ES_* values are referenced as ${...} from the pipeline output section.
    environment:
      - xpack.monitoring.enabled=false
      - ES_USER=elastic
      - ES_PWD=${AUTH_PWD}
      - ES_HOSTS=https://es-primary:9200

  # Elastic Agent container configured through FLEET_* / KIBANA_* variables.
  # Publishes the Fleet (8220) and APM (8200) container ports on the host.
  fleet-controller:
    image: docker.elastic.co/beats/elastic-agent:${ELASTIC_VERSION}
    depends_on:
      kb-node:
        condition: service_healthy
      es-primary:
        condition: service_healthy
    user: root
    ports:
      - "${FLEET_ENDPOINT}:8220"
      - "${APM_ENDPOINT}:8200"
    volumes:
      - "ca-materials:/certs"
      - "agent-persistent-data:/usr/share/elastic-agent"
      - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
      - "/proc:/hostfs/proc:ro"
      - "/:/hostfs:ro"
    environment:
      # Certificate authority locations.
      - SSL_CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt
      - CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt
      - FLEET_CA=/certs/ca/ca.crt
      # Agent enrollment.
      - FLEET_ENROLL=1
      - FLEET_INSECURE=true
      # Fleet Server settings; the policy id matches the agent policy declared
      # in kibana.yml.
      - FLEET_SERVER_ELASTICSEARCH_CA=/certs/ca/ca.crt
      - FLEET_SERVER_ELASTICSEARCH_HOST=https://es-primary:9200
      - FLEET_SERVER_ELASTICSEARCH_INSECURE=true
      - FLEET_SERVER_ENABLE=1
      - FLEET_SERVER_CERT=/certs/fleet-controller/fleet-controller.crt
      - FLEET_SERVER_CERT_KEY=/certs/fleet-controller/fleet-controller.key
      - FLEET_SERVER_INSECURE_HTTP=true
      - FLEET_SERVER_POLICY_ID=agent-management-policy
      - FLEET_URL=https://fleet-controller:8220
      # Kibana connection used for Fleet setup.
      - KIBANA_FLEET_CA=/certs/ca/ca.crt
      - KIBANA_FLEET_SETUP=1
      - KIBANA_FLEET_USERNAME=elastic
      - KIBANA_FLEET_PASSWORD=${AUTH_PWD}
      - KIBANA_HOST=https://kb-node:5601

  # Demo web application built from ./app and published on port 8000.
  sample-webapp:
    build:
      context: app
    # Hand the APM secret token from .env to the application. The Elastic APM
    # agents read ELASTIC_APM_SECRET_TOKEN from the environment, so the token
    # sent by the app matches the one configured for the APM integration
    # instead of a value hard-coded in the application source.
    environment:
      - ELASTIC_APM_SECRET_TOKEN=${APM_AUTH_TOKEN}
    # NOTE(review): these host mounts (Docker socket, /proc, host root) look
    # copied from the collector services; the demo application shown in this
    # article does not read them. Confirm and consider removing them.
    volumes:
      - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
      - "/proc:/hostfs/proc:ro"
      - "/:/hostfs:ro"
    ports:
      - "8000:8000"

Configuration de Metricbeat

Metricbeat collecte des métriques système et applicatives depuis Elasticsearch, Logstash, Kibana et Docker, puis les indexe dans Elasticsearch.

# Module files under modules.d are loaded once; live reloading is disabled.
metricbeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: false

# Every ${...} value below is supplied by the mb-collector container
# environment.
metricbeat.modules:
  # Elasticsearch monitoring over HTTPS.
  - module: elasticsearch
    xpack.enabled: true
    period: 10s
    hosts: ${ES_HOSTS}
    username: ${ES_USER}
    password: ${ES_PWD}
    ssl:
      enabled: true
      certificate_authorities: ${ROOT_CA}

  # Logstash monitoring.
  - module: logstash
    xpack.enabled: true
    period: 10s
    hosts: ${LS_HOSTS}

  # Kibana monitoring over HTTPS.
  - module: kibana
    xpack.enabled: true
    metricsets:
      - stats
    period: 10s
    hosts: ${KB_HOSTS}
    username: ${ES_USER}
    password: ${ES_PWD}
    ssl:
      enabled: true
      certificate_authorities: ${ROOT_CA}

  # Docker metrics read from the mounted Docker socket.
  - module: docker
    enabled: true
    period: 10s
    hosts:
      - "unix:///var/run/docker.sock"
    metricsets:
      - container
      - cpu
      - diskio
      - healthcheck
      - info
      - memory
      - network

processors:
  - add_host_metadata: null
  - add_docker_metadata: null

# Events are shipped to Elasticsearch over TLS.
output.elasticsearch:
  hosts: ${ES_HOSTS}
  username: ${ES_USER}
  password: ${ES_PWD}
  ssl:
    enabled: true
    certificate_authorities: ${ROOT_CA}

Configuration de Filebeat

Filebeat récupère les fichiers journaux et exploite l'autodécouverte Docker pour collecter automatiquement les logs des conteneurs.

# Log files dropped into the ingest_data directory.
filebeat.inputs:
  - type: filestream
    id: app-log-stream
    paths:
      - ingest_data/*.log

# Container logs discovered through the Docker provider, driven by hints.
filebeat.autodiscover:
  providers:
    - type: docker
      hints.enabled: true

processors:
  - add_docker_metadata: null

# Every ${...} value below is supplied by the fb-collector container
# environment.
setup.kibana:
  host: ${KB_HOSTS}
  username: ${ES_USER}
  password: ${ES_PWD}

# Events are shipped to Elasticsearch over TLS.
output.elasticsearch:
  hosts: ${ES_HOSTS}
  username: ${ES_USER}
  password: ${ES_PWD}
  ssl:
    enabled: true
    certificate_authorities: ${ROOT_CA}

Pipeline Logstash

Logstash lit les fichiers placés dans le répertoire de données et les achemine vers Elasticsearch avec un index journalier.

# Tail every file found in the ingest directory.
input {
  file {
    path => "/usr/share/logstash/ingest_data/*"
    mode => "tail"
  }
}

# No transformation is applied.
filter {
}

# Write to a daily index; connection settings come from the container
# environment.
# NOTE(review): `cacert` may be flagged as deprecated by recent versions of
# the elasticsearch output plugin - confirm against the targeted version.
output {
  elasticsearch {
    hosts    => "${ES_HOSTS}"
    user     => "${ES_USER}"
    password => "${ES_PWD}"
    cacert   => "certs/ca/ca.crt"
    index    => "logstash-%{+YYYY.MM.dd}"
  }
}

Configuration Kibana

Kibana est configuré pour activer APM et Fleet, avec des politiques d'agents pré-définies pour la collecte de métriques système, de logs et de traces applicatives.

# Kibana self-instrumentation with APM.
# ${...} references are resolved from the Kibana container environment, and
# the kb-node service only exports the token as ELASTIC_APM_SECRET_TOKEN;
# referencing a variable that is not set there makes Kibana refuse to load
# this file.
elastic:
  apm:
    active: true
    serverUrl: "http://fleet-controller:8200"
    secretToken: ${ELASTIC_APM_SECRET_TOKEN}
server.host: "0.0.0.0"
telemetry.enabled: "true"
# Integrations installed at start-up.
xpack.fleet.packages:
  - name: fleet_server
    version: latest
  - name: system
    version: latest
  - name: elastic_agent
    version: latest
  - name: apm
    version: latest
# Pre-configured agent policy; its id is the one the fleet-controller service
# passes as FLEET_SERVER_POLICY_ID.
xpack.fleet.agentPolicies:
  - name: Agent-Management-Policy
    id: agent-management-policy
    namespace: default
    monitoring_enabled:
      - logs
      - metrics
    package_policies:
      - name: fleet-server-integration
        package:
          name: fleet_server
      - name: system-metrics
        package:
          name: system
      - name: agent-monitoring
        package:
          name: elastic_agent
      # APM integration listening on all interfaces, port 8200, protected by
      # the same secret token as above.
      - name: apm-integration
        package:
          name: apm
        inputs:
        - type: apm
          enabled: true
          vars:
          - name: host
            value: 0.0.0.0:8200
          - name: secret_token
            value: ${ELASTIC_APM_SECRET_TOKEN}

Exemple d'application Python instrumentée avec APM

L'application suivante utilise FastAPI, NiceGUI et l'agent APM Python pour démontrer l'instrumentation de transactions et la capture d'erreurs.

# Dockerfile
# Image for the FastAPI / NiceGUI demo application.
FROM python:3.9-slim-buster
WORKDIR /srv
# Requirements are copied and installed before the source so that this layer
# is reused when only main.py changes.
COPY requirements.txt .
# --no-cache-dir keeps the pip download cache out of the image layer.
RUN pip3 install --no-cache-dir -r requirements.txt
COPY main.py .
# Serve the ASGI object "app" from main.py on port 8000 with a single worker.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "info", "--workers", "1"]

# main.py
import asyncio
import functools
import os
from typing import Callable
from urllib.parse import quote

import httpx as http_client
from elasticapm.contrib.starlette import ElasticAPM, make_apm_client
from fastapi import FastAPI
from nicegui import ui

# Build the APM client. The secret token and server URL are taken from the
# ELASTIC_APM_SECRET_TOKEN / ELASTIC_APM_SERVER_URL environment variables when
# they are set, so the deployment can supply the token configured on the APM
# server; the previous hard-coded values remain as fallbacks.
try:
    apm_client = make_apm_client({
        'SERVICE_NAME': 'demo-fastapi-app',
        'SECRET_TOKEN': os.environ.get('ELASTIC_APM_SECRET_TOKEN', 'supersecrettoken'),
        'SERVER_URL': os.environ.get('ELASTIC_APM_SERVER_URL', 'http://fleet-controller:8200'),
        'ENVIRONMENT': 'development'
    })
except Exception:
    # Best effort: the application keeps working without instrumentation when
    # the client cannot be created; every later use checks `if apm_client`.
    apm_client = None

# The ASGI application object.
application = FastAPI()

# Alias expected by the launch command: both the Dockerfile CMD and the hint
# printed at the bottom of this module start the server with
# `uvicorn main:app`, which requires a module attribute named `app`.
app = application

# Attach the APM middleware only when the client could be created.
if apm_client:
    application.add_middleware(ElasticAPM, client=apm_client)


@application.get("/send_message/{payload}")
async def send_message(payload: str):
    """Report the received payload to APM (when available) and echo it back.

    Args:
        payload: Text taken from the URL path.

    Returns:
        A dict with a single "result" entry containing the echoed message.
    """
    message = f"Payload received: {payload}"
    if apm_client:
        apm_client.capture_message(message)
    return {"result": message}


@application.get("/trigger-error")
async def trigger_error():
    """Raise a ZeroDivisionError on purpose and report it to APM.

    Returns:
        A dict with a fixed "result" message; the exception never propagates.
    """
    response_body = {"result": "Error captured by APM"}
    try:
        _ = 1 / 0
    except ZeroDivisionError:
        # capture_exception() reports the exception currently being handled.
        if apm_client:
            apm_client.capture_exception()
    return response_body


def configure_ui(fastapi_app: FastAPI) -> None:
    """Register the NiceGUI demo page and mount NiceGUI on the FastAPI app.

    Args:
        fastapi_app: The FastAPI instance that NiceGUI is attached to.
    """

    @ui.page('/', title="APM Demonstration")
    async def render_page():
        # Header bar with a menu button that toggles the right drawer.
        with ui.header(elevated=True).style('background-color: #3874c8').classes('items-center justify-between'):
            ui.markdown('### APM Demonstration')
            # `drawer` is bound by the `with ... as drawer` statement below; the
            # lambda only looks it up when the button is clicked.
            ui.button(on_click=lambda: drawer.toggle(), icon='menu').props('flat color=white')
        with ui.right_drawer(fixed=False).style('background-color: #ebf1fa').props('bordered') as drawer:
            ui.chat_message('Bienvenue sur l application de démonstration APM!',
                            name='APM Bot',
                            stamp='instant',
                            avatar='https://robohash.org/apm')
        with ui.footer().style('background-color: #3874c8'):
            ui.label('Démo APM')

        # Card 1: calls the /trigger-error endpoint from the server side.
        with ui.card():
            ui.label('Provoquer une erreur Python')
            ui.button('Exécuter', on_click=invoke_python_error)

        # Card 2: calls the /trigger-error endpoint from the browser.
        with ui.card():
            ui.label('Provoquer une erreur JavaScript')
            ui.button('Exécuter', on_click=invoke_js_error)

        # Card 3: sends the text typed in the input to /send_message.
        with ui.card():
            ui.label('Envoyer un message personnalisé')
            msg_input = ui.input(placeholder='Votre message')
            ui.button('Envoyer').on('click', handler=lambda: send_custom_msg(msg_input.value))

    ui.run_with(fastapi_app, storage_secret='app-secret-key')


async def run_blocking(callback: Callable, *args, **kwargs):
    """Run a blocking callable in the default executor and await its result.

    Args:
        callback: The synchronous callable to execute.
        *args: Positional arguments forwarded to ``callback``.
        **kwargs: Keyword arguments forwarded to ``callback``.

    Returns:
        Whatever ``callback`` returns.

    Raises:
        Any exception raised by ``callback`` is re-raised to the awaiting
        caller.
    """
    # Inside a coroutine a loop is always running, so get_running_loop() is
    # the direct way to obtain it.
    loop = asyncio.get_running_loop()
    # run_in_executor() only forwards positional arguments, hence the partial.
    bound_call = functools.partial(callback, *args, **kwargs)
    return await loop.run_in_executor(None, bound_call)


async def invoke_python_error():
    """Call the local /trigger-error endpoint and show its response text.

    Any exception is reported to APM (when available) and displayed as a
    notification instead of being raised.
    """
    target_url = 'http://localhost:8000/trigger-error'
    try:
        response = await run_blocking(http_client.get, target_url)
        ui.notify(response.text)
    except Exception as err:
        if apm_client:
            apm_client.capture_exception()
        ui.notify(str(err))


async def invoke_js_error():
    """Ask the browser to fetch /trigger-error and confirm with a notification.

    Any exception is reported to APM (when available) and displayed as a
    notification instead of being raised.
    """
    script = 'fetch("http://localhost:8000/trigger-error")'
    try:
        await ui.run_javascript(script)
        ui.notify('Requête envoyée avec succès')
    except Exception as err:
        if apm_client:
            apm_client.capture_exception()
        ui.notify(str(err))


async def send_custom_msg(text: str):
    """Send ``text`` to the local /send_message endpoint and show the reply.

    Args:
        text: The message typed by the user.

    Any exception is reported to APM (when available) and displayed as a
    notification instead of being raised.
    """
    try:
        # The text comes straight from a user input and is placed in the URL
        # path: percent-encode it so characters such as "?", "#" or spaces do
        # not truncate or corrupt the request.
        encoded = quote(text, safe='')
        resp = await run_blocking(http_client.get, f'http://localhost:8000/send_message/{encoded}')
        ui.notify(resp.text)
    except Exception as exc:
        if apm_client:
            apm_client.capture_exception()
        ui.notify(str(exc))


# Register the NiceGUI page and mount NiceGUI on the FastAPI instance.
configure_ui(application)

# Emit a start-up marker when the APM client is available.
if apm_client:
    apm_client.capture_message('Application démarrée')

# Running the file directly does not start a server; it only prints the
# uvicorn command to use.
if __name__ == '__main__':
    print('Lancer avec: uvicorn main:app --host 0.0.0.0 --port 8000')

Exemple d'application Go instrumentée avec APM

Cette application Go utilise le framework Gin et l'agent APM Elastic pour la collecte de transactions et d'erreurs.

# Dockerfile
# Build stage: compile the server binary with the Go toolchain.
FROM golang:1.20 AS build-stage
WORKDIR /src
# Module files are copied first so the dependency download layer is reused
# when only the sources change.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# CGO is disabled for the build.
RUN CGO_ENABLED=0 go build -o server .

# Runtime stage: only the compiled binary is copied from the build stage.
FROM gcr.io/distroless/base
WORKDIR /app
COPY --from=build-stage /src/server .
EXPOSE 8000
CMD ["./server"]

// main.go
package main

import (
	"fmt"
	"net/http"

	"github.com/gin-gonic/gin"
	// The APM agent module declares its path as go.elastic.co/apm/v2; it
	// cannot be imported through its GitHub repository path.
	"go.elastic.co/apm/v2"
)

// main starts a Gin HTTP server on port 8000 whose requests are wrapped in
// APM transactions and whose errors and messages are reported to APM.
func main() {
	tracer, err := apm.NewTracer("demo-gin-service", "development")
	if err != nil {
		fmt.Println("Impossible de créer le tracer APM:", err)
		return
	}
	defer tracer.Close()

	router := gin.Default()

	// Middleware: one transaction per request, named after the URL path.
	router.Use(func(ctx *gin.Context) {
		tx := tracer.StartTransaction(ctx.Request.URL.Path, "http-request")
		defer tx.End()

		// Store the transaction in the request context so that handlers can
		// attach the errors they capture to it.
		ctx.Request = ctx.Request.WithContext(
			apm.ContextWithTransaction(ctx.Request.Context(), tx),
		)

		ctx.Next()

		// Report every error the handlers registered on the Gin context.
		for _, e := range ctx.Errors {
			apm.CaptureError(ctx.Request.Context(), e.Err).Send()
		}
	})

	// Echo endpoint: records the message in APM as an error log entry.
	router.GET("/send_message/:msg", func(ctx *gin.Context) {
		msg := ctx.Param("msg")
		text := fmt.Sprintf("Message reçu: %s", msg)

		record := tracer.NewErrorLog(apm.ErrorLogRecord{Message: text})
		if tx := apm.TransactionFromContext(ctx.Request.Context()); tx != nil {
			record.SetTransaction(tx)
		}
		record.Send()

		ctx.JSON(http.StatusOK, gin.H{"result": text})
	})

	// Failure endpoint: panics on purpose, recovers, and reports the panic.
	router.GET("/trigger-error", func(ctx *gin.Context) {
		defer func() {
			if r := recover(); r != nil {
				apm.CaptureError(ctx.Request.Context(), fmt.Errorf("panic récupéré: %v", r)).Send()
				ctx.JSON(http.StatusInternalServerError, gin.H{"result": "Erreur capturée"})
			}
		}()
		// The divisor must be a variable: a constant division by zero is
		// rejected at compile time, whereas this panics at run time.
		divisor := 0
		_ = 1 / divisor
	})

	if err := router.Run(":8000"); err != nil {
		fmt.Println("Échec du démarrage:", err)
	}
}

Extraction des certificats

Le script ci-dessous récupère le certificat CA depuis le conteneur Elasticsearch, l'affiche au format PEM, le met en forme comme bloc YAML `ssl.certificate_authorities`, puis calcule son empreinte SHA-256.

# Copy the CA certificate out of the Elasticsearch container.
docker cp es-es-primary-1:/usr/share/elasticsearch/config/certs/ca/ca.crt /tmp/.

# Print the certificate as PEM.
cat /tmp/ca.crt

# Print it again as a YAML "ssl.certificate_authorities" block, indenting the
# PEM body by four spaces.
printf '%s\n' 'ssl:' '  certificate_authorities:' '  - |'
sed 's/^/    /' /tmp/ca.crt

# Print the SHA-256 fingerprint with the colons removed.
openssl x509 -fingerprint -sha256 -noout -in /tmp/ca.crt | awk -F'=' '{ print $2 }' | tr -d ':'

Étiquettes: Elasticsearch Kibana filebeat metricbeat Logstash

Publié le 24 juin à 19h44