From 7971f12489877d779bfb6848c374d852999a0257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20H=C3=BCster?= Date: Mon, 12 May 2025 13:34:23 +0200 Subject: [PATCH] switching out promtail for alloy (#22) * rework grafana log ingestion using alloy * fix docker-compose.yml formatting --- Makefile | 2 +- docker-compose.yml | 164 +++++++++++--------- files/alloy/README.md | 45 ++++++ files/alloy/config.alloy | 59 +++++++ files/grafana/README.md | 63 ++++++++ files/grafana/dashboards/enhanced_logs.json | 18 +-- files/loki/README.md | 51 ++++++ scripts/general.sh | 2 +- 8 files changed, 316 insertions(+), 88 deletions(-) create mode 100644 files/alloy/README.md create mode 100644 files/alloy/config.alloy create mode 100644 files/grafana/README.md create mode 100644 files/loki/README.md diff --git a/Makefile b/Makefile index 82cb333..89c409c 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ run: run-core: @echo "Starting the core services... hang in there." - @docker-compose up -d postgres openldap prometheus grafana elasticsearch mattermost keycloak loki promtail + @docker-compose up -d postgres openldap prometheus grafana elasticsearch mattermost keycloak loki alloy run-db-replicas: @echo "Starting with replicas. Hang in there..." 
diff --git a/docker-compose.yml b/docker-compose.yml index 681aa6d..221e284 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -54,20 +54,30 @@ services: - 3100:3100 volumes: - ./files/loki:/etc/loki - - ./volumes/logs-node-1:/logs/node-1 - - ./volumes/logs-node-2:/logs/node-2 + - ./volumes/loki-data:/tmp/loki command: -config.file=/etc/loki/loki-config.yaml - promtail: - container_name: cs-repro-promtail - image: grafana/promtail:3.2.2 + healthcheck: + test: + [ + "CMD-SHELL", + "wget --no-verbose --tries=1 --spider http://localhost:3100/loki/api/v1/labels || exit 1", + ] + interval: 20s + timeout: 3s + retries: 5 + start_period: 40s + alloy: + container_name: cs-repro-alloy + image: grafana/alloy:latest restart: unless-stopped ports: - 9080:9080 + - 9999:9999 volumes: - - ./files/promtail:/etc/promtail - - ./volumes/logs-node-1:/logs-node-1 - - ./volumes/logs-node-2:/logs-node-2 - command: -config.file=/etc/promtail/promtail-config.yaml + - ./files/alloy:/etc/alloy:ro + - ./volumes/alloy/data:/var/lib/alloy/data:rw + - ./volumes/logs-node-1:/mattermost/logs:ro + command: run --server.http.listen-addr=0.0.0.0:9080 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy depends_on: - loki grafana: @@ -130,7 +140,7 @@ services: # - MM_SqlSettings_DataSource=mmuser:mmuser_password@tcp(mysql:3306)/mattermost?charset=utf8mb4,utf8&writeTimeout=30s ## Disable this to migrate your config to the database -# - MM_CONFIG=postgres://mmuser:mmuser_password@cs-repro-postgres:5432/mattermost?sslmode=disable&connect_timeout=10&binary_parameters=yes + # - MM_CONFIG=postgres://mmuser:mmuser_password@cs-repro-postgres:5432/mattermost?sslmode=disable&connect_timeout=10&binary_parameters=yes keycloak: container_name: cs-repro-keycloak restart: unless-stopped @@ -218,49 +228,49 @@ services: depends_on: - postgres mattermost-2: - platform: linux/amd64 - container_name: cs-repro-mattermost-2 - depends_on: - postgres: - condition: service_healthy - mitmproxy: - condition: 
service_started - image: mattermost/mattermost-enterprise-edition:10.7 - restart: unless-stopped - security_opt: - - no-new-privileges:true - pids_limit: 200 - read_only: false - ports: - - "8066:8065" - - "8068:8067" - tmpfs: - - /tmp - volumes: - - ./volumes/mattermost/config:/mattermost/config:rw - - ./volumes/mattermost/data:/mattermost/data:rw - - ./volumes/logs-node-2:/mattermost-1/logs:rw - - ./volumes/mattermost_2/plugins:/mattermost/plugins:rw - - ./volumes/mattermost_2/client/plugins:/mattermost/client/plugins:rw - - ./volumes/mattermost_2/bleve-indexes:/mattermost/bleve-indexes:rw - # - ./license.mattermost:/mattermost/config/license.mattermost-enterprise:ro - # - ./files/mattermost/samlCert.crt:/mattermost/config/saml-cert.crt - # - ./files/mattermost/defaultConfig.json:/mattermost/config/defaultConfig.json:ro - # - ./files/mattermost/replicaConfig.json:/mattermost/config/replicaConfig.json:ro + platform: linux/amd64 + container_name: cs-repro-mattermost-2 + depends_on: + postgres: + condition: service_healthy + mitmproxy: + condition: service_started + image: mattermost/mattermost-enterprise-edition:10.7 + restart: unless-stopped + security_opt: + - no-new-privileges:true + pids_limit: 200 + read_only: false + ports: + - "8066:8065" + - "8068:8067" + tmpfs: + - /tmp + volumes: + - ./volumes/mattermost/config:/mattermost/config:rw + - ./volumes/mattermost/data:/mattermost/data:rw + - ./volumes/logs-node-2:/mattermost-1/logs:rw + - ./volumes/mattermost_2/plugins:/mattermost/plugins:rw + - ./volumes/mattermost_2/client/plugins:/mattermost/client/plugins:rw + - ./volumes/mattermost_2/bleve-indexes:/mattermost/bleve-indexes:rw + # - ./license.mattermost:/mattermost/config/license.mattermost-enterprise:ro + # - ./files/mattermost/samlCert.crt:/mattermost/config/saml-cert.crt + # - ./files/mattermost/defaultConfig.json:/mattermost/config/defaultConfig.json:ro + # - ./files/mattermost/replicaConfig.json:/mattermost/config/replicaConfig.json:ro - ## Files are 
required for the mitmproxy on the box - - ./files/mitmproxy/mitmproxy-ca.pem:/etc/ssl/certs/mitmproxy-ca.pem - - ./files/mitmproxy/mitmproxy-dhparam.pem:/etc/ssl/certs/mitmproxy-dhparam.pem - environment: - - HTTP_PROXY=http://cs-repro-mitmproxy:8080 - - HTTPS_PROXY=http://cs-repro-mitmproxy:8080 - - MM_SqlSettings_DriverName=postgres - - MM_SqlSettings_DataSource=postgres://mmuser:mmuser_password@cs-repro-postgres:5432/mattermost?sslmode=disable&connect_timeout=10&binary_parameters=yes - - MM_SAMLSETTINGS_IDPCERTIFICATEFILE=/mattermost/config/saml-cert.crt - - MM_ServiceSettings_EnableLocalMode=true - - MM_ServiceSettings_LocalModeSocketLocation=/var/tmp/mattermost_local.socket - - MM_ServiceSettings_LicenseFileLocation=/mattermost/config/license.mattermost-enterprise - - MM_LogSettings_Directory=/mattermost/logs + ## Files are required for the mitmproxy on the box + - ./files/mitmproxy/mitmproxy-ca.pem:/etc/ssl/certs/mitmproxy-ca.pem + - ./files/mitmproxy/mitmproxy-dhparam.pem:/etc/ssl/certs/mitmproxy-dhparam.pem + environment: + - HTTP_PROXY=http://cs-repro-mitmproxy:8080 + - HTTPS_PROXY=http://cs-repro-mitmproxy:8080 + - MM_SqlSettings_DriverName=postgres + - MM_SqlSettings_DataSource=postgres://mmuser:mmuser_password@cs-repro-postgres:5432/mattermost?sslmode=disable&connect_timeout=10&binary_parameters=yes + - MM_SAMLSETTINGS_IDPCERTIFICATEFILE=/mattermost/config/saml-cert.crt + - MM_ServiceSettings_EnableLocalMode=true + - MM_ServiceSettings_LocalModeSocketLocation=/var/tmp/mattermost_local.socket + - MM_ServiceSettings_LicenseFileLocation=/mattermost/config/license.mattermost-enterprise + - MM_LogSettings_Directory=/mattermost/logs mattermost-rtcd: container_name: cs-repro-mattermost-rtcd platform: linux/amd64 @@ -284,28 +294,28 @@ services: command: mitmweb --web-host 0.0.0.0 --set confdir=/certs --mode transparent # Named volumes section is removed since we're using host path mounts # mysql: - # container_name: cs-repro-mysql - # image: mysql:8 - # 
restart: always - # environment: - # MYSQL_DATABASE: "mattermost" - # # So you don't have to use root, but you can if you like - # MYSQL_USER: "mmuser" - # # You can use whatever password you like - # MYSQL_PASSWORD: "mmuser_password" - # # Password for root access - # MYSQL_ROOT_PASSWORD: "mmuser_password" - # healthcheck: - # test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] - # timeout: 20supdat - # retries: 10 - # ports: - # # : - # - "3306:3306" - # expose: - # # Opens port 3306 on the container - # - "3306" - # # Where our data will be persisted - # volumes: - # - ./volumes/db/mysql:/var/lib/mysql - +# container_name: cs-repro-mysql +# image: mysql:8 +# restart: always +# environment: +# MYSQL_DATABASE: "mattermost" +# # So you don't have to use root, but you can if you like +# MYSQL_USER: "mmuser" +# # You can use whatever password you like +# MYSQL_PASSWORD: "mmuser_password" +# # Password for root access +# MYSQL_ROOT_PASSWORD: "mmuser_password" +# healthcheck: +# test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] +# timeout: 20supdat +# retries: 10 +# ports: +# # : +# - "3306:3306" +# expose: +# # Opens port 3306 on the container +# - "3306" +# # Where our data will be persisted +# volumes: +# - ./volumes/db/mysql:/var/lib/mysql + diff --git a/files/alloy/README.md b/files/alloy/README.md new file mode 100644 index 0000000..9e3f955 --- /dev/null +++ b/files/alloy/README.md @@ -0,0 +1,45 @@ +# Alloy Log Agent Configuration + +This directory contains the configuration for Grafana Alloy, which has replaced Promtail as the log agent in this environment. + +## Key Differences + +- Alloy uses a component-based configuration format with `.alloy` extension +- The web UI is available at http://localhost:9080 +- Alloy can handle logs, metrics, and traces in one agent +- Configuration is more flexible with the River language + +## Configuration Explanation + +The `config.alloy` file follows the component-based model where: + +1. 
`loki.source.file` components directly collect logs from Mattermost log files +2. `loki.process` component parses and labels the JSON logs +3. `loki.write` component sends the logs to Loki + +## Current Setup + +Our configuration: +- Monitors Mattermost logs directly from mounted volumes +- Labels all logs with `job="mattermost"` for Grafana dashboard compatibility +- Extracts log level, message, and other metadata from JSON logs +- Sends logs to Loki service + +## Converting Promtail Config to Alloy + +If you need to update the configuration, you can convert Promtail configs to Alloy format with: + +```bash +# Example using the alloy CLI (if installed locally) +alloy convert --source-format=promtail --output=config.alloy promtail-config.yaml +``` + +## Important Syntax Notes + +- The Alloy configuration syntax (formerly called River) requires commas between the key-value pairs of an object, such as the entries in `targets` +- A trailing comma after the last element of a multi-line list or object is supported and recommended + +## More Information + +- [Grafana Alloy Documentation](https://grafana.com/docs/alloy/latest/) +- [Migrating from Promtail to Alloy](https://grafana.com/docs/loki/latest/send-data/alloy/migrate-from-promtail/) \ No newline at end of file diff --git a/files/alloy/config.alloy b/files/alloy/config.alloy new file mode 100644 index 0000000..f8b2e98 --- /dev/null +++ b/files/alloy/config.alloy @@ -0,0 +1,59 @@ +// File-based log sources +loki.source.file "mattermost_logs" { + targets = [ + { "__path__" = "/mattermost/logs/mattermost.log", "filename" = "/logs-node-1/mattermost.log", "job" = "mattermost" }, + ] + forward_to = [loki.process.mm_logs_processor.receiver] +} + +loki.source.file "mattermost_advanced_logs" { + targets = [ + { "__path__" = "/mattermost/logs/advancedLogs.log", "filename" = "/logs-node-1/advancedLogs.log", "job" = "mattermost" }, + ] + forward_to = [loki.process.mm_logs_processor.receiver] +} + +loki.source.file "mattermost_ldap_logs" { + targets = [ + { "__path__" = "/mattermost/logs/advancedLdapLogs.log", 
"filename" = "/logs-node-1/advancedLdapLogs.log", "job" = "mattermost" }, + ] + forward_to = [loki.process.mm_logs_processor.receiver] +} + +// Process Mattermost logs +loki.process "mm_logs_processor" { + // First, try to parse as JSON + stage.json { + expressions = { + ts = "timestamp", + log_level = "level", + log_msg = "msg", + log_caller = "caller", + } + } + + // Set labels based on extracted fields + stage.labels { + values = { + level = "log_level", + } + } + + forward_to = [loki.write.loki.receiver] +} + +// API endpoint for direct log pushing +loki.source.api "push_api" { + http { + listen_address = "0.0.0.0" + listen_port = 9999 + } + forward_to = [loki.write.loki.receiver] +} + +// Send all logs to Loki +loki.write "loki" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + } +} \ No newline at end of file diff --git a/files/grafana/README.md b/files/grafana/README.md new file mode 100644 index 0000000..1616cd0 --- /dev/null +++ b/files/grafana/README.md @@ -0,0 +1,63 @@ +# Grafana Configuration + +This directory contains the configuration for Grafana, which serves as the visualization and dashboarding platform for the CS Repro environment. + +## Overview + +Grafana is an open-source platform for monitoring and observability that allows you to query, visualize, alert on, and understand your metrics, logs, and traces. 
+ +## Current Setup + +- Running as a single instance (version 10.0.4) +- Available at http://localhost:3000 +- Default credentials: admin/admin + +## Directory Structure + +- **dashboards/**: Contains JSON dashboard definitions + - `enhanced_logs.json`: Dashboard for viewing and filtering Mattermost logs + - `kpiMetrics.json`: Dashboard for key performance indicators + - `metricsv2.json`: Dashboard for detailed application metrics + - `bonusMetrics.json`: Dashboard for additional metrics + +- **provisioning/**: Contains automatic provisioning configurations + - `datasources/`: Configures data source connections + - `dashboards/`: Sets up dashboard loading + +## Data Sources + +The environment is configured with the following data sources: + +1. **Prometheus** (Default): For metrics collection + - URL: http://prometheus:9090 + - Used by most of the metric dashboards + +2. **Loki**: For log aggregation + - URL: http://loki:3100 + - Used by the enhanced_logs dashboard + - Queried with the LogQL language + +## Log Dashboard + +The `enhanced_logs.json` dashboard is designed to work with logs collected via Alloy and stored in Loki. It provides: + +- Log filtering by level (error, warn, info, debug) +- Visual metrics about log levels and counts +- Error tracking and analysis +- Time-series views of log patterns + +The dashboard queries use the label `job="mattermost"` to filter logs from the Mattermost application. 
+ +## Best Practices + +When modifying dashboards: +- Export/backup existing dashboards before making major changes +- Test queries in the Explore interface before adding to dashboards +- Use variables for consistent filtering across panels +- Maintain consistent styling + +## More Information + +- [Grafana Documentation](https://grafana.com/docs/grafana/latest/) +- [Dashboard JSON Model](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/dashboard-json-model/) +- [LogQL in Grafana](https://grafana.com/docs/grafana/latest/datasources/loki/query-editor/) \ No newline at end of file diff --git a/files/grafana/dashboards/enhanced_logs.json b/files/grafana/dashboards/enhanced_logs.json index e82788c..d7b5460 100644 --- a/files/grafana/dashboards/enhanced_logs.json +++ b/files/grafana/dashboards/enhanced_logs.json @@ -50,7 +50,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "builder", - "expr": "{filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"${log_level}\" | line_format \"{{.level}} [{{.caller}}] {{.msg}}\"", + "expr": "{job=\"mattermost\"} | json | level=~\"${log_level}\" | line_format \"{{.level}} [{{.caller}}] {{.msg}}\"", "queryType": "range", "refId": "A" } @@ -140,7 +140,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"(debug|info|warn|error)\" [$__interval])) by (level)", + "expr": "sum(count_over_time({job=\"mattermost\"} | json | level=~\"(debug|info|warn|error)\" [$__interval])) by (level)", "legendFormat": "{{level}}", "queryType": "range", "refId": "A" @@ -209,7 +209,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"error\" [$__range]))", + "expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"error\" [$__range]))", "queryType": "instant", "refId": "A" } @@ -277,7 +277,7 @@ "uid": 
"${DS_LOKI}" }, "editorMode": "code", - "expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"warn\" [$__range]))", + "expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"warn\" [$__range]))", "queryType": "instant", "refId": "A" } @@ -337,7 +337,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"info\" [$__range]))", + "expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"info\" [$__range]))", "queryType": "instant", "refId": "A" } @@ -397,7 +397,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"debug\" [$__range]))", + "expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"debug\" [$__range]))", "queryType": "instant", "refId": "A" } @@ -466,7 +466,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "sum by(level) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json [30m]))", + "expr": "sum by(level) (count_over_time({job=\"mattermost\"} | json [30m]))", "legendFormat": "{{level}}", "queryType": "range", "refId": "A" @@ -561,7 +561,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "topk(10, sum by(caller) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"warn|error\" [30m])))", + "expr": "topk(10, sum by(caller) (count_over_time({job=\"mattermost\"} | json | level=~\"warn|error\" [30m])))", "legendFormat": "{{caller}}", "queryType": "range", "refId": "A" @@ -699,7 +699,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "code", - "expr": "topk(10, sum by(error) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"error\" [30m])))", + "expr": "topk(10, sum by(error) (count_over_time({job=\"mattermost\"} | json | level=\"error\" [30m])))", 
"queryType": "range", "refId": "A" } diff --git a/files/loki/README.md b/files/loki/README.md new file mode 100644 index 0000000..5330a5a --- /dev/null +++ b/files/loki/README.md @@ -0,0 +1,51 @@ +# Loki Configuration + +This directory contains the configuration for Grafana Loki, which serves as the log storage and aggregation system in this environment. + +## Overview + +Loki is a horizontally-scalable, highly-available, multi-tenant log aggregation system designed by Grafana Labs. It is optimized for efficiently storing and querying logs from Kubernetes and microservices deployments. + +## Current Setup + +- Running as a single instance in this CS Repro environment +- Available at http://localhost:3100 +- Receives logs from Grafana Alloy (which replaced Promtail) + +## Configuration Details + +The `loki-config.yaml` file contains the core configuration for the Loki service: + +- **Storage**: Configured to use the local filesystem for simplicity +- **Schema**: Uses the v11 schema with appropriate index/chunk configurations +- **Compaction**: Enabled to optimize storage over time +- **Limits**: Configured with reasonable defaults for this environment +- **Frontend**: Configured for basic query handling + +## Querying Logs + +Logs can be queried in several ways: + +1. **LogQL via Grafana**: The primary and most user-friendly method +2. **Direct Loki API**: Using the API endpoints at http://localhost:3100/loki/api/v1/ +3. **LogCLI**: If installed, you can use Grafana's logcli tool + +Example LogQL queries: +``` +{job="mattermost"} | json | level=~"error|warn" +{job="mattermost"} | json | level="error" | line_format "{{.msg}}" +``` + +## Labels + +The current configuration uses the following key labels: + +- `job`: The source application ("mattermost") +- `level`: The log level (error, warn, info, debug, etc.) 
+- `service_name`: Identifies the service +- `filename`: The source log file + +## More Information + +- [Grafana Loki Documentation](https://grafana.com/docs/loki/latest/) +- [LogQL Query Language](https://grafana.com/docs/loki/latest/logql/) \ No newline at end of file diff --git a/scripts/general.sh b/scripts/general.sh index 2d33a6e..556ca4f 100755 --- a/scripts/general.sh +++ b/scripts/general.sh @@ -10,7 +10,7 @@ logins () { echo " - For more info https://github.com/coltoneshaw/CS-Repro-Mattermost#use-grafana" echo "- Prometheus: http://localhost:9090" echo "- Loki: http://localhost:3100/ready" - echo "- Promtail: http://localhost:9080" + echo "- Alloy: http://localhost:9080" echo "- PostgreSQL" "localhost:5432" with 'mmuser' / 'mmuser_password' echo echo ===========================================================