From 32a587172e61aded6d9ad02bafd06be3ab8dbe44 Mon Sep 17 00:00:00 2001 From: Maksym Buz Date: Mon, 30 Mar 2026 20:46:46 +0000 Subject: [PATCH] docs: move pg_cron job management instructions to generic maintenance section --- postgresql/procedures/00_schema_create.sql | 1 - postgresql/procedures/README.md | 99 +++----------- template/README.md | 31 ++--- .../zbx_pg_partitions_monitor_agent2.yaml | 128 +++++++++++------- 4 files changed, 113 insertions(+), 146 deletions(-) diff --git a/postgresql/procedures/00_schema_create.sql b/postgresql/procedures/00_schema_create.sql index c20141a..811ed3c 100644 --- a/postgresql/procedures/00_schema_create.sql +++ b/postgresql/procedures/00_schema_create.sql @@ -22,7 +22,6 @@ CREATE TABLE IF NOT EXISTS partitions.version ( description text ); --- Set initial version INSERT INTO partitions.version (version, description) VALUES ('7-1', 'Zabbix 7.4 and 7.0 compatible version') ON CONFLICT (version) DO NOTHING; diff --git a/postgresql/procedures/README.md b/postgresql/procedures/README.md index da4724e..8a9706a 100644 --- a/postgresql/procedures/README.md +++ b/postgresql/procedures/README.md @@ -1,17 +1,15 @@ # PostgreSQL Partitioning for Zabbix -This is the declarative (PostgreSQL procedures based) partitioning implementation for Zabbix `history`, `trends`, and `auditlog` tables on PostgreSQL. This solution is intended to replace standard Zabbix housekeeping for the configured tables. Partitioning is very useful for large environments because it completely eliminates the housekeeper from the process. Instead of huge DELETE queries on several million rows, fast DDL queries (ALTER TABLE) are executed, which drop an entire partition. +This is the declarative partitioning implementation for Zabbix `history*`, `trends*`, and `auditlog` tables on PostgreSQL. This solution is intended to replace standard Zabbix housekeeping for the configured tables. 
Partitioning is very useful for large environments because it completely eliminates the housekeeper from the process. Instead of huge DELETE queries on several million rows, fast DDL queries (ALTER TABLE) are executed, which drop an entire partition. > [!WARNING] -> **High-Load Environments**: > 1. **Data Visibility**: After enabling partitioning, old data remains in `*_old` tables and is **NOT visible** in Zabbix. You must migrate data manually if needed. -> 2. **Disable Housekeeping**: You **MUST** disable Zabbix Housekeeper for History and Trends in *Administration -> Housekeeping*. Failure to do so will cause massive `DELETE` loads. +> 2. **Disable Housekeeping**: You **MUST** disable Zabbix Housekeeper for History and Trends in *Administration -> Housekeeping*. ## Table of Contents - [Architecture](#architecture) - [Components](#components) -- [Prerequisites: Database & User Creation](#prerequisites-database--user-creation) - [Installation](#installation) - [Configuration](#configuration) - [Modifying Retention](#modifying-retention) @@ -24,7 +22,6 @@ This is the declarative (PostgreSQL procedures based) partitioning implementatio - [`auditlog` Table](#auditlog-table) - [Converting Existing Tables](#converting-existing-tables) - [Upgrades](#upgrades) -- [Appendix: Zabbix Server & Frontend RDS Configuration](#appendix-zabbix-server--frontend-rds-configuration) ## Architecture @@ -37,21 +34,6 @@ All procedures, information, statistics and configuration are stored in the `par 3. **Monitoring View**: `partitions.monitoring` provides system state visibility. 4. **Version Table**: `partitions.version` provides information about installed version of the partitioning solution. -## Prerequisites: Database & User Creation -If you are deploying Zabbix on a fresh database instance (like AWS RDS) rather than a local server, you must first create the `zabbix` user and database using your administrator account (e.g., `postgres`). - -1. 
Connect to your DB instance as the administrator: - ```bash - psql "host=YOUR_RDS_HOST port=5432 user=postgres dbname=postgres sslmode=require" - ``` -2. Create the user and database: - ```sql - CREATE USER zabbix WITH PASSWORD 'your_secure_password'; - -- On Cloud DBs like RDS, the master user must inherit the new role to grant ownership - GRANT zabbix TO postgres; - CREATE DATABASE zabbix OWNER zabbix; - ``` - ## Installation The installation is performed by executing the SQL procedures in the following order: @@ -66,9 +48,9 @@ You can deploy these scripts manually against your Zabbix database using `psql`. ```bash # Connect as the zabbix database user export PGPASSWORD="your_zabbix_password" -DB_HOST="localhost" # Or your RDS endpoint +DB_HOST="localhost" # Or your DB endpoint DB_NAME="zabbix" -DB_USER="zabbix" +DB_USER="zbxpart_admin" for script in 00_schema_create.sql 01_maintenance.sql 02_enable_partitioning.sql 03_monitoring_view.sql; do echo "Applying $script..." @@ -113,25 +95,21 @@ CALL partitions.run_maintenance(); To ensure partitions are created in advance and old data is cleaned up, the maintenance procedure should be scheduled to run automatically. -It is recommended to run the maintenance **twice a day** (e.g., at 05:30 and 23:30). +It is recommended to run the maintenance **twice a day** and not in round hours because of the way housekeeper works (e.g., at 05:30 and 23:30). * **Primary Run**: Creates new future partitions and drops old ones. * **Secondary Run**: Acts as a safety check. Since the procedure is idempotent (safe to run multiple times), a second run ensures everything is consistent if the first run failed or was interrupted. You can schedule this using one of the following methods: #### Option 1: `pg_cron` (Recommended) -`pg_cron` is a cron-based job scheduler that runs directly inside the database as an extension. 
- -> [!NOTE] -> **Cloud Managed Databases (AWS RDS, Aurora, Azure, GCP):** -> Managed databases generally have `pg_cron` pre-installed and handle the authentication/connections securely for you automatically. You do **not** need to install OS packages or configure a `.pgpass` file! Simply modify your RDS Parameter Group to include `shared_preload_libraries = 'pg_cron'` and `cron.database_name = 'zabbix'`, reboot the instance, and execute `CREATE EXTENSION pg_cron;`. +`pg_cron` is a cron-based job scheduler that runs directly inside the database as an extension. It is very useful for cloud-based databases like AWS RDS, Aurora, Azure, and GCP, because it handles the authentication/connections securely for you automatically and it's available as a managed extension. You do **not** need to install OS packages or configure anything. Simply modify the RDS Parameter Group to include `shared_preload_libraries = 'pg_cron'` and `cron.database_name = 'zabbix'`, reboot the instance, and execute `CREATE EXTENSION pg_cron;`. **Setup `pg_cron` (Self-Hosted):** 1. Install the package via your OS package manager (e.g., `postgresql-15-cron` on Debian/Ubuntu, or `pg_cron_15` on RHEL/CentOS). 2. Configure it modifying `postgresql.conf`: ```ini shared_preload_libraries = 'pg_cron' - cron.database_name = 'zabbix' # Define the database where pg_cron will run + cron.database_name = 'zabbix' ``` 3. Restart PostgreSQL: ```bash @@ -145,10 +123,6 @@ You can schedule this using one of the following methods: ```sql SELECT cron.schedule('zabbix_partition_maintenance', '30 5,23 * * *', 'CALL partitions.run_maintenance();'); ``` -6. 
**Manage your `pg_cron` jobs** (run as superuser): - To **list all active schedules**: `SELECT * FROM cron.job;` - To **view execution logs/history**: `SELECT * FROM cron.job_run_details;` - To **remove/unschedule** the job: `SELECT cron.unschedule('zabbix_partition_maintenance');` **⚠️ Troubleshooting `pg_cron` Connection Errors:** If your cron jobs fail to execute and you see `FATAL: password authentication failed` in your PostgreSQL logs, it is because `pg_cron` attempts to connect via TCP (`localhost`) by default, which usually requires a password. @@ -224,9 +198,17 @@ If running in Docker, you can execute it via the host's cron by targeting the container: ```bash 30 5,23 * * * docker exec zabbix-db-test psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();" ``` + +### Managing `pg_cron` Jobs + +If you are using `pg_cron` for scheduling, you can verify and manage your jobs (run as superuser): +- To **list all active schedules**: `SELECT * FROM cron.job;` +- To **view execution logs/history**: `SELECT * FROM cron.job_run_details;` +- To **remove/unschedule** the job: `SELECT cron.unschedule('zabbix_partition_maintenance');` + ## Monitoring & Permissions -System state can be monitored via the `partitions.monitoring` view. It includes a `future_partitions` column which counts how many partitions exist *after* the current period. This is useful for alerting (e.g., trigger if `future_partitions < 2`). +System state can be monitored via the `partitions.monitoring` view. It includes information about the number of future partitions and the time since the last maintenance run. It also includes the total size of the partitioned table in bytes. 
```sql SELECT * FROM partitions.monitoring; @@ -238,17 +220,17 @@ To check the installed version of the partitioning solution: SELECT * FROM partitions.version ORDER BY installed_at DESC LIMIT 1; ``` -### Least Privilege Access (`zbx_monitor`) +### Least Privilege Access (`zbxpart_admin`) For monitoring purposes, it is recommended to create a dedicated user with read-only access to the monitoring view. ```sql -CREATE USER zbx_monitor WITH PASSWORD 'secure_password'; -GRANT USAGE ON SCHEMA partitions TO zbx_monitor; -GRANT SELECT ON partitions.monitoring TO zbx_monitor; +CREATE USER zbxpart_admin WITH PASSWORD 'secure_password'; +GRANT USAGE ON SCHEMA partitions TO zbxpart_admin; +GRANT SELECT ON partitions.monitoring TO zbxpart_admin; ``` > [!NOTE] -> If you ever apply updates to `03_monitoring_view.sql`, you should run the script as the `zabbix` database user (the original creator of the view). The script drops and recreates the view, so running it as `zabbix` ensures the view retains its ownership and preserves existing `GRANT` permissions for read-only users. +> If you ever apply updates to `03_monitoring_view.sql`, you should run the script as the `zbxpart_admin` database user (the original creator of the view). The script drops and recreates the view, so running it as `zbxpart_admin` ensures the view retains its ownership and preserves existing `GRANT` permissions for read-only users. ## Implementation Details @@ -267,41 +249,4 @@ The enablement script guarantees practically zero downtime by automatically rena When upgrading Zabbix: 1. **Backup**: Ensure a full database backup exists. 2. **Compatibility**: Zabbix upgrade scripts may attempt to `ALTER` tables. PostgreSQL supports `ALTER TABLE` on partitioned tables for adding columns, which propagates to partitions. -3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted. 
- ---- - -## Appendix: Zabbix Server & Frontend RDS Configuration - -If you are running Zabbix against an external Cloud database (like AWS RDS) via SSL (`verify-full`), you must explicitly configure both the Zabbix Server daemon and the Web Frontend to enforce SSL and locate the downloaded Root CA Certificate. - -**Prerequisite:** Download your cloud provider's root certificate (e.g., `global-bundle.pem`) and place it in a secure location on your Zabbix Server (e.g., `/etc/zabbix/global-bundle.pem`). - -### 1. Zabbix Server (`/etc/zabbix/zabbix_server.conf`) -Ensure the following database lines are active: - -```ini -DBHost=YOUR_RDS_ENDPOINT.amazonaws.com -DBPort=5432 -DBName=zabbix -DBUser=zabbix -DBPassword=your_secure_password -DBTLSConnect=verify_full -DBTLSCAFile=/etc/zabbix/global-bundle.pem -``` - -### 2. Zabbix Frontend PHP (`/etc/zabbix/web/zabbix.conf.php`) -If you used the Web Setup Wizard, it might not configure the Root CA File correctly. Update your config array to enforce encryption and verify the host certificate: - -```php -$DB['TYPE'] = 'POSTGRESQL'; -$DB['SERVER'] = 'YOUR_RDS_ENDPOINT.amazonaws.com'; -$DB['PORT'] = '5432'; -$DB['DATABASE'] = 'zabbix'; -$DB['USER'] = 'zabbix'; -$DB['PASSWORD'] = 'your_secure_password'; -$DB['SCHEMA'] = ''; -$DB['ENCRYPTION'] = true; -$DB['VERIFY_HOST'] = true; -$DB['CA_FILE'] = '/etc/zabbix/global-bundle.pem'; -``` +3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted. \ No newline at end of file diff --git a/template/README.md b/template/README.md index 41f1411..2bfcb41 100644 --- a/template/README.md +++ b/template/README.md @@ -1,17 +1,16 @@ # Zabbix PostgreSQL Partitioning Monitoring -This directory contains an extremely efficient, production-ready Zabbix Native Agent 2 monitoring suite designed to track the health of your actively partitioned PostgreSQL database tables. 
+This template relies on Zabbix Agent 2 and its PostgreSQL plugin. It allows you to monitor the health of your partitioned PostgreSQL database tables. It uses a single master item to pull all metrics in bulk over a single database connection, dynamically distributing the numbers to Zabbix using Dependent Items. +There are three item prototypes: +1. Future Partitions Buffer: Number of future partitions to be created +2. Total Size Bytes: Total size of the partitioned table in bytes +3. Time Since Last Maintenance: Time since the last maintenance script was run +They allow you to monitor all the critical metrics, and they also have triggers, which will create a problem in case something is wrong with the partitioning. -This template natively leverages the Zabbix Agent 2 backend. It uses a single, highly-optimized master payload query (`partitions.get_all.sql`) to pull all metrics in bulk over a single database connection, dynamically distributing the numbers to Zabbix using Dependent Items. - -### Step-by-Step Setup -1. Copy the **one** SQL file (`template/partitions.get_all.sql`) into a secure directory on your Agent machine. E.g., `/etc/zabbix/zabbix_agent2.d/postgresql/`. -2. Secure the local Linux permissions so the Zabbix user can strictly read it: - ```bash - chown -R zabbix:zabbix /etc/zabbix/zabbix_agent2.d/postgresql - chmod 644 /etc/zabbix/zabbix_agent2.d/postgresql/*.sql - ``` -3. Open your main `/etc/zabbix/zabbix_agent2.conf` file. Scroll to the absolute bottom and add these exact lines to safely establish your custom query module AND a secure named session (e.g., `AWS_RDS`): +### Setup +1. Copy the SQL file (`template/partitions.get_all.sql`) into a directory on your Agent machine. E.g., `/etc/zabbix/zabbix_agent2.d/postgresql/`. +2. Install the zabbix-agent2-plugin-postgresql package. +3. 
Open your Plugin configuration file `/etc/zabbix/zabbix_agent2.d/plugins.d/postgresql.conf` and add these lines to establish your custom query module AND a secure named session (e.g., `AWS_RDS`). Adjust the parameters to match your environment. You can use a URI instead of a named session if you want; in this case you will need to modify the item keys to use the correct parameters. ```ini # 1. Enable Loadable Custom Queries (Mandatory in Zabbix 7.4+) Plugins.PostgreSQL.CustomQueriesPath=/etc/zabbix/zabbix_agent2.d/postgresql/ @@ -24,12 +23,10 @@ This template natively leverages the Zabbix Agent 2 backend. It uses a single, h Plugins.PostgreSQL.Sessions.AWS_RDS.TLSConnect=verify_full Plugins.PostgreSQL.Sessions.AWS_RDS.TLSCAFile=/etc/zabbix/global-bundle.pem ``` -4. Restart your agent to lock the session and SSL configurations into memory: +4. Restart your agent to apply the changes: ```bash systemctl restart zabbix-agent2 ``` -5. Import the `zbx_pg_partitions_monitor_agent2.yaml` template directly into your Zabbix Frontend. -6. Attach the template to your Host, navigate to its "Macros" tab, and define strictly exactly this **one** macro: - * `{$PG.CONNSTRING.AGENT2}`: `AWS_RDS` - -*(By pointing this macro to the Session name we built in Step 3, you are telling Zabbix to seamlessly abandon all plaintext frontend passwords and rely 100% on the encrypted local session parameters!)* +5. Import the `zbx_pg_partitions_monitor_agent2.yaml` template into your Zabbix. +6. 
Link the template to your Host, navigate to its "Macros" tab, and define the needed macros (in this case it's just named session): + * `{$PG.CONNSTRING.AGENT2}`: `AWS_RDS` diff --git a/template/zbx_pg_partitions_monitor_agent2.yaml b/template/zbx_pg_partitions_monitor_agent2.yaml index 52212b6..0673ffa 100644 --- a/template/zbx_pg_partitions_monitor_agent2.yaml +++ b/template/zbx_pg_partitions_monitor_agent2.yaml @@ -1,79 +1,105 @@ zabbix_export: - version: '7.0' + version: '7.4' + template_groups: + - uuid: 748ad4d098d447d492bb935c907f652f + name: Templates/Databases templates: - uuid: a1d5f8c3b2e44a7c9d6b1f2e8a3c5b4d - template: 'RDS PostgreSQL Partitioning Monitoring Agent 2' - name: 'RDS PostgreSQL Partitioning Monitoring (Agent 2)' - description: 'Monitors the custom partitions.monitoring view via the native Zabbix Agent 2 PostgreSQL plugin. Optimized using a master data payload.' + template: 'PostgreSQL Partitioning by Zabbix Agent 2' + name: 'PostgreSQL Partitioning by Zabbix Agent 2' + description: 'Monitors the custom partitions.monitoring view via the native Zabbix Agent 2 PostgreSQL plugin. Using a single master to minimize the DB connections and load.' + vendor: + name: Zabbix Support + version: 7.4-0 groups: - name: Templates/Databases - macros: - - macro: '{$PG.CONNSTRING.AGENT2}' - value: 'AWS_RDS' - description: 'Session name or URI of the PostgreSQL instance' - - macro: '{$PG.DBNAME}' - value: 'zabbix' items: - uuid: b8c7d6e5f4a34b2c8d2e3f4a5b6c7d8e name: 'PostgreSQL: Get Partitioning Data' - type: ZABBIX_PASSIVE key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' + history: '0' value_type: TEXT description: 'Master item that queries all partition statistics in a single bulk JSON sequence.' 
+ tags: + - tag: component + value: raw discovery_rules: - uuid: b7c2a5d8f1e44b9c8a3f6d2e1c5b4a7d name: 'Partitioned Tables Discovery' type: DEPENDENT - key: 'db.partitions.discovery.dependent' - delay: '0' - master_item: - key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' - lld_macro_paths: - - lld_macro: '{#TABLE_NAME}' - path: '$.table_name' + key: db.partitions.discovery.dependent item_prototypes: - - uuid: c4b9e2a5f1d84c7a9f3b6d1e5a2c8b4d - name: 'Table {#TABLE_NAME}: Future Partitions Buffer' - type: DEPENDENT - key: 'db.partitions.future["{#TABLE_NAME}"]' - value_type: FLOAT - master_item: - key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' - preprocessing: - - type: JSONPATH - parameters: - - '$.[?(@.table_name == "{#TABLE_NAME}")].future_partitions.first()' - trigger_prototypes: - - uuid: d6e3a5c8b2f14d9e8a7b6c5d4e3f2a1b - expression: 'last(/RDS PostgreSQL Partitioning Monitoring Agent 2/db.partitions.future["{#TABLE_NAME}"])<2' - name: 'Table {#TABLE_NAME}: Future partitions buffer is critically low (< 2)' - priority: HIGH - - uuid: e8f2a1b3c4d54e6f9a8b7c6d5e4f3a2b - name: 'Table {#TABLE_NAME}: Total Size Bytes' - type: DEPENDENT - key: 'db.partitions.size["{#TABLE_NAME}"]' - value_type: FLOAT - units: B - master_item: - key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' - preprocessing: - - type: JSONPATH - parameters: - - '$.[?(@.table_name == "{#TABLE_NAME}")].total_size_bytes.first()' - uuid: f1a2b3c4d5e64f7a9b8c7d6e5f4a3b2c - name: 'Table {#TABLE_NAME}: Time Since Last Maintenance' + name: '{#TABLE_NAME}: Time Since Last Maintenance' type: DEPENDENT key: 'db.partitions.age["{#TABLE_NAME}"]' - value_type: FLOAT units: s - master_item: - key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' preprocessing: - type: JSONPATH parameters: - '$.[?(@.table_name == "{#TABLE_NAME}")].age_seconds.first()' 
+ master_item: + key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' + tags: + - tag: metric + value: age + - tag: table + value: '{#TABLE_NAME}' trigger_prototypes: - uuid: a9b8c7d6e5f44a3b8c1d2e3f4a5b6c7d - expression: 'last(/RDS PostgreSQL Partitioning Monitoring Agent 2/db.partitions.age["{#TABLE_NAME}"])>172800' + expression: 'last(/PostgreSQL Partitioning by Zabbix Agent 2/db.partitions.age["{#TABLE_NAME}"])>{$PARTITIONS.AGE}' name: 'Table {#TABLE_NAME}: Maintenance script has not run successfully in over 48 hours' priority: WARNING + - uuid: c4b9e2a5f1d84c7a9f3b6d1e5a2c8b4d + name: '{#TABLE_NAME}: Future Partitions Buffer' + type: DEPENDENT + key: 'db.partitions.future["{#TABLE_NAME}"]' + preprocessing: + - type: JSONPATH + parameters: + - '$.[?(@.table_name == "{#TABLE_NAME}")].future_partitions.first()' + master_item: + key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' + tags: + - tag: metric + value: partitions + - tag: table + value: '{#TABLE_NAME}' + trigger_prototypes: + - uuid: d6e3a5c8b2f14d9e8a7b6c5d4e3f2a1b + expression: 'last(/PostgreSQL Partitioning by Zabbix Agent 2/db.partitions.future["{#TABLE_NAME}"])<{$PARTITIONS.LOW}' + name: 'Table {#TABLE_NAME}: Future partitions buffer is critically low (< 2)' + priority: HIGH + - uuid: e8f2a1b3c4d54e6f9a8b7c6d5e4f3a2b + name: '{#TABLE_NAME}: Total Size Bytes' + type: DEPENDENT + key: 'db.partitions.size["{#TABLE_NAME}"]' + units: B + preprocessing: + - type: JSONPATH + parameters: + - '$.[?(@.table_name == "{#TABLE_NAME}")].total_size_bytes.first()' + master_item: + key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' + tags: + - tag: metric + value: size + - tag: table + value: '{#TABLE_NAME}' + master_item: + key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]' + lld_macro_paths: + - lld_macro: '{#TABLE_NAME}' + path: $.table_name + macros: + - 
macro: '{$PARTITIONS.AGE}' + value: 24h + description: 'The maximum period during which no new partitions may be created' + - macro: '{$PARTITIONS.LOW}' + value: '2' + description: 'The minimum number of partitions that must exist in the future' + - macro: '{$PG.CONNSTRING.AGENT2}' + value: AWS_RDS + description: 'Session name or URI of the PostgreSQL instance' + - macro: '{$PG.DBNAME}' + value: zabbix