From 2b7a69ba11c80b6743b5fa7c00ca241f9e7a2d81 Mon Sep 17 00:00:00 2001 From: Maksym Buz Date: Thu, 26 Mar 2026 15:57:35 +0000 Subject: [PATCH] Refactor auditlog preparation, rename procedures sequentially, and update test suite --- ARCHITECTURE.md | 6 +- .../init_scripts/01_10_partitions_init.sql | 5 +- .../init_scripts/01_20_auditlog_prep.sql | 27 ----- .../docker/init_scripts/01_30_maintenance.sql | 3 +- .../docker/init_scripts/01_40_enable.sql | 17 ++- .../docker/init_scripts/01_50_monitoring.sql | 3 +- postgresql/docker/run_test_env.sh | 21 ++-- postgresql/procedures/01_auditlog_prep.sql | 26 ----- ...{02_maintenance.sql => 01_maintenance.sql} | 0 ...tioning.sql => 02_enable_partitioning.sql} | 12 +- ...toring_view.sql => 03_monitoring_view.sql} | 0 postgresql/procedures/README.md | 103 ++++++++++++++++-- 12 files changed, 128 insertions(+), 95 deletions(-) delete mode 100644 postgresql/docker/init_scripts/01_20_auditlog_prep.sql delete mode 100644 postgresql/procedures/01_auditlog_prep.sql rename postgresql/procedures/{02_maintenance.sql => 01_maintenance.sql} (100%) rename postgresql/procedures/{03_enable_partitioning.sql => 02_enable_partitioning.sql} (69%) rename postgresql/procedures/{04_monitoring_view.sql => 03_monitoring_view.sql} (100%) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index d02cfc4..1b0c19d 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -34,7 +34,7 @@ The solution is divided into a series of SQL scripts that must be executed seque * PostgreSQL range partitioning requires the partition key (in this case, `clock`) to be part of the Primary Key. * This script dynamically locates the existing Primary Key (usually just `auditid`) and alters it to a composite key `(auditid, clock)`. -### 3. `02_maintenance.sql` +### 3. `01_maintenance.sql` * **Purpose:** Contains the core PL/pgSQL procedural logic that manages the lifecycle of the partitions. 
* **Key Functions/Procedures:** * `partition_exists()`: Queries `pg_class` to verify if a specific child partition partition exists. @@ -43,14 +43,14 @@ The solution is divided into a series of SQL scripts that must be executed seque * `maintain_table()`: The orchestrator for a single table. It calculates the necessary UTC timestamps, calls `create_partition()` to build the future buffer, calls `create_partition()` recursively backward to cover the retention period, and finally calls `drop_old_partitions()`. * `run_maintenance()`: The global loop that iterates through `partitions.config` and triggers `maintain_table()` for every configured Zabbix table. -### 4. `03_enable_partitioning.sql` +### 4. `02_enable_partitioning.sql` * **Purpose:** The migration script that actually executes the partition conversion on the live database. * **Actions:** * It takes the original Zabbix table (e.g., `history`) and renames it to `history_old` (`ALTER TABLE ... RENAME TO ...`). * It immediately creates a new partitioned table with the original name, inheriting the exact structure of the old table (`CREATE TABLE ... (LIKE ... INCLUDING ALL) PARTITION BY RANGE (clock)`). * It triggers the first maintenance run so new incoming data has immediate partitions to land in. -### 5. `04_monitoring_view.sql` +### 5. `03_monitoring_view.sql` * **Purpose:** Provides an easy-to-read observability layer. * **Actions:** * Creates the `partitions.monitoring` view by joining `pg_class`, `pg_inherits`, `pg_tablespace`, and `pg_size_pretty`. 
diff --git a/postgresql/docker/init_scripts/01_10_partitions_init.sql b/postgresql/docker/init_scripts/01_10_partitions_init.sql index 84b8a05..156e6ae 100644 --- a/postgresql/docker/init_scripts/01_10_partitions_init.sql +++ b/postgresql/docker/init_scripts/01_10_partitions_init.sql @@ -1,7 +1,6 @@ -- ============================================================================ --- SCRIPT: 00_partitions_init.sql --- DESCRIPTION: Creates the 'partitions' schema and configuration table. --- Defines the structure for managing Zabbix partitioning. +-- Creates the 'partitions' schema and configuration table. +-- Defines the structure for managing Zabbix partitioning. -- ============================================================================ CREATE SCHEMA IF NOT EXISTS partitions; diff --git a/postgresql/docker/init_scripts/01_20_auditlog_prep.sql b/postgresql/docker/init_scripts/01_20_auditlog_prep.sql deleted file mode 100644 index b478c0c..0000000 --- a/postgresql/docker/init_scripts/01_20_auditlog_prep.sql +++ /dev/null @@ -1,27 +0,0 @@ --- ============================================================================ --- SCRIPT: 01_auditlog_prep.sql --- DESCRIPTION: Modifies the 'auditlog' table Primary Key to include 'clock'. --- This is REQUIRED for range partitioning by 'clock'. --- ============================================================================ - -DO $$ -BEGIN - -- Check if PK needs modification - -- Original PK is typically on (auditid) named 'auditlog_pkey' - IF EXISTS ( - SELECT 1 FROM pg_constraint - WHERE conname = 'auditlog_pkey' - AND conrelid = 'auditlog'::regclass - ) THEN - -- Verify if 'clock' is already in PK (basic check) - -- Realistically, if 'auditlog_pkey' exists on default Zabbix, it's just (auditid). 
- - RAISE NOTICE 'Dropping existing Primary Key on auditlog...'; - ALTER TABLE auditlog DROP CONSTRAINT auditlog_pkey; - - RAISE NOTICE 'Creating new Primary Key on auditlog (auditid, clock)...'; - ALTER TABLE auditlog ADD PRIMARY KEY (auditid, clock); - ELSE - RAISE NOTICE 'Constraint auditlog_pkey not found. Skipping or already modified.'; - END IF; -END $$; diff --git a/postgresql/docker/init_scripts/01_30_maintenance.sql b/postgresql/docker/init_scripts/01_30_maintenance.sql index 9d47026..c51a3fd 100644 --- a/postgresql/docker/init_scripts/01_30_maintenance.sql +++ b/postgresql/docker/init_scripts/01_30_maintenance.sql @@ -1,6 +1,5 @@ -- ============================================================================ --- SCRIPT: 02_maintenance.sql --- DESCRIPTION: Core functions for Zabbix partitioning (Create, Drop, Maintain). +-- Core functions for Zabbix partitioning (Create, Drop, Maintain). -- ============================================================================ -- Function to check if a partition exists diff --git a/postgresql/docker/init_scripts/01_40_enable.sql b/postgresql/docker/init_scripts/01_40_enable.sql index 42570fe..4777506 100644 --- a/postgresql/docker/init_scripts/01_40_enable.sql +++ b/postgresql/docker/init_scripts/01_40_enable.sql @@ -1,7 +1,6 @@ -- ============================================================================ --- SCRIPT: 03_enable_partitioning.sql --- DESCRIPTION: Converts standard Zabbix tables to Partitioned tables. --- WARNING: This renames existing tables to *_old. +-- Converts standard Zabbix tables to Partitioned tables. +-- WARNING: This renames existing tables to *_old. 
-- ============================================================================ DO $$ @@ -23,15 +22,21 @@ BEGIN WHERE c.relname = v_table; - -- Check if table exists and is NOT already partitioned IF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r') THEN RAISE NOTICE 'Converting table % to partitioned table...', v_table; -- 1. Rename existing table EXECUTE format('ALTER TABLE %I.%I RENAME TO %I', v_schema, v_table, v_old_table); - -- 2. Create new partitioned table (copying structure) - EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + -- 2. Create new partitioned table (handling auditlog PK uniquely) + IF v_table = 'auditlog' THEN + EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING DEFAULTS INCLUDING COMMENTS) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + EXECUTE format('ALTER TABLE %I.%I ADD PRIMARY KEY (auditid, clock)', v_schema, v_table); + EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_1 ON %I.%I (userid, clock)', v_schema, v_table); + EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_2 ON %I.%I (clock)', v_schema, v_table); + ELSE + EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + END IF; -- 3. Create initial partitions RAISE NOTICE 'Creating initial partitions for %...', v_table; diff --git a/postgresql/docker/init_scripts/01_50_monitoring.sql b/postgresql/docker/init_scripts/01_50_monitoring.sql index 80139f1..f46ea11 100644 --- a/postgresql/docker/init_scripts/01_50_monitoring.sql +++ b/postgresql/docker/init_scripts/01_50_monitoring.sql @@ -1,6 +1,5 @@ -- ============================================================================ --- SCRIPT: 04_monitoring_view.sql --- DESCRIPTION: Creates a view to monitor partition status and sizes. +-- Creates a view to monitor partition status and sizes. 
-- ============================================================================ CREATE OR REPLACE VIEW partitions.monitoring AS diff --git a/postgresql/docker/run_test_env.sh b/postgresql/docker/run_test_env.sh index 5744b14..b1d5f2b 100755 --- a/postgresql/docker/run_test_env.sh +++ b/postgresql/docker/run_test_env.sh @@ -70,18 +70,15 @@ if [[ -f "$SQL_DIR/schema.sql" ]]; then if [[ -f "../procedures/00_partitions_init.sql" ]]; then cp "../procedures/00_partitions_init.sql" ./init_scripts/01_10_partitions_init.sql fi - if [[ -f "../procedures/01_auditlog_prep.sql" ]]; then - cp "../procedures/01_auditlog_prep.sql" ./init_scripts/01_20_auditlog_prep.sql - fi - if [[ -f "../procedures/02_maintenance.sql" ]]; then - cp "../procedures/02_maintenance.sql" ./init_scripts/01_30_maintenance.sql - fi - if [[ -f "../procedures/03_enable_partitioning.sql" ]]; then - cp "../procedures/03_enable_partitioning.sql" ./init_scripts/01_40_enable.sql - fi - if [[ -f "../procedures/04_monitoring_view.sql" ]]; then - cp "../procedures/04_monitoring_view.sql" ./init_scripts/01_50_monitoring.sql - fi + if [[ -f "../procedures/01_maintenance.sql" ]]; then + cp "../procedures/01_maintenance.sql" ./init_scripts/01_30_maintenance.sql + fi + if [[ -f "../procedures/02_enable_partitioning.sql" ]]; then + cp "../procedures/02_enable_partitioning.sql" ./init_scripts/01_40_enable.sql + fi + if [[ -f "../procedures/03_monitoring_view.sql" ]]; then + cp "../procedures/03_monitoring_view.sql" ./init_scripts/01_50_monitoring.sql + fi else echo -e "${RED}Error: schema.sql not found in $SQL_DIR${NC}" exit 1 diff --git a/postgresql/procedures/01_auditlog_prep.sql b/postgresql/procedures/01_auditlog_prep.sql deleted file mode 100644 index 1caef9b..0000000 --- a/postgresql/procedures/01_auditlog_prep.sql +++ /dev/null @@ -1,26 +0,0 @@ --- ============================================================================ --- Modifies the 'auditlog' table Primary Key to include 'clock'. 
--- This is REQUIRED for range partitioning by 'clock'. --- ============================================================================ - -DO $$ -BEGIN - -- Check if PK needs modification - -- Original PK is typically on (auditid) named 'auditlog_pkey' - IF EXISTS ( - SELECT 1 FROM pg_constraint - WHERE conname = 'auditlog_pkey' - AND conrelid = 'auditlog'::regclass - ) THEN - -- Verify if 'clock' is already in PK (basic check) - -- Realistically, if 'auditlog_pkey' exists on default Zabbix, it's just (auditid). - - RAISE NOTICE 'Dropping existing Primary Key on auditlog...'; - ALTER TABLE auditlog DROP CONSTRAINT auditlog_pkey; - - RAISE NOTICE 'Creating new Primary Key on auditlog (auditid, clock)...'; - ALTER TABLE auditlog ADD PRIMARY KEY (auditid, clock); - ELSE - RAISE NOTICE 'Constraint auditlog_pkey not found. Skipping or already modified.'; - END IF; -END $$; diff --git a/postgresql/procedures/02_maintenance.sql b/postgresql/procedures/01_maintenance.sql similarity index 100% rename from postgresql/procedures/02_maintenance.sql rename to postgresql/procedures/01_maintenance.sql diff --git a/postgresql/procedures/03_enable_partitioning.sql b/postgresql/procedures/02_enable_partitioning.sql similarity index 69% rename from postgresql/procedures/03_enable_partitioning.sql rename to postgresql/procedures/02_enable_partitioning.sql index acaf3b8..4777506 100644 --- a/postgresql/procedures/03_enable_partitioning.sql +++ b/postgresql/procedures/02_enable_partitioning.sql @@ -22,15 +22,21 @@ BEGIN WHERE c.relname = v_table; - -- Check if table exists and is NOT already partitioned IF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r') THEN RAISE NOTICE 'Converting table % to partitioned table...', v_table; -- 1. Rename existing table EXECUTE format('ALTER TABLE %I.%I RENAME TO %I', v_schema, v_table, v_old_table); - -- 2. 
Create new partitioned table (copying structure) - EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + -- 2. Create new partitioned table (handling auditlog PK uniquely) + IF v_table = 'auditlog' THEN + EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING DEFAULTS INCLUDING COMMENTS) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + EXECUTE format('ALTER TABLE %I.%I ADD PRIMARY KEY (auditid, clock)', v_schema, v_table); + EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_1 ON %I.%I (userid, clock)', v_schema, v_table); + EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_2 ON %I.%I (clock)', v_schema, v_table); + ELSE + EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table); + END IF; -- 3. Create initial partitions RAISE NOTICE 'Creating initial partitions for %...', v_table; diff --git a/postgresql/procedures/04_monitoring_view.sql b/postgresql/procedures/03_monitoring_view.sql similarity index 100% rename from postgresql/procedures/04_monitoring_view.sql rename to postgresql/procedures/03_monitoring_view.sql diff --git a/postgresql/procedures/README.md b/postgresql/procedures/README.md index 2784e2d..6e27360 100644 --- a/postgresql/procedures/README.md +++ b/postgresql/procedures/README.md @@ -19,14 +19,28 @@ All procedures, information, statistics and configuration are stored in the `par 3. **Monitoring View**: `partitions.monitoring` provides system state visibility. 4. **Version Table**: `partitions.version` provides information about installed version of the partitioning solution. +## Prerequisites: Database & User Creation +If you are deploying Zabbix on a fresh database instance (like AWS RDS) rather than a local server, you must first create the `zabbix` user and database using your administrator account (e.g., `postgres`). + +1. 
Connect to your DB instance as the administrator: + ```bash + psql "host=YOUR_RDS_HOST port=5432 user=postgres dbname=postgres sslmode=require" + ``` +2. Create the user and database: + ```sql + CREATE USER zabbix WITH PASSWORD 'your_secure_password'; + -- On Cloud DBs like RDS, the master user must inherit the new role to grant ownership + GRANT zabbix TO postgres; + CREATE DATABASE zabbix OWNER zabbix; + ``` + ## Installation The installation is performed by executing the SQL procedures in the following order: 1. Initialize schema (`00_partitions_init.sql`). -2. Auditlog PK adjustment (`01_auditlog_prep.sql`). -3. Install maintenance procedures (`02_maintenance.sql`). -4. Enable partitioning on tables (`03_enable_partitioning.sql`). -5. Install monitoring views (`04_monitoring_view.sql`). +2. Install maintenance procedures (`01_maintenance.sql`). +3. Enable partitioning on tables (`02_enable_partitioning.sql`). +4. Install monitoring views (`03_monitoring_view.sql`). **Command Example:** You can deploy these scripts manually against your Zabbix database using `psql`. Navigate to the `procedures/` directory and run: @@ -38,7 +52,7 @@ DB_HOST="localhost" # Or your RDS endpoint DB_NAME="zabbix" DB_USER="zabbix" -for script in 00_partitions_init.sql 01_auditlog_prep.sql 02_maintenance.sql 03_enable_partitioning.sql 04_monitoring_view.sql; do +for script in 00_partitions_init.sql 01_maintenance.sql 02_enable_partitioning.sql 03_monitoring_view.sql; do echo "Applying $script..." psql -h $DB_HOST -U $DB_USER -d $DB_NAME -f "$script" done @@ -90,7 +104,11 @@ You can schedule this using one of the following methods: #### Option 1: `pg_cron` (Recommended) `pg_cron` is a cron-based job scheduler that runs directly inside the database as an extension. 
-**Setup `pg_cron`:** +> [!NOTE] +> **Cloud Managed Databases (AWS RDS, Aurora, Azure, GCP):** +> Managed database services generally ship `pg_cron` and handle its authentication and connections for you, so you do **not** need to install OS packages or configure a `.pgpass` file. On AWS RDS/Aurora, edit the DB parameter group to include `shared_preload_libraries = 'pg_cron'` and `cron.database_name = 'zabbix'`, reboot the instance, and execute `CREATE EXTENSION pg_cron;`; on other providers, use their equivalent server-parameter mechanism. + +**Setup `pg_cron` (Self-Hosted):** 1. Install the package via your OS package manager (e.g., `postgresql-15-cron` on Debian/Ubuntu, or `pg_cron_15` on RHEL/CentOS). 2. Configure it modifying `postgresql.conf`: ```ini @@ -114,6 +132,31 @@ You can schedule this using one of the following methods: - To **view execution logs/history**: `SELECT * FROM cron.job_run_details;` - To **remove/unschedule** the job: `SELECT cron.unschedule('zabbix_partition_maintenance');` +**⚠️ Troubleshooting `pg_cron` Connection Errors:** +If your cron jobs fail to execute and you see `FATAL: password authentication failed` in your PostgreSQL logs, the cause is that `pg_cron` connects via TCP (`localhost`) by default, which usually requires a password. + +**Solution A: Use Local Unix Sockets (Easier)** +Edit your `postgresql.conf` to force `pg_cron` to use the local Unix socket (which, with the default `pg_hba.conf` on most distributions, uses passwordless `peer` authentication): +```ini +cron.host = '/var/run/postgresql' # Or '/tmp', depending on your OS +``` +*(Restart PostgreSQL after making this change).* + +**Solution B: Provide a Password (`.pgpass`)** +If you *must* connect via TCP with a specific database user and password, the `pg_cron` background worker needs a way to authenticate. You provide this by creating a `.pgpass` file for the OS `postgres` user. +1. Switch to the OS database user: + ```bash + sudo su - postgres + ``` +2. 
Create or append your database credentials to `~/.pgpass` using the format `hostname:port:database:username:password`: + ```bash + echo "localhost:5432:zabbix:zabbix:my_secure_password" >> ~/.pgpass + ``` +3. Set strict permissions (PostgreSQL will ignore the file if permissions are too loose): + ```bash + chmod 0600 ~/.pgpass + ``` + #### Option 2: Systemd Timers Systemd timers provide better logging and error handling properties than standard cron. @@ -189,13 +232,14 @@ GRANT SELECT ON partitions.monitoring TO zbx_monitor; ## Implementation Details ### `auditlog` Table -The standard `auditlog` table Primary Key is `(auditid)`. Partitioning by `clock` requires the partition key to be part of the Primary Key. The initialization script modifies the PK to `(auditid, clock)`. +The standard Zabbix `auditlog` table has a primary key on `(auditid)`. Partitioning by `clock` requires the partition key to be part of the primary key. +Altering the primary key in place would hold a heavy, blocking lock on a highly active `auditlog` table. To avoid this, the enablement script (`02_enable_partitioning.sql`) handles `auditlog` the same way as the history tables: it renames the live, existing table to `auditlog_old` and immediately creates a brand new, empty partitioned `auditlog` table pre-configured with the required `(auditid, clock)` composite primary key. ### Converting Existing Tables -The enablement script renames the existing table to `table_name_old` and creates a new partitioned table with the same structure. -* **Note**: Data from the old table is NOT automatically migrated to minimize downtime. -* New data flows into the new partitioned table immediately. -* Old data remains accessible in `table_name_old` for manual query or migration if required. +The enablement script keeps downtime to a minimum by automatically renaming the existing tables to `table_name_old` and creating new, empty partitioned tables with the same structure (apart from the composite `auditlog` primary key). 
+* **Note**: To minimize downtime, data from the old tables is NOT migrated automatically. +* New data flows into the new partitioned tables immediately. +* Old data remains accessible in `table_name_old` for manual lookup or migration if required. ## Upgrades @@ -203,3 +247,40 @@ When upgrading Zabbix: 1. **Backup**: Ensure a full database backup exists. 2. **Compatibility**: Zabbix upgrade scripts may attempt to `ALTER` tables. PostgreSQL supports `ALTER TABLE` on partitioned tables for adding columns, which propagates to partitions. 3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted. + +--- + +## Appendix: Zabbix Server & Frontend RDS Configuration + +If you are running Zabbix against an external cloud database (like AWS RDS) over SSL/TLS with full certificate verification (`verify-full`), you must explicitly configure both the Zabbix Server daemon and the Web Frontend to enforce SSL and locate the downloaded Root CA Certificate. + +**Prerequisite:** Download your cloud provider's root certificate (e.g., `global-bundle.pem`) and place it in a secure location on your Zabbix Server (e.g., `/etc/zabbix/global-bundle.pem`). + +### 1. Zabbix Server (`/etc/zabbix/zabbix_server.conf`) +Ensure the following database lines are active: + +```ini +DBHost=YOUR_RDS_ENDPOINT.amazonaws.com +DBPort=5432 +DBName=zabbix +DBUser=zabbix +DBPassword=your_secure_password +DBTLSConnect=verify_full +DBTLSCAFile=/etc/zabbix/global-bundle.pem +``` + +### 2. Zabbix Frontend PHP (`/etc/zabbix/web/zabbix.conf.php`) +If you used the Web Setup Wizard, it might not configure the Root CA File correctly. 
Update your config array to enforce encryption and verify the host certificate: + +```php +$DB['TYPE'] = 'POSTGRESQL'; +$DB['SERVER'] = 'YOUR_RDS_ENDPOINT.amazonaws.com'; +$DB['PORT'] = '5432'; +$DB['DATABASE'] = 'zabbix'; +$DB['USER'] = 'zabbix'; +$DB['PASSWORD'] = 'your_secure_password'; +$DB['SCHEMA'] = ''; +$DB['ENCRYPTION'] = true; +$DB['VERIFY_HOST'] = true; +$DB['CA_FILE'] = '/etc/zabbix/global-bundle.pem'; +```