Compare commits

...

22 Commits

Author SHA1 Message Date
Maksym Buz
fb65b2f1e7 Enhance partitioning logic and update Zabbix config template 2026-04-02 17:02:18 +00:00
Maksym Buz
f83adeee8a build(repo): restructure repository into postgresql subdirectory with template and script 2026-03-30 21:18:15 +00:00
Maksym Buz
fc79992dee chore(template): update template compatibility version to 7.0 2026-03-30 21:01:29 +00:00
Maksym Buz
c7ab4f72d3 docs(procedures): refine zabbix monitor user instructions and pg_cron management section 2026-03-30 20:59:05 +00:00
Maksym Buz
06cde67714 docs(procedures): apply finalized user privilege instructions and text tweaks 2026-03-30 20:53:35 +00:00
Maksym Buz
4aded2a7f8 docs: move pg_cron job management instructions to generic maintenance section 2026-03-30 20:46:46 +00:00
Maksym Buz
d8233d53d5 feat(template): remove ODBC template, move agent2 SQL, and track template directory 2026-03-30 19:55:24 +00:00
Maksym Buz
d02a24ebd8 feat(versioning): update versioning scheme to use zabbix major releases (7-1) 2026-03-30 19:51:35 +00:00
Maksym Buz
56a1a0d1dd chore: revert accidental addition of ARCHITECTURE.md to master 2026-03-30 19:42:02 +00:00
Maksym Buz
000498e84d feat/fix(procedures): sync procedures and docs from test branch 2026-03-30 19:41:16 +00:00
Maksym Buz
7e8ee56215 docs: sync readme and gitignore perfectly from test branch 2026-03-30 19:16:25 +00:00
Maksym Buz
3166d516b2 feat(monitoring): expose raw total_size_bytes 2026-03-26 19:48:42 +00:00
Maksym Buz
37e4d534c3 fix(partitioning): add exact Zabbix auditlog indexes with renamed suffix to prevent IF NOT EXISTS collision 2026-03-26 19:35:54 +00:00
Maksym Buz
145fd74a57 Clean up old procedure versions after sequential renaming 2026-03-26 16:00:14 +00:00
Maksym Buz
e5fd9fd1fa Synced procedure scripts from test 2026-03-26 15:57:37 +00:00
Maksym Buz
4c78959d98 build: update .gitignore to exclude credentials and remove QUICKSTART.md 2026-03-20 17:57:36 +00:00
Maksym Buz
8565c99310 fix: move procedures based partitioning README to the correct directory 2026-02-19 18:28:10 +00:00
Maksym Buz
581c59a0ca docs: Add automatic maintenance (cron) scheduling instructions and examples 2026-02-19 18:27:07 +00:00
Maksym Buz
c94445e71b docs: Renamed PARTITIONING.md to README.md 2026-02-19 18:18:30 +00:00
Maksym Buz
99e25f2efb feat: introduce configurable future partition buffer and add monitoring for future partitions. 2026-02-19 17:27:31 +00:00
Maksym Buz
bd15e707cc change: version infor added to partitions schema 2026-02-19 17:02:10 +00:00
Maksym Buz
b1c3cd579d change: Initial version of procedures based partitioning. 2026-02-19 16:25:15 +00:00
11 changed files with 803 additions and 6 deletions

8
.gitignore vendored
View File

@@ -1,6 +1,2 @@
# Docker environment db_credentials
docker/ global-bundle.pem
z_gen_history_data.sql
# Schemas
sql-scripts*/

8
postgresql/README.md Normal file
View File

@@ -0,0 +1,8 @@
# PostgreSQL Partitioning for Zabbix
This directory contains solutions for partitioning a Zabbix database running on PostgreSQL. Partitioning is essential for large Zabbix environments as it eliminates the need for the built-in Zabbix Housekeeper to aggressively delete old data row-by-row, replacing it with instant DDL operations that drop entire daily or monthly chunks.
## Implementations
- **[procedures](procedures/)**: The recommended Declarative (SQL-based) implementation. It uses native PostgreSQL procedures and features like `pg_cron` for entirely self-contained maintenance.
- **[script](script/)**: External script-based management solution. (Coming soon)

View File

@@ -0,0 +1,47 @@
-- ============================================================================
-- Creates the 'partitions' schema and configuration table.
-- Defines the structure for managing Zabbix partitioning.
-- Every statement is guarded by IF NOT EXISTS / ON CONFLICT DO NOTHING, so the
-- script can be re-run without overwriting existing settings.
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS partitions;

-- Configuration table to store partitioning settings per table
CREATE TABLE IF NOT EXISTS partitions.config (
    table_name        text     NOT NULL,
    period            text     NOT NULL,
    keep_history      interval NOT NULL,
    future_partitions integer  NOT NULL DEFAULT 5,
    -- now() already returns an absolute instant (timestamptz). The previous
    -- default "now() AT TIME ZONE 'UTC'" produced a zone-less timestamp that
    -- was re-read in the session time zone, shifting the stored instant by
    -- the UTC offset whenever the session was not running in UTC.
    last_updated      timestamp WITH TIME ZONE DEFAULT now(),
    PRIMARY KEY (table_name)
);

-- Table to track installed version of the partitioning solution
CREATE TABLE IF NOT EXISTS partitions.version (
    version      text PRIMARY KEY,
    -- Same reasoning as partitions.config.last_updated: store the instant as-is.
    installed_at timestamp with time zone DEFAULT now(),
    description  text
);

INSERT INTO partitions.version (version, description)
VALUES ('7-1', 'Zabbix 7.4 and 7.0 compatible version')
ON CONFLICT (version) DO NOTHING;

-- Default configuration for Zabbix tables (adjust as needed)
-- History tables: Daily partitions, keep 30 days
INSERT INTO partitions.config (table_name, period, keep_history) VALUES
    ('history', 'day', '30 days'),
    ('history_uint', 'day', '30 days'),
    ('history_str', 'day', '30 days'),
    ('history_log', 'day', '30 days'),
    ('history_text', 'day', '30 days')
ON CONFLICT (table_name) DO NOTHING;

-- Trends tables: Monthly partitions, keep 12 months
INSERT INTO partitions.config (table_name, period, keep_history) VALUES
    ('trends', 'month', '12 months'),
    ('trends_uint', 'month', '12 months')
ON CONFLICT (table_name) DO NOTHING;

-- Auditlog: Monthly partitions, keep 12 months
INSERT INTO partitions.config (table_name, period, keep_history) VALUES
    ('auditlog', 'month', '12 months')
ON CONFLICT (table_name) DO NOTHING;

View File

@@ -0,0 +1,223 @@
-- ============================================================================
-- Core functions for Zabbix partitioning (Create, Drop, Maintain).
-- ============================================================================
-- Reports whether a relation called p_partition_name is present in pg_class.
-- The lookup is by bare relation name only; it is not restricted to a schema.
CREATE OR REPLACE FUNCTION partitions.partition_exists(p_partition_name text)
RETURNS boolean AS $$
DECLARE
    v_found boolean;
BEGIN
    SELECT EXISTS (
        SELECT 1
        FROM pg_namespace AS ns
        INNER JOIN pg_class AS rel ON rel.relnamespace = ns.oid
        WHERE rel.relname = p_partition_name
    )
    INTO v_found;

    RETURN v_found;
END;
$$ LANGUAGE plpgsql;
-- Procedure to create one range partition of a partitioned parent table.
--
-- Parameters:
--   p_parent_table  name of the partitioned parent table (looked up by bare
--                   name; a table visible on the search_path is preferred)
--   p_start_time    lower bound of the partition (FROM, inclusive)
--   p_end_time      upper bound of the partition (TO, exclusive)
--   p_period        selects the name suffix: 'month' -> _pYYYYMM, any value
--                   containing 'hour' -> _pYYYYMMDDHH24, otherwise _pYYYYMMDD
--
-- Raises an exception when no partitioned table named p_parent_table exists.
-- Does nothing when a relation with the partition name already exists in the
-- parent's schema.
CREATE OR REPLACE PROCEDURE partitions.create_partition(
    p_parent_table text,
    p_start_time timestamp with time zone,
    p_end_time timestamp with time zone,
    p_period text
) LANGUAGE plpgsql AS $$
DECLARE
    v_partition_name text;
    v_start_ts bigint;
    v_end_ts bigint;
    v_suffix text;
    v_parent_schema text;
BEGIN
    -- Determine the schema of the parent table. Only partitioned tables
    -- (relkind 'p') qualify, and the choice is deterministic when the same
    -- name exists in several schemas.
    SELECT n.nspname INTO v_parent_schema
    FROM pg_class c
    JOIN pg_namespace n ON n.oid = c.relnamespace
    WHERE c.relname = p_parent_table
      AND c.relkind = 'p'
    ORDER BY pg_table_is_visible(c.oid) DESC, n.nspname
    LIMIT 1;

    IF NOT FOUND THEN
        RAISE EXCEPTION 'Parent table % not found', p_parent_table;
    END IF;

    -- Partition bounds are stored as epoch seconds.
    v_start_ts := extract(epoch from p_start_time)::bigint;
    v_end_ts := extract(epoch from p_end_time)::bigint;

    -- NOTE(review): to_char() on a timestamptz formats in the session
    -- TimeZone, so the suffix depends on that setting -- confirm maintenance
    -- sessions always run with the same TimeZone.
    IF p_period = 'month' THEN
        v_suffix := to_char(p_start_time, 'YYYYMM');
    ELSIF p_period LIKE '%hour%' THEN
        v_suffix := to_char(p_start_time, 'YYYYMMDDHH24');
    ELSE
        v_suffix := to_char(p_start_time, 'YYYYMMDD');
    END IF;

    v_partition_name := p_parent_table || '_p' || v_suffix;

    -- Check for the partition inside the parent's schema only, so that a
    -- same-named relation in another schema cannot suppress creation.
    IF EXISTS (
        SELECT 1
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE n.nspname = v_parent_schema
          AND c.relname = v_partition_name
    ) THEN
        RETURN;
    END IF;

    EXECUTE format(
        'CREATE TABLE %I.%I PARTITION OF %I.%I FOR VALUES FROM (%s) TO (%s)',
        v_parent_schema, v_partition_name, v_parent_schema, p_parent_table, v_start_ts, v_end_ts
    );
END;
$$;
-- Procedure to drop partitions of p_parent_table that end before the
-- retention cutoff (now() - p_retention).
--
-- Parameters:
--   p_parent_table  name of the partitioned parent (matched by bare name)
--   p_retention     how far back data must be kept
--   p_period        configured period; decides whether an 8-digit suffix
--                   spans a week ('week') or a day, and supplies the span of
--                   10-digit (hour-based) partitions
--
-- Only children named <parent>_p<digits> with a 6, 8 or 10 digit suffix are
-- considered; any other child is left untouched. Every DROP is followed by a
-- COMMIT, so the procedure must be called where transaction control is allowed.
CREATE OR REPLACE PROCEDURE partitions.drop_old_partitions(
    p_parent_table text,
    p_retention interval,
    p_period text
) LANGUAGE plpgsql AS $$
DECLARE
    v_cutoff_ts bigint;
    v_partition record;
    v_partition_date timestamp with time zone;
    v_suffix text;
    v_prefix text := p_parent_table || '_p';
    v_span interval;
BEGIN
    -- Calculate cutoff timestamp (epoch seconds)
    v_cutoff_ts := extract(epoch from (now() - p_retention))::bigint;

    FOR v_partition IN
        SELECT
            child.relname::text AS partition_name,
            n.nspname::text AS partition_schema
        FROM pg_inherits
        JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
        JOIN pg_class child ON pg_inherits.inhrelid = child.oid
        JOIN pg_namespace n ON child.relnamespace = n.oid
        WHERE parent.relname = p_parent_table
    LOOP
        -- Dropping is destructive: only touch children that really follow the
        -- <parent>_p<digits> naming scheme instead of trusting the position
        -- of the suffix alone.
        IF left(v_partition.partition_name, length(v_prefix)) <> v_prefix THEN
            CONTINUE;
        END IF;

        v_suffix := substring(v_partition.partition_name from length(v_prefix) + 1);

        IF v_suffix !~ '^[0-9]+$' THEN
            CONTINUE;
        END IF;

        -- Parse the suffix into the partition start.
        -- NOTE(review): to_timestamp() interprets the text in the session
        -- TimeZone and the AT TIME ZONE result is cast back using the same
        -- setting -- confirm maintenance sessions run with TimeZone = UTC.
        BEGIN
            IF length(v_suffix) = 6 THEN -- YYYYMM
                v_partition_date := to_timestamp(v_suffix || '01', 'YYYYMMDD') AT TIME ZONE 'UTC';
            ELSIF length(v_suffix) = 8 THEN -- YYYYMMDD
                v_partition_date := to_timestamp(v_suffix, 'YYYYMMDD') AT TIME ZONE 'UTC';
            ELSIF length(v_suffix) = 10 THEN -- YYYYMMDDHH
                v_partition_date := to_timestamp(v_suffix, 'YYYYMMDDHH24') AT TIME ZONE 'UTC';
            ELSE
                CONTINUE; -- Ignore non-matching suffix lengths
            END IF;
        EXCEPTION WHEN OTHERS THEN
            -- Safely ignore parsing errors for oddly named partitions
            CONTINUE;
        END;

        -- Determine how long the partition spans. This is done outside the
        -- exception block above so that real errors are still raised.
        IF length(v_suffix) = 6 THEN
            v_span := '1 month'::interval;
        ELSIF length(v_suffix) = 8 THEN
            -- A weekly period names its partitions by day as well.
            IF p_period = 'week' THEN
                v_span := '1 week'::interval;
            ELSE
                v_span := '1 day'::interval;
            END IF;
        ELSIF p_period LIKE '%hour%' THEN
            v_span := p_period::interval;
        ELSE
            -- Hour-named partition but the configured period is not
            -- hour-based: its span is unknown, and casting p_period to an
            -- interval would abort the whole maintenance run.
            RAISE WARNING 'Skipping partition %: cannot derive its span from period %',
                v_partition.partition_name, p_period;
            CONTINUE;
        END IF;

        -- Drop the partition once its end lies before the cutoff.
        IF extract(epoch from (v_partition_date + v_span)) < v_cutoff_ts THEN
            RAISE NOTICE 'Dropping old partition %', v_partition.partition_name;
            EXECUTE format('DROP TABLE %I.%I', v_partition.partition_schema, v_partition.partition_name);
            COMMIT; -- Release lock immediately
        END IF;
    END LOOP;
END;
$$;
-- MAIN procedure to maintain a single table.
--
-- Parameters:
--   p_table_name         partitioned parent table to maintain
--   p_period             'day', 'week', 'month', or an interval text that
--                        contains 'hour' (e.g. '1 hour')
--   p_keep_history       retention; also decides how many past partitions
--                        are created
--   p_future_partitions  number of partitions to pre-create after the current
--                        one (NULL falls back to 5, negative values to 0)
--
-- Steps: create the current and future partitions, create past partitions
-- covering the retention period, drop expired partitions, then stamp
-- partitions.config.last_updated. Raises an exception for an unsupported
-- period. The procedure issues COMMIT, so it must be called where transaction
-- control is allowed.
CREATE OR REPLACE PROCEDURE partitions.maintain_table(
    p_table_name text,
    p_period text,
    p_keep_history interval,
    p_future_partitions integer DEFAULT 5
) LANGUAGE plpgsql AS $$
DECLARE
    v_start_time timestamp with time zone;
    v_period_interval interval;
    i integer;
    v_past_iterations integer;
    v_future_count integer;
BEGIN
    -- NOTE(review): "now() AT TIME ZONE 'UTC'" yields a timestamp without
    -- time zone, which is cast back to timestamptz using the session TimeZone
    -- when assigned to v_start_time -- confirm maintenance sessions run with
    -- TimeZone = UTC, otherwise period boundaries follow the session zone.
    IF p_period = 'day' THEN
        v_period_interval := '1 day'::interval;
        v_start_time := date_trunc('day', now() AT TIME ZONE 'UTC');
        -- Calculate how many past days cover the retention period (86400 seconds = 1 day)
        v_past_iterations := ceil(extract(epoch from p_keep_history) / 86400)::integer;
    ELSIF p_period = 'week' THEN
        v_period_interval := '1 week'::interval;
        v_start_time := date_trunc('week', now() AT TIME ZONE 'UTC');
        -- 604800 seconds = 1 week
        v_past_iterations := ceil(extract(epoch from p_keep_history) / 604800)::integer;
    ELSIF p_period = 'month' THEN
        v_period_interval := '1 month'::interval;
        v_start_time := date_trunc('month', now() AT TIME ZONE 'UTC');
        -- Approximate 30 days per month (2592000 seconds)
        v_past_iterations := ceil(extract(epoch from p_keep_history) / 2592000)::integer;
    ELSIF p_period LIKE '%hour%' THEN
        v_period_interval := p_period::interval;
        -- A zero or negative interval would divide by zero below and could
        -- never produce valid partition bounds.
        IF v_period_interval <= interval '0' THEN
            RAISE EXCEPTION 'Unsupported partitioning period: %', p_period;
        END IF;
        v_start_time := date_trunc('hour', now() AT TIME ZONE 'UTC');
        v_past_iterations := ceil(extract(epoch from p_keep_history) / extract(epoch from v_period_interval))::integer;
    ELSE
        RAISE EXCEPTION 'Unsupported partitioning period: %', p_period;
    END IF;

    -- A NULL bound makes the FOR loop raise, and a negative one would skip
    -- the loop entirely, leaving even the current partition uncreated.
    v_future_count := greatest(coalesce(p_future_partitions, 5), 0);

    -- 1. Create Future Partitions (Current + Buffer)
    FOR i IN 0..v_future_count LOOP
        CALL partitions.create_partition(
            p_table_name,
            v_start_time + (i * v_period_interval),
            v_start_time + ((i + 1) * v_period_interval),
            p_period
        );
        COMMIT; -- Release lock immediately
    END LOOP;

    -- 2. Create Past Partitions (Covering retention period)
    IF v_past_iterations > 0 THEN
        FOR i IN 1..v_past_iterations LOOP
            CALL partitions.create_partition(
                p_table_name,
                v_start_time - (i * v_period_interval),
                v_start_time - ((i - 1) * v_period_interval),
                p_period
            );
            COMMIT; -- Release lock immediately
        END LOOP;
    END IF;

    -- 3. Drop Old Partitions
    CALL partitions.drop_old_partitions(p_table_name, p_keep_history, p_period);

    -- 4. Update Metadata
    UPDATE partitions.config SET last_updated = now() WHERE table_name = p_table_name;
END;
$$;
-- Global maintenance entry point: runs partitions.maintain_table once for
-- every row of partitions.config, passing that row's settings.
CREATE OR REPLACE PROCEDURE partitions.run_maintenance()
LANGUAGE plpgsql AS $$
DECLARE
    v_cfg record;
BEGIN
    FOR v_cfg IN
        SELECT
            cfg.table_name,
            cfg.period,
            cfg.keep_history,
            cfg.future_partitions
        FROM partitions.config AS cfg
    LOOP
        CALL partitions.maintain_table(
            v_cfg.table_name,
            v_cfg.period,
            v_cfg.keep_history,
            v_cfg.future_partitions
        );
    END LOOP;
END;
$$;

View File

@@ -0,0 +1,59 @@
-- ============================================================================
-- Converts standard Zabbix tables to Partitioned tables.
-- WARNING: This renames existing tables to *_old.
--
-- For every table listed in partitions.config:
--   * ordinary table    -> renamed to <table>_old, a new empty partitioned
--                          table is created in its place and its partitions
--                          are created via partitions.maintain_table
--   * partitioned table -> left as is; only its partitions are maintained
--   * missing table     -> a warning is raised
-- ============================================================================
DO $$
DECLARE
    v_row record;
    v_table text;
    v_old_table text;
    v_schema text;
    v_relkind text;
BEGIN
    FOR v_row IN
        SELECT table_name, period, keep_history, future_partitions
        FROM partitions.config
    LOOP
        v_table := v_row.table_name;
        v_old_table := v_table || '_old';

        -- Determine schema and kind in one lookup. Only ordinary ('r') and
        -- partitioned ('p') tables qualify, so an index or view with the same
        -- name cannot supply the schema; a table visible on the search_path
        -- is preferred when the name exists in several schemas.
        SELECT n.nspname, c.relkind::text INTO v_schema, v_relkind
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relname = v_table
          AND c.relkind IN ('r', 'p')
        ORDER BY pg_table_is_visible(c.oid) DESC, n.nspname
        LIMIT 1;

        IF NOT FOUND THEN
            RAISE WARNING 'Table % not found!', v_table;
        ELSIF v_relkind = 'r' THEN
            RAISE NOTICE 'Converting table % to partitioned table...', v_table;

            -- 1. Rename existing table
            EXECUTE format('ALTER TABLE %I.%I RENAME TO %I', v_schema, v_table, v_old_table);

            -- 2. Create new partitioned table (handling auditlog PK uniquely)
            IF v_table = 'auditlog' THEN
                -- The primary key must contain the partition key, so it is
                -- rebuilt as (auditid, clock) instead of being copied.
                EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING DEFAULTS INCLUDING COMMENTS) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table);
                EXECUTE format('ALTER TABLE %I.%I ADD PRIMARY KEY (auditid, clock)', v_schema, v_table);
                EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_p_1 ON %I.%I (userid, clock)', v_schema, v_table);
                EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_p_2 ON %I.%I (clock)', v_schema, v_table);
                EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_p_3 ON %I.%I (resourcetype, resourceid)', v_schema, v_table);
                EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_p_4 ON %I.%I (recordsetid)', v_schema, v_table);
                EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_p_5 ON %I.%I (ip)', v_schema, v_table);
            ELSE
                EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table);
            END IF;

            -- 3. Create initial partitions
            RAISE NOTICE 'Creating initial partitions for %...', v_table;
            CALL partitions.maintain_table(v_table, v_row.period, v_row.keep_history, v_row.future_partitions);

            -- Existing rows are intentionally not copied. To migrate them,
            -- insert from <table>_old into the new table manually afterwards.
        ELSE
            RAISE NOTICE 'Table % is already partitioned. Skipping conversion.', v_table;
            -- Maintain only this table. partitions.run_maintenance() would
            -- also process configured tables that have not been converted
            -- yet and fail on them.
            CALL partitions.maintain_table(v_table, v_row.period, v_row.keep_history, v_row.future_partitions);
        END IF;
    END LOOP;
END $$;

View File

@@ -0,0 +1,33 @@
-- ============================================================================
-- Creates a view to monitor partition status and sizes.
-- One row per configured table that is currently a partitioned table.
-- Partition names are compared as text against the name the current period
-- would have, which works because the date suffixes sort chronologically.
-- ============================================================================
DROP VIEW IF EXISTS partitions.monitoring;

CREATE VIEW partitions.monitoring AS
SELECT
    parent.relname AS parent_table,
    c.table_name,
    c.period,
    c.keep_history,
    count(child.relname) AS partition_count,
    -- Partitions whose name sorts after the current period's partition name
    count(child.relname) FILTER (
        WHERE
            (c.period = 'day' AND child.relname > (parent.relname || '_p' || to_char(now() AT TIME ZONE 'UTC', 'YYYYMMDD')))
            OR
            (c.period = 'month' AND child.relname > (parent.relname || '_p' || to_char(now() AT TIME ZONE 'UTC', 'YYYYMM')))
            OR
            (c.period = 'week' AND child.relname > (parent.relname || '_p' || to_char(date_trunc('week', now() AT TIME ZONE 'UTC'), 'YYYYMMDD')))
            OR
            (c.period LIKE '%hour%' AND child.relname > (parent.relname || '_p' || to_char(now() AT TIME ZONE 'UTC', 'YYYYMMDDHH24')))
    ) AS future_partitions,
    -- sum() over zero partitions is NULL; report an empty table as 0 bytes
    COALESCE(sum(pg_total_relation_size(child.oid)), 0) AS total_size_bytes,
    pg_size_pretty(COALESCE(sum(pg_total_relation_size(child.oid)), 0)) AS total_size,
    min(child.relname) AS oldest_partition,
    max(child.relname) AS newest_partition,
    c.last_updated
FROM partitions.config c
JOIN pg_class parent ON parent.relname = c.table_name
LEFT JOIN pg_inherits ON pg_inherits.inhparent = parent.oid
LEFT JOIN pg_class child ON pg_inherits.inhrelid = child.oid
WHERE parent.relkind = 'p' -- Only partitioned tables
GROUP BY parent.relname, c.table_name, c.period, c.keep_history, c.last_updated;

View File

@@ -0,0 +1,251 @@
# PostgreSQL Partitioning for Zabbix
This is the declarative partitioning implementation for Zabbix `history*`, `trends*`, and `auditlog` tables on PostgreSQL. This solution is intended to replace standard Zabbix housekeeping for the configured tables. Partitioning is very useful for large environments because it completely eliminates the housekeeper from the process. Instead of huge DELETE queries on several million rows, fast DDL queries (`DROP TABLE`) are executed, each of which drops an entire partition.
> [!WARNING]
> 1. **Data Visibility**: After enabling partitioning, old data remains in `*_old` tables and is **NOT visible** in Zabbix. You must migrate data manually if needed.
> 2. **Disable Housekeeping**: You **MUST** disable Zabbix Housekeeper for History and Trends in *Administration -> Housekeeping*.
## Table of Contents
- [Architecture](#architecture)
- [Components](#components)
- [Installation](#installation)
- [Configuration](#configuration)
- [Modifying Retention](#modifying-retention)
- [Maintenance](#maintenance)
- [Scheduling Maintenance](#scheduling-maintenance)
- [Monitoring & Permissions](#monitoring--permissions)
- [Versioning](#versioning)
- [Least Privilege Access (`zbxpart_monitor`)](#least-privilege-access-zbxpart_monitor)
- [Implementation Details](#implementation-details)
- [`auditlog` Table](#auditlog-table)
- [Converting Existing Tables](#converting-existing-tables)
- [Upgrades](#upgrades)
## Architecture
The solution uses PostgreSQL native declarative partitioning (`PARTITION BY RANGE`).
All procedures, information, statistics and configuration are stored in the `partitions` schema to maintain full separation from Zabbix schema.
### Components
1. **Configuration Table**: `partitions.config` defines retention policies.
2. **Maintenance Procedure**: `partitions.run_maintenance()` manages partition lifecycle.
3. **Monitoring View**: `partitions.monitoring` provides system state visibility.
4. **Version Table**: `partitions.version` provides information about installed version of the partitioning solution.
## Installation
The installation is performed by executing the SQL procedures in the following order:
1. Initialize schema (`00_schema_create.sql`).
2. Install maintenance procedures (`01_maintenance.sql`).
3. Enable partitioning on tables (`02_enable_partitioning.sql`).
4. Install monitoring views (`03_monitoring_view.sql`).
**Command Example:**
You can deploy these scripts manually against your Zabbix database using `psql`. Navigate to the `procedures/` directory and run:
```bash
# Connect as the account that owns the partitioning objects (DB_USER below)
export PGPASSWORD="your_zabbix_password"
DB_HOST="localhost" # Or your DB endpoint
DB_NAME="zabbix"
DB_USER="zbxpart_admin"
for script in 00_schema_create.sql 01_maintenance.sql 02_enable_partitioning.sql 03_monitoring_view.sql; do
echo "Applying $script..."
psql -h $DB_HOST -U $DB_USER -d $DB_NAME -f "$script"
done
```
## Configuration
Partitioning policies are defined in the `partitions.config` table.
| Column | Type | Description |
|--------|------|-------------|
| `table_name` | text | Name of the Zabbix table (e.g., `history`, `trends`). |
| `period` | text | Partition interval: `day`, `week`, `month`, or an hour-based interval such as `1 hour`. |
| `keep_history` | interval | Data retention period (e.g., `30 days`, `12 months`). |
| `future_partitions` | integer | Number of future partitions to pre-create (buffer). Default: `5`. |
| `last_updated` | timestamp | Timestamp of the last successful maintenance run. |
### Modifying Retention
To change the retention period for a table, update the configuration:
```sql
UPDATE partitions.config
SET keep_history = '60 days'
WHERE table_name = 'history';
```
## Maintenance
The maintenance procedure `partitions.run_maintenance()` is responsible for:
1. Creating future partitions (current period + `future_partitions` buffer).
2. Creating past partitions (backward coverage based on `keep_history`).
3. Dropping partitions older than `keep_history`.
This procedure should be scheduled to run periodically (e.g., daily via `pg_cron` or system cron).
```sql
CALL partitions.run_maintenance();
```
### Scheduling Maintenance
To ensure partitions are created in advance and old data is cleaned up, the maintenance procedure should be scheduled to run automatically.
It is recommended to run the maintenance **twice a day** and not in round hours because of the way housekeeper works (e.g., at 05:30 and 23:30).
* **Primary Run**: Creates new future partitions and drops old ones.
* **Secondary Run**: Acts as a safety check. Since the procedure is idempotent (safe to run multiple times), a second run ensures everything is consistent if the first run failed or was interrupted.
You can schedule this using one of the following methods:
#### Option 1: `pg_cron` (Recommended)
`pg_cron` is a cron-based job scheduler that runs directly inside the database as an extension. It is very useful for cloud-based databases such as AWS RDS, Aurora, Azure, and GCP, because it handles authentication and connections securely for you and it's available as a managed extension. You do **not** need to install OS packages or configure anything. Simply modify the RDS Parameter Group to include `shared_preload_libraries = 'pg_cron'` and `cron.database_name = 'zabbix'`, reboot the instance, and execute `CREATE EXTENSION pg_cron;`.
**Setup `pg_cron` (Self-Hosted):**
1. Install the package via your OS package manager (e.g., `postgresql-15-cron` on Debian/Ubuntu, or `pg_cron_15` on RHEL/CentOS).
2. Configure it by modifying `postgresql.conf`:
```ini
shared_preload_libraries = 'pg_cron'
cron.database_name = 'zabbix'
```
3. Restart PostgreSQL:
```bash
systemctl restart postgresql
```
4. Connect to your `zabbix` database as a superuser and create the extension:
```sql
CREATE EXTENSION pg_cron;
```
5. Schedule the job to run:
```sql
SELECT cron.schedule('zabbix_partition_maintenance', '30 5,23 * * *', 'CALL partitions.run_maintenance();');
```
**⚠️ Troubleshooting `pg_cron` Connection Errors:**
If your cron jobs fail to execute and you see `FATAL: password authentication failed` in your PostgreSQL logs, it is because `pg_cron` attempts to connect via TCP (`localhost`) by default, which usually requires a password.
**Solution A: Use Local Unix Sockets (Easier)**
Edit your `postgresql.conf` to force `pg_cron` to use the local Unix socket (which uses passwordless `peer` authentication):
```ini
cron.host = '/var/run/postgresql' # Or '/tmp', depending on your OS
```
*(Restart PostgreSQL after making this change).*
**Solution B: Provide a Password (`.pgpass`)**
If you *must* connect via TCP with a specific database user and password, the `pg_cron` background worker needs a way to authenticate. You provide this by creating a `.pgpass` file for the OS `postgres` user.
1. Switch to the OS database user:
```bash
sudo su - postgres
```
2. Create or append your database credentials to `~/.pgpass` using the format `hostname:port:database:username:password`:
```bash
echo "localhost:5432:zabbix:zabbix:my_secure_password" >> ~/.pgpass
```
3. Set strict permissions (PostgreSQL will ignore the file if permissions are too loose):
```bash
chmod 0600 ~/.pgpass
```
**Managing `pg_cron` Jobs:**
If you need to verify or manage your scheduled jobs (run as superuser):
- To **list all active schedules**: `SELECT * FROM cron.job;`
- To **view execution logs/history**: `SELECT * FROM cron.job_run_details;`
- To **remove/unschedule** the job: `SELECT cron.unschedule('zabbix_partition_maintenance');`
#### Option 2: Systemd Timers
Systemd timers provide better logging and error handling properties than standard cron.
1. Create a service file **`/etc/systemd/system/zabbix-partitions.service`**:
```ini
[Unit]
Description=Zabbix PostgreSQL Partition Maintenance
After=network.target postgresql.service
[Service]
Type=oneshot
User=postgres
ExecStart=/usr/bin/psql -d zabbix -c "CALL partitions.run_maintenance();"
```
2. Create a timer file **`/etc/systemd/system/zabbix-partitions.timer`**:
```ini
[Unit]
Description=Run Zabbix Partition Maintenance Twice Daily
[Timer]
OnCalendar=*-*-* 05:30:00
OnCalendar=*-*-* 23:30:00
Persistent=true
[Install]
WantedBy=timers.target
```
3. Enable and start the timer:
```bash
systemctl daemon-reload
systemctl enable --now zabbix-partitions.timer
```
#### Option 3: System Cron (`crontab`)
Standard system cron is a simple fallback.
**Example Crontab Entry (`crontab -e`):**
```bash
# Run Zabbix partition maintenance twice daily (05:30 and 23:30)
30 5,23 * * * psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();" >> /var/log/zabbix_maintenance.log 2>&1
```
**Docker Environment:**
If running in Docker, you can execute it via the host's cron by targeting the container:
```bash
30 5,23 * * * docker exec zabbix-db-test psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();"
```
## Monitoring & Permissions
System state can be monitored via the `partitions.monitoring` view. For each configured table it reports the number of existing and future partitions, the timestamp of the last maintenance run, and the total size of the partitioned table in bytes.
```sql
SELECT * FROM partitions.monitoring;
```
### Versioning
To check the installed version of the partitioning solution:
```sql
SELECT * FROM partitions.version ORDER BY installed_at DESC LIMIT 1;
```
### Least Privilege Access (`zbxpart_monitor`)
For monitoring purposes, it is highly recommended to create a dedicated user with read-only access to the monitoring view instead of using the `zbxpart_admin` owner account.
```sql
CREATE USER zbxpart_monitor WITH PASSWORD 'secure_password';
GRANT USAGE ON SCHEMA partitions TO zbxpart_monitor;
GRANT SELECT ON partitions.monitoring TO zbxpart_monitor;
```
> [!WARNING]
> Because `03_monitoring_view.sql` uses a `DROP VIEW` command to apply updates, re-running the script will destroy all previously assigned `GRANT` permissions. If you ever update the view script, you **must** manually re-run the `GRANT SELECT` command above to restore access for the `zbxpart_monitor` user!
## Implementation Details
### `auditlog` Table
The standard Zabbix `auditlog` table has a primary key on `(auditid)`. Partitioning by `clock` requires the partition key to be part of the primary key.
To prevent placing a heavy, blocking lock on an `auditlog` table to alter its primary key, the enablement script (`02_enable_partitioning.sql`) detects it and handles it exactly like the history tables: it automatically renames the live, existing table to `auditlog_old`, and instantly creates a brand new, empty partitioned `auditlog` table pre-configured with the required `(auditid, clock)` composite primary key.
### Converting Existing Tables
The enablement script guarantees practically zero downtime by automatically renaming the existing tables to `table_name_old` and creating new partitioned tables matching the exact schema.
* **Note**: Data from the old tables is NOT automatically migrated to minimize downtime.
* New data flows into the new partitioned tables immediately.
* Old data remains accessible in `table_name_old` for manual lookup or migration if required.
## Upgrades
When upgrading Zabbix:
1. **Backup**: Ensure a full database backup exists.
2. **Compatibility**: Zabbix upgrade scripts may attempt to `ALTER` tables. PostgreSQL supports `ALTER TABLE` on partitioned tables for adding columns, which propagates to partitions.
3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted.

View File

@@ -0,0 +1,3 @@
# Script-based Partitioning
(Coming soon)

View File

@@ -0,0 +1,32 @@
# Zabbix PostgreSQL Partitioning Monitoring
This template relies on Zabbix Agent 2 and its PostgreSQL plugin. It allows you to monitor the health of your partitioned PostgreSQL database tables. It uses a single master item to pull all metrics in bulk over a single database connection, dynamically distributing the numbers to Zabbix using Dependent Items.
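There are three main item prototypes (the template additionally exposes the configured period and retention of each table as informational items):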
1. Future Partitions Buffer: Number of future partitions to be created
2. Total Size Bytes: Total size of the partitioned table in bytes
3. Time Since Last Maintenance: Time since the last maintenance script was run
They allow you to monitor all the critical metrics, and the first two also have triggers that raise a problem if something is wrong with the partitioning.
### Setup
1. Copy the SQL file (`template/partitions.get_all.sql`) into a directory on your Agent machine. E.g., `/etc/zabbix/zabbix_agent2.d/postgresql/`.
2. Install zabbix-agent2-plugin-postgresql package.
3. Open your Plugin configuration file `/etc/zabbix/zabbix_agent2.d/plugins.d/postgresql.conf` and add these lines to establish your custom query module AND a secure named session (e.g., `AWS_RDS`). Adjust the parameters to match your environment. You can use uri instead of named session if you want. In this case you will need to modify the item keys to use the correct parameters.
```ini
# 1. Enable Loadable Custom Queries (Mandatory in Zabbix 7.4+)
Plugins.PostgreSQL.CustomQueriesPath=/etc/zabbix/zabbix_agent2.d/postgresql/
Plugins.PostgreSQL.CustomQueriesEnabled=true
# 2. Establish a Secure Backend Session
Plugins.PostgreSQL.Sessions.AWS_RDS.Uri=tcp://your-cluster-endpoint.amazonaws.com:5432
Plugins.PostgreSQL.Sessions.AWS_RDS.User=zabbix
Plugins.PostgreSQL.Sessions.AWS_RDS.Password=<YOUR_ZABBIX_PASSWORD>
Plugins.PostgreSQL.Sessions.AWS_RDS.TLSConnect=verify_full
Plugins.PostgreSQL.Sessions.AWS_RDS.TLSCAFile=/etc/zabbix/global-bundle.pem
```
4. Restart your agent to apply the changes:
```bash
systemctl restart zabbix-agent2
```
5. Import the `zbx_pg_partitions_monitor_agent2.yaml` template into your Zabbix.
6. Link the template to your Host, navigate to its "Macros" tab, and define the needed macros (in this case it's just named session):
* `{$PG.CONNSTRING.AGENT2}`: `AWS_RDS`

View File

@@ -0,0 +1,8 @@
/* Bulk query for the Zabbix master item: one row per table in
   partitions.monitoring, with the retention rendered as text and the time
   since last_updated expressed in seconds. */
SELECT
    m.table_name,
    m.period,
    CAST(m.keep_history AS text) AS keep_history,
    m.future_partitions,
    m.total_size_bytes,
    EXTRACT(EPOCH FROM (now() - m.last_updated)) AS age_seconds
FROM partitions.monitoring AS m;

View File

@@ -0,0 +1,137 @@
# Zabbix template: monitors partitions.monitoring through the Agent 2
# PostgreSQL plugin. One master item fetches all rows; a dependent discovery
# rule creates per-table dependent items from that single result.
zabbix_export:
  version: '7.0'
  template_groups:
    - uuid: 748ad4d098d447d492bb935c907f652f
      name: Templates/Databases
  templates:
    - uuid: a1d5f8c3b2e44a7c9d6b1f2e8a3c5b4d
      template: 'PostgreSQL Partitioning by Zabbix Agent 2'
      name: 'PostgreSQL Partitioning by Zabbix Agent 2'
      description: 'Monitors the custom partitions.monitoring view via the native Zabbix Agent 2 PostgreSQL plugin. Using a single master to minimize the DB connections and load.'
      vendor:
        name: Zabbix Support
        version: 7.0-0
      groups:
        - name: Templates/Databases
      items:
        - uuid: b8c7d6e5f4a34b2c8d2e3f4a5b6c7d8e
          name: 'PostgreSQL: Get Partitioning Data'
          key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
          history: '0'
          value_type: TEXT
          description: 'Master item that queries all partition statistics in a single bulk JSON sequence.'
          tags:
            - tag: component
              value: raw
      discovery_rules:
        - uuid: b7c2a5d8f1e44b9c8a3f6d2e1c5b4a7d
          name: 'Partitioned Tables Discovery'
          type: DEPENDENT
          key: db.partitions.discovery.dependent
          item_prototypes:
            - uuid: f1a2b3c4d5e64f7a9b8c7d6e5f4a3b2c
              name: '{#TABLE_NAME}: Time Since Last Maintenance'
              type: DEPENDENT
              key: 'db.partitions.age["{#TABLE_NAME}"]'
              units: s
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$.[?(@.table_name == "{#TABLE_NAME}")].age_seconds.first()'
              master_item:
                key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
              tags:
                - tag: metric
                  value: age
                - tag: table
                  value: '{#TABLE_NAME}'
              trigger_prototypes:
                # The name references the macro so it always matches the
                # threshold actually used by the expression.
                - uuid: a9b8c7d6e5f44a3b8c1d2e3f4a5b6c7d
                  expression: 'last(/PostgreSQL Partitioning by Zabbix Agent 2/db.partitions.age["{#TABLE_NAME}"])>{$PARTITIONS.AGE}'
                  name: 'Table {#TABLE_NAME}: Maintenance script has not run successfully in over {$PARTITIONS.AGE}'
                  priority: WARNING
            - uuid: c4b9e2a5f1d84c7a9f3b6d1e5a2c8b4d
              name: '{#TABLE_NAME}: Future Partitions Buffer'
              type: DEPENDENT
              key: 'db.partitions.future["{#TABLE_NAME}"]'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$.[?(@.table_name == "{#TABLE_NAME}")].future_partitions.first()'
              master_item:
                key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
              tags:
                - tag: metric
                  value: partitions
                - tag: table
                  value: '{#TABLE_NAME}'
              trigger_prototypes:
                - uuid: d6e3a5c8b2f14d9e8a7b6c5d4e3f2a1b
                  expression: 'last(/PostgreSQL Partitioning by Zabbix Agent 2/db.partitions.future["{#TABLE_NAME}"])<{$PARTITIONS.LOW}'
                  name: 'Table {#TABLE_NAME}: Future partitions buffer is critically low (< {$PARTITIONS.LOW})'
                  priority: HIGH
            - uuid: e8f2a1b3c4d54e6f9a8b7c6d5e4f3a2b
              name: '{#TABLE_NAME}: Total Size Bytes'
              type: DEPENDENT
              key: 'db.partitions.size["{#TABLE_NAME}"]'
              units: B
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$.[?(@.table_name == "{#TABLE_NAME}")].total_size_bytes.first()'
              master_item:
                key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
              tags:
                - tag: metric
                  value: size
                - tag: table
                  value: '{#TABLE_NAME}'
            - uuid: ffa2b3c4d5e64f7a9b8c7d6e5f4a1001
              name: '{#TABLE_NAME}: Configured Partition Period'
              type: DEPENDENT
              key: 'db.partitions.period["{#TABLE_NAME}"]'
              value_type: CHAR
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$.[?(@.table_name == "{#TABLE_NAME}")].period.first()'
              master_item:
                key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
              tags:
                - tag: metric
                  value: config
                - tag: table
                  value: '{#TABLE_NAME}'
            - uuid: ffa2b3c4d5e64f7a9b8c7d6e5f4a1002
              name: '{#TABLE_NAME}: Configured Retention (Keep History)'
              type: DEPENDENT
              key: 'db.partitions.retention["{#TABLE_NAME}"]'
              value_type: CHAR
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$.[?(@.table_name == "{#TABLE_NAME}")].keep_history.first()'
              master_item:
                key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
              tags:
                - tag: metric
                  value: config
                - tag: table
                  value: '{#TABLE_NAME}'
          master_item:
            key: 'pgsql.custom.query["{$PG.CONNSTRING.AGENT2}",,,"{$PG.DBNAME}","partitions.get_all"]'
          lld_macro_paths:
            - lld_macro: '{#TABLE_NAME}'
              path: $.table_name
      macros:
        - macro: '{$PARTITIONS.AGE}'
          value: 24h
          description: 'The maximum period during which no new partitions may be created'
        - macro: '{$PARTITIONS.LOW}'
          value: '2'
          description: 'The minimum number of partitions that must exist in the future'
        - macro: '{$PG.CONNSTRING.AGENT2}'
          value: AWS_RDS
          description: 'Session name or URI of the PostgreSQL instance'
        - macro: '{$PG.DBNAME}'
          value: zabbix