feat: enterprise audit fixes (schema resolution, race conditions, documentation)

This commit is contained in:
Maksym Buz
2026-04-30 10:50:36 +00:00
parent fb65b2f1e7
commit 7305b79943
9 changed files with 399 additions and 52 deletions

View File

@@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS partitions.version (
description text description text
); );
INSERT INTO partitions.version (version, description) VALUES ('7-1', 'Zabbix 7.4 and 7.0 compatible version') INSERT INTO partitions.version (version, description) VALUES ('7-2', 'Added housekeeper task interceptor trigger to drop tasks for partitioned tables')
ON CONFLICT (version) DO NOTHING; ON CONFLICT (version) DO NOTHING;
-- Default configuration for Zabbix tables (adjust as needed) -- Default configuration for Zabbix tables (adjust as needed)
@@ -32,9 +32,16 @@ INSERT INTO partitions.config (table_name, period, keep_history) VALUES
('history_uint', 'day', '30 days'), ('history_uint', 'day', '30 days'),
('history_str', 'day', '30 days'), ('history_str', 'day', '30 days'),
('history_log', 'day', '30 days'), ('history_log', 'day', '30 days'),
('history_text', 'day', '30 days') ('history_text', 'day', '30 days'),
('history_bin', 'day', '30 days')
ON CONFLICT (table_name) DO NOTHING; ON CONFLICT (table_name) DO NOTHING;
-- Zabbix 8.0+ only: Uncomment the following lines if running Zabbix 8.0 or later
-- INSERT INTO partitions.config (table_name, period, keep_history) VALUES
-- ('history_json', 'day', '30 days')
-- ON CONFLICT (table_name) DO NOTHING;
-- Trends tables: Monthly partitions, keep 12 months -- Trends tables: Monthly partitions, keep 12 months
INSERT INTO partitions.config (table_name, period, keep_history) VALUES INSERT INTO partitions.config (table_name, period, keep_history) VALUES
('trends', 'month', '12 months'), ('trends', 'month', '12 months'),

View File

@@ -2,14 +2,15 @@
-- Core functions for Zabbix partitioning (Create, Drop, Maintain). -- Core functions for Zabbix partitioning (Create, Drop, Maintain).
-- ============================================================================ -- ============================================================================
-- Function to check if a partition exists -- Function to check if a partition exists in a specific schema
CREATE OR REPLACE FUNCTION partitions.partition_exists(p_partition_name text) CREATE OR REPLACE FUNCTION partitions.partition_exists(p_partition_name text, p_schema text)
RETURNS boolean AS $$ RETURNS boolean AS $$
BEGIN BEGIN
RETURN EXISTS ( RETURN EXISTS (
SELECT 1 FROM pg_class c SELECT 1 FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = p_partition_name WHERE c.relname = p_partition_name
AND n.nspname = p_schema
); );
END; END;
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
@@ -32,7 +33,7 @@ BEGIN
SELECT n.nspname INTO v_parent_schema SELECT n.nspname INTO v_parent_schema
FROM pg_class c FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = p_parent_table; WHERE c.relname = p_parent_table AND pg_table_is_visible(c.oid);
IF NOT FOUND THEN IF NOT FOUND THEN
RAISE EXCEPTION 'Parent table % not found', p_parent_table; RAISE EXCEPTION 'Parent table % not found', p_parent_table;
@@ -51,11 +52,20 @@ BEGIN
v_partition_name := p_parent_table || '_p' || v_suffix; v_partition_name := p_parent_table || '_p' || v_suffix;
IF NOT partitions.partition_exists(v_partition_name) THEN IF NOT partitions.partition_exists(v_partition_name, v_parent_schema) THEN
EXECUTE format( BEGIN
'CREATE TABLE %I.%I PARTITION OF %I.%I FOR VALUES FROM (%s) TO (%s)', EXECUTE format(
v_parent_schema, v_partition_name, v_parent_schema, p_parent_table, v_start_ts, v_end_ts 'CREATE TABLE %I.%I PARTITION OF %I.%I FOR VALUES FROM (%s) TO (%s)',
); v_parent_schema, v_partition_name, v_parent_schema, p_parent_table, v_start_ts, v_end_ts
);
EXCEPTION
WHEN invalid_object_definition THEN
-- Ignore overlap errors (e.g., when transitioning from daily to hourly partitioning)
RAISE NOTICE 'Partition % overlaps with an existing partition. Skipping.', v_partition_name;
WHEN duplicate_table THEN
-- Ignore race condition: another process created the partition concurrently
RAISE NOTICE 'Partition % already exists (concurrent creation). Skipping.', v_partition_name;
END;
END IF; END IF;
END; END;
$$; $$;
@@ -84,7 +94,7 @@ BEGIN
JOIN pg_class parent ON pg_inherits.inhparent = parent.oid JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
JOIN pg_class child ON pg_inherits.inhrelid = child.oid JOIN pg_class child ON pg_inherits.inhrelid = child.oid
JOIN pg_namespace n ON child.relnamespace = n.oid JOIN pg_namespace n ON child.relnamespace = n.oid
WHERE parent.relname = p_parent_table WHERE parent.relname = p_parent_table AND pg_table_is_visible(parent.oid)
LOOP LOOP
-- Parse partition suffix to determine age -- Parse partition suffix to determine age
-- Format: parent_pYYYYMM or parent_pYYYYMMDD -- Format: parent_pYYYYMM or parent_pYYYYMMDD
@@ -92,11 +102,11 @@ BEGIN
BEGIN BEGIN
IF length(v_suffix) = 6 THEN -- YYYYMM IF length(v_suffix) = 6 THEN -- YYYYMM
v_partition_date := to_timestamp(v_suffix || '01', 'YYYYMMDD') AT TIME ZONE 'UTC'; v_partition_date := timezone('UTC', to_timestamp(v_suffix || '01', 'YYYYMMDD')::timestamp without time zone);
ELSIF length(v_suffix) = 8 THEN -- YYYYMMDD ELSIF length(v_suffix) = 8 THEN -- YYYYMMDD
v_partition_date := to_timestamp(v_suffix, 'YYYYMMDD') AT TIME ZONE 'UTC'; v_partition_date := timezone('UTC', to_timestamp(v_suffix, 'YYYYMMDD')::timestamp without time zone);
ELSIF length(v_suffix) = 10 THEN -- YYYYMMDDHH ELSIF length(v_suffix) = 10 THEN -- YYYYMMDDHH
v_partition_date := to_timestamp(v_suffix, 'YYYYMMDDHH24') AT TIME ZONE 'UTC'; v_partition_date := timezone('UTC', to_timestamp(v_suffix, 'YYYYMMDDHH24')::timestamp without time zone);
ELSE ELSE
CONTINUE; -- Ignore non-matching suffix lengths CONTINUE; -- Ignore non-matching suffix lengths
END IF; END IF;
@@ -153,25 +163,25 @@ DECLARE
BEGIN BEGIN
IF p_period = 'day' THEN IF p_period = 'day' THEN
v_period_interval := '1 day'::interval; v_period_interval := '1 day'::interval;
v_start_time := date_trunc('day', now() AT TIME ZONE 'UTC'); v_start_time := date_trunc('day', now(), 'UTC');
-- Calculate how many past days cover the retention period (86400 seconds = 1 day) -- Calculate how many past days cover the retention period (86400 seconds = 1 day)
v_past_iterations := ceil(extract(epoch from p_keep_history) / 86400)::integer; v_past_iterations := ceil(extract(epoch from p_keep_history) / 86400)::integer;
ELSIF p_period = 'week' THEN ELSIF p_period = 'week' THEN
v_period_interval := '1 week'::interval; v_period_interval := '1 week'::interval;
v_start_time := date_trunc('week', now() AT TIME ZONE 'UTC'); v_start_time := date_trunc('week', now(), 'UTC');
-- 604800 seconds = 1 week -- 604800 seconds = 1 week
v_past_iterations := ceil(extract(epoch from p_keep_history) / 604800)::integer; v_past_iterations := ceil(extract(epoch from p_keep_history) / 604800)::integer;
ELSIF p_period = 'month' THEN ELSIF p_period = 'month' THEN
v_period_interval := '1 month'::interval; v_period_interval := '1 month'::interval;
v_start_time := date_trunc('month', now() AT TIME ZONE 'UTC'); v_start_time := date_trunc('month', now(), 'UTC');
-- Approximate 30 days per month (2592000 seconds) -- Approximate 30 days per month (2592000 seconds)
v_past_iterations := ceil(extract(epoch from p_keep_history) / 2592000)::integer; v_past_iterations := ceil(extract(epoch from p_keep_history) / 2592000)::integer;
ELSIF p_period LIKE '%hour%' THEN ELSIF p_period LIKE '%hour%' THEN
v_period_interval := p_period::interval; v_period_interval := p_period::interval;
v_start_time := date_trunc('hour', now() AT TIME ZONE 'UTC'); v_start_time := to_timestamp(floor(extract(epoch from now()) / extract(epoch from v_period_interval)) * extract(epoch from v_period_interval));
v_past_iterations := ceil(extract(epoch from p_keep_history) / extract(epoch from v_period_interval))::integer; v_past_iterations := ceil(extract(epoch from p_keep_history) / extract(epoch from v_period_interval))::integer;
ELSE ELSE
@@ -221,3 +231,14 @@ BEGIN
END LOOP; END LOOP;
END; END;
$$; $$;
-- Trigger function that filters housekeeper task inserts.
-- Tasks targeting tables managed by the partitioning engine are dropped
-- (partition DROP replaces row-wise housekeeping); all other tasks pass through.
CREATE OR REPLACE FUNCTION partitions.housekeeper_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN
-- Let the task through only when its target table is NOT partition-managed.
IF NOT EXISTS (SELECT 1 FROM partitions.config WHERE table_name = NEW.tablename) THEN
RETURN NEW;
END IF;
-- Partition-managed table: returning NULL from a BEFORE trigger discards the row.
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

View File

@@ -19,10 +19,10 @@ BEGIN
SELECT n.nspname INTO v_schema SELECT n.nspname INTO v_schema
FROM pg_class c FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = v_table; WHERE c.relname = v_table AND pg_table_is_visible(c.oid);
IF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r') THEN IF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r' AND pg_table_is_visible(oid)) THEN
RAISE NOTICE 'Converting table % to partitioned table...', v_table; RAISE NOTICE 'Converting table % to partitioned table...', v_table;
-- 1. Rename existing table -- 1. Rename existing table
@@ -48,12 +48,37 @@ BEGIN
-- Optional: Migrate existing data -- Optional: Migrate existing data
-- EXECUTE format('INSERT INTO %I.%I SELECT * FROM %I.%I', v_schema, v_table, v_schema, v_old_table); -- EXECUTE format('INSERT INTO %I.%I SELECT * FROM %I.%I', v_schema, v_table, v_schema, v_old_table);
ELSIF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'p') THEN ELSIF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'p' AND pg_table_is_visible(oid)) THEN
RAISE NOTICE 'Table % is already partitioned. Skipping conversion.', v_table; RAISE NOTICE 'Table % is already partitioned. Skipping conversion.', v_table;
-- Just run maintenance to ensure partitions exist -- Just run maintenance for this specific table to ensure partitions exist
CALL partitions.run_maintenance(); CALL partitions.maintain_table(v_table, v_row.period, v_row.keep_history, v_row.future_partitions);
ELSE ELSE
RAISE WARNING 'Table % not found!', v_table; RAISE WARNING 'Table % not found!', v_table;
END IF; END IF;
END LOOP; END LOOP;
-- Attach trigger to housekeeper table to silently discard tasks for partitioned tables.
-- Dynamically determine the schema of the housekeeper table to support custom schemas.
SELECT n.nspname INTO v_schema
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = 'housekeeper' AND pg_table_is_visible(c.oid);
IF v_schema IS NOT NULL THEN
EXECUTE format('DROP TRIGGER IF EXISTS housekeeper_filter ON %I.housekeeper', v_schema);
EXECUTE format('CREATE TRIGGER housekeeper_filter BEFORE INSERT ON %I.housekeeper FOR EACH ROW EXECUTE FUNCTION partitions.housekeeper_insert_trigger()', v_schema);
RAISE NOTICE 'Housekeeper intercept trigger installed on %.housekeeper', v_schema;
ELSE
RAISE WARNING 'housekeeper table not found — trigger NOT installed!';
END IF;
END $$; END $$;
-- ==========================================================================
-- IMPORTANT: If the Zabbix Server connects with a non-superuser (e.g., 'zabbix'),
-- that user MUST have access to the partitions schema for the housekeeper trigger
-- to work. Without these GRANTs, every INSERT into housekeeper will FAIL.
-- Uncomment and adjust the username below:
-- ==========================================================================
-- GRANT USAGE ON SCHEMA partitions TO zabbix;
-- GRANT SELECT ON partitions.config TO zabbix;

View File

@@ -2,13 +2,13 @@
-- Creates a view to monitor partition status and sizes. -- Creates a view to monitor partition status and sizes.
-- ============================================================================ -- ============================================================================
DROP VIEW IF EXISTS partitions.monitoring; CREATE OR REPLACE VIEW partitions.monitoring AS
CREATE VIEW partitions.monitoring AS
SELECT SELECT
parent.relname AS parent_table, parent.relname AS parent_table,
c.table_name, c.table_name,
c.period, c.period,
c.keep_history, c.keep_history,
c.future_partitions AS configured_future_partitions,
count(child.relname) AS partition_count, count(child.relname) AS partition_count,
count(child.relname) FILTER ( count(child.relname) FILTER (
WHERE WHERE
@@ -19,15 +19,15 @@ SELECT
(c.period = 'week' AND child.relname > (parent.relname || '_p' || to_char(date_trunc('week', now() AT TIME ZONE 'UTC'), 'YYYYMMDD'))) (c.period = 'week' AND child.relname > (parent.relname || '_p' || to_char(date_trunc('week', now() AT TIME ZONE 'UTC'), 'YYYYMMDD')))
OR OR
(c.period LIKE '%hour%' AND child.relname > (parent.relname || '_p' || to_char(now() AT TIME ZONE 'UTC', 'YYYYMMDDHH24'))) (c.period LIKE '%hour%' AND child.relname > (parent.relname || '_p' || to_char(now() AT TIME ZONE 'UTC', 'YYYYMMDDHH24')))
) AS future_partitions, ) AS actual_future_partitions,
sum(pg_total_relation_size(child.oid)) AS total_size_bytes, sum(pg_total_relation_size(child.oid)) AS total_size_bytes,
pg_size_pretty(sum(pg_total_relation_size(child.oid))) AS total_size, pg_size_pretty(sum(pg_total_relation_size(child.oid))) AS total_size,
min(child.relname) AS oldest_partition, min(child.relname) AS oldest_partition,
max(child.relname) AS newest_partition, max(child.relname) AS newest_partition,
c.last_updated c.last_updated
FROM partitions.config c FROM partitions.config c
JOIN pg_class parent ON parent.relname = c.table_name JOIN pg_class parent ON parent.relname = c.table_name AND pg_table_is_visible(parent.oid)
LEFT JOIN pg_inherits ON pg_inherits.inhparent = parent.oid LEFT JOIN pg_inherits ON pg_inherits.inhparent = parent.oid
LEFT JOIN pg_class child ON pg_inherits.inhrelid = child.oid LEFT JOIN pg_class child ON pg_inherits.inhrelid = child.oid
WHERE parent.relkind = 'p' -- Only partitioned tables WHERE parent.relkind = 'p' -- Only partitioned tables
GROUP BY parent.relname, c.table_name, c.period, c.keep_history, c.last_updated; GROUP BY parent.relname, c.table_name, c.period, c.keep_history, c.future_partitions, c.last_updated;

View File

@@ -0,0 +1,79 @@
-- ============================================================================
-- Reverts Zabbix partitioned tables back to standard non-partitioned tables.
-- Existing partitioned tables will be renamed to *_part (data is preserved).
-- ============================================================================
-- Anonymous block: for every table registered in partitions.config, rename the
-- partitioned table to <name>_part and create a fresh, empty, unpartitioned
-- replacement with the original name. Data is preserved in the *_part tables.
DO $$
DECLARE
v_row record;
v_table text;
v_part_table text;
v_schema text;
BEGIN
FOR v_row IN SELECT * FROM partitions.config LOOP
v_table := v_row.table_name;
v_part_table := v_table || '_part';
-- Determine schema of the partitioned table
-- (relkind = 'p' restricts to partitioned tables; SELECT INTO leaves
-- v_schema NULL when no visible partitioned table matches).
SELECT n.nspname INTO v_schema
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = v_table AND c.relkind = 'p' AND pg_table_is_visible(c.oid);
IF v_schema IS NOT NULL THEN
RAISE NOTICE 'Reverting partitioned table %...', v_table;
-- 1. Rename existing partitioned table to *_part
EXECUTE format('ALTER TABLE %I.%I RENAME TO %I', v_schema, v_table, v_part_table);
-- 2. Create standard (unpartitioned) replacement table based on the structure
IF v_table = 'auditlog' THEN
-- For auditlog, we need to try and restore the original single-column PK (auditid) if possible
-- (the partitioned version used a composite key including the partition column,
-- so LIKE ... INCLUDING ALL would carry the wrong key; copy only defaults/comments
-- and rebuild the PK and indexes explicitly below).
EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING DEFAULTS INCLUDING COMMENTS)', v_schema, v_table, v_schema, v_part_table);
BEGIN
EXECUTE format('ALTER TABLE %I.%I ADD PRIMARY KEY (auditid)', v_schema, v_table);
-- Best-effort: duplicates carried over from the partitioned layout would
-- make the PK impossible, so warn instead of aborting the whole revert.
EXCEPTION WHEN others THEN
RAISE WARNING 'Failed to create primary key on auditlog, might already exist or duplicates present.';
END;
-- Recreate the stock Zabbix auditlog indexes by hand.
EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_1 ON %I.%I (userid, clock)', v_schema, v_table);
EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_2 ON %I.%I (clock)', v_schema, v_table);
EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_3 ON %I.%I (resourcetype, resourceid)', v_schema, v_table);
EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_4 ON %I.%I (recordsetid)', v_schema, v_table);
EXECUTE format('CREATE INDEX IF NOT EXISTS auditlog_5 ON %I.%I (ip)', v_schema, v_table);
ELSE
-- For others, copy everything including indexes
EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL)', v_schema, v_table, v_schema, v_part_table);
END IF;
RAISE NOTICE 'SUCCESS: % reverted to default. Partitioned data stored in % (You can DROP TABLE % CASCADE; later).', v_table, v_part_table, v_part_table;
ELSIF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r' AND pg_table_is_visible(oid)) THEN
-- relkind = 'r': a plain (regular) table already exists under this name.
RAISE NOTICE 'Table % is already a regular table. Skipping.', v_table;
ELSE
RAISE WARNING 'Partitioned table % not found!', v_table;
END IF;
END LOOP;
-- Drop the housekeeper intercept trigger (dynamically determine schema for custom schema support)
SELECT n.nspname INTO v_schema
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = 'housekeeper' AND pg_table_is_visible(c.oid);
IF v_schema IS NOT NULL THEN
EXECUTE format('DROP TRIGGER IF EXISTS housekeeper_filter ON %I.housekeeper', v_schema);
RAISE NOTICE 'Housekeeper intercept trigger removed from %.housekeeper', v_schema;
ELSE
RAISE WARNING 'housekeeper table not found — trigger removal skipped.';
END IF;
-- Final operator instructions: data migration back into the new plain tables
-- and cleanup of the *_part tables / partitions schema are manual steps.
RAISE NOTICE '================================================================================';
RAISE NOTICE 'Undo complete. Partitioned tables have been renamed to *_part.';
RAISE NOTICE 'If you want to migrate your history back, you must do it manually:';
RAISE NOTICE ' INSERT INTO history SELECT * FROM history_part;';
RAISE NOTICE 'Once done, or if you do not need the data, drop the partitioned tables:';
RAISE NOTICE ' DROP TABLE history_part CASCADE;';
RAISE NOTICE 'After that, you can safely remove the partitions infrastructure:';
RAISE NOTICE ' DROP SCHEMA partitions CASCADE;';
RAISE NOTICE '================================================================================';
END $$;

View File

@@ -0,0 +1,161 @@
# Zabbix Partitioning Deployment Manual
This guide provides a step-by-step process for deploying the PostgreSQL partitioning solution for Zabbix.
**🚨 DANGER: CRITICAL WARNING 🚨**
**BEFORE YOU PROCEED, YOU ABSOLUTELY MUST TAKE A FULL BACKUP OF YOUR ZABBIX DATABASE.**
**DO NOT SKIP THIS STEP. Schema modifications are dangerous. If something goes wrong and you do not have a backup, your historical data will be lost permanently, and we take ZERO responsibility.**
---
## Step 1: Preparation & Safety
Because database migrations can take time (especially on large tables), **never** run these scripts directly in a standard SSH session that might disconnect.
1. Open a safe terminal session using `tmux` or `screen`:
```bash
tmux new -s zabbix_partitioning
# OR
screen -S zabbix_partitioning
```
2. Disable the Zabbix Housekeeper for History and Trends:
- Go to your Zabbix Web UI -> **Administration** -> **Housekeeping**.
- **Uncheck** "Enable internal housekeeping" for **History and Trends**.
- Click **Update**.
3. Stop your Zabbix Server to ensure no new data is being written during the schema migration:
```bash
sudo systemctl stop zabbix-server
```
---
## Step 2: Database Connection & Schema Selection
Connect to your PostgreSQL server as an administrator (e.g., `postgres` or the database owner).
```bash
psql -U postgres -h localhost
```
Once inside `psql`, connect to your Zabbix database (usually named `zabbix`):
```sql
\c zabbix
```
> [!IMPORTANT]
> **Custom Schemas:** By default, Zabbix installs into the `public` schema. If you installed Zabbix into a custom schema (e.g., `zabbix_schema`), you **must** set your `search_path` now before running the scripts, otherwise they will fail to find your tables:
> ```sql
> SET search_path TO zabbix_schema, public;
> ```
---
## Step 3: Execute Installation Scripts
Run the scripts in the following exact order. You can use the `\i` command in `psql` if you are in the `procedures` directory, or specify the full path.
**1. Create the partitioning schema and config tables:**
> [!NOTE]
> **Zabbix 8.0+ Users:** Zabbix 8.0 introduced a new `history_json` table. Before running the script below, open `00_schema_create.sql` in a text editor and uncomment the lines specifically marked for Zabbix 8.0 at the end of the history tables block.
```sql
\i 00_schema_create.sql
```
**2. Install the maintenance logic and functions:**
```sql
\i 01_maintenance.sql
```
**3. Enable Partitioning (MIGRATION STEP):**
*This step renames your existing large tables to `_old` and instantly creates new partitioned tables. This might take a few moments.*
```sql
\i 02_enable_partitioning.sql
```
**4. Create the Monitoring View:**
```sql
\i 03_monitoring_view.sql
```
---
## Step 4: Schedule Automated Maintenance
Partitioning requires a daily job to create new partitions for tomorrow and drop old partitions from last month.
If you are using **AWS RDS** or a managed database with `pg_cron` enabled, run this inside `psql`:
```sql
CREATE EXTENSION IF NOT EXISTS pg_cron;
SELECT cron.schedule('zabbix_partition_maintenance', '30 5,23 * * *', 'CALL partitions.run_maintenance();');
```
*(If you are self-hosting and don't have `pg_cron`, please refer to the `README.md` for instructions on setting up standard OS `cron` or systemd timers.)*
---
## Step 5: Start Zabbix Server
Now that the database is fully partitioned, you can safely start Zabbix Server again:
```bash
sudo systemctl start zabbix-server
```
*(Note: Your old history data remains in tables like `history_old`. It is no longer visible in the UI. If you need it, you must manually insert it into the new tables. See `README.md` for more details.)*
---
## Step 6: Configure Zabbix Agent Monitoring
To ensure your partitions don't run out, you must monitor them. We use Zabbix Agent 2 for this.
1. On your database server (where Zabbix Agent 2 is installed), create the SQL query file using this simple one-liner. Copy and paste the entire block below into your terminal:
```bash
cat << 'EOF' | sudo tee /etc/zabbix/zabbix_agent2.d/partitions.get_all.sql > /dev/null
SELECT
table_name,
period,
keep_history::text AS keep_history,
configured_future_partitions,
actual_future_partitions,
total_size_bytes,
EXTRACT(EPOCH FROM (now() - last_updated)) AS age_seconds
FROM partitions.monitoring;
EOF
```
2. Configure the PostgreSQL Plugin by editing `/etc/zabbix/zabbix_agent2.d/plugins.d/postgresql.conf`. Ensure you have defined a session (e.g., `MY_DB`) and enabled custom queries:
```ini
Plugins.PostgreSQL.CustomQueriesPath=/etc/zabbix/zabbix_agent2.d/
Plugins.PostgreSQL.CustomQueriesEnabled=true
# Example Session (replace with your actual credentials)
Plugins.PostgreSQL.Sessions.MY_DB.Uri=tcp://localhost:5432
Plugins.PostgreSQL.Sessions.MY_DB.User=zbx_monitor
Plugins.PostgreSQL.Sessions.MY_DB.Password=your_password
```
3. Restart the Zabbix Agent 2:
```bash
sudo systemctl restart zabbix-agent2
```
---
## Step 7: Import Template in Zabbix
1. Log into your Zabbix Web UI.
2. Go to **Data collection** -> **Templates** and click **Import**.
3. Upload the `template/zbx_pg_partitions_monitor_agent2.yaml` file from this repository.
4. Go to your Database Host in Zabbix, and link the newly imported template: `PostgreSQL Partitioning by Zabbix Agent 2`.
5. On the Host configuration, go to the **Macros** tab.
6. You will see a macro named `{$PG.CONNSTRING.AGENT2}` with the value `<replace_me>`.
7. Change `<replace_me>` to the name of the session you configured in Step 6 (e.g., `MY_DB`).
8. Click **Update**.
**Congratulations!** Your Zabbix database is now fully partitioned, optimized, and monitored.

View File

@@ -21,6 +21,8 @@ This is the declarative partitioning implementation for Zabbix `history*`, `tren
- [Implementation Details](#implementation-details) - [Implementation Details](#implementation-details)
- [`auditlog` Table](#auditlog-table) - [`auditlog` Table](#auditlog-table)
- [Converting Existing Tables](#converting-existing-tables) - [Converting Existing Tables](#converting-existing-tables)
- [PostgreSQL Tuning](#postgresql-tuning)
- [Uninstall / Reverting](#uninstall--reverting)
- [Upgrades](#upgrades) - [Upgrades](#upgrades)
## Architecture ## Architecture
@@ -36,27 +38,10 @@ All procedures, information, statistics and configuration are stored in the `par
## Installation ## Installation
The installation is performed by executing the SQL procedures in the following order: > [!IMPORTANT]
1. Initialize schema (`00_schema_create.sql`). > **Please refer to the [MANUAL.md](MANUAL.md) for the complete, step-by-step, foolproof installation instructions.**
2. Install maintenance procedures (`01_maintenance.sql`). > The manual contains critical safety procedures, backup warnings, and copy-pasteable commands for a safe deployment.
3. Enable partitioning on tables (`02_enable_partitioning.sql`).
4. Install monitoring views (`03_monitoring_view.sql`).
**Command Example:**
You can deploy these scripts manually against your Zabbix database using `psql`. Navigate to the `procedures/` directory and run:
```bash
# Connect as the zabbix database user
export PGPASSWORD="your_zabbix_password"
DB_HOST="localhost" # Or your DB endpoint
DB_NAME="zabbix"
DB_USER="zbxpart_admin"
for script in 00_schema_create.sql 01_maintenance.sql 02_enable_partitioning.sql 03_monitoring_view.sql; do
echo "Applying $script..."
psql -h $DB_HOST -U $DB_USER -d $DB_NAME -f "$script"
done
```
## Configuration ## Configuration
@@ -213,6 +198,24 @@ System state can be monitored via the `partitions.monitoring` view. It includes
SELECT * FROM partitions.monitoring; SELECT * FROM partitions.monitoring;
``` ```
### Zabbix Agent Integration
To monitor the state of the partitions directly from Zabbix, you need to provide the Zabbix Agent with the SQL query used to fetch this data. You can automatically generate the required `partitions.get_all.sql` file on your agent using this one-liner:
```bash
cat << 'EOF' | sudo tee /etc/zabbix/zabbix_agent2.d/partitions.get_all.sql > /dev/null
SELECT
table_name,
period,
keep_history::text AS keep_history,
configured_future_partitions,
actual_future_partitions,
total_size_bytes,
EXTRACT(EPOCH FROM (now() - last_updated)) AS age_seconds
FROM partitions.monitoring;
EOF
```
*(Make sure to adjust the destination path according to your Zabbix Agent template directory)*
### Versioning ### Versioning
To check the installed version of the partitioning solution: To check the installed version of the partitioning solution:
```sql ```sql
@@ -243,9 +246,59 @@ The enablement script guarantees practically zero downtime by automatically rena
* New data flows into the new partitioned tables immediately. * New data flows into the new partitioned tables immediately.
* Old data remains accessible in `table_name_old` for manual lookup or migration if required. * Old data remains accessible in `table_name_old` for manual lookup or migration if required.
## Upgrades ### Housekeeper Interceptor
Even when Zabbix Housekeeping is disabled in the UI for History and Trends, the Zabbix Server daemon may still generate and insert tasks into the `housekeeper` table (e.g., when an item or trigger is deleted, it schedules the deletion of its historical data). Without intervention, this results in the `housekeeper` table bloating massively over time, leading to slow sequential scans and `autovacuum` overhead.
When upgrading Zabbix: To prevent this, this extension installs a `BEFORE INSERT` trigger on the `housekeeper` table.
* When Zabbix attempts to insert a housekeeper task, the trigger intercepts it and checks if the target table is managed in `partitions.config`.
* If the table is partitioned (like `history`), the trigger **silently discards the insert** (`RETURNS NULL`), preventing disk I/O and table bloat entirely.
* If the table is not partitioned (like `events` or `sessions`), the task is allowed to be recorded and is cleaned up naturally by Zabbix.
## PostgreSQL Tuning
Before or immediately after enabling partitioning, you should tune your `postgresql.conf`. The standard configuration is not optimized for partitioned tables and might cause performance degradation or out-of-memory errors.
| Parameter | Recommended | Description |
|-----------|-------------|-------------|
| `max_locks_per_transaction`| `512` (or higher) | **Requires DB Restart.** Default is `64`, which is far too low. PostgreSQL acquires a lock table entry per partition. With many partitioned tables (e.g., history x 30 days), operations like `pg_dump`, `VACUUM`, or queries crossing multiple boundaries will fail with *“out of shared memory”*. |
| `jit` | `off` | **Highly Recommended.** JIT adds overhead to query planning. With many partitions, JIT can drastically increase CPU usage as PostgreSQL attempts to optimize simple queries across dozens of partitions. |
**Default parameters to verify:**
The following are usually set correctly by default, but you should verify them just in case:
* `enable_partition_pruning = on` : **Critical.** Ensures PostgreSQL only queries the necessary partitions instead of scanning everything.
* `enable_partitionwise_join = off` : Zabbix does not do massive joins on history tables; enabling this only wastes planner CPU time.
* `enable_partitionwise_aggregate = off` : Zabbix doesn't perform complex DB-side `GROUP BY` aggregations on history. Leave it disabled.
## Uninstall / Reverting
If you wish to stop using partitioning and revert back to standard, unpartitioned tables without data loss, carefully follow these steps.
> [!CAUTION]
> Reverting partitioning replaces your partitioned tables with standard empty tables. If you need to retain data from the partitioned period, you must manually migrate it before dropping the partition sets. **Always stop Zabbix Server before proceeding.**
1. **Stop Zabbix Server** to prevent new data from being inserted during the transition.
2. **Execute Undo Script:** Run the `04_undo_partitioning.sql` script to recreate non-partitioned tables matching your original Zabbix schema. This script will rename your current partitioned tables to `*_part` (`history_part`, `trends_part`, etc.) and automatically create native, clean tables (`history`, `trends`) in their place.
```bash
psql -h $DB_HOST -U zbxpart_admin -d zabbix -f 04_undo_partitioning.sql
```
3. **Data Migration (Optional):** If you want to keep the metrics collected during the partitioned period, you must manually insert them into the newly created regular tables. This step can take hours depending on table sizes.
```sql
INSERT INTO history SELECT * FROM history_part;
INSERT INTO trends SELECT * FROM trends_part;
-- Repeat for all tables you wish to restore
```
4. **Cleanup:** Once you have migrated the data you need (or if you don't need it at all), you can drop the heavy partitioned tables and remove the partitioning extensions completely.
```sql
DROP TABLE history_part CASCADE;
DROP TABLE history_uint_part CASCADE;
-- Repeat for all *_part tables ...
-- To drop the automatic maintenance infrastructure:
DROP SCHEMA partitions CASCADE;
```
5. **Start Zabbix Server & Re-enable Housekeeper:** Once the tables are replaced, you can start the server. *Don't forget to re-enable Housekeeping for History and Trends in the Zabbix UI!*
## Upgrades
1. **Backup**: Ensure a full database backup exists. 1. **Backup**: Ensure a full database backup exists.
2. **Compatibility**: Zabbix upgrade scripts may attempt to `ALTER` tables. PostgreSQL supports `ALTER TABLE` on partitioned tables for adding columns, which propagates to partitions. 2. **Compatibility**: Zabbix upgrade scripts may attempt to `ALTER` tables. PostgreSQL supports `ALTER TABLE` on partitioned tables for adding columns, which propagates to partitions.
3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted. 3. **Failure Scenarios**: If an upgrade script fails due to partitioning, the table may need to be temporarily reverted or the partition structure manually adjusted.

View File

@@ -2,7 +2,8 @@ SELECT
table_name, table_name,
period, period,
keep_history::text AS keep_history, keep_history::text AS keep_history,
future_partitions, configured_future_partitions,
actual_future_partitions,
total_size_bytes, total_size_bytes,
EXTRACT(EPOCH FROM (now() - last_updated)) AS age_seconds EXTRACT(EPOCH FROM (now() - last_updated)) AS age_seconds
FROM partitions.monitoring; FROM partitions.monitoring;

View File

@@ -131,7 +131,7 @@ zabbix_export:
value: '2' value: '2'
description: 'The minimum number of partitions that must exist in the future' description: 'The minimum number of partitions that must exist in the future'
- macro: '{$PG.CONNSTRING.AGENT2}' - macro: '{$PG.CONNSTRING.AGENT2}'
value: AWS_RDS value: '<replace_me>'
description: 'Session name or URI of the PostgreSQL instance' description: 'Session name or URI of the PostgreSQL instance'
- macro: '{$PG.DBNAME}' - macro: '{$PG.DBNAME}'
value: zabbix value: zabbix