Compare commits
11 Commits
99e25f2efb
...
alpha/8.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aac4b0d44f | ||
|
|
e75d9e0f52 | ||
|
|
9581dbdb62 | ||
|
|
9d1b84225c | ||
|
|
c77eb8e4af | ||
|
|
91eb4e17b8 | ||
|
|
d7b8c7c9c3 | ||
|
|
c4420bc1ad | ||
|
|
8565c99310 | ||
|
|
581c59a0ca | ||
|
|
c94445e71b |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,6 +5,7 @@ z_gen_history_data.sql
|
||||
# Local docs
|
||||
QUICKSTART.md
|
||||
init_extra_users.sql
|
||||
ARCHITECTURE.md
|
||||
|
||||
# Schemas
|
||||
sql-scripts*/
|
||||
@@ -1,7 +1,6 @@
|
||||
-- ============================================================================
|
||||
-- SCRIPT: 00_partitions_init.sql
|
||||
-- DESCRIPTION: Creates the 'partitions' schema and configuration table.
|
||||
-- Defines the structure for managing Zabbix partitioning.
|
||||
-- Creates the 'partitions' schema and configuration table.
|
||||
-- Defines the structure for managing Zabbix partitioning.
|
||||
-- ============================================================================
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS partitions;
|
||||
@@ -12,14 +11,14 @@ CREATE TABLE IF NOT EXISTS partitions.config (
|
||||
period text NOT NULL CHECK (period IN ('day', 'week', 'month', 'year')),
|
||||
keep_history interval NOT NULL,
|
||||
future_partitions integer NOT NULL DEFAULT 5,
|
||||
last_updated timestamp WITH TIME ZONE DEFAULT now(),
|
||||
last_updated timestamp WITH TIME ZONE DEFAULT (now() AT TIME ZONE 'UTC'),
|
||||
PRIMARY KEY (table_name)
|
||||
);
|
||||
|
||||
-- Table to track installed version of the partitioning solution
|
||||
CREATE TABLE IF NOT EXISTS partitions.version (
|
||||
version text PRIMARY KEY,
|
||||
installed_at timestamp with time zone DEFAULT now(),
|
||||
installed_at timestamp with time zone DEFAULT (now() AT TIME ZONE 'UTC'),
|
||||
description text
|
||||
);
|
||||
|
||||
@@ -34,7 +33,8 @@ INSERT INTO partitions.config (table_name, period, keep_history) VALUES
|
||||
('history_uint', 'day', '30 days'),
|
||||
('history_str', 'day', '30 days'),
|
||||
('history_log', 'day', '30 days'),
|
||||
('history_text', 'day', '30 days')
|
||||
('history_text', 'day', '30 days'),
|
||||
('history_json', 'day', '30 days')
|
||||
ON CONFLICT (table_name) DO NOTHING;
|
||||
|
||||
-- Trends tables: Monthly partitions, keep 12 months
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
-- ============================================================================
|
||||
-- SCRIPT: 01_auditlog_prep.sql
|
||||
-- DESCRIPTION: Modifies the 'auditlog' table Primary Key to include 'clock'.
|
||||
-- This is REQUIRED for range partitioning by 'clock'.
|
||||
-- Modifies the 'auditlog' table Primary Key to include 'clock'.
|
||||
-- This is REQUIRED for range partitioning by 'clock'.
|
||||
-- ============================================================================
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Check if PK needs modification
|
||||
-- Original PK is typically on (auditid) named 'auditlog_pkey'
|
||||
-- Original PK is on auditid named 'auditlog_pkey'
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM pg_constraint
|
||||
WHERE conname = 'auditlog_pkey'
|
||||
SELECT 1 FROM pg_constraint
|
||||
WHERE conname = 'auditlog_pkey'
|
||||
AND conrelid = 'auditlog'::regclass
|
||||
) THEN
|
||||
-- Verify if 'clock' is already in PK (basic check)
|
||||
-- Realistically, if 'auditlog_pkey' exists on default Zabbix, it's just (auditid).
|
||||
|
||||
-- Verify if 'clock' is already in PK (basic safety check)
|
||||
-- Realistically, if 'auditlog_pkey' exists on default Zabbix, it's just auditid.
|
||||
|
||||
RAISE NOTICE 'Dropping existing Primary Key on auditlog...';
|
||||
ALTER TABLE auditlog DROP CONSTRAINT auditlog_pkey;
|
||||
|
||||
|
||||
RAISE NOTICE 'Creating new Primary Key on auditlog (auditid, clock)...';
|
||||
ALTER TABLE auditlog ADD PRIMARY KEY (auditid, clock);
|
||||
ELSE
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
-- ============================================================================
|
||||
-- SCRIPT: 02_maintenance.sql
|
||||
-- DESCRIPTION: Core functions for Zabbix partitioning (Create, Drop, Maintain).
|
||||
-- Core functions for Zabbix partitioning (Create, Drop, Maintain).
|
||||
-- ============================================================================
|
||||
|
||||
-- Function to check if a partition exists
|
||||
@@ -11,7 +10,6 @@ BEGIN
|
||||
SELECT 1 FROM pg_class c
|
||||
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE c.relname = p_partition_name
|
||||
AND n.nspname = 'public'
|
||||
);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
@@ -28,7 +26,17 @@ DECLARE
|
||||
v_start_ts bigint;
|
||||
v_end_ts bigint;
|
||||
v_suffix text;
|
||||
v_parent_schema text;
|
||||
BEGIN
|
||||
-- Determine the schema of the parent table
|
||||
SELECT n.nspname INTO v_parent_schema
|
||||
FROM pg_class c
|
||||
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE c.relname = p_parent_table;
|
||||
|
||||
IF NOT FOUND THEN
|
||||
RAISE EXCEPTION 'Parent table % not found', p_parent_table;
|
||||
END IF;
|
||||
v_start_ts := extract(epoch from p_start_time)::bigint;
|
||||
v_end_ts := extract(epoch from p_end_time)::bigint;
|
||||
|
||||
@@ -42,8 +50,8 @@ BEGIN
|
||||
|
||||
IF NOT partitions.partition_exists(v_partition_name) THEN
|
||||
EXECUTE format(
|
||||
'CREATE TABLE public.%I PARTITION OF public.%I FOR VALUES FROM (%s) TO (%s)',
|
||||
v_partition_name, p_parent_table, v_start_ts, v_end_ts
|
||||
'CREATE TABLE %I.%I PARTITION OF %I.%I FOR VALUES FROM (%s) TO (%s)',
|
||||
v_parent_schema, v_partition_name, v_parent_schema, p_parent_table, v_start_ts, v_end_ts
|
||||
);
|
||||
END IF;
|
||||
END;
|
||||
@@ -60,16 +68,19 @@ DECLARE
|
||||
v_partition record;
|
||||
v_partition_date timestamp with time zone;
|
||||
v_suffix text;
|
||||
v_partition_schema text;
|
||||
BEGIN
|
||||
-- Calculate cutoff timestamp
|
||||
v_cutoff_ts := extract(epoch from (now() - p_retention))::bigint;
|
||||
|
||||
FOR v_partition IN
|
||||
SELECT
|
||||
child.relname AS partition_name
|
||||
child.relname AS partition_name,
|
||||
n.nspname AS partition_schema
|
||||
FROM pg_inherits
|
||||
JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
|
||||
JOIN pg_class child ON pg_inherits.inhrelid = child.oid
|
||||
JOIN pg_namespace n ON child.relnamespace = n.oid
|
||||
WHERE parent.relname = p_parent_table
|
||||
LOOP
|
||||
-- Parse partition suffix to determine age
|
||||
@@ -78,19 +89,21 @@ BEGIN
|
||||
|
||||
BEGIN
|
||||
IF length(v_suffix) = 6 THEN -- YYYYMM
|
||||
v_partition_date := to_timestamp(v_suffix || '01', 'YYYYMMDD');
|
||||
v_partition_date := to_timestamp(v_suffix || '01', 'YYYYMMDD') AT TIME ZONE 'UTC';
|
||||
-- For monthly, we check if the END of the month is older than retention?
|
||||
-- Or just strict retention.
|
||||
-- To be safe, adding 1 month to check vs cutoff.
|
||||
IF extract(epoch from (v_partition_date + '1 month'::interval)) < v_cutoff_ts THEN
|
||||
RAISE NOTICE 'Dropping old partition %', v_partition.partition_name;
|
||||
EXECUTE format('DROP TABLE public.%I', v_partition.partition_name);
|
||||
EXECUTE format('DROP TABLE %I.%I', v_partition.partition_schema, v_partition.partition_name);
|
||||
COMMIT; -- Release lock immediately
|
||||
END IF;
|
||||
ELSIF length(v_suffix) = 8 THEN -- YYYYMMDD
|
||||
v_partition_date := to_timestamp(v_suffix, 'YYYYMMDD');
|
||||
v_partition_date := to_timestamp(v_suffix, 'YYYYMMDD') AT TIME ZONE 'UTC';
|
||||
IF extract(epoch from (v_partition_date + '1 day'::interval)) < v_cutoff_ts THEN
|
||||
RAISE NOTICE 'Dropping old partition %', v_partition.partition_name;
|
||||
EXECUTE format('DROP TABLE public.%I', v_partition.partition_name);
|
||||
EXECUTE format('DROP TABLE %I.%I', v_partition.partition_schema, v_partition.partition_name);
|
||||
COMMIT; -- Release lock immediately
|
||||
END IF;
|
||||
END IF;
|
||||
EXCEPTION WHEN OTHERS THEN
|
||||
@@ -116,26 +129,21 @@ DECLARE
|
||||
BEGIN
|
||||
IF p_period = 'day' THEN
|
||||
v_period_interval := '1 day'::interval;
|
||||
v_start_time := date_trunc('day', now());
|
||||
-- Calculate how many past days cover the retention period
|
||||
v_past_iterations := extract(day from p_keep_history)::integer;
|
||||
-- Safety cap or ensure minimum? default 7 if null?
|
||||
IF v_past_iterations IS NULL THEN v_past_iterations := 7; END IF;
|
||||
v_start_time := date_trunc('day', now() AT TIME ZONE 'UTC');
|
||||
-- Calculate how many past days cover the retention period (86400 seconds = 1 day)
|
||||
v_past_iterations := ceil(extract(epoch from p_keep_history) / 86400)::integer;
|
||||
|
||||
ELSIF p_period = 'week' THEN
|
||||
v_period_interval := '1 week'::interval;
|
||||
v_start_time := date_trunc('week', now());
|
||||
v_past_iterations := (extract(day from p_keep_history) / 7)::integer;
|
||||
v_start_time := date_trunc('week', now() AT TIME ZONE 'UTC');
|
||||
-- 604800 seconds = 1 week
|
||||
v_past_iterations := ceil(extract(epoch from p_keep_history) / 604800)::integer;
|
||||
|
||||
ELSIF p_period = 'month' THEN
|
||||
v_period_interval := '1 month'::interval;
|
||||
v_start_time := date_trunc('month', now());
|
||||
-- Approximate months
|
||||
v_past_iterations := (extract(year from p_keep_history) * 12 + extract(month from p_keep_history))::integer;
|
||||
-- Fallback if interval is just days (e.g. '365 days')
|
||||
IF v_past_iterations = 0 THEN
|
||||
v_past_iterations := (extract(day from p_keep_history) / 30)::integer;
|
||||
END IF;
|
||||
v_start_time := date_trunc('month', now() AT TIME ZONE 'UTC');
|
||||
-- Approximate 30 days per month (2592000 seconds)
|
||||
v_past_iterations := ceil(extract(epoch from p_keep_history) / 2592000)::integer;
|
||||
ELSE
|
||||
RETURN;
|
||||
END IF;
|
||||
@@ -148,6 +156,7 @@ BEGIN
|
||||
v_start_time + ((i + 1) * v_period_interval),
|
||||
p_period
|
||||
);
|
||||
COMMIT; -- Release lock immediately
|
||||
END LOOP;
|
||||
|
||||
-- 2. Create Past Partitions (Covering retention period)
|
||||
@@ -159,6 +168,7 @@ BEGIN
|
||||
v_start_time - ((i - 1) * v_period_interval),
|
||||
p_period
|
||||
);
|
||||
COMMIT; -- Release lock immediately
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
-- ============================================================================
|
||||
-- SCRIPT: 03_enable_partitioning.sql
|
||||
-- DESCRIPTION: Converts standard Zabbix tables to Partitioned tables.
|
||||
-- WARNING: This renames existing tables to *_old.
|
||||
-- Converts Zabbix tables to Partitioned tables.
|
||||
-- WARNING: This renames existing tables to *_old.
|
||||
-- ============================================================================
|
||||
|
||||
DO $$
|
||||
@@ -10,27 +9,35 @@ DECLARE
|
||||
v_table text;
|
||||
v_old_table text;
|
||||
v_pk_sql text;
|
||||
v_schema text;
|
||||
BEGIN
|
||||
FOR v_row IN SELECT * FROM partitions.config LOOP
|
||||
v_table := v_row.table_name;
|
||||
v_old_table := v_table || '_old';
|
||||
|
||||
-- Determine schema
|
||||
SELECT n.nspname INTO v_schema
|
||||
FROM pg_class c
|
||||
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE c.relname = v_table;
|
||||
|
||||
|
||||
-- Check if table exists and is NOT already partitioned
|
||||
IF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'r') THEN
|
||||
RAISE NOTICE 'Converting table % to partitioned table...', v_table;
|
||||
|
||||
-- 1. Rename existing table
|
||||
EXECUTE format('ALTER TABLE public.%I RENAME TO %I', v_table, v_old_table);
|
||||
EXECUTE format('ALTER TABLE %I.%I RENAME TO %I', v_schema, v_table, v_old_table);
|
||||
|
||||
-- 2. Create new partitioned table (copying structure)
|
||||
EXECUTE format('CREATE TABLE public.%I (LIKE public.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_table, v_old_table);
|
||||
EXECUTE format('CREATE TABLE %I.%I (LIKE %I.%I INCLUDING ALL) PARTITION BY RANGE (clock)', v_schema, v_table, v_schema, v_old_table);
|
||||
|
||||
-- 3. Create initial partitions
|
||||
RAISE NOTICE 'Creating initial partitions for %...', v_table;
|
||||
CALL partitions.maintain_table(v_table, v_row.period, v_row.keep_history, v_row.future_partitions);
|
||||
|
||||
-- Optional: Migrate existing data
|
||||
-- EXECUTE format('INSERT INTO public.%I SELECT * FROM public.%I', v_table, v_old_table);
|
||||
-- EXECUTE format('INSERT INTO %I.%I SELECT * FROM %I.%I', v_schema, v_table, v_schema, v_old_table);
|
||||
|
||||
ELSIF EXISTS (SELECT 1 FROM pg_class WHERE relname = v_table AND relkind = 'p') THEN
|
||||
RAISE NOTICE 'Table % is already partitioned. Skipping conversion.', v_table;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
-- ============================================================================
|
||||
-- SCRIPT: 04_monitoring_view.sql
|
||||
-- DESCRIPTION: Creates a view to monitor partition status and sizes.
|
||||
-- Creates a view to monitor partition status and sizes.
|
||||
-- ============================================================================
|
||||
|
||||
CREATE OR REPLACE VIEW partitions.monitoring AS
|
||||
|
||||
@@ -2,6 +2,12 @@
|
||||
|
||||
This is a declarative partitioning implementation (based on PostgreSQL procedures) for the Zabbix `history`, `trends`, and `auditlog` tables on PostgreSQL. It is intended to replace standard Zabbix housekeeping for the configured tables. Partitioning is especially useful in large environments because it takes the housekeeper out of the cleanup process for these tables: instead of huge `DELETE` queries over millions of rows, a fast DDL statement (`DROP TABLE`) removes an entire partition at once.
|
||||
|
||||
|
||||
> [!WARNING]
|
||||
> **High-Load Environments**:
|
||||
> 1. **Data Visibility**: After enabling partitioning, old data remains in `*_old` tables and is **NOT visible** in Zabbix. You must migrate data manually if needed.
|
||||
> 2. **Disable Housekeeping**: You **MUST** disable Zabbix Housekeeper for History and Trends in *Administration -> Housekeeping*. Failure to do so will cause massive `DELETE` loads.
|
||||
|
||||
## Architecture
|
||||
|
||||
The solution uses PostgreSQL native declarative partitioning (`PARTITION BY RANGE`).
|
||||
@@ -55,7 +61,79 @@ This procedure should be scheduled to run periodically (e.g., daily via `pg_cron
|
||||
```sql
|
||||
CALL partitions.run_maintenance();
|
||||
```
|
||||
### Automatic Maintenance
|
||||
|
||||
To ensure partitions are created in advance and old data is cleaned up, the maintenance procedure should be scheduled to run automatically.
|
||||
|
||||
It is recommended to run the maintenance **twice a day** (e.g., at 05:30 and 23:30).
|
||||
* **Primary Run**: Creates new future partitions and drops old ones.
|
||||
* **Secondary Run**: Acts as a safety check. Since the procedure is idempotent (safe to run multiple times), a second run ensures everything is consistent if the first run failed or was interrupted.
|
||||
|
||||
There are three ways to schedule this, depending on your environment:
|
||||
|
||||
#### Option 1: `pg_cron` (If you use RDS/Aurora)
|
||||
If you are running on managed PostgreSQL (like AWS Aurora) or prefer to keep scheduling inside the database, `pg_cron` is the way to go.
|
||||
|
||||
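1. Ensure `pg_cron` is installed and listed in `shared_preload_libraries` (`shared_preload_libraries = 'pg_cron'` in `postgresql.conf`; on managed services such as RDS/Aurora, set it in the DB parameter group instead).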
|
||||
2. Run the following to schedule the maintenance:
|
||||
```sql
|
||||
CREATE EXTENSION IF NOT EXISTS pg_cron;
|
||||
SELECT cron.schedule('zabbix_maintenance', '30 5,23 * * *', 'CALL partitions.run_maintenance();');
|
||||
```
|
||||
*Where:*
|
||||
* `'zabbix_maintenance'` - The name of the job (must be unique).
|
||||
* `'30 5,23 * * *'` - The standard cron schedule (runs at 05:30 and 23:30 daily).
|
||||
* `'CALL partitions.run_maintenance();'` - The SQL command to execute.
|
||||
|
||||
|
||||
#### Option 2: `systemd` Timers
|
||||
For standard Linux VM deployments, `systemd` timers are modern, prevent overlapping runs, and provide excellent logging.
|
||||
|
||||
1. Create a service file (`/etc/systemd/system/zabbix-partitioning.service`):
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Zabbix PostgreSQL Partition Maintenance
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=zabbix
|
||||
# Ensure .pgpass is configured for the zabbix user so it doesn't prompt for a password
|
||||
ExecStart=/usr/bin/psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();"
|
||||
```
|
||||
|
||||
2. Create a timer file (`/etc/systemd/system/zabbix-partitioning.timer`):
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Zabbix Partitioning twice a day
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 05,23:30:00
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
```
|
||||
|
||||
3. Enable and start the timer:
|
||||
```bash
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now zabbix-partitioning.timer
|
||||
```
|
||||
|
||||
#### Option 3: Standard Cron
|
||||
This is the legacy, simple method for standard VMs and containerized environments.
|
||||
|
||||
**Example Crontab Entry (`crontab -e`):**
|
||||
```bash
|
||||
# Run Zabbix partition maintenance twice daily (5:30 AM and 11:30 PM)
|
||||
30 5,23 * * * psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();" >> /var/log/zabbix_maintenance.log 2>&1
|
||||
```
|
||||
|
||||
**Docker Environment:**
|
||||
If the database runs in Docker, add the entry to the Docker host's crontab and run the command inside the container with `docker exec`:
|
||||
```bash
|
||||
30 5,23 * * * docker exec zabbix-db psql -U zabbix -d zabbix -c "CALL partitions.run_maintenance();"
|
||||
```
|
||||
## Monitoring & Permissions
|
||||
|
||||
System state can be monitored via the `partitions.monitoring` view. It includes a `future_partitions` column which counts how many partitions exist *after* the current period. This is useful for alerting (e.g., trigger if `future_partitions < 2`).
|
||||
Reference in New Issue
Block a user