Zabbix : surveiller l’évolution du statut de triggers

Publié le 18 décembre 2018

Dans le cadre de mon travail il m’est arrivé de devoir surveiller l’évolution des triggers de plusieurs hôtes de façon arbitraire. Il est possible de s’en sortir via les Host groups et la page Monitoring > Triggers, mais ça peut vite être lourd à gérer quand on doit surveiller simultanément plusieurs groupes d’équipements qui n’ont rien en commun.

Comme à l’époque je débutais en Python, j’ai décidé de développer mon propre script pour me faire la main.

#!/usr/bin/env python3
#  Copyright 2018 palc.fr
#
#  Licensed under the WTFPL, Version 2
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 
# 0. You just DO WHAT THE FUCK YOU WANT TO.

"""
  A trigger status viewer

  This script displays the status of triggers on some devices

  Usage :
      ./triggerstatus.py zabbix_server [list of host ids]
"""

import os       # To manage cursor and window size
import signal   # To manage Ctrl+C and window resizing
import sys      # To manage script arguments and terminal buffer
import time     # To get date and for temporisation

import getpass  # For password input without displaying it
import math     # For calculating graph datas
import shutil   # To get the terminal size

from pyzabbix import ZabbixAPI

def signal_sigint(sig, frame):
    """
        When receiving SIGINT (Ctrl+c), the script exits properly

        sig and frame are the standard signal handler arguments; they are
        not used.

        Raises SystemExit with status 0.
    """
    # The script hides the cursor with 'setterm -cursor off': turn it back on
    os.system('setterm -cursor on')
    # The screen is drawn without a trailing newline: end the current line
    print()
    # sys.exit() instead of the exit() helper, which is only injected by the
    # site module for interactive sessions
    sys.exit(0)

def signal_sigwinch(sig, frame):
    """
        When the window is resized, redraw the screen

        sig and frame are the standard signal handler arguments; they are
        not used.

        The data to draw is read from the module-level variables filled by
        the main loop. If they do not all exist yet (window resized before
        the first polling cycle has completed), there is nothing to redraw
        and the handler returns without doing anything.
    """
    # Same order as the positional parameters of draw()
    names = ('ok', 'notclassified', 'information', 'warning', 'average',
             'high', 'disaster', 'disable', 'date')
    state = globals()

    # Looked up through globals() rather than by bare name, so that a missing
    # variable is detected instead of raising NameError inside the handler
    if not all(name in state for name in names):
        return

    draw(*[state[name] for name in names])

def draw(ok, notclassified, information, warning, average, high, disaster, disable, date):
    """
        Clear the screen and redraw all the datas

        ok, notclassified, information, warning, average, high, disaster
        and disable are lists of strings (one entry per trigger), date is
        the text displayed at the beginning of the first line.

        The output is made of:
          - a summary line: date | total (ok/in problem/disabled)
          - a bar graph with one '|' per trigger, scaled down when there
            are more triggers than the available width
          - one line per trigger, truncated to the terminal width, until
            the screen is full
    """

    os.system('clear')

    # Get screen width and height (in characters). Falls back to 80x24 when
    # the size cannot be determined (output not attached to a terminal)
    columns, rows = shutil.get_terminal_size(fallback=(80, 24))

    # (colour, triggers) for each PROBLEM severity, most severe first
    problems = [
        ("\033[91m", disaster),
        ("\033[31m", high),
        ("\033[93m", average),
        ("\033[33m", warning),
        ("\033[34m", information),
        ("\033[37m", notclassified),
    ]
    # Display order: problems, then disabled triggers, then OK triggers
    sections = problems + [("\033[90m", disable), ("\033[32m", ok)]

    nb_problems = sum(len(items) for _, items in problems)
    sum_triggers = nb_problems + len(disable) + len(ok)

    # Everything is collected here and written in one go at the end
    out = []

    # General informations
    out.append(str(date) + " | " + str(sum_triggers) + " (")
    out.append("\033[32m" + str(len(ok)) + "\033[0m/")
    out.append("\033[31m" + str(nb_problems) + "\033[0m/")
    out.append("\033[90m" + str(len(disable)) + "\033[0m)\n")

    # Calculate graph width: one character per trigger unless it does not fit
    coef_graph = 1
    if sum_triggers > (columns - 6):
        coef_graph = (columns - 6) / sum_triggers

    # Draw the graph (a negative or null width gives an empty string)
    out.append('[')
    for color, items in sections:
        out.append((color + '|') * math.ceil(len(items) * coef_graph))
    out.append("\033[0m]")

    # Number of trigger lines that can still be displayed: the summary and
    # the graph take two rows, and one more row is left unused
    remaining = max(rows - 3, 0)

    # Display the triggers, section by section
    for color, items in sections:
        shown = items[:remaining]
        if not shown:
            continue
        out.append(color)
        out.extend("\n" + item[:columns] for item in shown)
        remaining -= len(shown)

    out.append("\033[0m")

    sys.stdout.write(''.join(out))
    sys.stdout.flush()

# When receiving SIGINT (Ctrl+c)
signal.signal(signal.SIGINT, signal_sigint)

# The server name is mandatory: check it before asking for credentials
if len(sys.argv) < 2:
    print("Usage: " + sys.argv[0] + " zabbix_server [list of host ids]", file=sys.stderr)
    sys.exit(2)

# Connection to Zabbix server
user = input("Username: ")
password = getpass.getpass("Password for " + user + ": ")
try:
    zapi = ZabbixAPI('https://' + sys.argv[1])
    zapi.login(user, password)
except Exception as err:
    # 'except Exception' and not a bare 'except', so that the SystemExit
    # raised by the Ctrl+c handler is not reported as a connection failure
    print("Cannot conect to Zabbix server ☹")
    print(err, file=sys.stderr)
    sys.exit(1)

# Text appended to a trigger in PROBLEM state, indexed by trigger priority
PRIORITY_LABELS = {
    '0': 'not classified',
    '1': 'information',
    '2': 'warning',
    '3': 'average',
    '4': 'high',
    '5': 'disaster',
}

# Tables for each trigger status. They are created empty before the resize
# handler is installed, because that handler reads them
ok = []
notclassified = []
information = []
warning = []
average = []
high = []
disaster = []
disable = []
date = time.strftime("%d/%m/%Y %H:%M:%S")

# When the window is resized (installed only after the login prompts, so
# that a resize does not clear the screen while credentials are typed)
signal.signal(signal.SIGWINCH, signal_sigwinch)

# Disable cursor
os.system('setterm -cursor off')

try:
    # main loop
    while True:
        # The new status is built in temporary tables and published once
        # complete, so that a redraw triggered by a resize during the
        # polling shows the previous complete status instead of partial data
        new_ok = []
        new_disable = []
        new_problems = {priority: [] for priority in PRIORITY_LABELS}

        for hostid in sys.argv[2:]:
            # Get triggers status
            try:
                triggers = zapi.trigger.get(hostids=hostid)
            except Exception as err:
                print("Problem with Zabbix server ☹")
                print(err, file=sys.stderr)
                sys.exit(1)

            for trigger in triggers:
                label = hostid + ' ' + trigger['description']

                if trigger['status'] == '1':
                    # Item is disabled
                    new_disable.append(label)
                elif trigger['status'] == '0':
                    if trigger['value'] == '0':
                        # Item is in OK state
                        new_ok.append(label)
                    elif trigger['value'] == '1' and trigger['priority'] in PRIORITY_LABELS:
                        # Item is in PROBLEM state
                        priority = trigger['priority']
                        new_problems[priority].append(label + ' (' + PRIORITY_LABELS[priority] + ')')

        # Publish the new status
        ok = new_ok
        notclassified = new_problems['0']
        information = new_problems['1']
        warning = new_problems['2']
        average = new_problems['3']
        high = new_problems['4']
        disaster = new_problems['5']
        disable = new_disable
        date = time.strftime("%d/%m/%Y %H:%M:%S")

        draw(ok, notclassified, information, warning, average, high, disaster, disable, date)

        time.sleep(10)
finally:
    # Whatever the reason for leaving the loop (error or Ctrl+c), give the
    # cursor back to the terminal
    os.system('setterm -cursor on')

Ce script prend plusieurs paramètres : l’adresse du serveur Zabbix, suivie de la liste des identifiants des hôtes à surveiller.

Par exemple, pour superviser les équipements 13111, 13112 et 13123 sur le serveur zabbix.palc.fr :

./triggerstatus.py zabbix.palc.fr 13111 13112 13123

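Je me suis clairement inspiré de htop pour faire ce script. Il affiche, dans l’ordre : la date suivie du nombre de triggers (total, OK, en problème, désactivés), une barre colorée représentant leur répartition, puis la liste des triggers en problème par sévérité décroissante, suivis des triggers désactivés et des triggers en état OK.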

C’est mon tout premier « vrai » script fait en Python (hors Hello world! ou équivalent), donc soyez indulgents.