add nrpe setup
This commit is contained in:
parent
1dca96ebea
commit
d886ac18dc
|
@ -0,0 +1,382 @@
|
||||||
|
#############################################################################
|
||||||
|
#
|
||||||
|
# Sample NRPE Config File
|
||||||
|
#
|
||||||
|
# Notes:
|
||||||
|
#
|
||||||
|
# This is a sample configuration file for the NRPE daemon. It needs to be
|
||||||
|
# located on the remote host that is running the NRPE daemon, not the host
|
||||||
|
# from which the check_nrpe client is being executed.
|
||||||
|
#
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# LOG FACILITY
|
||||||
|
# The syslog facility that should be used for logging purposes.
|
||||||
|
|
||||||
|
log_facility=daemon
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# LOG FILE
|
||||||
|
# If a log file is specified in this option, nrpe will write to
|
||||||
|
# that file instead of using syslog.
|
||||||
|
|
||||||
|
#log_file=/var/log/nrpe.log
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# DEBUGGING OPTION
|
||||||
|
# This option determines whether or not debugging messages are logged to the
|
||||||
|
# syslog facility.
|
||||||
|
# Values: 0=debugging off, 1=debugging on
|
||||||
|
|
||||||
|
debug=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# PID FILE
|
||||||
|
# The name of the file in which the NRPE daemon should write its process ID
|
||||||
|
# number. The file is only written if the NRPE daemon is started by the root
|
||||||
|
# user and is running in standalone mode.
|
||||||
|
|
||||||
|
pid_file=/run/nagios/nrpe.pid
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# PORT NUMBER
|
||||||
|
# Port number we should wait for connections on.
|
||||||
|
# NOTE: This must be a non-privileged port (i.e. > 1024).
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
server_port=5666
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# SERVER ADDRESS
|
||||||
|
# Address that nrpe should bind to in case there are more than one interface
|
||||||
|
# and you do not want nrpe to bind on all interfaces.
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
#server_address=127.0.0.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# LISTEN QUEUE SIZE
|
||||||
|
# Listen queue size (backlog) for serving incoming connections.
|
||||||
|
# You may want to increase this value under high load.
|
||||||
|
|
||||||
|
#listen_queue_size=5
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NRPE USER
|
||||||
|
# This determines the effective user that the NRPE daemon should run as.
|
||||||
|
# You can either supply a username or a UID.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
nrpe_user=nagios
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NRPE GROUP
|
||||||
|
# This determines the effective group that the NRPE daemon should run as.
|
||||||
|
# You can either supply a group name or a GID.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
nrpe_group=nagios
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ALLOWED HOST ADDRESSES
|
||||||
|
# This is an optional comma-delimited list of IP address or hostnames
|
||||||
|
# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask
|
||||||
|
# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently
|
||||||
|
# supported.
|
||||||
|
#
|
||||||
|
# Note: The daemon only does rudimentary checking of the client's IP
|
||||||
|
# address. I would highly recommend adding entries in your /etc/hosts.allow
|
||||||
|
# file to allow only the specified host to connect to the port
|
||||||
|
# you are running this daemon on.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
allowed_hosts=10.99.23.36,::1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND ARGUMENT PROCESSING
|
||||||
|
# This option determines whether or not the NRPE daemon will allow clients
|
||||||
|
# to specify arguments to commands that are executed. This option only works
|
||||||
|
# if the daemon was configured with the --enable-command-args configure script
|
||||||
|
# option.
|
||||||
|
#
|
||||||
|
# *** ENABLING THIS OPTION IS A SECURITY RISK! ***
|
||||||
|
# Read the SECURITY file for information on some of the security implications
|
||||||
|
# of enabling this variable.
|
||||||
|
#
|
||||||
|
# Values: 0=do not allow arguments, 1=allow command arguments
|
||||||
|
|
||||||
|
dont_blame_nrpe=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# BASH COMMAND SUBSTITUTION
|
||||||
|
# This option determines whether or not the NRPE daemon will allow clients
|
||||||
|
# to specify arguments that contain bash command substitutions of the form
|
||||||
|
# $(...). This option only works if the daemon was configured with both
|
||||||
|
# the --enable-command-args and --enable-bash-command-substitution configure
|
||||||
|
# script options.
|
||||||
|
#
|
||||||
|
# *** ENABLING THIS OPTION IS A HIGH SECURITY RISK! ***
|
||||||
|
# Read the SECURITY file for information on some of the security implications
|
||||||
|
# of enabling this variable.
|
||||||
|
#
|
||||||
|
# Values: 0=do not allow bash command substitutions,
|
||||||
|
# 1=allow bash command substitutions
|
||||||
|
|
||||||
|
allow_bash_command_substitution=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND PREFIX
|
||||||
|
# This option allows you to prefix all commands with a user-defined string.
|
||||||
|
# A space is automatically added between the specified prefix string and the
|
||||||
|
# command line from the command definition.
|
||||||
|
#
|
||||||
|
# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
|
||||||
|
# Usage scenario:
|
||||||
|
# Execute restricted commands using sudo. For this to work, you need to add
|
||||||
|
# the nagios user to your /etc/sudoers. An example entry for allowing
|
||||||
|
# execution of the plugins from might be:
|
||||||
|
#
|
||||||
|
# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
|
||||||
|
#
|
||||||
|
# This lets the nagios user run all commands in that directory (and only them)
|
||||||
|
# without asking for a password. If you do this, make sure you don't give
|
||||||
|
# random users write access to that directory or its contents!
|
||||||
|
|
||||||
|
# command_prefix=/usr/bin/sudo
|
||||||
|
|
||||||
|
|
||||||
|
# MAX COMMANDS
|
||||||
|
# This specifies how many children processes may be spawned at any one
|
||||||
|
# time, essentially limiting the fork()s that occur.
|
||||||
|
# Default (0) is set to unlimited
|
||||||
|
# max_commands=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND TIMEOUT
|
||||||
|
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||||
|
# allow plugins to finish executing before killing them off.
|
||||||
|
|
||||||
|
command_timeout=60
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# CONNECTION TIMEOUT
|
||||||
|
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||||
|
# wait for a connection to be established before exiting. This is sometimes
|
||||||
|
# seen where a network problem stops the SSL being established even though
|
||||||
|
# all network sessions are connected. This causes the nrpe daemons to
|
||||||
|
# accumulate, eating system resources. Do not set this too low.
|
||||||
|
|
||||||
|
connection_timeout=300
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# WEAK RANDOM SEED OPTION
|
||||||
|
# This directive allows you to use SSL even if your system does not have
|
||||||
|
# a /dev/random or /dev/urandom (on purpose or because the necessary patches
|
||||||
|
# were not applied). The random number generator will be seeded from a file
|
||||||
|
# which is either a file pointed to by the environment variable $RANDFILE
|
||||||
|
# or $HOME/.rnd. If neither exists, the pseudo random number generator will
|
||||||
|
# be initialized and a warning will be issued.
|
||||||
|
# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness
|
||||||
|
|
||||||
|
#allow_weak_random_seed=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# SSL/TLS OPTIONS
|
||||||
|
# These directives allow you to specify how to use SSL/TLS.
|
||||||
|
|
||||||
|
# SSL VERSION
|
||||||
|
# This can be any of: SSLv2 (only use SSLv2), SSLv2+ (use any version),
|
||||||
|
# SSLv3 (only use SSLv3), SSLv3+ (use SSLv3 or above), TLSv1 (only use
|
||||||
|
# TLSv1), TLSv1+ (use TLSv1 or above), TLSv1.1 (only use TLSv1.1),
|
||||||
|
# TLSv1.1+ (use TLSv1.1 or above), TLSv1.2 (only use TLSv1.2),
|
||||||
|
# TLSv1.2+ (use TLSv1.2 or above)
|
||||||
|
# If an "or above" version is used, the best will be negotiated. So if both
|
||||||
|
# ends are able to do TLSv1.2 and use specify SSLv2, you will get TLSv1.2.
|
||||||
|
# If you are using openssl 1.1.0 or above, the SSLv2 options are not available.
|
||||||
|
|
||||||
|
#ssl_version=SSLv2+
|
||||||
|
|
||||||
|
# SSL USE ADH
|
||||||
|
# This is for backward compatibility and is DEPRECATED. Set to 1 to enable
|
||||||
|
# ADH or 2 to require ADH. 1 is currently the default but will be changed
|
||||||
|
# in a later version.
|
||||||
|
|
||||||
|
#ssl_use_adh=1
|
||||||
|
|
||||||
|
# SSL CIPHER LIST
|
||||||
|
# This lists which ciphers can be used. For backward compatibility, this
|
||||||
|
# defaults to 'ssl_cipher_list=ALL:!MD5:@STRENGTH' for < OpenSSL 1.1.0,
|
||||||
|
# and 'ssl_cipher_list=ALL:!MD5:@STRENGTH:@SECLEVEL=0' for OpenSSL 1.1.0 and
|
||||||
|
# greater.
|
||||||
|
|
||||||
|
#ssl_cipher_list=ALL:!MD5:@STRENGTH
|
||||||
|
#ssl_cipher_list=ALL:!MD5:@STRENGTH:@SECLEVEL=0
|
||||||
|
#ssl_cipher_list=ALL:!aNULL:!eNULL:!SSLv2:!LOW:!EXP:!RC4:!MD5:@STRENGTH
|
||||||
|
|
||||||
|
# SSL Certificate and Private Key Files
|
||||||
|
|
||||||
|
#ssl_cacert_file=/etc/ssl/servercerts/ca-cert.pem
|
||||||
|
#ssl_cert_file=/etc/ssl/servercerts/nagios-cert.pem
|
||||||
|
#ssl_privatekey_file=/etc/ssl/servercerts/nagios-key.pem
|
||||||
|
|
||||||
|
# SSL USE CLIENT CERTS
|
||||||
|
# This options determines client certificate usage.
|
||||||
|
# Values: 0 = Don't ask for or require client certificates (default)
|
||||||
|
# 1 = Ask for client certificates
|
||||||
|
# 2 = Require client certificates
|
||||||
|
|
||||||
|
#ssl_client_certs=0
|
||||||
|
|
||||||
|
# SSL LOGGING
|
||||||
|
# This option determines which SSL messages are sent to syslog. OR values
|
||||||
|
# together to specify multiple options.
|
||||||
|
|
||||||
|
# Values: 0x00 (0) = No additional logging (default)
|
||||||
|
# 0x01 (1) = Log startup SSL/TLS parameters
|
||||||
|
# 0x02 (2) = Log remote IP address
|
||||||
|
# 0x04 (4) = Log SSL/TLS version of connections
|
||||||
|
# 0x08 (8) = Log which cipher is being used for the connection
|
||||||
|
# 0x10 (16) = Log if client has a certificate
|
||||||
|
# 0x20 (32) = Log details of client's certificate if it has one
|
||||||
|
# -1 or 0xff or 0x2f = All of the above
|
||||||
|
|
||||||
|
#ssl_logging=0x00
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NASTY METACHARACTERS
|
||||||
|
# This option allows you to override the list of characters that cannot
|
||||||
|
# be passed to the NRPE daemon.
|
||||||
|
|
||||||
|
# nasty_metachars=|`&><'\\[]{};\r\n
|
||||||
|
|
||||||
|
# This option allows you to enable or disable logging error messages to the syslog facilities.
|
||||||
|
# If this option is not set, the error messages will be logged.
|
||||||
|
disable_syslog=0
|
||||||
|
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
# Command definitions that this daemon will run. Definitions
|
||||||
|
# are in the following format:
|
||||||
|
#
|
||||||
|
# command[<command_name>]=<command_line>
|
||||||
|
#
|
||||||
|
# When the daemon receives a request to return the results of <command_name>
|
||||||
|
# it will execute the command specified by the <command_line> argument.
|
||||||
|
#
|
||||||
|
# Unlike Nagios, the command line cannot contain macros - it must be
|
||||||
|
# typed exactly as it should be executed.
|
||||||
|
#
|
||||||
|
# Note: Any plugins that are used in the command lines must reside
|
||||||
|
# on the machine that this daemon is running on! The examples below
|
||||||
|
# assume that you have plugins installed in a /usr/local/nagios/libexec
|
||||||
|
# directory. Also note that you will have to modify the definitions below
|
||||||
|
# to match the argument format the plugins expect. Remember, these are
|
||||||
|
# examples only!
|
||||||
|
|
||||||
|
|
||||||
|
# The following examples use hardcoded command arguments...
|
||||||
|
# This is by far the most secure method of using NRPE
|
||||||
|
|
||||||
|
#command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
|
||||||
|
#command[check_load]=/usr/lib/nagios/plugins/check_load -r -w .15,.10,.05 -c .30,.25,.20
|
||||||
|
#command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||||
|
#command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||||
|
#command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
|
||||||
|
|
||||||
|
|
||||||
|
# The following examples allow user-supplied arguments and can
|
||||||
|
# only be used if the NRPE daemon was compiled with support for
|
||||||
|
# command arguments *AND* the dont_blame_nrpe directive in this
|
||||||
|
# config file is set to '1'. This poses a potential security risk, so
|
||||||
|
# make sure you read the SECURITY file before doing this.
|
||||||
|
|
||||||
|
### MISC SYSTEM METRICS ###
|
||||||
|
#command[check_users]=/usr/lib/nagios/plugins/check_users $ARG1$
|
||||||
|
#command[check_load]=/usr/lib/nagios/plugins/check_load $ARG1$
|
||||||
|
#command[check_disk]=/usr/lib/nagios/plugins/check_disk $ARG1$
|
||||||
|
#command[check_swap]=/usr/lib/nagios/plugins/check_swap $ARG1$
|
||||||
|
#command[check_cpu_stats]=/usr/lib/nagios/plugins/check_cpu_stats.sh $ARG1$
|
||||||
|
#command[check_mem]=/usr/lib/nagios/plugins/custom_check_mem -n $ARG1$
|
||||||
|
|
||||||
|
### GENERIC SERVICES ###
|
||||||
|
#command[check_init_service]=sudo /usr/lib/nagios/plugins/check_init_service $ARG1$
|
||||||
|
#command[check_services]=/usr/lib/nagios/plugins/check_services -p $ARG1$
|
||||||
|
|
||||||
|
### SYSTEM UPDATES ###
|
||||||
|
#command[check_yum]=/usr/lib/nagios/plugins/check_yum
|
||||||
|
#command[check_apt]=/usr/lib/nagios/plugins/check_apt
|
||||||
|
|
||||||
|
### PROCESSES ###
|
||||||
|
#command[check_all_procs]=/usr/lib/nagios/plugins/custom_check_procs
|
||||||
|
#command[check_procs]=/usr/lib/nagios/plugins/check_procs $ARG1$
|
||||||
|
|
||||||
|
### OPEN FILES ###
|
||||||
|
#command[check_open_files]=/usr/lib/nagios/plugins/check_open_files.pl $ARG1$
|
||||||
|
|
||||||
|
### NETWORK CONNECTIONS ###
|
||||||
|
#command[check_netstat]=/usr/lib/nagios/plugins/check_netstat.pl -p $ARG1$ $ARG2$
|
||||||
|
|
||||||
|
### ASTERISK ###
|
||||||
|
#command[check_asterisk]=/usr/lib/nagios/plugins/check_asterisk.pl $ARG1$
|
||||||
|
#command[check_sip]=/usr/lib/nagios/plugins/check_sip $ARG1$
|
||||||
|
#command[check_asterisk_sip_peers]=sudo /usr/lib/nagios/plugins/check_asterisk_sip_peers.sh $ARG1$
|
||||||
|
#command[check_asterisk_version]=/usr/lib/nagios/plugins/nagisk.pl -c version
|
||||||
|
#command[check_asterisk_peers]=/usr/lib/nagios/plugins/nagisk.pl -c peers
|
||||||
|
#command[check_asterisk_channels]=/usr/lib/nagios/plugins/nagisk.pl -c channels
|
||||||
|
#command[check_asterisk_zaptel]=/usr/lib/nagios/plugins/nagisk.pl -c zaptel
|
||||||
|
#command[check_asterisk_span]=/usr/lib/nagios/plugins/nagisk.pl -c span -s 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# INCLUDE CONFIG FILE
|
||||||
|
# This directive allows you to include definitions from an external config file.
|
||||||
|
|
||||||
|
#include=<somefile.cfg>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# INCLUDE CONFIG DIRECTORY
|
||||||
|
# This directive allows you to include definitions from config files (with a
|
||||||
|
# .cfg extension) in one or more directories (with recursion).
|
||||||
|
|
||||||
|
#include_dir=<somedirectory>
|
||||||
|
#include_dir=<someotherdirectory>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# local configuration:
|
||||||
|
# if you'd prefer, you can instead place directives here
|
||||||
|
|
||||||
|
include=/etc/nagios/nrpe_local.cfg
|
||||||
|
|
||||||
|
# you can place your config snippets into nrpe.d/
|
||||||
|
# only snippets ending in .cfg will get included
|
||||||
|
|
||||||
|
include_dir=/etc/nagios/nrpe.d/
|
||||||
|
|
||||||
|
|
||||||
|
# KEEP ENVIRONMENT VARIABLES
|
||||||
|
# This directive allows you to retain specific variables from the environment
|
||||||
|
# when starting the NRPE daemon.
|
||||||
|
|
||||||
|
#keep_env_vars=NRPE_MULTILINESUPPORT,NRPE_PROGRAMVERSION
|
|
@ -0,0 +1,40 @@
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# Command definitions that this daemon will run. Definitions
|
||||||
|
# are in the following format:
|
||||||
|
#
|
||||||
|
# command[<command_name>]=<command_line>
|
||||||
|
#
|
||||||
|
# When the daemon receives a request to return the results of <command_name>
|
||||||
|
# it will execute the command specified by the <command_line> argument.
|
||||||
|
|
||||||
|
# Below are some examples using static arguments:
|
||||||
|
#
|
||||||
|
# command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||||
|
# command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
|
||||||
|
# command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
|
||||||
|
# command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
|
||||||
|
# command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||||
|
|
||||||
|
# If you enable command argument processing (dont_blame_nrpe, above) you may
|
||||||
|
# include variables in the command definition that will be substituted for the
|
||||||
|
# values given by the client. Variables have the format $varname$, and are
|
||||||
|
# compatible with NRPE or Nagios arguments of the form $ARGn$.
|
||||||
|
#
|
||||||
|
# Here are some examples using argument passing, with NRPE-compatible names:
|
||||||
|
#
|
||||||
|
# command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||||
|
# command[check_load]=/usr/lib/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
|
||||||
|
# command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||||
|
# command[check_users]=/usr/lib/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
|
||||||
|
#
|
||||||
|
# And here are the same examples using arbitrary variable names:
|
||||||
|
#
|
||||||
|
# command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $warn$ -c $crit$ -p $path$
|
||||||
|
# command[check_load]=/usr/lib/nagios/plugins/check_load -w $warn$ -c $crit$
|
||||||
|
# command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $warn$ -c $crit$ -s $state$
|
||||||
|
# command[check_users]=/usr/lib/nagios/plugins/check_users -w $warn$ -c $crit$
|
||||||
|
|
||||||
|
command[check_docker_container_status]=/usr/lib/nagios/plugins/check_docker --status running --containers $ARG1$
|
||||||
|
command[check_docker_container_cpu]=/usr/lib/nagios/plugins/check_docker --cpu $ARG2$:$ARG3$ --containers $ARG1$
|
||||||
|
command[check_docker_container_memory]=/usr/lib/nagios/plugins/check_docker --memory $ARG2$:$ARG3$:% --containers $ARG1$
|
|
@ -0,0 +1,39 @@
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# Command definitions that this daemon will run. Definitions
|
||||||
|
# are in the following format:
|
||||||
|
#
|
||||||
|
# command[<command_name>]=<command_line>
|
||||||
|
#
|
||||||
|
# When the daemon receives a request to return the results of <command_name>
|
||||||
|
# it will execute the command specified by the <command_line> argument.
|
||||||
|
|
||||||
|
# Below are some examples using static arguments:
|
||||||
|
#
|
||||||
|
# command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||||
|
# command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
|
||||||
|
# command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
|
||||||
|
# command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
|
||||||
|
# command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||||
|
|
||||||
|
# If you enable command argument processing (dont_blame_nrpe, above) you may
|
||||||
|
# include variables in the command definition that will be substituted for the
|
||||||
|
# values given by the client. Variables have the format $varname$, and are
|
||||||
|
# compatible with NRPE or Nagios arguments of the form $ARGn$.
|
||||||
|
#
|
||||||
|
# Here are some examples using argument passing, with NRPE-compatible names:
|
||||||
|
#
|
||||||
|
# command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||||
|
# command[check_load]=/usr/lib/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
|
||||||
|
# command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||||
|
# command[check_users]=/usr/lib/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
|
||||||
|
#
|
||||||
|
# And here are the same examples using arbitrary variable names:
|
||||||
|
#
|
||||||
|
|
||||||
|
command[check_proc_docker]=/usr/lib/nagios/plugins/check_procs -c 1:1 -C dockerd
|
||||||
|
command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $warn$ -c $crit$ -p $path$
|
||||||
|
command[check_load]=/usr/lib/nagios/plugins/check_load -w $warn$ -c $crit$
|
||||||
|
command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $warn$ -c $crit$ -s $state$
|
||||||
|
command[check_users]=/usr/lib/nagios/plugins/check_users -w $warn$ -c $crit$
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env bash
#
# Install and configure the NRPE agent and monitoring plugins, deploy the
# repo-local config/plugin files, and restart the service.
# Must be run as root, from the repository root (source paths are relative).

# Abort on any failure, unset variable, or failed pipeline element.
set -euo pipefail

if [[ $EUID -ne 0 ]]; then
    # Fix: original used `2>&1` (redirects stderr to stdout); the intent is
    # to send the message to stderr, which is `>&2`.
    echo "You must be a root user" >&2
    exit 1
fi

apt update
apt install -y nagios-nrpe-server monitoring-plugins-basic monitoring-plugins-common monitoring-plugins systemd

# mkdir -p is idempotent, so no existence test is needed (the original
# `[[ ! -d ]] &&` pattern would also abort under `set -e` on reruns).
mkdir -p /etc/nagios/nrpe.d

cp -v etc/nagios/nrpe.cfg /etc/nagios/
cp -rv etc/nagios/nrpe.d/* /etc/nagios/nrpe.d/
cp -rv usr/lib/nagios/plugins/* /usr/lib/nagios/plugins/

# Optional per-host override: etc/nagios/nrpe_<short-hostname>.cfg
host_cfg="etc/nagios/nrpe_$(hostname -s).cfg"
include_line="include=/etc/nagios/nrpe_$(hostname -s).cfg"
if [[ -f "$host_cfg" ]]; then
    cp -v "$host_cfg" /etc/nagios/
    # Fix: only append the include once, so reruns don't duplicate it.
    grep -qxF "$include_line" /etc/nagios/nrpe.cfg || echo "$include_line" >> /etc/nagios/nrpe.cfg
fi

# The check_docker plugin reads the Docker daemon socket, so the nagios
# user needs membership in the docker group.
usermod -a -G docker nagios
systemctl restart nagios-nrpe-server.service
|
|
@ -0,0 +1,996 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# logging.basicConfig(level=logging.DEBUG)
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
import stat
|
||||||
|
import traceback
|
||||||
|
from collections import deque, namedtuple, UserDict, defaultdict
|
||||||
|
from concurrent import futures
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from functools import lru_cache
|
||||||
|
from http.client import HTTPConnection
|
||||||
|
from sys import argv
|
||||||
|
from urllib import request
|
||||||
|
from urllib.error import HTTPError, URLError
|
||||||
|
from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector, HTTPRedirectHandler, \
|
||||||
|
Request, HTTPBasicAuthHandler
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
__author__ = 'Tim Laurence'
|
||||||
|
__copyright__ = "Copyright 2019"
|
||||||
|
__credits__ = ['Tim Laurence']
|
||||||
|
__license__ = "GPL"
|
||||||
|
__version__ = "2.2.2"
|
||||||
|
|
||||||
|
'''
|
||||||
|
nrpe compatible check for docker containers.
|
||||||
|
|
||||||
|
Requires Python 3
|
||||||
|
|
||||||
|
Note: I really would have preferred to have used requests for all the network connections but that would have added a
|
||||||
|
dependency.
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Default connection settings for reaching the Docker daemon: the local
# Unix socket path, request timeout (seconds), and the daemon's plain-HTTP TCP port.
DEFAULT_SOCKET = '/var/run/docker.sock'
DEFAULT_TIMEOUT = 10.0
DEFAULT_PORT = 2375
DEFAULT_MEMORY_UNITS = 'B'
# Request v2 image manifests from registries by default.
DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
DEFAULT_PUBLIC_REGISTRY = 'registry-1.docker.io'

# The second value is the power to raise the base to.
UNIT_ADJUSTMENTS_TEMPLATE = {
    '%': 0,
    'B': 0,
    'KB': 1,
    'MB': 2,
    'GB': 3,
    'TB': 4
}
# Populated at runtime (presumably from UNIT_ADJUSTMENTS_TEMPLATE once the
# byte base is known) — assignment happens outside this view; confirm there.
unit_adjustments = None

# Reduce output to a single OK message unless a check fails.
no_ok = False

# Suppress performance data reporting
no_performance = False

# Standard Nagios/NRPE plugin return codes.
OK_RC = 0
WARNING_RC = 1
CRITICAL_RC = 2
UNKNOWN_RC = 3

# These hold the final results: worst return code seen so far (-1 = none yet),
# human-readable status messages, and Nagios perf-data strings.
rc = -1
messages = []
performance_data = []

# Parsed image reference: registry host, repository name, tag, and the
# original full reference string.
ImageName = namedtuple('ImageName', "registry name tag full_name")
|
||||||
|
|
||||||
|
|
||||||
|
class ThresholdSpec(UserDict):
    """Warn/crit threshold pair with optional units.

    Behaves as a mapping with the keys 'warn', 'crit', and 'units' (so it
    can be **-expanded into format strings) while also exposing those keys
    as attributes (spec.warn, spec.crit, spec.units).
    """

    def __init__(self, warn, crit, units=''):
        super().__init__(warn=warn, crit=crit, units=units)

    def __getattr__(self, item):
        # Only the three known keys are mirrored as attributes.
        if item in ('warn', 'crit', 'units'):
            return self.data[item]
        # Fix: the original delegated to super().__getattr__(item), which
        # exists on neither UserDict nor object, so unknown attributes raised
        # a misleading "'super' object has no attribute '__getattr__'" error
        # instead of naming the attribute actually requested.
        raise AttributeError(item)
|
||||||
|
|
||||||
|
|
||||||
|
# How much threading can we do? We are generally not CPU bound so I am using this a worse case cap
DEFAULT_PARALLELISM = 10

# Holds list of all threads
threads = []

# This is used during testing to force sequential execution.
DISABLE_THREADING = False
|
||||||
|
|
||||||
|
|
||||||
|
# Hacked up urllib to handle sockets
|
||||||
|
#############################################################################################
|
||||||
|
# Docker runs a http connection over a socket. http.client knows how to deal with these
|
||||||
|
# but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
|
||||||
|
# cannot fix the fact http.client can't read from socket files. In order to take advantage of
|
||||||
|
# urllib and http.client's capabilities the class below tweaks HttpConnection and passes it
|
||||||
|
# to urllib registering for socket:// connections
|
||||||
|
|
||||||
|
# This is all side effect so excluding coverage
|
||||||
|
class SocketFileHandler(AbstractHTTPHandler):
    """urllib handler that speaks HTTP over a Unix-domain socket.

    Registered with an OpenerDirector, it serves 'socket://' URLs whose
    selector has the form '<socket_file>:<http_path>' — the scheme used here
    to reach the Docker daemon's local socket through normal urllib calls.
    """

    class SocketFileToHttpConnectionAdaptor(HTTPConnection):  # pragma: no cover
        # HTTPConnection subclass whose connect() opens a Unix stream socket
        # at socket_file instead of a TCP connection.

        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
            # host/port are meaningless for a Unix socket but required by
            # HTTPConnection's constructor.
            super().__init__(host='', port=0, timeout=timeout)
            self.socket_file = socket_file

        def connect(self):
            self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None)
            self.sock.settimeout(self.timeout)
            self.sock.connect(self.socket_file)

    def socket_open(self, req):
        # Split '<socket_file>:<path>': the socket path becomes the "host"
        # (passed into the adaptor's constructor by do_open) and the
        # remainder becomes the HTTP request path.
        socket_file, path = req.selector.split(':', 1)
        req.host = socket_file
        req.selector = path
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)
|
||||||
|
|
||||||
|
|
||||||
|
# Tokens are not cached because I expect the callers to cache the responses
|
||||||
|
class Oauth2TokenAuthHandler(HTTPBasicAuthHandler):
    """Replays a request with an OAuth2 bearer token after a 401 challenge.

    Container registries answer unauthenticated requests with a 401 carrying
    a 'www-authenticate: Bearer realm=...' header; this handler fetches a
    token from that realm and retries the original request exactly once.
    """

    # Per-URL count of 401 retries, shared at class level across instances;
    # used to break infinite authentication loops.
    auth_failure_tracker = defaultdict(int)

    def http_response(self, request, response):
        code, hdrs = response.code, response.headers

        # Only intercept 401 responses that advertise the Bearer scheme;
        # everything else passes through untouched.
        www_authenticate_header = response.headers.get('www-authenticate', None)
        if code == 401 and www_authenticate_header:
            scheme = www_authenticate_header.split()[0]
            if scheme.lower() == 'bearer':
                return self.process_oauth2(request, response, www_authenticate_header)

        return response

    # Same handling applies to HTTPS responses.
    https_response = http_response

    @staticmethod
    def _get_outh2_token(www_authenticate_header):
        # NOTE(review): method name has a typo ("outh2" -> "oauth2");
        # kept as-is since renaming would break any external callers.
        # Extract the key="value" fields (realm, scope, service) from the
        # www-authenticate header, then request a token from the realm.
        auth_fields = dict(re.findall(r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""", www_authenticate_header))

        auth_url = "{realm}?scope={scope}&service={service}".format(
            realm=auth_fields['realm'],
            scope=auth_fields['scope'],
            service=auth_fields['service'],
        )
        token_request = Request(auth_url)
        token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
        token_response = request.urlopen(token_request)
        # process_urllib_response is defined elsewhere in this module —
        # presumably it decodes the JSON body; confirm against its definition.
        return process_urllib_response(token_response)['token']

    def process_oauth2(self, request, response, www_authenticate_header):

        # This keeps infinite auth loops from happening: allow at most one
        # token-based retry per URL, then give up with an HTTPError.
        full_url = request.full_url
        self.auth_failure_tracker[full_url] += 1
        if self.auth_failure_tracker[full_url] > 1:
            raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
                            response.headers, response)

        auth_token = self._get_outh2_token(www_authenticate_header)

        # Replay the original request with the bearer token attached;
        # unredirected so the token isn't leaked to redirect targets.
        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared opener used for GET requests throughout the module: handles plain
# HTTP/HTTPS, redirects, the Docker daemon's Unix socket ('socket://' URLs),
# and registry OAuth2 bearer authentication.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
better_urllib_get.add_handler(HTTPHandler())
better_urllib_get.add_handler(HTTPSHandler())
better_urllib_get.add_handler(HTTPRedirectHandler())
better_urllib_get.add_handler(SocketFileHandler())
better_urllib_get.add_handler(Oauth2TokenAuthHandler())
|
||||||
|
|
||||||
|
|
||||||
|
class RegistryError(Exception):
    """Raised when a registry request fails; carries the raw response.

    Fix: the original never called Exception.__init__, so str(exc) was
    always empty in logs and tracebacks. A short message is now passed
    through, while the full response stays available on .response_obj.
    """

    def __init__(self, response):
        super().__init__("Registry error (status {})".format(getattr(response, 'status', 'unknown')))
        # Full response object, kept so callers can inspect status/body.
        self.response_obj = response
|
||||||
|
|
||||||
|
|
||||||
|
# Util functions
|
||||||
|
#############################################################################################
|
||||||
|
def parse_thresholds(spec, include_units=True, units_required=True):
    """
    Split a ':'-separated threshold specification into warn, crit, and units.

    :param spec: The threshold specification being parsed
    :param include_units: Specifies that units should be processed and returned if present
    :param units_required: Mark spec as invalid if the units are missing.
    :return: A ThresholdSpec holding warn, crit, and units (if included and present)
    :raises ValueError: on blank fields, missing required units, or extra fields
    """
    chunks = deque(spec.split(':'))
    if not all(chunks):
        raise ValueError("Blanks are not allowed in a threshold specification: {}".format(spec))

    # First two fields are always the warn and crit integer thresholds.
    warn, crit = int(chunks.popleft()), int(chunks.popleft())

    units = ''
    if include_units:
        if chunks:
            units = chunks.popleft()
        elif units_required:
            raise ValueError("Missing units in {}".format(spec))

    # Anything left over means the spec had too many fields.
    if chunks:
        raise ValueError("Too many threshold specifiers in {}".format(spec))

    return ThresholdSpec(warn=warn, crit=crit, units=units)
|
||||||
|
|
||||||
|
|
||||||
|
def pretty_time(seconds):
    """Break a duration in seconds into human-readable chunks.

    Returns a list like ['1d', '2h', '3min', '4s']; units whose bucket is
    not strictly exceeded are omitted, and seconds are always present.
    """
    chunks = []
    remaining = seconds
    # Peel off each unit in descending order of size
    for suffix, size in (('d', 24 * 60 * 60), ('h', 60 * 60), ('min', 60)):
        if remaining > size:
            count, remaining = divmod(remaining, size)
            chunks.append('{}{}'.format(int(count), suffix))
    chunks.append('{}s'.format(int(remaining)))
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
                                min=None, max=None, greater_than=True):
    """Compare a metric value against thresholds, record perfdata, set status.

    :param container: Container name used in the message and perfdata label.
    :param value: The measured value being evaluated.
    :param thresholds: Mapping-like ThresholdSpec with warn, crit, and units.
    :param name: Long metric name used in the status message.
    :param short_name: Short metric name used in the perfdata label.
    :param min: Optional minimum appended to the perfdata range.
    :param max: Optional maximum appended to the perfdata range.
    :param greater_than: When True, values >= a threshold alert; when False,
        values <= a threshold alert (used for minimum-uptime style checks).
    """
    # NOTE: 'min'/'max' shadow builtins but are part of the public keyword
    # interface of this function, so they are kept.

    def rounder(x):
        return round(x, 2)

    # Units that are conventionally reported without decimal places
    INTEGER_UNITS = ['B', '%', '']

    rounded_value = int(value) if thresholds.units in INTEGER_UNITS else rounder(value)

    perf_string = "{container}_{short_name}={value}{units};{warn};{crit}".format(
        container=container,
        short_name=short_name,
        value=rounded_value,
        **thresholds)
    if min is not None:
        rounded_min = math.floor(min) if thresholds.units in INTEGER_UNITS else rounder(min)
        perf_string += ';{}'.format(rounded_min)
    if max is not None:
        rounded_max = math.ceil(max) if thresholds.units in INTEGER_UNITS else rounder(max)
        perf_string += ';{}'.format(rounded_max)

    global performance_data
    performance_data.append(perf_string)

    if thresholds.units == 's':
        # Keep only the two most significant time units so output stays short
        nice_time = ' '.join(pretty_time(rounded_value)[:2])
        results_str = "{} {} is {}".format(container, name, nice_time)
    else:
        results_str = "{} {} is {}{}".format(container, name, rounded_value, thresholds.units)

    if greater_than:
        def comparator(value, threshold):
            return value >= threshold
    else:
        def comparator(value, threshold):
            return value <= threshold

    # Crit wins over warn; otherwise the check is OK
    if comparator(value, thresholds.crit):
        critical(results_str)
    elif comparator(value, thresholds.warn):
        warning(results_str)
    else:
        ok(results_str)
|
||||||
|
|
||||||
|
|
||||||
|
# Memoized for the life of the process so repeated checks don't re-hit the
# same daemon/registry endpoint.
@lru_cache(maxsize=None)
def get_url(url):
    """GET a URL via the shared opener.

    :param url: Full URL (http://, https://, or socket://) to fetch.
    :return: Tuple of (parsed JSON body, HTTP status code).
    """
    logger.debug("get_url: {}".format(url))
    # 'timeout' is a module global set from the --timeout argument
    response = better_urllib_get.open(url, timeout=timeout)
    logger.debug("get_url: {} {}".format(url, response.status))
    return process_urllib_response(response), response.status
|
||||||
|
|
||||||
|
|
||||||
|
def process_urllib_response(response):
    """Read an HTTP response, decode it as UTF-8, and parse it as JSON."""
    payload = response.read().decode('utf-8')
    return json.loads(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_info(name):
    """Return the daemon's inspect data for the named container."""
    body, _ = get_url(daemon + '/containers/{container}/json'.format(container=name))
    return body
|
||||||
|
|
||||||
|
|
||||||
|
def get_image_info(name):
    """Return the daemon's inspect data for the named image."""
    body, _ = get_url(daemon + '/images/{image}/json'.format(image=name))
    return body
|
||||||
|
|
||||||
|
|
||||||
|
def get_state(container):
    """Return the 'State' mapping from the container's inspect data."""
    info = get_container_info(container)
    return info['State']
|
||||||
|
|
||||||
|
|
||||||
|
def get_stats(container):
    """Return a single (non-streaming) stats sample for the container."""
    body, _ = get_url(daemon + '/containers/{container}/stats?stream=0'.format(container=container))
    return body
|
||||||
|
|
||||||
|
|
||||||
|
def get_ps_name(name_list):
    """Return the container's `docker ps` display name from its Names list.

    The ps name is the entry shaped like '/name' — a single leading slash
    and no other slashes — returned without the leading slash.

    :raises NameError: when no entry has that shape.
    """
    for entry in name_list:
        is_top_level_name = entry[0] == '/' and '/' not in entry[1:]
        if is_top_level_name:
            return entry[1:]
    raise NameError("Error when trying to identify 'ps' name in {}".format(name_list))
|
||||||
|
|
||||||
|
|
||||||
|
def get_containers(names, require_present):
    """Resolve a list of name regexes to the set of matching container names.

    'all' in names returns every container (running or not). When
    require_present is True, any regex that matches nothing raises a
    CRITICAL result for that pattern.
    """
    containers_list, _ = get_url(daemon + '/containers/json?all=1')

    all_container_names = {get_ps_name(entry['Names']) for entry in containers_list}

    if 'all' in names:
        return all_container_names

    matched = set()
    for pattern in names:
        # Anchor the user's regex so it must match the whole name
        anchored = "^{}$".format(pattern)
        hits = {candidate for candidate in all_container_names if re.match(anchored, candidate)}
        if require_present and not hits:
            critical("No containers match {}".format(pattern))
        matched |= hits

    return matched
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_image_id(container):
    """Return the image ID recorded in the container's inspect data."""
    return get_container_info(container)['Image']
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_image_urls(container):
    """Return the RepoTags list for the image backing the given container."""
    image_ref = get_container_info(container)['Image']
    return get_image_info(image_ref)['RepoTags']
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_image_name_to_manifest_url(image_name, insecure_registries):
    """Build the registry v2 manifest URL for an image reference.

    Registries listed in insecure_registries (compared case-insensitively)
    are queried over plain http; everything else over https.

    :return: Tuple of (manifest_url, registry_host).
    """
    parsed = parse_image_name(image_name)

    insecure = [entry.lower() for entry in insecure_registries]
    scheme = 'http' if parsed.registry.lower() in insecure else 'https'

    url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(
        scheme=scheme,
        registry=parsed.registry,
        image_name=parsed.name,
        image_tag=parsed.tag)
    return url, parsed.registry
|
||||||
|
|
||||||
|
|
||||||
|
# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
# As result it is best to single thread this check
# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
def get_digest_from_registry(url):
    """Fetch a manifest from the registry and return its config digest.

    :param url: Full v2 manifest URL for the image.
    :return: The digest string, or None when the manifest lacks one.
    :raises RegistryError: when the registry responds with a non-200 status.
    """
    logger.debug("get_digest_from_registry")
    # TODO: Handle logging in if needed
    registry_info, status_code = get_url(url=url)
    if status_code != 200:
        raise RegistryError(response=registry_info)
    return registry_info['config'].get('digest', None)
|
||||||
|
|
||||||
|
|
||||||
|
def set_rc(new_rc):
    """Raise the global return code to new_rc; never lower it."""
    global rc
    rc = max(rc, new_rc)
|
||||||
|
|
||||||
|
|
||||||
|
def ok(message):
    """Record an OK result: update the exit code and queue the message."""
    set_rc(OK_RC)
    messages.append('OK: {}'.format(message))
|
||||||
|
|
||||||
|
|
||||||
|
def warning(message):
    """Record a WARNING result: update the exit code and queue the message."""
    set_rc(WARNING_RC)
    messages.append('WARNING: {}'.format(message))
|
||||||
|
|
||||||
|
|
||||||
|
def critical(message):
    """Record a CRITICAL result: update the exit code and queue the message."""
    set_rc(CRITICAL_RC)
    messages.append('CRITICAL: {}'.format(message))
|
||||||
|
|
||||||
|
|
||||||
|
def unknown(message):
    """Record an UNKNOWN result: update the exit code and queue the message."""
    set_rc(UNKNOWN_RC)
    messages.append('UNKNOWN: {}'.format(message))
|
||||||
|
|
||||||
|
|
||||||
|
def require_running(name):
    """Decorator factory: run the wrapped check only when the container is running.

    When the container is not running, a CRITICAL result naming the skipped
    check is recorded instead of calling the check.

    :param name: Human-readable check name used in the CRITICAL message.
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            container_state = get_state(container)
            state = normalize_state(container_state)
            if state.lower() == "running":
                func(container, *args, **kwargs)
            else:
                # Container is not running, can't perform the check.
                # BUG FIX: removed a stray trailing '"' that used to appear
                # after the check name in the message.
                critical('{container} is not "running", cannot check {check}'.format(container=container,
                                                                                     check=name))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def multithread_execution(disable_threading=DISABLE_THREADING):
    """Decorator factory that submits a check to the parallel thread pool.

    :param disable_threading: When True, run the check inline instead of
        submitting it. Defaults to the module-wide DISABLE_THREADING setting.
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            # BUG FIX: honor the decorator's parameter. The original tested
            # the global DISABLE_THREADING here, silently ignoring any value
            # passed to multithread_execution().
            if disable_threading:
                func(container, *args, **kwargs)
            else:
                threads.append(parallel_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def singlethread_execution(disable_threading=DISABLE_THREADING):
    """Decorator factory that submits a check to the single-worker pool.

    Used for checks (like registry version lookups) that must not run in
    parallel — see the note above get_digest_from_registry.

    :param disable_threading: When True, run the check inline instead of
        submitting it. Defaults to the module-wide DISABLE_THREADING setting.
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            # BUG FIX: honor the decorator's parameter. The original tested
            # the global DISABLE_THREADING here, silently ignoring any value
            # passed to singlethread_execution().
            if disable_threading:
                func(container, *args, **kwargs)
            else:
                threads.append(serial_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def parse_image_name(image_name):
    """Parse a Docker image reference into its constituent parts.

    Missing pieces get Docker's defaults: registry falls back to
    DEFAULT_PUBLIC_REGISTRY, bare official names gain the 'library/'
    namespace, and a missing tag becomes 'latest'.

    :param image_name: Image reference, e.g. 'registry:5000/repo/name:tag'.
    :return: ImageName with registry, name, tag, and the normalized full_name.
    """

    # These are based on information found here
    # https://docs.docker.com/engine/reference/commandline/tag/#extended-description
    # https://github.com/docker/distribution/blob/master/reference/regexp.go
    host_segment_re = '[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?'
    hostname_re = r'({host_segment}\.)+{host_segment}'.format(host_segment=host_segment_re)
    # A registry is only recognized when it looks like a hostname (has a dot)
    # or carries an explicit port — matching Docker's own disambiguation.
    registry_re = r'((?P<registry>({hostname_re}(:\d+)?|{host_segment_re}:\d+))/)'.format(
        host_segment_re=host_segment_re, hostname_re=hostname_re)
    name_component_ends_re = '[a-z0-9]'
    name_component_middle_re = '[a-z0-9._-]'  # Ignoring spec limit of two _
    name_component_re = '({end}{middle}*{end}|{end})'.format(end=name_component_ends_re,
                                                             middle=name_component_middle_re)
    image_name_re = "(?P<image_name>({name_component}/)*{name_component})".format(name_component=name_component_re)
    image_tag_re = '(?P<image_tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]*)'
    full_re = '^{registry}?{image_name}(:{image_tag})?$'.format(registry=registry_re, image_name=image_name_re,
                                                                image_tag=image_tag_re)
    parsed = re.match(full_re, image_name)
    # NOTE(review): re.match returns None for names outside this grammar, and
    # the .group() calls below would then raise AttributeError — presumably
    # inputs arrive pre-validated from the Docker daemon; confirm.

    registry = parsed.group('registry') if parsed.group('registry') else DEFAULT_PUBLIC_REGISTRY

    image_name = parsed.group('image_name')
    # Official images on the public registry implicitly live under 'library/'
    image_name = image_name if '/' in image_name or registry != DEFAULT_PUBLIC_REGISTRY else 'library/' + image_name

    image_tag = parsed.group('image_tag')
    image_tag = image_tag if image_tag else 'latest'

    full_image_name = "{registry}/{image_name}:{image_tag}".format(
        registry=registry,
        image_name=image_name,
        image_tag=image_tag)

    return ImageName(registry=registry, name=image_name, tag=image_tag, full_name=full_image_name)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_state(status_info):
    """Return a single state string from a container's State mapping.

    Newer Docker engines expose the state directly in 'Status'; older ones
    expose a set of booleans, which are folded down here in priority order
    (Restarting, Paused, Dead, Running, otherwise Exited).
    """
    # New-style engines: 'Status' already holds running/stopped/paused/etc.
    if "Status" in status_info:
        return status_info['Status']

    # Old-style engines: derive the state from the boolean flags
    if status_info["Restarting"]:
        return 'Restarting'
    if status_info["Paused"]:
        return 'Paused'
    if status_info["Dead"]:
        return 'Dead'
    if status_info["Running"]:
        return "Running"
    return 'Exited'
|
||||||
|
|
||||||
|
|
||||||
|
# Checks
|
||||||
|
#############################################################################################
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running(name='memory')
def check_memory(container, thresholds):
    """Check the container's (cache-adjusted) memory usage against thresholds.

    :param container: Name of the container to inspect.
    :param thresholds: ThresholdSpec; units must be a key of unit_adjustments
        (e.g. %, B, KB, MB, GB) or the check reports UNKNOWN.
    """
    # Idiom fix: 'not in' instead of 'not x in'
    if thresholds.units not in unit_adjustments:
        unknown("Memory units must be one of {}".format(list(unit_adjustments.keys())))
        return

    inspection = get_stats(container)

    # Subtracting cache to match what `docker stats` does.
    adjusted_usage = inspection['memory_stats']['usage'] - inspection['memory_stats']['stats']['total_cache']
    # Renamed local 'max' -> 'usage_limit' so the builtin is not shadowed
    if thresholds.units == '%':
        usage_limit = 100
        usage = int(100 * adjusted_usage / inspection['memory_stats']['limit'])
    else:
        usage_limit = inspection['memory_stats']['limit'] / unit_adjustments[thresholds.units]
        usage = adjusted_usage / unit_adjustments[thresholds.units]

    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='memory',
                                short_name='mem', min=0, max=usage_limit)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
def check_status(container, desired_state):
    """Verify the container is in the state the user asked for."""
    wanted = desired_state.lower()
    actual = normalize_state(get_state(container)).lower()
    if wanted != actual:
        critical("{} state is not {}".format(container, desired_state))
        return
    ok("{} status is {}".format(container, desired_state))
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('health')
def check_health(container):
    """Report the container's built-in healthcheck status, if it has one."""
    state = get_state(container)

    # No healthcheck configured (or no status yet) -> nothing to evaluate
    if "Health" not in state or "Status" not in state["Health"]:
        unknown('{} has no health check data'.format(container))
        return

    health = state["Health"]["Status"]
    message = "{} is {}".format(container, health)
    if health == 'healthy':
        ok(message)
    elif health == 'unhealthy':
        critical(message)
    else:
        unknown(message)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('uptime')
def check_uptime(container, thresholds):
    """Check the container has been up at least the threshold seconds."""
    started_at = get_container_info(container)['State']['StartedAt']
    # Keep only whole seconds; the timestamp carries extra precision that
    # "%Y-%m-%dT%H:%M:%S" cannot parse.
    start = datetime.strptime(started_at[0:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    uptime = (datetime.now(timezone.utc) - start).total_seconds()

    graph_padding = 2
    thresholds.units = 's'
    # greater_than=False: alert when uptime is BELOW the thresholds
    evaluate_numeric_thresholds(container=container, value=uptime, thresholds=thresholds, name='uptime',
                                short_name='up', min=0, max=graph_padding, greater_than=False)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
def check_image_age(container, thresholds):
    """Check how many days old the container's image is."""
    image_created = get_image_info(get_container_info(container)['Image'])['Created']
    # Keep only whole seconds; the timestamp carries extra precision that
    # "%Y-%m-%dT%H:%M:%S" cannot parse.
    built = datetime.strptime(image_created[0:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    image_age = (datetime.now(timezone.utc) - built).days

    graph_padding = 2
    thresholds.units = 'd'
    evaluate_numeric_thresholds(container=container, value=image_age, thresholds=thresholds, name='image_age',
                                short_name='age', min=0, max=graph_padding, greater_than=True)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('restarts')
def check_restarts(container, thresholds):
    """Check the container's restart count against thresholds."""
    restarts = int(get_container_info(container)['RestartCount'])

    graph_padding = 2
    evaluate_numeric_thresholds(container=container, value=restarts, thresholds=thresholds, name='restarts',
                                short_name='re', min=0, max=graph_padding)
|
||||||
|
|
||||||
|
|
||||||
|
@singlethread_execution()
def check_version(container, insecure_registries):
    """Compare the container's local image digest against the registry's.

    OK when the digests match, CRITICAL when they differ, UNKNOWN when the
    comparison cannot be made (missing digest, ambiguous or absent tags,
    unreachable registry, TLS protocol mismatch, or registry errors).

    :param container: Name of the container whose image is checked.
    :param insecure_registries: Registries to query over plain http.
    """
    image_id = get_container_image_id(container)
    logger.debug("Local container image ID: {}".format(image_id))
    if image_id is None:
        unknown('Checksum missing for "{}", try doing a pull'.format(container))
        return

    image_urls = get_container_image_urls(container=container)
    # The comparison is only meaningful when exactly one tag names this image
    if len(image_urls) > 1:
        unknown('"{}" has multiple tags/names. Unsure which one to use to check the version.'.format(container))
        return
    elif len(image_urls) == 0:
        unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container))
        return

    url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
    logger.debug("Looking up image digest here {}".format(url))
    try:
        registry_hash = get_digest_from_registry(url)
    except URLError as e:
        # A TLS handshake against a plain-http registry shows up as UNKNOWN_PROTOCOL
        if hasattr(e.reason, 'reason') and e.reason.reason == 'UNKNOWN_PROTOCOL':
            unknown(
                "TLS error connecting to registry {} for {}, should you use the '--insecure-registry' flag?"
                .format(registry, container))
            return
        elif hasattr(e.reason, 'strerror') and e.reason.strerror == 'nodename nor servname provided, or not known':
            unknown(
                "Cannot reach registry for {} at {}".format(container, url))
            return
        else:
            raise e
    except RegistryError as e:
        unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url))
        return
    logger.debug("Image digests, local={} remote={}".format(image_id, registry_hash))
    if registry_hash == image_id:
        ok("{}'s version matches registry".format(container))
        return
    critical("{}'s version does not match registry".format(container))
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_cpu_capacity_precentage(info, stats):
    """Compute CPU usage as a percentage of the container's CPU capacity.

    Capacity honors --cpus (NanoCpus) and --cpu-quota/--cpu-period limits;
    containers with no limit are measured against all visible CPUs.

    :param info: `docker inspect` data for the container.
    :param stats: One sample from the container's stats endpoint.
    :return: Usage percentage rounded to a whole number (as a float).
    """
    host_config = info['HostConfig']
    cpu_stats = stats['cpu_stats']

    # Newer engines report the CPU count directly; older ones only expose
    # the per-CPU usage list, whose length serves the same purpose.
    if 'online_cpus' in cpu_stats:
        num_cpus = cpu_stats['online_cpus']
    else:
        num_cpus = len(cpu_stats['cpu_usage']['percpu_usage'])

    # Identify which limit system is in use
    if host_config.get('NanoCpus', 0) != 0:
        # --cpus: expressed in billionths of a CPU
        period = 1000000000
        quota = host_config['NanoCpus']
    elif host_config.get('CpuQuota', 0) != 0:
        # --cpu-quota, with its (possibly defaulted) scheduling period
        period = 100000 if host_config['CpuPeriod'] == 0 else host_config['CpuPeriod']
        quota = host_config['CpuQuota']
    else:
        # unlimited: capacity is simply every visible CPU
        period = 1
        quota = num_cpus

    if period * num_cpus < quota:
        # This handles the case where the quota is actually bigger than amount available by all the cpus.
        available_limit_ratio = 1
    else:
        available_limit_ratio = (period * num_cpus) / quota

    cpu_delta = cpu_stats['cpu_usage']['total_usage'] - stats['precpu_stats']['cpu_usage']['total_usage']
    system_delta = cpu_stats['system_cpu_usage'] - stats['precpu_stats']['system_cpu_usage']
    usage = (cpu_delta / system_delta) * available_limit_ratio
    return round(usage * 100, 0)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('cpu')
def check_cpu(container, thresholds):
    """Check the container's CPU usage percentage, honoring CPU limits."""
    usage = calculate_cpu_capacity_precentage(info=get_container_info(container),
                                              stats=get_stats(container=container))

    thresholds.units = '%'
    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='cpu', short_name='cpu',
                                min=0, max=100)
|
||||||
|
|
||||||
|
|
||||||
|
def process_args(args):
    """Parse the command line and configure connection globals.

    Side effects: sets the module globals `timeout`, `daemon`, and
    `connection_type` from the connection-related flags.

    :param args: List of argument strings (typically sys.argv[1:]).
    :return: The parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(description='Check docker containers.')

    # Connect to local socket or ip address
    connection_group = parser.add_mutually_exclusive_group()
    connection_group.add_argument('--connection',
                                  dest='connection',
                                  action='store',
                                  default=DEFAULT_SOCKET,
                                  type=str,
                                  metavar='[/<path to>/docker.socket|<ip/host address>:<port>]',
                                  help='Where to find docker daemon socket. (default: %(default)s)')

    connection_group.add_argument('--secure-connection',
                                  dest='secure_connection',
                                  action='store',
                                  type=str,
                                  metavar='[<ip/host address>:<port>]',
                                  help='Where to find TLS protected docker daemon socket.')

    # Base used when converting between byte units (binary vs decimal)
    base_group = parser.add_mutually_exclusive_group()
    base_group.add_argument('--binary_units',
                            dest='units_base',
                            action='store_const',
                            const=1024,
                            help='Use a base of 1024 when doing calculations of KB, MB, GB, & TB (This is default)')

    base_group.add_argument('--decimal_units',
                            dest='units_base',
                            action='store_const',
                            const=1000,
                            help='Use a base of 1000 when doing calculations of KB, MB, GB, & TB')
    parser.set_defaults(units_base=1024)

    # Connection timeout
    parser.add_argument('--timeout',
                        dest='timeout',
                        action='store',
                        type=float,
                        default=DEFAULT_TIMEOUT,
                        help='Connection timeout in seconds. (default: %(default)s)')

    # Container name
    parser.add_argument('--containers',
                        dest='containers',
                        action='store',
                        nargs='+',
                        type=str,
                        default=['all'],
                        help='One or more RegEx that match the names of the container(s) to check. If omitted all containers are checked. (default: %(default)s)')

    # Require each --containers regex to match something
    parser.add_argument('--present',
                        dest='present',
                        default=False,
                        action='store_true',
                        help='Modifies --containers so that each RegEx must match at least one container.')

    # Threads
    parser.add_argument('--threads',
                        dest='threads',
                        default=DEFAULT_PARALLELISM,
                        action='store',
                        type=int,
                        help='This + 1 is the maximum number of concurent threads/network connections. (default: %(default)s)')

    # CPU
    parser.add_argument('--cpu',
                        dest='cpu',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Check cpu usage percentage taking into account any limits.')

    # Memory
    parser.add_argument('--memory',
                        dest='memory',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT:UNITS',
                        help='Check memory usage taking into account any limits. Valid values for units are %%,B,KB,MB,GB.')

    # State
    parser.add_argument('--status',
                        dest='status',
                        action='store',
                        type=str,
                        help='Desired container status (running, exited, etc).')

    # Health
    parser.add_argument('--health',
                        dest='health',
                        default=None,
                        action='store_true',
                        help="Check container's health check status")

    # Uptime
    parser.add_argument('--uptime',
                        dest='uptime',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Minimum container uptime in seconds. Use when infrequent crashes are tolerated.')

    # Image Age
    parser.add_argument('--image-age',
                        dest='image_age',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Maximum image age in days.')

    # Version
    parser.add_argument('--version',
                        dest='version',
                        default=None,
                        action='store_true',
                        help='Check if the running images are the same version as those in the registry. Useful for finding stale images. Does not support login.')

    # Insecure registries (used together with --version)
    parser.add_argument('--insecure-registries',
                        dest='insecure_registries',
                        action='store',
                        nargs='+',
                        type=str,
                        default=[],
                        help='List of registries to connect to with http(no TLS). Useful when using "--version" with images from insecure registries.')

    # Restart
    parser.add_argument('--restarts',
                        dest='restarts',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Container restart thresholds.')

    # no-ok
    parser.add_argument('--no-ok',
                        dest='no_ok',
                        action='store_true',
                        help='Make output terse suppressing OK messages. If all checks are OK return a single OK.')

    # no-performance
    parser.add_argument('--no-performance',
                        dest='no_performance',
                        action='store_true',
                        help='Suppress performance data. Reduces output when performance data is not being used.')

    parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))

    # With no arguments at all, show usage before parsing proceeds with defaults
    if len(args) == 0:
        parser.print_help()

    parsed_args = parser.parse_args(args=args)

    global timeout
    timeout = parsed_args.timeout

    global daemon
    global connection_type
    if parsed_args.secure_connection:
        daemon = 'https://' + parsed_args.secure_connection
        connection_type = 'https'
    elif parsed_args.connection:
        if parsed_args.connection[0] == '/':
            # A leading '/' means a unix socket path. The trailing ':'
            # presumably keeps the socket:// URL well-formed for the
            # SocketFileHandler defined earlier — confirm against it.
            daemon = 'socket://' + parsed_args.connection + ':'
            connection_type = 'socket'
        else:
            daemon = 'http://' + parsed_args.connection
            connection_type = 'http'

    return parsed_args
|
||||||
|
|
||||||
|
|
||||||
|
def no_checks_present(parsed_args):
    """Return True when the parsed arguments request no checks at all."""
    # Every 'check_*' function in this module corresponds to an argparse dest
    check_names = [key[6:] for key in globals() if key.startswith('check_')]
    any_check_requested = any(getattr(parsed_args, check) is not None for check in check_names)
    # --present acts like a check even though it is not implemented as one
    return not any_check_requested and not parsed_args.present
|
||||||
|
|
||||||
|
|
||||||
|
def socketfile_permissions_failure(parsed_args):
    """Return True when a unix-socket connection is configured but unusable."""
    if connection_type != 'socket':
        return False
    path = parsed_args.connection
    # The file must exist, actually be a socket, and be readable and writable
    usable = (os.path.exists(path)
              and stat.S_ISSOCK(os.stat(path).st_mode)
              and os.access(path, os.R_OK)
              and os.access(path, os.W_OK))
    return not usable
|
||||||
|
|
||||||
|
|
||||||
|
def print_results():
    """Print accumulated check messages (and perfdata) in Nagios format."""
    if no_ok:
        # Terse mode: drop every OK line, collapsing to a bare 'OK' when
        # nothing else remains
        interesting = [entry for entry in messages if not entry.startswith('OK: ')]
        messages_concat = '; '.join(interesting) if interesting else 'OK'
    else:
        messages_concat = '; '.join(messages)

    if no_performance or len(performance_data) == 0:
        print(messages_concat)
    else:
        print(messages_concat + '|' + ' '.join(performance_data))
|
||||||
|
|
||||||
|
|
||||||
|
def perform_checks(raw_args):
    """Parse arguments, validate the environment, and run requested checks.

    Sets up the thread pools and output-shaping globals, then dispatches
    one call per requested check per matching container. Checks may run on
    worker threads; results accumulate via the messages/performance_data
    globals.

    :param raw_args: List of raw argument strings (sys.argv[1:]).
    """
    args = process_args(raw_args)

    global parallel_executor
    parallel_executor = futures.ThreadPoolExecutor(max_workers=args.threads)
    global serial_executor
    serial_executor = futures.ThreadPoolExecutor(max_workers=1)

    global unit_adjustments
    unit_adjustments = {key: args.units_base ** value for key, value in UNIT_ADJUSTMENTS_TEMPLATE.items()}

    global no_ok
    no_ok = args.no_ok

    global no_performance
    # BUG FIX: this previously copied args.no_ok, so --no-performance was
    # ignored and --no-ok wrongly suppressed perfdata.
    no_performance = args.no_performance

    if socketfile_permissions_failure(args):
        unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
        return

    if args.containers == ["all"] and args.present:
        unknown("You can not use --present without --containers")
        return

    if no_checks_present(args):
        unknown("No checks specified.")
        return

    # Here is where all the work happens
    #############################################################################################
    containers = get_containers(args.containers, args.present)

    if len(containers) == 0 and not args.present:
        unknown("No containers names found matching criteria")
        return

    for container in containers:

        # Check status
        if args.status:
            check_status(container, args.status)

        # Check version
        if args.version:
            check_version(container, args.insecure_registries)

        # below are checks that require a 'running' status

        # Check health
        if args.health:
            check_health(container)

        # Check cpu usage
        if args.cpu:
            check_cpu(container, parse_thresholds(args.cpu, units_required=False))

        # Check memory usage
        if args.memory:
            check_memory(container, parse_thresholds(args.memory, units_required=False))

        # Check uptime
        if args.uptime:
            check_uptime(container, parse_thresholds(args.uptime, include_units=False))

        # Check image age
        if args.image_age:
            check_image_age(container, parse_thresholds(args.image_age, include_units=False))

        # Check restart count
        if args.restarts:
            check_restarts(container, parse_thresholds(args.restarts, include_units=False))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: run checks, wait for worker threads, print, and exit.

    Any exception escaping the checks is downgraded to an UNKNOWN result so
    the plugin still emits Nagios-parsable output, and the process exits
    with the accumulated return code.
    """
    try:
        perform_checks(argv[1:])

        # Collect results so exceptions raised inside worker threads surface here
        [x.result() for x in futures.as_completed(threads)]

    except Exception as e:
        traceback.print_exc()
        # BUG FIX: removed a stray apostrophe from the message ("check':")
        unknown("Exception raised during check: {}".format(repr(e)))
    print_results()
    exit(rc)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main() runs the checks, prints results, and exits
# with the accumulated return code.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue