| Current File : //bin/check_mk_agent |
#!/bin/bash
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
# Defaults for the two run-mode switches; a value already present in the
# environment is kept. Both are later executed as commands
# (e.g. `$MK_RUN_SYNC_PARTS && ...`), so they must hold "true" or "false".
: "${MK_RUN_ASYNC_PARTS=true}"
: "${MK_RUN_SYNC_PARTS=true}"
export MK_RUN_ASYNC_PARTS
export MK_RUN_SYNC_PARTS
# Make sure that locally installed binaries are found
PATH=$PATH:/usr/local/bin
[ -d /var/qmail/bin ] && PATH=$PATH:/var/qmail/bin
# Base directories of the agent, exported for child processes.
export MK_LIBDIR='/usr/lib/check_mk_agent'
export MK_CONFDIR='/etc/check_mk'
export MK_VARDIR='/var/lib/check_mk_agent'
# Optionally set a tempdir for all subsequent calls
#export TMPDIR=
# Provide information about the remote host. That helps when data
# is being sent only once to each remote host.
# Precedence: REMOTE_HOST, then REMOTE_ADDR, then the first word of SSH_CLIENT.
if [ "$REMOTE_HOST" ]; then
export REMOTE=$REMOTE_HOST
elif [ "$REMOTE_ADDR" ]; then
export REMOTE=$REMOTE_ADDR
elif [ "$SSH_CLIENT" ]; then
export REMOTE=${SSH_CLIENT%% *}
fi
# The package name gets patched for baked agents to either
# "check-mk-agent" or the name set by the "name of agent packages" rule
# TODO: rename (used for systemd as well), deal with when fixing CMK-8339
XINETD_SERVICE_NAME=check-mk-agent
# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR=$MK_LIBDIR/plugins
# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please
# refer to online documentation for details about local checks.
LOCALDIR=$MK_LIBDIR/local
# All files in SPOOLDIR will simply be appended to the agent
# output if they are not outdated (see below)
SPOOLDIR=$MK_VARDIR/spool
# close standard input (for security reasons) and stderr when not
# explicitly in debug mode.
# When the nodes agent is executed by a e.g. docker node in a container,
# then don't close stdin, because the agent is piped through it in this
# case.
# "-d" as first argument enables shell tracing instead and keeps stderr open.
if [ "$1" = -d ]; then
set -xv
elif [ -z "$MK_FROM_NODE" ]; then
exec </dev/null 2>/dev/null
fi
# let RTCs know about this remote
# One empty marker file per remote is kept below rtc_remotes; the directory is
# created on demand (mkdir without -p, so MK_VARDIR itself must already exist).
[ -d "${MK_VARDIR}/rtc_remotes" ] || mkdir "${MK_VARDIR}/rtc_remotes"
[ -n "${REMOTE}" ] && touch "${MK_VARDIR}/rtc_remotes/${REMOTE}"
# Function to replace "if type [somecmd]" idiom
# 'command -v' tends to be more robust vs 'which' and 'type' based tests
inpath() {
    # Succeeds (silently) when the given name resolves to something the
    # shell can run. Aborts with "No command to test" when called without
    # an argument.
    local candidate="${1:?No command to test}"
    command -v "$candidate" &>/dev/null
}
# Remove locale settings to eliminate localized outputs where possible
# The locale logic here is used to make the Python encoding detection work (see CMK-2778).
unset -v LANG
# The probing below needs both "locale" and "paste"; without them LC_ALL is
# left to the fallback further down.
if inpath locale && inpath paste; then
# The space at the beginning of " $(locale ...)" is important, since we also have a space in
# the conditions. We need this to also match C.UTF-8 and C.utf8 right at the beginning.
# Furthermore, we cannot simply use *'C.UTF-8'* (without the space), because this also
# matches e.g. es_EC.UTF-8.
case " $(locale -a | paste -sd ' ' -)" in
*' C.UTF-8'*) LC_ALL="C.UTF-8" ;;
*' C.utf8'*) LC_ALL="C.utf8" ;;
esac
fi
# Fall back to plain "C" only if LC_ALL is still empty. NOTE: a non-empty
# LC_ALL inherited from the environment is kept when no C UTF-8 locale matched.
LC_ALL="${LC_ALL:-C}"
export LC_ALL
# Check whether an interpreter exists and satisfies a version constraint.
#
# Arguments:
#   $1 - interpreter command to probe (e.g. python3)
#   $2 - required major version (must match exactly)
#   $3 - minimum minor version
# Outputs:
#   the interpreter name on stdout when the constraint is met
# Returns:
#   0 if the interpreter is usable, 1 otherwise (not found, unparsable
#   version output, or version mismatch)
read_python_version() {
    local interpreter="$1" wanted_major="$2" minimum_minor="$3"
    local version major minor
    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), we have to use an intermediate variable for the pattern.
    local version_pattern='^([0-9]+)\.([0-9]+)$'

    inpath "$interpreter" || return 1

    version=$("$interpreter" -c 'import sys; print("%s.%s"%(sys.version_info[0], sys.version_info[1]))')
    # Anything that is not "<digits>.<digits>" (empty output of a broken
    # interpreter, error text, ...) must not reach the numeric comparisons.
    [[ "$version" =~ $version_pattern ]] || return 1
    major=${BASH_REMATCH[1]}
    minor=${BASH_REMATCH[2]}

    if [ "$major" -eq "$wanted_major" ] && [ "$minor" -ge "$minimum_minor" ]; then
        echo "$interpreter"
        return 0
    fi
    return 1
}
# Pick the interpreters: Python 3 has to be >= 3.4, Python 2 >= 2.6. A plain
# "python" command is accepted for either role if its version matches.
PYTHON3=$(read_python_version python3 3 4 || read_python_version python 3 4)
PYTHON2=$(read_python_version python2 2 6 || read_python_version python 2 6)
# python_path.cfg is sourced after the detection, so it may override the
# values found above.
if [ -f "$MK_CONFDIR/python_path.cfg" ]; then
# shellcheck source=/dev/null
. "$MK_CONFDIR/python_path.cfg"
fi
export PYTHON2 PYTHON3
# NO_PYTHON: neither interpreter is known.
# WRONG_PYTHON_COMMAND: the preferred interpreter (PYTHON3 if set, otherwise
# PYTHON2) cannot be started; exit code 127 is the shell's "command not found".
if [ -z "$PYTHON2" ] && [ -z "$PYTHON3" ]; then
NO_PYTHON=true
elif [ -n "$PYTHON3" ] && [ "$(
$PYTHON3 -c 'pass' >/dev/null 2>&1
echo $?
)" -eq 127 ]; then
WRONG_PYTHON_COMMAND=true
elif [ -z "$PYTHON3" ] && [ "$(
$PYTHON2 -c 'pass' >/dev/null 2>&1
echo $?
)" -eq 127 ]; then
WRONG_PYTHON_COMMAND=true
fi
# Detect whether or not the agent is being executed in a container
# environment.
# NOTE: the two flags are only unset in the final else branch; a value
# inherited from the environment survives when another branch matches.
if [ -f /.dockerenv ]; then
IS_DOCKERIZED=1
elif grep container=lxc /proc/1/environ >/dev/null 2>&1; then
# Works in lxc environment e.g. on Ubuntu bionic, but does not
# seem to work in proxmox (see CMK-1561)
IS_LXC_CONTAINER=1
elif grep 'lxcfs /proc/cpuinfo fuse.lxcfs' /proc/mounts >/dev/null 2>&1; then
# Seems to work in proxmox
IS_LXC_CONTAINER=1
else
unset IS_DOCKERIZED
unset IS_LXC_CONTAINER
fi
# Inside a container, find out which cgroup version is mounted. "stat -f -c %t"
# prints the filesystem type in hex; 63677270 is the magic number of cgroup2.
# With cgroup v2 the container sections get the suffix "_cgroupv2".
if [ -n "$IS_DOCKERIZED" ] || [ -n "$IS_LXC_CONTAINER" ]; then
if [ "$(stat -fc'%t' /sys/fs/cgroup)" = "63677270" ]; then
IS_CGROUP_V2=1
CGROUP_SECTION_SUFFIX="_cgroupv2"
else
unset IS_CGROUP_V2
unset CGROUP_SECTION_SUFFIX
fi
fi
# Prefer (relatively) new /usr/bin/timeout from coreutils against
# our shipped waitmax. waitmax is statically linked and crashes on
# some Ubuntu versions recently.
# The wrapper function is exported so that child bash processes see it too.
if inpath timeout; then
waitmax() {
timeout "$@"
}
export -f waitmax
fi
# Optional configuration file; sourced as shell code if it exists.
if [ -f "$MK_CONFDIR/exclude_sections.cfg" ]; then
# shellcheck source=/dev/null
. "$MK_CONFDIR/exclude_sections.cfg"
fi
# Source the optional encryption.cfg and define optionally_encrypt(), a
# stdin-to-stdout filter. With ENCRYPTED=yes a two character protocol
# marker ("03", "02" or "00", chosen by the OpenSSL version) is written to
# stdout and the filter encrypts with $PASSPHRASE; otherwise it is "cat".
set_up_encryption() {
    # shellcheck source=/dev/null
    if [ -f "$MK_CONFDIR/encryption.cfg" ]; then
        . "$MK_CONFDIR/encryption.cfg"
    fi

    if [ "$ENCRYPTED" != "yes" ]; then
        optionally_encrypt() { cat; }
        return
    fi

    # "1.1.1k" becomes 10101: major * 10000 + minor * 100 + patch
    OPENSSL_VERSION=$(openssl version | awk '{print $2}' | awk -F . '{print (($1 * 100) + $2) * 100+ $3}')

    if [ "$OPENSSL_VERSION" -ge 10101 ]; then
        printf '%s' "03"
        # Depending on the Checkmk version, a key of proper length (256 bits) is provided. However, we
        # always use key derivation here, as this is suitable for passwords of all lengths.
        optionally_encrypt() { openssl enc -aes-256-cbc -md sha256 -iter 10000 -k "$PASSPHRASE"; }
    elif [ "$OPENSSL_VERSION" -ge 10000 ]; then
        printf '%s' "02"
        optionally_encrypt() { openssl enc -aes-256-cbc -md sha256 -k "$PASSPHRASE" -nosalt; }
    else
        printf '%s' "00"
        optionally_encrypt() { openssl enc -aes-256-cbc -md md5 -k "$PASSPHRASE" -nosalt; }
    fi
}
# Real-time check configuration: RTC_PLUGINS starts out empty and may be
# set by the optional real_time_checks.cfg, which is sourced as shell code.
RTC_PLUGINS=""
if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
# shellcheck source=/dev/null
. "$MK_CONFDIR/real_time_checks.cfg"
fi
# Print the <<<check_mk>>> section: agent version, OS, hostname and the
# directories in use, plus an "OnlyFrom:" line when an access restriction
# can be read from the xinetd or systemd configuration.
section_checkmk() {
    # Quoted on use, so a patched service name containing whitespace or
    # glob characters still refers to exactly one file.
    local socket_unit="/etc/systemd/system/${XINETD_SERVICE_NAME}.socket"

    echo "<<<check_mk>>>"
    echo "Version: 2.0.0p23"
    echo "AgentOS: linux"
    echo "Hostname: $(hostname)"
    echo "AgentDirectory: $MK_CONFDIR"
    echo "DataDirectory: $MK_VARDIR"
    echo "SpoolDirectory: $SPOOLDIR"
    echo "PluginsDirectory: $PLUGINSDIR"
    echo "LocalDirectory: $LOCALDIR"

    # If we are called via xinetd, try to find only_from configuration
    if [ -n "$REMOTE_HOST" ]; then
        sed -n "/^service[[:space:]]*${XINETD_SERVICE_NAME}/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/OnlyFrom: \1/p" /etc/xinetd.d/* | head -n1
    # If we are called via systemd, try to find only_from configuration as well
    elif inpath systemctl; then
        sed -n '/^IPAddressAllow/s/IPAddressAllow=/OnlyFrom: /p' "$socket_unit"
        # NOTE: The above line just reads back the socket file we deployed ourselves. Systemd units can be altered by
        # other user defined unit files, so this *may* not be correct. A better way of doing this seemed to be querying
        # systemctl itself about the 'effective' property:
        #
        #   systemctl show --property IPAddressAllow "${XINETD_SERVICE_NAME}.socket" | sed 's/IPAddressAllow=/OnlyFrom: /'
        #
        # However this ("successfully") reports an empty list or '[unprintable]' on older systemd versions :-(
    fi
}
#
# CHECK SECTIONS
#
# Print the <<<labels:sep(0)>>> section with the host label
# "cmk/device_type" ("container" or "vm"). Nothing is printed when neither
# applies.
section_labels() {
    LABELS=""
    # Start from a clean slate: without this reset a DEVICE_TYPE inherited
    # from the environment would be reported as a label.
    DEVICE_TYPE=""

    if [ -n "$IS_DOCKERIZED" ] || [ -n "$IS_LXC_CONTAINER" ]; then
        DEVICE_TYPE="container"
    elif grep "hypervisor" /proc/cpuinfo >/dev/null 2>&1; then
        DEVICE_TYPE="vm"
    fi

    if [ -n "$DEVICE_TYPE" ]; then
        LABELS+="{\"cmk/device_type\":\"$DEVICE_TYPE\"}\n"
    fi

    if [ -n "$LABELS" ]; then
        echo '<<<labels:sep(0)>>>'
        # -e turns the literal "\n" separators collected above into newlines
        echo -e "$LABELS"
    fi
}
# Print memory information: <<<mem>>> from /proc/meminfo on hosts and LXC
# containers, or the cgroup based <<<docker_container_mem...>>> section
# inside Docker.
section_mem() {
    if [ -z "$IS_DOCKERIZED" ]; then
        local drop_pattern='^Swap:|^Mem:|total:'
        # LXC: additionally hide the Vmalloc* and Committed* lines
        if [ -n "$IS_LXC_CONTAINER" ]; then
            drop_pattern+='|^Vmalloc|^Committed'
        fi
        echo '<<<mem>>>'
        grep -E -v "$drop_pattern" </proc/meminfo
        return
    fi

    local cgroup_root=/sys/fs/cgroup
    echo "<<<docker_container_mem$CGROUP_SECTION_SUFFIX>>>"
    if [ -n "$IS_CGROUP_V2" ]; then
        cat "$cgroup_root/memory.stat"
        echo "memory.current $(cat "$cgroup_root/memory.current")"
        echo "memory.max $(cat "$cgroup_root/memory.max")"
    else
        cat "$cgroup_root/memory/memory.stat"
        echo "usage_in_bytes $(cat "$cgroup_root/memory/memory.usage_in_bytes")"
        echo "limit_in_bytes $(cat "$cgroup_root/memory/memory.limit_in_bytes")"
    fi
    grep -F 'MemTotal:' /proc/meminfo
}
# Print CPU load information: <<<cpu>>> on regular hosts, or a cgroup
# based <<<docker_container_cpu...>>> / <<<lxc_container_cpu...>>> section
# inside containers. Sets CPU_REGEX and NUM_CPUS.
section_cpu() {
    local arch container_kind

    # ARM kernels list their cores as "processor" lines only
    arch=$(uname -m)
    if [ "$arch" = "armv7l" ] || [ "$arch" = "armv6l" ] || [ "$arch" = "aarch64" ]; then
        CPU_REGEX='^processor'
    else
        CPU_REGEX='^CPU|^processor'
    fi
    NUM_CPUS=$(grep -c -E "$CPU_REGEX" </proc/cpuinfo)

    if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ]; then
        echo '<<<cpu>>>'
        echo "$(cat /proc/loadavg) $NUM_CPUS"
        if [ -f "/proc/sys/kernel/threads-max" ]; then
            cat /proc/sys/kernel/threads-max
        fi
        return
    fi

    if [ -n "$IS_DOCKERIZED" ]; then
        container_kind=docker
    else
        container_kind=lxc
    fi
    echo "<<<${container_kind}_container_cpu$CGROUP_SECTION_SUFFIX>>>"

    if [ -n "$IS_CGROUP_V2" ]; then
        echo "uptime $(cat /proc/uptime)"
        echo "num_cpus $NUM_CPUS"
        cat /sys/fs/cgroup/cpu.stat
    else
        grep "^cpu " /proc/stat
        echo "num_cpus $NUM_CPUS"
        cat /sys/fs/cgroup/cpuacct/cpuacct.stat
    fi
}
# Print the <<<uptime>>> section. Inside Docker the uptime is derived from
# the status change time of /dev/pts instead of /proc/uptime.
section_uptime() {
    echo '<<<uptime>>>'
    if [ -n "$IS_DOCKERIZED" ]; then
        local now reference
        now=$(date +%s)
        reference=$(stat -c %Z /dev/pts)
        # expanded textually on purpose, exactly like nested substitutions
        echo "$(($now - $reference))"
    else
        cat /proc/uptime
    fi
}
# Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output)
# Heads up: NFS mounts are generally suppressed to avoid agent hangs.
# If hard NFS mounts are configured or you have too large nfs retry/timeout
# settings, accessing those mounts from the agent would leave you with
# thousands of agent processes and, ultimately, a dead monitored system.
# These should generally be monitored on the NFS server, not on the clients.
# Print two <<<df>>> sections for local filesystems: block usage first,
# then inode usage framed by [df_inodes_start] / [df_inodes_end].
# Does nothing inside Docker.
section_df() {
    [ -z "$IS_DOCKERIZED" ] || return 0

    # The exclusion list is getting a bit of a problem.
    # -l should hide any remote FS but seems to be all but working.
    local -a skipped_types=(smbfs cifs iso9660 udf nfsv4 nfs mvfs prl_fs squashfs devtmpfs autofs)
    # zfs is only excluded outside of LXC containers
    if [ -z "$IS_LXC_CONTAINER" ]; then
        skipped_types+=(zfs)
    fi

    local -a exclude_args=()
    local fstype
    for fstype in "${skipped_types[@]}"; do
        exclude_args+=(-x "$fstype")
    done

    echo '<<<df>>>'
    df -PTlk "${exclude_args[@]}" | sed 1d

    # df inodes information
    echo '<<<df>>>'
    echo '[df_inodes_start]'
    df -PTli "${exclude_args[@]}" | sed 1d
    echo '[df_inodes_end]'
}
# Print the <<<systemd_units>>> section: the list of unit files followed
# by the unit table (up to and including its first empty line), both with
# runs of spaces squeezed.
sections_systemd() {
    inpath systemctl || return 0

    printf '%s\n' '<<<systemd_units>>>' '[list-unit-files]'
    systemctl list-unit-files --no-pager | tr -s ' '
    printf '%s\n' '[all]'
    systemctl --all --no-pager | sed '/^$/q' | tr -s ' '
}
# Print the ZFS sections (properties via "zfs get", usage via "df -t zfs")
# when the zfs command is available.
section_zfs() {
    inpath zfs || return 0

    echo '<<<zfsget:sep(9)>>>'
    zfs get -t filesystem,volume -Hp name,quota,used,avail,mountpoint,type 2>/dev/null
    printf '%s\n' '<<<zfsget>>>' '[df]'
    df -PTlk -t zfs | sed 1d
}
# Print the <<<nfsmounts>>> and <<<cifsmounts>>> sections. Every mount point
# found in /proc/mounts is probed with "stat -f" under a waitmax time limit
# (5 seconds for NFS, 2 seconds for CIFS, killed with signal 9); a failing or
# timed out probe is reported as "<mountpoint> hanging 0 0 0 0".
# Nothing is printed if waitmax is not available.
section_nfs_mounts() {
if inpath waitmax; then
# Fourth word of the first line of "stat --version", compared to "5.3.0" below.
STAT_VERSION=$(stat --version | head -1 | cut -d" " -f4)
STAT_BROKE="5.3.0"
echo '<<<nfsmounts>>>'
# First sed: second field (mount point) of all nfs / nfs4 entries.
# Second sed: /proc/mounts escapes spaces as \040, undo that.
sed -n '/ nfs4\? /s/[^ ]* \([^ ]*\) .*/\1/p' </proc/mounts |
sed 's/\\040/ /g' |
while read -r MP; do
if [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
echo "$MP hanging 0 0 0 0"
else
# With stat 5.3.0 a newline is added after a successful stat call
# (presumably that version does not terminate the line itself).
waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP" &&
printf '\n' || echo "$MP hanging 0 0 0 0"
fi
done
echo '<<<cifsmounts>>>'
# Same procedure for CIFS; unreadable mount points are reported as
# "Permission denied" without being probed.
sed -n '/ cifs\? /s/[^ ]* \([^ ]*\) .*/\1/p' </proc/mounts |
sed 's/\\040/ /g' |
while read -r MP; do
if [ ! -r "$MP" ]; then
echo "$MP Permission denied"
elif [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
echo "$MP hanging 0 0 0 0"
else
waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" &&
printf '\n' || echo "$MP hanging 0 0 0 0"
fi
done
fi
}
# Print the <<<mounts>>> section: all /proc/mounts entries whose device
# starts with /dev, except squashfs mounts.
section_mounts() {
    printf '%s\n' '<<<mounts>>>'
    grep '^/dev' </proc/mounts |
        grep -v ' squashfs '
}
# Print the <<<ps_lnx>>> section: one "ps" listing of all processes,
# prefixed with "[header] ". A cgroup column is added when /sys/fs/cgroup
# exists. Sets CGROUP.
section_ps() {
    inpath ps || return 0

    # processes including username, without kernel processes
    echo '<<<ps_lnx>>>'
    if [ -e /sys/fs/cgroup ]; then
        CGROUP="cgroup:512,"
    else
        CGROUP=""
    fi
    local ps_columns="${CGROUP}user:32,vsz,rss,cputime,etime,pid,command"
    echo "[header] $(ps ax -o "$ps_columns" --columns 10000 | tr -s ' ')"
}
# Print the network interface sections: the output of "ip address" (if ip
# is available), the raw counters from /proc/net/dev and, per interface,
# link details from ethtool (or the speed from sysfs as fallback) plus the
# hardware address.
section_lnx_if() {
    local eth speed
    local have_ethtool=""
    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), we have to use an intermediate variable for the pattern.
    local number_pattern='^-?[0-9]+$'

    if inpath ip; then
        echo '<<<lnx_if>>>'
        echo "[start_iplink]"
        ip address
        echo "[end_iplink]"
    fi

    echo '<<<lnx_if:sep(58)>>>'
    sed 1,2d /proc/net/dev

    # The availability of ethtool does not change between interfaces,
    # so look it up only once.
    if inpath ethtool; then
        have_ethtool=yes
    fi

    sed -e 1,2d /proc/net/dev | cut -d':' -f1 | sort | while read -r eth; do
        echo "[$eth]"
        if [ -n "$have_ethtool" ]; then
            ethtool "$eth" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
        else
            # If interface down we get "Invalid argument"
            speed=$(cat "/sys/class/net/$eth/speed" 2>/dev/null)
            # Only compare numerically what really is a number; anything
            # else would make the test itself fail with an error message.
            if [[ "$speed" =~ $number_pattern ]] && [ "$speed" -ge 0 ]; then
                echo -e "\tSpeed: ${speed}Mb/s\n"
            fi
        fi
        echo -e "\tAddress: $(cat "/sys/class/net/$eth/address")\n"
    done
}
# Print the <<<lnx_bonding:sep(58)>>> section: up to 1000 lines of every
# file below /proc/net/bonding, each preceded by a "==> name <==" header.
section_bonding_interfaces() {
    [ -e /proc/net/bonding ] || return 0

    echo '<<<lnx_bonding:sep(58)>>>'
    # Stop here if the directory cannot be entered. Otherwise "head ./*"
    # would dump the files of whatever directory we are currently in.
    pushd /proc/net/bonding >/dev/null || return 0
    head -v -n 1000 ./*
    popd >/dev/null
}
# Print the <<<ovs_bonding:sep(58)>>> section: "ovs-appctl bond/show" for
# every bond named in the "bond" column of "ovs-appctl bond/list".
# Sets BONDS and COL.
section_vswitch_bonding() {
    inpath ovs-appctl || return 0

    local bond bond_names
    BONDS=$(ovs-appctl bond/list)
    # index of the column titled "bond" in the header line
    COL=$(echo "$BONDS" | awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}')
    echo '<<<ovs_bonding:sep(58)>>>'

    bond_names=$(echo "$BONDS" | sed -e 1d | cut -f"${COL}")
    # word splitting of the name list is intended here
    for bond in $bond_names; do
        echo "[$bond]"
        ovs-appctl bond/show "$bond"
    done
}
# Print the <<<tcp_conn_stats>>> section: number of TCP connections per
# state, counted from /proc/net/tcp and /proc/net/tcp6. "ss" is the
# fallback when the counting pipeline reports a failure. Sets THIS.
section_tcp() {
    inpath waitmax || return 0

    echo '<<<tcp_conn_stats>>>'
    # The status tested here is the one of the pipeline inside the
    # command substitution, i.e. of its last command (awk).
    if THIS=$(waitmax 5 cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }'); then
        echo "$THIS"
    elif inpath ss; then
        ss -ant | grep -v ^State | awk ' /:/ { c[$1]++; } END { for (x in c) { print x, c[x]; } }' |
            sed -e 's/^ESTAB/01/g;s/^SYN-SENT/02/g;s/^SYN-RECV/03/g;s/^FIN-WAIT-1/04/g;s/^FIN-WAIT-2/05/g;s/^TIME-WAIT/06/g;s/^CLOSED/07/g;s/^CLOSE-WAIT/08/g;s/^LAST-ACK/09/g;s/^LISTEN/0A/g;s/^CLOSING/0B/g;'
    fi
}
# Print the <<<multipath>>> section ("multipath -l") if the tool is
# installed and /etc/multipath.conf exists.
section_multipathing() {
    if inpath multipath && [ -f /etc/multipath.conf ]; then
        echo '<<<multipath>>>'
        multipath -l
    fi
}
# Print disk I/O counters: <<<diskstat>>> from /proc/diskstats (plus
# device mapper and VxVM name information) on hosts, or the cgroup based
# <<<docker_container_diskstat...>>> section inside Docker.
section_diskstat() {
    local throttle_file block_dev

    if [ -n "$IS_DOCKERIZED" ]; then
        echo "<<<docker_container_diskstat$CGROUP_SECTION_SUFFIX>>>"
        echo "[time]"
        date +%s
        if [ -n "$IS_CGROUP_V2" ]; then
            echo "[io.stat]"
            cat "/sys/fs/cgroup/io.stat"
        else
            for throttle_file in io_service_bytes io_serviced; do
                echo "[$throttle_file]"
                cat "/sys/fs/cgroup/blkio/blkio.throttle.$throttle_file"
            done
        fi
        # device name together with the content of its "dev" file
        echo "[names]"
        for block_dev in /sys/block/*; do
            echo "${block_dev##*/} $(cat "$block_dev/dev")"
        done
        return
    fi

    echo '<<<diskstat>>>'
    date +%s
    grep -E ' (x?[shv]d[a-z]*[0-9]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*|bcache[0-9]+|nvme[0-9]+n[0-9]+) ' </proc/diskstats
    if inpath dmsetup; then
        echo '[dmsetup_info]'
        dmsetup info -c --noheadings --separator ' ' -o name,devno,vg_name,lv_name
    fi
    if [ -d /dev/vx/dsk ]; then
        echo '[vx_dsk]'
        stat -c "%t %T %n" /dev/vx/dsk/*/*
    fi
}
# Hand the chrony tracking query to run_cached (section "chrony",
# interval 30) when chronyc is installed.
section_chrony() {
    inpath chronyc || return 0

    # Force successful exit code. Otherwise section will be missing if daemon not running
    #
    # The "| cat" has been added for some kind of regression in RedHat 7.5. The
    # SELinux rules shipped with that release were denying the chronyc call
    # without cat.
    local tracking_command="waitmax 5 chronyc -n tracking | cat || true"
    run_cached -s chrony 30 "$tracking_command"
}
# Print the <<<kernel>>> section (timestamp, /proc/vmstat, /proc/stat).
# Skipped inside Docker and LXC containers.
section_kernel() {
    if [ -n "$IS_DOCKERIZED" ] || [ -n "$IS_LXC_CONTAINER" ]; then
        return 0
    fi

    echo '<<<kernel>>>'
    date +%s
    cat /proc/vmstat /proc/stat
}
# Hand the two ipmitool queries (sensor list and discrete sensor states)
# to run_cached with an interval of 300 when ipmitool is installed.
section_ipmitool() {
    inpath ipmitool || return 0

    local sensor_query="waitmax 300 ipmitool sensor list | grep -v 'command failed' | grep -E -v '^[^ ]+ na ' | grep -v ' discrete '"
    run_cached -s "ipmi:sep(124)" 300 "$sensor_query"

    # readable discrete sensor states
    run_cached -s "ipmi_discrete:sep(124)" 300 "waitmax 300 ipmitool sdr elist compact"
}
# Query IPMI sensors with "ipmi-sensors" (cached via run_cached, interval 300).
# Only runs if the ipmi-sensors command and at least one /dev/ipmi* device exist.
# Sets IPMI_FORMAT and IPMI_GROUP_OPT according to the options the installed
# ipmi-sensors advertises in its --help text.
section_ipmisensors() {
# The subshell only groups both checks so that all of their output is discarded.
if (type ipmi-sensors && ls /dev/ipmi*) &>/dev/null; then
# The section header itself is only printed in the synchronous run.
$MK_RUN_SYNC_PARTS && echo '<<<ipmi_sensors>>>'
# Newer ipmi-sensors version have new output format; Legacy format can be used
if ipmi-sensors --help | grep -q legacy-output; then
IPMI_FORMAT="--legacy-output"
else
IPMI_FORMAT=""
fi
# Use -g if a "--groups" option is advertised, otherwise -t.
if ipmi-sensors --help | grep -q " \-\-groups"; then
IPMI_GROUP_OPT="-g"
else
IPMI_GROUP_OPT="-t"
fi
# At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
# NOTE: $IPMI_FORMAT, $IPMI_GROUP_OPT and $? are expanded right here while the
# command string is built; only \$class is left for the cached command itself.
run_cached -s ipmi_sensors 300 "for class in Temperature Power_Unit Fan; do
ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache $IPMI_GROUP_OPT \"\$class\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
# In case of a timeout immediately leave loop.
if [ $? = 255 ]; then break ; fi
done"
fi
}
# Print the <<<md>>> section: the content of /proc/mdstat.
section_md() {
    printf '%s\n' '<<<md>>>'
    cat /proc/mdstat
}
# Print the <<<dmraid>>> section: name and status of the RAID sets,
# followed by one line per member disk including its model string.
# Requires dmraid and a successful "dmraid -r". Sets DMSTATUS and DISKS.
section_dm_raid() {
    inpath dmraid || return 0
    DMSTATUS=$(waitmax 3 dmraid -r) || return 0

    local member model member_line
    echo '<<<dmraid>>>'

    # Output name and status
    waitmax 20 dmraid -s | grep -e ^name -e ^status

    # Output disk names of the RAID disks
    DISKS=$(echo "$DMSTATUS" | cut -f1 -d":")
    for member in $DISKS; do
        model=$(cat /sys/block/"$(basename "$member")"/device/model)
        member_line=$(echo "$DMSTATUS" | grep "^${member}")
        echo "${member_line} Model: ${model}"
    done
}
# Print the <<<lsi>>> section: selected "key : value" lines of
# "cfggen 0 DISPLAY" with all spaces removed and the first colon turned
# into a space.
section_cfggen() {
    inpath cfggen || return 0

    echo '<<<lsi>>>'
    local wanted_keys='(Target ID|State|Volume ID|Status of volume)[[:space:]]*:'
    cfggen 0 DISPLAY |
        grep -E "$wanted_keys" |
        sed -e 's/ *//g' -e 's/:/ /'
}
# Print the megaraid_pdisks, megaraid_ldisks and megaraid_bbu sections
# using the first available MegaCli compatible command. Sets MegaCli_bin
# ("unknown" if none of the candidates is installed).
section_megaraid() {
    local candidate part

    # first match wins, in this order of preference
    MegaCli_bin="unknown"
    for candidate in MegaCli MegaCli64 megacli storcli storcli64; do
        if inpath "$candidate"; then
            MegaCli_bin="$candidate"
            break
        fi
    done
    [ "$MegaCli_bin" != "unknown" ] || return 0

    echo '<<<megaraid_pdisks>>>'
    # The sed call reduces the enclosure info to the words of
    # "adapter N", "Enclosure N" and "Device ID N" lines; they are glued
    # back together into one line per adapter / enclosure.
    for part in $($MegaCli_bin -EncInfo -aALL -NoLog </dev/null |
        sed -rn 's/:/ /g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/Number of enclosures on adapter ([0-9]+).*/adapter \1/g; /^(Enclosure|Device ID|adapter) [0-9]+$/ p'); do
        [ "$part" = adapter ] && echo ""
        [ "$part" = 'Enclosure' ] && echo -ne "\ndev2enc"
        echo -n " $part"
    done
    echo
    $MegaCli_bin -PDList -aALL -NoLog </dev/null |
        grep -E 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Adapter|Predictive Failure Count'

    echo '<<<megaraid_ldisks>>>'
    $MegaCli_bin -LDInfo -Lall -aALL -NoLog </dev/null | grep -E 'Size|State|Number|Adapter|Virtual'

    echo '<<<megaraid_bbu>>>'
    $MegaCli_bin -AdpBbuCmd -GetBbuStatus -aALL -NoLog </dev/null | grep -v Exit
}
# Print the 3ware_info, 3ware_disks and 3ware_units sections for every
# controller listed by "tw_cli show" (first column, from the fourth line on).
section_3ware_raid() {
    inpath tw_cli || return 0

    local controller
    for controller in $(tw_cli show | awk 'NR < 4 { next } { print $1 }'); do
        echo '<<<3ware_info>>>'
        tw_cli "/$controller" show all | grep -E 'Model =|Firmware|Serial'

        # disk and unit lines get the controller name as prefix
        echo '<<<3ware_disks>>>'
        tw_cli "/$controller" show drivestatus | grep -E 'p[0-9]' | sed "s/^/$controller\//"

        echo '<<<3ware_units>>>'
        tw_cli "/$controller" show unitstatus | grep -E 'u[0-9]' | sed "s/^/$controller\//"
    done
}
# Hand the Areca RAID query to run_cached (section "arc_raid_status",
# interval 300) when cli64 is installed.
section_areca_raid() {
    inpath cli64 || return 0
    run_cached -s arc_raid_status 300 "cli64 rsf info | tail -n +3 | head -n -2"
}
# Print the <<<vbox_guest>>> section. The header is always printed; the
# guest properties follow only if VBoxControl is installed and the
# vboxguest kernel module is loaded.
section_vbox_guest() {
    echo '<<<vbox_guest>>>'
    inpath VBoxControl || return 0
    lsmod | grep vboxguest >/dev/null 2>&1 || return 0

    VBoxControl -nologo guestproperty enumerate | cut -d, -f1,2
    # PIPESTATUS[0] is the exit code of VBoxControl, not of cut
    [ "${PIPESTATUS[0]}" = 0 ] || echo "ERROR"
}
# Print the <<<openvpn_clients:sep(44)>>> section: the client list part of
# /etc/openvpn/openvpn-status.log without its first three lines and its
# last line.
section_openvpn() {
    local status_log=/etc/openvpn/openvpn-status.log
    [ -e "$status_log" ] || return 0

    echo '<<<openvpn_clients:sep(44)>>>'
    sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' <"$status_log" |
        sed -e 1,3d -e '$d'
}
# Print the <<<nvidia>>> section: GPUErrors and GPUCoreTemp as reported by
# nvidia-settings for display :0. Requires nvidia-settings and the X
# socket /tmp/.X11-unix/X0.
section_nvidia() {
    if ! inpath nvidia-settings || [ ! -S /tmp/.X11-unix/X0 ]; then
        return 0
    fi

    local attribute
    echo '<<<nvidia>>>'
    for attribute in GPUErrors GPUCoreTemp; do
        DISPLAY=:0 waitmax 2 nvidia-settings -t -q "$attribute" | sed "s/^/$attribute: /"
    done
}
# Print the <<<drbd>>> section from /proc/drbd plus the per connection
# proc_drbd files below /sys/kernel/debug/drbd. Skipped inside containers
# and when /proc/drbd does not exist.
section_drbd() {
    [ -z "$IS_DOCKERIZED" ] || return 0
    [ -z "$IS_LXC_CONTAINER" ] || return 0
    [ -e /proc/drbd ] || return 0

    echo '<<<drbd>>>'
    cat /proc/drbd
    cat /sys/kernel/debug/drbd/resources/*/connections/*/0/proc_drbd 2>/dev/null
}
# Print the cluster sections: <<<heartbeat_crm>>> (crm_mon) when a CRM
# socket exists or crmd / pacemaker-controld is running, and
# <<<heartbeat_rscstatus>>> / <<<heartbeat_nodes>>> when cl_status is
# installed.
section_heartbeat() {
    local own_name node node_state link

    if [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ] ||
        pgrep "^(crmd|pacemaker-contr)$" >/dev/null 2>&1; then
        echo '<<<heartbeat_crm>>>'
        TZ=UTC crm_mon -1 -r | grep -v ^$ | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
    fi

    inpath cl_status || return 0

    echo '<<<heartbeat_rscstatus>>>'
    cl_status rscstatus

    echo '<<<heartbeat_nodes>>>'
    # the local node (compared in lower case) is left out of the list
    own_name=$(echo "$HOSTNAME" | tr '[:upper:]' '[:lower:]')
    for node in $(cl_status listnodes); do
        [ "$node" != "$own_name" ] || continue
        node_state=$(cl_status nodestatus "$node")
        echo -n "$node $node_state"
        for link in $(cl_status listhblinks "$node" 2>/dev/null); do
            echo -n " $link $(cl_status hblinkstatus "$node" "$link")"
        done
        echo
    done
}
## Postfix mailqueue monitoring
## Determine the number of mails and their size in several postfix mail queues
# Print a <<<postfix_mailq>>> section for one Postfix queue directory.
#
# Arguments:
#   $1 - queue directory (nothing is printed if empty)
#   $2 - instance name, printed as "[[[name]]]"
# For the "deferred" and "active" queues the size (du -s) and the number
# of files are reported. Sets postfix_queue_dir.
read_postfix_queue_dirs() {
    postfix_queue_dir=$1
    [ -n "$postfix_queue_dir" ] || return 0

    local queue_name mail_count queue_size
    echo '<<<postfix_mailq>>>'
    echo "[[[${2}]]]"
    for queue_name in deferred active; do
        mail_count=$(find "${postfix_queue_dir}/$queue_name" -type f | wc -l)
        queue_size=$(du -s "${postfix_queue_dir}/$queue_name" | awk '{print $1 }')
        [ -n "$queue_size" ] || queue_size=0
        if [ -n "$mail_count" ]; then
            echo "QUEUE_${queue_name} $queue_size $mail_count"
        else
            echo "Mail queue is empty"
        fi
    done
}
## Postfix mailqueue monitoring
## Determine the number of mails and their size in several postfix mail queue
# Print mail queue information for the mail systems found on this host:
#   - Postfix queues (all multi instances plus the default instance), or the
#     tail of the "mailq" output if only ssmtp is installed
#   - running state of every Postfix instance below /var/spool/postfix*/
#   - qmail statistics
#   - nullmailer queue sizes
section_mailqueue() {
if inpath postconf; then
# Check if multi_instance_directories exists in main.cf and is not empty
# always takes the last entry, multiple entries possible
multi_instances_dirs=$(postconf -c /etc/postfix 2>/dev/null | grep ^multi_instance_directories | sed 's/.*=[[:space:]]*//g')
if [ -n "$multi_instances_dirs" ]; then
# Word splitting is intended: the value is a whitespace separated list.
for queue_dir in $multi_instances_dirs; do
if [ -n "$queue_dir" ]; then
postfix_queue_dir=$(postconf -c "$queue_dir" 2>/dev/null | grep ^queue_directory | sed 's/.*=[[:space:]]*//g')
read_postfix_queue_dirs "$postfix_queue_dir" "$queue_dir"
fi
done
fi
# Always check for the default queue. It can exist even if multiple instances are configured
# No instance name is passed here, so the marker line is printed as "[[[]]]".
read_postfix_queue_dirs "$(postconf -h queue_directory 2>/dev/null)"
elif [ -x /usr/sbin/ssmtp ]; then
echo '<<<postfix_mailq>>>'
mailq 2>&1 | sed 's/^[^:]*: \(.*\)/\1/' | tail -n 6
fi
# Postfix status monitoring. Can handle multiple instances.
if inpath postfix; then
echo "<<<postfix_mailq_status:sep(58)>>>"
for i in /var/spool/postfix*/; do
if [ -e "$i/pid/master.pid" ]; then
if [ -r "$i/pid/master.pid" ]; then
postfix_pid=$(sed 's/ //g' <"$i/pid/master.pid") # handle possible spaces in output
# The instance counts as running if the executable of that PID looks like
# the Postfix master binary.
if readlink -- "/proc/${postfix_pid}/exe" | grep -q ".*postfix/\(s\?bin/\)\?master.*"; then
echo "$i:the Postfix mail system is running:PID:$postfix_pid" | sed 's/\/var\/spool\///g'
else
echo "$i:PID file exists but instance is not running!" | sed 's/\/var\/spool\///g'
fi
else
# NOTE: unlike the other messages, this one keeps the /var/spool/ prefix.
echo "$i:PID file exists but is not readable"
fi
else
echo "$i:the Postfix mail system is not running" | sed 's/\/var\/spool\///g'
fi
done
fi
# Check status of qmail mailqueue
if inpath qmail-qstat; then
echo "<<<qmail_stats>>>"
qmail-qstat
fi
# Nullmailer queue monitoring
if inpath nullmailer-send; then
echo '<<<nullmailer_mailq>>>'
# Output format per queue: "<size from du -s> <number of files> <queue name>"
if [ -d /var/spool/nullmailer/queue ]; then
COUNT=$(find /var/spool/nullmailer/queue -type f | wc -l)
SIZE=$(du -s /var/spool/nullmailer/queue | awk '{print $1 }')
echo "$SIZE $COUNT deferred"
fi
if [ -d /var/spool/nullmailer/failed ]; then
COUNT=$(find /var/spool/nullmailer/failed -type f | wc -l)
SIZE=$(du -s /var/spool/nullmailer/failed | awk '{print $1 }')
echo "$SIZE $COUNT failed"
fi
fi
}
# Print information about the OMD sites on this host (only if the "omd"
# command exists):
#   - omd_status via run_cached (interval 60)
#   - mknotifyd state files of all sites
#   - Apache statistics files of all sites (each file is emptied after reading)
#   - omd_info: installed versions and the version / autostart setting per site
# Everything after the omd_status call is skipped unless MK_RUN_SYNC_PARTS is true.
section_omd() {
if inpath omd; then
run_cached -s omd_status 60 "omd status --bare || true"
$MK_RUN_SYNC_PARTS || return
echo '<<<mknotifyd:sep(0)>>>'
date +%s
for statefile in /omd/sites/*/var/log/mknotifyd.state; do
# An unmatched glob stays literal, hence the existence check.
if [ -e "$statefile" ]; then
# Cut the site name out of /omd/sites/<site>/var/log/...
site=${statefile%/var/log*}
site=${site#/omd/sites/}
echo "[$site]"
grep -v '^#' <"$statefile"
fi
done
echo '<<<omd_apache:sep(124)>>>'
for statsfile in /omd/sites/*/var/log/apache/stats; do
if [ -e "$statsfile" ]; then
site=${statsfile%/var/log*}
site=${site#/omd/sites/}
echo "[$site]"
cat "$statsfile"
# Truncate the file once it has been reported.
: >"$statsfile"
# prevent next section to fail caused by a missing newline at the end of the statsfile
echo
fi
done
echo '<<<omd_info:sep(59)>>>'
echo '[versions]'
echo 'version;number;edition;demo'
for versiondir in /omd/versions/*; do
version=${versiondir#/omd/versions/}
# filter out special directory 'default'
if [[ $version == "default" ]]; then
continue
fi
number=$version
demo="0"
# A trailing ".demo" marks a demo version and is not part of the number.
if [[ "$version" == *.demo ]]; then
number=${version%.demo}
demo="1"
fi
# The last dot separated component is reported as edition, the rest as number.
edition=${number##*.}
number=${number%.*}
echo "$version;$number;$edition;$demo"
done
echo '[sites]'
echo 'site;used_version;autostart'
for sitedir in /omd/sites/*; do
site=${sitedir#/omd/sites/}
# The "version" symlink of a site points to the version in use; only the last
# path component is reported.
used_version=$(readlink ${sitedir}/version)
used_version=${used_version##*/}
autostart="0"
if grep -q "CONFIG_AUTOSTART[[:blank:]]*=[[:blank:]]*'on'" $sitedir/etc/omd/site.conf; then
autostart="1"
fi
echo "$site;$used_version;$autostart"
done
fi
}
# Print the <<<zpool_status>>> and <<<zpool>>> sections when the zpool
# command is available.
section_zpool() {
    inpath zpool || return 0

    echo "<<<zpool_status>>>"
    zpool status -x
    echo "<<<zpool>>>"
    zpool list
}
# Print the <<<veritas_vcs>>> section: cluster, system, group and resource
# states queried with the VCS tools below /opt/VRTSvcs/bin. Every call is
# limited to 2 seconds (killed with signal 9).
section_veritas_cluster() {
    local vcs_bin=/opt/VRTSvcs/bin
    [ -x "$vcs_bin/haclus" ] || return 0

    echo "<<<veritas_vcs>>>"
    # short host name, used to restrict group and resource states to this system
    vcshost=$(hostname | cut -d. -f1)
    waitmax -s 9 2 "$vcs_bin/haclus" -display -localclus | grep -e ClusterName -e ClusState
    waitmax -s 9 2 "$vcs_bin/hasys" -display -attribute SysState
    waitmax -s 9 2 "$vcs_bin/hagrp" -display -sys "$vcshost" -attribute State -localclus
    waitmax -s 9 2 "$vcs_bin/hares" -display -sys "$vcshost" -attribute State -localclus
    waitmax -s 9 2 "$vcs_bin/hagrp" -display -attribute TFrozen -attribute Frozen
}
# Query the monitoring cores of all OMD sites below /omd/sites:
#   - livestatus_status: "GET status" for every site with a livestatus socket
#   - livestatus_ssl_certs: expiry time (epoch) of the CA and site certificates
#   - mkeventd_status: "GET status" as JSON from the event daemon socket
#   - cmk_site_statistics: host and service statistics via livestatus
# NOTE: the function changes into /omd/sites and does not change back.
section_omd_core() {
if cd /omd/sites; then
echo '<<<livestatus_status:sep(59)>>>'
# "*" expands to the site directories, as we are in /omd/sites now.
for site in *; do
if [ -S "/omd/sites/$site/tmp/run/live" ]; then
echo "[$site]"
echo -e "GET status" |
waitmax 3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live"
fi
done
echo '<<<livestatus_ssl_certs:sep(124)>>>'
for site in *; do
echo "[$site]"
for PEM_PATH in "/omd/sites/$site/etc/ssl/ca.pem" "/omd/sites/$site/etc/ssl/sites/$site.pem"; do
if [ -f "$PEM_PATH" ]; then
# openssl prints "notAfter=<date>"; strip the prefix and convert to epoch seconds.
CERT_DATE=$(openssl x509 -enddate -noout -in "$PEM_PATH")
CERT_DATE=${CERT_DATE/notAfter=/}
echo "$PEM_PATH|$(date --date="$CERT_DATE" --utc +%s)"
fi
done
done
echo '<<<mkeventd_status:sep(0)>>>'
for site in *; do
if [ -S "/omd/sites/$site/tmp/run/mkeventd/status" ]; then
echo "[\"$site\"]"
echo -e "GET status\nOutputFormat: json" |
waitmax 3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/mkeventd/status"
fi
done
echo '<<<cmk_site_statistics:sep(59)>>>'
for site in *; do
if [ -S "/omd/sites/$site/tmp/run/live" ]; then
echo "[$site]"
# Two livestatus queries per site, each limited to 5 seconds: host statistics
# first, then service statistics. The here-documents are sent verbatim.
waitmax 5 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live" <<LimitString
GET hosts
Stats: state = 0
Stats: scheduled_downtime_depth = 0
StatsAnd: 2
Stats: state = 1
Stats: scheduled_downtime_depth = 0
StatsAnd: 2
Stats: state = 2
Stats: scheduled_downtime_depth = 0
StatsAnd: 2
Stats: scheduled_downtime_depth > 0
Filter: custom_variable_names < _REALNAME
LimitString
waitmax 5 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live" <<LimitString
GET services
Stats: state = 0
Stats: scheduled_downtime_depth = 0
Stats: host_scheduled_downtime_depth = 0
Stats: host_state = 0
Stats: host_has_been_checked = 1
StatsAnd: 5
Stats: scheduled_downtime_depth > 0
Stats: host_scheduled_downtime_depth > 0
StatsOr: 2
Stats: scheduled_downtime_depth = 0
Stats: host_scheduled_downtime_depth = 0
Stats: host_state != 0
StatsAnd: 3
Stats: state = 1
Stats: scheduled_downtime_depth = 0
Stats: host_scheduled_downtime_depth = 0
Stats: host_state = 0
Stats: host_has_been_checked = 1
StatsAnd: 5
Stats: state = 3
Stats: scheduled_downtime_depth = 0
Stats: host_scheduled_downtime_depth = 0
Stats: host_state = 0
Stats: host_has_been_checked = 1
StatsAnd: 5
Stats: state = 2
Stats: scheduled_downtime_depth = 0
Stats: host_scheduled_downtime_depth = 0
Stats: host_state = 0
Stats: host_has_been_checked = 1
StatsAnd: 5
Filter: host_custom_variable_names < _REALNAME
LimitString
fi
done
fi
}
# Print the <<<mkbackup>>> section(s) with the content of the backup job state
# files: per site jobs from /omd/sites/*/var/check_mk/backup/*.state and
# system jobs from /var/lib/mkbackup/*.state. State files named
# "restore.state" are skipped.
section_mkbackup() {
# "ls" only serves as a test whether the glob matches at least one file.
if ls /omd/sites/*/var/check_mk/backup/*.state >/dev/null 2>&1; then
echo "<<<mkbackup>>>"
for F in /omd/sites/*/var/check_mk/backup/*.state; do
# Strip the leading "/omd/sites/", then everything from the next slash on:
# what remains is the site name.
SITE=${F#/*/*/*}
SITE=${SITE%%/*}
# Job identifier: file name without directory and ".state" suffix.
JOB_IDENT=${F%.state}
JOB_IDENT=${JOB_IDENT##*/}
if [ "$JOB_IDENT" != "restore" ]; then
echo "[[[site:$SITE:$JOB_IDENT]]]"
cat "$F"
echo
fi
done
fi
# Collect states of configured CMA backup jobs
if inpath mkbackup && ls /var/lib/mkbackup/*.state >/dev/null 2>&1; then
echo "<<<mkbackup>>>"
for F in /var/lib/mkbackup/*.state; do
JOB_IDENT=${F%.state}
JOB_IDENT=${JOB_IDENT##*/}
if [ "$JOB_IDENT" != "restore" ]; then
echo "[[[system:$JOB_IDENT]]]"
cat "$F"
echo
fi
done
fi
}
# Print the <<<job>>> section: all files below $MK_VARDIR/job/<user>/,
# each preceded by a "==> name <==" header. When running as root the
# files are read as the user the directory is named after.
section_job() {
    pushd "$MK_VARDIR/job" >/dev/null || return 0

    echo '<<<job>>>'
    local job_user
    for job_user in *; do
        [ -d "$job_user" ] || continue
        cd "$job_user" || continue
        if [ "$EUID" -eq 0 ]; then
            su -s "$SHELL" "$job_user" -c "head -n -0 -v *"
        else
            head -n -0 -v ./*
        fi
        cd ..
    done
    popd >/dev/null
}
# Print the <<<lnx_thermal:sep(124)>>> section: one "|" separated line per
# thermal zone with zone name, mode ("-" if the zone has no mode file), type,
# temperature and the values of all trip point files.
# Skipped inside containers and if there are no thermal zones.
section_thermal() {
if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ] && ls /sys/class/thermal/thermal_zone* >/dev/null 2>&1; then
echo '<<<lnx_thermal:sep(124)>>>'
for F in /sys/class/thermal/thermal_zone*; do
line="${F##*/}"
if [ ! -e "$F/mode" ]; then line="${line}|-"; else line="${line}|$(cat "$F"/mode)"; fi
# Newlines are turned into "|", so type and temp end up as two more fields.
line="${line}|$(cat "$F"/{type,temp} | tr \\n "|")"
# NOTE: the order of the trip point fields is the order in which "ls" lists the
# files (presumably sorted, so temp and type of one trip point are adjacent).
for G in $(ls "$F"/trip_point_*_{temp,type}); do
line="${line}$(tr <"$G" \\n "|")"
done
# Cut off the last character (the trailing "|").
echo "${line%?}"
done
fi
}
# Print the <<<libelle_business_shadow:sep(58)>>> section ("trd -s") when
# the trd command is available.
section_libelle() {
    inpath trd || return 0
    echo "<<<libelle_business_shadow:sep(58)>>>"
    trd -s
}
# Print the <<<varnish>>> section ("varnishstat -1") when varnishstat is
# available.
section_http_accelerator() {
    inpath varnishstat || return 0
    echo "<<<varnish>>>"
    varnishstat -1
}
# Print the <<<pvecm_status:sep(58)>>> and <<<pvecm_nodes>>> sections when
# the pvecm command is available.
section_proxmox() {
    inpath pvecm || return 0

    echo "<<<pvecm_status:sep(58)>>>"
    pvecm status
    echo "<<<pvecm_nodes>>>"
    pvecm nodes
}
# Print a <<<haproxy:sep(44)>>> section ("show stat" sent through socat)
# for each of the two known HAProxy admin sockets that is readable.
section_haproxy() {
    local admin_socket
    for admin_socket in /run/haproxy/admin.sock /var/lib/haproxy/stats; do
        [ -r "$admin_socket" ] || continue
        inpath socat || continue
        echo "<<<haproxy:sep(44)>>>"
        echo "show stat" | socat - "UNIX-CONNECT:$admin_socket"
    done
}
# Print the <<<fileinfo:sep(124)>>> section using an embedded Perl script.
# File patterns are read from $MK_CONFDIR/fileinfo.cfg and all files in
# $MK_CONFDIR/fileinfo.d/ (one pattern per line; empty lines and lines
# starting with "#" are ignored). Inside a pattern, "$DATE:<format>$" is
# replaced by the output of "date +<format>".
# Every path matched by a pattern is printed as "name|ok|size|mtime";
# non-files that are not directories as "name|missing".
# Nothing is printed at all if no pattern is configured.
# NOTE: the Perl code is a single quoted shell string, so nothing in it is
# expanded by the shell; $MK_CONFDIR reaches the script as $ARGV[0].
run_fileinfo() {
perl -e '
use File::Glob "bsd_glob";
my @patterns = ();
foreach (bsd_glob("$ARGV[0]/fileinfo.cfg"), bsd_glob("$ARGV[0]/fileinfo.d/*")) {
open my $handle, "<", $_ or next;
while (<$handle>) {
chomp;
next if /^\s*(#|$)/;
my $pattern = $_;
$pattern =~ s/\$DATE:(.*?)\$/substr(`date +"$1"`, 0, -1)/eg;
push @patterns, $pattern;
}
warn "error while reading $_: $!\n" if $!;
close $handle;
}
exit if ! @patterns;
print "<<<fileinfo:sep(124)>>>\n", time, "\n[[[header]]]\nname|status|size|time\n[[[content]]]\n";
foreach (@patterns) {
foreach (bsd_glob("$_")) {
if (! -f) {
print "$_|missing\n" if ! -d;
} elsif (my @infos = stat) {
print "$_|ok|$infos[7]|$infos[9]\n";
} else {
print "$_|stat failed: $!\n";
}
}
}
' -- "$MK_CONFDIR"
}
# Start the real-time checks: one background call of
# run_real_time_checks_for_remote per marker file below
# $MK_VARDIR/rtc_remotes. Does nothing without real_time_checks.cfg and
# refuses to start (with a message on stderr) if a required secret or the
# openssl command is missing.
run_liveupdate() {
    [ -e "$MK_CONFDIR/real_time_checks.cfg" ] || return 0

    if [ -z "$RTC_SECRET" ] && [ -z "$PASSPHRASE" ] && [ "$ENCRYPTED_RT" != "no" ]; then
        echo "ERROR: Missing secret for encryping Real-Time Check data, while encryption is requested. Not starting Real-Time Checks." >&2
        return
    fi

    if ! inpath openssl; then
        echo "ERROR: openssl command is missing. Not starting Real-Time Checks." >&2
        return
    fi

    for trigger in "${MK_VARDIR}/rtc_remotes/"?*; do
        # no such file => no expansion of ?* => nothing to do
        [ -e "${trigger}" ] && run_real_time_checks_for_remote "${trigger}" >/dev/null &
    done
}
# Run the MRPE checks configured in $MK_CONFDIR/mrpe.cfg.
# Every non-empty, non-comment line has the form
#   <description> [(<key>=<value>[:<key>=<value>...])] <command line>
# Supported keys: "interval" (run the check via run_cached with that interval
# instead of synchronously) and "appendage" (pass -ma instead of -m to run_cached).
# Checks without interval only run if MK_RUN_SYNC_PARTS is true.
run_remote_plugin() {
if [ -e "$MK_CONFDIR/mrpe.cfg" ]; then
grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/mrpe.cfg" |
while read -r descr cmdline; do
interval=
args="-m"
# NOTE: Due to an escaping-related bug in some old bash versions
# (3.2.x), we have to use an intermediate variable for the pattern.
# Group 1: text inside the parentheses, group 2: the remaining command line.
pattern='\(([^\)]*)\)[[:space:]](.*)'
if [[ $cmdline =~ $pattern ]]; then
parameters=${BASH_REMATCH[1]}
cmdline=${BASH_REMATCH[2]}
# split multiple parameter assignments
for par in $(echo "$parameters" | tr ":" "\n"); do
# split each assignment
key=$(echo "$par" | cut -d= -f1)
value=$(echo "$par" | cut -d= -f2)
if [ "$key" = "interval" ]; then
interval=$value
elif [ "$key" = "appendage" ]; then
# Only the presence of the key matters, its value is not looked at.
args="-ma"
fi
done
fi
if [ -z "$interval" ]; then
$MK_RUN_SYNC_PARTS && run_mrpe "$descr" "$cmdline"
else
run_cached "$args" "$descr" "$interval" "$cmdline"
fi
done
fi
}
# Run additional checks as configured in $MK_CONFDIR/runas.cfg.
# Every non-empty, non-comment line has the form
#   <type> <user> <include>
# where type is "mrpe" (include is an MRPE config file or a directory), or
# "local" / "plugin" (include is a directory whose executables are run).
# With a user other than "-" the commands are run through "su <user> -c".
run_runas_executor() {
if [ -e "$MK_CONFDIR/runas.cfg" ]; then
grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/runas.cfg" |
while read -r type user include; do
# Accept directories for every type, plain files only for type "mrpe".
if [ -d "$include" -o \( "$type" == "mrpe" -a -f "$include" \) ]; then
PREFIX=""
if [ "$user" != "-" ]; then
PREFIX="su $user -c "
fi
# mrpe includes
if [ "$type" == "mrpe" ]; then
grep -Ev '^[[:space:]]*($|#)' "$include" |
while read -r descr cmdline; do
interval=
# NOTE: Due to an escaping-related bug in some old bash
# versions (3.2.x), we have to use an intermediate variable
# for the pattern.
pattern='\(([^\)]*)\)[[:space:]](.*)'
if [[ $cmdline =~ $pattern ]]; then
parameters=${BASH_REMATCH[1]}
cmdline=${BASH_REMATCH[2]}
# split multiple parameter assignments
for par in $(echo "$parameters" | tr ":" "\n"); do
# split each assignment
IFS='=' read -r key value <<<"${par}"
if [ "$key" = "interval" ]; then
interval=$value
fi
# no other parameters supported currently
done
fi
# The original command line is wrapped in single quotes and handed to su.
if [ -n "$PREFIX" ]; then
cmdline="$PREFIX'$cmdline'"
fi
if [ -z "$interval" ]; then
$MK_RUN_SYNC_PARTS && run_mrpe "$descr" "$cmdline"
else
run_cached -m "$descr" "$interval" "$cmdline"
fi
done
# local and plugin includes
elif [ "$type" == "local" -o "$type" == "plugin" ]; then
if [ "$type" == "local" ]; then
echo "<<<local:sep(0)>>>"
fi
find "$include" -executable -type f |
while read -r filename; do
if [ -n "$PREFIX" ]; then
cmdline="$PREFIX\"$filename\""
else
cmdline=$filename
fi
# NOTE: executed after plain word splitting (no eval), so the double quotes
# added above are passed on literally as part of the last argument.
$cmdline
done
fi
fi
done
fi
}
# Decide whether a path may be run as plugin / local check.
#
# Arguments:
#   $1 - path to test
# Returns:
#   0 if the path is an executable regular file whose name does not end
#   in .dpkg-new, .dpkg-old or .dpkg-temp; 1 otherwise.
# Sets pattern.
is_valid_plugin() {
    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), we have to use an intermediate variable for the pattern.
    pattern='\.dpkg-(new|old|temp)$'
    #TODO Maybe we should change this mechanism
    [ -f "$1" ] || return 1
    [ -x "$1" ] || return 1
    if [[ "$1" =~ $pattern ]]; then
        return 1
    fi
    return 0
}
# Runs the local checks found in $LOCALDIR.
# Files directly in the directory are executed on every synchronous run and
# their output follows the <<<local:sep(0)>>> header. Files in sub directories
# whose name starts with a digit 1-9 are handed to run_cached, with the
# directory name as maximum cache age.
run_local_checks() {
    # Nothing to do (and no error) when the directory cannot be entered
    cd "$LOCALDIR" || return 0

    if $MK_RUN_SYNC_PARTS; then
        echo '<<<local:sep(0)>>>'
        for skript in ./*; do
            is_valid_plugin "$skript" || continue
            ./"$skript"
        done
    fi

    # Call some plugins only every X'th second
    for skript in [1-9]*/*; do
        is_valid_plugin "$skript" || continue
        run_cached "local_${skript//\//\\}" "${skript%/*}" "$skript"
    done
}
# Runs the agent plugins found in $PLUGINSDIR.
# Files directly in the directory are passed to run_agent_plugin on every
# synchronous run. Files in sub directories whose name starts with a digit
# 1-9 are run through run_cached, with the directory name as maximum cache age.
run_plugins() {
    # Nothing to do (and no error) when the directory cannot be entered
    cd "$PLUGINSDIR" || return 0

    if $MK_RUN_SYNC_PARTS; then
        for skript in ./*; do
            is_valid_plugin "$skript" || continue
            run_agent_plugin "$skript"
        done
    fi

    # Call some plugins only every Xth second
    for skript in [1-9]*/*; do
        is_valid_plugin "$skript" || continue
        run_cached "plugins_${skript//\//\\}" "${skript%/*}" run_agent_plugin "$skript"
    done
}
# Runs one MRPE command and prints it as an <<<mrpe>>> section.
#   $1     service description
#   $2...  command line (all remaining arguments, joined by blanks, are
#          evaluated by the shell)
# The data line has the form "(PLUGIN) DESCR EXITCODE OUTPUT", where PLUGIN is
# the base name of the first word of the command line and every newline of
# the command's output is replaced by the byte \1.
run_mrpe() {
    local descr=$1
    shift
    local cmdline=$*

    echo '<<<mrpe>>>'
    PLUGIN=${cmdline%% *}
    OUTPUT=$(eval "$cmdline")
    # $? still holds the exit code of the command substitution above
    local exit_code=$?
    printf '%s' "(${PLUGIN##*/}) $descr $exit_code $OUTPUT" | tr \\n \\1
    echo
}
export -f run_mrpe
# Remembers $1 in the array FAILED_PYTHON_PLUGINS.
# The array is started from scratch as long as its first element is empty.
add_failed_python_plugin() {
    if [[ -n "${FAILED_PYTHON_PLUGINS}" ]]; then
        FAILED_PYTHON_PLUGINS+=("$1")
    else
        FAILED_PYTHON_PLUGINS=("$1")
    fi
}
# Executes one agent plugin, given as a path relative to the current directory
# (a leading "./" is accepted).
#   * Everything that does not end in ".py" is executed directly.
#   * "NAME.py" is run with $PYTHON3. Without $PYTHON3 it is left alone if
#     "NAME_2.py" exists, otherwise it is recorded as failed.
#   * "NAME_2.py" is skipped if "NAME.py" is executable and $PYTHON3 is set;
#     otherwise it is run with $PYTHON2 or recorded as failed.
# Failures are recorded through add_failed_python_plugin.
run_agent_plugin() {
    agent_plugin="${1#./}"
    extension="${agent_plugin##*.}"
    filename="${agent_plugin%.*}"

    # Execute all non python plugins with ./foo
    if [[ "$extension" != "py" ]]; then
        ./"$agent_plugin"
        return
    fi

    # Python 3 variant (name does not end in _2)
    if [[ "$filename" != *_2 ]]; then
        if [[ -n "$NO_PYTHON" || -n "$WRONG_PYTHON_COMMAND" ]]; then
            add_failed_python_plugin "$agent_plugin"
        elif [[ -n "$PYTHON3" ]]; then
            $PYTHON3 "$agent_plugin"
        elif [[ ! -e "${filename}_2.py" ]]; then
            add_failed_python_plugin "$agent_plugin (Missing Python 3 installation)"
        fi
        return
    fi

    # Python 2 variant: skipped when its Python 3 sibling can be run
    if [[ -x "${filename%_2}.py" && -n "$PYTHON3" ]]; then
        return
    fi

    if [[ -n "$PYTHON2" ]]; then
        $PYTHON2 "$agent_plugin"
    else
        add_failed_python_plugin "$agent_plugin (missing Python 2 installation)"
    fi
}
export -f run_agent_plugin
# Runs a command asynchronously by use of a cache file. Usage:
# run_cached [-s|-m|-ma] NAME MAXAGE COMMAND...
# -s creates the section header <<<$NAME>>>
# -m mrpe-mode: stores exit code with the cache
# -ma mrpe-mode with age: stores exit code with the cache and adds the cache age
# NAME is the name of the section (also used as cache file name)
# MAXAGE is the maximum cache lifetime in seconds
# COMMAND is the command line that produces the output to be cached
#
# In synchronous mode ($MK_RUN_SYNC_PARTS) an existing, non-empty cache file is
# printed even if it is outdated. In asynchronous mode ($MK_RUN_ASYNC_PARTS) a
# background job refreshes the cache when it is outdated and no refresh is
# running yet.
run_cached() {
    local NOW
    NOW=$(date +%s)
    local section=
    local mrpe=0
    local append_age=0
    # Must always start out empty: a value inherited from the caller or the
    # environment would otherwise suppress the cache refresh for good.
    local USE_CACHEFILE=
    # TODO: this function is unable to handle multiple args at once
    # for example: -s -m won't work, it is read as single token "-s -m"
    if [ "$1" = -s ]; then
        section="echo '<<<$2:cached($NOW,$3)>>>' ; "
        shift
    fi
    if [ "$1" = -m ]; then
        mrpe=1
        shift
    fi
    if [ "$1" = "-ma" ]; then
        mrpe=1
        append_age=1
        shift
    fi
    local NAME=$1
    local MAXAGE=$2
    shift 2
    local CMDLINE=$section$*
    if [ ! -d "$MK_VARDIR/cache" ]; then mkdir -p "$MK_VARDIR/cache"; fi
    if [ "$mrpe" = 1 ]; then
        CACHEFILE="$MK_VARDIR/cache/mrpe_$NAME.cache"
    else
        CACHEFILE="$MK_VARDIR/cache/$NAME.cache"
    fi
    # Check if the creation of the cache takes suspiciously long and kill the
    # process if the age (access time) of $CACHEFILE.new is twice the MAXAGE.
    # Output the eventually already cached section anyway and start the cache
    # update again.
    if [ -e "$CACHEFILE.new" ]; then
        local CF_ATIME
        # The background job may move the file away at any moment, so a failing
        # stat simply means there is nothing left to clean up.
        if CF_ATIME=$(stat -c %X "$CACHEFILE.new" 2>/dev/null) &&
            [ $((NOW - CF_ATIME)) -ge $((MAXAGE * 2)) ]; then
            # Kill the process still accessing that file in case
            # it is still running. This avoids overlapping processes!
            fuser -k -9 "$CACHEFILE.new" >/dev/null 2>&1
            rm -f "$CACHEFILE.new"
        fi
    fi
    # Check if cache file exists and is recent enough
    local MTIME
    local AGE
    if [ -s "$CACHEFILE" ] && MTIME=$(stat -c %Y "$CACHEFILE" 2>/dev/null); then
        AGE=$((NOW - MTIME))
        if [ "$AGE" -le "$MAXAGE" ]; then USE_CACHEFILE=1; fi
        if $MK_RUN_SYNC_PARTS; then
            # Output the file in any case, even if it is
            # outdated. The new file will not yet be available
            if [ $append_age -eq 1 ]; then
                # insert the cached-string before the pipe (first -e)
                # or, if no pipe found (-e t) append it (third -e),
                # but only once and on the second line (2!b) (first line is section header,
                # all further lines are long output)
                sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" <"$CACHEFILE"
            else
                CACHE_INFO="cached($MTIME,$MAXAGE)"
                if [[ $NAME == local_* ]]; then
                    # prefix CACHE_INFO, but skip any headers and lines with existing CACHE_INFO
                    sed -e "/^<<<.*>>>/{p;d;}; /^cached([0-9]*,[0-9]*)/{p;d;}; s/^/${CACHE_INFO} /" "${CACHEFILE}"
                else
                    # insert CACHE_INFO in header, but skip headers with existing CACHE_INFO
                    sed -e '/^<<<.*\(:cached(\).*>>>/{p;d;}; s/^<<<\([^>]*\)>>>$/<<<\1:'"${CACHE_INFO}"'>>>/' "${CACHEFILE}"
                fi
            fi
        fi
    fi
    $MK_RUN_ASYNC_PARTS || return
    # Cache file outdated and new job not yet running? Start it
    if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ]; then
        # When the command fails, its output is thrown away
        if [ $mrpe -eq 1 ]; then
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /bin/bash >/dev/null 2>&1 &
        else
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /bin/bash >/dev/null 2>&1 &
        fi
    fi
}
# Make run_cached available for subshells (plugins, local checks, etc.)
export -f run_cached
# Succeeds if the pid file $1 contains exactly the PID of this shell ($$).
# An unreadable or missing file counts as "not mine".
_rt_pidfile_is_mine() {
    local recorded_pid
    recorded_pid=$(cat "${1}" 2>/dev/null)
    [[ "${recorded_pid}" == "$$" ]]
}
# Succeeds if the pid file $1 was last accessed at most $RTC_TIMEOUT seconds ago.
_rt_pidfile_is_alive() {
    local now_ts last_access
    now_ts=$(date +%s)
    last_access=$(stat -c %X "${1}")
    # The values are substituted textually on purpose: an empty stat result
    # must stay an arithmetic error instead of silently counting as 0.
    [ "$((${now_ts} - ${last_access}))" -le "${RTC_TIMEOUT}" ]
}
# Implements Real-Time Check feature of the Checkmk agent which can send
# some section data in 1 second resolution. Useful for fast notifications and
# detailed graphing (if you configure your RRDs to this resolution).
#
# Argument:
#   $1  path of the pid file for one remote. Everything after the last
#       "/rtc_remotes/" in that path is used as the UDP destination host.
# Reads: PASSPHRASE, RTC_SECRET, ENCRYPTED_RT, RTC_TIMEOUT (via
#        _rt_pidfile_is_alive), RTC_SECTIONS, RTC_PLUGINS, RTC_PORT, PLUGINSDIR
# The function loops (one round per second) until the pid file no longer
# contains this shell's PID or has not been accessed for RTC_TIMEOUT seconds.
run_real_time_checks_for_remote() {
pidfile="${1}"
remote="${pidfile##*/rtc_remotes/}"
if [ "$PASSPHRASE" != "" ]; then
# new mechanism to set the passphrase has priority
RTC_SECRET=$PASSPHRASE
fi
# have I already started for this remote?
_rt_pidfile_is_mine "${pidfile}" && return
# Claim the remote by writing our own PID into the pid file
echo $$ >"${pidfile}"
# The protocol version prefix tells the receiver whether the payload is
# encrypted (00) or sent as plain text (99)
if [ "$ENCRYPTED_RT" != "no" ]; then
PROTOCOL=00
else
PROTOCOL=99
fi
while true; do
# Stop as soon as the pid file holds a different PID
_rt_pidfile_is_mine "${pidfile}" || return
# Stop and clean up when the pid file has not been accessed recently enough
_rt_pidfile_is_alive "${pidfile}" || {
rm "${pidfile}"
return
}
# RTC_SECTIONS is expanded unquoted on purpose: one word per section name
for SECTION in $RTC_SECTIONS; do
# Be aware of maximum packet size. Maybe we need to check the size of the section
# output and do some kind of nicer error handling.
# 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
# dd is used to concatenate the output of all commands to a single write/block => udp packet
{
echo -n $PROTOCOL
date +%s | tr -d '\n'
if [ "$ENCRYPTED_RT" != "no" ]; then
# The export happens inside the pipeline and hands the secret to openssl
# through the environment (-pass env:RTC_SECRET)
export RTC_SECRET=$RTC_SECRET
section_"$SECTION" | openssl enc -aes-256-cbc -md md5 -pass env:RTC_SECRET -nosalt
else
section_"$SECTION"
fi
} | dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${remote}/${RTC_PORT}"
done
# Plugins
if cd "$PLUGINSDIR"; then
# RTC_PLUGINS is expanded unquoted on purpose: one word per plugin file
for PLUGIN in $RTC_PLUGINS; do
# NOTE(review): $PLUGIN is unquoted in this test; a name containing
# whitespace would make the test fail instead of skipping the plugin
if [ ! -f $PLUGIN ]; then
continue
fi
# Be aware of maximum packet size. Maybe we need to check the size of the section
# output and do some kind of nicer error handling.
# 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
# dd is used to concatenate the output of all commands to a single write/block => udp packet
{
echo -n $PROTOCOL
date +%s | tr -d '\n'
if [ "$ENCRYPTED_RT" != "no" ]; then
export RTC_SECRET=$RTC_SECRET
run_agent_plugin "$PLUGIN" | openssl enc -aes-256-cbc -md md5 -pass env:RTC_SECRET -nosalt
else
run_agent_plugin "$PLUGIN"
fi
} | dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${remote}/${RTC_PORT}"
done
fi
# One round of packets per second
sleep 1
done
}
# ntpq helper function
# Prints the cached <<<ntp>>> section built from "ntpq -np". The data is
# refreshed with a maximum age of 30 seconds, while the printed section header
# announces a cache interval of 120 seconds.
# Returns 1 if ntpq is not available.
get_ntpq() {
    inpath ntpq || return 1

    # remove heading, make first column space separated
    local ntpq_cmd="waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true"
    # rewrite the interval in the section header from 30 to 120
    local widen_interval='s/\(<<<ntp:cached(.*,\)30)>>>/\1120)>>>/'

    run_cached -s ntp 30 "${ntpq_cmd}" | sed "${widen_interval}"
}
# Prints the <<<timesyncd>>> section if systemd-timesyncd is in use.
# Returns 0 when the section was handled (or when running inside a docker or
# LXC container), 1 when timesyncd is not usable on this host.
section_timesyncd() {
    if [[ -n "$IS_DOCKERIZED" || -n "$IS_LXC_CONTAINER" ]]; then
        return 0
    fi

    inpath systemctl || return 1
    inpath timedatectl || return 1

    if ! systemctl is-enabled systemd-timesyncd.service >/dev/null 2>&1; then
        return 1
    fi

    # debian 10.8 uses ConditionFileIsExecutable to "disable" systemd-timedatectl when ntp is installed.
    # The service is still enabled, but does not start timesyncd as the condition is not met.
    # we check the same condition as the systemd condition
    if (inpath ntpd || inpath openntpd || inpath chronyd || inpath VBoxService); then
        return 1
    fi

    if ! timedatectl timesync-status >/dev/null 2>&1; then
        return 1
    fi

    if $MK_RUN_SYNC_PARTS; then
        echo "<<<timesyncd>>>"
        timedatectl timesync-status
        get_file_mtime /var/lib/systemd/timesync/clock | awk '{print "[[["$1"]]]"}'
    fi

    # intended not to execute section_ntp even in the case where get_file_mtime fails
    return 0
}
# Prints the ntp section (through get_ntpq) if a classic NTP daemon is found.
# Detection order: an active ntp/ntpd systemd unit, then an init script in
# /etc/init.d whose "status" call succeeds, then /etc/rc.d/rc.ntpd.
# Does nothing inside docker or LXC containers.
section_ntp() {
    if [[ -n "$IS_DOCKERIZED" || -n "$IS_LXC_CONTAINER" ]]; then
        return 0
    fi

    # First we try to identify if we're beholden to systemd
    if inpath systemctl; then
        local ntp_unit_state
        # shellcheck disable=SC2016
        ntp_unit_state=$(systemctl | awk '/ntp.service|ntpd.service/{print $3; exit}')
        if [[ "$ntp_unit_state" == "active" ]]; then
            get_ntpq
            return
        fi
    fi

    # If we get to this point, we attempt via classic ntp daemons (ntpq required)
    inpath ntpq || return 0

    # Try to determine status via /etc/init.d
    # This might also be appropriate for AIX, Solaris and others
    for _ntp_daemon in ntp ntpd openntpd; do
        # A service script whose status call returns 0 is taken as a running service
        if [[ -x /etc/init.d/"${_ntp_daemon}" ]] &&
            /etc/init.d/"${_ntp_daemon}" status >/dev/null 2>&1; then
            get_ntpq
            return
        fi
    done
    unset -v _ntp_daemon

    # For other systems such as Slackware
    if [[ -x "/etc/rc.d/rc.ntpd" ]]; then
        get_ntpq
        return
    fi
}
# Prints the modification time of file $1 as seconds since the epoch.
# Tries "stat -c %Y" first, then "stat -f %m" and finally perl; the exit
# status is that of the last variant tried.
get_file_mtime() {
    local target="${1}"

    stat -c %Y "${target}" 2>/dev/null && return
    stat -f %m "${target}" 2>/dev/null && return
    perl -e 'if (! -f $ARGV[0]){die "0000000"};$mtime=(stat($ARGV[0]))[9];print $mtime."\n";' "${target}"
}
# Reports python plugins that could not be executed.
# Arguments: the names of the failed plugins. Nothing is printed if the first
# argument is missing or empty. Otherwise a <<<check_mk>>> section with the
# plugin list is written, followed by a reason line if NO_PYTHON or
# WRONG_PYTHON_COMMAND is set.
section_checkmk_failed_plugins() {
    failed=("$@")
    [[ -n "${failed[0]}" ]] || return 0

    echo "<<<check_mk>>>"
    echo "FailedPythonPlugins: ${failed[*]}"
    if [[ -n "$NO_PYTHON" ]]; then
        echo "FailedPythonReason: No suitable python installation found."
    elif [[ -n "$WRONG_PYTHON_COMMAND" ]]; then
        echo "FailedPythonReason: Configured python command not found."
    fi
}
# Runs all sections that produce their output directly (without run_cached).
# Apart from section_checkmk and section_mkbackup, every section can be
# switched off by setting the corresponding MK_SKIP_* variable to a non-empty
# value.
run_purely_synchronous_sections() {
    section_checkmk

    [[ -z "${MK_SKIP_LABELS}" ]] && section_labels

    [[ -z "${MK_SKIP_DF}" ]] && section_df

    [[ -z "${MK_SKIP_SYSTEMD}" ]] && sections_systemd

    # Filesystem usage for ZFS
    [[ -z "${MK_SKIP_ZFS}" ]] && section_zfs

    # Check NFS mounts by accessing them with stat -f (System
    # call statfs()). If this lasts more than 2 seconds we
    # consider it as hanging. We need waitmax.
    [[ -z "${MK_SKIP_NFS_MOUNTS}" ]] && section_nfs_mounts

    # Check mount options. Filesystems may switch to 'ro' in case
    # of a read error.
    [[ -z "${MK_SKIP_MOUNTS}" ]] && section_mounts

    [[ -z "${MK_SKIP_PS}" ]] && section_ps

    # Memory usage
    [[ -z "${MK_SKIP_MEM}" ]] && section_mem

    # Load and number of processes
    [[ -z "${MK_SKIP_CPU}" ]] && section_cpu

    # Uptime
    [[ -z "${MK_SKIP_UPTIME}" ]] && section_uptime

    # New variant: Information about speed and state in one section
    [[ -z "${MK_SKIP_LNX_IF}" ]] && section_lnx_if

    # Current state of bonding interfaces
    [[ -z "${MK_SKIP_BONDING_IF}" ]] && section_bonding_interfaces

    # Same for Open vSwitch bonding
    [[ -z "${MK_SKIP_VSWITCH_BONDING}" ]] && section_vswitch_bonding

    # Number of TCP connections in the various states
    [[ -z "${MK_SKIP_TCP}" ]] && section_tcp

    # Linux Multipathing
    [[ -z "${MK_SKIP_MULTIPATHING}" ]] && section_multipathing

    # Performance counters of the disks
    [[ -z "${MK_SKIP_DISKSTAT}" ]] && section_diskstat

    # Performance counters of the kernel
    [[ -z "${MK_SKIP_KERNEL}" ]] && section_kernel

    # RAID status of Linux software RAID
    [[ -z "${MK_SKIP_MD}" ]] && section_md

    # RAID status of Linux RAID via device mapper
    [[ -z "${MK_SKIP_DM_RAID}" ]] && section_dm_raid

    # RAID status of LSI controllers via cfggen
    [[ -z "${MK_SKIP_CFGGEN}" ]] && section_cfggen

    # RAID status of LSI MegaRAID controller via MegaCli. You can download that tool from:
    # http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
    [[ -z "${MK_SKIP_MEGARAID}" ]] && section_megaraid

    # RAID status of 3WARE disk controller (by Radoslaw Bak)
    [[ -z "${MK_SKIP_THREE_WARE_RAID}" ]] && section_3ware_raid

    # VirtualBox Guests. The section must always be output. Otherwise the
    # check would not be executed in case no guest additions are installed.
    # And that is something the check wants to detect
    [[ -z "${MK_SKIP_VBOX_GUEST}" ]] && section_vbox_guest

    # OpenVPN Clients. Currently we assume that the configuration is in
    # /etc/openvpn. We might find a safer way to find the configuration later.
    [[ -z "${MK_SKIP_OPENVPN}" ]] && section_openvpn

    [[ -z "${MK_SKIP_NVIDIA}" ]] && section_nvidia

    [[ -z "${MK_SKIP_DRBD}" ]] && section_drbd

    # Heartbeat monitoring
    # Different handling for heartbeat clusters with and without CRM
    # for the resource state
    [[ -z "${MK_SKIP_HEARTBEAT}" ]] && section_heartbeat

    [[ -z "${MK_SKIP_MAILQUEUE}" ]] && section_mailqueue

    # Welcome the ZFS check on Linux
    # We do not endorse running ZFS on linux if your vendor doesn't support it ;)
    # check zpool status
    [[ -z "${MK_SKIP_ZPOOL}" ]] && section_zpool

    # Veritas Cluster Server
    # Software is always installed in /opt/VRTSvcs.
    # Secure mode must be off to allow root to execute commands
    [[ -z "${MK_SKIP_VERITAS}" ]] && section_veritas_cluster

    ## Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg
    [[ -z "${MK_SKIP_FILEINFO}" ]] && run_fileinfo

    # Get stats about OMD monitoring cores running on this machine.
    # Since cd is a shell builtin the check does not affect the performance
    # on non-OMD machines.
    [[ -z "${MK_SKIP_OMD_CORES}" ]] && section_omd_core

    # Collect states of configured Checkmk site backup jobs
    section_mkbackup

    # Get statistics about monitored jobs. Below the job directory there
    # is a sub directory per user that ran a job. That directory must be
    # owned by the user so that a symlink or hardlink attack for reading
    # arbitrary files can be avoided.
    [[ -z "${MK_SKIP_JOB}" ]] && section_job

    # Gather thermal information provided e.g. by acpi
    # At the moment only supporting thermal sensors
    [[ -z "${MK_SKIP_THERMAL}" ]] && section_thermal

    # Libelle Business Shadow
    [[ -z "${MK_SKIP_LIBELLE}" ]] && section_libelle

    # HTTP Accelerator Statistics
    [[ -z "${MK_SKIP_HTTP_ACCELERATOR}" ]] && section_http_accelerator

    # Proxmox Cluster
    [[ -z "${MK_SKIP_PROXMOX}" ]] && section_proxmox

    [[ -z "${MK_SKIP_HAPROXY}" ]] && section_haproxy
}
# Runs the sections listed below, each of which can be switched off by setting
# the corresponding MK_SKIP_* variable to a non-empty value.
run_sections_with_run_cached() {
    # Hardware sensors via IPMI (need ipmitool)
    [[ -z "${MK_SKIP_IPMITOOL}" ]] && section_ipmitool

    # IPMI data via ipmi-sensors (of freeipmi). Please make sure that, if you
    # have installed freeipmi, IPMI is really supported by your hardware.
    [[ -z "${MK_SKIP_IPMISENSORS}" ]] && section_ipmisensors

    # RAID controllers from areca (Taiwan)
    # cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/
    [[ -z "${MK_SKIP_ARECA}" ]] && section_areca_raid

    # Time synchronization with Chrony
    [[ -z "${MK_SKIP_CHRONY}" ]] && section_chrony

    # section_ntp is only tried when section_timesyncd reports failure
    [[ -z "${MK_SKIP_TIMESYNCHRONISATION}" ]] && { section_timesyncd || section_ntp; }

    ## Check status of OMD sites and Checkmk Notification spooler
    [[ -z "${MK_SKIP_OMD}" ]] && section_omd
}
# Produces the complete agent output on stdout.
# MK_RUN_SYNC_PARTS / MK_RUN_ASYNC_PARTS (each "true" or "false") select
# whether the synchronous output is generated and whether background jobs
# (cache updates, liveupdate) are started.
main() {
    $MK_RUN_SYNC_PARTS && run_purely_synchronous_sections
    ($MK_RUN_SYNC_PARTS || $MK_RUN_ASYNC_PARTS) && run_sections_with_run_cached
    # Start new liveupdate process in background on each agent execution. Starting
    # a new live update process will terminate the old one automatically after
    # max. 1 sec.
    $MK_RUN_ASYNC_PARTS && run_liveupdate
    # MK's Remote Plugin Executor
    ($MK_RUN_SYNC_PARTS || $MK_RUN_ASYNC_PARTS) && run_remote_plugin
    # MK's runas Executor
    ($MK_RUN_SYNC_PARTS || $MK_RUN_ASYNC_PARTS) && run_runas_executor
    # Local checks
    ($MK_RUN_SYNC_PARTS || $MK_RUN_ASYNC_PARTS) && run_local_checks
    # Plugins
    ($MK_RUN_SYNC_PARTS || $MK_RUN_ASYNC_PARTS) && run_plugins
    $MK_RUN_SYNC_PARTS && section_checkmk_failed_plugins "${FAILED_PYTHON_PLUGINS[@]}"
    # Agent output snippets created by cronjobs, etc.
    if $MK_RUN_SYNC_PARTS && [ -d "$SPOOLDIR" ] && [ -r "$SPOOLDIR" ]; then
        # Only read files if we really are inside the spool directory. Otherwise
        # the files of whatever directory we are currently in would be printed.
        if pushd "$SPOOLDIR" >/dev/null; then
            local now file maxage mtime
            now=$(date +%s)
            for file in *; do
                # Covers the empty directory, where the pattern stays a literal "*"
                [ -e "$file" ] || continue
                # output every file in this directory. If the file is prefixed
                # with a number, then that number is the maximum age of the
                # file in seconds. If the file is older than that, it is ignored.
                # Cut the name at its first non-digit: what remains are the
                # leading digits (or nothing).
                maxage=${file%%[!0-9]*}
                # If there is at least one digit, then we honor that.
                if [ "$maxage" ]; then
                    # The file may vanish while we are looking at it
                    mtime=$(stat -c %Y -- "$file") || continue
                    if [ $((now - mtime)) -gt "$maxage" ]; then
                        continue
                    fi
                fi
                # Output the file
                cat -- "$file"
            done
            popd >/dev/null
        fi
    fi
}
# Script entry: prepare the encryption, then generate the agent output once and
# pipe it through optionally_encrypt (both helpers are defined outside of this
# part of the file).
set_up_encryption
main | optionally_encrypt
# if MK_LOOP_INTERVAL is set, we assume we're a 'simple' systemd service
if [ -n "$MK_LOOP_INTERVAL" ]; then
# Run main again after every sleep of MK_LOOP_INTERVAL; the loop ends as soon
# as sleep returns a non-zero status.
while sleep "$MK_LOOP_INTERVAL"; do
# Note: this will not output anything if MK_RUN_SYNC_PARTS=false, which is the intended case.
# Anyway: rather send it to /dev/null than risk leaking unencrypted output.
# MK_LOOP_INTERVAL is set to the empty string for this call of main.
MK_LOOP_INTERVAL="" main >/dev/null
done
fi