zabbix_MegaCli监控raid及磁盘
客户端
1、安装MegaCli工具
MegaCli-8.07.10-1.noarch.rpm
使用rpm -ivh安装命令
rpm -i MegaCli-8.07.10-1.noarch.rpm
2、拷贝disk.conf文件到/etc/zabbix/zabbix_agent2.d目录,disk.conf文件内容见文末附录(第11节)
通过xftp上传即可
3、拷贝脚本diskcheck_megacli.sh到/opt下自定义目录,本次为/opt/externalscripts/
因目录权限问题,应先把配置文件跟脚本拷贝到当前登录用户home下,然后通过
sudo cp命令拷贝到对应目录
4、配置以管理员身份运行脚本命令
执行sudo visudo
在最后一行添加
zabbix ALL=(ALL) NOPASSWD: /opt/externalscripts/, /opt/MegaRAID/MegaCli/
修改/etc/zabbix/zabbix_agent2.conf文件,添加(Include须指向第2步存放disk.conf的目录)
Include=/etc/zabbix/zabbix_agent2.d/*.conf
UnsafeUserParameters=1
5、配置zabbix环境变量(可能需要变更或修改,conf文件指定MegaCli64目录)
使用cat /etc/passwd查看zabbix用户目录,当前看到为/var/lib/zabbix,但实际目录并不存在
mkdir在对应目录创建zabbix目录并拷贝进入基本的配置文件
变更相应文件的所属主组
chown zabbix:zabbix .bashrc
chown zabbix:zabbix .bash_profile
chown -R zabbix:zabbix zabbix
6、重启agent2代理
sudo systemctl restart zabbix-agent2
7、zabbix给出的列出硬盘生成json的命令
MegaCli64 -PDlist -aAll -NoLog|grep Slot|awk 'BEGIN{printf "{\"data\":[\n\n"} {printf ",\n{ \"{#SLOT_NUM}\":\"%s\"}", $NF, $1;} END{ printf "\n\t]\n}\n";}' | sed '/^,$/d'
8、查看disk.conf中的监控项,测试
[krsc@node26 zabbix_agent2.d]$ zabbix_get -s node26 -p 10050 -k raid.pd.discovery
9、注意事项:监控项主要依赖于自动发现,在zabbix系统中已配置;生效时间略长,约10-15分钟。若无报错,查看自动发现项是否发现成功即可
10、目前的监测标准
Media Error Count on Every Disk <=30
Other Error Count on Every Disk <=100
Predictive Failure Count On Every Disk <=2
Firmware State on Every Disk !=Unconfigured(bad),Failed
Raid Level State != Degraded
11、disk.conf文件内容
# Physical-disk discovery: prints a {"data":[...]} JSON document with one
# {#SLOT_NUM} entry per "Slot" line of `MegaCli64 -PDlist -aAll`; the trailing
# sed removes the lone "," line that the awk program prints before the first entry.
UserParameter=raid.pd.discovery,sudo /opt/MegaRAID/MegaCli/MegaCli64 -PDlist -aAll -NoLog|grep Slot|awk 'BEGIN{printf "{\"data\":[\n\n"} {printf ",\n{ \"{#SLOT_NUM}\":\"%s\"}", $NF, $1;} END{ printf "\n\t]\n}\n";}' | sed '/^,$/d'
# Collect Media Error Count ($1 is the slot number given in the item key)
UserParameter=raid.phy.mec[*],sudo /opt/externalscripts/diskcheck_megacli.sh mec $1
# Collect Other Error Count ($1 is the slot number given in the item key)
UserParameter=raid.phy.oec[*],sudo /opt/externalscripts/diskcheck_megacli.sh oec $1
# Collect Predictive Failure Count ($1 is the slot number given in the item key)
UserParameter=raid.phy.pfc[*],sudo /opt/externalscripts/diskcheck_megacli.sh pfc $1
# Check the disk firmware state; the script is meant to reply -1 for a faulty disk, 0 otherwise
UserParameter=raid.phy.firms[*],sudo /opt/externalscripts/diskcheck_megacli.sh firm $1
# Check the RAID level state; the script is meant to reply -1 when an array is degraded, 0 otherwise
UserParameter=raid.level.state,sudo /opt/externalscripts/diskcheck_megacli.sh rdlevel
12、配置文件引用的diskcheck_megacli.sh脚本内容
#!/bin/bash
# Zabbix disk-monitoring helper built on MegaCli.
#
# Usage: diskcheck_megacli.sh <option> [slot_num]
#
# On every run this prologue:
#   1. dumps the monitored fields of all physical disks on adapter 0 to
#      /tmp/pdinfo.txt,
#   2. writes one file per disk, /tmp/pdinfo<slot number>,
#   3. stores the second command-line argument in SLOT_NUM.
#
# NOTE(review): the /tmp file names are fixed, so concurrent runs overwrite
# each other's files, and files of disks that are no longer listed are not
# removed.
# NOTE(review): only adapter 0 is queried here (-a0).
. ~/.bash_profile > /dev/null

# SPLITBYSLOT <summary-file> <prefix>
# Splits a disk summary into one file per disk, named <prefix><slot number>.
# A new file starts at every "Slot Number" line and the slot number is the last
# field of that line, so each file is keyed by the real slot number rather than
# by the position of the disk in the listing.  Lines that appear before the
# first "Slot Number" line are dropped.
SPLITBYSLOT()
{
  awk -v prefix="$2" '
    /^Slot Number/ {
      if (out != "") close(out)
      out = prefix $NF
    }
    out != "" { print > out }
  ' "$1"
}

# Collect the monitored fields of every disk; the sed expression strips the
# "[0x... Sectors]" part so only the human-readable value remains.
/opt/MegaRAID/MegaCli/MegaCli64 -PDlist -a0 \
  | grep -Ei '(Slot Number|Media Error Count|Other Error Count|Predictive Failure Count|Raw Size|Firmware state)' \
  | sed -e "s:\[0x.*Sectors\]::g" >/tmp/pdinfo.txt

# One file per disk, looked up later as /tmp/pdinfo${SLOT_NUM}.
SPLITBYSLOT /tmp/pdinfo.txt /tmp/pdinfo

# Physical drive count: 8th field of the "Physical" line printed by -PDGetNum.
# Nothing in this prologue depends on it any more; it is kept so the variable
# stays defined.
PDNUM=$(/opt/MegaRAID/MegaCli/MegaCli64 -PDGetNum -aAll | grep Physical | awk '{print $8}')

# Slot number requested on the command line (second argument).
SLOT_NUM=$2
# DATAFORMATE
# Parses the per-disk summary file /tmp/pdinfo${SLOT_NUM} ("name: value" lines)
# into global variables.
# Globals read:    SLOT_NUM
# Globals written: SLOTNUMNAME SLOTNUM, MECNAME MEC, OECNAME OEC, PFCNAME PFC,
#                  RAWNAME SIZE, FIRMWARENAME FIRMWARESTATUS
# Returns:         1 (with a message on stderr) when the file cannot be read.
# Names and values are stored with surrounding whitespace removed, so they can
# be compared with plain string tests.
DATAFORMATE()
{
  local info_file="/tmp/pdinfo${SLOT_NUM}"
  local line name value

  if [[ ! -r "${info_file}" ]]; then
    echo "DATAFORMATE: cannot read ${info_file}" >&2
    return 1
  fi

  # `read -r` keeps backslashes literal; the `|| [[ -n ... ]]` part also
  # processes a last line that has no trailing newline.
  while read -r line || [[ -n "${line}" ]]; do
    # name = text before the first colon; value = text between the first and
    # the second colon (the same fields `awk -F:` calls $1 and $2).
    name=${line%%:*}
    if [[ "${line}" == *:* ]]; then
      value=${line#*:}
      value=${value%%:*}
    else
      value=""
    fi
    # Trim trailing whitespace from the name and both ends of the value.
    name=${name%"${name##*[![:space:]]}"}
    value=${value#"${value%%[![:space:]]*}"}
    value=${value%"${value##*[![:space:]]}"}

    case "${line}" in
      Slot*)
        SLOTNUMNAME=${name}
        SLOTNUM=${value}
        ;;
      Media*)
        MECNAME=${name}
        MEC=${value}
        ;;
      Other*)
        OECNAME=${name}
        OEC=${value}
        ;;
      Predictive*)
        PFCNAME=${name}
        PFC=${value}
        ;;
      Raw*)
        RAWNAME=${name}
        SIZE=${value}
        ;;
      Firmware*)
        FIRMWARENAME=${name}
        FIRMWARESTATUS=${value}
        ;;
    esac
  done < "${info_file}"
}
# CHECKRAIDLEVEL
# Checks the state of the logical drives (RAID level state).
# Prints -1 when the output of `MegaCli64 -LDInfo -Lall -aALL` contains
# "Degraded", otherwise prints 0.
# NOTE: 0 is also printed when MegaCli64 cannot be run or prints nothing.
CHECKRAIDLEVEL()
{
  # The absolute path is required: a relative "opt/..." path only resolves when
  # the current directory is "/".  grep -q keeps the matched line out of stdout
  # so that the only output is the -1/0 verdict.
  if /opt/MegaRAID/MegaCli/MegaCli64 -LDInfo -Lall -aALL | grep -q Degraded; then
    echo -1
  else
    echo 0
  fi
}
# RUNOPTION <option>
# Runs the check selected by <option> and prints its result on stdout:
#   mec / oec / pfc  value of MEC / OEC / PFC as filled in by DATAFORMATE
#   firm             -1 when the firmware state starts with "Unconfigured(bad)"
#                    or "Failed", otherwise 0
#   rdlevel          output of CHECKRAIDLEVEL
#   anything else    a usage hint
RUNOPTION()
{
  local state

  case "$1" in
    mec)
      DATAFORMATE
      # Unquoted on purpose: word splitting strips padding around the number.
      # shellcheck disable=SC2086
      echo ${MEC}
      ;;
    oec)
      DATAFORMATE
      # shellcheck disable=SC2086
      echo ${OEC}
      ;;
    pfc)
      DATAFORMATE
      # shellcheck disable=SC2086
      echo ${PFC}
      ;;
    firm)
      DATAFORMATE
      # Strip leading whitespace (the value may keep the blank that follows the
      # colon), then match by prefix so that a state with a trailing suffix is
      # still recognised.
      state=${FIRMWARESTATUS#"${FIRMWARESTATUS%%[![:space:]]*}"}
      case "${state}" in
        "Unconfigured(bad)"* | Failed*)
          echo -1
          ;;
        *)
          echo 0
          ;;
      esac
      ;;
    rdlevel)
      CHECKRAIDLEVEL
      ;;
    *)
      # Single quotes keep the $slot_num placeholders literal in the hint.
      echo 'Please select option: mec $slot_num ;oec $slot_num;pfc $slot_num;firm $slot_num;rdlevel'
      ;;
  esac
}

# The first command-line argument selects the check to run.
OPTION=$1
RUNOPTION "${OPTION}"