搭建nagios监控服务器

搭建nagios

安装nagios

yum install -y httpd gcc glibc glibc-common php gd gd-devel libpng libmng libjpeg zlib
useradd nagios
groupadd nagcmd
usermod -G nagcmd nagios
usermod -G nagcmd apache

tar xzvf nagios-3.5.1.tar.gz
cd nagios
./configure --with-command-group=nagcmd
make all
make install

# 生成的nagios web访问目录
ll /usr/local/nagios/share/

# 生成启动脚本
make install-init

# 设置工作目录权限
make install-commandmode

# 生成配置文件
make install-config
# 配置文件目录
ll /usr/local/nagios/etc/

# 将nagios配置文件加入apache配置文件
make install-webconf

# 生成访问nagios web登录的用户名和密码
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
  • 编译nagios插件

    # 安装插件之前先安装mysql开发包(编译check_mysql插件时需要)
    yum install -y mysql-devel
    tar xzvf nagios-plugins-2.1.1.tar.gz
    cd nagios-plugins-2.1.1
    ./configure --with-nagios-user=nagios --with-nagios-group=nagcmd
    make && make install
    # 安装成功后会生成脚本文件
    ls -la /usr/local/nagios/libexec/
    
  • 检查配置文件

    /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
    
  • 启动nagios

    systemctl start nagios
    
  • 访问

    http://192.168.1.240/nagios
    

监控本机对象,监控NFS服务运行状态

cd /usr/local/nagios/etc/objects/
cp localhost.cfg localhost.cfg.bak

cat localhost.cfg
define host {
    use                     linux-server
    host_name               localhost
    alias                   localhost
    address                 127.0.0.1
  }

define hostgroup{
        hostgroup_name  linux-servers
        alias           Linux Servers
        members         localhost
}

define service {
    use    local-service
    host_name    localhost
    service_description    NFS
    check_command    check_tcp!2049 ;监听tcp端口2049
}

# 安装nfs服务
yum install -y rpcbind nfs-utils
# 配置nfs
cat /etc/exports
/tmp    *(rw)
# 启动nfs
systemctl restart nfs
# 检查nfs是否启动成功
showmount -e 192.168.1.240
# 重启nagios
systemctl restart nagios
# 访问http://192.168.1.240/nagios查看服务状态
# 默认间隔5分钟检查一次,可以强制检查

# 如果出现Error: Could not stat() command file '/usr/local/nagios/var/rw/nagios.cmd'!错误,关闭防火墙

监控外部主机192.168.1.241上的mysql服务器运行状态

  • 编辑主配置文件

    vim /usr/local/nagios/etc/nagios.cfg
    # 添加以下配置
    cfg_file=/usr/local/nagios/etc/objects/mysql.cfg
    
    cat mysql.cfg
    define host{
            use                     linux-server
            host_name               nagios2
            alias                   mysql服务器
            address                 192.168.1.241
        hostgroups        mysql-server
    }
    
    define service {
            use     local-service
            host_name       nagios2
            service_description     mysql服务
            check_command   check_mysql
    }
    
    define hostgroup {
        hostgroup_name    mysql-server
        alias    mysql服务器
        members    nagios2
    }
    
    # check_command 命令必须在commands.cfg中存在
    # 在commands.cfg中添加以下内容
    define command {
    command_name    check_mysql
    command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -u nagdb -d nagdb
    }
    
    # 在192.168.1.241服务器中安装mysql服务
    yum install -y mysql-server
    systemctl start mysqld
    # 创建nagdb数据库和用户
    create database nagdb;
    grant select on nagdb.* to nagdb@'192.168.1.240';
    
    # 回到192.168.1.240服务器中测试连接
    /usr/local/nagios/libexec/check_mysql -H 192.168.1.241 -u nagdb -d nagdb
    # 返回以下内容表示连接成功
    Uptime: 846  Threads: 1  Questions: 49  Slow queries: 0  Opens: 67  Flush tables: 1  Open tables: 60  Queries per second avg: 0.057|Connections=19c;;; Open_files=16;;; Open_tables=60;;; Qcache_free_memory=1031352;;; Qcache_hits=0c;;; Qcache_inserts=0c;;; Qcache_lowmem_prunes=0c;;; Qcache_not_cached=11c;;; Qcache_queries_in_cache=0;;; Queries=50c;;; Questions=44c;;; Table_locks_waited=0c;;; Threads_connected=1;;; Threads_running=1;;; Uptime=846c;;;
    # 关闭192.168.1.241上的mysql服务,测试返回结果为
    Can't connect to MySQL server on '192.168.1.241' (111)
    
    # 访问http://192.168.1.240/nagios查看mysql服务是否正常
    

监控服务器上的私有信息

监控192.168.1.241服务器上的私有信息(磁盘空间、进程总数、系统负载等)

  • 正常情况下不能访问外部服务器的私有信息,需要通过nagios远程执行插件nrpe来获取
  • 两台机器通过ssl加密传输数据
  • 在192.168.1.240和192.168.1.241两台服务器上安装openssl

    yum install -y openssl openssl-devel
    
  • 两台服务器分别安装nrpe

    tar xvzf nrpe-2.15.tar.gz
    cd nrpe-2.15
    yum install -y gcc
    ./configure
    # 配置(configure)成功后显示
    General Options:
     -------------------------
     NRPE port:    5666
     NRPE user:    nagios
     NRPE group:   nagios
     Nagios user:  nagios
     Nagios group: nagios
    
    # 需要创建nagios用户
    useradd nagios
    groupadd nagcmd
    usermod -G nagcmd nagios
    
    make all
    make install
    
    # 插件安装后所在路径
    /usr/local/nagios/libexec/check_nrpe
    
    # 192.168.1.241服务器还需生成配置文件
    make install-daemon-config
    # 让xinetd服务可以管理nrpe服务,生成xinetd下nrpe配置文件
    make install-xinetd
    yum install -y xinetd
    
  • 在主配置文件添加配置文件

    # 在/usr/local/nagios/etc/nagios.cfg中添加
    cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
    
    # 在/usr/local/nagios/etc/objects/hosts.cfg中添加
    define service {
        use     local-service
        host_name       nagios2
        service_description     磁盘检查
        check_command   check_nrpe!check_root ; 当磁盘空间小于80%警告,小于90%警告危急
    }
    
    define service {
        use local-service
        host_name       nagios2
        service_description     统计总进程
        check_command   check_nrpe!check_total_procs ; 总进程数大于250个警告,大于400个警告危急
    }
    
    define service {
        use     local-service
        host_name       nagios2
        service_description     当前负载
        check_command   check_nrpe!check_load ; 检查当前服务器负载
    }
    
    # 在/usr/local/nagios/etc/objects/commands.cfg中添加
    define command {
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
    }
    

192.168.1.241配置

  • 安装nagios-plugins
  • 编辑 /etc/xinetd.d/nrpe配置文件,添加only_from 192.168.1.240,允许240服务器连接nrpe

    service nrpe
    {
            flags           = REUSE
            socket_type     = stream
            port            = 5666
            wait            = no
            user            = nagios
            group           = nagios
            server          = /usr/local/nagios/bin/nrpe
            server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
            log_on_failure  += USERID
            disable         = no
            only_from       = 127.0.0.1 192.168.1.240
    }
    
  • 在/etc/services文件中添加

    nrpe            5666/tcp                # NRPE
    
  • /etc/services文件作用

    • xinetd启动服务时会在/etc/services中查找该服务对应的端口;找不到对应端口,将不启动服务。
  • 启动xinetd

    systemctl start xinetd
    
  • 查看5666端口是否启动成功

    netstat -antlup | grep 5666
    
  • /usr/local/nagios/etc/nrpe.cfg添加,检查磁盘剩余空间,小于80%告警,小于90%告警危急

    command[check_root]=/usr/local/nagios/libexec/check_disk -w 80% -c 90% -p /dev/mapper/centos-root
    
  • 重启xinetd

    systemctl restart xinetd
    
  • 查看磁盘空间

    [root@nagios2 etc]# df -h
    文件系统                 容量  已用  可用 已用% 挂载点
    /dev/mapper/centos-root   18G  2.0G   16G   11% /
    devtmpfs                 484M     0  484M    0% /dev
    tmpfs                    494M     0  494M    0% /dev/shm
    tmpfs                    494M   13M  481M    3% /run
    tmpfs                    494M     0  494M    0% /sys/fs/cgroup
    /dev/sda1                497M  154M  344M   31% /boot
    tmpfs                     99M     0   99M    0% /run/user/0
    
  • 访问http://192.168.1.240/nagios

配置nagios邮件提醒

  • 安装sendmail

    yum install -y sendmail
    systemctl enable sendmail
    systemctl start sendmail
    
  • 修改配置文件/usr/local/nagios/etc/objects/contacts.cfg

    define contact{
        contact_name                    nagiosadmin
        use                             generic-contact
        alias                           Nagios Admin
        email                           admin@example.com ; 多个联系人用空格分开
    }