Nagios监控生产环境redis群集服务实战
在redis服务出现瓶颈或者异常的时候及时报警通知,方便DBA第一时间处理维护。
1,下载redis监控插件
Redis已经在服务器上安装好了,所以可以直接进行监控。redis集群安装请参考:http://blog.itpub.net/26230597/viewspace-1145831/,监控插件的下载地址为:http://download.csdn.net/detail/mchdba/8023351,有2个版本,一个是用perl脚本写成的,一个是用php脚本写成的,可以任意选择一个,这里选择的是perl脚本。
2,赋予执行权限
将check_redis.php和check_redis.pl拷贝到/usr/lib/nagios/plugins/目录,然后赋予执行权限。
[root@wgq_41 plugins]# cd /usr/lib/nagios/plugins/
[root@wgq_41 plugins]# chown -R nagios.nagios check_redis.*
[root@wgq_41 plugins]# chmod 750 check_redis.*
3,定义监控命令
[root@wgq objects]# vim /usr/local/nagios/etc/objects/commands.cfg
# add by tim on 20141010,for redis
# check redis
define command {
command_name check_redis
command_line /usr/lib/nagios/plugins/check_redis.pl -H $HOSTADDRESS$ -p $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -f
}
4,定义redis监控主机
[root@wgq etc]# vim /usr/local/nagios/etc/hosts.cfg
# No.018,redis master server
define host{
use linux-server
host_name cache-1
alias cache-1
address 10.xxx.3.x0
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups ops
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
# No.020 cache-3 redis slave server
define host{
use linux-server
host_name cache-3
alias cache-3
address 10.xx.3.x2
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups ops
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
5,定义redis监控主机组
define hostgroup {
hostgroup_name Redis_Servers
alias Redisservices
members cache-1,cache-3
}
6,定义redis监控服务选项
[root@wgq objects]# vim /usr/local/nagios/etc/objects/services_redis.cfg
# Redis Master 监控选项
define service {
host_name cache-1
servicegroups Redisservices
service_description Redis Master Clients
check_command check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-1
servicegroups Redisservices
service_description Redis Master Memory
check_command check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-1
servicegroups Redisservices
service_description Redis Master CPU
check_command check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #没有定义监控报警阈值
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
# Redis Slave 监控选项
define service {
host_name cache-3
servicegroups Redisservices
service_description Redis Slave Clients
check_command check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-3
servicegroups Redisservices
service_description Redis Slave Memory
check_command check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-3
servicegroups Redisservices
service_description Redis Slave CPU
check_command check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #没有定义监控报警阈值
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
设置配置文件的属主和权限(配置文件只需要nagios用户可读,不需要执行权限,也不应对所有用户可写)
[root@wgq objects]# chown -R nagios.nagios services_redis.cfg
[root@wgq objects]# chmod 644 services_redis.cfg
加入监控服务项到nagios.cfg
[root@wgq etc]# vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/services_redis.cfg
7,测试redis监控服务
执行命令/usr/lib/nagios/plugins/check_redis.pl -H cache-1 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T 来测试下redis监控是否正常运行
[root@wgq plugins]# /usr/lib/nagios/plugins/check_redis.pl -H 10.2xx.3.x0 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T
OK: REDIS 2.8.8 on 10.2xx.3.x0:6379 has 1 databases (db0) with 28497 keys, up 76 days 2 hours - response in 0.004s, hitrate is 12.83%, memory use is 194.14M (peak 205.14M, 6.49% of max, fragmentation 1.37%), connected_clients is 35, blocked_clients is 11 | redis_build_id=d322d411218ade61 total_connections_received=341191c used_memory_lua=33792 aof_rewrite_buffer_length=0 used_memory_rss=278749184B redis_git_dirty=0 loading=0 redis_mode=standalone latest_fork_usec=5588 repl_backlog_first_byte_offset=0 sync_partial_ok=0 master_repl_offset=0 uptime_in_days=76c aof_rewrite_scheduled=0 lru_clock=3649276 rdb_bgsave_in_progress=0 rejected_connections=0 repl_backlog_active=0 aof_delayed_fsync=1 sync_full=0 process_id=7776 used_memory_human=194.14M aof_current_rewrite_time_sec=-1 used_memory=203570960 aof_enabled=1 blocked_clients=11 aof_last_bgrewrite_status=ok aof_rewrite_in_progress=0 sync_partial_err=0 used_cpu_sys_children=2222.75 connected_slaves=0 repl_backlog_histlen=0 uptime_in_seconds=6576292c repl_backlog_size=1048576 os=Linux 2.6.32-358.el6.x86_64 x86_64 used_cpu_sys=32640.80 aof_pending_bio_fsync=0 connected_clients=35 rdb_last_bgsave_time_sec=1 used_memory_peak_human=205.14M run_id=d1fc098d26fa4bbcef3eabeec6d19a858f03dd00 rdb_last_bgsave_status=ok pubsub_patterns=8 client_biggest_input_buf=0 keyspace_hits=42175896c rdb_last_save_time=1412935342 rdb_changes_since_last_save=318 db0_keys=28497 db0_expires=7 db0_avg_ttl=34003 aof_pending_rewrite=0 aof_buffer_length=0 config_file=/usr/local/redis-2.8.8/etc/redis.conf pubsub_channels=0 used_cpu_user_children=21375.34 hz=10 aof_last_rewrite_time_sec=2 aof_last_write_status=ok aof_base_size=82883253 used_cpu_user=18460.42 keyspace_misses=286602797c tcp_port=6379 total_commands_processed=797581196c mem_fragmentation_ratio=1.37 aof_current_size=146485850 rdb_current_bgsave_time_sec=-1 client_longest_output_list=0 instantaneous_ops_per_sec=114 evicted_keys=0c used_memory_peak=215106272B expired_keys=58977c 
total_keys=28497 total_expires=7 response_time=0.003802s hitrate=12.8281% memory_utilization=6.49013519287109%
[root@wgq plugins]#
8,查看redis监控服务状态
9,操作过程中的报错处理过程
报错:
[root@wgq_line_cache_3_41 plugins]# ./check_redis.pl --help
Can't locate Redis.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at ./check_redis.pl line 421.
BEGIN failed--compilation aborted at ./check_redis.pl line 421.
[root@wgq_line_cache_3_41 plugins]#
[root@wgq_line_cache_3_41 plugins]# perl -MCPAN -e shell
Terminal does not support AddHistory.
cpan shell -- CPAN exploration and modules installation (v1.9402)
Enter 'h' for help.
cpan[1]> install Redis
…
Can't locate Module/Build/Tiny.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at Build.PL line 2.
BEGIN failed--compilation aborted at Build.PL line 2.
Warning: No success on command[/usr/bin/perl Build.PL --installdirs site]
Warning (usually harmless): 'YAML' not installed, will not store persistent state
DAMS/Redis-1.976.tar.gz
/usr/bin/perl Build.PL --installdirs site -- NOT OK
Running Build test
Make had some problems, won't test
Running Build install
Make had some problems, won't install
Could not read '/root/.cpan/build/Redis-1.976-Zhz6xI/META.yml'. Falling back to other methods to determine prerequisites……
YAML是以数据为中心的标记语言,它使用ASCII字符(如连字符、问号、冒号、逗号等)来构造数据块(标量值或哈希)。和XML一样,YAML也是一种机器可识别的语言,并能和多种脚本语言相结合,其中一种便是Perl。这里需要安装YAML,执行如下:
cpan[2]>install YAML
……
Appending installation info to /usr/lib64/perl5/perllocal.pod
INGY/YAML-1.12.tar.gz
/usr/bin/make install -- OK
CPAN: YAML loaded ok (v1.12)
PS:这里可能会安装失败,失败原因是网络连接问题,可以多执行几次install YAML就会成功。
再继续执行install Redis,有如下提示信息:
cpan[4]> install Redis
Running install for module 'Redis'
Running Build for D/DA/DAMS/Redis-1.976.tar.gz
Has already been unwrapped into directory /root/.cpan/build/Redis-1.976-cUL4rt
'/usr/bin/perl Build.PL --installdirs site' returned status 512, won't make
Running Build test
Make had some problems, won't test
Running Build install
Make had some problems, won't install
cpan[5]>
Build失败,Build.PL执行出错了,需要重新安装构建模块,执行命令install Build(前面的报错显示缺少的是Module/Build/Tiny.pm,如果仍然失败,可以尝试执行install Module::Build::Tiny)
cpan[5]> install Build
成功后,再执行install Redis
cpan[6]> install Redis
Redis模块安装成功。
----------------------------------------------------------------------------------------------------------------
<版权所有,文章允许转载,但必须以链接方式注明源地址,否则追究法律责任!>
原博客地址: http://blog.itpub.net/26230597/viewspace-1294684/
原作者:黄杉 (mchdba)
----------------------------------------------------------------------------------------------------------------
参考文件:http://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details
Nagios监控生产环境redis群集服务实战的更多相关文章
- prometheus+grafana 监控生产环境机器的系统信息、redis、mongodb以及jmx
介绍: 为了更好的对生产环境的一些中间件和操作系统的运行情况进行可视化的展示,近期了解了下prometheus加上grafana来实现这种效果,由于prometheus是新出来的开源项目,所以,监控的 ...
- redis的生产环境中的部署?
使用的是redis cluster 10台机器,5台机器部署了redis主实例,另外5台机器部署了redis 的从实例,每个主实例挂了一个从实例,5个节点对外提供读写服务,每个节点的读写高峰qps可能 ...
- Redis生产环境节点宕机问题报错及恢复排错
Redis故障发现 主观下线 当cluster-node-timeout时间内某节点无法与另一个节点顺利完成ping消息通信时,则将该节点标记为主观下线状态. 客观下线 当某个节点判断另一个节点主观下 ...
- Linux实战教学笔记51:Zabbix监控平台3.2.4(三)生产环境案例
https://www.cnblogs.com/chensiqiqi/p/9162986.html 一,Zabbix生产环境监测案例概述 1.1 项目规划 [x] :主机分组 交换机 Nginx To ...
- 170707、springboot编程之监控和管理生产环境
spring-boot-actuator模块提供了一个监控和管理生产环境的模块,可以使用http.jmx.ssh.telnet等拉管理和监控应用.审计(Auditing). 健康(health).数据 ...
- (33)Spring Boot 监控和管理生产环境【从零开始学Spring Boot】
[本文章是否对你有用以及是否有好的建议,请留言] spring-boot-actuator模块提供了一个监控和管理生产环境的模块,可以使用http.jmx.ssh.telnet等拉管理和监控应用.审计 ...
- NanoProfiler - 适合生产环境的性能监控类库 之 基本功能篇
背景 NanoProfiler是一个EF Learning Labs出品的免费性能监控类库(即将开源).它的思想和使用方式类似于MiniProfiler的.但是,设计理念有较大差异. MiniProf ...
- NanoProfiler - 适合生产环境的性能监控类库 之 大数据篇
上期回顾 上一期:NanoProfiler - 适合生产环境的性能监控类库 之 基本功能篇 上次介绍了NanoProfiler的基本功能,提到,NanoProfiler实现了MiniProfiler欠 ...
- NanoProfiler - 适合生产环境的性能监控类库 之 实践ELK篇
上期回顾 上一期:NanoProfiler - 适合生产环境的性能监控类库 之 大数据篇 上次介绍了NanoProfiler的大数据分析理念,一晃已经时隔一年多了,真是罪过! 有朋友问到何时开源的问题 ...
随机推荐
- Swift - 告警提示框(UIAlertController)的用法
自iOS8起,苹果就建议告警框使用UIAlertController来代替UIAlertView.下面总结了一些常见的用法: 1,简单的应用(同时按钮响应Handler使用闭包函数) 1 2 3 ...
- 高级Bash脚本编程指南(27):文本处理命令(三)
高级Bash脚本编程指南(27):文本处理命令(三) 成于坚持,败于止步 处理文本和文本文件的命令 tr 字符转换过滤器. 必须使用引用或中括号, 这样做才是合理的. 引用可以阻止shell重新解释出 ...
- Github Atom
码代码新神器-Github Atom 周末闲着没事,逛论坛发现了一个新的编辑器,由github发布的Atom编辑器.瞬间被吸引了,所以就去尝试着折腾了一下,后来发现这个编辑器确实很不错,他的特点就 ...
- 搜索引擎爬虫蜘蛛的USERAGENT大全
搜索引擎爬虫蜘蛛的USERAGENT大全 搜索引擎爬虫蜘蛛的USERAGENT收集,方便制作采集的朋友. 百度爬虫 * Baiduspider+(+http://www.baidu.com/sea ...
- extjs表单
Ext.onReady(function(){ Ext.BLANK_IMAGE_URL = '../ext/resources/images/default/s.gif'; Ext.QuickTi ...
- apk应用的反编译和源代码的生成
对于反编译一直持有无所谓有或无的态度.经过昨天一下午的尝试,也有了点心得和体会: 先给大家看看编译的过程和我们反编译的过程概图吧: 例如以下是反编译工具的根文件夹结构: 三个目录也实际上是下面三个步骤 ...
- C++不确定行为
一个简单的程序引发了一块让人纠结的领域,也许强调编程规范的重要性也在这把.规范了就easy避免一些问题. 程序是这种 int Change(int& a) { a = 4; return a; ...
- Maven 建 Struts2 基本实现 CURD Controller
开发环境 开发工具:Eclipse 数据库:MySQL server:Tomcat Struts2 请求原理流程图 构建一个 web maven project,在pom.xml引入struts2的j ...
- NET Core RC2
.NET Core RC2/RTM 明确了时间表 .NET Core 经过了将近2年的开发,去年12月份发布的RC1版本,明确来说那只是一个beta版本,自从RC1发布以来,看到github里的RC2 ...
- 端口映射工具 redir/socat/xinetd - 运维技术 - 开源中国社区
端口映射工具 redir/socat/xinetd - 运维技术 - 开源中国社区 端口映射工具 redir/socat/xinetd 10人收藏此文章, 我要收藏 发表于3天前(2013-08 ...