rules_up.yml

---
# Target-availability alerts built on the `up` series that Prometheus records
# for every scrape: 1 when the scrape succeeded, 0 when it failed.
# Each rule therefore compares against 0 so that it fires when the target is
# DOWN, which is what the annotations announce.
groups:
  - name: up
    rules:
      # MySQL exporter on instance "db1" has been unreachable for 10s.
      - alert: mysql
        expr: up{instance="db1",job="mysql"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'mysql'
        annotations:
          description: '{{ $labels.instance }} mysql is down please handle'
          summary: 'jobname: {{$labels.instance}} mysql is down please handle'

      # Any target of the "node" job has been unreachable for 10s.
      - alert: node
        expr: up{job="node"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'node'
        annotations:
          description: '{{ $labels.instance }} node is down please handle'
          summary: 'jobname: {{$labels.instance}} node is down please handle'

      # nginx target on instance "web" has been unreachable for 10s.
      - alert: nginx
        expr: up{instance="web",job="nginx"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'nginx'
        annotations:
          description: '{{ $labels.instance }} nginx is down please handle'
          summary: 'jobname: {{$labels.instance}} nginx is down please handle'

rules_mysql.yml

---
# MySQL recording rules and alerts.
# Long expressions and messages use folded block scalars (`>-`); the folded
# lines are joined with single spaces, so each value is one logical line.
groups:
  - name: mysql.rules
    rules:
      # --- Recording rules -------------------------------------------------

      # Replication delay minus the configured SQL delay.
      - record: mysql_slave_lag_seconds
        expr: >-
          mysql_slave_status_seconds_behind_master
          - mysql_slave_status_sql_delay

      # Difference between the heartbeat "now" and "stored" timestamps.
      - record: mysql_heartbeat_lag_seconds
        expr: >-
          mysql_heartbeat_now_timestamp_seconds
          - mysql_heartbeat_stored_timestamp_seconds

      # 5-minute rate of commit and rollback commands, summed over `command`.
      - record: job:mysql_transactions:rate5m
        expr: >-
          sum(rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
          WITHOUT (command)

      # --- Galera alerts ---------------------------------------------------

      # wsrep_ready has not been 1 for 5 minutes.
      - alert: MySQLGaleraNotReady
        expr: mysql_global_status_wsrep_ready != 1
        for: 5m
        labels:
          severity: warning
        annotations:
          description: '{{$labels.job}} on {{$labels.instance}} is not ready.'
          summary: Galera cluster node not ready

      # Local state is not 4 while desync is 0, for 5 minutes.
      - alert: MySQLGaleraOutOfSync
        expr: >-
          (mysql_global_status_wsrep_local_state != 4
          and mysql_global_variables_wsrep_desync == 0)
        for: 5m
        labels:
          severity: warning
        annotations:
          description: >-
            {{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}}
            != 4).
          summary: Galera cluster node out of sync

      # Local state is 2 and the receive queue exceeds 100, for 5 minutes.
      - alert: MySQLGaleraDonorFallingBehind
        expr: >-
          (mysql_global_status_wsrep_local_state == 2
          and mysql_global_status_wsrep_local_recv_queue > 100)
        for: 5m
        labels:
          severity: warning
        annotations:
          description: >-
            {{$labels.job}} on {{$labels.instance}} is a donor (hotbackup)
            and is falling behind (queue size {{$value}}).
          summary: xtradb cluster donor node falling behind

      # --- Replication alerts ----------------------------------------------

      # Either the IO thread or the SQL thread reports 0 for 2 minutes.
      - alert: MySQLReplicationNotRunning
        expr: >-
          mysql_slave_status_slave_io_running == 0
          or mysql_slave_status_slave_sql_running == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          description: >-
            Slave replication (IO or SQL) has been down for more than 2 minutes.
          summary: Slave replication is not running

      # Recorded slave lag is above 30 and its 2-minute linear prediction
      # (from the last 5m of samples) is still above 0.
      - alert: MySQLReplicationLag
        expr: >-
          (mysql_slave_lag_seconds > 30)
          and ON(instance)
          (predict_linear(mysql_slave_lag_seconds[5m], 60 * 2) > 0)
        for: 1m
        labels:
          severity: critical
        annotations:
          description: >-
            The mysql slave replication has fallen behind and is not recovering
          summary: MySQL slave replication is lagging

      # Same alert name and condition shape, evaluated on the heartbeat lag
      # series instead of the slave-status lag series.
      - alert: MySQLReplicationLag
        expr: >-
          (mysql_heartbeat_lag_seconds > 30)
          and ON(instance)
          (predict_linear(mysql_heartbeat_lag_seconds[5m], 60 * 2) > 0)
        for: 1m
        labels:
          severity: critical
        annotations:
          description: >-
            The mysql slave replication has fallen behind and is not recovering
          summary: MySQL slave replication is lagging

      # --- InnoDB alerts ---------------------------------------------------

      # 15-minute rate of InnoDB log waits exceeds 10; no `for` hold period.
      - alert: MySQLInnoDBLogWaits
        expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
        labels:
          severity: warning
        annotations:
          description: >-
            The innodb logs are waiting for disk at a rate of {{$value}} /
            second
          summary: MySQL innodb log writes stalling

rules_nginx.yml

---
# Alerts for the "nginx-vts-web" job: target availability, 5xx volume per
# application / per filter, and per-interface request time.
# Long expressions use folded block scalars (`>-`); lines are joined with a
# single space, so each expression is one logical line.
groups:
  - name: aws_ec2_nginx-vts-web
    rules:
      # The nginx-vts-web target has failed to scrape for 1 minute.
      - alert: nginx-vts-web-status
        expr: up{job="nginx-vts-web"} == 0
        for: 1m
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'nginx'
        annotations:
          description: '{{ $labels.instance }} nginx-vts-web is down please handle'
          summary: 'jobname: {{$labels.instance}} nginx-vts-web is down please handle'

      # 5xx counter grew by more than 10 versus its value one minute ago,
      # aggregated per (application, direction).
      - alert: nginx application 5xx gt 10 per min
        expr: >-
          sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"}
          - nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m)
          by (application,direction) > 10
        for: 1m
        labels:
          type: 'nginx'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.direction}} >10/m'
          summary: ' {{$labels.application}} {{$labels.direction}} >10/m'

      # Same delta, broken down further by filter_name, with a threshold of 1.
      # The annotations state ">1/m" to match that threshold.
      - alert: nginx application 5xx gt 1 per min
        expr: >-
          sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"}
          - nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m)
          by (application,direction,filter_name) > 1
        for: 1m
        labels:
          type: 'nginx-detail'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.direction}} {{$labels.filter_name}} >1/m'
          summary: ' {{$labels.application}} {{$labels.direction}} >1/m'

      # Disabled rule, kept for reference.
      # - alert: nginx application 2xx gt 10 per min
      #   expr: sum(nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} offset 1m) by (application,direction) > 10
      #   for: 1m
      #   labels:
      #     test: 'yes'
      #     priority: "4"
      #   annotations:
      #     description: ' {{$labels.application}} {{$labels.direction}} >10/m'
      #     summary: ' {{$labels.application}} {{$labels.direction}} >10/m'

      # Request time (value * 1000) above 300 for every filter except the
      # KYC upload endpoint, which has its own threshold below.
      - alert: nginx interface time gt 300ms
        expr: >-
          nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name!="/v1/users/kyc/upload"}*1000
          > 300
        for: 1m
        labels:
          type: 'nginx'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >300ms'
          summary: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >300ms'

      # Request time (value * 1000) above 2000 for the KYC upload endpoint.
      # The annotations state ">2s" to match that threshold.
      - alert: nginx interface time gt 2s
        expr: >-
          nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name="/v1/users/kyc/upload"}*1000
          > 2000
        for: 1m
        labels:
          type: 'nginx'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >2s'
          summary: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >2s'

rules_node.yml

---
# Example node-exporter alerts: a CPU-count check and a target-availability
# check for the "node" job.
groups:
  - name: example-node-exporter-rules
    rules:
      # Counts the idle-mode CPU series per target (aggregating away `cpu`
      # and `mode`), i.e. the number of CPUs, and fires when it exceeds 2.
      - alert: instance:node_cpus:count
        expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode) > 2
        for: 1s
        annotations:
          description: ' {{$labels.application}} {{$labels.instance}} has {{$value}} CPUs (>2)'
          summary: ' {{$labels.application}} {{$labels.instance}} has {{$value}} CPUs (>2)'

  - name: node_up
    rules:
      # Fires when the "node" target is not up (`up` differs from 1).
      # NOTE(review): the alert name duplicates the CPU-count alert above and
      # does not describe this rule; it is kept unchanged because Alertmanager
      # routes or silences may match on it - consider renaming.
      - alert: instance:node_cpus:count
        expr: up{instance="node",job="node"} != 1
        for: 1s
        annotations:
          description: ' {{$labels.application}} {{$labels.instance}} is down'
          summary: ' {{$labels.application}} {{$labels.instance}} is down'

prometheus告警规则模板:MySQL,nginx,node的更多相关文章

  1. Prometheus告警规则增删改自动化

    Prometheus告警规则增删改自动化 前言: 随着容器技术的发展,zabbix监控方式与k8s的结合不完善,导致不得不放弃zabbix,而新的监控工具prometheus的使用就越来越多了.但是经 ...

  2. prometheus简单监控Linux,mysql,nginx

    prometheus安装 下载安装 #官网下载 解压即可使用 https://prometheus.io/download/ #docker 方式安装 sudo docker run -n prome ...

  3. prometheus 告警规则

    GitHub网址1 https://github.com/samber/awesome-prometheus-alerts 网址2 https://awesome-prometheus-alerts. ...

  4. 02 . Prometheus告警处理

    Prometheus告警简介 告警能力在Prometheus的架构中被划分成两个独立的部分.如下所示,通过在Prometheus中定义AlertRule(告警规则),Prometheus会周期性的对告 ...

  5. Prometheus告警处理

    在Prometheus Server中定义告警规则以及产生告警,Alertmanager组件则用于处理这些由Prometheus产生的告警.Alertmanager即Prometheus体系中告警的统 ...

  6. Prometheus告警模型分析

    Prometheus作为时下最为流行的开源监控系统,其庞大的生态体系:包括针对各种传统应用的Exporter,完整的二次开发工具链,与Kubernetes等主流平台的高度亲和以及由此带来的强大的自发现 ...

  7. Prometheus自身的监控告警规则

    1.先在 Prometheus 主程序目录下创建rules目录,然后在该目录下创建 prometheus-test.yml文件,内容如下: 内容很多,可以根据实际情况进行调整. 规则参考网址:http ...

  8. 实用干货丨如何使用Prometheus配置自定义告警规则

    前 言 Prometheus是一个用于监控和告警的开源系统.一开始由Soundcloud开发,后来在2016年,它迁移到CNCF并且称为Kubernetes之后最流行的项目之一.从整个Linux服务器 ...

  9. Prometheus中使用的告警规则

    参考网站:https://awesome-prometheus-alerts.grep.to/rules 这个网站上有好多常用软件的告警规则,但是有些并不一定实用,有些使用起来会有错误,这里就把这些都 ...

随机推荐

  1. C++学习日记:关于我决定开始学习C++的那些事

    苦恼于Python运行时感人的速度,我决定学习C++. 为了激励我自己好好地学习这门未曾谋面的编程语言,我决定在此开设专栏:C++学习日记.希望在读者们的监督下,我可以早日掌握这门语言.当然,如果那位 ...

  2. H5移动端实现一键复制或长摁复制

    今天接到了一个新的需求,要求我们对表单中的某一个字段进行复制,这个表单是不可选的,拿到需求的时候有点懵,不清楚下手点在哪,后来网上找了找,终于有了点眉目,感觉网上有些是实现不了的,特地在这里记录下进行 ...

  3. Nginx初步学习

    Nginx下载与安装(Linux) nginx下载 1.root用户下进入/usr/local/src su root cd /usr/local/src 2.下载nginx所需包 # nginx w ...

  4. axios&spring前后端分离传参规范总结

    前后端分离开发的场景下,开发人员的工作内容更加专注与专业,但是也产生了一些额外的沟通成本.比如:本文中为大家说明的前后端参数传递与接受方法.本文主要是面对前端使用axios,后端使用Spring进行参 ...

  5. SpringBoot定时任务 - 集成quartz实现定时任务(单实例和分布式两种方式)

    最为常用定时任务框架是Quartz,并且Spring也集成了Quartz的框架,Quartz不仅支持单实例方式还支持分布式方式.本文主要介绍Quartz,基础的Quartz的集成案例本,以及实现基于数 ...

  6. 日夕如是寒暑不间,基于Python3+Tornado6+APScheduler/Celery打造并发异步动态定时任务轮询服务

    原文转载自「刘悦的技术博客」https://v3u.cn/a_id_220 定时任务的典型落地场景在各行业中都很普遍,比如支付系统中,支付过程中因为网络或者其他因素导致出现掉单.卡单的情况,账单变成了 ...

  7. P4983忘情

    今天挺开心的\(\sim\),省选加油\(!\) \(P4983\)忘情 我能说今晚我才真正学会\(wqs\)和斜率优化吗\(?\) 恰好选几个,必然需要\(wqs\)二分一下 那么考虑不考虑次数情况 ...

  8. 【原创】Magisk Root隐藏模块 Shamiko安装

    本文所有教程及源码.软件仅为技术研究.不涉及计算机信息系统功能的删除.修改.增加.干扰,更不会影响计算机信息系统的正常运行.不得将代码用于非法用途,如侵立删! Root隐藏模块 Shamiko安装 操 ...

  9. while 循环、do- while 循环 和 for 循环之间的那点事

    C语言自学之三种循环比较 使用循环计算1-2+3-4+5-6+--100的值?    在编辑器中给出了三种循环体结构的部分代码,请选择合适的循环结构补全代码实现此功能.    运行结果为: sum=- ...

  10. suse 11 sp3编译报错问题

    suse安装,默认是不安装gcc的,今天安装gcc之后,编译suse11 sp3,报错如下: scripts/basic/fixdep.c:106:23: fatal error: sys/types ...