rules_up.yml

groups:
- name: up
  rules:
  # Fires when the scrape of the MySQL target on instance "db1" has been
  # failing (up == 0) for at least 10 seconds.
  # The comparison was previously "!= 0", which fired while the target was
  # healthy and stayed silent when it was actually down.
  - alert: mysql
    expr: up{instance="db1",job="mysql"} == 0
    for: 10s
    labels:
      instance: '{{$labels.instance}}'
      # Quoted so the value stays a string rather than a YAML integer.
      priority: "3"
      type: 'mysql'
    annotations:
      # The text used to say "nginx-vts-web" (copy-paste from the nginx rule).
      description: '{{ $labels.instance }} mysql is down please handle'
      summary: 'jobname: {{$labels.instance}} mysql is down please handle'

  # Fires when any target of job "node" has been failing its scrape
  # (up == 0) for at least 10 seconds.
  # The comparison was previously "!= 0", which alerted on healthy targets
  # and never on a real outage.
  - alert: node
    expr: up{job="node"} == 0
    for: 10s
    labels:
      instance: '{{$labels.instance}}'
      # Quoted so the value stays a string rather than a YAML integer.
      priority: "3"
      type: 'node'
    annotations:
      # The text used to say "nginx-vts-web" (copy-paste from the nginx rule).
      description: '{{ $labels.instance }} node is down please handle'
      summary: 'jobname: {{$labels.instance}} node is down please handle'

  # Fires when the nginx target on instance "web" has been failing its
  # scrape (up == 0) for at least 10 seconds.
  # The comparison was previously "!= 0", which contradicted the "is down"
  # annotations by firing only while the target was reachable.
  - alert: nginx
    expr: up{instance="web",job="nginx"} == 0
    for: 10s
    labels:
      instance: '{{$labels.instance}}'
      # Quoted so the value stays a string rather than a YAML integer.
      priority: "3"
      type: 'nginx'
    annotations:
      description: '{{ $labels.instance }} nginx-vts-web  is down please handle'
      summary: 'jobname: {{$labels.instance}} nginx-vts-web is down please handle'

rules_mysql.yml

groups:
- name: mysql.rules
  rules:
  # Replication lag: seconds behind master minus the configured SQL delay.
  - expr: 'mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay'
    record: 'mysql_slave_lag_seconds'
  # Heartbeat lag: heartbeat "now" timestamp minus the stored timestamp.
  - expr: 'mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds'
    record: 'mysql_heartbeat_lag_seconds'
  # 5-minute rate of commit and rollback commands, summed over the
  # "command" label.
  - expr: |-
      sum(rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
      WITHOUT (command)
    record: 'job:mysql_transactions:rate5m'

  # Warns when wsrep_ready has been anything other than 1 for 5 minutes.
  - alert: 'MySQLGaleraNotReady'
    expr: 'mysql_global_status_wsrep_ready != 1'
    for: '5m'
    annotations:
      summary: 'Galera cluster node not ready'
      description: "{{$labels.job}} on {{$labels.instance}} is not ready."
    labels:
      severity: 'warning'

  # Warns when wsrep_local_state is not 4 while wsrep_desync is 0, sustained
  # for 5 minutes.
  - alert: 'MySQLGaleraOutOfSync'
    expr: |-
      (mysql_global_status_wsrep_local_state != 4 and mysql_global_variables_wsrep_desync
      == 0)
    for: '5m'
    annotations:
      summary: 'Galera cluster node out of sync'
      description: |-
        {{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}}
        != 4).
    labels:
      severity: 'warning'

  # Warns when wsrep_local_state is 2 and the local receive queue exceeds
  # 100, sustained for 5 minutes.
  - alert: 'MySQLGaleraDonorFallingBehind'
    expr: |-
      (mysql_global_status_wsrep_local_state == 2 and mysql_global_status_wsrep_local_recv_queue
      > 100)
    for: '5m'
    annotations:
      summary: 'xtradb cluster donor node falling behind'
      description: |-
        {{$labels.job}} on {{$labels.instance}} is a donor (hotbackup)
        and is falling behind (queue size {{$value}}).
    labels:
      severity: 'warning'

  # Critical when either the slave IO thread or the slave SQL thread metric
  # reads 0 for 2 minutes.
  - alert: 'MySQLReplicationNotRunning'
    expr: |-
      mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running
      == 0
    for: '2m'
    annotations:
      summary: 'Slave replication is not running'
      description: 'Slave replication (IO or SQL) has been down for more than 2 minutes.'
    labels:
      severity: 'critical'

  # Critical when the recorded slave lag exceeds 30 seconds and its linear
  # prediction 120 seconds ahead (from the last 5 minutes) is still above 0.
  - alert: 'MySQLReplicationLag'
    expr: |-
      (mysql_slave_lag_seconds > 30) and ON(instance) (predict_linear(mysql_slave_lag_seconds[5m],
      60 * 2) > 0)
    for: '1m'
    annotations:
      summary: 'MySQL slave replication is lagging'
      description: 'The mysql slave replication has fallen behind and is not recovering'
    labels:
      severity: 'critical'

  # Heartbeat-based variant of the replication lag alert: critical when the
  # recorded heartbeat lag exceeds 30 seconds and its linear prediction
  # 120 seconds ahead (from the last 5 minutes) is still above 0.
  - alert: 'MySQLReplicationLag'
    expr: |-
      (mysql_heartbeat_lag_seconds > 30) and ON(instance) (predict_linear(mysql_heartbeat_lag_seconds[5m],
      60 * 2) > 0)
    for: '1m'
    annotations:
      summary: 'MySQL slave replication is lagging'
      description: 'The mysql slave replication has fallen behind and is not recovering'
    labels:
      severity: 'critical'

  # Warns as soon as the 15-minute rate of InnoDB log waits exceeds 10 per
  # second; there is no "for" clause on this rule.
  - alert: 'MySQLInnoDBLogWaits'
    expr: 'rate(mysql_global_status_innodb_log_waits[15m]) > 10'
    annotations:
      summary: 'MySQL innodb log writes stalling'
      description: |-
        The innodb logs are waiting for disk at a rate of {{$value}} /
        second
    labels:
      severity: 'warning'

rules_nginx.yml

groups:
- name: aws_ec2_nginx-vts-web
  rules:
  # Fires when a target of job "nginx-vts-web" has reported up == 0 for
  # 1 minute.
  - alert: 'nginx-vts-web-status'
    expr: 'up{job="nginx-vts-web"} == 0'
    for: '1m'
    annotations:
      summary: "jobname: {{$labels.instance}} nginx-vts-web is down please handle"
      description: "{{ $labels.instance }} nginx-vts-web  is down please handle"
    labels:
      type: "nginx"
      priority: '3'
      instance: "{{$labels.instance}}"

  # Fires when the 5xx request counter, summed per (application, direction),
  # has grown by more than 10 compared with its value one minute earlier,
  # sustained for 1 minute.
  - alert: 'nginx application 5xx gt 10 per min'
    expr: 'sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m) by (application,direction) > 10'
    for: '1m'
    annotations:
      summary: "  {{$labels.application}} {{$labels.direction}}  >10/m"
      description: " {{$labels.application}} {{$labels.direction}} >10/m"
    labels:
      priority: '4'
      type: "nginx"

  # Per-filter variant of the 5xx alert: fires when the 5xx request counter,
  # summed per (application, direction, filter_name), has grown by more than
  # 1 compared with its value one minute earlier, sustained for 1 minute.
  - alert: nginx application 5xx gt 1 per min
    expr: sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m) by (application,direction,filter_name) > 1
    for: 1m
    labels:
      type: 'nginx-detail'
      # Quoted so the value stays a string rather than a YAML integer.
      priority: "4"
    annotations:
      # The text used to say ">10/m" although this rule's threshold is 1;
      # a space now separates direction from filter_name.
      description: ' {{$labels.application}} {{$labels.direction}} {{$labels.filter_name}} >1/m'
      summary: '  {{$labels.application}} {{$labels.direction}}  >1/m'

 # - alert: nginx application 2xx gt 10 per min

 #   expr: sum(nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} offset 1m) by (application,direction) > 10

 #   for: 1m

 #   labels:

 #     test: 'yes'

 #     priority: "4"

 #   annotations:

 #     description: ' {{$labels.application}} {{$labels.direction}} >10/m'

 #     summary: '  {{$labels.application}} {{$labels.direction}}  >10/m'

  # Fires when the request-seconds gauge, converted to milliseconds, stays
  # above 300 for 1 minute on any filter other than /v1/users/kyc/upload.
  - alert: 'nginx interface time  gt 300ms'
    expr: 'nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name!="/v1/users/kyc/upload"}*1000 > 300'
    for: '1m'
    annotations:
      summary: " {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >300ms"
      description: " {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}}  >300ms"
    labels:
      priority: '4'
      type: "nginx"

  # Dedicated latency alert for the /v1/users/kyc/upload filter: fires when
  # its request-seconds gauge, converted to milliseconds, stays above 2000
  # for 1 minute.
  - alert: nginx interface time  gt 2s
    expr: nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name="/v1/users/kyc/upload"}*1000 > 2000
    for: 1m
    labels:
      type: 'nginx'
      # Quoted so the value stays a string rather than a YAML integer.
      priority: "4"
    annotations:
      # The text used to say ">300ms" (copied from the 300 ms rule) although
      # this rule's threshold is 2000 ms.
      description: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}}  >2s'
      summary: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >2s'

rules_node.yml

groups:
- name: example-node-exporter-rules
  rules:
  # Fires when the number of idle-mode CPU series per target (that is, the
  # CPU count after aggregating away "cpu" and "mode") is greater than 2 for
  # at least 1 second.
  # NOTE(review): the name follows recording-rule naming (level:metric:op)
  # but this is an alert; the name is kept so existing routing still matches.
  - alert: instance:node_cpus:count
    expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode) > 2
    for: 1s
    annotations:
      # The previous text was copied from the nginx latency rule (">300ms",
      # with application/filter labels) and did not describe this rule.
      description: '{{$labels.instance}} reports {{$value}} CPUs (more than 2)'
      summary: '{{$labels.instance}} CPU count is above 2'

- name: node_up
  rules:
  # Fires when the target with instance="node" and job="node" has reported
  # an "up" value other than 1 for at least 1 second.
  # NOTE(review): this alert shares its name with the CPU-count alert in the
  # example-node-exporter-rules group; the name is kept so existing routing
  # still matches, but consider renaming it.
  - alert: instance:node_cpus:count
    expr: up{instance="node",job="node"} != 1
    for: 1s
    annotations:
      # Fixed the "$lables" typo, which made template expansion fail, and the
      # wording: the rule fires when the target is NOT up. "job" replaces
      # "application" because the selector above shows the series has a job
      # label.
      description: '{{$labels.job}} {{$labels.instance}} is down'
      summary: '{{$labels.job}} {{$labels.instance}} is down'

prometheus告警规则模板：MySQL，nginx，node的更多相关文章

Prometheus告警规则增删改自动化
Prometheus告警规则增删改自动化前言: 随着容器技术的发展,zabbix监控方式与k8s的结合不完善,导致不得不放弃zabbix,而新的监控工具prometheus的使用就越来越多了.但是经 ...
prometheus简单监控Linux,mysql,nginx
prometheus安装下载安装 #官网下载解压即可使用 https://prometheus.io/download/ #docker 方式安装 sudo docker run -n prome ...
prometheus 告警规则
GitHub网址1 https://github.com/samber/awesome-prometheus-alerts 网址2 https://awesome-prometheus-alerts. ...
02 . Prometheus告警处理
Prometheus告警简介告警能力在Prometheus的架构中被划分成两个独立的部分.如下所示,通过在Prometheus中定义AlertRule(告警规则),Prometheus会周期性的对告 ...
Prometheus告警处理
在Prometheus Server中定义告警规则以及产生告警,Alertmanager组件则用于处理这些由Prometheus产生的告警.Alertmanager即Prometheus体系中告警的统 ...
Prometheus告警模型分析
Prometheus作为时下最为流行的开源监控系统,其庞大的生态体系:包括针对各种传统应用的Exporter,完整的二次开发工具链,与Kubernetes等主流平台的高度亲和以及由此带来的强大的自发现 ...
Prometheus自身的监控告警规则
1.先在 Prometheus 主程序目录下创建rules目录,然后在该目录下创建 prometheus-test.yml文件,内容如下: 内容很多,可以根据实际情况进行调整. 规则参考网址:http ...
实用干货丨如何使用Prometheus配置自定义告警规则
前言 Prometheus是一个用于监控和告警的开源系统.一开始由Soundcloud开发,后来在2016年,它迁移到CNCF并且称为Kubernetes之后最流行的项目之一.从整个Linux服务器 ...
Prometheus中使用的告警规则
参考网站:https://awesome-prometheus-alerts.grep.to/rules 这个网站上有好多常用软件的告警规则,但是有些并不一定实用,有些使用起来会有错误,这里就把这些都 ...

随机推荐

常用的函数式接口_Predicate接口和常用的函数式借楼_Predicate_默认方法and
package com.yang.Test.PredicateStudy; import java.util.function.Predicate; /** * java.util.function. ...
netcore 非注入全局获取配置文件
在netcore开发中,最常见的就是注入,比如想获取appsettings.json的内容,我们就需要去注入,然后在controller里面去获取,但是我们如果想要在service中使用appsett ...
MYSQL的Java操作器——JDBC
MYSQL的Java操作器--JDBC 在学习了Mysql之后,我们就要把Mysql和我们之前所学习的Java所结合起来而JDBC就是这样一种工具:帮助我们使用Java语言来操作Mysql数据库 J ...
针对多个球体的World类
World类其他都一样的,就修改build函数就行了,以后测试所有代码,都是基于两个或多个球体的,不再重复阐述. void World::build() { vp.set_hres(200); vp. ...
DolphinScheduler - 1.3 系列核心表结构剖析
Apache DolphinScheduler 是一个分布式去中心化,易扩展的可视化 DAG 工作流任务调度系统.致力于解决数据处理流程中错综复杂的依赖关系,使调度系统在数据处理流程中开箱即用. 近日 ...
Linux 08 磁盘管理
参考源 https://www.bilibili.com/video/BV187411y7hF?spm_id_from=333.999.0.0 版本本文章基于 CentOS 7.6 概述 Linux ...
彩虹女神跃长空,Go语言进阶之Go语言高性能Web框架Iris项目实战-项目入口与路由EP01
书接上回,我们已经安装好Iris框架,并且构建好了Iris项目,同时配置了fresh自动监控项目的实时编译,万事俱备,只欠东风,彩虹女神蓄势待发.现在我们来看看Iris的基础功能,如何编写项目入口文件 ...
延时任务-基于netty时间轮算法实现
一.时间轮算法简介为了大家能够理解下文中的代码,我们先来简单了解一下netty时间轮算法的核心原理时间轮算法名副其实,时间轮就是一个环形的数据结构,类似于表盘,将时间轮分成多个bucket(比如: ...
[CF1527D] MEX Tree （lca）
题面给你一棵 n n n 个结点的树,对于所有的 k ∈ [ 0 , n ] k\in[0,n] k∈[0,n] ,求出 M E X = k {\rm MEX}=k MEX=k 的路径数量. 一条路 ...
快速搭建 SpringCloud Alibaba Nacos 配置中心！
Spring Cloud Alibaba 是阿里巴巴提供的一站式微服务开发解决方案,目前已被 Spring Cloud 官方收录.而 Nacos 作为 Spring Cloud Alibaba 的核心 ...

prometheus告警规则模板：MySQL，nginx，node

rules_up.yml

rules_mysql.yml

rules_nginx.yml

rules_node.yml

prometheus告警规则模板：MySQL，nginx，node的更多相关文章

随机推荐

热门专题